Python – Web scraping AMAZON Price Tracking

This code writes a csv file containing the product name, price, rating, review count, and URL for each search term. Multiple search terms can be supplied by adding them to the collection of search terms at the bottom of the script.

# import libraries
from selenium import webdriver
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# define URL using template
def get_url(search_text):
    """Build the Amazon search URL template for a search phrase.

    The returned string still contains one ``{}`` placeholder for the page
    number; callers fill it in with ``url.format(page)``.

    Args:
        search_text: The phrase to search for, e.g. ``'gaming monitor'``.

    Returns:
        The search URL with the encoded phrase and a ``&page={}`` suffix.
    """
    from urllib.parse import quote_plus

    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'
    # quote_plus turns spaces into '+' and percent-encodes everything else
    # that is unsafe in a query string. Encoding '{' and '}' also keeps the
    # caller's later str.format(page) from tripping over the search text.
    search_term = quote_plus(search_text)
    url = template.format(search_term)
    # The page number must be sent as a key=value pair; without the '=' the
    # query parameter is malformed and no page number reaches the site.
    url += '&page={}'

    return url
# Extract and return data from a single record
def extract_record(item):
    """Extract one product record from a single search-result element.

    Args:
        item: A parsed search-result element (a BeautifulSoup tag in this
            script) exposing ``.h2``, ``.i`` and ``.find(...)``.

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when the element has no price. Every field other than the
        price falls back to an empty string when it cannot be found.
    """
    # Title and product link live in the anchor inside the <h2> heading.
    heading = item.h2
    atag = heading.a if heading is not None else None
    description = atag.text.strip() if atag is not None else ''
    # An anchor without an href would otherwise raise TypeError on the
    # string concatenation, so treat it as "no url".
    href = atag.get('href') if atag is not None else None
    url = 'https://www.amazon.com' + href if href else ''

    # Results without a price are skipped entirely.
    price_parent = item.find('span', 'a-price')
    price_tag = None
    if price_parent is not None:
        price_tag = price_parent.find('span', 'a-offscreen')
    if price_tag is None:
        return None
    price = price_tag.text

    # Rating and review count are looked up independently so that a missing
    # one does not throw away the other.
    rating_tag = item.i
    rating = rating_tag.text if rating_tag is not None else ''

    review_tag = item.find('span', {'class': 'a-size-base', 'dir': 'auto'})
    review_count = review_tag.text if review_tag is not None else ''

    return (description, price, rating, review_count, url)
# Scrape the search result pages for one search term and save the records to a csv file
def main(search_term):
    """Scrape search results for one term and write them to a csv file.

    Starts Chrome in incognito mode, requests the search URL formatted with
    page numbers 1 through 10, collects every record that extract_record
    returns, and writes them to ``<search_term>.csv`` in the current
    working directory.

    Args:
        search_term: The search phrase; also used as the csv file name.
    """
    options = Options()
    # open chrome with incognito option
    options.add_argument("--incognito")
    # by turning this on, code will run with chrome browser hidden
    #options.add_argument("--headless")

    # change chrome driver path accordingly
    # NOTE(review): passing the driver path positionally is the older
    # Selenium calling convention - confirm it matches the installed version.
    driver = webdriver.Chrome("C:\\webdrivers\\chromedriver.exe", options=options)

    records = []
    url = get_url(search_term)

    try:
        for page in range(1, 11):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                # extract_record returns None for results without a price
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole WebDriver session, and running it in a
        # finally block releases the browser even when a page load or
        # parse step raises.
        driver.quit()

    # save data to csv file
    with open(search_term + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)
# run program
# A list (rather than a set) keeps the terms in the order written here, so
# the searches run in the same order on every run.
search_terms = ['ultrawide curved monitor', 'gaming monitor curved']
for search_term in search_terms:
    # Each term gets its own browser session and its own csv file.
    main(search_term)

This code was originally published by the YouTube channel Izzy Analytics. I have another post on product view data, which uses the same concept. Please visit the Izzy Analytics channel for the original posting: https://www.youtube.com/channel/UCWHJGQc7Vqo37dlUmpiK6hQ.

Leave a Reply

Discover more from this and that @ work

Subscribe now to keep reading and get access to the full archive.

Continue reading