that – Page 3 – this and that @ work

def get_url(search_text):
    """Build an Amazon search URL template for the given search text.

    Args:
        search_text: Free-form search phrase, e.g. 'ultrawide curved monitor'.

    Returns:
        A URL string that still contains one ``{}`` placeholder for the page
        number; fill it in with ``url.format(page)``.
    """
    # Imported locally so this function does not rely on file-level imports.
    from urllib.parse import quote_plus

    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'
    # quote_plus turns spaces into '+' (as before) and additionally escapes
    # characters such as '&', '#', '{' and '}' that would otherwise corrupt
    # the query string or break the later str.format() call.
    search_term = quote_plus(search_text)
    url = template.format(search_term)
    # The '=' is required: without it format(page) produced '&page3', which
    # is not a page parameter at all.
    url += '&page={}'
    return url

def extract_record(item):
    """Extract one search-result record from a parsed result element.

    Args:
        item: A parsed search-result element exposing ``.h2.a``, ``.i`` and
            ``.find(...)`` (a BeautifulSoup tag in ``main``).

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when the item has no price. Fields other than price fall
        back to an empty string when their element is missing.
    """
    # Description and product link come from the heading's anchor tag.
    try:
        atag = item.h2.a
        description = atag.text.strip()
        href = atag.get('href')
    except AttributeError:
        description = ''
        href = None
    # An anchor without an href must not raise on string concatenation.
    url = ('https://www.amazon.com' + href) if href else ''

    # Price is mandatory: items without one are skipped by the caller.
    try:
        price_parent = item.find('span', 'a-price')
        price = price_parent.find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # Rating and review count are looked up independently so that a missing
    # review count does not throw away a rating that was found.
    try:
        rating = item.i.text
    except AttributeError:
        rating = ''
    try:
        review_count = item.find('span', {'class': 'a-size-base', 'dir': 'auto'}).text
    except AttributeError:
        review_count = ''

    return (description, price, rating, review_count, url)

def main(search_term, max_pages=10):
    """Scrape Amazon search results for a term and save them to a CSV file.

    Loads result pages 1..max_pages in Chrome, extracts one record per
    search result, and writes them to '<search_term>.csv' (relative path).

    Args:
        search_term: Search phrase; also used as the CSV file name stem.
        max_pages: Number of result pages to visit (default 10, as before).
    """
    options = Options()
    # open chrome with incognito option
    options.add_argument("--incognito")
    # To run with the Chrome window hidden, also add the "--headless" argument.

    # change chrome driver path accordingly
    driver = webdriver.Chrome("C:\\webdrivers\\chromedriver.exe", options=options)

    records = []
    url = get_url(search_term)

    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                # extract_record returns None for items it cannot use.
                if record:
                    records.append(record)
    finally:
        # Always release the browser, even if a page load or parse fails.
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()

    # save data to csv file
    with open(search_term + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

This code produces a CSV file containing the review title, reviewer, review date, rating, product size, verified-purchase flag, and review text. You will need the Amazon Standard Identification Numbers (ASINs) of your products, which you can find by searching for them on the Amazon website.

Finding ASINs in Amazon.com

Type a product name in the search bar and click the link of the product you are looking for to open its product page. The ASIN can be found in the URL in the address bar; it usually comes right after ‘/dp/’.

I have used ‘ultrawide curved monitor’ in this example and added 2 ASINs to the code. You can add as many ASINs as you need, separated by commas (‘,’).

Python code for product review

# import libraries

from selenium import webdriver
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options

# define url

def get_url(ASIN):
    """Return the product-review URL template for one ASIN.

    The result keeps a single ``{}`` placeholder for the page number, to be
    filled in later with ``url.format(page)``.
    """
    reviews_template = 'https://www.amazon.com/product-reviews/{}/ref=cm_cr_getr_d_paging_btm_next_3?sortBy=recent'
    page_suffix = '&pageNumber={}'
    # Fill in the ASIN first; the page placeholder is appended untouched.
    return reviews_template.format(ASIN) + page_suffix

# Extract and return data from a single record

def _text_of(item, tag, css_class):
    """Return the stripped text of the first matching element, or '' if none."""
    node = item.find(tag, css_class)
    return node.text.strip() if node is not None else ''


def extract_record(item):
    """Extract one review record from a parsed review element.

    Every field is looked up independently and falls back to an empty string
    when its element is missing, so one absent element no longer blanks other
    fields or aborts the whole scrape.

    Args:
        item: A parsed review element exposing ``.find(tag, css_class)``
            (a BeautifulSoup tag in ``main``).

    Returns:
        A tuple ``(title, profile, date, rating, product_size,
        verified_purchase, review)``, or ``None`` when the element yields no
        profile, title or review text at all.
    """
    profile = _text_of(item, 'span', 'a-profile-name')
    title = _text_of(item, 'a', 'a-size-base a-link-normal review-title a-color-base review-title-content a-text-bold')
    review = _text_of(item, 'span', 'a-size-base review-text review-text-content')

    # Nothing recognisable as a review: let the caller skip this element.
    if not (profile or title or review):
        return None

    # Assumes the first 'a-row' div's text starts with the profile name
    # immediately followed by a 3-character rating such as '5.0'
    # -- TODO confirm against the current page markup.
    row_text = _text_of(item, 'div', 'a-row')
    rating = row_text[len(profile):len(profile) + 3]

    # Looked up separately: reviews without a size variant can still carry
    # the verified-purchase badge.
    product_size = _text_of(item, 'a', 'a-size-mini a-link-normal a-color-secondary')
    verified_purchase = _text_of(item, 'span', 'a-size-mini a-color-state a-text-bold')

    # Take the text after the last ' on ' as the date. Matching the bare
    # substring 'on' would hit words such as 'Hong Kong'. When ' on ' is
    # absent, rpartition leaves the whole string in the last slot.
    review_date = _text_of(item, 'span', 'a-size-base a-color-secondary review-date')
    date = review_date.rpartition(' on ')[2]

    return (title, profile, date, rating, product_size, verified_purchase, review)

# Run main program routine

def main(ASIN, max_pages=10):
    """Scrape the most recent reviews of one product and save them to CSV.

    Loads review pages 1..max_pages in Chrome, extracts one record per
    review element, and writes them to '<ASIN>.csv' (relative path).

    Args:
        ASIN: Amazon Standard Identification Number of the product; also
            used as the CSV file name stem.
        max_pages: Number of review pages to visit (default 10, as before).
    """
    # startup the webdriver
    options = Options()
    # open chrome in incognito mode
    options.add_argument("--incognito")
    # To run without opening a Chrome window, also add the "--headless" argument.

    # my chrome driver location. change it accordingly
    driver = webdriver.Chrome("C:\\webdrivers\\chromedriver.exe", options=options)

    records = []
    url = get_url(ASIN)

    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'class': 'a-section review aok-relative'})
            for item in results:
                record = extract_record(item)
                # Skip elements for which no record was produced.
                if record:
                    records.append(record)
    finally:
        # Always release the browser, even if a page load or parse fails.
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()

    # save data to csv file
    with open(ASIN + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Title', 'Profile', 'Date', 'Rating', 'ProductSize', 'Verified_Purchase', 'Review'])
        writer.writerows(records)

# Run the scraper once per ASIN listed below; add more ASINs as extra
# comma-separated strings. A list (rather than a set) is used so products are
# processed in the order written here on every run.
ASINs = ['B07YGZ7C1K', 'B0812DKDD9']
for ASIN in ASINs:
    main(ASIN)

The idea came from the Amazon price tracking tutorial on the Izzy Analytics YouTube channel. Please visit the Izzy Analytics channel for the original posting: https://www.youtube.com/channel/UCWHJGQc7Vqo37dlUmpiK6hQ.

Category: that

Python – Web scraping AMAZON Price Tracking

Python – Web scraping AMAZON Product Review