Python – Web scraping AMAZON Price Tracking

This code writes a CSV file with the product name, price, rating, review count, and URL for each search term entered. Multiple search terms can be entered, separated by commas.

# import libraries
from selenium import webdriver
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# define URL using template
def get_url(search_text):
    """Build the Amazon search URL for *search_text* with an open page slot.

    The returned string still contains one ``{}`` placeholder; the caller
    fills it in with ``url.format(page_number)``.
    """
    # Local import so this function stays self-contained.
    from urllib.parse import quote_plus

    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'
    # quote_plus turns spaces into '+' and percent-encodes characters such as
    # '&', '{' and '}' that would otherwise corrupt the query string or break
    # the later str.format() call on the returned URL.
    search_term = quote_plus(search_text)
    url = template.format(search_term)
    # The page parameter needs '=' so the page number is sent as page=N.
    url += '&page={}'

    return url
# Extract and return data from a single record
def extract_record(item):
    """Extract one product row from a search-result element.

    Returns a tuple ``(description, price, rating, review_count, url)``.
    Fields that cannot be found are returned as empty strings, except the
    price: a result without a price is skipped by returning ``None``.
    """
    # Title and link live on the anchor inside the result's <h2>.
    try:
        atag = item.h2.a
        description = atag.text.strip()
        href = atag.get('href')
    except AttributeError:
        description = ''
        href = None
    # A missing href would make the string concatenation raise TypeError.
    url = 'https://www.amazon.com' + href if href else ''

    # Results without a price are not useful for price tracking; skip them.
    try:
        price_parent = item.find('span', 'a-price')
        price = price_parent.find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # Rating and review count are looked up independently so that a missing
    # review count does not discard a rating that was found (and vice versa).
    try:
        rating = item.i.text
    except AttributeError:
        rating = ''

    try:
        review_count = item.find('span', {'class': 'a-size-base', 'dir': 'auto'}).text
    except AttributeError:
        review_count = ''

    return (description, price, rating, review_count, url)
# Scrape the search result pages for one search term and save the records to a csv file
def main(search_term):
    """Scrape Amazon search results for *search_term* and save them as CSV.

    Loads result pages 1 to 10 in Chrome, extracts one record per search
    result, and writes them to ``<search_term>.csv`` in the current
    working directory.
    """
    # Build the URL first so a failure here cannot leave a browser open.
    url = get_url(search_term)

    options = Options()
    # open chrome with incognito option
    options.add_argument("--incognito")
    # uncomment the next line to run with the chrome browser hidden
    # options.add_argument("--headless")

    # change chrome driver path accordingly
    driver = webdriver.Chrome("C:\\webdrivers\\chromedriver.exe", options=options)

    records = []
    try:
        for page in range(1, 11):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                # extract_record returns None for results it wants skipped
                if record:
                    records.append(record)
    finally:
        # Always release the browser, even if a page load or parse fails.
        # quit() ends the whole WebDriver session; close() only closes the
        # current window.
        driver.quit()

    # save data to csv file
    with open(search_term + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)
# run program
# A list (rather than a set) keeps the scraping order the same on every run.
search_terms = ['ultrawide curved monitor', 'gaming monitor curved']
for search_term in search_terms:
    main(search_term)

This was originally from the YouTube channel Izzy Analytics. I have another post on product review data, which uses the same concept. Please visit the Izzy Analytics channel for the original posting: https://www.youtube.com/channel/UCWHJGQc7Vqo37dlUmpiK6hQ.

Python – Web scraping AMAZON Product Review

This code collects product review data from Amazon.com.

It writes a CSV file with the review title, reviewer, review date, rating, product size, verified purchase, and review details. You will need to find the Amazon Standard Identification Numbers (ASINs) by searching for your products on the Amazon web site.

Finding ASINs in Amazon.com

Type a product name in the search bar and click the link of the product you are looking for to open its product page. The ASIN can be found in the URL in the address bar; it usually comes right after ‘/dp/’.

I have used ‘ultrawide curved monitor’ in this example and added 2 ASINs in the code. You can add as many ASINs as you need, separated by commas (‘,’).

Python code for product review

# import libraries
from selenium import webdriver
import csv
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# define url
def get_url(ASIN):
    """Build the review-listing URL for one product, leaving the page open.

    The result ends with a ``{}`` placeholder that the caller fills in with
    the page number via ``url.format(page)``.
    """
    base = 'https://www.amazon.com/product-reviews/{}/ref=cm_cr_getr_d_paging_btm_next_3?sortBy=recent'
    page_suffix = '&pageNumber={}'
    return base.format(ASIN) + page_suffix
# Extract and return data from a single record
def _find_text(item, tag, css_class):
    """Return the stripped text of the first matching element, or '' if none."""
    element = item.find(tag, css_class)
    return element.text.strip() if element is not None else ''


def extract_record(item):
    """Extract one review row from a review element.

    Returns a tuple ``(title, profile, date, rating, product_size,
    verified_purchase, review)``. Any field whose element is not found is
    returned as an empty string, so one incomplete review does not abort
    the whole scrape.
    """
    profile = _find_text(item, 'span', 'a-profile-name')

    # The rating is read as the three characters that follow the reviewer
    # name in the text of the first 'a-row' div.
    # NOTE(review): this relies on the page layout - confirm against the
    # current review markup.
    row_text = _find_text(item, 'div', 'a-row')
    rating = row_text[len(profile):len(profile) + 3]

    title = _find_text(
        item,
        'a',
        'a-size-base a-link-normal review-title a-color-base review-title-content a-text-bold',
    )

    # Looked up independently: a review can lack a size/variant link and
    # still carry the verified badge (or the other way round).
    product_size = _find_text(item, 'a', 'a-size-mini a-link-normal a-color-secondary')
    verified_purchase = _find_text(item, 'span', 'a-size-mini a-color-state a-text-bold')

    review = _find_text(item, 'span', 'a-size-base review-text review-text-content')

    review_date = _find_text(item, 'span', 'a-size-base a-color-secondary review-date')
    # Keep only what follows the last standalone ' on '. Searching for the
    # bare substring 'on' would also match inside words such as "Hong".
    # If ' on ' is absent the full text is kept instead of raising.
    date = review_date.rpartition(' on ')[2]

    return (title, profile, date, rating, product_size, verified_purchase, review)
# Run main program routine
def main(ASIN):
    """Scrape the reviews of the product *ASIN* and save them as CSV.

    Loads review pages 1 to 10 in Chrome, extracts one record per review,
    and writes them to ``<ASIN>.csv`` in the current working directory.
    """
    # Build the URL first so a failure here cannot leave a browser open.
    url = get_url(ASIN)

    # startup the webdriver
    options = Options()
    # open chrome in incognito mode
    options.add_argument("--incognito")

    # uncomment the next line to run the code without opening chrome
    # options.add_argument("--headless")

    # my chrome driver location. change it accordingly
    driver = webdriver.Chrome("C:\\webdrivers\\chromedriver.exe", options=options)

    records = []
    try:
        for page in range(1, 11):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'class': 'a-section review aok-relative'})
            for item in results:
                record = extract_record(item)
                if record:
                    records.append(record)
    finally:
        # Always release the browser, even if a page load or parse fails.
        # quit() ends the whole WebDriver session; close() only closes the
        # current window.
        driver.quit()

    # save data to csv file
    with open(ASIN + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Title', 'Profile', 'Date', 'Rating', 'ProductSize', 'Verified_Purchase', 'Review'])
        writer.writerows(records)
# Run program for entered ASINs. Multiple ASINs can be run with comma separator

# A list (rather than a set) keeps the scraping order the same on every run.
ASINs = ['B07YGZ7C1K', 'B0812DKDD9']
for ASIN in ASINs:
    main(ASIN)

The idea came from the Amazon price tracking tutorial on the YouTube channel Izzy Analytics. Please visit the Izzy Analytics channel for the original posting: https://www.youtube.com/channel/UCWHJGQc7Vqo37dlUmpiK6hQ.