Executable stand-alone Amazon Price Tracking tool using Python with tkinter library

Creates a stand-alone executable tool to scrape Amazon prices.

Please send me your email address and public IP if you are interested in the tool. Customizations are also available (a different store, saving the search results to a database, scheduling a report to be sent as an email attachment, etc.).

Requirement:

  1. Create ‘webscraptool.py’ and copy & paste the ‘Main Code’ below (assuming you have Python installed on your machine).
  2. Download and save chromedriver under the ./driver/ folder. Create the ‘driver’ folder in the same folder where you saved ‘webscraptool.py’. (chromedriver download: https://chromedriver.chromium.org/downloads)
  3. To create stand-alone ‘webscraptool.exe’, install ‘pyinstaller’ (pip install pyinstaller)
  4. Run the code below in command line. This will create the stand-alone ‘.exe’ file.
 pyinstaller -F -w --add-binary "./driver/chromedriver.exe;./driver" webscraptool.py

Main Code:

import csv
import datetime
import os
import sys
import time
from tkinter import *
from tkinter import filedialog
import tkinter.font as font
import tkinter.messagebox as msgbox
import tkinter.ttk as ttk

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# web scraping main code block
def resource_path(relative_path):
    """Resolve *relative_path* to an absolute path that works both from
    source and from a frozen PyInstaller bundle.

    When the app is bundled with PyInstaller, data files are unpacked into
    a temp directory exposed as ``sys._MEIPASS``; otherwise resolve against
    the current working directory.
    """
    base_path = getattr(sys, '_MEIPASS', None)
    if base_path is None:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)

def get_url(search_text):
    """Build an Amazon search URL template from free-form search text.

    Args:
        search_text: product search terms, e.g. ``"usb hub"``.

    Returns:
        URL string containing a ``{}`` placeholder for the page number;
        call ``url.format(page)`` to get a concrete page URL.
    """
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'
    # Spaces become '+' inside the k= query parameter.
    search_term = search_text.replace(' ', '+')

    # add term query to url
    url = template.format(search_term)

    # BUG FIX: was '&page{}' (missing '='), producing a malformed query so
    # every iteration of the caller's loop re-scraped page 1.
    url += '&page={}'

    return url

def extract_record(item):
    """Extract (description, price, rating, review_count, url) from one
    search-result tag.

    Returns None when the card has no price block (e.g. an empty or
    placeholder result), so callers can skip it.
    """
    # Description and product link live in the h2 > a element.
    try:
        link = item.h2.a
        description = link.text.strip()
        url = 'https://www.amazon.com' + link.get('href')
    except AttributeError:
        description = ''
        url = ''

    # A record without a price is dropped entirely.
    try:
        price = item.find('span', 'a-price').find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # Rating and review count are optional; fall back to empty strings.
    try:
        rating = item.i.text
        review_count = item.find('span', {'class': 'a-size-base', 'dir': 'auto'}).text
    except AttributeError:
        rating = ''
        review_count = ''

    return (description, price, rating, review_count, url)

def main(search_term):
    """Scrape the first 10 Amazon result pages for *search_term* and save
    the records to '<search_term>.csv' in the current directory.

    Each CSV row is (Description, Price, Rating, ReviewCount, Url).
    """
    # Start a headless, incognito Chrome session using the bundled driver.
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--headless")
    driver = webdriver.Chrome(resource_path("./driver/chromedriver.exe"), options=options)

    records = []
    url = get_url(search_term)

    try:
        for page in range(1, 11):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                if record:
                    records.append(record)
    finally:
        # BUG FIX: use quit() in a finally block instead of a bare close().
        # close() only closes the window, leaving the chromedriver process
        # running, and the original leaked the session entirely whenever a
        # page load raised.
        driver.quit()

    # save data to csv file
    with open(search_term + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

# GUI tkinter codes begin

# Root window with a bold title label at the top.
root = Tk()
root.title("Web Scrapper")
#root.geometry("640x480")
myFont=font.Font(size=12, weight="bold")
label=Label(root, text="Amazon Price Tracker")
label['font']=myFont
label.pack(side="top", padx=5, pady=5, ipady=3)

# search text frame

frm_search = LabelFrame(root, text="Search")
frm_search.pack(fill="x", padx=5, pady=5, ipady=3)

# Single-line entry where the user types a product search term;
# pre-filled with placeholder text.
e = Entry(frm_search, width=30)
e.pack(fill="x", padx=5, pady=5, ipady=5)
e.insert(0, "Enter products to search")

def add_txt():
    """Append the current Entry text to the end of the search list box."""
    text = e.get()
    search_list.insert(END, text)

def remove_txt():
    """Delete every selected row from the search list box.

    Rows are removed bottom-up so earlier deletions do not shift the
    indices of rows still waiting to be deleted.
    """
    for index in search_list.curselection()[::-1]:
        search_list.delete(index)

# NOTE(review): `datetime` was never imported in the original file, so these
# two lines raise NameError at startup (an `import datetime` is required).
# `x` and `y` are never used anywhere below — this looks like leftover
# debug/scratch code; confirm and remove.
x = datetime.datetime(2021, 1, 15)
y = datetime.datetime.now()

def start():
    """Scrape every product in the list box, updating the progress bar.

    Shows a warning dialog and aborts if the list is empty.
    """
    if search_list.size() == 0:
        msgbox.showwarning("Warning", "Add products to search")
        return

    search_terms = search_list.get(0, END)
    total = len(search_terms)

    for idx, search_term in enumerate(search_terms):
        main(search_term)
        # Reflect the completed fraction as a 0-100 percentage.
        p_var.set((idx + 1) / total * 100)
        progressbar.update()

# add/remove search text frame
frm_search1 = LabelFrame(root, text="Add/Remove Products")
frm_search1.pack(fill="x", padx=5, pady=5, ipady=3)

# Buttons to add the Entry text to the list or remove selected rows.
btn_remove = Button(frm_search1, text="Remove", padx=5, pady=5, width=12, command=remove_txt)
btn_add = Button(frm_search1, text="Add", padx=5, pady=5, width=12, command=add_txt)

btn_remove.pack(side="right", padx=5, pady=5)
btn_add.pack(side="right", padx=5, pady=5)

#list frame
# Scrollable multi-select list of queued search terms.
frm_list = Frame(root)
frm_list.pack(fill="both", padx=5, pady=5)


scrollbar = Scrollbar(frm_list)
scrollbar.pack(side="right", fill="y")

search_list = Listbox(frm_list, selectmode="extended", height=10, yscrollcommand=scrollbar.set)
search_list.pack(side="left", fill="both", expand=True)
scrollbar.config(command=search_list.yview)

# progress bar frame
frm_progress = LabelFrame(root, text="Progress")
frm_progress.pack(fill="x", padx=5, pady=5, ipady=3)

# p_var is updated by start() as each search term finishes (0-100).
p_var = DoubleVar()
progressbar = ttk.Progressbar(frm_progress, maximum=100, length=150, variable=p_var)
progressbar.pack(fill="x", padx=5, pady=5)



# run/exit button frame
frm_run = LabelFrame(root)
frm_run.pack(fill="x", padx=5, pady=5)

btn_exit = Button(frm_run, text="Exit", padx=5, pady=5, width=12, command=root.quit)
btn_start = Button(frm_run, text="Start", padx=5, pady=5, width=12, command=start)

btn_exit.pack(side="right", padx=5, pady=5)
btn_start.pack(side="right", padx=5, pady=5)

# Enter the Tk event loop; blocks until the window is closed.
root.mainloop()