Please send me your email address and public IP if you are interested in the tool. Customizations are also available (e.g., targeting a different store, saving the search results to a database, or scheduling a report to be sent as an email attachment).

Requirement:
- Create ‘webscraptool.py’, then copy and paste the ‘Main Code’ below into it (assuming you have Python installed on your machine).
- Download chromedriver and save it under the ./driver/ folder. Create the ‘driver’ folder in the same folder where you saved ‘webscraptool.py’. (chromedriver download: https://chromedriver.chromium.org/downloads)
- To create stand-alone ‘webscraptool.exe’, install ‘pyinstaller’ (pip install pyinstaller)
- Run the code below in command line. This will create the stand-alone ‘.exe’ file.
pyinstaller -F -w --add-binary "./driver/chromedriver.exe;./driver" webscraptool.py
Main Code:
import csv
import datetime
import os
import sys
import time
from tkinter import *
import tkinter.ttk as ttk
from tkinter import filedialog
import tkinter.messagebox as msgbox
import tkinter.font as font
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Web-scraping code
def resource_path(relative_path):
    """Return the absolute path of a resource shipped with the program.

    When the attribute ``sys._MEIPASS`` exists (PyInstaller sets it in a
    bundled executable to the folder holding the unpacked files), the path
    is resolved against it; otherwise it is resolved against the current
    working directory.

    Args:
        relative_path: Path of the resource relative to the base folder.

    Returns:
        The base folder joined with ``relative_path``.
    """
    # getattr avoids the blanket ``except Exception`` the lookup used to be
    # wrapped in, which also hid unrelated errors such as a missing import.
    base_path = getattr(sys, "_MEIPASS", os.path.abspath("."))
    return os.path.join(base_path, relative_path)
def get_url(search_text):
    """Build the Amazon search URL template for ``search_text``.

    Args:
        search_text: The words to search for, as typed by the user.

    Returns:
        A URL string that still contains one ``{}`` placeholder for the
        page number; callers fill it with ``url.format(page)``.
    """
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'
    # quote_plus turns spaces into '+' and percent-encodes everything else,
    # so characters such as '&', '#', '{' or '}' in the search text can
    # neither break the query string nor the later ``.format(page)`` call.
    url = template.format(quote_plus(search_text))
    # The page number must be passed as the value of a ``page`` parameter.
    url += '&page={}'
    return url
def extract_record(item):
    """Extract the data of a single search result.

    Args:
        item: One parsed search-result element (in this file, the elements
            returned by ``soup.find_all`` in ``main``).

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when no price can be found for the item. Description/url
        and rating/review count fall back to empty strings when missing.
    """
    # description and url
    try:
        atag = item.h2.a
        description = atag.text.strip()
        href = atag.get('href')
    except AttributeError:
        description = ''
        href = None
    # An anchor without an href yields None; concatenating that would raise
    # TypeError, so treat it like a missing link instead.
    if href is None:
        url = ''
    else:
        url = 'https://www.amazon.com' + href

    # product price -- items without one are skipped entirely
    try:
        price_parent = item.find('span', 'a-price')
        price = price_parent.find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # rating and review count -- both are blanked if either is missing
    try:
        rating = item.i.text
        review_count = item.find('span', {'class': 'a-size-base', 'dir': 'auto'}).text
    except AttributeError:
        rating = ''
        review_count = ''

    return (description, price, rating, review_count, url)
def main(search_term):
    """Scrape the search results for ``search_term`` and save them as CSV.

    Opens a headless, incognito Chrome session, loads result pages 1 to 10
    of the search URL, collects every record that ``extract_record``
    returns, and writes them to ``<search_term>.csv`` in the current
    working directory.

    Args:
        search_term: The product search text; also used as the base name of
            the output file.
    """
    options = Options()
    # startup the webdriver
    options.add_argument("--incognito")
    options.add_argument("--headless")
    # NOTE(review): the driver path is passed positionally; newer Selenium
    # releases may expect a Service object instead -- confirm against the
    # installed version.
    driver = webdriver.Chrome(resource_path("./driver/chromedriver.exe"), options=options)
    records = []
    url = get_url(search_term)
    try:
        for page in range(1, 11):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            # extract_record returns None for items without a price
            records.extend(record for record in map(extract_record, results) if record)
    finally:
        # quit() ends the whole browser session, and running it in
        # ``finally`` means a failed page load cannot leave it behind.
        driver.quit()

    # Characters that are not allowed in Windows file names would make the
    # open() below fail after all the scraping work is already done.
    safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in search_term)

    # save data to csv file
    with open(safe_name + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)
# Tkinter GUI code begins
# Main application window
root = Tk()
root.title("Web Scrapper")

# Bold heading shown at the top of the window
myFont = font.Font(size=12, weight="bold")
label = Label(root, text="Amazon Price Tracker", font=myFont)
label.pack(side="top", padx=5, pady=5, ipady=3)

# "Search" frame with the text entry, pre-filled with a hint for the user
frm_search = LabelFrame(root, text="Search")
frm_search.pack(fill="x", padx=5, pady=5, ipady=3)

e = Entry(frm_search, width=30)
e.pack(fill="x", padx=5, pady=5, ipady=5)
e.insert(0, "Enter products to search")
def add_txt():
    """Append the text of the entry box to the search list.

    Surrounding whitespace is removed, and blank input is ignored so that
    an empty search term is never queued.
    """
    term = e.get().strip()
    if not term:
        return
    search_list.insert(END, term)
def remove_txt():
    """Delete every currently selected entry from the search list."""
    selected_rows = search_list.curselection()
    # Go through the selection back to front so that removing one row does
    # not shift the positions of the rows still waiting to be removed.
    for row in selected_rows[::-1]:
        search_list.delete(row)
# A fixed reference date and the time at which the script started.
# NOTE(review): neither value is read anywhere in the visible code --
# confirm whether they are still needed.
x = datetime.datetime(year=2021, month=1, day=15)
y = datetime.datetime.now()
def start():
    """Run the scraper for every queued search term.

    Shows a warning and does nothing when the list is empty. Otherwise the
    progress bar is reset, ``main`` is called once per term, and the bar is
    advanced after each term. If a search raises, an error dialog is shown
    and the remaining terms are not processed.
    """
    if search_list.size() == 0:
        msgbox.showwarning("Warning", "Add products to search")
        return

    search_terms = search_list.get(0, END)
    total = len(search_terms)

    # Start from zero so a second run does not begin with a full bar.
    p_var.set(0)
    progressbar.update()

    for done, search_term in enumerate(search_terms, start=1):
        try:
            main(search_term)
        except Exception as exc:
            # This is the GUI's top-level boundary: report the failure in a
            # dialog so it is visible even when no console is attached.
            msgbox.showerror(
                "Error",
                "Search for '{}' failed:\n{}".format(search_term, exc),
            )
            return
        p_var.set(done / total * 100)
        progressbar.update()
# Geometry options shared by the widgets below
_PAD = {"padx": 5, "pady": 5}
_BUTTON_OPTS = {"padx": 5, "pady": 5, "width": 12}

# "Add/Remove Products" frame: both buttons are packed against the right
# edge, Remove first and Add second.
frm_search1 = LabelFrame(root, text="Add/Remove Products")
frm_search1.pack(fill="x", ipady=3, **_PAD)
btn_remove = Button(frm_search1, text="Remove", command=remove_txt, **_BUTTON_OPTS)
btn_add = Button(frm_search1, text="Add", command=add_txt, **_BUTTON_OPTS)
for _button in (btn_remove, btn_add):
    _button.pack(side="right", **_PAD)

# List frame: the queued search terms with a vertical scrollbar
frm_list = Frame(root)
frm_list.pack(fill="both", **_PAD)
scrollbar = Scrollbar(frm_list)
scrollbar.pack(side="right", fill="y")
search_list = Listbox(
    frm_list,
    selectmode="extended",
    height=10,
    yscrollcommand=scrollbar.set,
)
search_list.pack(side="left", fill="both", expand=True)
# Link the scrollbar back to the list so dragging it scrolls the entries
scrollbar.config(command=search_list.yview)

# "Progress" frame: a bar driven by p_var (0 to 100)
frm_progress = LabelFrame(root, text="Progress")
frm_progress.pack(fill="x", ipady=3, **_PAD)
p_var = DoubleVar()
progressbar = ttk.Progressbar(frm_progress, maximum=100, length=150, variable=p_var)
progressbar.pack(fill="x", **_PAD)

# Run/exit frame: Exit leaves the main loop, Start runs the searches
frm_run = LabelFrame(root)
frm_run.pack(fill="x", **_PAD)
btn_exit = Button(frm_run, text="Exit", command=root.quit, **_BUTTON_OPTS)
btn_start = Button(frm_run, text="Start", command=start, **_BUTTON_OPTS)
for _button in (btn_exit, btn_start):
    _button.pack(side="right", **_PAD)

# Hand control to Tk until the window is closed or Exit is pressed
root.mainloop()