Product search using keywords. Returns title, Price, Shipping, Sold by.
# %%
import pandas as pd
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup as soup
# import csv
# %%
def get_url(search_text):
"""Generate a url from search text"""
template = 'https://www.walmart.com/search?q={}'
search_term = search_text.replace(' ', '+')
# add term query to url
url = template.format(search_term)
# add page query placeholder
url += '&page={}'
return url
# %%
def extract_record(item):
try:
title = prd_bsobj.find('h1', {'class':'f3 b lh-copy dark-gray mt1 mb2'}).text.strip()
price = prd_bsobj.find('span', {'class':'b lh-copy dark-gray mr2 f1'}).text.strip()
shipping = prd_bsobj.find('div', {'class':'f6 bw0-xl b--near-white ml3 mr3 mid-gray ml0-m mr0-m'}).text.strip()
sold_by = prd_bsobj.find('div', {'class':'f6 ml0-xl mid-gray mt3 bw0-xl b--near-white mb3'}).text.strip()
except AttributeError:
title = ''
price = ''
shipping = ''
sold_by = ''
results = (title, price, shipping, sold_by)
return results
# %%
def main(search_terms):
global prd_bsobj
records = []
for search_term in search_terms:
url = get_url(search_term)
for page in range (1,2):
url = url.format(page)
headers = {
'authority': 'www.walmart.com',
'accept': 'application/json',
'wm_client_ip': '',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'content-type': 'application/json',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': url,
'accept-language': 'en-US,en;q=0.9,ko-KR;q=0.8,ko;q=0.7',
'cookie': 'vtc=RIO4sLvDbO19PMfnQ0IOG8; __gads=ID=b67a6d7a2e6e92af:T=1586194404:S=ALNI_MY3JbhHhJIK5PnOcfX_-QmCh5qcbg; s_vi=[CS]v1|2F45B3F28515E699-600008970EB85509[CE]; _fbp=fb.1.1586194406343.1767263749; cid_csid=30952ecb-4390-4a8d-abd5-aef0eeaaf2b0; _ga=GA1.2.1917989675.1586368563; _uuid=RIO4sLvDbO19PMfnQ0IOG8; __hstc=195562739.673210eb139f877dada9e614c52c183a.1591968672889.1591968672889.1591968672889.1; hubspotutk=673210eb139f877dada9e614c52c183a; s_pers=%20s_cmpstack%3D%255B%255B%2527seo_un%2527%252C%25271589979941851%2527%255D%252C%255B%2527sem_un%2527%252C%25271589980000838%2527%255D%252C%255B%2527seo_un%2527%252C%25271591822547739%2527%255D%255D%7C1749588947739%3B%20om_mv3d%3Dseo_un%253A%7C1592081747798%3B%20om_mv7d%3Dseo_un%253A%7C1592427347800%3B%20om_mv14d%3Dseo_un%253A%7C1593032147802%3B%20om_mv30d%3Dseo_un%253A%7C1594414547804%3B%20s_fid%3D4E3B7A675BB3EAC3-083A78CCEDE19F2A%7C1656088244292%3B%20s_v%3DY%7C1593018044296%3B%20gpv_p11%3D%255BPatio%2520%2526%2520Garden%255D%2520Product%2520Page%7C1593018044314%3B%20gpv_p44%3DProduct%7C1593018044323%3B%20s_vs%3D1%7C1593018044327%3B; s_pers_2=%2Bs_cmpstack%3D%255B%255B%2527seo_un%2527%252C%25271589979941851%2527%255D%252C%255B%2527sem_un%2527%252C%25271589980000838%2527%255D%252C%255B%2527seo_un%2527%252C%25271591822547739%2527%255D%255D%7C1749588947739%3B%2Bom_mv3d%3Dseo_un%253A%7C1592081747798%3B%2Bom_mv7d%3Dseo_un%253A%7C1592427347800%3B%2Bom_mv14d%3Dseo_un%253A%7C1593032147802%3B%2Bom_mv30d%3Dseo_un%253A%7C1594414547804%3B%2Bs_fid%3D4E3B7A675BB3EAC3-083A78CCEDE19F2A%7C1656088244292%3B%2Bs_v%3DY%7C1593018044296%3B%2Bgpv_p11%3D%255BPatio%2520%2526%2520Garden%255D%2520Product%2520Page%7C1593018044314%3B%2Bgpv_p44%3DProduct%7C1593018044323%3B%2Bs_vs%3D1%7C1593018044327%3BuseVTC%3DN%7C1656814517%3Bps%3D1; _pxvid=9146d56c-fe8f-11ea-a919-0242ac120006; _gcl_au=1.1.2036042732.1602687602; DL=28227%2C%2C%2Cip%2C28227%2C%2C; ACID=0610f36c-ea13-4d72-b95a-4ae8b4dec586; hasACID=1; auth=MTAyOTYyMDE4yUWtbBAuOTnO%2FHY8R%2FRo0Cgt5A0Ax8pPHF0z4A09FRj%2B5CLRu8XVV8J6%2FE4yp81Jl%2F79TACnElSSnMnalfN3EB9HVYAdzZS42qdAp59yDkmHyTp7%2BhQVV6loriuoOtnX767wuZloTfhm7Wk2Kcjygv699%2F6tFVwuL3qJB39WKV9mqFkMKTOoBynFLIZn6c9WpYJvDm413TzZhALP43qhIhemjR0urqaZhzDta95%2BdQQUMk70P8glgOEpLOprhDfMb%2FEw67%2FGsLtdlJHpe1JgEGea43fXiMf7OZIBkWJeaTG6haCvIJ1uWAX0ZXnMDgERo8He18TaVIrSqn0SphYvMc8d3IEUwKFaaB2Rw%2Bdn77H7obzsSqgxk5I0sIxOzFnbjLZbvHUVU4F72UT5W5nZ4w%3D%3D; type=GUEST; cart-item-count=1; TBV=7; tb_sw_supported=true; location-data=28227%3ACharlotte%3ANC%3A%3A1%3A1|1el%3B%3B2.88%2C378%3B%3B4.28%2C3wn%3B%3B5.3%2C1aa%3B%3B6.97%2C2us%3B%3B7.18%2C14c%3B%3B9.33%2C1na%3B%3B9.73%2C4t5%3B%3B10.96%2C4fn%3B%3B12.28%2C52y%3B%3B12.73||7|1|1yju%3B16%3B7%3B10.23%2C1ylq%3B16%3B8%3B10.68%2C1ylp%3B16%3B10%3B11.14%2C1yjk%3B16%3B12%3B12.66%2C1y43%3B16%3B14%3B14.23; bstc=TNiFgUrdGKMzY7B73wr2cY; mobileweb=0; xpa=; xpm=3%2B1609169740%2BRIO4sLvDbO19PMfnQ0IOG8~%2B0; TS013ed49a=01538efd7cc909426db3ee1cda51c89fbb23590ed891435a3f1a23a9195798900188a33c460f91890bd3ca42adfc2924ad03a9b5bc; viq=Walmart; com.wm.reflector="reflectorid:0000000000000000000000@lastupd:1609171176679@firstcreate:1604945236885"; next-day=1609279200|true|false|1609329600|1609171176; TS01b0be75=01538efd7c302b7cf5de1d553fb4c3d86d29879f281822cdfbfd5b8dc0762c128bee6087fe7bcf86ce5011d1710f60e58bf44d48b1; akavpau_p8=1609171777~id=7a59e743f34e18f916d5384c77606a80; s_sess_2=ps%3D1; _uetsid=587b01e0492211eb94e9452426ed1340; _uetvid=629a2400415d11eba2b3cd20fdb13715; _px3=a3558f990c4314a29b8dd937b21430c99f473995d93598f316101442b307a5cd:01YsOg8RtkFWlzM7DUoGOsfO4g3ieqhHOxxf2j1MkrKJaAKUApLzUUS5+1UgpH/OFsoc8LlklO9iMPtO8ip2FA==:1000:VpD9BMtNFbA3uv9rxVpFZ47DexPJdDvY1Qzic1/ATzwTZY6HlGUHw7ID6NqVN0kg27ZnxbwpwBd39bC4n361Nsd5moeq/9F956o94pGC4UaYqxwZTpq+9XgcTrkEUnWAfD0Y9j9m7wMcn2Q9hDSbwBGlHOtthIiuBYUkiDQJsiQ=; _pxde=69b8f58603b6a72c3b3a4fa9b25fb05242dc800f7fd58b8f9df98354e21ae51e:eyJ0aW1lc3RhbXAiOjE2MDkxNzEyMDAzOTIsImZfa2IiOjAsImlwY19pZCI6W119; DL=28227%2C%2C%2Cip%2C28227%2C%2C; vtc=RIO4sLvDbO19PMfnQ0IOG8; bstc=TNiFgUrdGKMzY7B73wr2cY; mobileweb=0; xpa=; xpm=3%2B1609169740%2BRIO4sLvDbO19PMfnQ0IOG8~%2B0; TS013ed49a=01538efd7cc909426db3ee1cda51c89fbb23590ed891435a3f1a23a9195798900188a33c460f91890bd3ca42adfc2924ad03a9b5bc; TS01b0be75=01538efd7cf6007e7f521e5daf07d797a59dacfed56418ea5b2cf0e59f2f19a541e169c45314983b96ed9226ae71182d3baac38f9b; akavpau_p8=1609171833~id=cf8d6690cff3c708d9584550a97ac6ec'
}
r = requests.request("GET", url, headers=headers)
bsobj = soup(r.content,'html.parser')
atags = bsobj.findAll('a',{'class':'absolute w-100 h-100 z-1'})
for atag in atags:
if atag.get('href')[:1]=='/':
p_link = 'https://www.walmart.com' + atag.get('href')
else:
p_link = atag.get('href')
headers = {
'authority': 'www.walmart.com',
'accept': 'application/json',
'wm_client_ip': '',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'content-type': 'application/json',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': p_link,
'accept-language': 'en-US,en;q=0.9,ko-KR;q=0.8,ko;q=0.7',
'cookie': 'vtc=RIO4sLvDbO19PMfnQ0IOG8; __gads=ID=b67a6d7a2e6e92af:T=1586194404:S=ALNI_MY3JbhHhJIK5PnOcfX_-QmCh5qcbg; s_vi=[CS]v1|2F45B3F28515E699-600008970EB85509[CE]; _fbp=fb.1.1586194406343.1767263749; cid_csid=30952ecb-4390-4a8d-abd5-aef0eeaaf2b0; _ga=GA1.2.1917989675.1586368563; _uuid=RIO4sLvDbO19PMfnQ0IOG8; __hstc=195562739.673210eb139f877dada9e614c52c183a.1591968672889.1591968672889.1591968672889.1; hubspotutk=673210eb139f877dada9e614c52c183a; s_pers=%20s_cmpstack%3D%255B%255B%2527seo_un%2527%252C%25271589979941851%2527%255D%252C%255B%2527sem_un%2527%252C%25271589980000838%2527%255D%252C%255B%2527seo_un%2527%252C%25271591822547739%2527%255D%255D%7C1749588947739%3B%20om_mv3d%3Dseo_un%253A%7C1592081747798%3B%20om_mv7d%3Dseo_un%253A%7C1592427347800%3B%20om_mv14d%3Dseo_un%253A%7C1593032147802%3B%20om_mv30d%3Dseo_un%253A%7C1594414547804%3B%20s_fid%3D4E3B7A675BB3EAC3-083A78CCEDE19F2A%7C1656088244292%3B%20s_v%3DY%7C1593018044296%3B%20gpv_p11%3D%255BPatio%2520%2526%2520Garden%255D%2520Product%2520Page%7C1593018044314%3B%20gpv_p44%3DProduct%7C1593018044323%3B%20s_vs%3D1%7C1593018044327%3B; s_pers_2=%2Bs_cmpstack%3D%255B%255B%2527seo_un%2527%252C%25271589979941851%2527%255D%252C%255B%2527sem_un%2527%252C%25271589980000838%2527%255D%252C%255B%2527seo_un%2527%252C%25271591822547739%2527%255D%255D%7C1749588947739%3B%2Bom_mv3d%3Dseo_un%253A%7C1592081747798%3B%2Bom_mv7d%3Dseo_un%253A%7C1592427347800%3B%2Bom_mv14d%3Dseo_un%253A%7C1593032147802%3B%2Bom_mv30d%3Dseo_un%253A%7C1594414547804%3B%2Bs_fid%3D4E3B7A675BB3EAC3-083A78CCEDE19F2A%7C1656088244292%3B%2Bs_v%3DY%7C1593018044296%3B%2Bgpv_p11%3D%255BPatio%2520%2526%2520Garden%255D%2520Product%2520Page%7C1593018044314%3B%2Bgpv_p44%3DProduct%7C1593018044323%3B%2Bs_vs%3D1%7C1593018044327%3BuseVTC%3DN%7C1656814517%3Bps%3D1; _pxvid=9146d56c-fe8f-11ea-a919-0242ac120006; _gcl_au=1.1.2036042732.1602687602; DL=28227%2C%2C%2Cip%2C28227%2C%2C; ACID=0610f36c-ea13-4d72-b95a-4ae8b4dec586; hasACID=1; auth=MTAyOTYyMDE4yUWtbBAuOTnO%2FHY8R%2FRo0Cgt5A0Ax8pPHF0z4A09FRj%2B5CLRu8XVV8J6%2FE4yp81Jl%2F79TACnElSSnMnalfN3EB9HVYAdzZS42qdAp59yDkmHyTp7%2BhQVV6loriuoOtnX767wuZloTfhm7Wk2Kcjygv699%2F6tFVwuL3qJB39WKV9mqFkMKTOoBynFLIZn6c9WpYJvDm413TzZhALP43qhIhemjR0urqaZhzDta95%2BdQQUMk70P8glgOEpLOprhDfMb%2FEw67%2FGsLtdlJHpe1JgEGea43fXiMf7OZIBkWJeaTG6haCvIJ1uWAX0ZXnMDgERo8He18TaVIrSqn0SphYvMc8d3IEUwKFaaB2Rw%2Bdn77H7obzsSqgxk5I0sIxOzFnbjLZbvHUVU4F72UT5W5nZ4w%3D%3D; type=GUEST; cart-item-count=1; TBV=7; tb_sw_supported=true; location-data=28227%3ACharlotte%3ANC%3A%3A1%3A1|1el%3B%3B2.88%2C378%3B%3B4.28%2C3wn%3B%3B5.3%2C1aa%3B%3B6.97%2C2us%3B%3B7.18%2C14c%3B%3B9.33%2C1na%3B%3B9.73%2C4t5%3B%3B10.96%2C4fn%3B%3B12.28%2C52y%3B%3B12.73||7|1|1yju%3B16%3B7%3B10.23%2C1ylq%3B16%3B8%3B10.68%2C1ylp%3B16%3B10%3B11.14%2C1yjk%3B16%3B12%3B12.66%2C1y43%3B16%3B14%3B14.23; bstc=TNiFgUrdGKMzY7B73wr2cY; mobileweb=0; xpa=; xpm=3%2B1609169740%2BRIO4sLvDbO19PMfnQ0IOG8~%2B0; TS013ed49a=01538efd7cc909426db3ee1cda51c89fbb23590ed891435a3f1a23a9195798900188a33c460f91890bd3ca42adfc2924ad03a9b5bc; viq=Walmart; com.wm.reflector="reflectorid:0000000000000000000000@lastupd:1609171176679@firstcreate:1604945236885"; next-day=1609279200|true|false|1609329600|1609171176; TS01b0be75=01538efd7c302b7cf5de1d553fb4c3d86d29879f281822cdfbfd5b8dc0762c128bee6087fe7bcf86ce5011d1710f60e58bf44d48b1; akavpau_p8=1609171777~id=7a59e743f34e18f916d5384c77606a80; s_sess_2=ps%3D1; _uetsid=587b01e0492211eb94e9452426ed1340; _uetvid=629a2400415d11eba2b3cd20fdb13715; _px3=a3558f990c4314a29b8dd937b21430c99f473995d93598f316101442b307a5cd:01YsOg8RtkFWlzM7DUoGOsfO4g3ieqhHOxxf2j1MkrKJaAKUApLzUUS5+1UgpH/OFsoc8LlklO9iMPtO8ip2FA==:1000:VpD9BMtNFbA3uv9rxVpFZ47DexPJdDvY1Qzic1/ATzwTZY6HlGUHw7ID6NqVN0kg27ZnxbwpwBd39bC4n361Nsd5moeq/9F956o94pGC4UaYqxwZTpq+9XgcTrkEUnWAfD0Y9j9m7wMcn2Q9hDSbwBGlHOtthIiuBYUkiDQJsiQ=; _pxde=69b8f58603b6a72c3b3a4fa9b25fb05242dc800f7fd58b8f9df98354e21ae51e:eyJ0aW1lc3RhbXAiOjE2MDkxNzEyMDAzOTIsImZfa2IiOjAsImlwY19pZCI6W119; DL=28227%2C%2C%2Cip%2C28227%2C%2C; vtc=RIO4sLvDbO19PMfnQ0IOG8; bstc=TNiFgUrdGKMzY7B73wr2cY; mobileweb=0; xpa=; xpm=3%2B1609169740%2BRIO4sLvDbO19PMfnQ0IOG8~%2B0; TS013ed49a=01538efd7cc909426db3ee1cda51c89fbb23590ed891435a3f1a23a9195798900188a33c460f91890bd3ca42adfc2924ad03a9b5bc; TS01b0be75=01538efd7cf6007e7f521e5daf07d797a59dacfed56418ea5b2cf0e59f2f19a541e169c45314983b96ed9226ae71182d3baac38f9b; akavpau_p8=1609171833~id=cf8d6690cff3c708d9584550a97ac6ec'
}
session = HTMLSession()
r = session.get(p_link,headers=headers)
prd_bsobj = soup(r.content, 'html.parser')
for item in prd_bsobj:
record = extract_record(item)
if record:
records.append(record)
df = pd.DataFrame (records, columns = ['Title', 'Price', 'Shipping', 'Sold_by'])
# with open(search_term +'.csv', 'w', newline='', encoding='utf-8') as f:
# writer = csv.writer(f)
# writer.writerow(['Search', 'Title', 'Price', 'Shipping', 'Sold_by'])
# writer.writerows(records)
df.to_csv('result.csv')
# %%
search_terms = {'LG OLED TV', 'Samsung QLED TV'}
# using CSV file
# file = 'search.csv'
# search_terms = pd.read_csv(file)
main(search_terms)
# %%