Created for
urllib.request
module
def get_html(url):
    """Return the page content for an HTTP(S) URL or a local file path.

    Args:
        url: Either a URL starting with "http" (fetched over the network)
            or a path to a local file (used for tests and debugging).

    Returns:
        The raw response body as ``bytes`` when ``url`` is fetched over
        HTTP, or the file content as ``str`` when ``url`` is a local path.
    """
    if url.startswith("http"):
        # Change the User-Agent so the server does not deny script requests.
        req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req) as response:
            return response.read()

    # For tests and debugging: treat the argument as a local file path.
    with open(url, "r") as f:
        return f.read()
pipenv install beautifulsoup4
def scrape_data(page):
    """Extract (name, price) pairs from a product listing page.

    Parses ``page`` with BeautifulSoup, looks for the ``<ul>`` element with
    class ``products`` and, for every ``<article>`` inside it, appends a
    ``(name, price)`` tuple to ``products``.

    NOTE(review): ``products`` is not defined inside this function; it is
    presumably a list defined in the enclosing/module scope - confirm.

    Args:
        page: HTML markup of the listing page (anything BeautifulSoup
            accepts as markup).

    Each appended tuple holds:
        name: text of the first ``div h2`` element, or ``"NoName"`` when
            there is no such element.
        price: integer value of the first ``div tspan`` element, or ``None``
            when the element is missing or its text is not an integer.
    """
    bs_parser = BeautifulSoup(page, 'html.parser')
    products_html = bs_parser.find('ul', 'products')
    if products_html is None:
        # No product list on the page: nothing to scrape.
        return

    for product in products_html.find_all("article"):
        name_tags = product.select("div h2")
        name = name_tags[0].string if name_tags else "NoName"

        price_tags = product.select("div tspan")
        price_text = price_tags[0].string if price_tags else None
        try:
            price = int(price_text)
        except (TypeError, ValueError):
            # Missing price (None) or non-numeric text: keep the documented
            # fallback instead of crashing on int(None).
            price = None

        products.append((name, price))
You can play with the code on laptopbg-scraper-with-BS4
These slides are based on
a customised version of
framework