import requests
from lxml import etree
class Baidu_product:
    """Scrape title/description pairs from the Baidu "more" page.

    Instance attributes (set by ``__init__``):
        url: address of the page that is requested.
        headers: HTTP headers sent with the request; only a desktop
            Chrome ``User-Agent`` string is set.
    """

    def __init__(self):
        """Store the target URL and the request headers on the instance."""
        self.url = 'https://www.baidu.com/more/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'}

    # The initializer used to be spelled ``init`` and therefore never ran on
    # construction. Keep the old name callable so code that invoked it
    # explicitly continues to work.
    init = __init__

    def find_product(self):
        """Fetch the page and pair up the extracted titles and descriptions.

        Each entry is printed as soon as it is built, and the complete list
        is printed once after the loop.

        Returns:
            list[dict]: one ``{'title': ..., 'content': ...}`` dict per
            matched pair, in the order the XPath queries returned them.
            Pairing uses ``zip``, so surplus items in the longer of the two
            result lists are dropped.
        """
        # NOTE(review): the page is requested with POST; confirm that a GET
        # is not what was intended.
        # The timeout keeps the call from blocking forever on a stalled
        # connection.
        res = requests.post(self.url, headers=self.headers, timeout=10)
        html = etree.HTML(res.content.decode())

        # Text of the <a> and <span> children of the second <div> inside each
        # <div> directly under the element with id="content".
        title1_lists = html.xpath('//div[@id="content"]/div/div[2]/a/text()')
        title2_lists = html.xpath('//div[@id="content"]/div/div[2]/span/text()')

        products = []
        for title, content in zip(title1_lists, title2_lists):
            entry = {'title': title, 'content': content}
            products.append(entry)
            print(entry)
        print(products)
        return products
# Run the scraper only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    baidu = Baidu_product()
    baidu.find_product()