from lxml import etree
import requests
# Fetch the HTML table at `url` and print the text of the second column of
# every data row (the first column is extracted alongside it as `rank`).
url = 'https://resource.emagecompany.com/500/caifushijie500qianggongsi2021.html'

# `headers` was passed to requests.get() without ever being defined, which
# raised NameError. A minimal browser-like User-Agent is supplied here.
headers = {'User-Agent': 'Mozilla/5.0'}

# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(url=url, headers=headers, timeout=10)

# The original code took `.text` in whatever encoding requests guessed and
# then repaired every cell with .encode('ISO-8859-1').decode('gbk'). That
# round trip only works when the guess happens to be ISO-8859-1. Declaring
# the encoding once, before the body is decoded, makes every text node come
# out correctly without any per-cell fix-up.
page.encoding = 'gbk'
response = page.text

tree = etree.HTML(response)
li_list = tree.xpath('//*[@style="WIDTH: 342pt; BORDER-COLLAPSE: collapse"]/tbody/tr')
# Skip the first <tr> (presumably the table's header row -- confirm against
# the page markup).
li_list = li_list[1:]

# Collected (rank, name) pairs. Named `records` rather than `list` so the
# builtin `list` type is not shadowed.
records = []
for i in li_list:
    rank_texts = i.xpath('./td[1]/font/text()')
    name_texts = i.xpath('./td[2]/font/text()')
    # Rows lacking either cell would make the [-1] lookup raise IndexError;
    # skip them instead of aborting the whole run.
    if not rank_texts or not name_texts:
        continue
    # A cell can hold several text nodes; the last one is used, as before.
    rank = rank_texts[-1]
    name = name_texts[-1]
    records.append((rank, name))
    print(name)
我用 utf-8 解码的话，结果会全部乱码。
只需要对我标红的地方做一下编码转换，后面的部分不用转换编码。我截图里的这段代码可以正常运行。