import json

import requests
from lxml import etree
# Fetch a multi-part Bilibili video page and print the title of every part.
#
# NOTE(review): the <li> items under #multi_page appear to be inserted by
# client-side JavaScript, so the HTML returned by requests may not contain
# them and the XPath matches nothing. As a fallback the titles are read from
# the JSON object the page assigns to ``window.__INITIAL_STATE__`` -- confirm
# both assumptions against the raw response if the page layout changes.

url = 'https://www.bilibili.com/video/BV1oW411r7fM/?spm_id_from=333.788.recommend_more_video.1'
headers = {
    # The original literal was missing the space between the Chrome and
    # Safari tokens and carried a stray trailing space.
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0'
}

_STATE_MARKER = 'window.__INITIAL_STATE__='


def titles_from_dom(html):
    """Return part titles found in the static ``#multi_page`` list.

    Args:
        html: Page source as text.

    Returns:
        A list of non-empty title strings; empty when the list is absent.
    """
    if not html or not html.strip():
        return []
    tree = etree.HTML(html)
    if tree is None:
        return []
    titles = []
    for li in tree.xpath('//*[@id="multi_page"]/div[2]/ul/li'):
        # string(...) yields the text of the first matching node; the bare
        # path used before returned Element objects instead of text.
        text = li.xpath('string(./a/div/div[1]/span[2])').strip()
        if text:
            titles.append(text)
    return titles


def titles_from_initial_state(html):
    """Return part titles from the JSON embedded after ``window.__INITIAL_STATE__=``.

    Args:
        html: Page source as text.

    Returns:
        The ``part`` value of every entry in ``videoData.pages``; an empty
        list when the marker, the JSON, or the expected keys are missing.
    """
    start = html.find(_STATE_MARKER)
    if start == -1:
        return []
    try:
        # raw_decode stops at the end of the first JSON value, so the
        # JavaScript that follows the object does not need to be stripped.
        state, _ = json.JSONDecoder().raw_decode(html, start + len(_STATE_MARKER))
    except ValueError:
        return []
    if not isinstance(state, dict):
        return []
    video_data = state.get('videoData')
    if not isinstance(video_data, dict):
        return []
    pages = video_data.get('pages')
    if not isinstance(pages, list):
        return []
    return [
        page['part']
        for page in pages
        if isinstance(page, dict) and page.get('part')
    ]


def main():
    """Download the page and print one part title per line."""
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    html = response.text

    titles = titles_from_dom(html) or titles_from_initial_state(html)
    if not titles:
        print('No part titles found; the page layout may have changed.')
    for title in titles:
        print(title)


if __name__ == '__main__':
    main()
我觉得是后面的XPath语法要换种写法。我试过把后面的部分改成 .//span[@class="part"],但是结果还是为空。
浏览器的XPath helper生成的XPath有时候用到Python中会没用:浏览器里看到的是JavaScript渲染之后的DOM,而requests拿到的只是服务器返回的原始HTML,两者的结构可能不一样。
我现在几乎已经放弃lxml的etree了,因为它老是返回空值,所以现在用正则比较多。