题主要的xpath采集代码如下
import requests
from lxml import etree
from openpyxl import Workbook
# Scrape one listing page and export (title, lead actor, update status) rows
# to an Excel workbook. The first worksheet row is the header.
wb = Workbook()
ws = wb.active
ws.append(["韩剧名", "主演", "更新状态"])

# Request target and headers. A mobile-browser User-Agent is sent in place of
# the default python-requests one.
headers = {
    "User-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Mobile Safari/537.36"
}
url = "https://www.hjutv.cn/show_2___________.html"

# Bound the wait so a stalled server cannot hang the script, and stop on an
# HTTP error status rather than parsing an error page into an empty sheet.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()

# Parse the response text into an lxml HTML element tree.
html = etree.HTML(response.text)

# Select every <li> under the listing <ul>. @class is compared as an exact
# string, so the value below must stay byte-identical to the page attribute.
# NOTE(review): the "author*qq3626/95/000" fragment looks like a template
# watermark embedded in the site's markup - confirm against the live page.
li_list = html.xpath('//ul[@class="vodlist vodlist_wi author*qq3626/95/000 clearfix"]/li')
for li in li_list:
    # Title: an entry without one cannot be identified, so skip it instead of
    # raising IndexError and losing the rows collected so far.
    names = li.xpath('.//p[@class="vodlist_title"]/a/text()')
    if not names:
        continue
    name = names[0]

    # Lead actor: only the first listed name is kept; "--" when none is given.
    actors = li.xpath('.//p[@class="vodlist_sub"]/a/text()')
    actor = actors[0] if actors else "--"

    # Update status text, with the same "--" placeholder when it is absent.
    statuses = li.xpath('.//span[@class="pic_text text_right"]/text()')
    updatestatus = statuses[0] if statuses else "--"

    print(name, actor, updatestatus)
    ws.append([name, actor, updatestatus])

# Write the workbook once, after all rows have been appended.
wb.save("韩剧.xlsx")