python爬虫xpath
# Question code, kept exactly as posted: fetch the Douban movie chart page and
# try to write every <a> directly under a div with class "pl2" to a.txt.
import requests
from lxml import etree
url = "https://movie.douban.com/chart"
# Request headers carrying a Firefox User-Agent string.
head = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"}
response = requests.get(url,headers=head).text
# NOTE(review): this is the line the traceback below points at -- the attribute
# name is mis-cased ("HtML"), and lxml.etree has no attribute with that spelling.
tree = etree.HtML(response)
li_list = tree.xpath('//div[@class="pl2"]/a')
fp=open('a.txt','w',encoding='utf-8')
# Iterate over every element in li_list.
# NOTE(review): the loop body is shown here without indentation.
for li in li_list:
# NOTE(review): li is an element object, not a str, so li+'\n' is the
# concatenation the answer below says must be replaced with li.text+'\n'.
print(li+'\n')
# Write the entry to the output file.
fp.write(li+'\n')
fp.close()
Traceback (most recent call last):
File "C:/Users/HY/PycharmProjects/pachong/main.py", line 8, in <module>
tree = etree.HtML(response)
AttributeError: module 'lxml.etree' has no attribute 'HtML'
# Alternative attempt from the question: reach etree through the lxml.html module.
# NOTE(review): `text` is not defined in this fragment -- presumably the fetched
# page source (the `response` string above); confirm before reusing.
from lxml import html
selector = html.etree.HTML(text)
etree没有html部分,有点懵逼
你把 HTML 中的 T 写成了小写(写成了 HtML)。方法名应该全部大写,即 etree.HTML(response)。
另外 li 是 Element元素不能与字符串拼接. 需要用li.text获取元素中的文本
print(li+'\n')
fp.write(li+'\n')
应该改成
print(li.text+'\n')
fp.write(li.text+'\n')
你题目的解答代码如下:
# Fetch the Douban movie chart page and save the text of every <a> that sits
# directly under a div with class "pl2" to a.txt, one entry per write.
import requests
from lxml import etree

url = "https://movie.douban.com/chart"
# Request headers carrying a Firefox User-Agent string.
head = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"}

# A timeout keeps the script from blocking forever if the server never answers.
response = requests.get(url, headers=head, timeout=10).text
tree = etree.HTML(response)  # the parser entry point is spelled in all caps: HTML
li_list = tree.xpath('//div[@class="pl2"]/a')

# The with-statement closes the file even if something inside the loop raises.
with open('a.txt', 'w', encoding='utf-8') as fp:
    for li in li_list:
        # li is an Element and cannot be concatenated with a str, so use its
        # text. Element.text is None when the tag has no leading text; fall
        # back to an empty string so the concatenation cannot raise TypeError.
        line = (li.text or '') + '\n'
        print(line)
        fp.write(line)
如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!