import requests # HTTP request library (third-party)
from lxml import etree # data-processing / HTML parsing library
from lxml import html
# Snippet as posted in the question: fetch a Huya category page and select the
# <img class="pic"> elements from it.
# NOTE(review): the literal below starts with a space and what looks like the
# page title before the actual https:// address - probably a copy/paste
# artifact; confirm the intended URL.
url = ' 星秀直播_真人秀视频直播_美女热舞直播_虎牙直播 https://www.huya.com/g/1663#cate-1-116'
# Request headers carrying a desktop-browser User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.73'}
response = requests.get(url=url, headers=headers)
# NOTE(review): this is the line the question is about; the reply that follows
# this snippet says to replace it with etree.HTML(response.text).
data = etree.html(response.request)
girls = data.xpath('//img[@class="pic"]') # select every tag with class="pic"; the result is a list
print(girls) # elements print as 0x... memory addresses (hexadecimal)
data = etree.html(response.request)
改成
data = etree.HTML(response.text)
你题目的解答代码如下:(如有帮助,望采纳!谢谢! 点击我这个回答右上方的【采纳】按钮)
import requests # 网络请求类库 第三方库
from lxml import etree # 数据处理类库
from lxml import html
# Fetch a Huya category page and select the <img class="pic"> elements from it.
url = 'https://www.huya.com/g/1663#cate-1-116'
# Prepare the HTTP request headers (a browser User-Agent, for the site's
# anti-scraping checks).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.73'}
# Send the request. requests applies no timeout by default, so set one to keep
# the script from blocking forever on a stalled connection.
response = requests.get(url=url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
print(response.text)
# Convert the response body (the whole page source) into an lxml element tree.
data = etree.HTML(response.text)
# Pick out the wanted nodes: every tag with class="pic"; the result is a list.
girls = data.xpath('//img[@class="pic"]')
print(girls) # elements print as 0x... memory addresses (hexadecimal)
模仿这个试试
# Parse the page source into an lxml element tree.
# NOTE(review): page_text is not defined in this snippet - presumably the HTML
# text fetched earlier; confirm against the full example.
tree = etree.HTML(page_text)
# Every <li> directly under the <ul class="house-list-wrap"> list.
li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')
# The with-block closes the output file even if a row raises part-way through.
with open('58.csv', 'w', encoding='utf-8') as fp:
    for li in li_list:
        # First text node of the link inside the second <div>'s <h2>.
        title = li.xpath('./div[2]/h2/a/text()')[0]
        # All text fragments under the third <div>, joined into one string.
        price = ''.join(li.xpath('./div[3]//text()'))
        fp.write(title + ":" + price + '\n')
用错了对象
from lxml import html
导包这里报错了。你已经引入 etree,把这句去掉,直接使用 etree.HTML() 就可以了。