Python scraper for Weibo hot search: the XPath keeps returning an empty list, and after checking for ages I still can't find what's wrong

import requests  # send HTTP requests
from lxml import html  # parse HTML data
etree = html.etree
import time  # for getting the current time

if __name__ == '__main__':
    # print the current time
    print("Current time:", time.strftime("%Y-%m-%d %H:%M:%S"))

    # 1. Confirm the target URL
    url_ = 'https://weibo.com/hot/search'

    # 2. Manually build the request headers
    headers_ = {
    'Cookie':'SINAGLOBAL=7394561624515.408.1650383019168; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Whh4UK3Y2bSVWUYiQ0BSRn05JpX5KMhUgL.FoMcShnNSKeRSo-2dJLoIERLxKBLBo.L12eLxK.LBonLBKqLxK-L1hqL1K.LxK-LB-BL1KMLxK-L12qLBoW4; ALF=1681965077; SSOLoginState=1650429079; SCF=AiQ3jJjvvDcpmNHTjbx_0vlYymb4QJtOhoejprurcc_9TG2KyCCu7fY-LfW1TPuS6-qst2lD-Sc8oauBHVVebf0.; SUB=_2A25PW_zIDeRhGeFI71oW9S3EzTmIHXVsEWkArDV8PUNbmtB-LWzAkW9NfRr0-SjAY-1IOxYiP0NIcUdiq9mFbg82; XSRF-TOKEN=wOJ_Zbh1m9gozqGXOP4wdXiB; _s_tentry=-; Apache=2782197487262.352.1650447122781; ULV=1650447122921:2:2:2:2782197487262.352.1650447122781:1650383019177; PC_TOKEN=3b552ce365; UOR=,,www.baidu.com; webim_unReadCount=%7B%22time%22%3A1650451944807%2C%22dm_pub_total%22%3A1%2C%22chat_group_client%22%3A0%2C%22chat_group_notice%22%3A0%2C%22allcountNum%22%3A42%2C%22msgbox%22%3A0%7D; WBPSESS=DkxxJBv_KS1i8y_VfaadZn2I1uJuACLGKBcqA-U4Zbi6BC0_Phzuau44mW6T8JSRVh16FuPvZXFTGbbzP1XpX3XxZulgg5L-gyYwlVBbltKuuJhg1enU0rY5tzToafsdg8uZ7hh9zOtFQq7-zCbopA==',
    'Referer':'https://d.weibo.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36'
    }

    # 3. Send the request and get the response object
    response_ = requests.get(url_, headers=headers_)  # response object
    data_ = response_.text  # extract the response body as a string
    # print(data_)  # check whether the data came back, the request failed, or anti-scraping served fake data

    # 4. Extract the data
    html_obj = etree.HTML(data_)  # convert the string into an element tree

    # (1) Pinned Weibo post: name and URL
    name_top = html_obj.xpath('//*[@id="scroller"]/div[1]/div[1]/div/div/a/div/div/div[1]/div[1]/div[2]/text()')
    print(name_top)
    url_top = html_obj.xpath('//*[@id="scroller"]/div[1]/div[1]/div/div/a/@href')  # returns a list of href values
    print(url_top)  # relative (incomplete) URLs
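
One way to narrow this down, picking up the commented-out print(data_) check above: a small sketch (reusing data_ and html_obj from the script) that saves the raw response to a file and tests whether the id="scroller" anchor the XPath starts from exists in what requests actually received, as opposed to the DOM rendered in the browser.

# Sketch: verify the XPath's anchor node exists in the raw (non-rendered) response.
with open('hot_search_raw.html', 'w', encoding='utf-8') as f:
    f.write(data_)  # open this file afterwards and search for the nodes you are targeting

print('id="scroller" present in raw HTML:', 'id="scroller"' in data_)
print(html_obj.xpath('//*[@id="scroller"]'))  # [] here means even the anchor element is missing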

Judging by your comments, you are scraping the pinned Weibo post. If there is no pinned post at the moment, wouldn't the result just be empty?
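
Either way, it's worth guarding against the empty case instead of printing raw lists. A minimal sketch (assuming the relative hrefs live under weibo.com, which your "incomplete URL" comment suggests):

# Sketch: only treat the result as a pinned post if the XPath matched anything.
if name_top and url_top:
    print("pinned post:", name_top[0].strip(), "https://weibo.com" + url_top[0])
else:
    print("no pinned post found in the fetched HTML")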

At a glance it looks fine; if anything is wrong, I'd say it's the path. But the usual culprits, such as copying an absolute path or including a "tbody" that the browser inserted, don't appear here. Maybe double-check the path anyway?
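
For reference, this is the kind of mismatch that copying a path from DevTools causes (a self-contained sketch with made-up HTML, not Weibo's real markup): browsers insert <tbody> into tables when rendering, so a path copied from the Elements panel can fail against the raw HTML even though a shorter relative path matches fine.

from lxml import etree

# Made-up HTML standing in for a raw server response: note there is no <tbody> here.
raw_html = '<table id="demo"><tr><td><a href="/item/1">first</a></td></tr></table>'
tree = etree.HTML(raw_html)

# Path copied from browser DevTools (the browser added tbody) -> no match.
print(tree.xpath('//*[@id="demo"]/tbody/tr/td/a/text()'))  # []

# Relative path that skips the browser-only tbody -> matches.
print(tree.xpath('//*[@id="demo"]//a/text()'))  # ['first']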

I have the same problem. Has OP solved it yet? I've been stuck on this for several days.