import requests
from lxml import etree
import csv
# Page to scrape: the s.weibo.com "top/summary" listing with cate=realtimehot.
# Fix: the original literal was never terminated -- it ended in "%22" (a
# URL-encoded double quote) where the real closing quote belonged.
url = "https://s.weibo.com/top/summary?cate=realtimehot&sudaref=www.baidu.com"

# Only a browser-like User-Agent is sent with the request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.124 Safari/537.36 Edg/102.0.1245.44',
}

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; fail after 10 seconds instead.
data = requests.get(url, headers=header, timeout=10).text

# Parse the response body as HTML and collect the text of every
# <td class="td-01 ranktop"> cell that sits directly under a <tr>.
html = etree.HTML(data)
rank = html.xpath('//tr/td[@class="td-01 ranktop"]/text()')
print(rank)
# Just fill in the complete set of request headers, for example:
# Fuller set of request headers: content negotiation values, a browser-like
# User-Agent, and a literal cookie string.
# NOTE(review): the cookie values are hard-coded; presumably copied from a
# browser session and likely to expire -- confirm before relying on them.
# NOTE(review): 'br' is advertised in accept-encoding; verify the HTTP client
# in use can actually decode Brotli responses.
header = {
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36 QIHU 360SE'
    ),
    # One "name=value" pair per entry, joined with "; " into a single
    # Cookie header value.
    'cookie': '; '.join((
        'SUB=_2AkMVzN8uf8NxqwFRmP8Uzmzib49wzw3EieKjkC71JRMxHRl-yT92qn09tRB6PkzxwVakfBCxIvwt5vj0fHHFOeEX3V6U',
        'SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9W5FrunrhKPH0_FVHmonbX4Z',
        '_s_tentry=-',
        'UOR=,s.weibo.com,www.baidu.com',
        'Apache=9592974981533.29.1655950549498',
        'SINAGLOBAL=9592974981533.29.1655950549498',
        'ULV=1655950549507:1:1:1:9592974981533.29.1655950549498:',
    )),
}