我想爬取历史微博热搜,我需要把下面的代码(网上找的代码,自己改不动了)跑起来


from datetime import datetime,timedelta
import re
import requests
import xlwt
cookie ='Hm_lvt_eafafe9dd9041f948d8897cb295170d5=1627023856,1627027237; Hm_lpvt_eafafe9dd9041f948d8897cb295170d5=1627028614'
headers = {'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36',"Cookie":cookie}
def gen_dates(b_date, days):
    """Lazily produce consecutive calendar days.

    Yields ``b_date``, then ``b_date`` plus one day, plus two days, and so
    on, for ``days`` values in total (nothing when ``days`` is 0).
    """
    for offset in range(days):
        yield b_date + timedelta(days=1) * offset
def get_date_list(start="2021-7-20", end="2021-7-23"):
    """Return every date in the half-open range [start, end).

    Args:
        start: First day to include, formatted as "%Y-%m-%d".
        end: Day at which to stop, formatted as "%Y-%m-%d". This day itself
            is NOT included, matching the original hard-coded behaviour.

    Returns:
        A list of ``datetime.date`` objects, one per day. The list is empty
        when ``end`` is not later than ``start``.

    Raises:
        ValueError: If either string does not match "%Y-%m-%d".
    """
    fmt = "%Y-%m-%d"
    # .date() drops the time-of-day part so only the calendar date remains.
    first = datetime.strptime(start, fmt).date()
    last = datetime.strptime(end, fmt).date()
    return [first + timedelta(days=i) for i in range((last - first).days)]
#获取数据的文字部分(热搜词条)和时间(我只需要这两个)
def GetMiddleStr(final_set, content, time):
    """Pull the topic text and its timestamp out of one response fragment.

    The original called ``re.match(content, 'topic')``, which uses the
    fragment as the *pattern* and the literal word 'topic' as the text, and
    it never stored anything in ``final_set``. This version searches the
    fragment for the fields and records them.

    NOTE(review): assumes each fragment carries JSON-style string fields
    named "topic" and "date" (the key names are taken from the original
    code) - confirm against the real response.

    Args:
        final_set: Set that receives one ``(topic, time)`` tuple per
            successfully parsed fragment.
        content: One piece of the response text.
        time: Fallback time value, used when the fragment has no "date"
            string field.

    Returns:
        None on success; 0 when no topic could be extracted (the same
        failure value the original returned).
    """
    import json  # local import: the file's import block does not provide json

    if not isinstance(content, str):
        return 0

    def _string_field(key):
        # Match "key": "value", allowing backslash-escaped characters
        # (including escaped quotes) inside the value.
        found = re.search(
            r'"%s"\s*:\s*"((?:[^"\\]|\\.)*)"' % re.escape(key), content
        )
        if found is None:
            return None
        raw = found.group(1)
        try:
            # Decode JSON escapes such as \uXXXX into real characters.
            return json.loads('"%s"' % raw)
        except ValueError:
            return raw

    topic = _string_field('topic')
    if topic is None:
        return 0
    stamp = _string_field('date')
    final_set.add((topic, stamp if stamp is not None else time))
    return None
def export(result_set, date_str):
    """Save the collected rows to an .xls workbook named after ``date_str``.

    The original ignored ``date_str`` and always saved to the same
    'D:/360/Test.xls', so calling it once per date kept only the last
    date's data. The date is now part of the file name.

    Args:
        result_set: Iterable of indexable rows; element 0 is written to the
            first column and element 1 to the second column.
        date_str: Label appended to the file name. When empty or None the
            original 'Test.xls' name is used.
    """
    excel = xlwt.Workbook(encoding="utf-8")
    sheet = excel.add_sheet("sheet1")
    # Header row.
    sheet.write(0, 0, "热搜")
    sheet.write(0, 1, "时间")

    suffix = '_' + str(date_str) if date_str else ''
    ex = 'D:/360/' + 'Test' + suffix + '.xls'
    # Data rows start at row 1, directly below the header.
    for row, t in enumerate(result_set, start=1):
        sheet.write(row, 0, t[0])
        sheet.write(row, 1, t[1])
    excel.save(ex)
def main():
    """Fetch the page for every date in the range and export each day's rows.

    For each date from ``get_date_list()`` this POSTs the date to the site,
    splits the response text on '},{', hands every piece to
    ``GetMiddleStr`` and finally calls ``export`` with that day's set.

    NOTE(review): the split on '},{' presumes the response body is a JSON
    array of flat objects - confirm against the real response.
    """
    url = 'https://weibo.zhaoyizhe.com/'
    # Build the date list once instead of recomputing it for every access.
    for day in get_date_list():
        final_set = set()
        # No zero padding, e.g. "2021-7-20", exactly as before.
        date_str = '%d-%d-%d' % (day.year, day.month, day.day)
        print(url)
        data = {
            'date': date_str
        }
        # A timeout keeps the script from hanging forever on a stalled
        # connection.
        r = requests.post(url, data=data, headers=headers, timeout=30)
        result = r.content.decode('utf-8')
        print(result)
        result = result.split('},{')
        result[0] = result[0].strip('[{')
        for fragment in result:
            GetMiddleStr(final_set, fragment, date_str)
        export(final_set, date_str)
        
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()

import requests
# Request headers for the JSON endpoint behind weibo.zhaoyizhe.com.
headers = {
    'Host': 'google-api.zhaoyizhe.com',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    # The wildcard media range is "*/*"; the pasted "/" had lost its
    # asterisks.
    'Accept': 'application/json, text/plain, */*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'Origin': 'https://weibo.zhaoyizhe.com',
    'Sec-Fetch-Site': 'same-site',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://weibo.zhaoyizhe.com/',
    # "br" is not advertised: requests can only decode Brotli when an extra
    # brotli package is installed, otherwise resp.text comes back garbled.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(
    'https://google-api.zhaoyizhe.com/google-api/index/mon/list',
    headers=headers,
    timeout=30,
)
print(resp.text)
再试试

接口直接返回 JSON 数据,自己解析吧。麻烦给点个采纳,谢谢。
import requests
# Request headers for the JSON endpoint behind weibo.zhaoyizhe.com.
headers = {
    # The wildcard media range is "*/*"; the pasted "/" had lost its
    # asterisks.
    'Accept': 'application/json, text/plain, */*',
    # "br" is not advertised: requests can only decode Brotli when an extra
    # brotli package is installed, otherwise resp.text comes back garbled.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Host': 'google-api.zhaoyizhe.com',
    'Origin': 'https://weibo.zhaoyizhe.com',
    'Referer': 'https://weibo.zhaoyizhe.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(
    'https://google-api.zhaoyizhe.com/google-api/index/mon/list',
    headers=headers,
    timeout=30,
)
print(resp.text)

所以解决了吗

img

img
import requests
# Request headers for the JSON endpoint behind weibo.zhaoyizhe.com.
headers = {
    # The wildcard media range is "*/*"; the pasted "/" had lost its
    # asterisks.
    'Accept': 'application/json, text/plain, */*',
    # "br" is not advertised: requests can only decode Brotli when an extra
    # brotli package is installed, otherwise resp.text comes back garbled.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'google-api.zhaoyizhe.com',
    'Origin': 'https://weibo.zhaoyizhe.com',
    'Pragma': 'no-cache',
    'Referer': 'https://weibo.zhaoyizhe.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
}
# NOTE(review): verify=False turns off TLS certificate verification. It is
# kept because the snippet sets it deliberately, but it should be removed
# once the request works with verification enabled.
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(
    'https://google-api.zhaoyizhe.com/google-api/index/mon/list',
    headers=headers,
    verify=False,
    timeout=30,
)
# Force UTF-8 so resp.text decodes the body as UTF-8.
resp.encoding = 'utf-8'
# print is a function in Python 3; the statement form is a SyntaxError.
print(resp.text)
如果还是不行,你就自己慢慢调试吧。