import json
import requests
import re
import pandas as pd
from pandas import json_normalize
# HTTP request headers; getJSONText passes this dict to requests.get.
# NOTE(review): the "user-agent" value looks garbled (e.g. "Applellike Gecko)")
# - presumably a mangled desktop Chrome UA string; verify before relying on it.
# NOTE(review): the "cookie" value appears to embed account session tokens
# (SESSDATA, bili_jct). If these are real they should be rotated and loaded
# from configuration instead of being kept in source - confirm with the owner.
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0, Win64, 64) Applellike Gecko) chrome/109.0.0.0 Safari/537.36",
"cookie": "buvid3=E345F73A-511A-BF9E-7CE8-92B066BAC09273242infoc; b_nut=1675755273; i-wanna-go-back=-1; _uuid=6645381010-AF1E-51024-61065-F23EAE44C36679682infoc; buvid_fp=7124b20b1dfb727c621a29bb7e499986; nostalgia_conf=-1; CURRENT_FNVAL=4048; buvid4=A5B19B32-7833-37F2-2DEF-61364D1EB8EA85116-023020715-js6gr3B%2BKX0Ja97hLY0x9w%3D%3D; rpdid=|(u)YJlkm)RY0J'uY~YRY|u)|; DedeUserID=482738501; DedeUserID__ckMd5=7b52a2d892bdfeba; b_ut=5; header_theme_version=CLOSE; home_feed_column=4; bp_video_offset_482738501=769995294581456900; b_lsid=C3796722_186FEF77A98; PVID=1; SESSDATA=b88354d0%2C1694867094%2Cbb43d%2A31; bili_jct=5f6c554803cf85ba5db3dd1d01394468; sid=g7436nlv"
}
def getJSONText(url):
    """Fetch ``url`` with the module-level ``headers`` and decode the body as JSON.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The decoded JSON document (a ``dict`` when the body is a JSON
        object), or the empty string ``""`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
    """
    try:
        # Without a timeout a stalled connection would block forever.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        res.encoding = res.apparent_encoding
        # Convert the JSON text into Python objects.
        return json.loads(res.text)
    except (requests.RequestException, ValueError):
        # Only network/HTTP errors and malformed JSON (json.JSONDecodeError is
        # a ValueError) are turned into the "" failure value; anything else,
        # including KeyboardInterrupt, now propagates instead of being hidden.
        return ""
def _search_nested(root, target, containers):
    """Look for the key ``target`` in the dicts reachable from ``root``.

    Only values that are instances of ``containers`` are descended into.
    Returns ``(True, value)`` for the first match, otherwise ``(False, None)``.
    """
    if not isinstance(root, containers):
        return False, None
    pending = [root]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            for key, value in node.items():
                if key == target:
                    return True, value
                if isinstance(value, containers):
                    pending.append(value)
        else:
            # ``node`` is a list: push its items reversed so that the first
            # element is the next one popped and examined.
            pending.extend(
                item for item in reversed(node) if isinstance(item, containers)
            )
    return False, None


def find(target, dictData, notFound='没找到'):
    """Return the value stored under the key ``target`` somewhere in ``dictData``.

    The search first follows nested dicts only. If that finds nothing, it is
    repeated while also descending into lists, so keys that sit inside lists
    of dicts (as in typical decoded JSON) can be reached as well.

    Args:
        target: Key to look for.
        dictData: Nested structure to search. A value that is neither a dict
            nor a list (for example ``""``) simply yields ``notFound``.
        notFound: Value returned when no dict contains ``target``.

    Returns:
        The value of the first matching key, or ``notFound``.
    """
    # The dict-only pass runs first so that inputs which were already handled
    # keep returning exactly the same match as before lists were supported.
    for containers in ((dict,), (dict, list)):
        found, value = _search_nested(dictData, target, containers)
        if found:
            return value
    return notFound
def parsePage(ilt, json_data):
    """Print the raw response, then the value ``find`` returns for 'author'.

    Args:
        ilt: List supplied by the caller; it is neither read nor modified here.
        json_data: Decoded response that is printed and then searched.
    """
    print(json_data)
    print(find('author', json_data))
def main():
    """Request one fixed search URL and pass the decoded reply to ``parsePage``."""
    # Fixed query against api.bilibili.com: page 2, 20 entries per page.
    search_url = "https://api.bilibili.com/x/web-interface/search/all/v2?keyword=%E6%99%A8%E9%92%9F%E9%85%B1&page=2&pagesize=20&context="
    # parsePage receives a fresh, empty result list together with the reply.
    parsePage([], getJSONText(search_url))
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
把 json_data 打印出来分析后可以发现,想要提取的结果位于 json_data["data"]["result"] 中:result 是一个列表,其中每个元素的 "data" 字段又是一个由字典组成的列表,逐层遍历即可把需要的字段提取出来。
def parsedate(json_data):
    """Print author, title and arcurl for every entry under data -> result.

    Args:
        json_data: Decoded response; read via ``json_data["data"]["result"]``,
            a sequence whose items each carry their own ``"data"`` sequence
            of entries. Missing levels are treated as empty.

    Each entry is printed on its own line as three tab-separated fields; a
    field that is absent from the entry is printed as an empty string.
    """
    row_template = "{}\t{}\t{}"
    columns = ("author", "title", "arcurl")
    groups = json_data.get('data', {}).get('result', [])
    for group in groups:
        for entry in group.get('data', []):
            print(row_template.format(*(entry.get(column, "") for column in columns)))
我是宝贝,我来看看:
notFound 参数带有默认值,调用 find 时又没有显式传入,所以只要没找到目标键,返回的就是这个默认值。另外,你的 parsePage 函数除了打印之外什么都没做:按理来说,它应该在内部解析数据、提取出你需要的内容,然后把结果返回。