Python 爬虫:保存的 JSON 文件中文显示为乱码,求解决
import json
import requests
def data_html(n):
    """Fetch one page of the movie API and return the raw response body.

    Args:
        n: Value substituted into the ``offset`` query parameter; the page
            size is fixed at ``limit=10``.

    Returns:
        The response body as text (``res.text``).

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status.
    """
    url = f'https://spa1.scrape.center/api/movie?limit=10&offset={n}'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,'
                      ' like Gecko) Chrome/109.0.0.0 Safari/537.36',
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    res = requests.get(url=url, headers=header, timeout=10)
    # Fail loudly instead of handing an error page to the caller as data.
    res.raise_for_status()
    return res.text
def data_save(food):
    """Append ``food`` to ``diany.json`` as one JSON document per line.

    Args:
        food: Any JSON-serialisable object. Note that a ``str`` is encoded
            as a JSON string literal, so text that is already JSON ends up
            quoted a second time.

    The file is opened in append mode with UTF-8 encoding and is closed
    before the function returns.
    """
    # ensure_ascii=False keeps non-ASCII characters (e.g. Chinese) readable
    # in the file instead of escaping them as \uXXXX sequences.
    line = json.dumps(food, ensure_ascii=False)
    with open('diany.json', 'a', encoding='utf-8') as f:
        # The trailing newline separates successive appends so each record
        # can be parsed back individually.
        f.write(line + '\n')
if __name__ == '__main__':
    # Step through offsets 0, 10, ..., 100 and store each fetched page.
    # NOTE(review): the bound 101 includes offset=100 - confirm the API
    # really has data at that offset.
    for offset in range(0, 101, 10):
        data_save(data_html(offset))
输出结果为
requests.get 之后可以直接调用 json(),修改后的代码如下:
import json
import requests
def data_html(n):
    """Fetch one page of the movie API and return the decoded JSON payload.

    Args:
        n: Value substituted into the ``offset`` query parameter; the page
            size is fixed at ``limit=10``.

    Returns:
        The object produced by ``Response.json()`` for the response body.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status.
    """
    url = f'https://spa1.scrape.center/api/movie?limit=10&offset={n}'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,'
                      ' like Gecko) Chrome/109.0.0.0 Safari/537.36',
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url=url, headers=header, timeout=10)
    # Check the status first so an error page is reported as an HTTP error
    # rather than as a confusing JSON decoding failure.
    response.raise_for_status()
    return response.json()
def data_save(food):
    """Append ``food`` to ``diany.json`` as one JSON document per line.

    Args:
        food: Any JSON-serialisable object.

    The file is opened in append mode with UTF-8 encoding and is closed
    before the function returns.
    """
    with open('diany.json', 'a', encoding='utf-8') as f:
        # ensure_ascii=False writes non-ASCII characters (e.g. Chinese)
        # as-is instead of escaping them as \uXXXX sequences.
        json.dump(food, f, ensure_ascii=False)
        # Separate successive appends so each record can be parsed back
        # individually.
        f.write('\n')
if __name__ == '__main__':
    # Step through offsets 0, 10, ..., 100 and store each fetched page.
    # NOTE(review): the bound 101 includes offset=100 - confirm the API
    # really has data at that offset.
    for offset in range(0, 101, 10):
        data_save(data_html(offset))
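补充:json.dump 默认 ensure_ascii=True,保存时会把中文自动转成 \uXXXX 形式的转义序列,这很正常,并不是乱码;再用 json 库读进来就能看到正常的中文数据。如果希望文件里直接显示中文,可以在 json.dump 中加上 ensure_ascii=False。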
这么规整的数据,直接保存成 CSV 不好吗?
# Request the page and load the 'results' field of its JSON body into a
# DataFrame. `url`, `headers` and `pd` are expected to be defined by the
# surrounding code.
# headers must be passed by keyword: the second positional parameter of
# requests.get is `params`, so a positional dict would be sent as
# query-string arguments instead of HTTP headers.
dd = requests.get(url, headers=headers)
data = pd.DataFrame(dd.json()['results'])