编了一个爬取豆瓣排行榜信息的爬虫代码
import requests
import json
if __name__ == '__main__':
    # Script from the question: request the Douban "typerank" address with
    # the query parameters below and try to store the reply as JSON.
    # NOTE: per the traceback and the answer that follow, this address
    # replies with an HTML page, so the ``.json()`` call raises
    # ``json.decoder.JSONDecodeError``.
    target = 'https://movie.douban.com/typerank'

    # Query-string parameters; all values are sent as strings.
    query = dict(
        type='24',
        interval_id='100: 90',
        action='',
        start='1',
        limit='20',
    )

    # Send a desktop Firefox User-Agent instead of the requests default.
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'
    request_headers = {'User-Agent': user_agent}

    reply = requests.get(target, params=query, headers=request_headers)

    # Parse the body as JSON (this is the call that fails in the traceback).
    payload = reply.json()

    # ensure_ascii=False writes non-ASCII characters as-is, not as \uXXXX.
    out_file = open('./douban.jason', 'w', encoding='utf-8')
    json.dump(payload, out_file, ensure_ascii=False)

    print('over!!!')
最终出现了以下错误,求大神帮忙解答:
Traceback (most recent call last):
File "C:/Users/PycharmProjects/pythonProject/04requests实战之豆瓣电影爬取.py", line 21, in <module>
list_data = response.json()
File "C:\Users\PycharmProjects\pythonProject\venv\lib\site-packages\requests\models.py", line 900, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2032.0_x64__qbz5n2kfra8p0\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2032.0_x64__qbz5n2kfra8p0\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2032.0_x64__qbz5n2kfra8p0\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
你这个程序请求的网址返回的是 HTML 页面,并不是 JSON,所以 response.json() 解析时会抛出 JSONDecodeError。
正确的请求
import json

import requests


def main() -> None:
    """Download a page of Douban movie data and save it as a JSON file.

    Sends a GET request to the ``/j/search_subjects`` address with
    ``type=movie``, ``tag=热门``, ``page_limit=50`` and ``page_start=0``,
    parses the reply as JSON and writes it to ``./douban.jason`` encoded
    as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 10 seconds.
        ValueError: if the reply body is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
    """
    url = 'https://movie.douban.com/j/search_subjects'
    param = {
        "type": "movie",
        "tag": "热门",
        "page_limit": 50,
        "page_start": 0,
    }
    # Send a desktop Firefox User-Agent instead of the requests default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, params=param, headers=headers, timeout=10)
    # Fail here on an HTTP error status, so an error page is not handed to
    # the JSON parser and reported as a misleading "Expecting value" error.
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    list_data = response.json()

    # The context manager closes (and flushes) the file even if dump() raises.
    # ensure_ascii=False writes non-ASCII characters as-is, not as \uXXXX.
    with open('./douban.jason', 'w', encoding='utf-8') as fp:
        json.dump(list_data, fp=fp, ensure_ascii=False)

    print('over!!!')


if __name__ == '__main__':
    main()