源代码地址:https://mp.weixin.qq.com/s/54dcJO2KR9aMTQJh5OlSzg
下面的是我自己修改了以后的 但还是有报错
import requests
import requests
import csv
import time
import json
import requests
from lxml import etree
from pymongo import MongoClient
from bs4 import BeautifulSoup
import pandas as pd
def getOnepage(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
'Cookie': 'bid=NX8M2NI7rfg; douban-fav-remind=1; __yadk_uid=TEJSv3vlFpxrnShgBGXWW51qExiqLCiD; __gads=ID=35ed214a9a0f04f3-229b40ae8cc5003e:T=1609826897:RT=1609826897:S=ALNI_MbjrOjiMxJC6bra_BWqa1z6LwJvFA; ll="118267"; viewed="1007305"; gr_user_id=38f71f7e-49a3-4463-a3e9-dd3f77625dad; _ga=GA1.2.1355032985.1609826898; _vwo_uuid_v2=D6A8A09C6232AC6A436C3775284DBE348|dadd79d0f01552308d196454329600a7; dbcl2="247324733:YcdnDsCblB0"; push_noty_num=0; push_doumail_num=0; __utmv=30149280.24732; ck=MMTN; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1637395735%2C%22https%3A%2F%2Fwww.gameres.com%2F%22%5D; _pk_ses.100001.8cb4=*; __utma=30149280.1355032985.1609826898.1637309781.1637395736.17; __utmc=30149280; __utmz=30149280.1637395736.17.14.utmcsr=gameres.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmt=1; _pk_id.100001.8cb4=b87f2f239f7dd7c8.1609826897.15.1637395743.1637309781.; __utmb=30149280.4.10.1637395736'
}
# 发送请求,得到响应
response =requests.get(url,headers=headers)
return response.text #文本
# 解析一页的数据
def parseOnepage (res):
json_comment = json.loads(res)
col = ['name', 'star', 'rating', 'platforms', 'n_ratings', 'genres', 'content']
n = len(json_comment['games'])
datas = pd.DataFrame(index = range(n),columns = col)
for j in range(n):
datas.loc[j,'name'] = json_comment['games'][j]['title']
datas.loc[j,'star'] = json_comment['games'][j]['star']
datas.loc[j,'rating'] = json_comment['games'][j]['rating']
datas.loc[j,'platforms'] = json_comment['games'][j]['platforms']
datas.loc[j,'n_ratings'] = json_comment['games'][j]['n_ratings']
datas.loc[j,'genres'] = json_comment['games'][j]['genres']
datas.loc[j,'content'] = json_comment['games'][j]['review']['content']
def main():
for i in range(250):
url="https://www.douban.com/j/ilmen/game/search?genres=&platforms=&q=&sort=rating&more="+str(i)
response = getOnepage(url)
parseOnepage(response)
if __name__ == '__main__': # 程序的窗口
main()
看了下.JSON转换异常.判定下返回的数据