import csv
import json
import os
import re

import pandas as pd
import requests
def get_movie_type():
    """Fetch the Douban chart page and extract the movie-category links.

    Returns:
        A list of ``(href, name)`` tuples, one per matching link in the
        page.  ``run()`` reads each entry as ``url[0]`` (the link) and
        ``url[1]`` (the category name).
    """
    url = 'https://movie.douban.com/chart'
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, timeout=10)
    # The previous pattern, r'(.*?)', had a single lazy group that can only
    # match the empty string, so findall() returned '' entries and the
    # caller's url[1] lookup failed.  Two groups are needed so that findall()
    # yields (href, name) tuples.
    # NOTE(review): the anchor markup below is reconstructed from how the
    # callers use the result (a relative '/typerank?...' link plus its link
    # text) -- confirm it against the live page HTML.
    result = re.findall(r'<a href="(/typerank\?[^"]*)">([^<]*)</a>', resp.text)
    return result
def get_movie_rank(type_url):
    """Download the top-list data for one movie category.

    Args:
        type_url: Category link as returned by ``get_movie_type()``.
            Presumably of the form
            ``/typerank?type_name=<name>&type=<id>&...`` -- verify against
            the page markup.

    Returns:
        The response body as text; ``run()`` parses it with ``json.loads``.
    """
    # Everything after the leading type_name parameter is forwarded to the
    # API.  Find that boundary by searching for '&type=' instead of slicing
    # at a fixed offset: the fixed [23:] slice is only correct when the
    # category name is exactly two characters long.
    boundary = type_url.find('&type=')
    if boundary != -1:
        query = type_url[boundary + 1:]
    else:
        # Unrecognised link shape: keep the historical fixed-offset slice.
        query = type_url[23:]
    rank_url = ('https://movie.douban.com/j/chart/top_list?'
                + query + '&start=0&limit=100')
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(rank_url, timeout=10)
    return resp.text
_DEFAULT_CSV_PATH = r'C:\Users\Administrator\Desktop\python\练习\第五周作业_周峰\test3.csv'
_CSV_COLUMNS = ['类型', '电影名称', '地区', '上映日期', '评分', '排行', '地址']


def save(movie, path=_DEFAULT_CSV_PATH):
    """Append one movie record to the CSV file.

    The header row is written only when the target file does not exist yet
    or is empty.  Previously every call appended its own header line, so the
    file ended up with a header before each data row.

    Args:
        movie: Sequence of seven values in the order of ``_CSV_COLUMNS``.
        path: Destination CSV file; defaults to the original hard-coded
            location.
    """
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    frame = pd.DataFrame([movie], columns=_CSV_COLUMNS)
    # index=False keeps the DataFrame's row index out of the file.
    frame.to_csv(path, mode='a', index=False, header=needs_header)
def run():
    """Crawl every movie category and store each movie through ``save()``.

    For each entry returned by ``get_movie_type()`` (link at index 0,
    category name at index 1), fetch the category's list with
    ``get_movie_rank()``, parse it as JSON and pass one row per movie to
    ``save()``.  Progress messages are printed for every category.
    """
    for entry in get_movie_type():
        # Plain locals: the category name no longer leaks into module scope
        # through a ``global`` statement.
        type_url = entry[0]
        movie_type_name = entry[1]
        print('正在储存' + movie_type_name + '类型的电影')

        # Parse the JSON payload for this category.
        movies = json.loads(get_movie_rank(type_url))

        # Collect the fields of interest for every movie.
        for info in movies:
            # Only the first region is stored; guard against an empty list
            # instead of raising IndexError.
            regions = info['regions']
            movie = [
                movie_type_name,
                info['title'],
                regions[0] if regions else '',
                info['release_date'],
                info['score'],
                info['rank'],
                info['url'],
            ]
            # Store the assembled movie record.
            save(movie)
        print(movie_type_name + '的类型储存完毕!')
# Start the crawl at module load time (this runs on import as well as on direct execution).
run()
# Append the rows in ``data`` to demo.csv, writing the header row first only
# when the file contains no rows yet.  ``data`` and ``self.count`` come from
# the surrounding code, which is not part of this fragment.
with open("demo.csv", "a+", encoding='utf-8', newline="") as f:
    k = csv.writer(f, delimiter=',')
    # Opening with "a+" above creates the file when it is missing, so this
    # read-only open always finds it.
    with open("demo.csv", "r", encoding='utf-8', newline="") as f1:
        # Peeking at one row is enough to tell whether the file is empty;
        # there is no need to load every existing row.
        is_empty = next(csv.reader(f1), None) is None
    if is_empty:
        k.writerow(['品牌索引', '品牌名称', '车系名称', '车型', '汽车型号', '指导价', '参考价', '经销商名称',
                    '地区', '简称', '地址', '销售区域', '电话1', '电话2', '时间'])
    # The data rows and the progress message are the same in both cases.
    k.writerows(data)
    print('第[{}]条数据插入成功'.format(self.count))
请根据自己的实际情况修改相应的参数。这段代码的意思是:写入数据前先判断文件中是否已有表头,没有就先写入表头,已有则不再重复写入。
修改并添加参数:把 to_csv 调用中 mode='a' 后面的参数改为下面的代码:
index=False,header=False
你每抓取一行数据就调用一次 save,
而每调用一次 save,也就是执行一次下面的代码:
movie=pd.DataFrame([movie],columns=['类型','电影名称','地区','上映日期','评分','排行','地址'])
movie.to_csv(r'C:\Users\Administrator\Desktop\python\练习\第五周作业_周峰\test3.csv',mode='a',index=None)
就会向文件中多写入一次表头。
正确的做法是先把全部数据添加到同一个 DataFrame 中,然后只调用一次 to_csv。