爬虫代码运行显示没有错误,但是excel不能自动出来
完整代码图如下
import re
import pandas
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or the sentinel string '错误' when the request
        fails (connection problem, timeout, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an HTTPError so they hit the handler.
        r.raise_for_status()
        # Use the encoding guessed from the body instead of the header value.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are mapped to the sentinel; programming
        # errors and KeyboardInterrupt are allowed to propagate.
        return '错误'
def _to_wan(text):
    """Convert a count string to a float expressed in units of 万 (10,000).

    Handles three spellings: a number followed by 亿 (multiplied by 10000),
    a number followed by 万 (taken as-is), and a bare number (divided by
    10000 and rounded to one decimal place).

    Raises:
        ValueError: if *text* contains no number.
    """
    match = re.search(r'\d+(?:\.\d+)?', text)
    if match is None:
        raise ValueError('no number found in %r' % text)
    value = float(match.group())
    if '亿' in text:
        return value * 10000
    if '万' in text:
        return value
    return float('%.1f' % (value / 10000))


def save(html):
    """Parse a ranking page, dump its text, and export the stats to Excel.

    Side effects:
        * writes the page's plain text to ``./data/B_data.txt``
          (the ``./data`` directory is created if missing);
        * prints each extracted list;
        * writes ``Dongman.xlsx`` in the current working directory.

    Args:
        html: HTML source of the page.

    Returns:
        A tuple ``(name, bfl, pls, scs, TScore)`` of lists: titles, play
        counts, comment counts, favourite counts (all three in units of 万)
        and overall scores.

    Raises:
        ValueError: if a count field contains no number, or (from pandas) if
            the extracted lists end up with different lengths.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import os

    soup = BeautifulSoup(html, 'html.parser')

    # 'w' creates the file when absent and truncates old content; the
    # previous 'r+' mode required an existing file and left stale bytes.
    os.makedirs('./data', exist_ok=True)
    with open('./data/B_data.txt', 'w', encoding='UTF-8') as f:
        f.write(soup.text)

    # Titles: text of the first <a> inside each div.info.
    name = [str(tag.a.string) for tag in soup.find_all('div', class_='info')]
    print(name)

    bfl = []  # play counts
    pls = []  # comment counts
    scs = []  # favourite counts
    for tag in soup.find_all('div', class_='detail'):
        play = tag.find('span', class_='data-box')
        # NOTE(review): two hops per field assumes a whitespace text node
        # sits between consecutive elements - confirm against the live page.
        comment = play.next_sibling.next_sibling
        favorite = comment.next_sibling.next_sibling
        bfl.append(_to_wan(play.get_text()))
        pls.append(_to_wan(comment.get_text()))
        scs.append(_to_wan(favorite.get_text()))
    print(bfl)
    print(pls)
    print(scs)

    # Overall score: first nested <div> inside each div.pts.
    TScore = [int(tag.find('div').get_text())
              for tag in soup.find_all('div', class_='pts')]
    print('综合评分', TScore)

    # Export everything to a single Excel sheet.
    info = {'动漫名': name, '播放量(万)': bfl, '评论数(万)': pls, '收藏数(万)': scs, '综合评分': TScore}
    dm_file = pandas.DataFrame(info)
    dm_file.to_excel('Dongman.xlsx', sheet_name="动漫数据分析")
    return name, bfl, pls, scs, TScore
这么多内容放一张图也太“难”看了吧。可以插入代码块的吖
with open 里的读写模式改成 'w' 试试（'r+' 要求文件已经存在；另外 'rw' 在 Python 3 中不是合法的模式，会直接报 ValueError）