"""Scrape the Bilibili popular ranking page and save each entry to a text file."""
import datetime

import requests
from lxml import etree

RANK_URL = 'https://www.bilibili.com/v/popular/rank/all'
OUTPUT_PATH = './bZhanRank.txt'
# NOTE(review): the two adjacent literals are concatenated without a
# separating space, exactly as in the original script; confirm whether a
# space before "Gecko" was intended.
HEADERS = {"User-Agent": "Mozilla/5.0(Windows NT 10.0; Win64; x64; rv:84.0)"
                         "Gecko/20100101 Firefox/84.0"}
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def _first_text(node, path, *, strip=False):
    """Return the first text result of *path* under *node*, or '' if none.

    Indexing the XPath result with [0] directly raises IndexError whenever
    an entry lacks the expected element, which would abort the whole run.
    """
    matches = node.xpath(path)
    if not matches:
        return ''
    text = matches[0]
    # strip() removes the surrounding whitespace some fields carry.
    return text.strip() if strip else text


def _format_entry(li):
    """Build the text record for one <li> element of the ranking list."""
    rank = _first_text(li, './/div[@class="num"]/text()')
    title = _first_text(li, './/a/text()')
    view_count = _first_text(
        li, './/div[@class="detail"]/span[1]/text()', strip=True)
    barrage_count = _first_text(
        li, './/div[@class="detail"]/span[2]/text()', strip=True)
    up_name = _first_text(
        li, './/span[@class="data-box up-name"]//text()', strip=True)
    score = _first_text(li, './/div[@class="pts"]/div/text()')
    return (
        '视频排行:' + rank + '\n'
        + '视频标题:' + title + '\n'
        + '视频播放量:' + view_count + '\n'
        + '视频弹幕数量:' + barrage_count + '\n'
        + '视频up主:' + up_name + '\n'
        + '视频综合评分:' + score + '\n\n'
    )


def main():
    """Download the ranking page, parse it and write the result file."""
    response = requests.get(
        url=RANK_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    page_text = response.content.decode('utf-8')
    tree = etree.HTML(page_text)
    li_list = tree.xpath('//ul[@class="rank-list"]/li')

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as fp:
        fp.write('时间:' + str(datetime.datetime.now()) + '\n\n')
        # Extract the wanted fields from every list item.
        for li in li_list:
            entry = _format_entry(li)
            # write must be *called*; the original assigned to fp.write,
            # which stores nothing in the file and raises AttributeError.
            fp.write(entry)
            print(entry + '爬取成功!!!!')


if __name__ == "__main__":
    main()
建议先存储爬取到的数据,最后打开文件一次性写入
# Collect every formatted record first, then open the file once and write
# everything in a single call.
result_list = []
for li in li_list:
    # ...
    # ... parse the data
    # ...
    result_list.append(li_rank + li_title + li_viewCount + li_barrageCount + li_upName + li_score)

with open('./bZhanRank.txt', 'w', encoding='utf-8') as fp:
    fp.write('时间:' + str(datetime.datetime.now()) + '\n\n')
    # Each record already ends with a blank line ('\n\n'), so join with an
    # empty separator; joining with '\n' would insert an extra empty line.
    fp.write(''.join(result_list))
您好,我是有问必答小助手,您的问题已经有小伙伴解答了,您看下是否解决,可以追评进行沟通哦~
如果有您比较满意的答案 / 帮您提供解决思路的答案,可以点击【采纳】按钮,给回答的小伙伴一些鼓励哦~~
ps:问答会员年卡【8折】购,限时加赠IT实体书,即可享受50次有问必答服务,了解详情>>>https://t.csdnimg.cn/RW5m