import requests
import re
import json
from openpyxl import Workbook
def export(page_source, id='getAreaStat'):
    """Extract the JSON array embedded in a ``<script id="...">`` tag and save it as ``{id}.xlsx``.

    The first sheet (titled '全国') receives one summary row per item, with the
    'cities' field left out. Every item additionally gets its own sheet, named
    after ``item['provinceName']``, holding one row per entry of its 'cities'
    list (when that list is present and non-empty).

    Args:
        page_source: HTML text to search.
        id: value of the ``id`` attribute of the script tag holding the data;
            also used as the stem of the output file name.

    Returns:
        None. When the embedded array is empty, no file is written.

    Raises:
        ValueError: if the script tag or a JSON array inside it cannot be
            found, or if the array is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
        KeyError: if an item has no 'provinceName' key.
    """
    # re.escape keeps regex metacharacters in `id` from being interpreted as pattern syntax.
    script_match = re.search(f'script id="{re.escape(id)}".+?/script', page_source)
    if script_match is None:
        raise ValueError(f'no <script id="{id}"> element found in page source')

    # Greedy match: take everything from the first '[' to the last ']' in the tag.
    json_match = re.search(r'\[.+\]', script_match.group())
    if json_match is None:
        raise ValueError(f'no JSON array found inside <script id="{id}">')

    items = json.loads(json_match.group())
    if not items:
        return

    wb = Workbook()
    try:
        ws0 = wb.active
        ws0.title = '全国'
        # Header row of the summary sheet comes from the first item's keys.
        ws0.append([str(k) for k in items[0] if k != 'cities'])
        for item in items:
            ws = wb.create_sheet(item['provinceName'])
            cities = item.get('cities')
            if cities:
                # Column headers come from the first city's keys.
                ws.append(list(cities[0].keys()))
                for city in cities:
                    ws.append(list(city.values()))
            ws0.append([str(v) for k, v in item.items() if k != 'cities'])
        wb.save(f'{id}.xlsx')
    finally:
        # Close the workbook even if building or saving it fails.
        wb.close()
def main():
    """Download the pneumonia overview page from ncov.dxy.cn and export its data.

    Fetches the page with browser-like request headers, then calls ``export``
    once for each of the script ids 'getAreaStat' and 'fetchRecentStatV2'.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a 4xx/5xx response status.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        # 'br' is not advertised: requests can only decode Brotli when an
        # optional brotli package is installed, otherwise res.text is garbage.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'ncov.dxy.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    }
    # Without a timeout, requests may wait forever on an unresponsive server.
    res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', headers=headers, timeout=30)
    # Fail early on an HTTP error instead of parsing an error page.
    res.raise_for_status()
    # Use the encoding detected from the body rather than the header-declared one.
    res.encoding = res.apparent_encoding
    export(res.text, 'getAreaStat')
    export(res.text, 'fetchRecentStatV2')
# Run the scraper only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
首先,你爬虫部分已经弄好了嘛?
直接用python的xlwt库吧,专门用于操作Excel的,简单明了
xlwt教程
https://blog.csdn.net/weixin_34357436/article/details/91951410?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522165862825216782388043655%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=165862825216782388043655&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_ecpm_v1~rank_v31_ecpm-5-91951410-null-null.142^v33^control,185^v2^tag_show&utm_term=xlwt%E5%86%99%E5%85%A5excel%E8%AF%A6%E7%BB%86%E6%AD%A5%E9%AA%A4&spm=1018.2226.3001.4187
实战案例
https://blog.csdn.net/m0_61791601/article/details/125889849?spm=1001.2014.3001.5502