Excel表格内容要求整理出来
内容很多不想一个个手打
但是只能看,不能复制
截图识别文字乱码也很多
有没有复制方法˃ʍ˂
网页吗
直接ctrl+s下载到桌面
然后 打开桌面的网页
想怎么复制就怎么复制
当然F12 审查元素也可以
可以先下载到本地
import requests
import xlwings as xw
import json
import time
#爬取网站 https://news.qq.com/zt2020/page/feiyan.htm#/
# 中国各省数据
cn_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%d' % int(time.time() * 1000)
# 中国每日
china_url = 'https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=chinaDayList,chinaDayAddList,nowConfirmStatis,provinceCompare'
# 外国
forei_url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=FAutoCountryConfirmAdd,WomWorld,WomAboard'
# 请求中国各省疫情数据
def get_cn_data(page_cn):
try:
resp = requests.get(page_cn,headers = h)
if resp.status_code == 200:
return json.loads(resp.json()['data'])
except requests.exceptions.ConnectionError as e:
print('Error', e.args)
# 请求中国每日疫情数据
def get_china_page(page_china):
try:
response = requests.get(page_china,headers = h)
if response.status_code == 200:
return response.json()['data']
except requests.exceptions.ConnectionError as e:
print('Error', e.args)
#请求国外疫情数据
def get_foreign_page(page_forei):
try:
resp = requests.get(forei_url,headers = h)
if resp.status_code == 200:
return resp.json()['data']
except requests.exceptions.ConnectionError as e:
print('Error', e.args)
#解析各省数据并生成表格
def table_cn(items_cn):
app = xw.App(visible=True, add_book=False)
wb = app.books.add() # 打开Excel
sht = wb.sheets['Sheet1'] # 建表
sht.range('A1').value = '省份'
sht.range('B1').value = '日期'
sht.range('C1').value = '累计确诊'
sht.range('D1').value = '累计死亡'
sht.range('E1').value = '累计治愈'
sht.range('F1').value = '现有确诊'
sht.range('G1').value = '当日新增确诊'
sht.range('H1').value = '治愈率'
sht.range('I1').value = '死亡率'
for i in range(34):
province_data = []
province_data = items_cn['areaTree'][0]['children']
province_name = province_data[i]['name'] # 省份
sht.range(f'A{i + 2}').value = province_name
province_date =items_cn['lastUpdateTime'].split(' ')[0] # 当前日期
sht.range(f'B{i + 2}').value = province_date
province_confirm = json.dumps(province_data[i]['total']['confirm']) # 累计确诊
sht.range(f'C{i + 2}').value = province_confirm
province_dead = json.dumps(province_data[i]['total']['dead']) # 累计死亡
sht.range(f'D{i + 2}').value = province_dead
province_heal = json.dumps(province_data[i]['total']['heal']) # 累计治愈
sht.range(f'E{i + 2}').value = province_heal
province_nowConfirm = json.dumps(province_data[i]['total']['nowConfirm']) # 现有确诊
sht.range(f'F{i + 2}').value = province_nowConfirm
province_confirm_add = json.dumps(province_data[i]['today']['confirm']) # 当日新增确诊
sht.range(f'G{i + 2}').value = province_confirm_add
healRate = province_data[i]['total']['healRate'] # 治愈率
sht.range(f'H{i + 2}').value = healRate
deadRate = province_data[i]['total']['deadRate'] # 死亡率
sht.range(f'I{i + 2}').value = deadRate
wb.save(f'D:\\A疫情数据\\中国各省疫情数据.xlsx')
wb.close()
print("中国各省疫情数据表已存储至指定路径")
app.quit()
# 解析中国每日疫情数据并做成表格
def table_china_day(items_china):
app = xw.App(visible=True, add_book=False)
wb = app.books.add()
sht = wb.sheets('Sheet1') # 建表
sht.range('A1').value = '日期'
sht.range('B1').value = '当日新增'
sht.range('C1').value = '累计确诊'
sht.range('D1').value = '累计治愈'
sht.range('E1').value = '累计死亡'
for i in range(60):
item_dayadds = items_china['chinaDayAddList']
item_days = items_china['chinaDayList']
item_dayadd = item_dayadds[i]
item_day = item_days[i]
year = item_dayadd['y']
month, day = item_dayadd['date'].split('.')
date = year + '-' + month + '-' + day
sht.range(f'A{i + 2}').value = date
china_confirm_add = item_dayadd['confirm']
sht.range(f'B{i + 2}').value = china_confirm_add
china_confirm = item_day['confirm']
sht.range(f'C{i + 2}').value = china_confirm
china_heal = item_day['heal']
sht.range(f'D{i + 2}').value = china_heal
china_dead = item_day['dead']
sht.range(f'E{i + 2}').value = china_dead
wb.save(f'D:\\A疫情数据\\中国每日疫情数据.xlsx')
wb.close()
print("中国每日疫情数据表已存储至指定路径")
app.quit()
#解析国外疫情数据并生成表格
def parse_forei_page(items_forei):
app = xw.App(visible=True, add_book=False)
wb = app.books.add() # 打开Excel
sht = wb.sheets['Sheet1'] # 建表
sht.range('A1').value = '国家'
sht.range('B1').value = '日期'
sht.range('C1').value = '累计确诊'
sht.range('D1').value = '累计死亡'
sht.range('E1').value = '累计治愈'
sht.range('F1').value = '现有确诊'
sht.range('G1').value = '当日新增确诊'
for i in range(217):
item_forei = []
item_forei = items_forei['WomAboard'][i]
int_country = item_forei['name'] # 国家
sht.range(f'A{i + 2}').value = int_country
forei_confirm = item_forei['confirm'] # 累计确诊人数
sht.range(f'C{i + 2}').value = forei_confirm
forei_dead = item_forei['dead'] # 累计死亡人数
sht.range(f'D{i + 2}').value = forei_dead
forei_heal = item_forei['heal'] # 累计治愈人数
sht.range(f'E{i + 2}').value = forei_heal
forei_nowConfirm = item_forei['nowConfirm'] # 现有确诊人数
sht.range(f'F{i + 2}').value = forei_nowConfirm
forei_confirm_add = item_forei['confirmAdd'] # 新增确诊人数
sht.range(f'G{i + 2}').value = forei_confirm_add
year = item_forei['y'] # 当前年
month, day = item_forei['date'].split('.') # 当前月,日
forei_date = year + '-' + month + '-' + day #日期
sht.range(f'B{i + 2}').value = forei_date
wb.save(f'D:\\A疫情数据\\外国各国疫情数据.xlsx')
wb.close()
print("外国各国疫情数据表已存储至指定路径")
app.quit()
def main():
cn_data = get_cn_data(cn_url)
cn_table = table_cn(cn_data)
china_data = get_china_page(china_url)
day_table = table_china_day(china_data)
forei_data = get_foreign_page(forei_url)
forei_table = parse_forei_page(forei_data)
if __name__ == '__main__':
main()
参考代码稍加修改
可以按f12进入开发者模式,用左上角的选择工具指向内容,展开附近节点,就可以看到内容了,双击选择复制