# -*- coding: utf-8 -*-
# @Time : 2021/7/14 5:22 PM
# @Author : TANQ
# @File : .py
# @Software : PyCharm
from bs4 import BeautifulSoup
import re
import urllib.request
import urllib.error
import urllib.parse
import xlwt
import pandas as pd
# City list for Jiangsu Province; one city name per row in column '城市'
df = pd.read_excel('./江苏省.xlsx')
def main():
    baseurl = "https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner&city=%E6%B1%9F%E8%8B%8F-"
    datalist = getData(baseurl)
    # print(datalist)
    savepath = ".\\江苏省.xlsx"
    # saveData(datalist, savepath)
# Regex for the "current confirmed" summary figure; the class names are tied
# to this particular page build and may break when Baidu updates the site
findsure = re.compile(
    r'<p class="ProvinceSummary_1-1-309_F8LjRz ProvinceSummary_1-1-309_1RW2uk ProvinceSummary_1-1-309_pBq9kt">(.*)</p>')
def getData(baseurl):
    datalist = []
    # range(0, 1) fetches only the first city; use range(len(df)) to cover them all
    for i in range(0, 1):
        url = baseurl + urllib.parse.quote(df['城市'][i])
        html = askURL(url)
        soup = BeautifulSoup(html, "html.parser")
        # print(url)
        # print(soup)
        for item in soup.find_all('div', class_="item"):
            data = []
            item = str(item)
            sure = re.findall(findsure, item)
            if sure:  # skip items where the summary paragraph is absent
                data.append(sure[0])
                datalist.append(data)
    return datalist
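
# Alternative extraction sketch: instead of regexing str(item), BeautifulSoup
# can select the <p> tag directly. The class name below is copied from the
# findsure pattern above and is just as version-specific to the page build.
def getSure(item):
    p = item.find('p', class_="ProvinceSummary_1-1-309_pBq9kt")
    return p.get_text(strip=True) if p is not None else None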
def askURL(url):
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"}
    request = urllib.request.Request(url, headers=head)
    try:
        response = urllib.request.urlopen(request)
        html = response.read().decode("utf-8")
    except urllib.error.URLError as e:  # covers HTTPError as well
        print("request failed:", e)
        return ""
    return html
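
# Usage sketch: with baseurl as defined in main() and an illustrative city name,
#   html = askURL(baseurl + urllib.parse.quote("南京"))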
def saveData(datalist, savepath):
    print("saving....")
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = book.add_sheet('江苏省', cell_overwrite_ok=True)
    # Headers must be a tuple; a bare ("现有确诊") is just a string
    col = ("现有确诊",)
    for i in range(len(col)):
        sheet.write(0, i, col[i])
    for i in range(len(datalist)):
        print("row %d" % (i + 1))
        data = datalist[i]
        for j in range(len(data)):
            sheet.write(i + 1, j, data[j])
    # Note: xlwt emits the legacy .xls format even under an .xlsx file name
    book.save(savepath)
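
# Alternative save sketch using pandas (already imported above): since xlwt
# only writes the legacy .xls format, DataFrame.to_excel is a closer fit for
# a real .xlsx file. A minimal sketch, assuming each row of datalist holds
# the columns in header order and an engine such as openpyxl is installed.
def saveDataPandas(datalist, savepath):
    out = pd.DataFrame(datalist, columns=["现有确诊"])
    out.to_excel(savepath, index=False, sheet_name='江苏省')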
if __name__ == "__main__":
    main()

# Debug note: print the value of item inside the for loop to inspect it