While my loop runs, a few data points have no data on the corresponding web page, and I get 'NoneType' object has no attribute 'find_all'. The solutions I can find online all treat it as a lookup problem. How do I just skip such a point and move straight on to the next one?
Add a statement that skips it, for example:
if ws_Src.cell().value is None: continue
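(That snippet comes from an openpyxl worksheet context, but the guard-and-skip idea carries over.) Applied to the scraper posted below, a minimal sketch would be, with tb being the soup.find result that comes back as None on empty pages:

tb = soup.find(name="tbody")
if tb is None:
    # Page has no data table for this date range; skip this data point
    continue
past_tr = tb.find_all(name="tr")

continue works if the lookup sits directly inside the loop; inside a helper function you would return instead.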
Your problem description isn't clear. Please post your code, or the specific site and what you're trying to do.
try:
    ...  # code that fetches the page elements
except:
    pass
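Concretely, for the code posted below you could wrap the call in the driver loop and catch the specific exception, so one empty page doesn't kill the run. A sketch (untested, reusing the names from the posted code):

for i in id:
    for t in range(0, 243):
        t = -31 * t
        if t == -4123:
            continue
        try:
            write([20, 0], [60, t], "weather6.csv", i)
        except AttributeError:
            # soup.find(name="tbody") returned None for this page,
            # so find_all blew up; move on to the next data point
            continue

A bare except: pass would also hide real bugs (typos, network errors), so catching AttributeError only is safer. Note that write() opens the csv before the crash point, so its file handle leaks on each failure; opening the file with a with block would avoid that.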
import urllib3
from calendar import isleap
import re
from bs4 import BeautifulSoup
import datetime as DT
import csv
class GetData:
    url = ""
    headers = ""

    def __init__(self, url, header=""):
        """
        :param url: the URL to fetch
        :param header: request headers; a default set is built in
        """
        self.url = url
        if header == "":
            self.headers = {
                'Connection': 'Keep-Alive',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,'
                          '*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
                'Accept-Encoding': 'gzip, deflate',
                'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, '
                              'like Gecko) Chrome/87.0.4280.66 Mobile Safari/537.36 ',
                'Host': 'www.meteomanz.com'
            }
        else:
            self.headers = header

    def Get(self):
        """
        :return: the raw page content for the URL
        """
        http = urllib3.PoolManager()
        return http.request('GET', self.url, headers=self.headers).data
# Replace the site's " - " missing-value marker with "0"
def a(t):
    return t.replace(" - ", "0")
# Write scraped weather data to a csv file
def write(years, b, c, id):
    """
    :param years: [years back from now for the start date, years back from now for the end date]
    :param b: [days before today for the start date, days after today for the end date]
    :param c: csv file name
    :param id: station id
    :return: None
    """
    # 1. Open the output file
    f = open(c, 'a', encoding='utf-8', newline='')
    # 2. Build a csv writer on top of the file object
    csv_writer = csv.writer(f)
    # 3. Write the header row
    # , "negAve", "negMax", "negMin"
    csv_writer.writerow(["Time", "Ave_t", "Max_t", "Min_t", "Prec", "SLpress", "Winddir", "Windsp", "Cloud"])
    # Today's date
    today = DT.datetime.today()
    # Is this year a leap year?
    st = isleap(today.year)
    # Date b[0] days ago
    week_ago = (today - DT.timedelta(days=b[0])).date()
    # Date b[1] days ahead
    week_pre = (today + DT.timedelta(days=b[1])).date()
    # The window straddles February (months 1+2 or 2+3), so the leap day may need aligning
    if week_ago.month + week_pre.month == 3 or week_ago.month + week_pre.month == 5:
        if week_ago.month == 2 and not st == isleap(today.year - years[0]):
            if st:
                # This year is a leap year, the target year is not, so subtract 1
                week_ago -= DT.timedelta(days=1)
            else:
                # This year is not a leap year, the target year is, so add 1
                week_ago += DT.timedelta(days=1)
        if week_pre.month == 2 and not st == isleap(today.year - years[1]):
            if st:
                # This year is a leap year, the target year is not, so subtract 1
                week_pre -= DT.timedelta(days=1)
            else:
                # This year is not a leap year, the target year is, so add 1
                week_pre += DT.timedelta(days=1)
    # Build the data URL for this station and date range
    url = (f"http://www.meteomanz.com/sy2?l=1&cou=2250&ind={id}"
           f"&d1={week_ago.day:02d}&m1={week_ago.month:02d}&y1={week_ago.year - years[0]}"
           f"&d2={week_pre.day:02d}&m2={week_pre.month:02d}&y2={week_pre.year - years[1]}")
    # Show which URL is being fetched
    print(url)
    g = GetData(url).Get()
    # Parse the page with BeautifulSoup
    soup = BeautifulSoup(g, "html5lib")
    # Grab the <tbody> (this is None when the page has no data, hence the error)
    tb = soup.find(name="tbody")
    # All <tr> rows inside it
    past_tr = tb.find_all(name="tr")
    for tr in past_tr:
        # All <td> cells in this row
        text = tr.find_all(name="td")
        flag = False
        # Placeholders for the commented-out extra columns below
        negA = negMax = negMin = False
        for i in range(0, len(text)):
            if i == 0:
                text[i] = text[i].a.string
                # Site bug: it emits a day 0 for every month, e.g. 00/11/2020, so drop those rows
                if "00/" in text[i]:
                    flag = True
            elif i == 8:
                # Cloud cover is shown as n/8; strip the "/8"
                text[i] = text[i].string.replace("/8", "")
            elif i == 5:
                # Strip the pressure unit
                text[i] = text[i].string.replace(" Hpa", "")
            elif i == 6:
                # Strip the degree sign, parentheses and compass letters from the wind direction
                text[i] = re.sub(r"[º()NWES]", "", text[i].string)
            else:
                # Plain text of the cell
                text[i] = text[i].string
            # Crude imputation: missing ("-") and trace ("Tr") values become 2
            # With this, MAE = 3.6021
            text[i] = "2" if text[i] == "-" else text[i]
            text[i] = "2" if text[i] == "Tr" else text[i]
        text = text[0:9]
        # text += [str(int(negA)), str(int(negMax)), str(int(negMin))]
        # 4. Write the row
        if not flag:
            csv_writer.writerow(text)
    # 5. Close the file
    f.close()
# Read the list of station ids
file = 'C:/Users/ADMIN/AppData/Local/Programs/Python/Python38/PYWeatherReport-main/Pre_Weather/list2.csv'
with open(file, 'r', encoding='gb2312') as f:
    reader = csv.reader(f)
    id = []  # station ids
    for row in reader:
        id.append(row[0])
for i in id:
    for t in range(0, 243):
        # Step the end of the window back 31 days per iteration
        t = -31 * t
        # Manually skip one offset whose page has no data
        if t == -4123:
            continue
        write([20, 0], [60, t], "weather6.csv", i)
It's mainly the last loop: for some values of t the corresponding page has no data, and I want to skip the current iteration and move on to the next one.
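The cleanest fix is inside write() itself: soup.find(name="tbody") returns None for those pages, and calling .find_all on None is exactly what raises the error. A sketch of the guard, assuming the code above:

    # inside write(), replace the tbody lookup with:
    tb = soup.find(name="tbody")
    if tb is None:
        # No data table on this page; close the csv and bail out,
        # which effectively skips to the next (i, t) in the driver loop
        f.close()
        return
    past_tr = tb.find_all(name="tr")

Since write() is the whole body of the inner loop, returning early here has the same effect as a continue in the loop itself.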