Python scraper: how to skip a 'NoneType' error

While looping, a few data points have no data on their corresponding pages, and the script raises "'NoneType' object has no attribute 'find_all'". The solutions I found online all deal with fixing the lookup itself. How do I just skip such a point and move straight on to the next one?

Add a skip statement, for example:

if ws_Src.cell().value is None:
    continue
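
Since the error mentions find_all, this is presumably BeautifulSoup: soup.find() returns None when the element is missing, so you can test for that before calling find_all(). A sketch (tb and the tag name are guesses based on the error message; adjust to your code):

# inside the scraping loop
tb = soup.find(name="tbody")
if tb is None:
    continue  # page has no data, move on to the next data point
rows = tb.find_all(name="tr")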

 

Your problem description is not clear enough. Please post your code, or the specific website and what you need.

try:
    ...  # (code that fetches and parses the page elements)
except:
    pass
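
One caveat: a bare except also swallows KeyboardInterrupt and real bugs. Since the reported error is an AttributeError, catching that one specifically is safer. A sketch using the names from the error message:

try:
    tb = soup.find(name="tbody")
    rows = tb.find_all(name="tr")  # raises AttributeError when tb is None
except AttributeError:
    rows = []  # missing table: treat the page as empty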

import urllib3
from calendar import isleap
import re
from bs4 import BeautifulSoup
import datetime as DT
import csv

class GetData:
    url = ""
    headers = ""

    def __init__(self, url, header=""):
        """
        :param url: the URL to fetch
        :param header: request headers; a built-in default is used when left empty
        """
        self.url = url
        if header == "":
            self.headers = {
                'Connection': 'Keep-Alive',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,'
                          '*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
                'Accept-Encoding': 'gzip, deflate',
                'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, '
                              'like Gecko) Chrome/87.0.4280.66 Mobile Safari/537.36 ',
                'Host': 'www.meteomanz.com'
            }
        else:
            self.headers = header

    def Get(self):
        """
        :return: the page content returned for the URL
        """
        http = urllib3.PoolManager()
        return http.request('GET', self.url, headers=self.headers).data

# Helper: replace the site's " - " placeholder with "0" (apparently unused in this script)
def a(t):
    return t.replace(" - ", "0")

# Function: write scraped data to a CSV file
def write(years, b, c, id):
    """
    :param years: [years back from today for the start date, years back for the end date]
    :param b: [days before today for the start date, days after today for the end date]
    :param c: CSV file name
    :param id: station (city) id
    :return: None
    """
    # 1. Open the output file
    f = open(c, 'a', encoding='utf-8', newline='')

    # 2. Build a csv writer on top of the file object
    csv_writer = csv.writer(f)

    # 3. Write the header row (note: with mode 'a' this is repeated on every call)
    # , "negAve", "negMax", "negMin"
    csv_writer.writerow(["Time", "Ave_t", "Max_t", "Min_t", "Prec", "SLpress", "Winddir", "Windsp", "Cloud"])
    # today's date
    today = DT.datetime.today()
    # is the current year a leap year?
    st = isleap(today.year)
    # date b[0] days ago
    week_ago = (today - DT.timedelta(days=b[0])).date()
    # date b[1] days ahead
    week_pre = (today + DT.timedelta(days=b[1])).date()
    # leap-year adjustment: keep the dates valid when the window crosses the end of February
    if week_ago.month + week_pre.month == 3 or week_ago.month + week_pre.month == 5:
        if week_ago.month == 2 and st != isleap(today.year - years[0]):
            if st:
                # this year is a leap year but the target year is not, so subtract a day
                week_ago -= DT.timedelta(days=1)
            else:
                # this year is not a leap year but the target year is, so add a day
                week_ago += DT.timedelta(days=1)
        if week_pre.month == 2 and st != isleap(today.year - years[1]):
            if st:
                # this year is a leap year but the target year is not, so subtract a day
                week_pre -= DT.timedelta(days=1)
            else:
                # this year is not a leap year but the target year is, so add a day
                week_pre += DT.timedelta(days=1)
    # URL of the data to scrape
    url = "http://www.meteomanz.com/sy2?l=1&cou=2250&ind=" + id + "&d1=" + str(week_ago.day).zfill(2) + "&m1=" + str(
        week_ago.month).zfill(2) + "&y1=" + str(week_ago.year - years[0]) + "&d2=" + str(week_pre.day).zfill(
        2) + "&m2=" + str(week_pre.month).zfill(2) + "&y2=" + str(week_pre.year - years[1])
    # show which URL is being fetched
    print(url)
    g = GetData(url).Get()
    # parse the page with BeautifulSoup
    soup = BeautifulSoup(g, "html5lib")
    # grab the <tbody> element; soup.find() returns None when the page has no data table
    tb = soup.find(name="tbody")
    # grab every <tr> row; this is the line that raises
    # "'NoneType' object has no attribute 'find_all'" for empty pages
    past_tr = tb.find_all(name="tr")

    for tr in past_tr:
        # collect every <td> cell in this row
        text = tr.find_all(name="td")
        flag = False
        negA = negMax = negMin = False
        for i in range(0, len(text)):
            if i == 0:
                text[i] = text[i].a.string
                # site bug: it emits a day 0 for every month, e.g. 00/11/2020, so drop those rows
                if "00/" in text[i]:
                    flag = True
            elif i == 8:
                # strip the "/8" the page appends to the cloud value
                text[i] = text[i].string.replace("/8", "")
            elif i == 5:
                # strip the pressure unit
                text[i] = text[i].string.replace(" Hpa", "")
            elif i == 6:
                # strip the degree sign, parentheses and compass letters from the wind direction
                text[i] = re.sub(u"[º(.*?|N|W|E|S)]", "", text[i].string)
            else:
                # plain cell: just take its text
                text[i] = text[i].string
            # crude approach: replace missing values with 2
            # (doing this gives MAE = 3.6021)
            text[i] = "2" if text[i] == "-" else text[i]
            text[i] = "2" if text[i] == "Tr" else text[i]
        text = text[0:9]
        # text += [str(int(negA)), str(int(negMax)), str(int(negMin))]
        # 4. Write the row to the CSV file
        if not flag:
            csv_writer.writerow(text)
    # 5. Close the file
    f.close()

# Path of the CSV that holds the list of station ids
file = 'C:/Users/ADMIN/AppData/Local/Programs/Python/Python38/PYWeatherReport-main/Pre_Weather/list2.csv'

with open(file, 'r', encoding='gb2312') as f:
    reader = csv.reader(f)
    id = []  # station ids
    for row in reader:
        id.append(row[0])

for i in id:
    for t in range(0, 243):
        t = -30 * t - t
        if t == -4123:
            continue
        write([20, 0], [60, t], "weather6.csv", i)


The problem is mainly in this last loop: for some values the corresponding page has no data, and I want to skip the current iteration and go straight to the next one.
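
The error comes from past_tr = tb.find_all(name="tr"): when a page carries no data, soup.find(name="tbody") returns None. A minimal fix is to return from write() before find_all() is reached (a sketch of just the relevant lines inside write()):

tb = soup.find(name="tbody")
if tb is None:
    f.close()  # close the CSV handle before giving up on this page
    return     # skip this data point; the outer loop moves on to the next t
past_tr = tb.find_all(name="tr")

Alternatively, leave write() unchanged and wrap the call in the final loop in try/except AttributeError with continue, as suggested above.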