While scraping text from a website with Spyder, I get the error: AttributeError: 'NoneType' object has no attribute 'find'
Here is the source code:
from bs4 import BeautifulSoup
import requests
import os.path
import csv
import time
import pymysql
import random
class DrawBookMessage():
    def __init__(self):
        self.baseUrl = 'https://www.qidian.com/rank/recom'

    # Randomly return one of five User-Agent strings to disguise the crawler
    def User_Agent(self):
        user_agent1 = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
        user_agent2 = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0'
        user_agent3 = 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
        user_agent4 = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
        user_agent5 = 'Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'
        lst = [user_agent1, user_agent2, user_agent3, user_agent4, user_agent5]
        return random.choice(lst)

    # Fetch the page content with a randomly chosen User-Agent
    def getHtml(self, url):
        user_agent = self.User_Agent()
        headers = {"User-Agent": user_agent}
        request = requests.get(url, headers=headers).text
        return request

    # Parse the HTML text into a BeautifulSoup document
    def commonsdk(self, url):
        html = self.getHtml(url)
        doc = BeautifulSoup(html, 'lxml')
        return doc

    # Get the total number of pages
    def get_page_size(self, url):
        doc = self.commonsdk(url)
        self.pageNum = doc.find("div", class_="pagination fr")['data-pagemax']
        return int(self.pageNum)

    # Scrape the first-level (ranking list) page
    def draw_base_list(self, url):
        doc = self.commonsdk(url)
        listt = doc.find('div', class_="book-img-text").find_all('div', class_='book-mid-info')
        for x in listt:
            self.bookName = x.find('h2').text.strip()
            self.bookUrl = 'https:' + x.find('h2').find('a')['href']
            self.bookAuthor = x.find('p').find(class_='name').text.strip()
            self.bookType = x.find('p').find('a', class_='').text.strip()
            self.bookStatus = x.find('p').find('span').text.strip()
            self.draw_Second_list()  # fetch the second-level (book detail) page
            self.dict_data()         # build the data dict and store it

    # Scrape the second-level (book detail) page
    def draw_Second_list(self):
        doc = self.commonsdk(self.bookUrl)
        listt1 = doc.find('div', class_="book-info")
        self.bookIntrodaction = listt1.find(class_="intro").text.strip()
        listt2 = doc.find(class_="fans-interact cf")
        self.monthTickets = listt2.find(class_='ticket month-ticket').find(class_='num').text
        self.weekTickets = listt2.find(class_='ticket rec-ticket hidden').find(class_='num').text
        self.weekWardNum = listt2.find(class_='rewardNum').text

    # Build the dict of data to be stored
    def dict_data(self):
        ctime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        data = {
            '书名': self.bookName,
            '作者': self.bookAuthor,
            '类型': self.bookType,
            '状态': self.bookStatus,
            '月票': int(self.monthTickets),
            '周票': int(self.weekTickets),
            '本周打赏人数': int(self.weekWardNum),
            '本书简介': self.bookIntrodaction,
            '爬取时间': ctime
        }
        print(data)
        print("=" * 50)
        self.write_to_MySQL(data, "spiders", "bookMessage")
        self.write_to_csv(data, 'qidianranking.csv')
Here is the error:
Traceback (most recent call last):
File "D:\Anaconda\大作业.py", line 116, in
drawBook.draw_base_list(drawBook.baseUrl+'?page='+str(x))
File "D:\Anaconda\大作业.py", line 54, in draw_base_list
self.draw_Second_list()#调用获取二级网页内容
File "D:\Anaconda\大作业.py", line 60, in draw_Second_list
self.bookIntrodaction = listt1.find(class_="intro").text.strip()
AttributeError: 'NoneType' object has no attribute 'find'
The error keeps occurring while fetching the second-level page content, and I don't know how to fix it.
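For reference, the traceback means that listt1 was None when .find(class_="intro") was chained onto it, i.e. doc.find('div', class_="book-info") found nothing on that detail page (which can happen when the site serves an anti-crawler page or a page with a different layout). Below is a minimal defensive sketch of draw_Second_list that reuses the class names from the code above; skipping books whose detail page lacks the expected markup is my assumption, not part of the original design:

    def draw_Second_list(self):
        doc = self.commonsdk(self.bookUrl)
        listt1 = doc.find('div', class_="book-info")
        if listt1 is None:
            # Expected markup is missing (possibly an anti-crawler page): skip this book.
            print("book-info block not found, skipping:", self.bookUrl)
            return False
        intro = listt1.find(class_="intro")
        self.bookIntrodaction = intro.text.strip() if intro else ''
        listt2 = doc.find(class_="fans-interact cf")
        if listt2 is None:
            print("fans-interact block not found, skipping:", self.bookUrl)
            return False
        self.monthTickets = listt2.find(class_='ticket month-ticket').find(class_='num').text
        self.weekTickets = listt2.find(class_='ticket rec-ticket hidden').find(class_='num').text
        self.weekWardNum = listt2.find(class_='rewardNum').text
        return True

If draw_Second_list returns False, draw_base_list can skip the self.dict_data() call for that book instead of crashing.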
Could it be that you are crawling too fast? You should sleep between requests to imitate human behavior; crawling at that speed, anyone can tell it's a bot.
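If rate limiting really is the cause, a minimal sketch is to pause for a random 1-3 seconds before every request inside getHtml (time and random are already imported at the top of the script; the exact delay range is just an assumption):

    def getHtml(self, url):
        user_agent = self.User_Agent()
        headers = {"User-Agent": user_agent}
        # Wait 1-3 seconds before each request to mimic a human reader
        # and reduce the chance of being served an anti-crawler page.
        time.sleep(random.uniform(1, 3))
        return requests.get(url, headers=headers).text

Even with the delay, the None checks above are still worth keeping, since a missing element can also come from a page whose layout simply differs from the ranking pages.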