同样的问题搜索过,死活没找出问题在哪
import requests
import time
from tqdm import tqdm
from bs4 import BeautifulSoup
def get_contents(target):
req=requests.get(target)
req.encoding='gbk'
html=req.text
bs=BeautifulSoup(html,'lxml')
texts=bs.find('div',id='content')
contents=texts.text.strip().split('\xa0'*4)
return contents
if __name__ == '__main__':
server='https://www.bifeige.com'
book_name='一念永恒.txt'
chapter_target='https://www.bifeige.com/0_825/'
req=requests.get(chapter_target)
req.encoding='gbk'
html=req.text
chapters_bs=BeautifulSoup(html,'lxml')
chapters=chapters_bs.find('div',id='list')
chapters=chapters.find_all('a')
for chapter in chapters:
chapter_name=chapter.string
url=server+chapter.get('href')
content=get_contents(url)
with open(book_name,'a',encoding='utf-8') as f:
f.write(chapter_name)
f.write('\n')
f.write('\n'.join(content))
f.write('\n')
Traceback (most recent call last):
File "F:\python_work\web_spider\novels.py", line 28, in <module>
content=get_contents(url)
File "F:\python_work\web_spider\novels.py", line 12, in get_contents
contents=texts.text.strip().split('\xa0'*4)
AttributeError: 'NoneType' object has no attribute 'text'
应该是你的代码没采集到数据。加个异常处理看看
def get_contents(target):
try:
req=requests.get(target)
req.encoding='gbk'
html=req.text
bs=BeautifulSoup(html,'lxml')
texts=bs.find('div',id='content')
contents=texts.text.strip().split('\xa0'*4)
return contents
except Exception as e:
print("出错了",repr(e)
return "空的"