Python 爬取小说时出现异常,该怎么解决?
import requests
import parsel
import re
# Script as posted in the question: fetch the chapter index of one novel and
# try to save a chapter to a local file. Syntax (the regex literal and the
# indentation of the `with` body) has been restored so the snippet parses;
# the logic is intentionally left as asked about.
list_url='http://huayu.zongheng.com/showchapter/1222064.html'
response=requests.get(list_url)
print(response.text)
selectors=parsel.Selector(response.text)
# getall() returns a list holding the href of every matched chapter link.
href = selectors.css('div.container div div div ul li a::attr(href)').getall()
# NOTE(review): this is the cause of the reported exception. Joining the list
# glues every chapter URL into one long string, so requests.get(href) below is
# handed something that is not a single valid URL. Iterate over the list and
# request each chapter separately instead.
href=''.join(href)
print(href)
# Book title, taken from the first <h1> element of the index page.
# NOTE(review): the tags inside this pattern were stripped from the posted
# text; '<h1>...</h1>' is reconstructed — confirm against the page markup.
name=re.findall('<h1>(.*?)</h1>',response.text)[0]
print(name)
response=requests.get(href)
print(response.text)
selectors=parsel.Selector(response.text)
# Chapter heading; .get() returns None when the selector matches nothing.
title=selectors.css('#readerFt > div > div.title > div.title_txtbox::text').get()
print(title)
# Text of every <p> element on the page, merged into one string below.
content=selectors.css('p::text').getall()
print(content)
content=''.join(content)
# Append mode, so repeated runs keep adding to the same file.
with open(f'D:/python练习/{name}',mode='a',encoding='utf-8')as f:
    f.write(title)
    f.write('\n')
    f.write(content)
    f.write('\n')
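原代码主要有几个问题:
1. `href=''.join(href)` 把所有章节链接拼接成了一个长字符串,`requests.get(href)` 拿到的不是一个合法的网址,因此抛出异常;应保留列表,逐个请求每个章节的链接。
2. 保存的文件名没有加 `.txt` 扩展名。
修改后的代码如下,用循环依次取出每个章节的内容: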
import requests
import parsel
import re
from urllib.parse import urljoin

# Download every chapter of one novel from its chapter-index page and append
# the chapters, in index order, to a single UTF-8 text file named after the book.
list_url = 'http://huayu.zongheng.com/showchapter/1222064.html'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early instead of parsing an HTTP error page.
response = requests.get(list_url, timeout=10)
response.raise_for_status()
selectors = parsel.Selector(response.text)

# Keep the chapter links as a list (one href per chapter). They must not be
# joined into a single string, because each one is requested separately below.
href = selectors.css('div.container div div div ul li a::attr(href)').getall()

# Book title = text between the first <h1> and </h1> on the index page.
# Raw string avoids the invalid "\/" escape; the non-greedy match stops at the
# first closing tag.
title_match = re.search(r'(?<=<h1>).*?(?=</h1>)', response.text)
if title_match is None:
    raise RuntimeError(f'no <h1> book title found on {list_url}')
name = title_match.group(0)

# Open the output file once for the whole run; append mode preserves anything
# already written by earlier runs.
with open(f'D:/python练习/{name}.txt', mode='a', encoding='utf-8') as f:
    for chapter in href:
        # urljoin leaves absolute URLs untouched and resolves relative or
        # protocol-relative links against the index page URL.
        response = requests.get(urljoin(list_url, chapter), timeout=10)
        response.raise_for_status()
        selectors = parsel.Selector(response.text)

        # .get() returns None when the selector matches nothing.
        title = selectors.css('#readerFt > div > div.title > div.title_txtbox::text').get()
        print(title)

        # Text of every <p> element on the chapter page, merged into one string.
        content = selectors.css('p::text').getall()
        print(content)
        content = ''.join(content)

        # Fall back to an empty heading so a missing title does not abort the
        # run with a TypeError from f.write(None).
        f.write(title or '')
        f.write('\n')
        f.write(content)
        f.write('\n')