有一个需要运用到爬虫的项目,按照教程配置好了环境,却怎么都无法运行,还报出了如下连接错误,有没有人可以帮忙解决这个问题啊,跪求详细解决方案
```python
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x000002A81FEE9BB0>:
Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
(Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002A81FEE9BB0>:
Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。'))
:
(Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002A81FEE9BB0>:
Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。'))
进程已结束,退出代码1
以下是爬虫代码
import re
import requests
from bs4 import BeautifulSoup
from lxml import etree
def photo_spider(url, timeout=15):
    """Download the raw bytes of a resource (e.g. a cover image).

    Args:
        url: Resource URL to fetch.
        timeout: Seconds to wait for the connection/response before
            aborting.  Without a timeout, a dead or blocked host makes
            the request hang until the OS gives up with WinError 10060
            (the error reported above).

    Returns:
        bytes: The response body.

    Raises:
        requests.exceptions.RequestException: On connection failure
            or timeout.
    """
    response = requests.get(url, timeout=timeout)
    return response.content
def dataGet(url, timeout=15):
    """Fetch the HTML source of *url* and return it as text.

    Sends a browser-like User-Agent plus the site cookie so the server
    does not reject the request as a bot.

    Args:
        url: Page URL to fetch.
        timeout: Seconds before the connection attempt is aborted;
            prevents the indefinite hang behind the WinError 10060
            timeout reported above.

    Returns:
        str: The decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection failure
            or timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0',
        'Cookie': 'ZHID=6BF785E78FDC0B4D78C286319A2488F5; ver=2018; zh_visitTime=1621581138913; v_user=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DOW1kmcXBGzgDDW_1CI0BLkKeKLVmNdbw730kC8_-KPtyaa3nJC75bOggHK5hjT2U%26wd%3D%26eqid%3De182d2b1001cdc460000000460a75d50%7Chttp%3A%2F%2Fwww.zongheng.com%2F%7C46385937; zhffr=www.baidu.com; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221798dc48c2d6af-0f3e091b793753-7e697a64-1327104-1798dc48c2e8fb%22%2C%22%24device_id%22%3A%221798dc48c2d6af-0f3e091b793753-7e697a64-1327104-1798dc48c2e8fb%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%7D',
    }
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    return resp.text
def novelSearch(data):
    """Parse a search-results page and collect novel names and links.

    Args:
        data: HTML source of the search-results page.

    Returns:
        list: ``[novelList, novelInfoList, linkList]`` where
            ``novelList`` holds the novel names, ``linkList`` the
            detail-page URLs, and ``novelInfoList`` the matching
            ``[name, href]`` pairs — at most 20 entries each.
    """
    root = etree.HTML(data)
    # Evaluate each XPath once up front.  The original re-ran both
    # whole-document queries on every loop pass (O(n^2) work) and
    # relied on an IndexError to detect the end of the result list.
    name_nodes = root.xpath('/html/body/div[2]/div[3]/div[1]/div[3]/div/div[2]/h2/a')
    hrefs = root.xpath('/html/body/div[2]/div[3]/div[1]/div[3]/div/div[2]/h2/a/@href')
    novelList = []
    novelInfoList = []
    linkList = []
    # zip stops at the shorter list, matching the original's
    # bail-out-on-IndexError behavior; cap at 20 results as before.
    for node, href in list(zip(name_nodes, hrefs))[:20]:
        name = node.xpath('string(.)').strip()
        novelList.append(name)
        novelInfoList.append([name, href])
        linkList.append(href)
    return [novelList, novelInfoList, linkList]
def chapterGet(data):
    """Extract chapter titles and chapter URLs for a novel.

    Follows the "full catalogue" link found on the novel's detail
    page, downloads that catalogue page, then scrapes chapter titles
    and chapter URLs out of its raw HTML with regexes.

    Args:
        data: HTML source of the novel's detail page.

    Returns:
        list: ``[title, url]`` pairs, one per chapter.
    """
    root = etree.HTML(data)
    # Locate and fetch the dedicated catalogue page first.
    catalogue_url = root.xpath('/html/body/div[2]/div[5]/div[1]/div[1]/div[1]/div[2]/div[5]/div[2]/a[1]/@href')[0]
    catalogue_html = dataGet(catalogue_url)
    # NOTE(review): both patterns use character classes ([...]) where
    # literal text was probably intended; kept verbatim to preserve
    # the matching behavior this scraper currently depends on.
    titles = [m.group()[7:-3]
              for m in re.finditer('[title="]{7}(.*)[字数]{2}', catalogue_html)]
    links = [m.group()
             for m in re.finditer('[http://book.zongheng.com/chapter/]{33}(.*)[.html]{5}', catalogue_html)]
    # Index over titles (not zip) so a shortfall in links raises,
    # exactly as the original indexing loop did.
    return [[titles[i], links[i]] for i in range(len(titles))]
def contentGet(data):
    """Pull the chapter title and body text out of a reader page.

    Args:
        data: HTML source of a chapter (reader) page.

    Returns:
        list: ``[title, content]``, both whitespace-stripped strings.
    """
    tree = etree.HTML(data)
    title_node = tree.xpath('//*[@id="readerFt"]/div/div[1]/div[2]/div[2]')[0]
    body_node = tree.xpath('//*[@id="readerFt"]/div/div[1]/div[5]')[0]
    chapter_title = title_node.xpath('string(.)').strip()
    chapter_text = body_node.xpath('string(.)').strip()
    return [chapter_title, chapter_text]
# 小说下载开始
def noteDown(Download_url):
    """Download every chapter listed in *Download_url*.

    Args:
        Download_url: Iterable of ``[title, chapter_url]`` pairs, as
            produced by ``chapterGet()``.

    Returns:
        list: ``[title, content]`` pairs, one per chapter, in order.
    """
    # Renamed from `list`, which shadowed the builtin of that name.
    results = []
    for chapter in Download_url:
        page = dataGet(chapter[1])
        # Reuse contentGet() instead of duplicating its XPath parsing
        # logic inline, so the extraction rules live in one place.
        results.append(contentGet(page))
    return results
```
1. 在相邻请求之间加入 time.sleep() 延时,降低请求频率,避免因访问过快被服务器拒绝连接
2. 如果使用 Selenium,可通过 driver.implicitly_wait(30) 设置隐式等待,给页面加载留出时间
3. 多用 try/except 捕捉并处理网络异常(超时、连接失败等),必要时在失败后进行重试
很抱歉,参考资料中并没有关于爬虫的内容,也没有给出具体的问题或错误提示。请提供更详细的信息和上下文,以便我能够更好地帮助你解决问题。