I want to crawl a novel from biquge with multiple threads, but the function doesn't execute

import re
import time
import os
import requests
from lxml import etree
import threading
from queue import Queue
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Mobile Safari/537.36'
}
# URL of the novel to crawl -- the index page of any biquge novel
url = 'https://www.biquwx.la/1_1760/'

# Request the index page
resp = requests.get(url=url, headers=headers)
contentpage = resp.content.decode('utf-8', 'ignore')
html = etree.HTML(contentpage)
print(resp.status_code)
# Queues for chapter titles and chapter URLs
titles_queue = Queue(50)
text_urls = Queue(50)

def producer():
    """Producer: fetch the data --
    1. the title of every chapter
    2. the URL of every chapter
    """
    title1 = html.xpath('//dd/a/text()')
    for k in title1:
        titles_queue.put(k)  # chapter title
    text_newurl = []
    text_url2 = html.xpath('//dd/a/@href')
    for i in text_url2:
        j = 'https://www.biquwx.la/1_1760/' + i
        text_newurl.append(j)
    for j in text_newurl:
        text_urls.put(j)  # chapter URL

def consumer():
    """Consumer: take one chapter off the queues, fetch it and save it."""
    # Novel name
    name = re.findall('<h1>(.+?)</h1>', contentpage, re.DOTALL)[0]
    print(name)
    # Chapter title
    title = titles_queue.get()
    # Chapter URL -- request it and extract the body text
    text_url = text_urls.get()
    resp = requests.get(url=text_url, headers=headers)
    content = resp.content.decode('utf-8', 'ignore')
    html1 = etree.HTML(content)
    text = "".join(html1.xpath('//div[@id="content"]/text()'))
    while len(text) == 0:
        # Nothing came back -- request the page again
        resp2 = requests.get(url=text_url, headers=headers)
        content2 = resp2.content.decode('utf-8', 'ignore')
        html2 = etree.HTML(content2)
        text = "".join(html2.xpath('//div[@id="content"]/text()'))
    book_dir = os.path.join(r'D:\AAAA桃花青帝\Python文件\爬虫\爬取小说', name)
    os.makedirs(book_dir, exist_ok=True)
    with open(os.path.join(book_dir, title + '.txt'), 'a', encoding='utf-8') as f:
        f.write(text)
    print(title + ' downloaded!')

def multi():
    # Start the producer threads
    for i in range(50):
        t = threading.Thread(target=producer)
        t.start()
    # Start the consumer threads
    for j in range(50):
        t = threading.Thread(target=consumer)
        t.start()


multi()

The chapter links never get fetched; it just hangs at the resp.status_code line.

The flow is what my script further down shows, but biquge's server isn't great. It doesn't have much real anti-crawling either, so opening 50 threads basically just gets you 503 responses. If you have proxy IPs you can add them; beyond that, open fewer threads and throttle the crawl speed, for example sleep inside each chapter fetch (a quick sketch of that idea comes first, then my full script). If this works for you, I'd appreciate it if you accepted the answer!
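As a rough sketch of that throttle-and-retry idea (the helper name fetch_with_retry, the delay values, and the retry count are my own choices, not something the answer specifies):

import time
import random
import requests

def fetch_with_retry(url, headers, proxies=None, retries=3):
    """Hypothetical helper: throttled GET that backs off and retries on 503."""
    for attempt in range(retries):
        # small random delay on every request so the server isn't hammered
        time.sleep(random.uniform(0.5, 1.5))
        resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
        if resp.status_code == 503:
            # server overloaded -- wait progressively longer, then try again
            time.sleep(2 ** attempt)
            continue
        return resp
    return None  # caller decides what to do with a chapter that never loads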


import os
from queue import Queue, Empty
import threading
import requests
from lxml import etree


class BQG:
    def __init__(self):
        self.url_queue = Queue()
        self.prefix_url = 'https://www.biquwx.la/1_1760/'
        self.suffix_url = None
        self.dir_path = os.path.join(os.getcwd(), '武炼巅峰')
        self.create_dir()
        self.get_links()
        self.get_consumers()

    def create_dir(self):
        """创建目录"""
        if not os.path.exists(self.dir_path):
            os.mkdir(self.dir_path)

    def get_links(self):
        """获取所有url"""
        top_res = requests.get(self.prefix_url)
        top_html = etree.HTML(top_res.content.decode())
        suffix_url = top_html.xpath('//div[@id="list"]/dl/dd/a/@href')
        for i in suffix_url:
            url = self.prefix_url + i
            print(url)
            self.url_queue.put(url)

    def get_consumers(self):
        """多线程爬取"""
        for i in range(50):
            t = threading.Thread(target=self.get_content)
            t.start()

    def get_content(self):
        """Fetch chapter content; each worker keeps pulling URLs until the queue is empty."""
        while True:
            try:
                url = self.url_queue.get_nowait()
            except Empty:
                break  # queue drained -- this worker is done
            res = requests.get(url)
            html = etree.HTML(res.content.decode())
            title = ''.join(html.xpath('//h1/text()'))
            content = '\n'.join(html.xpath('//div[@id="content"]//text()'))
            file_path = os.path.join(self.dir_path, '{}.txt'.format(title))
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(title + '\n')
                f.write(content)


if __name__ == '__main__':
    bqg = BQG()

Some chapters fail to load; the site returns access denied.
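One way to cope with those refused chapters (my addition, not part of the answer) is to requeue a failed URL instead of dropping it. A drop-in variant of get_content for the BQG class above, assuming import time is added at the top of the script:

    def get_content(self):
        """Variant: requeue chapters the server refuses instead of losing them."""
        while True:
            try:
                url = self.url_queue.get_nowait()
            except Empty:
                break
            res = requests.get(url)
            if res.status_code != 200:
                time.sleep(1)            # back off briefly (needs `import time`)
                self.url_queue.put(url)  # let another worker retry it later
                continue                 # a real version should cap the retries
            html = etree.HTML(res.content.decode())
            title = ''.join(html.xpath('//h1/text()'))
            content = '\n'.join(html.xpath('//div[@id="content"]//text()'))
            file_path = os.path.join(self.dir_path, '{}.txt'.format(title))
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(title + '\n')
                f.write(content)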

Whether you crawl a novel with multithreading, multiprocessing, coroutines, or async I/O, you have to watch out for anti-crawl mechanisms, and you can't crawl too fast, or your IP gets banned. One more point: don't generate chapter URLs with a plain for loop over chapter numbers, because some chapters don't exist and those pages will never load. Instead, match every chapter link on the table-of-contents page first, then crawl from that list; that's how I usually do it (a sketch follows).
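A minimal sketch of that catalog-first idea (the URL and the regex below are placeholders I made up, not taken from any real site):

import re
import requests

index_url = 'https://example.com/book/'  # table-of-contents page (placeholder)
index_html = requests.get(index_url).text

# Collect only the chapter links that actually exist on the index page,
# instead of guessing chapter numbers with range() -- guessed numbers
# that don't exist are exactly the pages that never load.
chapter_links = re.findall(r'href="(/book/\d+\.html)"', index_html)
chapter_urls = ['https://example.com' + link for link in chapter_links]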
The code below is the anti-crawl setup I wrote when crawling the second part of 庆余年; you can use it as a reference.

import requests
import re
import time
import random
import os

start_url = 'https://www.ctgf.net/xiaoshuo/84190.html'

# Pool of User-Agent strings to rotate through
USER_AGENT = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36",
]

headers = {
    "User-Agent": random.choice(USER_AGENT),
    "Referer": start_url,  # the real header name is Referer, not "Refer"
    "Host": "www.ctgf.net",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
}
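The dict above picks a User-Agent only once; to rotate it on every request and pace the crawl, the loop might look like this (my sketch, continuing from the code above -- chapter_urls is assumed to hold this site's chapter links collected in a catalog step):

for chapter_url in chapter_urls:
    headers["User-Agent"] = random.choice(USER_AGENT)  # fresh UA for each request
    resp = requests.get(chapter_url, headers=headers, timeout=10)
    time.sleep(random.uniform(1, 3))  # random pause so requests don't look automated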

I've written a biquge crawler too; I can share it with you as a reference.

Which novel do you want to crawl? Post the link and I'll take a crack at it.