Could someone please help me convert this program to multithreading?

import re
import time
import random

import requests
import pymysql
from lxml import etree
from fake_useragent import UserAgent
from multiprocessing import Pool  # imported but never used; see the threading sketch at the end
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
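# NaviCode batches: groups of CNKI patent category codes, queried one comma-separated batch per search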
navicode_list = [
                 'A001,A002,A003,A004,A005,A006,A007','A008,A009,A010,A011,A012,A013',
                 'F,G,H,J','B014_1,B014_2', 'B014_31,B014_32,B014_33,B014_34,B014_35,B014_36,B014_37',
                 'B014_38,B014_39,B014_3A','B014_4',
                 'B014_5,B014_6,B014_7,B014_8','B015_1,B015_3,B015_4,B015_5,B015_6,B015_7,B015_8',
                 'B015_2', 'B016_11', 'B016_12',
                 'B016_3,B016_5,B016_6,B016_7,B016_8', 'B016_4',
                 'B016_21,B016_22,B016_23,B016_24,B016_26,B016_27,B016_28,B016_29', 'B016_25', 'B017,B018,B019',
                 'B020_1',
                 'B020_2,B020_3,B020_4,B020_5,B020_6,B020_7,B020_8,B020_9,B020_A,B020_B,B020_C',
                 'B021,B023,B025','B022_1,B022_2,B022_3,B022_4,B022_5', 'B022_6,B022_7', 'B022_8,B022_B,B022_C','B022_9',
                 'B022_A','B024_3',
                 'B024_7', 'B024_1,B024_2,B024_4,B024_5,B024_6','B024_A',
                 'B024_B,B024_E','B024_C',
                 'B024_D','B024_8,B024_9','B026', 'B027_1,B027_2,B027_3,B027_4', 'B027_5,B027_6',
                 'C028_1,C028_2,C028_4,C028_5,C028_6,C028_7,C028_8','C028_9','C028_38',
                 'C028_31,C028_32,C028_33,C028_34,C028_35',
                 'C028_36,C028_37,C028_39,C028_3A,C028_3B,C028_3C',
                 'C029_1,C029_2,C029_3,C029_4,C029_6,C029_7', 'C029_8,C029_9,C029_B,C029_C,C029_D',
                 'C029_51,C029_52,C029_53,C029_54,C029_55','C029_56,C029_57,C029_58,C029_59',
                 'C029_A1,C029_A3','C029_A2', 'C030_1,C030_2,C030_3,C030_4,C030_5',
                 'C030_6,C030_7,C030_8,C030_9,C030_A',
                 'C030_B,C030_C,C030_D,C030_E,C030_F,C030_G,C030_H,C030_I', 'C031,C032,C033,C034',
                 'C035_1,C035_2,C035_3,C035_4,C035_5,C035_6,C035_7,C035_8',
                 'C035_9,C035_A,C035_B,C035_C,C035_D,C035_E', 'C036,C037,C040,C041',
                 'C038_1,C038_21,C038_22,C038_23,C038_24,C038_3',
                 'C038_25,C038_26,C038_27,C038_28,C038_29', 'C039', 'C042',
                 'D043,D044,D045,D046,D047', 'D044,D045,D046,D047',
                 'I135_1,I135_2,I135_3,I135_4,I135_7,I135_8', 'I135_6',
                 'I135_522,I135_523,I135_524', 'I135_521',
                 'I138_1,I138_2,I138_3,I138_4,I138_5,I138_6,I138_7,I138_8,I138_9,I138_A,I138_B',
                 'I138_C12,I138_C13,I138_C14,I138_C2', 'I138_C11',
                 'I136_87,I136_88','I136_84,I136_85,I136_86',
                 'I136_81,I136_82,I136_83',
                 'I137_3,I137_4,I137_5','I137_1,I137_2',
                 'I139,I140,I141,I142,I143,I144',
                 'I136_1,I136_2,I136_3,I136_4,I136_5,I136_6,I136_7',
                 'I136_9,I136_A,I136_B,I136_C,I136_D,I136_E,I136_F,I136_G'
]
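# publication months to crawl; the commented-out entries below cover 2014-01 through 2021-04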
time_list = ['2020-04',
             # '2021-04','2021-03','2021-01','2021-02',
             # '2020-12','2020-11','2020-10','2020-09','2020-08','2020-07','2020-06','2020-05','2020-04','2020-03','2020-02','2020-01',
             # '2019-12','2019-11','2019-10','2019-09','2019-08','2019-07','2019-06','2019-05','2019-04','2019-03','2019-02','2019-01',
             # '2018-12','2018-11','2018-10','2018-09','2018-08','2018-07','2018-06','2018-05','2018-04','2018-03','2018-02','2018-01',
             # '2017-12','2017-11','2017-10','2017-09','2017-08','2017-07','2017-06','2017-05','2017-04','2017-03','2017-02','2017-01',
             # '2016-12','2016-11','2016-10','2016-09','2016-08','2016-07','2016-06','2016-05','2016-04','2016-03','2016-02','2016-01',
             # '2015-12','2015-11','2015-10','2015-09','2015-08','2015-07','2015-06','2015-05','2015-04','2015-03','2015-02','2015-01',
             # '2014-12','2014-11','2014-10','2014-09','2014-08','2014-07','2014-06','2014-05','2014-04','2014-03','2014-02','2014-01',
             ]
# db = pymysql.connect(host='',user='root',password='',database='industrynav')
# cursor = db.cursor()
times = time.strftime('%a %b %d %Y %H:%M:%S') + ' GMT+0800 (中国标准时间)'  # timestamp in the JS Date string format the server expects
for time1 in time_list:
    for LB in navicode_list:
        print(time1,LB)
        params1 = (
            ('action', ''),
            ('NaviCode', LB),  # category filter (one batch of classification codes)
            ('ua', '1.21'),
            ('PageName', 'ASP.brief_result_aspx'),
            ('DbPrefix', 'SCPD'),
            ('DbCatalog', '\u4E2D\u56FD\u4E13\u5229\u6570\u636E\u5E93'),
            ('ConfigFile', 'SCPD.xml'),
            ('db_opt', '\u4E2D\u56FD\u4E13\u5229\u6570\u636E\u5E93'),
            ('db_value', '\u4E2D\u56FD\u4E13\u5229\u6570\u636E\u5E93'),
            ('date_gkr_from', time1),  # publication-date filter: from
            ('date_gkr_to', time1),  # publication-date filter: to
            ('his', '0'),
            ('__', times),
        )

        def get_cookie():  # obtain a session carrying the search cookies
            session = requests.session()
            try:
                session.get('https://epub.cnki.net/kns/request/SearchHandler.ashx', headers=headers, params=params1)
                return session
            except requests.RequestException:
                return None  # network failure; callers must handle a missing session
        def max11():  # get the total number of result pages
            session = get_cookie()
            if session is None:
                return 0
            params = (
                ('curpage', ''),  # current page number
                ('RecordsPerPage', '50'),
                ('QueryID', '20'),
                ('ID', ''),
                ('turnpage', '1'),
                ('tpagemode', 'L'),
                ('dbPrefix', 'SCPD'),
                ('Fields', ''),
                ('DisplayMode', 'listmode'),
                ('SortType', "(公开日, 'DATE')desc"),
                ('PageName', 'ASP.brief_result_aspx'),
            )
            response = session.get('https://epub.cnki.net/kns/brief/brief.aspx', headers=headers, params=params)
            selector = etree.HTML(response.text)
            try:
                page_info = selector.xpath('//*[@id="J_ORDER"]/tr[2]/td/table/tr/td[2]/div/span[1]')[0].text
                max1 = int(re.compile('浏览.*/(.*)').findall(page_info)[0])
                return max1
            except Exception:
                return 0  # page counter not found; treat as no results

        def get_list_info():  # crawl the result list pages
            max1 = max11()
            for i in range(1, max1 + 1):  # max1 == 0 means nothing to do
                session = get_cookie()
                if session is None:
                    continue
                params = (
                    ('curpage', i),  # current page number
                    ('RecordsPerPage', '50'),
                    ('QueryID', '20'),
                    ('ID', ''),
                    ('turnpage', '1'),
                    ('tpagemode', 'L'),
                    ('dbPrefix', 'SCPD'),
                    ('Fields', ''),
                    ('DisplayMode', 'listmode'),
                    ('SortType', "(公开日, 'DATE')desc"),
                    ('PageName', 'ASP.brief_result_aspx'),
                )
                try:
                    response = session.get('https://epub.cnki.net/kns/brief/brief.aspx', headers=headers, params=params)
                    selector = etree.HTML(response.text)
                    urls_info = re.compile("<a class='fz14' href='/kns/detail/detail.aspx(.*?)'").findall(response.text)
                    page_info = selector.xpath('//*[@id="J_ORDER"]/tr[2]/td/table/tr/td[2]/div/span[1]')[0].text
                    print(page_info)
                    nums = len(urls_info)
                    now_page = int(re.compile('浏览(.*?)/').findall(page_info)[0])
                    print("Fetching list page {}, {} records".format(now_page, nums))
                    print(LB, time1)
                except Exception:
                    continue  # skip this page on any network/parsing error
                for url in urls_info:
                    detail_url = 'https://kns.cnki.net/kcms/detail/detail.aspx?' + url  # detail page URL
                    print(detail_url)
                    try:
                        response = requests.get(url=detail_url, headers=headers)
                    except requests.RequestException:
                        time.sleep(5)  # back off once, then retry
                        response = requests.get(url=detail_url, headers=headers)
                    main_info = str(response.text)
                    # crude cleanup: strip spaces, commas, newlines and quotes so the regexes below match
                    a = str(main_info.split(' ')).replace(' ', '').replace(',', '').replace('\r\n', '').replace('\'', '')
                    b = str(a.replace('</span><pclass="funds">', '').replace('</h5>\\r\\n<divclass="abstract-text">', ''))
                    title = ''.join(re.compile('<title>(.*?)-中国知网').findall(b)[0])  # patent title
                    leixing = ''.join(re.compile('>专利类型:(.*?)<').findall(b)[0])  # patent type
                    sqgb = ''.join(re.compile('>申请公布号:(.*?)<').findall(b))  # application publication number
                    if sqgb == '':
                        gb_id = ''.join(re.compile('授权公布号:(.*?)<').findall(b))  # fall back to grant publication number
                    else:
                        gb_id = sqgb
                    gkr = ''.join(re.compile('>公开公告日:(.*?)<').findall(b))  # publication date
                    if gkr == '':
                        gb_time = ''.join(re.compile('授权公告日:(.*?)<').findall(b))  # fall back to grant date
                    else:
                        gb_time = gkr
                    sq_id = ''.join(re.compile(r'>申请\(专利\)号:(.*?)<').findall(b))  # application number
                    sq_time = ''.join(re.compile('>申请日:(.*?)<').findall(b))  # filing date
                    sqr = ''.join(re.compile('申请人:.*?">(.*?)<').findall(b))  # applicant
                    if sqr == '':
                        sq_person = ''.join(re.compile('申请人:(.*?)</p>').findall(b))
                    else:
                        sq_person = sqr
                    addr = ''.join(re.compile('>地址:(.*?)<').findall(b))  # address
                    cl = ''.join(re.compile('>主分类号:(.*?)<').findall(b))  # main classification code
                    flh = ''.join(re.compile('>分类号:(.*?)<').findall(b))  # classification codes
                    gsdm = ''.join(re.compile('>国省代码:(.*?)<').findall(b))  # country/province code
                    abstracts = ''.join(re.compile('>摘要:(.*?)<').findall(b))  # abstract
                    The_inventor = ''.join(re.compile('>发明人:(.*?)<', re.S).findall(b))  # inventor(s)
                    if The_inventor == '':
                        fmr = ''.join(re.compile(r'au([\u4e00-\u9fa5a-zA-Z. ]+)\d+', re.S).findall(b))
                    else:
                        fmr = The_inventor
                    try:
                        patent_agency = ''.join(re.compile('>代理机构:(.*?)<').findall(b))  # patent agency
                        agent = ''.join(re.compile('">代理人:(.*?)<').findall(b))  # agent
                    except Exception:
                        patent_agency = agent = ''
                    print(title, leixing, gb_id, gb_time, sq_id, sq_time, sq_person, addr, cl, flh, gsdm, abstracts,
                          patent_agency, agent, fmr)


                    # sql = """insert into patent(title, leixing, gb_id, gb_time, sq_id, sq_time, sq_person, addr, cl, flh, gsdm, abstracts,patent_agency, agent, fmr) values ('{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}')""".format(
                    #         title, leixing, gb_id, gb_time, sq_id, sq_time, sq_person, addr, cl, flh, gsdm, abstracts,patent_agency, agent, fmr)
                    # print(sql)
                    # cursor.execute(sql)
                    # db.commit()


        if __name__ == '__main__':
            ua = UserAgent().random  # fresh random User-Agent for each (time1, LB) pair
            headers = {
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                'User-Agent': ua,
                'Referer': 'https://epub.cnki.net/kns/brief/result.aspx?dbprefix=SCPD',
            }
            get_list_info()  # prints as it goes and returns nothing

# db.close()

For multithreading, you split the same operation across different threads: for example, if a task loops 10 times and you run 5 threads, each thread handles 2 iterations. Subdivide the task like that and you are done. In your script the natural unit of work is one (time1, LB) pair; see the sketch below.
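
Here is a minimal sketch of that split using the standard-library ThreadPoolExecutor. It assumes the navicode_list and time_list defined in your script are in scope; crawl_one is a hypothetical wrapper you would fill in with your existing get_cookie() / max11() / get_list_info() pipeline, and the worker count of 5 is my choice, not something the original code dictates:

import itertools
from concurrent.futures import ThreadPoolExecutor, as_completed

def crawl_one(time1, LB):
    # hypothetical worker: build params1 and headers from these arguments
    # (instead of module-level globals) and run the existing
    # get_cookie() -> max11() -> get_list_info() pipeline for this pair
    print('crawling', time1, LB)

if __name__ == '__main__':
    # every (month, category-batch) combination becomes one independent task
    tasks = list(itertools.product(time_list, navicode_list))
    with ThreadPoolExecutor(max_workers=5) as pool:  # 5 threads share the task list
        futures = [pool.submit(crawl_one, time1, LB) for time1, LB in tasks]
        for future in as_completed(futures):
            future.result()  # re-raises any exception from a worker

Since your script already imports Pool from multiprocessing, multiprocessing.dummy.Pool would give you the same Pool API backed by threads instead of processes. Two caveats: a shared pymysql connection is not thread-safe, so if you re-enable the database writes, open one connection per thread; and keep the worker count small, or CNKI's anti-crawler checks may start rejecting your requests.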
