import random
import re
import time
from urllib.parse import quote

import requests
# Interactive downloader for novels hosted on www.1qxs.com.
#
# Flow: search by keyword -> let the user pick an exact title -> fetch the
# chapter index -> fetch every page of every chapter and append the text to
# one .txt file per book.
#
# Site URL layout, as used below: search results link to /xs/<id>.html, the
# chapter index lives at /list/<id>.html, a chapter at /xs/<id>/<chapter>.html
# and its pages at /xs/<id>/<chapter>/<page>.html.

BASE_URL = 'https://www.1qxs.com'
SEARCH_URL = 'https://www.1qxs.com/search.html?'

# Headers sent with every request.
# NOTE(review): the Cookie is a hard-coded browser session and will likely
# need refreshing when it expires.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50',
    'Cookie': 'uid=ded5e157f5c8414dbfa9991ded4b772f; Hm_lvt_dac3e73c19939b4f372cc10336edc17e=1686375393,1686663314; __gads=ID=eadff3488856b80c-225e196d5fe1006c:T=1686375390:RT=1686663624:S=ALNI_MZVmkXdYC5n62l4bxtiXfWLS5WLZA; __gpi=UID=00000c465bcc5e5a:T=1686375390:RT=1686663624:S=ALNI_MZSNXeAZLCDCT6MhpoI3abXzntunA; read_history=13:2; Hm_lpvt_dac3e73c19939b4f372cc10336edc17e=1686663749',
    # The original value was 'Referer: https://www.1qxs.com/', i.e. the header
    # name was duplicated inside the value.
    'Referer': 'https://www.1qxs.com/',
}

# Directory the downloaded books are appended to.
OUTPUT_DIR = 'C:\\Users\\lty\\Desktop'
# Single encoding for the whole output file (title and body alike).
OUTPUT_ENCODING = 'gb18030'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def find_book_url(results, title):
    """Return the chapter-index URL of the search result named *title*.

    Args:
        results: ``(url_suffix, book_name)`` pairs scraped from the search
            page, where ``url_suffix`` is the part after ``/xs``.
        title: Exact book name typed by the user.

    Returns:
        The ``/list...`` URL of the matching book, or ``None`` when no result
        has that exact name. If several results share the name, the last one
        wins.
    """
    match = None
    for suffix, name in results:
        if name == title:
            match = f'{BASE_URL}/list{suffix}'
    return match


def format_page(paragraphs):
    """Return the text to append to the book file for one chapter page.

    The first scraped paragraph is skipped and the remaining ones are
    concatenated; the first kept paragraph is prefixed with a single space.

    Args:
        paragraphs: Paragraph strings scraped from one page, in page order.

    Returns:
        The concatenated page text, or ``''`` when nothing remains after
        dropping the first paragraph.
    """
    body = paragraphs[1:]
    if not body:
        return ''
    return ' ' + ''.join(body)


def fetch_text(page_url, headers, params=None):
    """GET *page_url* and return the decoded response body."""
    response = requests.get(
        page_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
    )
    return response.text


def download_book(book_url, book_name, keyword):
    """Download every chapter listed at *book_url* into ``<book_name>.txt``.

    Args:
        book_url: URL of the book's chapter-index page.
        book_name: Book title; used as the output file name.
        keyword: Search keyword, used to build the Referer of the index
            request.

    Raises:
        SystemExit: When the book id cannot be found on the index page.
    """
    # Header values must be latin-1 encodable, so the keyword is
    # percent-encoded instead of being round-tripped through latin-1.
    index_headers = {**HEADERS, 'Referer': f'{SEARCH_URL}kw={quote(keyword)}'}
    index_html = fetch_text(book_url, index_headers)

    # NOTE(review): the book id is taken from the link to chapter 10, so a
    # book without such a link cannot be downloaded.
    id_matches = re.findall('<a href="/xs/(.*?)/10.html">', index_html)
    if not id_matches:
        raise SystemExit(f'无法在目录页中找到书籍编号: {book_url}')
    book_id = id_matches[0]
    chapter_ids = re.findall(
        f'<a href="/xs/{re.escape(book_id)}/(.*?).html">', index_html
    )

    out_path = f'{OUTPUT_DIR}\\{book_name}.txt'
    for number, chapter_id in enumerate(chapter_ids, start=1):
        chapter_url = f'{BASE_URL}/xs/{book_id}/{chapter_id}.html'
        chapter_html = fetch_text(chapter_url, HEADERS)
        titles = re.findall('<h1>(.*?)</h1>', chapter_html)
        page_counts = re.findall('tpg="(.*?)">', chapter_html)
        if not titles or not page_counts:
            print(f'跳过 {chapter_url}: 未找到章节名或页数')
            continue
        chapter_title = titles[0]
        print(chapter_title)
        print(page_counts[0])
        page_total = int(page_counts[0])

        # One handle for title and body: the title used to go through a
        # separate buffered handle that was never flushed, so it reached the
        # file after the body text.
        with open(out_path, 'a', encoding=OUTPUT_ENCODING) as out:
            # '\n' rather than '\r\n': text mode already translates newlines.
            out.write(chapter_title + '\n')
            for page in range(1, page_total + 1):
                page_url = f'{BASE_URL}/xs/{book_id}/{chapter_id}/{page}.html'
                print(page_url)
                page_html = fetch_text(page_url, HEADERS)
                out.write(format_page(re.findall('<p> (.*?)</p>', page_html)))
                # Random pause between pages to avoid hammering the site.
                time.sleep(random.randint(1, 2))
            # Blank line between chapters.
            out.write('\n\n')

        print(f'---------第{number}章{chapter_title}下载完毕--------------')
        time.sleep(1)


def main():
    """Prompt for a keyword and an exact title, then download that book."""
    keyword = input('请输入小说名:')
    search_html = fetch_text(SEARCH_URL, HEADERS, params={'kw': keyword})
    results = re.findall(
        '<div class="name"><a href="/xs(.*?)" target="_blank">(.*?)</a></div>',
        search_html,
    )
    if not results:
        raise SystemExit('没有找到相关小说')
    for _, name in results:
        print(name)

    wanted = input('请输入完整的书名:')
    book_url = find_book_url(results, wanted)
    if book_url is None:
        raise SystemExit(f'搜索结果中没有名为"{wanted}"的小说')
    print(book_url, wanted)
    download_book(book_url, wanted, keyword)


if __name__ == '__main__':
    main()
这段下载逻辑是一个循环:章节名写入后如果还留在缓冲区里、没有真正输出到文件,而正文已经先写进了文件,那么文件里章节名就会排在正文后面,造成「章节名在后面」的错觉。