入门cn的烦恼,请求帮我看一下

import requests
import re
main_url = 'https://www.qb5.tw/book_116663/'
main_html = requests.get(main_url).text
info_list = re.findall('
(.*?), main_html) for inf in info_list: url = 'https://www.qb5.tw/book_116663/'+inf[0] response = requests.get(url) html_date = response.text text = re.findall('
(.*?)
'
, html_date)[0] text = text.replace(' 全本小说 www.qb5.tw,最快更新星门最新章节!

'
, '') text = text.replace(' ', ' ') text = inf[1]+'\n\n'+text.replace('
'
, '\n') print(text) open('星门.txt', mode='a', encoding='gbk').write(text)

帮我看看这段代码有什么问题,为什么得不到我想要的结果呀

剩下的你自己慢慢处理吧


import requests
import re
from lxml import etree
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
main_url = 'https://www.qb5.tw/book_116663/'
# Seconds to wait on the server. requests applies no timeout by default, so
# without this a stalled connection would block the script indefinitely.
request_timeout = 10

# Fetch the book's index page and fail loudly on an HTTP error instead of
# parsing an error page as if it were the chapter list.
main_response = requests.get(main_url, headers=headers, timeout=request_timeout)
main_response.raise_for_status()
main_html = main_response.text

# Capture the href of every <dd> link whose label starts with "第".
info_list = re.findall('<dd><a href="(.*?)">第', main_html)
for inf in info_list:
    print(inf)
    # Only hrefs that are exactly 13 characters long are fetched; every
    # other match is skipped.
    if len(inf) != 13:
        continue
    url = f'{main_url}{inf}'
    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    html_date = response.text
    # NOTE(review): if the printed text comes out garbled, the response
    # encoding may need to be set explicitly - confirm against the site.

    # Print every text node found under the element with id="content".
    html = etree.HTML(html_date)
    sen_list = html.xpath('//*[@id="content"]//text()')
    for i in sen_list:
        print(i)
    # Stop after the first fetched page; remove this to process all of them.
    break