import requests
from lxml import etree
def get_data():
    """Download one Qidian chapter page and save its text to disk.

    Requests the hard-coded chapter URL with browser-like headers, parses the
    returned HTML with lxml, and for every ``div`` whose class is
    ``main-text-wrap`` writes the joined paragraph text to
    ``./baocun/<title>.txt`` encoded as UTF-8.

    Returns:
        None. Each matched node is printed and its text is written to a file.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    import os  # local import: only needed to create the output directory

    url = "https://read.qidian.com/hankread/1017688451/81405882"
    # A plain request gets "Response [200]" (request succeeded), but the
    # original author notes anti-scraping measures, so we present ourselves
    # as a browser.
    # Headers must be a mapping of header name -> value. Writing them as
    # adjacent "Name: value" strings inside braces builds a one-element set,
    # which requests cannot use as headers.
    headers = {
        # Browser identification.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
        # The page we claim to have come from.
        "Referer": "https://read.qidian.com/hankread/1017688451/81405882",
    }
    # Send the request with the headers and take the page source as text.
    # The timeout keeps the script from hanging forever on a stalled server.
    r = requests.get(url, headers=headers, timeout=10).text
    html = etree.HTML(r)
    # Select the chapter container(s) with XPath.
    chapter = html.xpath("//div[@class='main-text-wrap']")
    # Make sure the output directory exists before writing into it.
    os.makedirs("./baocun", exist_ok=True)
    for item in chapter:
        print(item)
        title = item.xpath(".//span[@class='content-wrap']/text()")
        content = item.xpath(".//div[@class='read-content j_readContent']/p/text()")
        if not title:
            # No title text found: there is nothing to name the file after,
            # so skip this node instead of failing on title[0].
            continue
        # Save the chapter text to a file named after its title.
        with open("./baocun/%s.txt" % title[0], 'w', encoding='utf-8') as f:
            f.write("".join(content))
# Run the scraper as soon as this module is loaded (executed or imported).
get_data()
# Reference link: https://www.csdn.net/gather_29/MtjakgysMTA3MjgtYmxvZwO0O0OO0O0O.html