不知道为什么还是显示验证码错误:验证码已经由第三方打码平台识别,而且我的传参没有问题。
import requests
from lxml import etree
from ClassCode import Chaojiying_Client
# 1. Capture and recognise the captcha image.
url = 'https://so.gushiwen.cn/user/login.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36 Edg/98.0.1108.43'
}

# One Session is used for the whole login attempt so that the cookies set by
# the login page / captcha image are sent back with the login POST.
# Presumably the server ties the expected captcha answer to that cookie, which
# is why independent requests.get()/requests.post() calls always fail with
# "wrong captcha" even when the recognised text is correct.
session = requests.Session()

page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)

# The captcha image must be downloaded through the same session as the page.
code_img_src = 'https://so.gushiwen.cn/' + tree.xpath('//*[@id="imgCode"]/@src')[0]
code_img_data = session.get(url=code_img_src, headers=headers).content
with open('./code.jpg', 'wb') as fp:
    fp.write(code_img_data)

# Hidden ASP.NET form fields that have to be echoed back in the login POST.
viewstate = tree.xpath('//*[@id="aspnetForm"]/div[1]/input/@value')[0]
viewstategenerator = tree.xpath('//*[@id="aspnetForm"]/div[2]/input/@value')[0]
print(viewstate)
print(viewstategenerator)


def getcodetext(imgpath, codeType):
    """Return the captcha text recognised by the Chaojiying service.

    Args:
        imgpath: Path of the local captcha image file to upload.
        codeType: Captcha type code forwarded unchanged to ``PostPic``.

    Returns:
        The ``'pic_str'`` entry of the mapping returned by ``PostPic``.
    """
    # NOTE(review): account credentials are hard-coded; move them to
    # configuration or environment variables before sharing this script.
    # The third argument is the software ID generated under
    # "User Center >> Software ID" (it replaces the vendor sample's 96001).
    chaojiying = Chaojiying_Client('ylw74127', 'ldwl1001', '928883')
    # The vendor sample's ``if __name__ == '__main__'`` guard was dropped: it
    # made this function return None whenever the module was imported.
    # On Windows the image path sometimes needs '//' separators.
    with open(imgpath, 'rb') as img_file:
        im = img_file.read()
    x = chaojiying.PostPic(im, codeType)
    return x['pic_str']


# Recognise the captcha with the third-party service.
result = getcodetext('code.jpg', 1902)
print(result)

# 2. Send the POST request (simulated login).
# NOTE(review): the login e-mail and password are hard-coded as well.
data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategenerator,
    'from': '',
    'email': '1658838620@qq.com',
    'pwd': 'ldwl1001',
    'code': result,
    'denglu': '登录'
}
# Same session again, so the cookies obtained above accompany the login.
login_page_text = session.post(url=url, headers=headers, data=data).text
with open('古诗文.html', 'w', encoding='UTF-8') as fp:
    fp.write(login_page_text)
试试增加 session,保持同一个会话:获取登录页、下载验证码图片和提交登录这三个请求都要用同一个 session 发送,这样 cookie 才会一致。
# A single Session shares cookies between every request made through it.
req = requests.Session()
# Corresponding change: fetch the login page through the session.
page_text = req.get(url=url, headers=headers).text
# Corresponding change: download the captcha image through the same session;
# presumably the server ties the captcha answer to the session cookie, so
# fetching the image with a plain requests.get() would break the match.
code_img_data = req.get(url=code_img_src, headers=headers).content
# Corresponding change: submit the login form through the same session.
login_page_text = req.post(url=url, headers=headers, data=data).text