爬虫模拟登录人人网并爬取页面数据失败

import requests
from lxml import etree
from CodeClass import Chaojiying_Client

# Wrapper around the Chaojiying captcha-recognition client.
def getCodeText(filename, codeType):
    """Submit a local captcha image to Chaojiying and return its response.

    Args:
        filename: Path of the captcha image on local disk.
        codeType: Captcha type code, passed through unchanged to ``PostPic``.

    Returns:
        Whatever ``Chaojiying_Client.PostPic`` returns; the script in this
        file reads the recognised text from its ``'pic_str'`` key.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    # NOTE(review): the account name, password and software ID are hard-coded
    # here; consider loading them from configuration instead.
    chaojiying = Chaojiying_Client('Echoyay', '0822CNTTge', '914431')
    # Read the image inside a context manager so the file handle is always
    # closed, even if the read fails.
    with open(filename, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, codeType)

# 1. Fetch the login page, then download and recognise the captcha image.
#
# Every request goes through one Session so that cookies set while loading
# the page and the captcha are sent back with the login POST. With separate
# requests.get()/requests.post() calls no cookies are shared between requests.
# NOTE(review): presumably the site ties the captcha to a session cookie,
# which would explain the failed login - confirm against the live site.
session = requests.Session()
url = 'http://www.renren.com/SysHome.do'
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
}

page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
# Take the first <img> src found under the element with id "codeimg".
code_img_src = tree.xpath('//dl[@id="codeimg"]//img/@src')[0]
code_img_data = session.get(url=code_img_src, headers=headers).content
with open('code.jpg', 'wb') as fp:
    fp.write(code_img_data)

# Recognise the saved captcha image through the Chaojiying platform.
code_info = getCodeText('code.jpg', '1902')
code = code_info['pic_str']
print(code)

# 2. Send the login POST (simulated login) using the same session.
login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2021251628151'
data = {
    'email': '15633250136',
    'icode': code,
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '2a1fd0f39035c05ecbe1fa8f8782c0a4c41027978c72cf21f484b42998a89a11',
    'rkey': '9a2b62f121c905c1104fe9ecc17e936a',
    'f': 'http%3A%2F%2Fwww.renren.com%2F976474165%2Fnewsfeed%2Fphoto',
}
login_page_text = session.post(url=login_url, data=data, headers=headers).text
# Save the login response body for inspection.
with open('renren.html', 'w', encoding='utf-8') as fp:
    fp.write(login_page_text)

验证码可识别,但登录失败,renren.html内容为{"code":true,"homeUrl":"http://www.renren.com/home"}。查了资料说是因为登录失败,请问应该怎么处理?

具体错误信息贴出来呗

您好,我是问答小助手,你的问题已经有小伙伴为您解答了问题,您看下是否解决了您的问题,可以追评进行沟通哦~

如果有您比较满意的答案 / 帮您提供解决思路的答案,可以点击【采纳】按钮,给回答的小伙伴一些鼓励哦~~

ps:问答VIP仅需29元,即可享受5次/月 有问必答服务,了解详情>>> https://vip.csdn.net/askvip?utm_source=1146287632

# Keep the Response object so the HTTP status can be inspected as well as the body.
response = requests.post(login_url, data=data, headers=headers)
print(response.status_code)
login_page_text = response.text

将44行改为上方所示代码,检查了响应状态码为200,表示模拟登录成功。返回字典{"code":true,"homeUrl":"http://www.renren.com/home"}是正常现象,与模拟登录是否成功无关。

https://yiyele.blog.csdn.net/article/details/72301501
爬虫简单介绍