# Source code
# Simulate a login, relying on the cookies stored by a requests Session.
import time
from urllib.parse import urljoin

import ddddocr
import requests
from lxml import etree
# Endpoints and request settings shared by every step of the login flow.
LOGIN_URL = 'http://www.zjnep.com/lms/web/default/login'
COURSE_URL = 'http://www.zjnep.com/lms/web/course/index'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36'
}
# Seconds to wait on each HTTP request, so a stalled server cannot hang the
# script forever.
REQUEST_TIMEOUT = 15
# Token that used to be hard-coded in the form data; only used when no token
# can be found in the freshly fetched login page.
FALLBACK_CSRF_TOKEN = 'B_a2FMTuS1ajuVWzxeqWaue1LMBkmSb-HqN-BZYJanQxxfNkp58tZcT_GePyreciq8dHhgPrcc9O-RJLzHw9MA=='


def fetch_login_page(session):
    """Fetch the login page through *session* and return its parsed HTML tree.

    Going through the session (instead of a bare ``requests.get``) means any
    cookies set by the login page are reused by the captcha request and by
    the login POST.
    """
    page_text = session.get(
        url=LOGIN_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT
    ).text
    return etree.HTML(page_text)


def extract_csrf_token(tree):
    """Return the CSRF token found in the login page, or the fallback token.

    NOTE(review): the two lookups below (a ``_csrf`` input and a
    ``csrf-token`` meta tag) are assumed locations -- confirm them against
    the live login page. When neither matches, the previously hard-coded
    token is returned.
    """
    queries = (
        '//input[@name="_csrf"]/@value',
        '//meta[@name="csrf-token"]/@content',
    )
    for query in queries:
        values = tree.xpath(query)
        if values:
            return values[0]
    return FALLBACK_CSRF_TOKEN


def recognize_captcha(session, tree):
    """Download the captcha referenced by the login page and OCR it.

    The image is fetched through *session* so that it shares cookies with
    the later login POST. A copy is saved to ``./code.jpg``.

    Returns the value produced by ``ocr.classification``.
    Raises RuntimeError when the page has no element with id ``imgCode``.
    """
    sources = tree.xpath('//*[@id="imgCode"]/@src')
    if not sources:
        raise RuntimeError('captcha image (id="imgCode") not found on the login page')
    # urljoin resolves the src against the login URL instead of assuming it
    # always starts with "/".
    code_img_src = urljoin(LOGIN_URL, sources[0])
    print(code_img_src)
    code_img_data = session.get(
        url=code_img_src, headers=HEADERS, timeout=REQUEST_TIMEOUT
    ).content
    with open('./code.jpg', 'wb') as fp:
        fp.write(code_img_data)

    begin = time.time()
    ocr = ddddocr.DdddOcr()
    # The bytes are already in memory; no need to read code.jpg back.
    res = ocr.classification(code_img_data)
    finish = time.time()
    print("结果:")
    print(res)
    print("用时:%s 秒" % str(finish - begin))
    return res


def build_login_data(csrf_token, captcha_text):
    """Build the form payload for the login POST.

    The CSRF key is ``_csrf``; the original payload used ``' _csrf'`` with a
    stray leading space.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from
    # the environment or a config file instead of keeping them in source.
    return {
        '_csrf': csrf_token,
        'LoginForm[userid]': '43022320010501919X',
        'LoginForm[password]': '01919X',
        'veri_code': captcha_text,
    }


def main():
    """Log in with an OCR-solved captcha, then save the course page.

    Every request goes through one ``requests.Session`` so cookies carry
    over from the login page to the captcha, the login POST and the course
    page. The course page HTML is written to ``wdkc.html``.
    """
    # The context manager closes the session's connections on exit.
    with requests.Session() as session:
        tree = fetch_login_page(session)
        captcha_text = recognize_captcha(session, tree)
        data = build_login_data(extract_csrf_token(tree), captcha_text)

        # Send the login POST.
        response = session.post(
            url=LOGIN_URL, headers=HEADERS, data=data, timeout=REQUEST_TIMEOUT
        )
        print(response.status_code)

        # Fetch the course page with the same session.
        detail_page_text = session.get(
            url=COURSE_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT
        ).text
        with open('wdkc.html', 'w', encoding='utf-8') as f:
            f.write(detail_page_text)


if __name__ == '__main__':
    main()
# Crawled page