When writing an async crawler with aiohttp, session.post does not get the data back, but requests.post with exactly the same data, headers, and URL works fine.
import asyncio
import json
from datetime import datetime
import aiohttp
import requests
from lxml import etree
header_text = [
"""accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6
appid: 1024
authorization: bearer 5cfbf325-eeb2-4a8b-bcdd-4063693e4466
content-length: 240
content-type: application/json
origin: https://staff.kaikeba.com
referer: https://staff.kaikeba.com/
sec-ch-ua: "Microsoft Edge";v="93", " Not;A Brand";v="99", "Chromium";v="93"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "Windows"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-site
tenantid: 6XWFVymtaB68REyRBuf
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36 Edg/93.0.961.52"""
]
def get_headers():
    # Turn the raw header block copied from the browser into a dict.
    headers = {}
    for line in header_text[0].split('\n'):
        key, value = line.split(': ', 1)  # split on the first ': ' only
        headers[key] = value
    return headers
async def get_page_post():
    async with aiohttp.ClientSession() as session:
        async with session.post(
                url=url,
                headers=headers,
                data=data) as response:
            result = await response.text()
            print(result)
if __name__ == '__main__':
    data = {
        "size": 20,
        "current": 1,
        "ascs": [],
        "descs": [
            "createTime"
        ],
        "condition": {
            "startTime": 1634572800,
            "endTime": 1637337599,
            "qrCodeSellerIdStr": "69971",
            "search": "",
            "nickname": "",
            "trackName": "",
            "outOrderId": "",
            "payBatchNo": "",
            "mobile": "",
            "uId": ""
        }
    }
    url = 'https://kmos-api.kaikeba.com/mos-console/vipcourse/order/center'
    data = json.dumps(data)
    headers = get_headers()
    # Async version: gets no data back
    asyncio.run(get_page_post())
    # requests.post: gets the data
    response1 = requests.post(url=url, headers=get_headers(), data=data)
    print(response1.status_code)
Result from the async crawler:
{"code":1,"msg":"Failure","data":null}
Result from requests.post:
200
After several tries I found that some sites can be fetched with the async crawler, but this one cannot.
If some sites work and some don't, the ones that don't are probably restricting those requests in some way. Try sending a request to the failing URL with requests on its own and see what comes back.
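Building on that suggestion, it may help to compare not just the status codes but also the response bodies and the headers each library actually sends for this URL. Below is a minimal sketch of that comparison; it reuses url, data, and get_headers from the question, and the hypothetical function names are mine. request.headers (requests) and request_info.headers (aiohttp) are simply where each library exposes the headers it put on the wire.

def check_with_requests():
    # requests side: print status, body, and the headers it actually sent
    r = requests.post(url, headers=get_headers(), data=data)
    print(r.status_code)
    print(r.text)                    # compare the body, not only the status code
    print(dict(r.request.headers))   # headers as prepared and sent by requests

async def check_with_aiohttp():
    # aiohttp side: response.request_info.headers shows what aiohttp sent
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=get_headers(), data=data) as resp:
            print(resp.status)
            print(await resp.text())
            print(dict(resp.request_info.headers))

check_with_requests()
asyncio.run(check_with_aiohttp())

If the two header dumps differ on anything beyond connection-level fields, that difference is the first thing to investigate.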
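One concrete difference worth ruling out this way (this is my guess, not a confirmed cause): the copied header block hard-codes content-length: 240. As far as I know, requests recomputes Content-Length from the body it actually sends, while aiohttp keeps a caller-supplied value, so a stale 240 that no longer matches the length of json.dumps(data) would only break the aiohttp request. Dropping that header and letting the client fill it in is a cheap test:

# Hypothetical variant of get_page_post: identical request, but without the
# hard-coded content-length header so aiohttp computes it from the real body.
async def get_page_post_without_cl():
    clean_headers = {k: v for k, v in get_headers().items()
                     if k.lower() != 'content-length'}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=clean_headers, data=data) as response:
            print(response.status)
            print(await response.text())

If this variant returns the same payload as requests.post, the problem was the stale header rather than any restriction on async clients.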