试了用本地服务器测试接收,请求头和cookies能正常传过去,但是请求的时候aiohttp一直403
windows系统,aiohttp版本是3.6.2,requests版本是2.25.1
测试代码如下
# coding=utf-8
import aiohttp
import asyncio
import hashlib
import execjs
import requests
def get_dc0():
"""
搞一个dc0
:return:
"""
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}
session = requests.session()
session.headers = headers
session.get("https://www.zhihu.com/question/19996225", headers=headers)
dc0 = dict(session.cookies)["d_c0"]
return dc0
def get_headers(dc0, url):
"""
执行js加密,获得加密过的请求头
:param dc0:
:param url:
:return:
"""
x81 = "3_2.0aR_sn77yn6O92wOB8hPZnQr0EMYxc4f18wNBUgpTk7tu6L2qK6P0ET9y-LS9-hp1DufI-we8gGHPgJO1xuPZ0GxCTJHR7820XM20cLRGDJXfgGCBxupMuD_Ie8FL7AtqM6O1VDQyQ6nxrRPCHukMoCXBEgOsiRP0XL2ZUBXmDDV9qhnyTXFMnXcTF_ntRueTh_29ZrL96Tes_bNMggpmtbSYobSYIuw9_BYBkXOK8gYBCqSm-C20ZueBtGLPvM38TBHKCrXm6HSX8BHm17X1SLH0SrLV6QOKKLH9JQ9Z-JOpxDCmiCLZkJVmegL1LJL1t9YKOh3LbDpV9hcGqUxmOC30wqoKABVZgBVfJ4o9sJx18JN95qfzWUL8pB3xHGSMbgYLeg90X9V8QU2qNUoVcH3qPBCBSLY_18F_OcV_zwpOfrxKwweCUgV1swxmVBCZECV8xcw1DC2xbiVChwC1TvwLkCLBOcHMuwLCe8eC"
dc0 = '"' + dc0 + '"'
url = url.replace("https://www.zhihu.com", "")
code1 = "+".join(["101_3_2.0", url, dc0, x81])
md5_code = hashlib.md5(bytes(code1, encoding="utf8")).hexdigest()
ctx2 = execjs.compile(open('zh_96.js', 'r', encoding="utf-8").read())
encrypt_str = "2.0_" + ctx2.call('b', md5_code)
headers = {'Host': 'www.zhihu.com', 'Connection': 'keep-alive', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache',
'x-zse-93': '101_3_2.0', 'sec-ch-ua-mobile': '?0',
'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/98.0.4758.82Safari/537.36',
'x-zst-81': x81,
'x-requested-with': 'fetch', 'x-zse-96': encrypt_str,
'sec-ch-ua': '"NotA;Brand";v="99","Chromium";v="98","GoogleChrome";v="98"',
'sec-ch-ua-platform': '"Windows"',
'Accept': '*/*',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer https': '//www.zhihu.com/question/516475589',
'Accept-Encoding': 'gzip,deflate',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
'Cookie': 'd_c0={};'.format(dc0)}
return headers
async def run():
# 连接,dc0,请求头,cookies
server_url = "http://127.0.0.1:8080/test"
url = "https://www.zhihu.com/api/v4/questions/516475589/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cattachment%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Cis_labeled%2Cpaid_info%2Cpaid_info_content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_recognized%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cvip_info%2Cbadge%5B%2A%5D.topics%3Bdata%5B%2A%5D.settings.table_of_content.enabled&limit=5&offset=0&platform=desktop&sort_by=default"
dc0 = get_dc0()
headers = get_headers(dc0, url)
cookies = {"d_c0": dc0}
# aiohttp请求
aio_session = aiohttp.ClientSession()
async with aio_session.request("GET", url, headers=headers, cookies=cookies) as aio_response:
aio_content = await aio_response.read()
print("aio status:{} 数据长度 {} ".format(aio_response.status, len(aio_content)))
# requests 请求
session = requests.session()
session.headers = headers
response = session.get(url, cookies=cookies)
content = response.content
print("request status:{} 数据长度 {} ".format(response.status_code, len(content)))
await aio_session.close()
if __name__ == '__main__':
loop = asyncio.get_event_loop()
loop.run_until_complete(run())
用到的zh_96.js
var jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window = dom.window;
document = window.document;
XMLHttpRequest = window.XMLHttpRequest;
var exports = {}
function t(e) {
return (t = "function" == typeof Symbol && "symbol" == typeof Symbol.A ? function (e) {
return typeof e
}
: function (e) {
return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e
}
)(e)
}
Object.defineProperty(exports, "__esModule", {
value: !0
});
var A = "2.0"
, __g = {};
function s() {
}
function i(e) {
this.t = (2048 & e) >> 11,
this.s = (1536 & e) >> 9,
this.i = 511 & e,
this.h = 511 & e
}
function h(e) {
this.s = (3072 & e) >> 10,
this.h = 1023 & e
}
function a(e) {
this.a = (3072 & e) >> 10,
this.c = (768 & e) >> 8,
this.n = (192 & e) >> 6,
this.t = 63 & e
}
function c(e) {
this.s = e >> 10 & 3,
this.i = 1023 & e
}
function n() {
}
function e(e) {
this.a = (3072 & e) >> 10,
this.c = (768 & e) >> 8,
this.n = (192 & e) >> 6,
this.t = 63 & e
}
function o(e) {
this.h = (4095 & e) >> 2,
this.t = 3 & e
}
function r(e) {
this.s = e >> 10 & 3,
this.i = e >> 2 & 255,
this.t = 3 & e
}
s.prototype.e = function (e) {
e.o = !1
}
,
i.prototype.e = function (e) {
switch (this.t) {
case 0:
e.r[this.s] = this.i;
break;
case 1:
e.r[this.s] = e.k[this.h]
}
}
,
h.prototype.e = function (e) {
e.k[this.h] = e.r[this.s]
}
,
a.prototype.e = function (e) {
switch (this.t) {
case 0:
e.r[this.a] = e.r[this.c] + e.r[this.n];
break;
case 1:
e.r[this.a] = e.r[this.c] - e.r[this.n];
break;
case 2:
e.r[this.a] = e.r[this.c] * e.r[this.n];
break;
case 3:
e.r[this.a] = e.r[this.c] / e.r[this.n];
break;
case 4:
e.r[this.a] = e.r[this.c] % e.r[this.n];
break;
case 5:
e.r[this.a] = e.r[this.c] == e.r[this.n];
break;
case 6:
e.r[this.a] = e.r[this.c] >= e.r[this.n];
break;
case 7:
e.r[this.a] = e.r[this.c] || e.r[this.n];
break;
case 8:
e.r[this.a] = e.r[this.c] && e.r[this.n];
break;
case 9:
e.r[this.a] = e.r[this.c] !== e.r[this.n];
break;
case 10:
e.r[this.a] = t(e.r[this.c]);
break;
case 11:
e.r[this.a] = e.r[this.c] in e.r[this.n];
break;
case 12:
e.r[this.a] = e.r[this.c] > e.r[this.n];
break;
case 13:
e.r[this.a] = -e.r[this.c];
break;
case 14:
e.r[this.a] = e.r[this.c] < e.r[this.n];
break;
case 15:
e.r[this.a] = e.r[this.c] & e.r[this.n];
break;
case 16:
e.r[this.a] = e.r[this.c] ^ e.r[this.n];
break;
case 17:
e.r[this.a] = e.r[this.c] << e.r[this.n];
break;
case 18:
e.r[this.a] = e.r[this.c] >>> e.r[this.n];
break;
case 19:
e.r[this.a] = e.r[this.c] | e.r[this.n];
break;
case 20:
e.r[this.a] = !e.r[this.c]
}
}
,
c.prototype.e = function (e) {
e.Q.push(e.C),
e.B.push(e.k),
e.C = e.r[this.s],
e.k = [];
for (var t = 0; t < this.i; t++)
e.k.unshift(e.f.pop());
e.g.push(e.f),
e.f = []
}
,
n.prototype.e = function (e) {
e.C = e.Q.pop(),
e.k = e.B.pop(),
e.f = e.g.pop()
}
,
e.prototype.e = function (e) {
switch (this.t) {
case 0:
e.u = e.r[this.a] >= e.r[this.c];
break;
case 1:
e.u = e.r[this.a] <= e.r[this.c];
break;
case 2:
e.u = e.r[this.a] > e.r[this.c];
break;
case 3:
e.u = e.r[this.a] < e.r[this.c];
break;
case 4:
e.u = e.r[this.a] == e.r[this.c];
break;
case 5:
e.u = e.r[this.a] != e.r[this.c];
break;
case 6:
e.u = e.r[this.a];
break;
case 7:
e.u = !e.r[this.a]
}
}
,
o.prototype.e = function (e) {
switch (this.t) {
case 0:
e.C = this.h;
break;
case 1:
e.u && (e.C = this.h);
break;
case 2:
e.u || (e.C = this.h);
break;
case 3:
e.C = this.h,
e.w = null
}
e.u = !1
}
,
r.prototype.e = function (e) {
switch (this.t) {
case 0:
for (var t = [], n = 0; n < this.i; n++)
t.unshift(e.f.pop());
e.r[3] = e.r[this.s](t[0], t[1]);
break;
case 1:
for (var r = e.f.pop(), o = [], i = 0; i < this.i; i++)
o.unshift(e.f.pop());
e.r[3] = e.r[this.s][r](o[0], o[1]);
break;
case 2:
for (var a = [], c = 0; c < this.i; c++)
a.unshift(e.f.pop());
e.r[3] = new e.r[this.s](a[0], a[1])
}
}
;
var k = function (e) {
for (var t = 66, n = [], r = 0; r < e.length; r++) {
var o = 24 ^ e.charCodeAt(r) ^ t;
n.push(String.fromCharCode(o)),
t = o
}
return n.join("")
};
function Q(e) {
this.t = (4095 & e) >> 10,
this.s = (1023 & e) >> 8,
this.i = 1023 & e,
this.h = 63 & e
}
function C(e) {
this.t = (4095 & e) >> 10,
this.a = (1023 & e) >> 8,
this.c = (255 & e) >> 6
}
function B(e) {
this.s = (3072 & e) >> 10,
this.h = 1023 & e
}
function f(e) {
this.h = 4095 & e
}
function g(e) {
this.s = (3072 & e) >> 10
}
function u(e) {
this.h = 4095 & e
}
function w(e) {
this.t = (3840 & e) >> 8,
this.s = (192 & e) >> 6,
this.i = 63 & e
}
function G() {
this.r = [0, 0, 0, 0],
this.C = 0,
this.Q = [],
this.k = [],
this.B = [],
this.f = [],
this.g = [],
this.u = !1,
this.G = [],
this.b = [],
this.o = !1,
this.w = null,
this.U = null,
this.F = [],
this.R = 0,
this.J = {
0: s,
1: i,
2: h,
3: a,
4: c,
5: n,
6: e,
7: o,
8: r,
9: Q,
10: C,
11: B,
12: f,
13: g,
14: u,
15: w
}
}
Q.prototype.e = function (e) {
switch (this.t) {
case 0:
e.f.push(e.r[this.s]);
break;
case 1:
e.f.push(this.i);
break;
case 2:
e.f.push(e.k[this.h]);
break;
case 3:
e.f.push(k(e.b[this.h]))
}
}
,
C.prototype.e = function (A) {
switch (this.t) {
case 0:
var t = A.f.pop();
A.r[this.a] = A.r[this.c][t];
break;
case 1:
var s = A.f.pop()
, i = A.f.pop();
A.r[this.c][s] = i;
break;
case 2:
var h = A.f.pop();
A.r[this.a] = eval(h)
}
}
,
B.prototype.e = function (e) {
e.r[this.s] = k(e.b[this.h])
}
,
f.prototype.e = function (e) {
e.w = this.h
}
,
g.prototype.e = function (e) {
throw e.r[this.s]
}
,
u.prototype.e = function (e) {
var t = this
, n = [0];
e.k.forEach(function (e) {
n.push(e)
}
);
var r = function (r) {
var o = new G;
return o.k = n,
o.k[0] = r,
o.v(e.G, t.h, e.b, e.F),
o.r[3]
};
r.toString = function () {
return "() { [native code] }"
}
,
e.r[3] = r
}
,
w.prototype.e = function (e) {
switch (this.t) {
case 0:
for (var t = {}, n = 0; n < this.i; n++) {
var r = e.f.pop();
t[e.f.pop()] = r
}
e.r[this.s] = t;
break;
case 1:
for (var o = [], i = 0; i < this.i; i++)
o.unshift(e.f.pop());
e.r[this.s] = o
}
}
,
G.prototype.D = function (e) {
for (var t = window.atob(e), n = t.charCodeAt(0) << 8 | t.charCodeAt(1), r = [], o = 2; o < n + 2; o += 2)
r.push(t.charCodeAt(o) << 8 | t.charCodeAt(o + 1));
this.G = r;
for (var i = [], a = n + 2; a < t.length;) {
var c = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1)
, u = t.slice(a + 2, a + 2 + c);
i.push(u),
a += c + 2
}
this.b = i
}
,
G.prototype.v = function (e, t, n) {
for (t = t || 0,
n = n || [],
this.C = t,
"string" == typeof e ? this.D(e) : (this.G = e,
this.b = n),
this.o = !0,
this.R = Date.now(); this.o;) {
var r = this.G[this.C++];
if ("number" != typeof r)
break;
var o = Date.now();
if (500 < o - this.R)
return;
this.R = o;
try {
this.e(r)
} catch (e) {
this.U = e,
this.w && (this.C = this.w)
}
}
}
,
G.prototype.e = function (e) {
var t = (61440 & e) >> 12;
new this.J[t](e).e(this)
}
,
(new G).v("AxjgB5MAnACoAJwBpAAAABAAIAKcAqgAMAq0AzRJZAZwUpwCqACQACACGAKcBKAAIAOcBagAIAQYAjAUGgKcBqFAuAc5hTSHZAZwqrAIGgA0QJEAJAAYAzAUGgOcCaFANRQ0R2QGcOKwChoANECRACQAsAuQABgDnAmgAJwMgAGcDYwFEAAzBmAGcSqwDhoANECRACQAGAKcD6AAGgKcEKFANEcYApwRoAAxB2AGcXKwEhoANECRACQAGAKcE6AAGgKcFKFANEdkBnGqsBUaADRAkQAkABgCnBagAGAGcdKwFxoANECRACQAGAKcGKAAYAZx+rAZGgA0QJEAJAAYA5waoABgBnIisBsaADRAkQAkABgCnBygABoCnB2hQDRHZAZyWrAeGgA0QJEAJAAYBJwfoAAwFGAGcoawIBoANECRACQAGAOQALAJkAAYBJwfgAlsBnK+sCEaADRAkQAkABgDkACwGpAAGAScH4AJbAZy9rAiGgA0QJEAJACwI5AAGAScH6AAkACcJKgAnCWgAJwmoACcJ4AFnA2MBRAAMw5gBnNasCgaADRAkQAkABgBEio0R5EAJAGwKSAFGACcKqAAEgM0RCQGGAYSATRFZAZzshgAtCs0QCQAGAYSAjRFZAZz1hgAtCw0QCQAEAAgB7AtIAgYAJwqoAASATRBJAkYCRIANEZkBnYqEAgaBxQBOYAoBxQEOYQ0giQKGAmQABgAnC6ABRgBGgo0UhD/MQ8zECALEAgaBxQBOYAoBxQEOYQ0gpEAJAoYARoKNFIQ/zEPkAAgChgLGgkUATmBkgAaAJwuhAUaCjdQFAg5kTSTJAsQCBoHFAE5gCgHFAQ5hDSCkQAkChgBGgo0UhD/MQ+QACAKGAsaCRQCOYGSABoAnC6EBRoKN1AUEDmRNJMkCxgFGgsUPzmPkgAaCJwvhAU0wCQFGAUaCxQGOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQMOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQSOZISPzZPkQAaCJwvhAU0wCQFGAkSAzRBJAlz/B4FUAAAAwUYIAAIBSITFQkTERwABi0GHxITAAAJLwMSGRsXHxMZAAk0Fw8HFh4NAwUABhU1EBceDwAENBcUEAAGNBkTGRcBAAFKAAkvHg4PKz4aEwIAAUsACDIVHB0QEQ4YAAsuAzs7AAoPKToKDgAHMx8SGQUvMQABSAALORoVGCQgERcCAxoACAU3ABEXAgMaAAsFGDcAERcCAxoUCgABSQAGOA8LGBsPAAYYLwsYGw8AAU4ABD8QHAUAAU8ABSkbCQ4BAAFMAAktCh8eDgMHCw8AAU0ADT4TGjQsGQMaFA0FHhkAFz4TGjQsGQMaFA0FHhk1NBkCHgUbGBEPAAFCABg9GgkjIAEmOgUHDQ8eFSU5DggJAwEcAwUAAUMAAUAAAUEADQEtFw0FBwtdWxQTGSAACBwrAxUPBR4ZAAkqGgUDAwMVEQ0ACC4DJD8eAx8RAAQ5GhUYAAFGAAAABjYRExELBAACWhgAAVoAQAg/PTw0NxcQPCQ5C3JZEBs9fkcnDRcUAXZia0Q4EhQgXHojMBY3MWVCNT0uDhMXcGQ7AUFPHigkQUwQFkhaAkEACjkTEQspNBMZPC0ABjkTEQsrLQ==");
// exejs执行
function b(e) {
result = __g._encrypt(encodeURIComponent(e))
console.log(result)
return __g._encrypt(encodeURIComponent(e))
}
//node执行
result = __g._encrypt(encodeURIComponent(e))
console.log(result)
requests就是最吊的, 有些网站我用urllib爬返回503用requests爬就是正常的
我也会批量出现403,我是scrapy写的,抓一会就出现403
把get方法改成post方法再试试。