有道翻译爬虫:
方法一:
导入JS加密代码进行加密(md5也写在JS里面的),翻译英语单词正常,翻译汉语报错,百度说被识别出是爬虫,但是不知道翻译英汉的区别,所以不知道原因在哪里。
var hexcase = 0;
var b64pad = "";
var chrsz = 8;
function hex_md5(s) {
return binl2hex(core_md5(str2binl(s), s.length * chrsz));
}
function b64_md5(s) {
return binl2b64(core_md5(str2binl(s), s.length * chrsz));
}
function hex_hmac_md5(key, data) {
return binl2hex(core_hmac_md5(key, data));
}
function b64_hmac_md5(key, data) {
return binl2b64(core_hmac_md5(key, data));
}
function calcMD5(s) {
return binl2hex(core_md5(str2binl(s), s.length * chrsz));
}
function md5_vm_test() {
return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72";
}
function core_md5(x, len) {
x[len >> 5] |= 0x80 << ((len) % 32);
x[(((len + 64) >>> 9) << 4) + 14] = len;
var a = 1732584193;
var b = -271733879;
var c = -1732584194;
var d = 271733878;
for (var i = 0; i < x.length; i += 16) {
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;
a = md5_ff(a, b, c, d, x[i + 0], 7, -680876936);
d = md5_ff(d, a, b, c, x[i + 1], 12, -389564586);
c = md5_ff(c, d, a, b, x[i + 2], 17, 606105819);
b = md5_ff(b, c, d, a, x[i + 3], 22, -1044525330);
a = md5_ff(a, b, c, d, x[i + 4], 7, -176418897);
d = md5_ff(d, a, b, c, x[i + 5], 12, 1200080426);
c = md5_ff(c, d, a, b, x[i + 6], 17, -1473231341);
b = md5_ff(b, c, d, a, x[i + 7], 22, -45705983);
a = md5_ff(a, b, c, d, x[i + 8], 7, 1770035416);
d = md5_ff(d, a, b, c, x[i + 9], 12, -1958414417);
c = md5_ff(c, d, a, b, x[i + 10], 17, -42063);
b = md5_ff(b, c, d, a, x[i + 11], 22, -1990404162);
a = md5_ff(a, b, c, d, x[i + 12], 7, 1804603682);
d = md5_ff(d, a, b, c, x[i + 13], 12, -40341101);
c = md5_ff(c, d, a, b, x[i + 14], 17, -1502002290);
b = md5_ff(b, c, d, a, x[i + 15], 22, 1236535329);
a = md5_gg(a, b, c, d, x[i + 1], 5, -165796510);
d = md5_gg(d, a, b, c, x[i + 6], 9, -1069501632);
c = md5_gg(c, d, a, b, x[i + 11], 14, 643717713);
b = md5_gg(b, c, d, a, x[i + 0], 20, -373897302);
a = md5_gg(a, b, c, d, x[i + 5], 5, -701558691);
d = md5_gg(d, a, b, c, x[i + 10], 9, 38016083);
c = md5_gg(c, d, a, b, x[i + 15], 14, -660478335);
b = md5_gg(b, c, d, a, x[i + 4], 20, -405537848);
a = md5_gg(a, b, c, d, x[i + 9], 5, 568446438);
d = md5_gg(d, a, b, c, x[i + 14], 9, -1019803690);
c = md5_gg(c, d, a, b, x[i + 3], 14, -187363961);
b = md5_gg(b, c, d, a, x[i + 8], 20, 1163531501);
a = md5_gg(a, b, c, d, x[i + 13], 5, -1444681467);
d = md5_gg(d, a, b, c, x[i + 2], 9, -51403784);
c = md5_gg(c, d, a, b, x[i + 7], 14, 1735328473);
b = md5_gg(b, c, d, a, x[i + 12], 20, -1926607734);
a = md5_hh(a, b, c, d, x[i + 5], 4, -378558);
d = md5_hh(d, a, b, c, x[i + 8], 11, -2022574463);
c = md5_hh(c, d, a, b, x[i + 11], 16, 1839030562);
b = md5_hh(b, c, d, a, x[i + 14], 23, -35309556);
a = md5_hh(a, b, c, d, x[i + 1], 4, -1530992060);
d = md5_hh(d, a, b, c, x[i + 4], 11, 1272893353);
c = md5_hh(c, d, a, b, x[i + 7], 16, -155497632);
b = md5_hh(b, c, d, a, x[i + 10], 23, -1094730640);
a = md5_hh(a, b, c, d, x[i + 13], 4, 681279174);
d = md5_hh(d, a, b, c, x[i + 0], 11, -358537222);
c = md5_hh(c, d, a, b, x[i + 3], 16, -722521979);
b = md5_hh(b, c, d, a, x[i + 6], 23, 76029189);
a = md5_hh(a, b, c, d, x[i + 9], 4, -640364487);
d = md5_hh(d, a, b, c, x[i + 12], 11, -421815835);
c = md5_hh(c, d, a, b, x[i + 15], 16, 530742520);
b = md5_hh(b, c, d, a, x[i + 2], 23, -995338651);
a = md5_ii(a, b, c, d, x[i + 0], 6, -198630844);
d = md5_ii(d, a, b, c, x[i + 7], 10, 1126891415);
c = md5_ii(c, d, a, b, x[i + 14], 15, -1416354905);
b = md5_ii(b, c, d, a, x[i + 5], 21, -57434055);
a = md5_ii(a, b, c, d, x[i + 12], 6, 1700485571);
d = md5_ii(d, a, b, c, x[i + 3], 10, -1894986606);
c = md5_ii(c, d, a, b, x[i + 10], 15, -1051523);
b = md5_ii(b, c, d, a, x[i + 1], 21, -2054922799);
a = md5_ii(a, b, c, d, x[i + 8], 6, 1873313359);
d = md5_ii(d, a, b, c, x[i + 15], 10, -30611744);
c = md5_ii(c, d, a, b, x[i + 6], 15, -1560198380);
b = md5_ii(b, c, d, a, x[i + 13], 21, 1309151649);
a = md5_ii(a, b, c, d, x[i + 4], 6, -145523070);
d = md5_ii(d, a, b, c, x[i + 11], 10, -1120210379);
c = md5_ii(c, d, a, b, x[i + 2], 15, 718787259);
b = md5_ii(b, c, d, a, x[i + 9], 21, -343485551);
a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
}
return Array(a, b, c, d);
}
function md5_cmn(q, a, b, x, s, t) {
return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s), b);
}
function md5_ff(a, b, c, d, x, s, t) {
return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
}
function md5_gg(a, b, c, d, x, s, t) {
return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
}
function md5_hh(a, b, c, d, x, s, t) {
return md5_cmn(b ^ c ^ d, a, b, x, s, t);
}
function md5_ii(a, b, c, d, x, s, t) {
return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
}
function core_hmac_md5(key, data) {
var bkey = str2binl(key);
if (bkey.length > 16) bkey = core_md5(bkey, key.length * chrsz);
var ipad = Array(16),
opad = Array(16);
for (var i = 0; i < 16; i++) {
ipad[i] = bkey[i] ^ 0x36363636;
opad[i] = bkey[i] ^ 0x5C5C5C5C;
}
var hash = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz);
return core_md5(opad.concat(hash), 512 + 128);
}
function safe_add(x, y) {
var lsw = (x & 0xFFFF) + (y & 0xFFFF);
var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
return (msw << 16) | (lsw & 0xFFFF);
}
function bit_rol(num, cnt) {
return (num << cnt) | (num >>> (32 - cnt));
}
function str2binl(str) {
var bin = Array();
var mask = (1 << chrsz) - 1;
for (var i = 0; i < str.length * chrsz; i += chrsz) bin[i >> 5] |= (str.charCodeAt(i / chrsz) & mask) << (i % 32);
return bin;
}
function binl2hex(binarray) {
var hex_tab = hexcase ? "0123456789ABCDEF": "0123456789abcdef";
var str = "";
for (var i = 0; i < binarray.length * 4; i++) {
str += hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8 + 4)) & 0xF) + hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8)) & 0xF);
}
return str;
}
function binl2b64(binarray) {
var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var str = "";
for (var i = 0; i < binarray.length * 4; i += 3) {
var triplet = (((binarray[i >> 2] >> 8 * (i % 4)) & 0xFF) << 16) | (((binarray[i + 1 >> 2] >> 8 * ((i + 1) % 4)) & 0xFF) << 8) | ((binarray[i + 2 >> 2] >> 8 * ((i + 2) % 4)) & 0xFF);
for (var j = 0; j < 4; j++) {
if (i * 8 + j * 6 > binarray.length * 32) str += b64pad;
else str += tab.charAt((triplet >> 6 * (3 - j)) & 0x3F);
}
}
return str;
};
function getData(word, userAgent) {
var t = hex_md5(userAgent),
r = "" + (new Date).getTime(),
i = r + parseInt(10 * Math.random(), 10);
//return md5("fanyideskweb" + word + i + "Y2FYu%TNSbMCxc3t2u^XT")
return {
ts: r,
bv: t,
salt: i,
sign: hex_md5("fanyideskweb" + word + i + "Y2FYu%TNSbMCxc3t2u^XT")
};
};
import execjs,requests
headers={
"Accept": "application/json, text/javascript, */*; q=0.01",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Length": "237",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Cookie": "OUTFOX_SEARCH_USER_ID=-1602850490@10.169.0.84; OUTFOX_SEARCH_USER_ID_NCOO=1987462289.855914; JSESSIONID=aaa-O50JGlWWl46CCC_Xx; ___rl__test__cookies=1634213107363",
"Host": "fanyi.youdao.com",
"Origin": "https://fanyi.youdao.com",
"Pragma": "no-cache",
"Referer": "https://fanyi.youdao.com/",
"sec-ch-ua-mobile": "?0",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
}
userAgent='5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
url='https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
node=execjs.get()
while True:
word = input('词语:')
etc=node.compile(open('YouDao.js',encoding='utf-8').read())
functionName='getData("{0}","{1}")'.format(word,userAgent)
data=etc.eval(functionName)
data={
"i": word,
"from": "AUTO",
"to": "AUTO",
"smartresult": "dict",
"client": "fanyideskweb",
"salt": data['salt'],
"sign": data['sign'],
"lts": data['ts'],
"bv": data['bv'],
"doctype": "json",
"version": "2.1",
"keyfrom": "fanyi.web",
"action": "FY_BY_REALTlME",
}
print(data)
res=requests.post(url,headers=headers,data=data).json()
print(res)
print('word:',res['translateResult'][0][0]['tgt'])
方法二:全过程全部用Python完成,翻译英汉都正常
'''有道js加密反爬'''
import requests,hashlib,time,random
''' var r = function(e) {
var t = n.md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36")
, r = "" + int(time.time()*1000)
, i = r + int(random.randint(0,9));
return {
ts: r,
bv: t,
salt: i,
sign: n.md5("fanyideskweb" + e + i + "Y2FYu%TNSbMCxc3t2u^XT")
}'''
#加密函数
def encrypt(str):
n=hashlib.md5()
n.update(str.encode())
str=n.hexdigest()
return str
class YouDaoSpider:
def __init__(self):
self.url='https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
self.t= encrypt("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36")
self.r=''+str(int(time.time()*1000))
self.i=self.r+str(int(random.randint(0,9)))
self.headers={
"Accept": "application/json, text/javascript, */*; q=0.01",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive",
"Content-Length": "252",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Cookie": "OUTFOX_SEARCH_USER_ID=1071802923@10.108.160.100; OUTFOX_SEARCH_USER_ID_NCOO=1234791700.07841; JSESSIONID=aaaR4_eMtXgQwc1_IoWSx; ___rl__test__cookies=1628601756663",
"Host": "fanyi.youdao.com",
"Origin": "https://fanyi.youdao.com",
"Referer": "https://fanyi.youdao.com/",
"sec-ch-ua-mobile": "?0",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
}
def post_html(self):
self.data = {
"i": self.word,
"from": "AUTO",
"to": "AUTO",
"smartresult": "dict",
"client": "fanyideskweb",
"salt": self.i,
"sign": encrypt("fanyideskweb" + self.e + self.i + "Y2FYu%TNSbMCxc3t2u^XT"),
"lts": self.r,
"bv": self.t,
"doctype": "json",
"version": "2.1",
"keyfrom": "fanyi.web",
"action": "FY_BY_REALTlME",
}
res=requests.post(url=self.url,data=self.data,headers=self.headers).json()
return res
def params_html(self,res):
mean=res['translateResult'][0][0]['tgt']
return mean
def run(self):
self.e = self.word = input('请输入词汇:')
res=self.post_html()
mean=self.params_html(res)
print(self.e)
print('mean:',mean)
# if __name__=='__main__':
# spider=YouDaoSpider()
# spider.run()
spider1 = YouDaoSpider()
while True:
spider1.run()