请问怎么解决
乱码
var arg1='C12B102D71A71E5586776D8F309D4D7F4D44AFE5';
var _0x4818=['\x63\x73\x4b\x48\x77\x71\x4d\x49','\x5a\x73\x4b\x4a\x77\x72\x38\x56\x65\x41\x73\x79','\x55\x63\x4b\x69\x4e\x38\x4f\x2f\x77\x70\x6c\x77\x4d\x41\x3d\x3d','\x4a\x52\x38\x43\x54\x67\x3d\x3d','\x59\x73\x4f\x6e\x62\x53\x45\x51\x77\x37\x6f\x7a\x77\x71\x5a\x4b\x65\x73\x4b\x55\x77\x37\x6b\x77\x58\x38\x4f\x52\x49\x51\x3d\x3d','\x77\x37\x6f\x56\x53\x38\x4f\x53\x77\x6f\x50\x43\x6c\x33\x6a\x43\x68\x4d\x4b\x68\x77\x36\x48\x44\x6c\x73\x4b\x58\x77\x34\x73\x2f\x59\x73\x4f\x47','\x66\x77\x56\x6d\x49\x31\x41\x74\x77\x70\x6c\x61\x59\x38\x4f\x74\x77\x35\x63\x4e\x66\x53\x67\x70\x77\x36\x4d\x3d','\x4f\x63\x4f\x4e\x77\x72\x6a\x43\x71\x73\x4b\x78\x54\x47\x54\x43\x68\x73\x4f\x6a\x45\x57\x45\x38\x50\x63\x4f\x63\x4a\x38\x4b\x36','\x55\x38\x4b\x35\x4c\x63\x4f\x74\x77\x70\x56\x30\x45\x4d\x4f\x6b\x77\x34\x37\x44\x72\x4d\x4f\x58','\x48\x4d\x4f\x32\x77\x6f\x48\x43\x69\x4d\x4b\x39\x53\x6c\x58\x43\x6c\x63\x4f\x6f\x43\x31\x6b\x3d','\x61\x73\x4b\x49\x77\x71\x4d\x44\x64\x67\x4d\x75\x50\x73\x4f\x4b\x42\x4d\x4b\x63\x77\x72\x72\x43\x74\x6b\x4c\x44\x72\x4d\x4b\x42\x77\x36\x34\x64','\x77\x71\x49\x6d\x4d\x54\x30\x74\x77\x36\x52\x4e\x77\x35\x6b\x3d','\x44\x4d\x4b\x63\x55\x30\x4a\x6d\x55\x77\x55\x76','\x56\x6a\x48\x44\x6c\x4d\x4f\x48\x56\x63\x4f\x4e\x58\x33\x66\x44\x69\x63\x4b\x4a\x48\x51\x3d\x3d','\x77\x71\x68\x42\x48\x38\x4b\x6e\x77\x34\x54\x44\x68\x53\x44\x44\x67\x4d\x4f\x64\x77\x72\x6a\x43\x6e\x63\x4f\x57\x77\x70\x68\x68\x4e\x38\x4b\x43\x47\x63\x4b\x71\x77\x36\x64\x48\x41\x55\x35\x2b\x77\x72\x67\x32\x4a\x63\x4b\x61\x77\x34\x49\x45
代码
import requests
# Fetch the page; the URL literal is kept exactly as posted.
response = requests.get('https://ks.wjx.top/vm/OtbtvY3.aspx# ')
# Decode the body with the charset that requests guesses from the raw bytes.
guessed_charset = response.apparent_encoding
response.encoding = guessed_charset
# Show the decoded page text.
print(response.text)
这个乱码是因为网站使用了 JavaScript 加密,需要使用浏览器模拟执行 JavaScript 代码才能获取到正确的数据。可以使用 Python 的 Selenium 库来模拟浏览器操作。
以下是一个示例代码,使用 Chrome 浏览器和 ChromeDriver 驱动:
from selenium import webdriver
import time
import re
# Start a Chrome browser session.
driver = webdriver.Chrome()
try:
    # Load the target page.
    driver.get('https://ks.wjx.top/vm/OtbtvY3.aspx#')
    # Fixed wait to give the page and its scripts time to load.
    time.sleep(5)
    # Read the page's global JavaScript variable `arg1`.
    data = driver.execute_script('return arg1;')
    # Treat every two-character chunk as a hexadecimal code point and map it
    # to a character.
    # NOTE(review): whether this mapping actually recovers the page data is
    # not established here -- confirm against the site's script.
    pattern = re.compile(r'\w{2}')
    result = ''.join(chr(int(x, 16)) for x in pattern.findall(data))
    print(result)
finally:
    # Always close the browser, even if a step above raised; otherwise the
    # browser/driver processes are left running.
    driver.quit()
输出的结果是原始数据,可以根据需要进行进一步处理。
import requests
import urllib3
# Silence the warning urllib3 emits for the unverified HTTPS request below.
urllib3.disable_warnings()
url = "https://ks.wjx.top/vm/OtbtvY3.aspx#"
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    # 'br' is deliberately not advertised: brotli bodies are only decoded
    # when an optional brotli package is installed; without it the body
    # would stay compressed and look like garbage.
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
}
# NOTE(review): verify=False disables TLS certificate verification; kept
# from the original snippet -- confirm it is really needed.
response = requests.get(url, headers=headers, verify=False)
if response.headers.get('Content-Encoding') == 'gzip':
    content = response.content.decode('UTF-8')
else:
    # response.encoding is None when the headers declare no charset; fall
    # back to the detected encoding (then UTF-8) instead of passing None to
    # bytes.decode(), which raises TypeError.
    charset = response.encoding or response.apparent_encoding or 'UTF-8'
    content = response.content.decode(charset)
print(content)
可以尝试将编码方式设置为网页的实际编码方式,而不是使用 requests 根据响应头推断出的默认编码。可以使用 chardet 库确定网页的编码方式。
示例代码:
import requests
import chardet
# Download the page.
page_url = 'https://ks.wjx.top/vm/OtbtvY3.aspx#'
resp = requests.get(page_url)
# Let chardet guess the charset from the raw body bytes.
detection = chardet.detect(resp.content)
# Override the response's encoding with the detected charset.
resp.encoding = detection['encoding']
# Print the decoded page text.
print(resp.text)
对结果进行编解码就好了