用 Python 读取顶级域名时出现 Forbidden(403)错误。在网上找了很多相关问题,大多说把请求伪装成浏览器就可以了,但是加上伪装的请求头之后还是会出现下面这样的问题:
def get_ipbelong(this_url):
    """Return the location text ip.tool.chinaz.com shows for ``this_url``.

    The page at ``http://ip.tool.chinaz.com/<this_url>`` is downloaded with a
    browser-like ``User-Agent`` header and scanned for ``w50-0">...</span>``
    fragments; the last fragment found, stripped of surrounding whitespace,
    is the result.

    Args:
        this_url: Domain (or IP) string appended verbatim to the lookup URL.

    Returns:
        The stripped text of the last matching fragment, or the string
        ``'None'`` when the page contains no match or the request timed out.

    Raises:
        urllib.error.HTTPError: The server answered with an error status
            (for example 403 Forbidden). Not caught here, as before.
        urllib.error.URLError: The connection could not be established.
    """
    # The module-level ``data`` is still updated on success in case other
    # code in this file reads it -- TODO confirm and drop the global if not.
    global data
    url = 'http://ip.tool.chinaz.com/' + this_url
    req = urllib.request.Request(url)
    # Adjacent string literals are concatenated with nothing in between, so
    # every piece must carry its own trailing space. The previous value sent
    # "...like Gecko)Chrome/96..." and omitted the ';' separators, which is
    # not a genuine browser User-Agent and may be rejected by the server.
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.93 Safari/537.36',
    )
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(req, timeout=10) as response:
            page = response.read().decode('utf-8', errors='ignore')
    except socket.timeout as e:
        print(type(e))
        # Nothing was downloaded: do not fall through and parse the page
        # left over from a previous call (or an undefined name).
        return 'None'
    data = page
    info_list = re.findall(r'w50-0">(.*?)</span>', page)
    if not info_list:
        return 'None'
    return info_list[-1].strip()
Traceback (most recent call last):
File "D:/我/3.py", line 430, in <module>
update_dic(csv_path, old_dic)
File "D:/我/3.py", line 89, in update_dic
ip_geo = get_ipbelong(domain_list[i])
File "D:/我/3.py", line 51, in get_ipbelong
data = urllib.request.urlopen(req, timeout=10).read().decode('utf-8', errors='ignore')
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
添加user-agent之后还是出现同样的问题
传递请求头这样写:
def get_ipbelong(this_url):
global data
url = 'http://ip.tool.chinaz.com/' + this_url
headers={'User-Agent':
'Mozilla/5.0(Windows NT 10.0 Win64 x64) AppleWebKit/537.36(KHTML, like Gecko)'
'Chrome/96.0.4664.93 Safari/537.36'}
#'Mozilla/5.0 (Windows NT 10.0 Win64 x64) AppleWebKit/537.36 (KHTML, like Gecko)'
req = urllib.request.Request(url,headers=headers) # 创建一个Request对象
try:
data = urllib.request.urlopen(req,timeout=20).read().decode('utf-8', errors='ignore')
...