import urllib.request
import re

# Request header. BUG FIX: opener.addheaders expects a list of
# (field-name, field-value) 2-tuples, NOT a dict — with a dict,
# urllib fails to unpack the header when sending the request.
headers = ("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Mobile Safari/537.36")

opener = urllib.request.build_opener()
url = "http://www.xiaohuabang.cn/duanzi/hunduanzi/12800.htm"
# Attach the header tuple to the opener.
opener.addheaders = [headers]
# Install as the global opener so plain urlopen() uses it too.
urllib.request.install_opener(opener)

for i in range(12800, 12810):
    # Auto-pagination: build each page URL (pages 12801..12810).
    this_url = "http://www.xiaohuabang.cn/duanzi/hunduanzi/" + str(i + 1) + ".htm"
    # NOTE(review): decode("utf-8", "ignore") would be more robust if the
    # site ever serves malformed bytes — confirm the site's encoding.
    data = urllib.request.urlopen(this_url).read().decode("utf-8")
    print(data)
headers 应该是一个 ("User-Agent", 值) 形式的二元组而不是字典:opener.addheaders 要求的是由 (字段名, 字段值) 二元组组成的列表,传入字典会在发送请求时解包失败。参考 https://blog.csdn.net/qdPython/article/details/108682245
顺便一提,学爬虫不推荐使用urllib,建议使用requests