UnicodeEncodeError in a crawler under Python 3.4.1.

In Python 3.4.1, the crawler raises UnicodeEncodeError: 'ascii' codec can't encode characters in position 36-39: ordinal not in range(128)
Here is the code I wrote:
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re

def main():
    keyword = input("Enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    response = urllib.request.urlopen("http://baike.baidu.com/search/word?%s" % keyword)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    for each in soup.find_all(href=re.compile("view")):
        content = ''.join([each.text])
        url2 = ''.join(["http://baike.baidu.com", each["href"]])
        response2 = urllib.request.urlopen(url2)
        html2 = response2.read()
        soup2 = BeautifulSoup(html2, "html.parser")
        if soup2.h2:
            content = ''.join([content, soup2.h2.text])
            content = ''.join([content, " -> ", url2])
            print(content)

if __name__ == "__main__":
    main()

https://blog.csdn.net/weixin_43196541/article/details/82687595
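For what it's worth, this particular UnicodeEncodeError usually means urllib.request.urlopen was handed a URL that still contains non-ASCII characters; here the likely culprit is Chinese text inside each["href"], since urlopen only accepts ASCII URLs and the path has to be percent-encoded first. A minimal sketch of that fix (the safe="/%:?=&" argument is my assumption about which characters to leave unescaped):

    # Percent-encode the href before calling urlopen, in case it contains
    # Chinese characters (e.g. "/item/中文").
    href = urllib.parse.quote(each["href"], safe="/%:?=&")
    url2 = ''.join(["http://baike.baidu.com", href])
    response2 = urllib.request.urlopen(url2)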

Your code has quite a few problems. The URL http://baike.baidu.com/search/word?%s looks wrong, and hardly any sites use plain http any more; they are all https. For crawling, the requests module is a better choice than urllib (it is much more polished), and remember to add request headers when fetching pages. I have fixed the code up to the for loop:


import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)  # suppress the SSL warning triggered by verify=False

def main():
    keyword = input("Enter a keyword: ")
    url=r'https://baike.baidu.com/search?word={}'.format(keyword)
    headers={
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Connection':'keep-alive',
        'Host':'baike.baidu.com',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.15 Safari/537.36',
    }
    html = requests.get(url=url,headers=headers,verify=False).text
    print(html)
    soup = BeautifulSoup(html, "html.parser")
    for each in soup.find_all(href=re.compile("view")):
        content = ''.join([each.text])
        url2 = ''.join(["http://baike.baidu.com", each["href"]])
        response2 = urllib.request.urlopen(url2)
        html2 = response2.read()
        soup2 = BeautifulSoup(html2, "html.parser")
        if soup2.h2:
            content = ''.join([content, soup2.h2.text])
            content = ''.join([content, " -> ", url2])
            print(content)

if __name__ == '__main__':
    main()
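One caveat about the snippet above: inside the for loop it still calls urllib.request.urlopen(url2), so the original UnicodeEncodeError can come back whenever a href contains Chinese characters. A hedged sketch of fetching the linked pages with requests as well, reusing the same headers dict (requests percent-encodes non-ASCII URLs on its own):

    # Inside the loop, fetch the entry page with requests instead of urllib,
    # so non-ASCII hrefs are encoded automatically.
    url2 = ''.join(["https://baike.baidu.com", each["href"]])
    html2 = requests.get(url2, headers=headers, verify=False).text
    soup2 = BeautifulSoup(html2, "html.parser")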