# -*- coding = urf-8 -*-
import urllib.request # 制定URL,获得网页数据
def main():
baseurl = 'https://movie.douban.com/top250?start=0'
# 1.爬取网页
datalist = getData(baseurl)
savepath = ".\\豆瓣top250.xls"
# saveData(savepath)
askURL("https://movie.douban.com/top250?start=0")
# 爬取网页
def getData(baseurl):
datalist = []
# 2.解析数据
return datalist
# 指定一个URL的网页内容
def askURL(url):
# 模拟头部信息,像豆瓣服务器发送消息
head = {
"User-Agent"": Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Mobile Safari/537.36"
}
# 用户代理告诉豆瓣服务器,我们是什么类型的机器、浏览器(本质上是高速浏览器我能接受什么类型的数据)
request = urllib.request.Request(url, headers=head)
html = ""
try:
response = urllib.request.urlopen(request)
html = response.read().decode("utf-8")
except Exception as e:
print(e)
return html
# 3.保存数据
def saveData(savepath):
print('saving ...')
if __name__ == "__main__":
main()
Traceback (most recent call last):
File "D:\pythonworld\codecode\pythonscrpit\weishabudui.py", line 49, in <module>
main()
File "D:\pythonworld\codecode\pythonscrpit\weishabudui.py", line 15, in main
askURL("https://movie.douban.com/top250?start=0")
File "D:\pythonworld\codecode\pythonscrpit\weishabudui.py", line 30, in askURL
request = urllib.request.Request(url, headers=head)
File "C:\Users\30295\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 326, in __init__
for key, value in headers.items():
AttributeError: 'set' object has no attribute 'items'
Process finished with exit code 1
补充一下,文中30行报错应该是29行,粘贴过来时候把前面不需要的库删掉了。不好意思哈