import requests
import re
# Scrape ten pages of the Douban Top 250 list and print one title per entry.
url="https://movie.douban.com/top250?start={}"

# Loop invariants: built once instead of on every iteration.
# Browser-like User-Agent header sent with each request.
headers={"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55"}
# Captures the first <span class="title"> that follows each <li> ... <div class="item">;
# re.S lets "." match newlines so one match can span several lines of HTML.
obj=re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>',re.S)

for i in range(10):
    # The "start" query parameter advances by 25 for each page.
    start = i * 25
    page_url = url.format(start)
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(page_url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of silently matching nothing.
    response.raise_for_status()
    # Parse the page. This must happen inside the loop: if it runs after the
    # loop, only the last fetched page is ever parsed.
    for it in obj.finditer(response.text):
        print(it.group("name"))
因为解析语句没有放在 for 循环当中,所以只会解析最后一次请求到的页面;把解析部分缩进到循环体内,改成下面这样就可以:
有帮助,望采纳!
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author: YangPC
@QQ:327844761
@微信公众号:ewbang
"""
import requests
import re
# Fetch ten pages of the Douban Top 250 list and print the captured titles.
url = "https://movie.douban.com/top250?start={}"

# Request headers and the compiled pattern do not change between pages,
# so they are created once, before the loop.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55"}
# One match per <li> ... <div class="item"> entry; the named group "name" is
# the text of the first <span class="title"> after it. re.S makes "." match
# newlines as well.
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>', re.S)

for i in range(10):
    # Page i is requested with the offset start = i * 25.
    start = i * 25
    page_url = url.format(start)
    # Bound the wait so a stalled connection cannot block the script forever.
    response = requests.get(page_url, headers=headers, timeout=10)
    # Surface HTTP errors explicitly rather than printing nothing for the page.
    response.raise_for_status()
    html = response.text
    # Parse the page that was just downloaded (inside the loop, once per page).
    for it in obj.finditer(html):
        print(it.group("name"))
你这个代码有问题吧:“解析网页”下面的代码块要缩进到 for i in range(10) 的循环体内(与循环体里的 r=requests.get(...) 等语句对齐),否则只会解析最后一页。
import requests
import re
# Scrape ten pages of the Douban Top 250 list and print the titles with a
# running number ("1.<title>", "2.<title>", ...).
url="https://movie.douban.com/top250?start={}"

# Built once: neither the headers nor the pattern depend on the page.
headers={"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55"}
# Group "name" captures the first <span class="title"> following each
# <li> ... <div class="item">; re.S allows "." to cross line breaks.
obj=re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>',re.S)

# Running counter shared across all pages, so numbering continues from one
# page to the next instead of restarting at 1.
index=1
for i in range(10):
    # The "start" offset grows by 25 per page.
    start = i * 25
    page_url = url.format(start)
    # The timeout prevents an indefinite hang on a stalled connection.
    response = requests.get(page_url, headers=headers, timeout=10)
    # Raise on an HTTP error status instead of silently printing nothing.
    response.raise_for_status()
    # Parse the current page inside the loop so every page is processed.
    for it in obj.finditer(response.text):
        print(f"{index}.{it.group('name')}")
        index+=1