目标网站:https://sc.chinaz.com/yinxiao/
需求:
1、翻页爬网页上的音乐名字,音乐链接
2、保存到csv
import requests
from lxml import etree
import csv
start = int(input('请输入你的起始页:'))
end = int(input('请输入你的结束页:'))
for k in range(start, end+1):
url = 'https://sc.chinaz.com/yinxiao/index_{}.html'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'}
response = requests.get(url=url, headers=headers)
response.encoding = 'utf-8'
data = response.text
html = etree.HTML(data)
div_tag = html.xpath('//div[@class="right-head"]/a')
lis = []
for a in div_tag:
name = a.xpath('./p/text()')
href = a.xpath('./@href')
# print(name, href)
name = [s.strip() for s in name]
for i in zip(name, href):
dic = {}
dic['name'] = i[0]
dic['href'] = 'https://sc.chinaz.com'+i[1]
lis.append(dic)
print(lis)
with open('音效.csv', 'w', encoding='utf-8', newline='') as f:
write = csv.DictWriter(f, fieldnames=['name', 'href'])
write.writeheader()
write.writerows(lis)
你题目的解答代码如下:
import requests
from lxml import etree
import csv
start = int(input('请输入你的起始页:'))
end = int(input('请输入你的结束页:'))
lis = [] #放到所有循环上边
for k in range(start, end+1):
url = f'https://sc.chinaz.com/yinxiao/index_{k}.html' #改下
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'}
response = requests.get(url=url, headers=headers)
response.encoding = 'utf-8'
data = response.text
# print(data)
html = etree.HTML(data)
div_tag = html.xpath('//div[@class="right-head"]/a')
for a in div_tag:
name = a.xpath('./p/text()')
href = a.xpath('./@href')
# print(name, href)
name = [s.strip() for s in name]
for i in zip(name, href):
dic = {}
dic['name'] = i[0]
dic['href'] = 'https://sc.chinaz.com'+i[1]
lis.append(dic)
print(lis)
#放到所有循环之后
with open('音效.csv', 'w', encoding='utf-8', newline='') as f:
write = csv.DictWriter(f, fieldnames=['name', 'href'])
write.writeheader()
write.writerows(lis)
如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!
为什么运行结果会多一个https:?
{'name': '汽车加速离开的声音', 'href': 'https://sc.chinaz.comhttps:/yinxiao/220711552202.htm'}, {'name': '用力关门的声音MP3', 'href': 'https://sc.chinaz.comhttps:/yinxiao/220711517571.htm'}, {'name': '风铃清脆的响声MP3', 'href': 'https://sc.chinaz.comhttps:/yinxiao/220711485070.htm'}, {'name': '动感鼓点节奏音乐', 'href': 'https://sc.chinaz.comhttps:/yinxiao/220710448212.htm'}]