目标网站:https://music.163.com/#/song?id=399354373
需求:
1、爬取这首歌曲前10页评论中的用户名字以及对应的评论内容
2、保存到csv(名字和评论要对应)
from selenium import webdriver
import time
import csv
# Address of the song page whose comments are being scraped.
SONG_URL = 'https://music.163.com/#/song?id=399354373'

# Start a Chrome session and load the song page.
drive = webdriver.Chrome()
drive.get(SONG_URL)

# Locate the element with id 'g_iframe' and switch the driver into that
# frame, so the lookup below runs against the frame's document.
ifranme = drive.find_element_by_id('g_iframe')
drive.switch_to.frame(ifranme)

# Fixed three-second pause before querying the frame.
time.sleep(3)

# First element in the frame carrying the CSS class 'itm'.
name = drive.find_element_by_class_name('itm')
到浏览器的开发者工具面板中复制该节点的定位路径。
打开审查元素,复制出该节点的 JS 路径:选中元素,右击鼠标,在 Copy 菜单里选择 Copy JS path。
复制节点的 XPath 再去爬取;之所以取不到数据,是因为元素标签没有找对。
题主的代码好像没有什么问题,只需加上 drive.implicitly_wait() 这句代码,就可以正常出结果了。
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape the commenter name and comment text from the first PAGE_COUNT
# comment pages of the song, then write them to test.csv, one row per
# comment in the form [name, comment].
PAGE_COUNT = 10

drive = webdriver.Chrome()
list1 = []
try:
    drive.get('https://music.163.com/#/song?id=399354373')
    # Let element lookups retry for up to 10 seconds before failing.
    drive.implicitly_wait(10)

    # The elements queried below are looked up inside the 'g_iframe' frame.
    iframe = drive.find_element(By.ID, 'g_iframe')
    drive.switch_to.frame(iframe)
    time.sleep(3)

    for i in range(PAGE_COUNT):
        names = drive.find_elements(By.CLASS_NAME, 'itm')
        for name in names:
            content = name.find_element(By.XPATH, './/div[@class="cnt f-brk"]')
            name_1 = content.find_element(By.XPATH, './a')
            name_2 = name_1.text.strip()
            content_1 = content.text.strip()
            # content.text also contains the text of its child <a> (the
            # name). Drop that leading name and the colon separator after it
            # so the second CSV column holds only the comment itself.
            if name_2 and content_1.startswith(name_2):
                content_1 = content_1[len(name_2):].lstrip('：: \n')
            print(name_2, content_1)
            list1.append([name_2, content_1])
        print(f'第{i+1}页')

        # The last wanted page has been read; do not navigate any further.
        if i == PAGE_COUNT - 1:
            break

        # Scroll to the bottom of the page first, otherwise clicking the
        # pager button runs into problems.
        drive.execute_script(script="let t = document.documentElement.scrollHeight;window.scroll({ top: t, left: 0, behavior: 'smooth' });")
        time.sleep(2)
        # The last link in the pager is the "next page" button.
        next_page = drive.find_elements(By.XPATH, "//div[@id='comment-box']//div[@class='m-cmmt']/div[3]//a")[-1]
        next_page.click()
        # Pause so the next page's comments can load; without it the next
        # iteration may read the previous page's elements again.
        time.sleep(2)
finally:
    # Always end the browser session, even when scraping fails part-way.
    drive.quit()

# utf-8-sig writes a BOM at the start of the file; newline='' leaves line
# endings to the csv module.
with open(file='test.csv', mode='w', encoding='utf-8-sig', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(list1)