I can't get all of the data, and I don't know how to tie together the data from the article page and the fans page.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import random
import re

# user_ = input('Account: ')
# password = input('Password: ')
url = "https://weibo.com/login.php"
driver = webdriver.Chrome()
driver.get(url)
time.sleep(0.5)
driver.maximize_window()

# Fill in the account and password, then click the login button
driver.find_element_by_id('loginname').send_keys('18327862307')
time.sleep(2)
driver.find_element_by_xpath('//*[@id="pl_login_form"]/div/div[3]/div[2]/div/input').send_keys('love572461914')
time.sleep(2)
driver.find_element_by_xpath('//*[@id="pl_login_form"]/div/div[3]/div[6]/a').click()
time.sleep(20)

# After logging in, open the Liuxing page
url_six = "https://weibo.com/liuxingedu"
driver.get(url_six)
time.sleep(2)
# driver.find_element_by_xpath('//*[@id="__sidebar"]/div/div[1]/div/div[2]/div/div[1]/div[1]/div[2]/div[2]').click()

# Scroll the page so more posts get lazy-loaded
for i in range(1, 1000):
    js = "var q=document.documentElement.scrollTop=%s" % (i * 300)
    time.sleep(0.3)
    driver.execute_script(js)

# Fetch the article content
title_url = "https://weibo.com/ajax/statuses/mymblog?uid=7617227236&page=1&feature=0"
driver.get(title_url)
titles = driver.find_elements_by_xpath('//*[@id="app"]/div[1]/div[2]/div[2]/main/div[1]/div/div[2]/div[2]')
for title in titles:
    print(1, title.text)

# Click through to the fans page
driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[2]/main/div[1]/div/div[2]/div[1]/div[1]/div[2]/div[2]/div[2]/a[1]').click()
fans_url = 'https://weibo.com/u/page/follow/7617227236?relate=fans'
driver.get(fans_url)

# Collect the fan entries currently rendered in the virtual scroller
fans_list = []
fans_nums = driver.find_elements_by_class_name('vue-recycle-scroller__item-view')
for fans in fans_nums:
    fans_list.append(fans.text)
print(fans_list)

# Scroll the fans page as well so more entries get loaded
for i in range(1, 1000):
    js = "var q=document.documentElement.scrollTop=%s" % (i * 300)
    time.sleep(0.3)
    driver.execute_script(js)
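On the Selenium side, the fixed range(1, 1000) scroll loop takes a long time and still cannot guarantee everything has loaded. A common alternative is to keep scrolling until document.documentElement.scrollHeight stops growing. Here is a minimal sketch; the scroll_to_bottom helper is only an illustration and assumes the logged-in driver from the script above:

import time

def scroll_to_bottom(driver, pause=0.5, max_rounds=200):
    # Scroll down repeatedly and stop once the page height no longer changes,
    # i.e. nothing new is being lazy-loaded.
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    for _ in range(max_rounds):
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight)")
        time.sleep(pause)
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height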
# Scrape the fans pages through the ajax endpoint
import time
import random
import requests
import jsonpath
import xlwt

workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('A Test Sheet')

for page in range(0, 131):
    url = 'https://weibo.com/ajax/friendships/friends?relate=fans&page=%s&uid=7617227236&type=all&newFollowerCount=0' % page
    # driver.get(url)
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'referer': 'https://weibo.com/u/page/follow/7617227236?relate=fans',
        'cookie': 'SINAGLOBAL=952213467348.6305.1639400540189; UOR=,,www.baidu.com; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFzM5R6fkmylG1Bsx0lb-Au5JpX5KMhUgL.Fo-N1hqNSK5ceK52dJLoI0qLxK-L1hqL12BLxKqLB-eLBK2LxK.L1h2L1hMLxKnLBKML1h2LxKqL1h5L1-BLxK-LB.qLBo-t; ALF=1672398461; SSOLoginState=1640862467; SCF=AsfyXOZASw2leaGE0wOd0-xfdKGsXu4NlX1brFYrIAUfrFA7b7inDCVkxhGTvfxKMmsqrC4_L-qZh6hqyhtWrN4.; SUB=_2A25MyeNUDeRhGeNJ41QW9S7KyjyIHXVvv1OcrDV8PUNbmtAKLWejkW9NS42SEELUcme7sf4NK48nJwkUpQyPF7JJ; XSRF-TOKEN=WKOjPGXNROUArWrCYPSjs83k; _s_tentry=weibo.com; Apache=9889665561211.238.1640862511154; ULV=1640862511459:10:10:7:9889665561211.238.1640862511154:1640795957229; WBPSESS=fIbd3V6i4l9SZkn2E2NwS22Dsu6CK1p-IB2b5veAbVJDBQsr17NiGFdnLvixjZ_ilAWNNr9jlz3nU5fxFXH-kpLNoxBX0tl2NLolKHkk05mGY2J3XE5pw6cIWX47bYi0JgXP6Qj0kQ3CS_8YRvq1eA=='
    }
    res = requests.get(url, headers=headers)
    # print(res.content.decode())
    print('-' * 50)

    # Pull the fan names and ids out of the JSON response
    names = jsonpath.jsonpath(res.json(), '$.[*].name')
    ids = jsonpath.jsonpath(res.json(), '$.[*].id')
    print(names)
    print(ids)
    if not names:
        # jsonpath returns False when nothing matches, so stop instead of crashing on len(False)
        break
    time.sleep(random.random())

    # new_list = []
    # for i in range(0, 10):
    #     new_dict = {}
    #     new_dict['names'] = names[i]
    #     new_dict['ids'] = ids[i]
    #     new_list.append(new_dict)
    # print(new_list)

    # Each page returns 20 fans, so offset the spreadsheet row by page * 20
    for i in range(len(names)):
        print(names[i])
        print(ids[i])
        worksheet.write(page * 20 + i, 0, names[i])
        worksheet.write(page * 20 + i, 1, ids[i])

workbook.save('weibofans.xls')
Scroll the page so that all of the data gets loaded. Work out how many pages of data there are, loop over them page by page to pull the values, and use the xlwt package to write them into an Excel sheet.
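For the paging part, instead of hard-coding range(0, 131) you can keep requesting pages until one comes back empty. A minimal sketch along those lines, reusing the same ajax endpoint and the jsonpath expressions from the script above; the cookie value is a placeholder you must replace with your own logged-in cookie:

import time
import random
import requests
import jsonpath
import xlwt

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    'referer': 'https://weibo.com/u/page/follow/7617227236?relate=fans',
    'cookie': '<your logged-in weibo cookie>',   # placeholder: copy your own cookie here
}

workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('fans')

row = 0
page = 0
while True:
    url = ('https://weibo.com/ajax/friendships/friends'
           '?relate=fans&page=%s&uid=7617227236&type=all&newFollowerCount=0' % page)
    res = requests.get(url, headers=headers)
    names = jsonpath.jsonpath(res.json(), '$.[*].name')
    ids = jsonpath.jsonpath(res.json(), '$.[*].id')
    if not names:
        # jsonpath returns False when nothing matches, i.e. this page has no fans, so we are done
        break
    for name, uid in zip(names, ids):
        worksheet.write(row, 0, name)
        worksheet.write(row, 1, uid)
        row += 1
    page += 1
    time.sleep(random.random())   # small random pause between pages

workbook.save('weibofans.xls')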
**How do I get all of the fans and all of the article content and write them into an Excel sheet?**
You are fetching the fans with requests. Print the response body and check whether the fan data is actually in it; the request may have been blocked by anti-scraping.
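For example, before looping over all the pages, dump one response and see whether the fan list is really there (a quick check reusing the url and headers defined in the script above):

res = requests.get(url, headers=headers)
print(res.status_code)
print(res.text[:2000])   # if this looks like a login/redirect page or the user list is empty, the request is being blocked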
from selenium import webdriver
from selenium.webdriver import ChromeOptions

option = ChromeOptions()
# Drop the "Chrome is being controlled by automated software" switch
option.add_experimental_option('excludeSwitches', ['enable-automation'])
# Stop Blink from exposing the navigator.webdriver automation flag
option.add_argument("--disable-blink-features=AutomationControlled")
browser = webdriver.Chrome(options=option)
This snippet should get around the anti-bot detection; give it a try.
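To apply it to the script above, build browser with these options before opening the login page and use it in place of driver. A small sketch, building on the snippet above, that also checks whether the flags took effect:

browser.get("https://weibo.com/login.php")
# With the options applied, navigator.webdriver should no longer report true.
print(browser.execute_script("return navigator.webdriver"))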
Could someone show how to do this with code, please?
Thanks.