from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import time
import pandas as pd
driver = webdriver.Chrome()
#发送请求
driver.get("https://sale.1688.com/factory/home.html")
wait = WebDriverWait(driver,5)
#重要:暂停1分钟进行预登陆,此处填写账号密码及验证
time.sleep(60)
driver.find_element(By.XPATH,'//*[@id="home-header-searchbox"]').send_keys('诚信通')
driver.find_element(By.XPATH,'//*[@id="root"]/div[2]/div/div[1]/div/div/div/div[1]/form/fieldset/div/div[2]/button').click()
time.sleep(1)
#人为移动driver
driver.switch_to.window(driver.window_handles[1])
name = []
keeptime = []
nodes = driver.find_elements(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div')
for i in range(0,len(nodes),1):
name.append(nodes[i].find_element(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div > div:nth-child(1) > a > div.left-container > div.main-content > div.title-container > div.title').text)
keeptime.append(nodes[i].find_element(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div > div:nth-child(1) > a > div.left-container > div.main-content > div.desc-container > div.identity-container > div.credit-tag.identity-tag').text)
for page in range(3):
try:
print(page)
# 定位下一页按钮
nextpage_button = driver.find_element(By.LINK_TEXT,'下一页')
#点击按键
driver.execute_script("arguments[0].click();", nextpage_button)
wait = WebDriverWait(driver,5)
nodes1 = driver.find_elements(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div')
for i in range(0,len(nodes1),1):
name.append(nodes[i].find_element(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div > div:nth-child(1) > a > div.left-container > div.main-content > div.title-container > div.title').text)
keeptime.append(nodes[i].find_element(By.CSS_SELECTOR,'#app > div > div.space-company-offer-list-temp > div.offerlist > div > div:nth-child(1) > a > div.left-container > div.main-content > div.desc-container > div.identity-container > div.credit-tag.identity-tag').text)
except:
pass
'''保存数据'''
data = pd.DataFrame({'诚信通会员名称':name,'诚信通开通':keeptime})
data.to_excel("weibo.xlsx")
问题出在这里,在for循环中定位元素不需要再从头开始定位了,从你for循环的元素开始定位:
for循环中用的绝对定位,一直都是拿第一条数据
1、尝试把红框中的定位去掉。变成相对定位。
2、或者以这种方式,获取第i条数据。
我建议你可以debug下;
1、看看每个变量的值,这是最直观的;
2、看看代码的流程
这两个方面,可以让你知道大概是哪个位置不对了。