我用 Selenium 写了一个抖音(dy)爬虫,可以获取到 WebElement 对象,但是无法获取它的文本(text)。代码如下:
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Script from the question: open a Douyin video page, dismiss the login panel
# if it shows up, locate the <video> element and print what Selenium reports
# for it.
from selenium.common.exceptions import WebDriverException

USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/104.0.5112.101 Safari/537.36'
)
# Element that is clicked to get rid of the login panel
# (presumably its close button -- confirm against the live page).
LOGIN_PANEL_CLOSE_XPATH = '//*[@id="login-pannel"]/div[2]'
VIDEO_XPATH = (
    '//*[@id="root"]/div/div[2]/div/div/div[1]/div[2]/div/div[1]/div/div[2]'
    '/div[2]/xg-video-container/video'
)


def _dismiss_login_panel(driver, message, timeout=10):
    """Click the login panel element if it appears within ``timeout`` seconds.

    Prints ``message`` and returns True only when the click succeeded.
    Returns False when the panel never appeared or could not be clicked;
    dismissing it is best-effort, so the failure is not propagated.
    """
    try:
        # ``until`` returns the located element, so no second lookup is needed.
        close_button = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, LOGIN_PANEL_CLOSE_XPATH))
        )
        close_button.click()
    except WebDriverException:
        # Covers the wait timing out as well as click failures.
        return False
    print(message)
    return True


option = ChromeOptions()
# Chrome switches take the form ``name=value``; the original
# ``user-agent: "..."`` string is not a recognised switch.
option.add_argument('--user-agent=' + USER_AGENT)
# Drop the automation switch so the site is less likely to detect an
# automated browser.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
try:
    # Registered before navigation so every new document sees
    # navigator.webdriver as undefined.
    browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    })
    url = 'https://www.douyin.com/video/6989430095081442596'
    browser.get(url)

    # The panel is checked twice, as in the original script; each message is
    # now printed only when something was actually closed.
    _dismiss_login_panel(browser, '找到验证信息,已关闭')
    _dismiss_login_panel(browser, '找到登录信息,已关闭')

    video_url = browser.find_element(By.XPATH, VIDEO_XPATH)
    print(video_url)
    print(video_url.id)
    print(type(video_url))
    # NOTE: this is the line the question is about. The lookup above targets
    # the <video> element itself, which carries no text, so "innerText"
    # yields nothing useful; the media URL is exposed through the "src"
    # attribute instead.
    print(video_url.get_attribute("innerText"))
finally:
    # Always shut the browser down, even if a lookup above raised.
    browser.quit()
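运行输出(第一行开头在粘贴时被截断):
<selenium.webdriver.remote.webelement.WebElement (session="…", element="583e7f9a-1a71-4cdc-9143-4a9668391948")>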
583e7f9a-1a71-4cdc-9143-4a9668391948
你确定这个元素有 "innerText" 这个属性吗?可以参考下面的写法:
import time
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Script from the answer: open a Douyin video page, dismiss the login panel
# if it shows up, then print the "src" of every <source> child of the player's
# <video> element.
from selenium.common.exceptions import TimeoutException, WebDriverException

USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/104.0.5112.101 Safari/537.36'
)
# The original expression '//[@id=...]' had lost its '*' node test and was not
# valid XPath; the resulting error was hidden by a bare ``except``.
LOGIN_PANEL_CLOSE_XPATH = '//*[@id="login-pannel"]/div[2]'
VIDEO_SOURCE_XPATH = (
    '/html/body/div[1]/div/div[2]/div/div/div[1]/div[2]/div/div[1]/div/div[2]'
    '/div[2]/xg-video-container/video/source'
)

option = ChromeOptions()
# Chrome switches take the form ``name=value``; the original
# ``user-agent: "..."`` string is not a recognised switch.
option.add_argument('--user-agent=' + USER_AGENT)
# Drop the automation switch so the site is less likely to detect an
# automated browser.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
try:
    browser.maximize_window()
    # Registered before navigation so every new document sees
    # navigator.webdriver as undefined.
    browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    })
    url = 'https://www.douyin.com/video/6989430095081442596'
    browser.get(url)

    try:
        # ``until`` returns the located element, so no second lookup is needed.
        close_button = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, LOGIN_PANEL_CLOSE_XPATH))
        )
        close_button.click()
        print('找到验证信息,已关闭')
    except WebDriverException:
        # Best-effort: the panel may never appear (timeout) or may not be
        # clickable; either way continue with the page as it is.
        pass

    try:
        # Poll until at least one <source> element exists; an empty list is
        # falsy, so ``until`` keeps retrying until the timeout.
        video_url = WebDriverWait(browser, 10).until(
            lambda driver: driver.find_elements(By.XPATH, VIDEO_SOURCE_XPATH)
        )
    except TimeoutException:
        video_url = []
    print(video_url)

    # The <video> element has no "innerText" to read; the media URL is the
    # "src" attribute of its <source> children. Use find_elements (plural)
    # whenever several elements may match.
    for e in video_url:
        print(e.get_attribute("src"))

    # Leave the window open briefly before closing it.
    time.sleep(5)
finally:
    # Always shut the browser down, even if a step above raised.
    browser.quit()