selenium写的dy爬虫无法获取webelement文本内容

问题遇到的现象和发生背景

用 selenium 写了一个抖音(dy)爬虫,可以定位到视频的 WebElement,但是无法从这个元素上获取 text 文本内容(想拿到的是视频地址)。

问题相关代码,请勿粘贴截图
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC



# Question script: open a Douyin video page, dismiss the login panel and print
# the address of the <video> element.
#
# Imported here so this script stays runnable without touching the import
# block above.
from selenium.common.exceptions import WebDriverException

USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/104.0.5112.101 Safari/537.36'
)
LOGIN_CLOSE_XPATH = '//*[@id="login-pannel"]/div[2]'
VIDEO_XPATH = (
    '//*[@id="root"]/div/div[2]/div/div/div[1]/div[2]/div/div[1]/div/div[2]'
    '/div[2]/xg-video-container/video'
)

option = ChromeOptions()
# Chrome only honours the switch form `--user-agent=<value>`; the original
# `user-agent: "..."` string was not a switch, so the UA was never overridden.
option.add_argument(f'--user-agent={USER_AGENT}')
# Hide the most obvious automation markers from the page.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
try:
    # Mask navigator.webdriver before any page script gets to read it.
    browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    })

    url = 'https://www.douyin.com/video/6989430095081442596'
    browser.get(url)

    # Two best-effort attempts at the same close button, as in the original.
    # The message is now printed only when a click actually happened.
    for closed_message in ('找到验证信息,已关闭', '找到登录信息,已关闭'):
        try:
            WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable((By.XPATH, LOGIN_CLOSE_XPATH))
            ).click()
        except WebDriverException:
            # Covers TimeoutException (no panel appeared) and click failures;
            # a missing panel is not an error for this script.
            continue
        print(closed_message)

    # The player is inserted by JavaScript, so wait for it instead of
    # looking it up immediately.
    video_url = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.XPATH, VIDEO_XPATH))
    )
    print(video_url)
    print(video_url.id)
    print(type(video_url))

    # A <video> element has no text, so innerText is always empty. The
    # address is in the `src` attribute of the element or of its <source>
    # children.
    candidates = [video_url.get_attribute('src')]
    candidates.extend(
        source.get_attribute('src')
        for source in video_url.find_elements(By.TAG_NAME, 'source')
    )
    for src in candidates:
        if src:
            print(src)
finally:
    # Always close the browser, even if a step above raised.
    browser.quit()


运行结果及报错内容
-1a71-4cdc-9143-4a9668391948")>
583e7f9a-1a71-4cdc-9143-4a9668391948

你确定这个元素有“innerText”这个属性吗?<video> 元素本身没有文本内容,视频地址保存在它的 src 属性(或其子元素 <source> 的 src 属性)里。

img

import time

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Answer script: the <video> element has no text/innerText to read. The video
# address is in the `src` attribute of its <source> children, and
# find_elements returns all of them at once.
#
# Imported here so this script stays runnable without touching the import
# block above.
from selenium.common.exceptions import TimeoutException, WebDriverException

USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/104.0.5112.101 Safari/537.36'
)
LOGIN_CLOSE_XPATH = '//*[@id="login-pannel"]/div[2]'
SOURCE_XPATH = (
    '/html/body/div[1]/div/div[2]/div/div/div[1]/div[2]/div/div[1]/div/div[2]'
    '/div[2]/xg-video-container/video/source'
)

option = ChromeOptions()
# Chrome only honours the switch form `--user-agent=<value>`.
option.add_argument(f'--user-agent={USER_AGENT}')
# Hide the most obvious automation markers from the page.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
try:
    browser.maximize_window()
    # Mask navigator.webdriver before any page script gets to read it.
    browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    })

    url = 'https://www.douyin.com/video/6989430095081442596'
    browser.get(url)

    # Best-effort: close the login panel if it shows up within 10 seconds.
    try:
        WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable((By.XPATH, LOGIN_CLOSE_XPATH))
        ).click()
    except WebDriverException:
        # Covers TimeoutException (no panel) and click failures.
        pass
    else:
        print('找到验证信息,已关闭')

    # Wait for the <source> children to be rendered. If none appear, fall
    # back to an empty list, which is what a plain find_elements returns.
    try:
        video_url = WebDriverWait(browser, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, SOURCE_XPATH))
        )
    except TimeoutException:
        video_url = []
    print(video_url)
    for e in video_url:
        print(e.get_attribute("src"))

    # Keep the window open briefly so the result can be seen.
    time.sleep(5)
finally:
    # Always close the browser, even if a step above raised.
    browser.quit()

不知道你这个问题是否已经解决, 如果还没有解决的话:

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 以帮助更多的人 ^-^