# Tip: install an XPath extension in your browser to make debugging easier: https://blog.csdn.net/weixin_43847283/article/details/125571325
import time
import unittest

import pandas as pd
from pandas import DataFrame
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
class autoLogin(unittest.TestCase):
    """Scrape question/answer text from the page at ``URL`` with Firefox.

    The single test loads the page and keeps scrolling until the document
    height stops growing.  ``tearDownClass`` then collects the question and
    answer elements, writes them into a copy of ``example.xlsx`` saved as
    ``test.xlsx``, dumps the page source to ``index.html`` and closes the
    browser.
    """

    URL = 'http://zhidao.baidu.com/business/profile?id=87701'

    # Seconds to wait after each scroll before re-reading the page height.
    SCROLL_PAUSE_SECONDS = 3

    @classmethod
    def setUpClass(cls):
        """Start one Firefox session shared by every method of the class."""
        cls.driver = webdriver.Firefox()
        cls.driver.implicitly_wait(20)
        cls.driver.maximize_window()

    def test_search_by_selenium(self):
        """Open ``URL`` and scroll down until the page height is stable."""
        self.driver.get(self.URL)
        time.sleep(1)

        # Scroll to the bottom of the page repeatedly; stop once a scroll no
        # longer increases the document height.
        last_height = self.driver.execute_script(
            "return document.body.scrollHeight;"
        )
        while True:
            # Scroll to the real bottom rather than a fixed offset, so pages
            # taller than any hard-coded value are still fully traversed.
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            time.sleep(self.SCROLL_PAUSE_SECONDS)
            new_height = self.driver.execute_script(
                "return document.body.scrollHeight;"
            )
            if new_height == last_height:
                print("我已下拉完毕")
                break
            last_height = new_height
            print("我正在下拉")

    @classmethod
    def tearDownClass(cls):
        """Export the scraped data, then always shut the browser down."""
        try:
            cls._export_results()
        finally:
            # Quit even when the export fails (e.g. example.xlsx is missing),
            # otherwise the Firefox process would be left running.
            cls.driver.quit()

    @classmethod
    def _export_results(cls):
        """Write questions/answers to ``test.xlsx`` and the page to ``index.html``."""
        driver = cls.driver
        html = driver.page_source

        questions = driver.find_elements(By.CSS_SELECTOR, "li h2.item-title a")
        data = pd.read_excel('example.xlsx', sheet_name='Sheet1')
        # NOTE(review): rows are written starting at index label 1, so the
        # row labelled 0 in example.xlsx is left untouched - confirm intended.
        for row, element in enumerate(questions, start=1):
            data.loc[row, '问题'] = element.text

        answers = driver.find_elements(By.CSS_SELECTOR, "div.item-right p")
        for row, element in enumerate(answers, start=1):
            data.loc[row, '答案'] = element.text

        data.to_excel('test.xlsx', sheet_name='Sheet1')

        # Save the page as HTML; the context manager closes the file.
        with open("index.html", "wb") as f:
            f.write(html.encode())
# Run the test case with verbose per-test output when executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)