请教各位,python初学者该如何修改以下代码?用这段代码可以抓出来部分结果,但是格式非常奇怪,而且有很多重复的数据。怎样修改才能使得抓出来的格式正常,并且无重复数据?
import requests
import pandas as pd
import bs4
import time
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree
driver = webdriver.Chrome()
driver.get('https://data.eastmoney.com/report/industry.jshtml?hyid=1036')
time.sleep(2)
for ii in range(7):
element = driver.find_elements(By.CLASS_NAME, "table-model")
tr_lists = driver.find_elements(By.TAG_NAME,'tr')
window_2 = driver.current_window_handle
for i in range(len(tr_lists)-2):
data1=[]
td_lists= driver.find_elements(By.TAG_NAME,'td')
for td_list in td_lists:
data1.append(td_list.text)
abstract_buttons = driver.find_elements(By.XPATH,"//a[contains(@href,'/report/zw_industry.jshtml?infocode=')]")
abstract_buttons[0+i*1].click()
print('abstract item ' + str(i+1) + ' has done')
time.sleep(2)
window_1 = driver.current_window_handle
hand4 = driver.window_handles
for current_window in hand4:
if current_window != window_1:
driver.switch_to.window(current_window)
abstracts = driver.find_elements(By.CLASS_NAME,'ctx-content')
for abstract in abstracts:
data1.append(abstract.text)
driver.close
driver.switch_to.window(window_2)
data2 = pd.DataFrame(data1).T
data2.to_csv('Research report.csv',header=False, index=False, mode='a+', encoding='utf-8-sig')
print('page ' + str(ii+1) + ' has done')
next_page_button = driver.find_elements(By.XPATH,"//a[text()='下一页']")
next_page_button.click()
time.sleep(2)