import csv
from time import sleep

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# 这个爬虫爬取结果的最后几列需要手工处理(可能会多出来几列)
def _query_result_rows(ID, delay):
    """Load the report search page, search for *ID*, and return the result rows.

    Args:
        ID: Value typed into the ``txtoutpatient_id`` input.
        delay: Seconds to sleep after requesting the page, before touching it.

    Returns:
        The ``<tr>`` elements whose ``onclick`` contains ``return btnClick``.
    """
    driver.get(r"http://192.168.3.252/xhlisweb-inspection_id/XHlisWebReport.aspx")
    # Fixed pause, presumably to let the page finish loading.
    sleep(delay)
    driver.find_element(By.NAME, "txtoutpatient_id").send_keys(ID)
    driver.find_element(By.NAME, "btnConfirm").click()
    return driver.find_elements(By.XPATH, '''//tr[contains(@onclick, "return btnClick")]''')


def get_infos(ID):
    """Scrape every report row the LIS web page lists for one ID.

    Uses the module-level Selenium ``driver``. The approach is to click each
    result row in turn and read the table that appears in ``report-content``.
    The search is repeated before every click, so each pass works on freshly
    located row elements.

    Args:
        ID: Value typed into the search box; also stored as the first column
            of every returned row.

    Returns:
        A list of rows. Each row is ``[ID]`` followed by the first 14
        space-separated fields of the clicked result row, followed by the
        space-separated fields of one report-content row.
    """
    # The first query is only used to count how many result rows exist.
    times = len(_query_result_rows(ID, 1))

    infos = []
    for i in range(times):
        col = _query_result_rows(ID, 2)[i]
        col_info = col.text.split(' ')[:14]
        col.click()
        # The first <tr> is skipped (presumably the table header row).
        items = driver.find_elements(By.XPATH, "//div[@id='report-content']//tbody//tr")[1:]
        for item in items:
            a = item.text.split(' ')
            # Drop the first empty field, if there is one.
            if '' in a:
                a.remove('')
            # TODO: normalise `a` to exactly 7 fields (truncate when longer,
            # pad with '' when shorter) so rows always match the CSV header.
            infos.append([ID] + col_info + a)
    return infos
# Slice of the ID list handled by this run: start is at least 0, end is at
# most 641.
start = 200
end = 641
data = pd.read_excel(r"C:\Users\cc\Desktop\资料\数据录入\ALL_raw.xlsx")
IDs = data['登记号'].tolist()[start:end]
# For a quick manual test, replace the list with a couple of known IDs:
# IDs = ["0005248871", '0010610644']

options = Options()
options.binary_location = r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\google\chrome.exe"
# Selenium 4 deprecates both the positional driver path and the
# `chrome_options` keyword; pass the driver path through a Service object and
# use `options=` instead.
driver = webdriver.Chrome(
    service=Service(r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\chromedriver.exe"),
    options=options,
)

ALL = []
headers = ['ID', '序号', '检验单', '病员号', '类型', '送检', '目的', '姓名', '性别', '年龄', '科别', '病区', '工作组', '审核人员', '审核日期', '审核时间', 'NO', '英文名称', '检验项目', '结果', '单位', '状态', '参考值']
try:
    driver.maximize_window()
    for ID in IDs:
        try:
            infos = get_infos(ID)
        except Exception as exc:
            # Best effort: one failing ID must not abort the whole run, but
            # report it so missing rows can be traced afterwards.
            print(f"get_infos failed for ID {ID!r}: {exc!r}")
        else:
            ALL += infos

    # utf-8-sig keeps the Chinese text readable in Excel and avoids an
    # encoding error for characters outside the platform default code page.
    with open(f"result_检验_{start}_{end}.csv", 'w', newline='', encoding='utf-8-sig') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(ALL)
    sleep(3)
finally:
    # Always close the browser, even if scraping or writing failed.
    driver.quit()
C:\Users\cc\AppData\Local\Programs\Python\Python39\python.exe D:/Pycharm/data/jianyan.py
D:\Pycharm\data\jianyan.py:64: DeprecationWarning: executable_path has been deprecated, please pass in a Service object
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe", chrome_options=options)
D:\Pycharm\data\jianyan.py:64: DeprecationWarning: use options instead of chrome_options
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe", chrome_options=options)
Process finished with exit code 0
已经把chromedriver安装到chrome文件夹中,但用cmd检测时好像没安装上,运行代码结果就如上
安装与chrome浏览器对应的chromedriver.exe版本到python的Scripts文件夹下,该路径在系统环境变量中,在代码就不需要指定路径,直接写
driver = webdriver.Chrome(options=options)即可。
如有帮助,请点采纳。
建议将ChromeDriver.exe放入到下面路径下(如果你的chrome是默认安装的话):
C:\Program Files\Google\Chrome\Application
然后把chromedriver所在的路径(也就是上面的路径)加入环境变量。这样就不用每次都在代码里指定chromedriver的路径了,可以直接调用webdriver。
注意webdriver版本要与chrome对应哦。
另外,Google Chrome对selenium的支持远不如Firefox浏览器,推荐使用Firefox。
如有帮助,请采纳,多谢!
把路径中的“\”换成“\\”(或“/”)试试