第143行有问题,本来是没问题的,但是加了个非连续采集也就是末尾print(df)后面的try那句报了一个这个错,代码在回答栏
这个是对象没有set_scientific方法
是不是对象获取错了或者set_scientific方法写错了
# -*- coding:utf-8 -*-
from selenium.webdriver.common.by import By
import lxml.html
import matplotlib.pyplot as plt
import pandas as pd
import time
from selenium.webdriver.chrome.options import Options
import json
import requests
from io import BytesIO
from PIL import Image
from selenium import webdriver
import lxml.html
opt = Options()
opt.add_experimental_option('excludeSwitches', ['enable-automation'])
opt.add_argument('--disable-blink-features')
opt.add_argument('--disable-extensions')
opt.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(options=opt)
driver.maximize_window()
driver.get("https://www.huya.com/")
# 输入用户名密码点击登录
a = driver.find_element(By.XPATH, '//a[@class="hy-nav-link hy-nav-link-ext clickstat"]')
a.click()
time.sleep(1)
c = driver.find_element(By.XPATH, '//a[@class="LoginHd--1Jf6S0CCU3DUkJdjVqVn3"]')
c.click()
time.sleep(10)
# 进入子iframe
driver.switch_to.frame(driver.find_element_by_xpath("//iframe[@id='UDBSdkLgn_iframe']"))
d = driver.find_element(By.XPATH, '//div[@eid="Click/QuickLogin/AccountLogin"]/i')
d.click()
time.sleep(2)
x = driver.find_element(By.XPATH, '//input[@type="text"]')
x.send_keys('13082827515')
f = driver.find_element(By.XPATH, '//input[@type="password"]')
f.send_keys('1234567895a')
time.sleep(2)
q = driver.find_element(By.XPATH, '//div[@class="udb-button clickstat"]')
q.click()
time.sleep(10) # 手动验证时间
# ---- 获得cookies -----
cookie = driver.get_cookies()
print(cookie) # list
# 将cookies数据存入文件
cookie_data_file = "D:\CookieData.json"
json.dump(cookie,open(cookie_data_file,"w"))
# ---- cookie 登录-----
# 打开网址
driver = webdriver.Chrome()
driver.maximize_window()
driver.get("https://www.huya.com/")
# 从cookie json文件读取python数据
cookie_data_file = "D:\CookieData.json"
python_data = json.load(open(cookie_data_file))
# 添加cookie
for cookie in python_data:
driver.add_cookie(cookie)
# 登录成功的页面地址
driver.maximize_window()
driver.get("https://www.huya.com/g/lol")
time.sleep(10)
driver.switch_to.window(driver.window_handles[-1]) # 读取新页面
html = driver.page_source
xp = lxml.html.fromstring(html) # lxml对象
z=xp.xpath('//span[@class="avatar fl"]')
for w in z:
name=w.xpath('.//img/@alt')
url=w.xpath('.//img/@data-original')
print(url)
name=name[0]
url= url[0]
if url[:6]=='https:':
url=url
else:
url = 'https:'+url
print(name,url)
response = requests.get(url)
image = Image.open(BytesIO(response.content))
image.save(f'd:\w\{name}.png')
x = lxml.html.fromstring(html) # lxml对象
r = x.xpath('//li[@class="game-live-item"]/a[1]/@href')
z = 0
for i in r:
print(i)
driver = webdriver.Chrome()
driver.get(i) # 打开页面
import time
time.sleep(3)
html = driver.page_source # 访问整个页面的内容
driver.close() # 关闭页面
import lxml.html
x = lxml.html.fromstring(html) # lxml对象
units = x.xpath('//div[@class="host-info"]')
z += 1
if z > 3:
break
else:
for u in units:
a = {}
a['标题'] = u.xpath('./h1/@title')[0]
a['主播'] = u.xpath('./div/h3[@class="host-name"]/text()')[0]
a['房间号'] = u.xpath('./div/span[@class="host-rid"]/em/text()')[0]
a['人气'] = u.xpath('./div/span[@class="host-spectator"]/em/text()')[0]
print(a)
units = xp.xpath('//li[@class="game-live-item"]')
df = pd.DataFrame() # 没有text()
for u in units:
t = {}
t['标题'] = u.xpath('./a/@title')[0]
t['主播'] = u.xpath('./span/span/i[@class="nick"]')[0].xpath("string()")
t['人气'] = u.xpath('./span/span[2]/i[@class="js-num"]/text()')[0]
df0 = pd.DataFrame([t])
df = pd.concat([df, df0], ignore_index=True)
if len(df)>100:
break
df=df.drop_duplicates()
print(df)
df1=df
df.to_excel(r'D:\a623.xlsx',index=False)
try:
df2=pd.read_excel(r'D:\a623.xlsx')
df3 = pd.concat([df1,df2],ignore_index=True)
df3 = df3.drop_duplicates()
except:
df3= df1
df3.to_excel(r'D:\a633.xlsx',index=False)
df['人气'] = df['人气'].apply(lambda x:float(x[:-1])*10000 if "万" in x else float(x))
df = pd.read_excel(r'D:\a633.xlsx')
plt.figure(figsize=(12,8),dpi=150)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.bar(x='主播',height='人气',data=df)
plt.xticks(rotation=90,font={'size':6})
plt.yticks(font={'size': 8})
plt.gca().yaxis.get_major_formatter().set_scientific(False)
plt.tight_layout()
plt.show()
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!