可视化图形不正常
#coding=gbk
from selenium import webdriver
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import re
import pandas as pd
import lxml.html
import requests
from PIL import Image
from io import BytesIO
# Chrome options: exclude the "enable-automation" switch and pass the
# command-line flags listed below.
opt = Options()
opt.add_experimental_option('excludeSwitches', ['enable-automation'])
for flag in (
    '--disable-blink-features',
    '--disable-extensions',
    '--disable-blink-features=AutomationControlled',
):
    opt.add_argument(flag)

# Open the profile page, read its HTML, pause two seconds, close the window.
driver = webdriver.Chrome(options=opt)
driver.get('https://www.acfun.cn/u/1075162')
html = driver.page_source
time.sleep(2)
driver.close()

# Every <figcaption> element on the page is treated as one entry.
xp = lxml.html.fromstring(html)
units = xp.xpath('//figcaption')

# Start from None; pd.concat drops None entries, so the first concat
# yields just the first row.
df = None
for u in units:
    w = {
        "标题": u.xpath('./p [@class="title line"]/text()')[0],
        "观看与弹幕": u.xpath('./p [@class="play-info"]/text()')[0],
        "日期": u.xpath('./p [@class="date"]/text()')[0],
    }
    if w["标题"] == []:
        print('无')
    df = pd.concat([df, pd.DataFrame([w])], ignore_index=True)

    # First decimal number found in the play-info text.
    first_decimal = re.findall(r'\d+[.]\d+', w["观看与弹幕"])[0]
    print(first_decimal)
    a = float(first_decimal)

print(df)
print(df.dtypes)

# NOTE: `a` and `w` are overwritten on every pass of the loop above, so this
# frame holds exactly one row (the last entry) and the chart has a single bar.
df = pd.DataFrame({'观看量': [a]}, index=[w["标题"]])

import matplotlib.pyplot as plt

plt.rcParams.update({'font.sans-serif': ['SimHei']})  # default font
pd.to_numeric(df['观看量']).plot.bar()
plt.show()
运行后,plt.show() 显示的柱状图里只有一根柱子。
我尝试过修改字符串,但没有修改成功。
因为循环里每次都会覆盖变量 a 和 w,循环结束后你只保存了最后一组数据,所以只能画出一根柱子。
我帮你做了一点修改:用两个列表把每次循环得到的观看量和标题都保存下来,再一起画图,你试试:
#coding=gbk
from selenium import webdriver
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import re
import pandas as pd
import lxml.html
import requests
from PIL import Image
from io import BytesIO
PROFILE_URL = 'https://www.acfun.cn/u/1075162'
PAGE_RENDER_WAIT_SECONDS = 2

# An integer or a decimal number. The earlier pattern r'\d+[.]\d+' only
# accepted decimals, so an entry with a whole-number count raised IndexError.
NUMBER_PATTERN = re.compile(r'\d+(?:\.\d+)?')


def build_chrome_options():
    """Return the Chrome options used for the scraping session.

    Excludes the ``enable-automation`` switch and disables extensions and
    the ``AutomationControlled`` blink feature.
    """
    # NOTE(review): presumably these flags keep the site from detecting an
    # automated browser -- confirm against the target page's behaviour.
    opt = Options()
    opt.add_experimental_option('excludeSwitches', ['enable-automation'])
    opt.add_argument('--disable-blink-features')
    opt.add_argument('--disable-extensions')
    opt.add_argument('--disable-blink-features=AutomationControlled')
    return opt


def fetch_page_source(url, wait_seconds=PAGE_RENDER_WAIT_SECONDS):
    """Open *url* in Chrome, wait *wait_seconds*, and return the page HTML.

    The wait happens BEFORE ``page_source`` is read; sleeping afterwards
    (as the script did before) cannot affect the HTML that was captured.
    The browser is shut down even if loading the page raises.
    """
    driver = webdriver.Chrome(options=build_chrome_options())
    try:
        driver.get(url)
        time.sleep(wait_seconds)
        return driver.page_source
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()


def first_text(node, path):
    """Return the first result of ``node.xpath(path)``, or None if empty."""
    found = node.xpath(path)
    return found[0] if found else None


def parse_view_count(play_info):
    """Return the first number in *play_info* as a float, or None.

    Returns None when *play_info* is empty/None or contains no digits.
    """
    # NOTE(review): the number is taken as written; if the site abbreviates
    # large counts with a unit suffix, values are not rescaled -- verify.
    if not play_info:
        return None
    match = NUMBER_PATTERN.search(play_info)
    return float(match.group()) if match else None


def extract_records(html):
    """Return one dict per ``<figcaption>`` in *html*.

    Each dict has the keys "标题", "观看与弹幕" and "日期". Entries without
    a title are reported with '无' and skipped; a missing play-info or
    date is stored as None.
    """
    records = []
    for caption in lxml.html.fromstring(html).xpath('//figcaption'):
        # Check for a missing title BEFORE indexing; comparing the indexed
        # value with [] (as before) could never be true.
        heading = first_text(caption, './p [@class="title line"]/text()')
        if heading is None:
            print('无')
            continue
        records.append({
            "标题": heading,
            "观看与弹幕": first_text(caption, './p [@class="play-info"]/text()'),
            "日期": first_text(caption, './p [@class="date"]/text()'),
        })
    return records


def main():
    """Scrape the profile page and plot one bar per entry."""
    records = extract_records(fetch_page_source(PROFILE_URL))

    # Keep EVERY entry's value and title; keeping only the last loop value
    # is what produced a chart with a single bar.
    volume = []
    title = []
    for record in records:
        count = parse_view_count(record["观看与弹幕"])
        if count is None:
            continue  # no usable number for this entry
        print(count)
        volume.append(count)
        title.append(record["标题"])

    # Build the overview frame once instead of concatenating per iteration.
    df = pd.DataFrame(records)
    print(df)
    print(df.dtypes)

    if not volume:
        # Nothing to draw; plotting an empty frame would raise.
        print('无')
        return

    # Imported here because the file's import header does not provide it.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render CJK labels
    chart = pd.DataFrame({'观看量': volume}, index=title)
    chart['观看量'].plot(kind='bar')
    plt.show()


if __name__ == "__main__":
    main()