# Load the data first, so the clean-up below is applied to what gets plotted.
df = pd.read_excel(r'd:\a4.xls')

# Popularity cells may be text such as '183万' or '1.5万', which a plain
# astype(float) rejects. Split each cell into its number and optional unit.
parts = (
    df['人气']
    .astype(str)
    .str.replace(',', '', regex=False)  # drop thousands separators
    .str.extract(r'(?P<number>\d+(?:\.\d+)?)\s*(?P<unit>[万亿])?')
)
# 万 = 10**4, 亿 = 10**8; cells without a unit keep a factor of 1.
scale = parts['unit'].map({'万': 1e4, '亿': 1e8}).fillna(1.0)
# Cells with no number at all (blank, NaN) become 0.
number = pd.to_numeric(parts['number'], errors='coerce').fillna(0.0)
df['人气'] = (number * scale).astype(float)

# NOTE(review): with '人气' as the index, the bars are drawn from the remaining
# numeric columns. If the goal is to chart popularity itself, index by another
# column and plot df['人气'] instead - confirm the intent.
df = df.set_index('人气')
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
df.plot(kind='bar')
plt.show()
用数据框画图时,爬取的数据中有一项人气是“183万”,应该怎样用正则去除其中的“万”字?
可以这样写,示例:
import pandas as pd
# Sample data: counts stored as text with unit suffixes, plus one empty cell.
df = pd.DataFrame({'number': [1, 2, 3, 4], 'value': ['123人', '456万人', '789万', '']})
df.info()  # info() prints its own report and returns None, so no print() around it

# Remove every non-digit character (raw string, so \D reaches the regex engine).
# The unit is dropped, not applied: '456万人' becomes 456, and a decimal point
# would be removed as well.
digits = df['value'].str.replace(r'\D+', '', regex=True)
# A cell with no digits is now an empty string; coerce it to NaN, then to 0.
df['value'] = pd.to_numeric(digits, errors='coerce').fillna(0).astype(int)
print(df)
运行结果:
number value
0 1 123
1 2 456
2 3 789
3 4 0
#coding=gbk
from selenium.webdriver.common.by import By
import lxml.html
import matplotlib.pyplot as plt
from selenium import webdriver
import pandas as pd
import time
from selenium import webdriver
# Drive Chrome through two clicks and capture the HTML of the resulting page.
driver = webdriver.Chrome()  # create the driver object
try:
    # NOTE(review): the URL is blank in this listing - fill in the target address.
    driver.get(' ')  # open the page
    time.sleep(2)  # fixed pause before looking up the next element

    nav_link = driver.find_element(
        By.XPATH, '//a[@class="hy-nav-link hy-nav-link-ext clickstat"]'
    )
    nav_link.click()
    time.sleep(2)

    category = driver.find_element(By.XPATH, '//li[@data-gid="1"]')
    category.click()
    time.sleep(2)

    # Switch to the most recently opened window handle and read its page.
    driver.switch_to.window(driver.window_handles[-1])
    html = driver.page_source
finally:
    # The driver is not used after this point; quit it so the browser is
    # closed even when one of the lookups above raises.
    driver.quit()
# Parse the captured HTML and collect one record per list item.
xp = lxml.html.fromstring(html)  # lxml element tree
# Select the <li> elements themselves (no text() step) so that each one can be
# queried again with relative XPath expressions below.
units = xp.xpath('//li[@class="game-live-item"]')

records = [
    {
        '标题': u.xpath('./a/@title')[0],
        # string() returns the full text content of the matched <i> node.
        '主播': u.xpath('./span/span/i[@class="nick"]')[0].xpath("string()"),
        '人气': u.xpath('./span/span[2]/i[@class="js-num"]/text()')[0],
    }
    for u in units
]

# Build the frame in one step: DataFrame.append was removed in pandas 2.0, and
# growing a frame row by row copies it on every iteration. Naming the columns
# keeps them present even when no item matched.
df = pd.DataFrame(records, columns=['标题', '主播', '人气'])
print(df)
print(df.dtypes)
# Load the saved data first, so the clean-up below is applied to what gets
# plotted.
# NOTE(review): this replaces the DataFrame built from the scraped page, as the
# original did; drop the read_excel line to chart the freshly scraped rows.
df = pd.read_excel(r'd:\a4.xls')

# Popularity cells may be text such as '183万' or '1.5万', which a plain
# astype(float) rejects. Split each cell into its number and optional unit.
parts = (
    df['人气']
    .astype(str)
    .str.replace(',', '', regex=False)  # drop thousands separators
    .str.extract(r'(?P<number>\d+(?:\.\d+)?)\s*(?P<unit>[万亿])?')
)
# 万 = 10**4, 亿 = 10**8; cells without a unit keep a factor of 1.
scale = parts['unit'].map({'万': 1e4, '亿': 1e8}).fillna(1.0)
# Cells with no number at all (blank, NaN) become 0.
number = pd.to_numeric(parts['number'], errors='coerce').fillna(0.0)
df['人气'] = (number * scale).astype(float)

# NOTE(review): with '人气' as the index, the bars are drawn from the remaining
# numeric columns. If the goal is to chart popularity itself, index by '主播'
# and plot df['人气'] instead - confirm the intent.
df = df.set_index('人气')
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
df.plot(kind='bar')
plt.show()
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!