python爬虫正则表达式


# Plotting step of the script; assumes `df`, `pd` and `plt` are defined earlier.
# astype(float) only accepts plain numeric text, so a value such as '183万'
# makes this line raise ValueError.
df['人气']=df['人气'].astype(float)
# NOTE(review): `df` is rebound to the spreadsheet contents here, so the
# conversion on the previous line does not affect what is plotted below.
df=pd.read_excel(r'd:\a4.xls')

# Use the 人气 column as the index; the bar chart then draws the remaining columns.
df=df.set_index ('人气')
# Select the SimHei font, presumably so Chinese labels render in the figure.
plt.rcParams['font.sans-serif'] = ['SimHei']
df.plot(kind='bar')
plt.show()

用数据框画图时，爬取到的数据中有一项人气是“183万”，无法直接转换成数字。应该怎么用正则表达式把“万”这样的非数字字符去掉？

可以这样写，示例：

import pandas as pd
# Sample data: popularity strings with unit suffixes ('人', '万') and one empty value.
df = pd.DataFrame({'number': [1, 2, 3, 4], 'value': ['123人', '456万人', '789万', '']})
df.info()  # info() prints the summary itself and returns None, so it is not wrapped in print()

# Remove every non-digit character, then parse the remainder as a number.
# Values that end up empty are parsed as NaN and mapped to 0.
# NOTE: this only strips the unit text -- '456万人' becomes 456, not 4560000,
# and a decimal point (e.g. '18.3万') would be removed as well.
# Per-element alternative (needs `import re`):
#   df['value'].apply(lambda s: re.sub(r'\D+', '', s))
digits = df['value'].str.replace(r'\D+', '', regex=True)
df['value'] = pd.to_numeric(digits, errors='coerce').fillna(0).astype(int)
print(df)

运行结果：

   number  value
0       1    123
1       2    456
2       3    789
3       4      0


#coding=gbk
from selenium.webdriver.common.by import By
import lxml.html
import matplotlib.pyplot as plt
from selenium import webdriver
import pandas as pd
import time
from selenium import webdriver
def to_popularity(series):
    """Convert popularity text such as '123', '18.3万' or '183万人' to floats.

    The first decimal number found in each value is taken as the magnitude
    and is multiplied by 10000 when the value contains the character '万'.
    Values without any digits become 0.0.

    Args:
        series: pandas Series of popularity values (strings or numbers).

    Returns:
        A float Series aligned with ``series``.
    """
    text = series.astype(str).str.strip().str.replace(',', '', regex=False)
    number = pd.to_numeric(
        text.str.extract(r'(\d+(?:\.\d+)?)', expand=False), errors='coerce'
    )
    scale = text.str.contains('万', regex=False).map({True: 10000.0, False: 1.0})
    return (number * scale).fillna(0.0)


driver = webdriver.Chrome()  # start a Chrome session
try:
    driver.get(' ')  # open the start page
    time.sleep(2)  # fixed pause to let the page finish loading
    nav_link = driver.find_element(By.XPATH, '//a[@class="hy-nav-link hy-nav-link-ext clickstat"]')
    nav_link.click()
    time.sleep(2)
    category = driver.find_element(By.XPATH, '//li[@data-gid="1"]')
    category.click()
    time.sleep(2)
    driver.switch_to.window(driver.window_handles[-1])  # read from the most recently opened window
    html = driver.page_source
finally:
    # Always close the browser, even if one of the lookups above fails.
    driver.quit()

tree = lxml.html.fromstring(html)  # parsed lxml document
units = tree.xpath('//li[@class="game-live-item"]')

# Collect one dict per list item and build the frame once;
# DataFrame.append was removed in pandas 2.0.
rows = []
for u in units:
    rows.append({
        '标题': u.xpath('./a/@title')[0],
        # string() is used here instead of text() to get the node's full text
        '主播': u.xpath('./span/span/i[@class="nick"]')[0].xpath("string()"),
        '人气': u.xpath('./span/span[2]/i[@class="js-num"]/text()')[0],
    })
# Explicit columns keep the frame well-formed even when nothing was scraped.
df = pd.DataFrame(rows, columns=['标题', '主播', '人气'])
print(df)
print(df.dtypes)

# NOTE(review): the scraped frame is replaced by the spreadsheet contents here,
# exactly as in the original script. If the scraped data is what should be
# plotted, remove this read (or write the frame with df.to_excel first) --
# confirm the intent.
df = pd.read_excel(r'd:\a4.xls')
# Convert after loading so the conversion applies to the data that is plotted;
# plain astype(float) fails on values such as '183万'.
df['人气'] = to_popularity(df['人气'])

df = df.set_index('人气')
plt.rcParams['font.sans-serif'] = ['SimHei']  # presumably so Chinese labels render
df.plot(kind='bar')
plt.show()

您好，我是有问必答小助手，您的问题已经有小伙伴帮您解答，感谢您对有问必答的支持与关注！
PS：问答VIP年卡【限时加赠：IT技术图书免费领】，了解详情>>> https://vip.csdn.net/askvip?utm_source=1146287632