对数据进行MAD去极值的时候出错,不知怎么修改。hushen文件里的文件名分别为date、time、volume、price、cum。
import numpy as np
import pandas as pd
hs = pd.read_excel(r"C:/Users/DELL/Desktop/hushen.xls")
#对数据进行中位数去极值处理(MAD)
def extreme_MAD(data, n):
median = data.quantile(0.5) # 找出中位数
new_median = (abs((data - median)).quantile(0.5)) # 偏差值的中位数
data_up = median + n * new_median # 上限
data_down = median - n * new_median # 下限
return data.clip(data_down, data_up, axis=1) # 超出上下限的值,赋值为上下限
hs_MAD = extreme_MAD(hs, 5.2)
#对数据进行Z值标准化处理
def ZscoreNormalization(data):
data = (data - np.mean(data)) / np.std(data)
return data
hs_MAD_ZS=ZscoreNormalization(hs_MAD)
hs_MAD_ZS.sort_values(by='hushen', inplace=True, ascending=False)
y=hs_MAD_ZS.index
print(y)
File "C:\Users\DELL\PycharmProjects\VPIN\chuli.py", line 76, in
hs_MAD = extreme_MAD(hs, 5.2)
File "C:\Users\DELL\PycharmProjects\VPIN\chuli.py", line 71, in extreme_MAD
new_median = (abs((data - median)).quantile(0.5)) # 偏差值的中位数
不明白这俩地方为啥报错,该怎么解决