对从某个数据集读取的 DataFrame 按日期补全缺失的日期行,并对缺失值进行线性插值。
DataFrame 的格式是:一列日期(date 列)和一列整型数值列。
def search_none(df, date_index):
    """Report which dates of ``date_index`` have no row in ``df``.

    Prints the number of missing dates and returns the missing dates.

    Args:
        df: DataFrame that has a ``date`` column.
        date_index: Iterable of the expected dates (e.g. a ``pd.date_range``).

    Returns:
        list: The entries of ``date_index`` that do not occur in
        ``df['date']``, in the order they appear in ``date_index``.
    """
    # Iterate over the column's values instead of looking rows up by the
    # labels 0..n-1, so a frame with a non-default index works as well.
    # A set keeps every membership test O(1).
    present = set(df['date'])
    lack = [day for day in date_index if day not in present]
    print("缺少价格的天数:", len(lack))
    return lack
def fix_none(df, date_index):
    """Insert the dates missing from ``df`` and fill them by linear interpolation.

    The input frame is not modified; a new frame is returned.

    Args:
        df: DataFrame with a ``date`` column; every other column is
            interpolated.
        date_index: The complete set of dates the result should contain
            (e.g. a daily ``pd.date_range``).

    Returns:
        DataFrame with one row per entry of ``date_index``: a ``date``
        column followed by the interpolated value columns.
    """
    # Report how many dates are missing.
    search_none(df, date_index)

    # Use the dates as the index so that reindex() inserts an all-NaN row
    # for every date missing from the data.
    filled = df.set_index('date').reindex(date_index).rename_axis('date')
    print(filled.reset_index())

    # Interpolate while 'date' is still the index: if the datetime column is
    # among the regular columns, interpolate(method='linear') tries to fill
    # it too and raises
    # "ValueError: Invalid fill method. Expecting pad (ffill) or backfill
    # (bfill). Got linear".
    filled = filled.interpolate(method='linear')

    result = filled.reset_index()
    print(result)
    return result
def transfor(path, col, to_path):
    """Load an Excel sheet and fill the missing days of the maize price series.

    Args:
        path: Path of the Excel file to read.
        col: Column names to assign to the loaded frame; must include
            ``'date'`` and ``'GZ_maize_prince'``.
        to_path: Not used by the code visible here.
            NOTE(review): presumably the output path - confirm and wire up.
    """
    df = pd.read_excel(path)
    df.columns = col

    # Build the full daily calendar so that dates absent from the data are
    # filled in and the monthly figures do not come out too small.
    date_index = pd.date_range(start='20161130', end='20211223', freq='D')

    # Take an explicit copy: handing a column slice of ``df`` to code that
    # modifies its argument triggers SettingWithCopyWarning and makes it
    # unclear whether ``df`` itself is affected.
    df1 = df[['date', 'GZ_maize_prince']].copy()

    # Repair the missing values.
    df_new = fix_none(df1, date_index)
    print(df_new)
报错内容:
File "E:/Users/Administrator/Desktop/chicken_price_predict/data/original_factory/Uncleaned_data/转换.py", line 62, in transfor
df_new = fix_none(df1, date_index)
File "E:/Users/Administrator/Desktop/chicken_price_predict/data/original_factory/Uncleaned_data/转换.py", line 45, in fix_none
df.interpolate(method='linear', inplace=True)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\frame.py", line 10720, in interpolate
**kwargs,
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\generic.py", line 6908, in interpolate
**kwargs,
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\internals\managers.py", line 377, in interpolate
return self.apply("interpolate", **kwargs)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\internals\managers.py", line 327, in apply
applied = getattr(b, f)(**kwargs)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\internals\blocks.py", line 1369, in interpolate
new_values = values.fillna(value=fill_value, method=method, limit=limit)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\arrays\_mixins.py", line 219, in fillna
value, method, validate_scalar_dict_value=False
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\util\_validators.py", line 372, in validate_fillna_kwargs
method = clean_fill_method(method)
File "C:\Users\Administrator\anaconda3\envs\tf2\lib\site-packages\pandas\core\missing.py", line 120, in clean_fill_method
raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear
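已经解决!
原因:调用 interpolate 时 DataFrame 中还包含 datetime 类型的 date 列,pandas 会对该列也尝试做 method='linear' 的填充,而日期类型的列只支持 pad (ffill) / backfill (bfill),因此报错。
解决办法:先将 date 列设为索引(index),使 interpolate 只作用于目标数值列;插值完成后再用 reset_index() 把 date 还原为普通列即可。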