def cust_period(value, op="days", op_period=0, base_dt=None):
    """Return a boolean mask for the entries of ``value`` inside a trailing period.

    The period ends at ``base_dt`` (or, when ``base_dt`` is falsy, at the
    largest non-"-1" entry of ``value``) and starts ``op_period`` units of
    ``op`` earlier.  The start is formatted back to a "%Y/%m/%d %H:%M" string
    and compared element-wise against ``value.values``.

    Args:
        value: Object exposing ``tolist()`` and ``values`` (used here like a
            pandas Series) holding timestamp strings in "%Y/%m/%d %H:%M"
            format.  Entries equal to "-1" are ignored when looking for the
            latest timestamp (presumably a missing-value placeholder).
        op: Unit of the look-back window: "years", "months", "weeks" or
            "days".
        op_period: Number of ``op`` units to look back from the end point.
        base_dt: Optional end point as a "%Y/%m/%d %H:%M" string; when falsy
            the largest non-"-1" entry of ``value`` is used instead.

    Returns:
        A list with one boolean per entry of ``value``.  When ``value``
        contains nothing but "-1" entries (or is empty) every entry is True.

    Raises:
        ValueError: If ``op`` is not a supported unit.  The unit is only
            checked when ``value`` has at least one non-"-1" entry.
    """
    supported_units = ("years", "months", "weeks", "days")
    timestamp_format = "%Y/%m/%d %H:%M"

    entries = value.tolist()
    # Single pass instead of repeatedly scanning and removing "-1".
    candidates = [entry for entry in entries if entry != "-1"]
    if not candidates:
        return [True] * len(entries)

    if op not in supported_units:
        # Raise instead of print() + exit(): a helper must not terminate the
        # interpreter, and callers can now handle the error.
        raise ValueError(
            f"Wrong operation unit of time: {op!r}; "
            f"expected one of {supported_units}"
        )

    latest = base_dt if base_dt else max(candidates)
    # The supported unit names are exactly relativedelta's keyword names.
    window_start = (
        datetime.datetime.strptime(latest, timestamp_format)
        + relativedelta(**{op: -op_period})
    )
    end_dt = window_start.strftime(timestamp_format)

    # String comparison: relies on the zero-padded format sorting
    # lexicographically in chronological order.
    return list(value.values >= end_dt)
# Convert the timestamps with an explicit unit; a unit-less 'datetime64'
# dtype is rejected by pandas 2.x.
# NOTE(review): the source frame is `data1` while the target is
# `data1自然流量` — confirm these are meant to be different objects.
data1自然流量['date_time'] = data1['date_time'].astype('datetime64[ns]')

# Exclude December rows before aggregating.
non_december = data1自然流量[data1自然流量['date_time'].dt.month != 12]

# Build the row mask per 'date_time' group with transform() and filter the
# full frame.  Filtering through groupby(...)[cols].apply(...) returned a
# frame holding only the selected columns, so 'date_time' was no longer a
# column and the final groupby("date_time") raised KeyError: 'date_time'.
in_window = non_december.groupby('date_time')['date'].transform(
    lambda dates: cust_period(dates, "months", 2)
)

agg_2m = non_december[in_window.astype(bool)]
agg_2m = agg_2m.groupby("date_time")["全部自然流量"].agg(["sum"])
为什么执行到最后一步时会报错 KeyError: 'date_time'？
函数 cust_period 对参数 value 调用了 tolist() 方法，将其转换为列表，最终返回的只是一个用于筛选行的布尔列表。在最后一步中，agg_2m 已经不是原来的数据框，而是一个经过计算的新数据框：groupby 之后只选取了 'date' 和 '全部自然流量' 两列再执行 apply，所以得到的结果中没有 'date_time' 这一列。因此在最后一步中再按 'date_time' 分组时会报错 KeyError: 'date_time'。