from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
columns = data_df.columns[1:]
data_df = pd.get_dummies(data_df,columns=columns)
data_df.shape
(400000, 1077)
feature_names = data_df.columns[data_df.columns != 'isClick']
X_sample = data_df[feature_names].values
y_sample = data_df['isClick'].values
X_train, X_test, y_train, y_test = train_test_split(X_sample,y_sample,
test_size=0.25,random_state=2019,
stratify=y_sample)
print (X_train.shape, X_test.shape, y_train.shape, y_test.shape)
KeyError Traceback (most recent call last)
D:\CAS\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'isClick'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-60-ecfec88f8b60> in <module>
15 #构造样本数据
16 X_sample = data_df[feature_names].values
---> 17 y_sample = data_df['isClick'].values
18 #将样本数据分为训练及测试数据集,将stratify参数设置为y_sample,按照y_sample中的0、1比例分配
19 X_train, X_test, y_train, y_test = train_test_split(X_sample,y_sample,
D:\CAS\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
D:\CAS\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'isClick'
这个错误是说数据表中没有'isClick'列名
检测下数据表文件,是不是'isClick'列名写错了
如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!
感觉是原数据里没有'isClick'这列
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!