import pandas as pd
import numpy as np
import itertools
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV, LarsCV
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")
data = pd.read_csv("C:/Users\Lenovo\Desktop\Prostate Cancer.csv")
print(data.head())
y_train = np.array(data[data["train"] == "T"]['lpsa'])
y_test = np.array(data[data["train"] == "F"]['lpsa'])
X_train = np.array(data[data["train"] == "T"].drop(['lpsa', 'train', 'Unnamed: 0'], axis=1))
X_test = np.array(data[data["train"]== "F"].drop(['lpsa', 'train', 'Unnamed: 0'], axis=1))
数据读取报错
结果如下
C:\Users\Lenovo\anaconda\envs\pythonProjec\python.exe C:/Users/Lenovo/PycharmProjects/pythonProject16/main.py
Id lcavo lweight age ... gleason pgg45 lpsa train
0 1 -0.579818 2.769459 50 ... 6 0 -0.430783 T
1 2 -0.994252 3.319626 58 ... 6 0 -0.162519 T
2 3 -0.510826 2.691243 74 ... 7 20 -0.162519 T
3 4 -1.203973 3.282789 58 ... 6 0 -0.162519 T
4 5 0.751416 3.432373 62 ... 6 0 0.371564 T
[5 rows x 11 columns]
Traceback (most recent call last):
File "C:\Users\Lenovo\anaconda\envs\pythonProjec\lib\site-packages\pandas\core\indexes\base.py", line 2889, in get_loc
return self._engine.get_loc(casted_key)
File "pandas_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\index.pyx", line 97, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\hashtable_class_helper.pxi", line 1675, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas_libs\hashtable_class_helper.pxi", line 1683, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'train'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Users/Lenovo/PycharmProjects/pythonProject16/main.py", line 20, in
y_train = np.array(data[data["train"] == "T"]['lpsa'])
File "C:\Users\Lenovo\anaconda\envs\pythonProjec\lib\site-packages\pandas\core\frame.py", line 2899, in getitem
indexer = self.columns.get_loc(key)
File "C:\Users\Lenovo\anaconda\envs\pythonProjec\lib\site-packages\pandas\core\indexes\base.py", line 2891, in get_loc
raise KeyError(key) from err
KeyError: 'train'
C:/Users\Lenovo\Desktop\Prostate Cancer.csv
里有 名字为 train 的列吗?