错误信息为
File "F:/python项目/titanic.py", line 55, in
kf = KFold(titanic.shape[0],n_splits=3,random_state=1)
TypeError: init() got multiple values for argument 'n_splits'
Process finished with exit code 1
代码为:
import pandas
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn import cross_validation
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
#导入
titanic = pandas.read_csv("all/train.csv")
#print(titanic.head(3))
#print(titanic.describe())
#处理缺失数据
titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())
#print(titanic.describe())
titanic.loc[titanic["Sex"]=="male","Sex"]=0
titanic.loc[titanic["Sex"]=="female","Sex"]=1
titanic["Embarked"] = titanic["Embarked"].fillna('S')
titanic.loc[titanic["Embarked"]=="S","Embarked"]=0
titanic.loc[titanic["Embarked"]=="C","Embarked"]=1
titanic.loc[titanic["Embarked"]=="Q","Embarked"]=2
#print(titanic["Sex"].unique())
#print(titanic["Embarked"].unique())
predictors = ["Pclass","Sex","Age","Sibsp","Parch","Fare","Embarked"]
alg = RandomForestClassifier(random_state=1, n_estimators=10,min_samples_split=2,min_samples_leaf=1)
kf = KFold(titanic.shape[0],n_splits=3,random_state=1)
scores = cross_val_score(alg,titanic[predictors],titanic["Survived"],cv=kf)
print(scores.means())
谢谢各位
导入的包的版本可能不同
我后面修改后不一样
我是这样修改的,你看能否可行
# 将要预测的目标列
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
# 初始化算法类
alg = LinearRegression()
kf = KFold(n_splits=3, random_state=1,shuffle=False)
predictions = []
for train, test in kf.split(titanic["Survived"]):
train_predictors = titanic[predictors].iloc[train,:]
train_target = titanic["Survived"].iloc[train]
alg.fit(train_predictors, train_target)
test_predictions = alg.predict(titanic[predictors].iloc[test,:])
predictions.append(test_predictions)