请问一下,10×10交叉验证是重复分层交叉验证吗?10×10交叉验证的Python代码怎么实现?
10×10交叉验证是指重复10次的10折交叉验证:每次先将数据集随机打乱并分成十份,轮流将其中9份作为训练集、1份作为验证集,把10折结果的均值作为该次对算法精度的估计;如此重复10次,再对10次的结果求均值。如果划分时每一折都保持各类别的比例,它就是重复分层交叉验证。
下面是一个例子:
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import datasets
from sklearn import svm

try:
    # sklearn.cross_validation was removed in scikit-learn 0.20 (its contents
    # moved to sklearn.model_selection); tolerate its absence so the script
    # still starts on current releases.
    from sklearn import cross_validation
except ImportError:
    cross_validation = None
# Load the iris data set and expose its feature matrix (X) and class labels (Y)
# under the module-level names used further down.
iris = datasets.load_iris()
X, Y = iris.data, iris.target
def tenFolds(X, Y):
    """Run one stratified 10-fold cross-validation of a linear SVM.

    The data are split into ten stratified folds.  For every split the
    classifier is fitted on the nine training folds and scored on the
    held-out fold, so each sample is used for validation exactly once.

    Args:
        X: feature array, indexable by an integer index array (rows = samples).
        Y: target/label array aligned with ``X``.

    Returns:
        A tuple ``(scores, summary)`` where ``scores`` is the list of the ten
        per-fold scores and ``summary`` is ``mean(scores) + std(scores)``.
    """
    # sklearn.model_selection replaces the sklearn.cross_validation module,
    # which no longer exists in scikit-learn >= 0.20.
    from sklearn.model_selection import StratifiedKFold

    skf = StratifiedKFold(n_splits=10)
    clf = svm.SVC(kernel='linear', C=5)
    zhou = []
    for train_index, test_index in skf.split(X, Y):
        print('Train: ', train_index, 'Test: ', test_index)
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        # Score on the held-out fold.  The previous version ran a nested
        # 5-fold CV on the training part only and never touched the test fold.
        clf.fit(X_train, Y_train)
        zhou.append(clf.score(X_test, Y_test))

    fold_mean = np.mean(zhou)
    fold_std = np.std(zhou)
    print('均值:', fold_mean)
    # np.std is the standard deviation, so label it as such (not variance).
    print('标准差:', fold_std)
    # NOTE(review): mean + std is kept because the caller consumes this value;
    # confirm whether the plain mean was intended instead.
    return zhou, fold_mean + fold_std
import pandas as pd

all_valid = []  # one list of per-fold scores for each repeat
mean_var = []   # one summary value (second item returned by tenFolds) per repeat

# The labelled table is the same for every repeat, so build it once.  Column
# names are derived from the data instead of assuming exactly four features.
df1 = pd.DataFrame(X)
df2 = pd.DataFrame(Y)
feature_cols = ['f%d' % (k + 1) for k in range(df1.shape[1])]
labelled = pd.concat([df1, df2], axis=1)
labelled.columns = feature_cols + ['label']

# Repeat the 10-fold cross-validation ten times, reshuffling the rows each time.
for _ in range(10):
    # reset_index replaces the hard-coded range(150) and works for any size.
    df3 = labelled.sample(frac=1).reset_index(drop=True)
    X1 = np.array(df3[feature_cols])
    Y1 = np.array(df3['label'])
    zhou, vsd = tenFolds(X1, Y1)
    all_valid.append(zhou)
    mean_var.append(vsd)

# A bare expression is silently discarded when run as a script; print it.
print('10次10折交叉验证的平均结果:', np.mean(mean_var))
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!