使用sklearn.svm 解决多分类分体时,测试集的准确率100%
import nibabel as nib
import os
import numpy as np
import sklearn
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn import preprocessing
import tensorflow as tf
os.chdir("C:/Users/Desktop/hap_nii/hap_nii") # 设置工作目录
file_chdir = os.getcwd() # 获得工作目录
# matplotlib.use('TkAgg')
# 文件名,nii或nii.gz
filename_npy = [] # 文件名列表
file_npy = [] # 数据列表
label_npy = []
acc_s_train = [] # 准确率
acc_s_test = []
auc_s = [] # 准确率 (两种不同计算方法的准确率)
# 数据处理(从文件夹中获取数据集,和标签)
for root, dirs, files in os.walk(file_chdir): # os.walk会便利该目录下的所有文件
for file in files:
if os.path.splitext(file)[-1] == '.nii': # 判断文件格式是否符合nii格式
filename_npy.append(file) # 存储文件名
lab = file[10] # s102_w1_v1 第七位表示类别
file_nii = nib.load(file)
file_niidata = file_nii.get_fdata()
files_npy = np.squeeze(np.array(file_niidata))
print(np.shape(files_npy))
# 每15分一组
files_len = files_npy.shape[3]
i = 15
while i < files_len:
file_npy.append((files_npy[:, :, :, i-15:i]).flatten())
label_npy.append(lab)
i += 15
data = file_npy # data就是所有数据的存储
label = label_npy
# 转化为数组格式
data = np.array(data)
# 归一化
min_max_scaler = preprocessing.MinMaxScaler()
data = min_max_scaler.fit_transform(data)
label = np.array(label)
label = tf.keras.utils.to_categorical(label)
# 用索引打乱数据
length = len(data)
index = [i for i in range(length)]
np.random.seed(42)
np.random.shuffle(index)
data = data[index]
label = label[index]
rate = 0.1
center = int(float(length) * rate)
train_index = [x for x in range(0, center)]
test_index = [y for y in range(center, length)]
fold_train_data, fold_train_label = data[train_index], label[train_index]
fold_test_data, fold_test_label = data[test_index], label[test_index]
clf = OneVsRestClassifier(svm.SVC(decision_function_shape='ovr', kernel='rbf', C=1))
clf.fit(fold_train_data, fold_train_label)
label_pre = clf.predict(fold_test_data) # 预测值
label_pre_train = clf.predict(fold_train_data)
acc_test = sum(fold_test_label == label_pre) / len(fold_test_label)
acc_train = sum(fold_train_label == label_pre_train) / len(fold_train_label)
auc = sklearn.metrics.accuracy_score(fold_test_label, label_pre)
acc_s_test.append(acc_test)
acc_s_train.append(acc_train)
auc_s.append(auc)
训练结果的准确率为100%
我认为主要是这两句代码可能出现了问题
clf = OneVsRestClassifier(svm.SVC(decision_function_shape='ovr', kernel='rbf', C=1))
clf.fit(fold_train_data, fold_train_label)
label_pre = clf.predict(fold_test_data) # 预测值
label_pre_train = clf.predict(fold_train_data)
解决准确率100%的问题
高维小样本数据;医学数据数据原本是261515*16的数据,我将其拉伸成了1维进行分类。用了svm和对数几率回归,五折交叉验证。测试准确率都是100%
python - sklearn 计算准确率
https://blog.csdn.net/weixin_42272869/article/details/123782054
有其他问题,可以一起探讨