轴承数据集的故障预测,用混淆矩阵显示测试结果,总数相加数量和样本数量对不上
代码如下
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
import torch.utils.data
from torch.utils.data import TensorDataset
import torch
from torch.utils.data import DataLoader
import numpy as np
import model
from scipy.io import loadmat
data = loadmat('data_process.mat')
x_train1 = data['train_X']
x_valid1 = data['valid_X']
x_test1 = data['test_X']
y_train = data['train_Y'].argmax(axis=1)
y_valid = data['valid_Y'].argmax(axis=1)
y_test = data['test_Y'].argmax(axis=1)
ss1 = StandardScaler().fit(x_train1) # MinMaxScaler StandardScaler
x_train1 = ss1.transform(x_train1)
x_valid1 = ss1.transform(x_valid1)
# 输入卷积的时候还需要修改一下,增加通道数目
x_train1, x_valid1, x_test1 = x_train1[:,np.newaxis,:,np.newaxis], x_valid1[:,np.newaxis,:,np.newaxis], x_test1[:,np.newaxis,:,np.newaxis] #[:,:,np.newaxis]是什么意思
# 输入数据的维度
input_shape =x_test1.shape[1:]
print('测试样本个数', x_test1.shape[0])
print('测试标签的维度', y_test.shape)
# 转换为torch的输入格式
test_features1 = torch.tensor(x_test1).type(torch.FloatTensor)
test_labels = torch.tensor(y_test).type(torch.LongTensor)
print(test_features1.shape)
print(test_labels.shape)
N = test_features1.size(0)
epoch = 300
batch_size = 200
Learning_Rate = 0.1
from sklearn.model_selection import train_test_split
test_dataset = TensorDataset(test_features1, test_labels)
def restore_params(): # load the params of the net
models = model.CNN()
models.load_state_dict(torch.load('Yourmodel.pkl'))
return models
BATCH_SIZE = 200
test_acc_list = []
con_matrix = np.zeros([10, 10])
for i in range(10):
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True) # BATCH_SIZE,1,1024,1
mymodel = restore_params()
use_cuda = torch.cuda.is_available()
if use_cuda:
print('CUDA is available')
device = torch.device("cuda" if use_cuda else "cpu")
mymodel = mymodel.to(device)
for XTest, YTest in test_loader:
XTest, YTest = XTest.float(), YTest.long()
XTest, YTest = XTest.to(device), YTest.to(device)
test_logits, test_probas = mymodel(XTest)
test_preds = test_probas.max(1, keepdim=True)[1]
test_acc = test_preds.eq(YTest.view_as(test_preds)).sum().item() / len(YTest)
#cm = confusion_matrix(YTest, test_preds)
cm = confusion_matrix(YTest.cpu(), test_preds.cpu())
con_matrix += cm
test_acc_list.append(test_acc)
# visualization the Network
from matplotlib import pyplot as plt
classes = ['Ball_1','Ball_2','Ball_3','Inner_1','Inner_2','Inner_3', 'Outer_1', 'Outer_2', 'Outer_3','Normal']
yourconfusion = np.array(cm)
plt.imshow(yourconfusion, interpolation='nearest', cmap=plt.cm.Oranges) #按照像素显示出矩阵
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=-45)
plt.yticks(tick_marks, classes)
thresh = yourconfusion.max() / 2.
iters = [[i,j] for i in range(len(classes)) for j in range((len(classes)))]
for i, j in iters:
plt.text(j, i, format(yourconfusion [i, j]), fontsize=7)
plt.ylabel('True labels')
plt.xlabel('Predict labels')
plt.tight_layout()
plt.savefig('confusion_matrix.jpg')
plt.show()
测试样本个数2500
#硬投票和软投票对比
import numpy as np
import os
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
import warnings
warnings.filterwarnings('ignore')
#随机种子
np.random.seed(42)
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
# 主要参数作用如下:
# n_numbers:生成样本数量
# noise:默认是false,数据集是否加入高斯噪声
# random_state:生成随机种子,给定一个int型数据,能够保证每次生成数据相同。
#X:500行两列的数据 ,Y 500行一列的标签数据
X,y = make_moons(n_samples=500, noise=0.30, random_state=42)
print(X,y)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
plt.plot(X[:,0][y==0],X[:,1][y==0],'yo',alpha = 0.6) # 黄色的圆
plt.plot(X[:,0][y==0],X[:,1][y==1],'bs',alpha = 0.6) # 蓝色的矩形
#准备好数据集之后
#硬投票实验:
from sklearn.ensemble import RandomForestClassifier,VotingClassifier #投票分类器
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC #分类
#此处是为了实验,参数任意了
log_clf = LogisticRegression(random_state=42)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(random_state=42)
#投票 #在这里导入多个基本分类器
voting_clf = VotingClassifier(estimators= [ ('lr',log_clf),('rf',rnd_clf),('svc',svm_clf) ] ,voting= 'hard' )
from sklearn.metrics import accuracy_score #准确率包
for clf in (log_clf,rnd_clf,svm_clf,voting_clf):
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
print(clf.__class__.__name__,accuracy_score(y_test,y_pred) )
软投票是根据概率:
对于问题描述中的混淆矩阵问题,可能是因为混淆矩阵的计算方式存在问题。下面给出解决方案:
from sklearn.metrics import confusion_matrix
# y_true表示真实标签,y_pred表示预测标签
cm = confusion_matrix(y_true, y_pred)
# cm即为计算出来的混淆矩阵
以上是解决方案,根据问题描述可能性比较大的是混淆矩阵计算方式存在问题。建议仔细检查代码和数据,确认样本个数和计算方式是否正确。