python用高斯朴素贝叶斯模型绘制三分类ROC曲线报错

#引入必要的库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import  cycle
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp

#加载数据
df = pd.read_excel('c:\\Users\\asus\Desktop\\mm.xls')
X = df.iloc[:142, 0:38]
y = df['y']
X = np.array(X.values)
y = np.array(y.values)
#将标签二值化
y = label_binarize(y, classes=[2, 1, 0])
#设置种类
n_classes = y.shape[1]

#shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0)

#learn to predict each class against the other
GNB = GaussianNB()
GNB.fit(X_train, y_train)
y_score = GNB.predict(X_test)


#计算每一类的ROC
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

#compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])


#plot all roc curves
lw=2
plt.figure()
plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' 
                                         ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class{0}(area = {1:0.2f})' 
                                                       ''.format(i, roc_auc[i]))

plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()

报错

Traceback (most recent call last):
  File "C:/Users/asus/.ipython/untitled19/FEI.py", line 29, in <module>
    GNB.fit(X_train, y_train)
  File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\naive_bayes.py", line 182, in fit
    X, y = check_X_y(X, y)
  File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 526, in check_X_y
    y = column_or_1d(y, warn=True)
  File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 562, in column_or_1d
    raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (71, 3)

这个g

看信息,是模型训练这一行(代码第29行)出错了,貌似是X_train或者y_train的shape有问题,你可以在这一行之前打印一下,看是否和想象的一样。

看样子有点像是你的y标签二值化失败了,所以shape会是(71,3),而正常期望应该是(71,)

博主,请问你解决了吗?