#引入必要的库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
#加载数据
df = pd.read_excel('c:\\Users\\asus\Desktop\\mm.xls')
X = df.iloc[:142, 0:38]
y = df['y']
X = np.array(X.values)
y = np.array(y.values)
#将标签二值化
y = label_binarize(y, classes=[2, 1, 0])
#设置种类
n_classes = y.shape[1]
#shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0)
#learn to predict each class against the other
GNB = GaussianNB()
GNB.fit(X_train, y_train)
y_score = GNB.predict(X_test)
#计算每一类的ROC
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
#compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
#plot all roc curves
lw=2
plt.figure()
plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})'
''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4)
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class{0}(area = {1:0.2f})'
''.format(i, roc_auc[i]))
plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()
报错
Traceback (most recent call last):
File "C:/Users/asus/.ipython/untitled19/FEI.py", line 29, in <module>
GNB.fit(X_train, y_train)
File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\naive_bayes.py", line 182, in fit
X, y = check_X_y(X, y)
File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 526, in check_X_y
y = column_or_1d(y, warn=True)
File "C:\Users\asus\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 562, in column_or_1d
raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (71, 3)
这个g
看信息,是模型训练这一行(代码第29行)出错了,貌似是X_train或者y_train的shape有问题,你可以在这一行之前打印一下,看是否和想象的一样。
看样子有点像是你的y标签二值化失败了,所以shape会是(71,3),而正常期望应该是(71,)
博主,请问你解决了吗?