将基础0-1二分类模型与Bagging算法集成后,集成模型的预测结果全为0,这是出了什么问题?
def TrainPredict(self, x_train, y_train, x_test, threshold=0.5):
    """Fit ``self.estimator`` and return hard 0/1 predictions for ``x_test``.

    The raw output of ``predict`` is flattened to one column and compared
    against ``threshold``. This matters when the estimator returns scores
    in [0, 1] instead of class labels: casting such scores to an integer
    dtype truncates every value below 1.0 to 0, which makes the whole
    ensemble look like it predicts only the negative class. Outputs that
    are already 0/1 labels pass through the comparison unchanged.

    Args:
        x_train: Training features, forwarded unchanged to ``fit``.
        y_train: Training labels, forwarded unchanged to ``fit``.
        x_test: Features to predict on, forwarded unchanged to ``predict``.
        threshold: Scores strictly greater than this value become 1,
            everything else becomes 0.

    Returns:
        An ``int64`` array of shape ``(n, 1)`` holding 0/1 labels.
    """
    # NOTE(review): the same estimator object is fitted on every call, so
    # successive ensemble members are presumably not independent unless
    # ``fit`` re-initialises the model -- confirm, and consider building a
    # fresh estimator per member.
    self.estimator.fit(x_train, y_train, batch_size=16, epochs=5, shuffle=True)
    y_predict = self.estimator.predict(x_test)
    y_predict = y_predict.reshape(-1, 1)
    # Binarise here instead of letting a later integer cast truncate the
    # scores toward zero.
    return (y_predict > threshold).astype('int64')
def Bagging_clf(self, x_train, x_test, y_train, y_test):
    """Train ``self.n_estimator`` members on bootstrap samples and predict.

    Every member is trained on a sample drawn with
    ``self.RepititionRandomSampling`` from the ORIGINAL training data and
    then asked to predict ``x_test`` through ``self.TrainPredict``. The
    per-member predictions are stacked, binarised at 0.5 and returned.

    Args:
        x_train: Training features; reshaped to rows of 23 values.
        x_test: Test features, forwarded unchanged to ``TrainPredict``.
        y_train: Training labels; reshaped to a single column.
        y_test: Unused here; kept so existing callers keep working.

    Returns:
        An ``int64`` array with one leading entry per ensemble member,
        containing 0/1 labels.
    """
    seq_len, n_features = 10, 23

    # Build the (features | label) table once. The original code rebuilt it
    # inside the loop from the previous bootstrap sample, so every member
    # after the first resampled an already-resampled data set.
    flat_x = x_train.reshape(-1, n_features)
    flat_y = y_train.reshape(-1, 1)
    train = np.append(flat_x, flat_y, axis=1)

    y_predict_ensemble = []
    for _ in range(self.n_estimator):
        sample = self.RepititionRandomSampling(data=train, number=len(train))
        sample = np.array(sample)
        # NOTE(review): rows are sampled individually and then regrouped in
        # blocks of ``seq_len``; if each block is meant to be one ordered
        # sequence, this mixes rows from different sequences -- confirm.
        # NOTE(review): the int64 cast truncates non-integer feature values;
        # verify the features really are integers.
        x_boot = sample[:, :-1].reshape(-1, seq_len, n_features).astype('int64')
        print('-----------x_train=------------')
        print(x_boot)
        y_boot = sample[:, -1].reshape(-1, seq_len, 1).astype('int64')
        print('-----------y_train=------------')
        print(y_boot)
        print('--------------y_train是否为全零矩阵------------')
        print(not y_boot.any())
        y_predict_ensemble.append(
            self.TrainPredict(x_train=x_boot, y_train=y_boot, x_test=x_test)
        )

    # Threshold before the integer cast: casting scores in [0, 1) straight
    # to int64 truncates all of them to 0. Values that are already 0/1 are
    # unaffected by the comparison.
    y_predict_ensemble = (np.array(y_predict_ensemble) > 0.5).astype('int64')
    print('-----------y_predict_ensemble是否为全零矩阵-------------')
    # Shape-agnostic all-zero check (no hard-coded test-set size).
    print(not y_predict_ensemble.any())
    print('------------------------')
    return y_predict_ensemble
预测结果全为0
-----------y_predict_ensemble是否为全零矩阵-------------
True
其中 Vote 方法为自编的硬投票函数。一开始以为是 Vote 出了问题,排查后发现训练集中 Y 的值是存在 1 的,因此按理说基础模型训练后不应该对所有样本都预测为 0。进一步追根溯源时发现,问题始于 TrainPredict 这一步:它生成的 y_predict 就已经是一个全零数组。
应该如何正确地训练基础模型,才能得到更准确的预测结果?