BPNN regression to predict concrete strength: preprocessing with a custom Dataset, but the predictions are terrible. Help appreciated!

Hi all, I'm preprocessing the data with a custom Dataset and then using a BPNN for a regression task predicting concrete strength, but the predictions are very strange: training basically doesn't converge and the accuracy is low (the correlation coefficient often comes out as [nan], the MAE never goes down, and the predictions often look like the image below). I really can't find where the code is wrong, so any corrections would be appreciated! (My dataset has no one-hot labels, so the one-hot part of the preprocessing can be skipped.)

[Image: plot of the prediction results]

The dataset (1030 samples in total) was downloaded from GitHub: https://github.com/maajdl/Yeh-concrete-data-sklearn

Here is my Dataset preprocessing module:

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
import torch.nn.functional as F
from numpy import mean

class Excel_dataset(Dataset):

    def __init__(self, dir, if_normalize=False, if_onehot=False):
        super(Excel_dataset, self).__init__()

        data = pd.read_csv(dir)
        nplist = data.T.to_numpy()
        data = nplist[0:-1].T

        self.data = np.float64(data)
        self.target = nplist[-1]

        # Record the distinct label values (i.e. how many "classes" the labels have)
        self.target_type = []
        for i in self.target:
            if i not in self.target_type:
                self.target_type.append(i)


        # Convert the labels to integer (natural-number) codes
        self.target_num = []
        for i in self.target:
            self.target_num.append(self.target_type.index(i))


        # Convert to tensors
        self.data = np.array(self.data)
        self.data = torch.FloatTensor(self.data)
        self.target = np.array(self.target)
        self.target = torch.FloatTensor(self.target)
        self.target_num = np.array(self.target_num)
        self.target_num = self.target_num.astype(float)
        self.target_num = torch.LongTensor(self.target_num)

        self.if_onehot = if_onehot
        # Generate one-hot encodings
        self.target_onehot = []
        if if_onehot == True:

            for i in self.target_num:
                tar = F.one_hot(i.to(torch.int64), len(self.target_type))
                self.target_onehot.append(tar)

        if if_normalize == True:
            # F.normalize scales each feature column to unit L2 norm
            self.data = F.normalize(self.data, dim=0)

    def __getitem__(self, index):

        if self.if_onehot == True:
            return self.data[index], self.target_onehot[index]

        else:
            return self.data[index], self.target[index]

    def __len__(self):
        return len(self.target)


def data_split(data, rate):
    """Shuffle the dataset and split it into train and test subsets."""
    train_l = int(len(data) * rate)
    test_l = len(data) - train_l
    train_set, test_set = torch.utils.data.random_split(data, [train_l, test_l])
    return train_set, test_set


# Correlation coefficient (this computes Pearson's r, despite the R_Square name)
def R_Square(x, y):
    p1 = x2 = y2 = 0.0
    x = x.tolist()
    y = y.tolist()
    x_ = mean(x)
    y_ = mean(y)
    for i in range(len(x)):
        p1 += (x[i] - x_) * (y[i] - y_)
        x2 += (x[i] - x_) ** 2
        y2 += (y[i] - y_) ** 2
    # r becomes NaN when x or y has zero variance (0/0 division)
    r = p1 / ((x2 ** 0.5) * (y2 ** 0.5))
    return r

def main():

    data = Excel_dataset("Concrete_Data_Yeh-1.csv", if_normalize=True, if_onehot=False)
    data_train, data_test = data_split(data, 0.7)

if __name__ == '__main__':
    main()

Below is the main script, containing the BPNN network and the plotting:

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tabel_pretreatment import data_split, Excel_dataset, R_Square
import time
from sklearn.metrics import mean_absolute_error
import numpy as np


# Define the BP neural network
class BPNerualNetwork(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(nn.Linear(input_size, hidden_size1, bias=True),
                                   nn.ReLU(),
                                   nn.Linear(hidden_size1, hidden_size2, bias=True),
                                   nn.ReLU(),
                                   nn.Linear(hidden_size2, output_size),
                                   nn.ReLU()
                                   )

    def forward(self, x):
        x = self.model(x)
        return x


if __name__ == "__main__":
    data = Excel_dataset("Concrete_Data_Yeh.csv", if_normalize=True, if_onehot=False)
    data_train, data_test = data_split(data, 0.7)

    input_size = 8
    hidden_size1 = 5
    hidden_size2 = 10
    output_size = 1
    batchsize = 30

    net = BPNerualNetwork()
    print(net)

    optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

    # Set the loss function
    loss_func = torch.nn.MSELoss()

    # Training loop: train each epoch and record MAE and R on the test set
    data_loader = DataLoader(data_train, batch_size=batchsize, shuffle=False)

    for epoch in range(5000):
        # print(epoch)
        net.train()
        for step, (inputs, labels) in enumerate(data_loader):
            # Forward pass
            out = net(inputs)
            # Compute the loss
            loss = loss_func(out, labels)
            # Clear the gradients from the previous step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        net.eval()
        with torch.no_grad():
            test_dataloader = DataLoader(data_test, batch_size=len(data_test), shuffle=False)
            for i, (test_inputs, test_labels) in enumerate(test_dataloader):
                # images = images.cuda(device)
                # labels = labels.cuda(device)
                prediction = net(test_inputs)

                mae = mean_absolute_error(test_labels, prediction)
                R = R_Square(test_labels, prediction)

        if epoch % 1 == 0:  # print every epoch
            print("Epoch", epoch, " MAE:", mae, " R:", R)

    # Plotting
    index = np.argsort(test_labels)
    plt.figure(figsize=(12, 5))
    plt.plot(np.arange(len(test_labels)), test_labels[index], "r", label="Original Y")
    plt.scatter(np.arange(len(prediction)), prediction[index], s=3, c="b", label="Prediction")
    plt.legend(loc="upper left")
    plt.grid()
    plt.xlabel("Index")
    plt.ylabel("Y")
    plt.show()


I have tried tweaking the normalization, for example dividing each variable by its own maximum so every feature lies in [0, 1], but the results were basically the same. I also suspect something is off in the Dataset preprocessing, yet I can't spot the error; or maybe the learning rate is unsuitable, but I've tried everything from 0.1 down to 0.0001 and it made no difference. Newbie here hoping someone can help!
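Roughly, the per-feature scaling I tried in place of the F.normalize call was something like this (a sketch, not my exact code; minmax_scale is just an illustrative helper name):

import torch

def minmax_scale(features):
    # features: FloatTensor of shape [n_samples, n_features]; the concrete
    # features are all non-negative, so dividing by the column max lands in [0, 1]
    col_max, _ = features.max(dim=0, keepdim=True)
    return features / col_max

Inside __init__ I then replaced the normalize line with self.data = minmax_scale(self.data).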

Try changing the model or the formula.
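For example, something along these lines (just a sketch of the kind of change meant; the hidden sizes 32 and 16 are arbitrary, not taken from your code): drop the ReLU on the output layer so the regression output is not clamped at zero, and reshape the labels so MSELoss compares (B, 1) with (B, 1) instead of silently broadcasting.

import torch
import torch.nn as nn

# Sketch: regression network with a plain linear output layer
# (no ReLU on the last layer, so predictions are not forced to be >= 0 or stuck at 0)
class BPNeuralNetwork(nn.Module):
    def __init__(self, input_size=8, hidden1=32, hidden2=16, output_size=1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden1),
            nn.ReLU(),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Linear(hidden2, output_size),  # linear output for regression
        )

    def forward(self, x):
        return self.model(x)

# In the training loop, match the label shape to the network output, otherwise
# nn.MSELoss broadcasts the (B, 1) prediction against the (B,) labels:
# loss = loss_func(out, labels.view(-1, 1))

Shuffling the training DataLoader (shuffle=True) and trying torch.optim.Adam instead of plain SGD may also help convergence, but the output-layer ReLU and the label shape are the first things I would change.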

Concrete strength prediction (in R, with a very detailed and complete code walkthrough): neural network + linear regression.
Hope it helps; if so, please accept the answer.
https://blog.csdn.net/sereasuesue/article/details/105975841



Don't the labels need any processing? 😄
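For regression, one simple option is to keep the strength column as continuous floats (no class indices, no one-hot) and give it shape (N, 1) so it matches the network output. A minimal sketch (the ConcreteDataset name and the feature standardization are just illustrative, not from the original code):

import pandas as pd
import torch
from torch.utils.data import Dataset

# Sketch: regression targets stay as continuous floats with shape (N, 1)
class ConcreteDataset(Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        x = df.iloc[:, :-1].to_numpy(dtype="float32")  # the 8 mix/age features
        y = df.iloc[:, -1:].to_numpy(dtype="float32")  # compressive strength (MPa)

        # Standardize each feature column (zero mean, unit variance)
        x = (x - x.mean(axis=0)) / (x.std(axis=0) + 1e-8)
        # (The target could optionally be scaled too and un-scaled after prediction.)

        self.x = torch.from_numpy(x)
        self.y = torch.from_numpy(y)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.y)

Used in place of Excel_dataset, e.g. data = ConcreteDataset("Concrete_Data_Yeh.csv"), the rest of the training script stays the same apart from the label shape.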