deep learning 模型不收敛

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
x=load_boston()
x_data=(x.data-np.mean(x.data,axis=0))/np.std(x.data)
y_data=(x.target-np.mean(x.target,axis=0))/np.std(x.target)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,shuffle=True,random_state=1,test_size=0.3)
x_train,x_test,y_train,y_test=tf.cast(x_train,dtype=tf.float32),tf.cast(x_test,dtype=tf.float32),tf.constant(y_train,shape=(y_train.shape[0],1),dtype=tf.float32),tf.constant(y_test,shape=(y_test.shape[0],1),dtype=tf.float32)
# *************************以下为初始化参数******************************************

# *************************以下设定超参数******************************************
lr1=0.1
lr2=0.1
lr3=0.1
lr4=0.1
eporch=3000
test_loss=[]
train_loss=[]

# *******************************循环开始****************
for eporch in range(eporch):
    w11=tf.Variable(tf.random.normal([13,20],mean=0,stddev=1,dtype=tf.float32,seed=2020))
    b11=tf.Variable(tf.random.normal([20],mean=0,stddev=1,dtype=tf.float32,seed=2020))
    w21=tf.Variable(tf.random.normal([20,20],mean=0,stddev=1,dtype=tf.float32,seed=2021))
    b21=tf.Variable(tf.random.normal([20],mean=0,stddev=1,dtype=tf.float32,seed=2021))
    w31=tf.Variable(tf.random.normal([20,20],mean=0,stddev=1,dtype=tf.float32,seed=2023))
    b31=tf.Variable(tf.random.normal([20],mean=0,stddev=1,dtype=tf.float32,seed=2023))
    w41=tf.Variable(tf.random.normal([20,1],mean=0,stddev=1,dtype=tf.float32,seed=2024))
    b41=tf.Variable(tf.random.normal([1],mean=0,stddev=1,dtype=tf.float32,seed=2024))

    with tf.GradientTape() as tp:
        y_1=tf.matmul(x_train,w11)+b11
        r1=tf.nn.relu(y_1)
        y_2=tf.matmul(r1,w21)+b21
        r2=tf.nn.relu(y_2)
        y_3=tf.matmul(r2,w31)+b31
        r3=tf.nn.relu(y_3)
        y_4=tf.matmul(r3,w41)+b41
        y_1_t=tf.matmul(x_test,w11)+b11
        r1_t=tf.nn.relu(y_1_t)
        y_2_t=tf.matmul(r1_t,w21)+b21
        r2_t=tf.nn.relu(y_2_t)
        y_3_t=tf.matmul(r2_t,w31)+b31
        r3_t=tf.nn.relu(y_3_t)
        y_4_t=tf.matmul(r3_t,w41)+b41
        TestLoss=tf.reduce_mean(tf.square(y_test-y_4_t))
        TestLoss=TestLoss.numpy()
        loss=tf.reduce_mean(tf.square(y_train-y_4))
        loss_np=loss.numpy()
        grad=tp.gradient(loss,[w11,w21,w31,w41,b11,b21,b31,b41])
        w11.assign_sub(grad[0]*lr1)
        w21.assign_sub(grad[1]*lr2)
        w31.assign_sub(grad[2]*lr3)
        w41.assign_sub(grad[3]*lr4)
        b11.assign_sub(grad[4]*lr1)
        b21.assign_sub(grad[5]*lr2)
        b31.assign_sub(grad[6]*lr3)
        b41.assign_sub(grad[7]*lr4)
        test_loss.append(TestLoss)
        train_loss.append(loss_np)
    if eporch%10==0:
        test_loss.append(TestLoss)
        print("第{}次迭代后的损失率为{}".format(eporch,loss))
        print("第{}次迭代后的测试集损失率为{}".format(eporch,TestLoss))
        
plt.figure(figsize=(10,10),facecolor="white",edgecolor="r")
plt.suptitle("test rsults",horizontalalignment="center",verticalalignment='top')
plt.subplot(211)
plt.title("loss曲线",loc="left")
plt.plot(train_loss,label="loss")
plt.legend(loc="best")
plt.ylabel("loss")
plt.subplot(212)
plt.title("test loss",loc="right")
plt.plot(test_loss,label="test_loss曲线")
plt.legend(loc="best")
plt.ylabel("test loss")
plt.show()

上述代码是一个有3个隐含层的神经网络,优化器是SGD,激活函数为RELU函数,经营模型训练时,训练了3000次,无论困在测试集还是在验证集,loss函数都没有收敛,呈现震荡,如下图,请大家个与指点,感谢!!!!

loss走势图和test loss走势图

 

现在看应该是loss设计的问题,可以后面私聊我发代码看下。