GRU模型源设置的隐层数量在后续模型调用中不起作用码解析

设置的隐层数量在后续模型调用中不起作用
代码

# 导入所需的库
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, Layer,Activation
from keras import optimizers, losses, metrics
from keras.layers import Dropout,Dense,Flatten
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_absolute_percentage_error,r2_score,explained_variance_score
import time
from keras.models import Sequential
from tensorflow.keras.layers import Input, SimpleRNN
from tensorflow.keras.models import Model

class SRULayer(tf.keras.layers.Layer):
    def __init__(self, hidden_units, using_highway=False,return_sequences=False):
        super(SRULayer, self).__init__()
        self.hidden_units = hidden_units
        self._using_highway = using_highway

@property
def state_size(self):
    return (self.hidden_units, self.hidden_units)

@property
def output_size(self):
    return self.hidden_units

def __call__(self, x, state, scope=None):
    if self._using_highway:
        return self.call_with_highway(x, state, scope)
    else:
        return self.call_without_highway(x, state, scope)

def call_without_highway(self, x, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        c, _ = state     #保留state的第一个维度  批次数量batch_size
        x_size = x.get_shape().as_list()[1]
        W_u = tf.Variable(tf.random.normal([x_size, 3 * self.output_size]), name='W_u')
        b_f = tf.Variable(tf.random.normal([self.hidden_units]), name='b_f')
        b_r = tf.Variable(tf.random.normal([self.hidden_units]), name='b_r')

        xh = tf.matmul(x, W_u)
        z, f, r = tf.split(xh, 3, 1)
        f = tf.sigmoid(f + b_f)
        r = tf.sigmoid(r + b_r)
        new_c = f * c + (1 - f) * z
        new_h = r * tf.tanh(new_c)
        return new_h, (new_c, new_h)

def call_with_highway(self, x, state, scope=None):
    with tf.compat.v1.variable_scope(scope or type(self).__name__):
        c, _ = state
        x_size = x.get_shape().as_list()[1]

        W_u = tf.Variable(tf.random.normal([x_size, 4 * self.output_size]), name='W_u')
        b_f = tf.Variable(tf.random.normal([self.hidden_units]), name='b_f')
        b_r = tf.Variable(tf.random.normal([self.hidden_units]), name='b_r')

        xh = tf.matmul(x, W_u)
        z, f, r, x = tf.split(xh, 4, 1)

        f = tf.sigmoid(f + b_f)
        r = tf.sigmoid(r + b_r)

        new_c = f * c + (1 - f) * z
        new_h = r * tf.tanh(new_c) + (1 - r) * x

        return new_h, (new_c, new_h)



dataframe = pd.read_csv('E:\Python\SRU\SRU111\ZQS27测点.csv')
x1_set = dataframe["ZQS27"]
y_set = dataframe["ZQS27"]

x1_set = x1_set.values
y_set = y_set.values

sc1 = MinMaxScaler(feature_range=(0, 1))
scy = MinMaxScaler(feature_range=(0, 1))

x1_Normalization=sc1.fit_transform(x1_set.reshape(-1,1))
y_Normalization=scy.fit_transform(y_set.reshape(-1,1))

n_steps_in=7
n_steps_out=1
n_dim_input=1

x_total=np.zeros((len(x1_Normalization),n_dim_input))  #返回元素全为0，给定形状的数组（160，1）
y_total=np.zeros((len(x1_Normalization),n_steps_out))  #返回元素全为0，给定形状的数组（160，1）

for i in range(len(x1_Normalization)):
    x_total[i][0]=x1_Normalization[i]
    y_total[i]=y_Normalization[i]

X_total=np.zeros((len(x1_Normalization)-n_steps_in,n_steps_in,n_dim_input)) #返回元素全为0，给定形状的数组（153，7,1）
Y_total=np.zeros((len(x1_Normalization)-n_steps_in,n_steps_out))            #返回元素全为0，给定形状的数组（153，1）


for j in range(n_steps_in,len(x_total)):  #从（7，160）
    X_total[j-n_steps_in]=np.reshape(x_total[j - n_steps_in:j, :], (n_steps_in,n_dim_input))  #j=8， X_total[1]=np.reshape(x_total[1:8, :], (7,1)）
    Y_total[j-n_steps_in]=y_total[j:j+ n_steps_out , 0]                                       #j=8， Y_total[1]=y_total[8:9,0]


x_train = X_total[0:int(0.8*X_total.shape[0])]
y_train = Y_total[0:int(0.8*X_total.shape[0])]
x_test = X_total[int(0.8*X_total.shape[0]):X_total.shape[0]]
y_test = Y_total[int(0.8*X_total.shape[0]):X_total.shape[0]]

#打乱数据集
np.random.seed(7)             #设置随机数
np.random.shuffle(x_train)    #打乱数据集
np.random.seed(7)
np.random.shuffle(y_train)    #打乱数据集
tf.random.set_seed(7)

def build_model():
    lr,Neurons,Neurons1=0.001,80,100
    inputs = Input(shape=(n_steps_in,n_dim_input))
    x = SRULayer(hidden_units=Neurons, return_sequences=True)(inputs)
    x1=Dropout(0.1)(x)
    x2=Flatten()(x1) # 展平层
    x3 = SRULayer(Neurons1)(x2)
    x4 = Dropout(0.1)(x3)
    outputs = Dense(1,activation="relu")(x4)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizers.Adam(lr),
                       loss='mae')
    model.summary()
    return model
model=build_model()

# 训练模型
epoch=200
history = model.fit(x_train, y_train,batch_size=16, epochs=epoch,validation_freq=1,shuffle=True)

网络结果

【相关推荐】

这篇博客: GRU模型中的 GRU的优势 部分也许能够解决你的问题, 你可以仔细阅读以下内容或跳转源博客中阅读:
●GRU和LSTM作用相同,在捕捉长序列语义关联时,能有效抑制梯度消失或爆炸,效果都优于传统RNN且计算复杂度相比STM要小。

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^