在训练一个用于音乐生成的VAE模型时,遇到报错:
ValueError Traceback (most recent call last)
/tmp/ipykernel_27/1449102967.py in <module>
98
99 random_vector_for_generation = tf.random.normal(shape = [num_examples_to_generate, latent_dim])
--> 100 model = CVAE(latent_dim)
/tmp/ipykernel_27/1449102967.py in __init__(self, latent_dim)
52
53 layers.Flatten(name = 'flatten'),
---> 54 layers.Dense(latent_dim + latent_dim, name = 'dense'),
55 ]
56 )
...
报错:
All layers added to a Sequential model should have unique names. Name "" is already the name of a layer in this model. Update the name argument to pass a unique name.
python version:3.7
tensorflow version:2.3
class Resnet1DBlock(tf.keras.Model):
    """1-D pre-activation residual block used by the CVAE encoder/decoder.

    NOTE(review): the root cause of the reported ValueError is below —
    ``super().__init__(name = '')`` gives *every* block the same
    empty-string name, and Keras Sequential requires unique layer names.
    Passing a unique name (or ``name=None`` for auto-naming) is the fix.
    """
    def __init__(self, kernel_size, filters, type = 'encode', prefix = ''):
        # name = '' -> all instances share the empty name; this triggers:
        # 'Name "" is already the name of a layer in this model.'
        super(Resnet1DBlock, self).__init__(name = '')
        # NOTE(review): call sites pass e.g. (64, 1), i.e. kernel_size=64,
        # filters=1 — confirm that argument order is intended.
        if type == 'encode':
            # Encoder path: strided Conv1D (downsamples by 2) + InstanceNorm.
            self.conv1a = layers.Conv1D(filters, kernel_size, 2, padding = "same", \
                                        name = prefix + 'conv1a')
            self.conv1b = layers.Conv1D(filters, kernel_size, 1, padding = "same", \
                                        name = prefix + 'conv1b')
            self.norm1a = tfa.layers.InstanceNormalization(name = prefix + 'norm1a')
            self.norm1b = tfa.layers.InstanceNormalization(name = prefix + 'norm1b')
        elif type == 'decode':
            # Decoder path: Conv1DTranspose + BatchNorm — inconsistent with
            # the encoder's InstanceNorm (the text below discusses this).
            self.conv1a = layers.Conv1DTranspose(filters, kernel_size, 1, padding = "same", \
                                                 name = prefix + 'conv1a')
            self.conv1b = layers.Conv1DTranspose(filters, kernel_size, 1, padding = "same", \
                                                 name = prefix + 'conv1b')
            self.norm1a = tf.keras.layers.BatchNormalization(name = prefix + 'norm1a')
            self.norm1b = tf.keras.layers.BatchNormalization(name = prefix + 'norm1b')
        else:
            # NOTE(review): returning from __init__ leaves a half-built
            # object (conv/norm attributes missing); raising would be safer.
            return None
    def call(self, input_tensor):
        # relu -> conv/norm/LeakyReLU twice, then skip connection + relu.
        x = tf.nn.relu(input_tensor)
        x = self.conv1a(x)
        x = self.norm1a(x)
        x = layers.LeakyReLU(0.4)(x)
        x = self.conv1b(x)
        x = self.norm1b(x)
        x = layers.LeakyReLU(0.4)(x)
        x += input_tensor
        return tf.nn.relu(x)
class CVAE(tf.keras.Model):
    """Convolutional VAE: the encoder compresses a (1, 90001) input down to
    2 * latent_dim units; the decoder mirrors it with Conv1DTranspose.

    NOTE(review): the final Dense outputs latent_dim + latent_dim —
    presumably the mean and log-variance halves of q(z|x); confirm against
    the omitted sampling code.
    """
    def __init__(self, latent_dim):
        super(CVAE, self).__init__()
        self.latent_dim = latent_dim
        # Encoder: 4 x (strided Conv1D + residual block), then Flatten/Dense.
        self.encoder = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape = (1, 90001), name = 'input_encoder'),
                layers.Conv1D(64, 1, 2, name = 'conv1_layer1'),
                Resnet1DBlock(64, 1, 'encode', prefix = 'res1_'),
                layers.Conv1D(128, 1, 2, name = 'conv1_layer2'),
                Resnet1DBlock(128, 1, 'encode', prefix = 'res2_'),
                layers.Conv1D(128, 1, 2, name = 'conv1_layer3'),
                Resnet1DBlock(128, 1, 'encode', prefix = 'res3_'),
                layers.Conv1D(256, 1, 2, name = 'conv1_layer4'),
                Resnet1DBlock(256, 1, 'encode', prefix = 'res4_'),
                layers.Flatten(name = 'flatten'),
                layers.Dense(latent_dim + latent_dim, name = 'dense'),
            ]
        )
        # Decoder: residual blocks + Conv1DTranspose back to 90001 channels.
        self.decoder = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape = (latent_dim,), name = 'input_decoder'),
                layers.Reshape(target_shape = (1, latent_dim)),
                Resnet1DBlock(512, 1, 'decode', prefix = 'res1_'),
                layers.Conv1DTranspose(512, 1, 1, name = 'Conv1Trans_Layer1'),
                Resnet1DBlock(256, 1, 'decode', prefix = 'res2_'),
                layers.Conv1DTranspose(256, 1, 1, name = 'Conv1Trans_Layer2'),
                Resnet1DBlock(128, 1, 'decode', prefix = 'res3_'),
                layers.Conv1DTranspose(128, 1, 1, name = 'Conv1Trans_Layer3'),
                Resnet1DBlock(64, 1, 'decode', prefix = 'res4_'),
                layers.Conv1DTranspose(64, 1, 1, name = 'Conv1Trans_Layer4'),
                layers.Conv1DTranspose(90001, 1, 1, name = 'Conv1Trans_Layer5')
            ]
        )
    # Omitted: functions unrelated to the reported error.
# Adam optimizer (lr = 3e-4). NOTE(review): `latent_dim` and
# `num_examples_to_generate` must be defined earlier in the notebook.
optimizer = tf.keras.optimizers.Adam(0.0003, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-08)
# A fixed batch of latent vectors, kept constant so generated samples can
# be compared across training epochs.
random_vector_for_generation = tf.random.normal(shape = [num_examples_to_generate, latent_dim])
model = CVAE(latent_dim)
我非常疑惑,明明已经给所有网络层命名了。
修改后的代码如下:
import tensorflow_addons as tfa
class Resnet1DBlock(tf.keras.Model):
    """1-D pre-activation residual block for the CVAE encoder/decoder.

    Fix for the reported ValueError: the original code called
    ``super().__init__(name='')``, so every block shared the empty-string
    name, and Keras Sequential rejects duplicate layer names.  Each block
    now gets a unique name derived from ``prefix`` (or a Keras
    auto-generated name when no prefix is given).

    Args:
        kernel_size: convolution kernel size (call sites pass this first).
        filters: number of convolution filters.
        type: 'encode' (strided Conv1D) or 'decode' (Conv1DTranspose).
        prefix: unique per-block prefix used to name the sub-layers.

    Raises:
        ValueError: if ``type`` is neither 'encode' nor 'decode'.
    """
    def __init__(self, kernel_size, filters, type='encode', prefix=''):
        # A unique (or auto-generated) model name avoids:
        # 'Name "" is already the name of a layer in this model.'
        super(Resnet1DBlock, self).__init__(
            name=(prefix + 'block') if prefix else None)
        if type == 'encode':
            self.conv1a = layers.Conv1D(filters, kernel_size, 2, padding="same",
                                        name=prefix + 'conv1a')
            self.conv1b = layers.Conv1D(filters, kernel_size, 1, padding="same",
                                        name=prefix + 'conv1b')
            self.norm1a = tfa.layers.InstanceNormalization(name=prefix + 'norm1a')
            self.norm1b = tfa.layers.InstanceNormalization(name=prefix + 'norm1b')
        elif type == 'decode':
            self.conv1a = layers.Conv1DTranspose(filters, kernel_size, 1, padding="same",
                                                 name=prefix + 'conv1a')
            self.conv1b = layers.Conv1DTranspose(filters, kernel_size, 1, padding="same",
                                                 name=prefix + 'conv1b')
            # InstanceNormalization on both paths, consistent with 'encode'.
            self.norm1a = tfa.layers.InstanceNormalization(name=prefix + 'norm1a')
            self.norm1b = tfa.layers.InstanceNormalization(name=prefix + 'norm1b')
        else:
            # Fail fast instead of silently leaving a half-built block
            # (the original `return None` left conv/norm attributes unset).
            raise ValueError("type must be 'encode' or 'decode', got %r" % (type,))
    def call(self, input_tensor):
        """relu -> conv/norm/LeakyReLU twice, skip connection, final relu."""
        x = tf.nn.relu(input_tensor)
        x = self.conv1a(x)
        x = self.norm1a(x)
        x = layers.LeakyReLU(0.4)(x)
        x = self.conv1b(x)
        x = self.norm1b(x)
        x = layers.LeakyReLU(0.4)(x)
        x += input_tensor
        return tf.nn.relu(x)
class CVAE(tf.keras.Model):
    """Convolutional VAE for audio.

    The encoder maps a (1, 90001) waveform to 2 * latent_dim dense units
    (mean/log-variance halves); the decoder mirrors it with transposed
    convolutions back to 90001 channels.
    """
    def __init__(self, latent_dim):
        super(CVAE, self).__init__()
        self.latent_dim = latent_dim
        # Encoder: four (strided Conv1D + residual block) stages, widths
        # taken from the schedule below, then Flatten + Dense.
        encoder_stack = [
            tf.keras.layers.InputLayer(input_shape=(1, 90001), name='input_encoder'),
        ]
        for idx, width in enumerate((64, 128, 128, 256), start=1):
            encoder_stack.append(layers.Conv1D(width, 1, 2, name='conv1_layer%d' % idx))
            encoder_stack.append(Resnet1DBlock(width, 1, 'encode', prefix='res%d_' % idx))
        encoder_stack.append(layers.Flatten(name='flatten'))
        encoder_stack.append(layers.Dense(latent_dim + latent_dim, name='dense'))
        self.encoder = tf.keras.Sequential(encoder_stack)
        # Decoder: mirror of the encoder, residual block then Conv1DTranspose
        # per stage, ending in a 90001-channel reconstruction.
        decoder_stack = [
            tf.keras.layers.InputLayer(input_shape=(latent_dim,), name='input_decoder'),
            layers.Reshape(target_shape=(1, latent_dim)),
        ]
        for idx, width in enumerate((512, 256, 128, 64), start=1):
            decoder_stack.append(Resnet1DBlock(width, 1, 'decode', prefix='res%d_' % idx))
            decoder_stack.append(layers.Conv1DTranspose(width, 1, 1, name='Conv1Trans_Layer%d' % idx))
        decoder_stack.append(layers.Conv1DTranspose(90001, 1, 1, name='Conv1Trans_Layer5'))
        self.decoder = tf.keras.Sequential(decoder_stack)
    # Omitted: methods unrelated to the reported error.
# Adam optimizer with the same hyper-parameters, spelled with explicit keywords.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0003, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# A fixed batch of latent vectors, kept constant so generated samples can be
# compared across training epochs.
random_vector_for_generation = tf.random.normal(
    shape=(num_examples_to_generate, latent_dim))
model = CVAE(latent_dim)
在Resnet1DBlock类中,decode分支的BatchNormalization应该改为tfa.layers.InstanceNormalization:encode分支用的就是InstanceNormalization,为了保持一致性,整个模型中应统一使用InstanceNormalization。
另外需要注意,报错"unique names"的真正触发点是 super().__init__(name = ''):它让每个Resnet1DBlock实例的名字都是空字符串"",而Sequential要求其中各层名字唯一。应该给每个块传入唯一的name(或传name=None让Keras自动生成唯一名字)。
# 考虑使用一个Sequential构建VAE
model = tf.keras.Sequential([
encoder, #伪代码,encoder为神经网络,里边的层请自己脑补
lambda层/自定义层, # 重采样层
decoder # decoder神经网络
])
至此,网络结构搭好了。但正如上述所说,loss如何计算呢?
因为keras.losses库中并没有为VAE准备损失,所以我们肯定考虑我们自己搞一个损失函数就得了,好,那进入下一个问题:
损失函数有哪些?
重构误差 + KL(q(z|x)||p(z))
重构误差:使用交叉熵损失函数,需要y_true,y_pred.
我们可以定义一个函数去计算loss,包括重构误差和KL散度,**重点看一下KL散度**
def compute_loss(model, x):
    """Reconstruction part of the VAE loss (deliberately incomplete).

    Args:
        model: the VAE model (encoder + decoder).
        x: a batch of real images.

    Fixes vs. the original: the Keras class is ``BinaryCrossentropy``
    (lower-case 'e', not ``BinaryCrossEntropy``) and the keyword is
    ``from_logits`` (not ``from_logit``) — the original spelling raises
    AttributeError / TypeError at runtime.
    """
    # from_logits=True because the decoder's last layer has no activation;
    # letting the loss apply the sigmoid internally is numerically stabler.
    cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    out = model(x)
    rec_loss = cross_entropy(y_true=x, y_pred=out)
    # Open question from the text: with this model wrapper we cannot reach
    # the encoder's mean/log-variance, so the KL term cannot be computed
    # here — which is why the text abandons this construction style.
可以看到,构建方式的选择将会影响我们能否计算loss。
至此,我们决定放弃第一期讲的构建网络的方式!
②考虑第二期介绍的构建网络的方式:Input与output
# Model definition for the Input/Output (functional) construction style.
latent_dimension = 50
# Reparameterisation trick, applied inside a Lambda layer.
def sampling(args):
    """Draw z = mean + eps * exp(logvar / 2) with eps ~ N(0, I).

    Args:
        args: pair (mean, log_var) produced by the encoder heads.
        (Fixed the misspelled parameter name ``agrs``; the only caller
        passes the pair positionally, so the rename is safe.)
    """
    mean, logvar = args[0], args[1]
    eps = tf.random.normal(tf.shape(mean))
    return mean + eps * tf.exp(logvar * 0.5)
# ---- Encoder ----
x = layers.Input(shape=(784,))
h1 = layers.Dense(200, activation='softplus')(x)
h2 = layers.Dense(200, activation='softplus')(h1)
# Mean and log-variance heads: no activation on purpose.
mean = layers.Dense(latent_dimension)(h2)
log_var = layers.Dense(latent_dimension)(h2)
# The sampling step is wrapped as a Lambda layer.
z = layers.Lambda(sampling, output_shape=(latent_dimension,))([mean, log_var])
# ---- Decoder ----
# Keep the Dense layers as named objects so their trained weights can be
# reused later to build a standalone generator.
h3 = layers.Dense(200, activation='softplus')
h4 = layers.Dense(200, activation='softplus')
h5 = layers.Dense(200, activation='softplus')
# No activation: the loss below consumes raw logits.
end = layers.Dense(784)
z1 = h3(z)
z2 = h4(z1)
z3 = h5(z2)
out = end(z3)
# ---- Model and loss ----
model = tf.keras.Model(x, out)
# Reconstruction term.  Fix: labels must be the full input x — the original
# tf.split(x, 2, axis=-1)[0] produced a (None, 392) tensor that cannot be
# matched against the (None, 784) logits and fails at graph-build time.
cross_ent = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=out), axis=-1)
# KL(q(z|x) || N(0, I)) for a diagonal Gaussian; the -0.5 factor was
# missing in the original (Kingma & Welling 2013, eq. 10).
KL = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=-1)
dvae_loss = tf.reduce_mean(cross_ent + KL)
model.add_loss(dvae_loss)
# The loss was attached via add_loss, so compile() needs no loss argument.
model.compile(optimizer='adam')
# Fit
history = model.fit(train_dataset, epochs=80, validation_data=test_dataset)
# After training, build a standalone generator that maps latent vectors to
# outputs, reusing the decoder layers — their weights were trained above, so
# calling the same layer objects on a new Input shares those weights.
decoder_input = layers.Input(shape=(latent_dimension,))
h1_decoder = h3(decoder_input) # 200 units; h3's weights were already trained
h2_decoder = h4(h1_decoder) # 200 units
h3_decoder = h5(h2_decoder) # 200 units
decoder_output = end(h3_decoder)
generator = tf.keras.Model(decoder_input,decoder_output)