import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import numpy as np
import re
import os
from datetime import datetime
import io
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import unicodedata
from matplotlib.font_manager import FontProperties

# Chinese-capable font for matplotlib plots (macOS font path)
font = FontProperties(fname="/Library/Fonts/Songti.ttc", size=8)
def process_cut(source_path, cut_path):
    """Split the raw corpus into conversations: lines starting with 'E'
    separate dialogs, lines starting with 'M' are utterances."""
    convs = []
    with open(source_path, 'r', encoding='utf8') as f:
        complete_dialog = []
        for line in f:
            line = line.strip('\n')
            # strip whitespace plus ASCII and full-width punctuation
            line = re.sub(r"[\s+\.\!\/_,$%?^*(+\"\']+|[+!,。?~@#.&*()""]+", "", line)
            if line == "":
                continue
            if line[0] == "E":
                if complete_dialog:
                    convs.append(complete_dialog)
                complete_dialog = []
            if line[0] == "M":
                complete_dialog.append(line[1:])
        if complete_dialog:  # keep the final dialog when the file does not end with an 'E' line
            convs.append(complete_dialog)
    return convs
def question_answer(convs):
    """Split each dialog into alternating question/answer sentence pairs."""
    questions = []
    answers = []
    for conv in convs:
        if len(conv) == 1:
            continue
        if len(conv) % 2 != 0:  # drop the trailing utterance of an odd-length dialog
            conv = conv[:-1]
        for i in range(len(conv)):
            # space-separate the characters so the whitespace tokenizer can split them,
            # and keep <start>/<end> as standalone tokens
            if i % 2 == 0:
                questions.append("<start> " + " ".join(conv[i]) + " <end>")
            else:
                answers.append("<start> " + " ".join(conv[i]) + " <end>")
    return questions, answers
def tokenize(datas):
    tokenizer = keras.preprocessing.text.Tokenizer(filters="")
    tokenizer.fit_on_texts(datas)
    voc_li = tokenizer.texts_to_sequences(datas)
    voc_li = keras.preprocessing.sequence.pad_sequences(voc_li, padding="post")
    return voc_li, tokenizer
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = keras.layers.GRU(
            self.enc_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform"
        )

    @tf.function
    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))
class BahdanauAttentionMechanism(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttentionMechanism, self).__init__()
        self.W1 = layers.Dense(units)
        self.W2 = layers.Dense(units)
        self.v = layers.Dense(1)

    @tf.function
    def call(self, query, values):
        # query: decoder hidden state (batch, units); values: encoder output (batch, seq_len, units)
        hidden_with_time_axis = tf.expand_dims(query, 1)
        score = self.v(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.math.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = layers.Embedding(vocab_size, embedding_dim)
        self.gru = layers.GRU(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform"
        )
        self.fc = layers.Dense(vocab_size)
        self.attention = BahdanauAttentionMechanism(self.dec_units)

    @tf.function
    def call(self, x, hidden, enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        # concatenate the context vector with the embedded input along the feature axis
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        return x, state, attention_weights
def loss(real, pred):
    # mask out padding positions (token id 0) when averaging the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction="none")
    loss_value = loss_obj(real, pred)
    mask = tf.cast(mask, dtype=loss_value.dtype)
    loss_value *= mask
    return tf.math.reduce_mean(loss_value)
def grad_loss(q, a, q_hidden, encoder, decoder, q_index, BATCH_SIZE):
    loss_value = 0
    with tf.GradientTape() as tape:
        q_output, q_hidden = encoder(q, q_hidden)
        a_hidden = q_hidden
        # start every decoded sequence with the <start> token
        a_input = tf.expand_dims([q_index.word_index["<start>"]] * BATCH_SIZE, 1)
        for vector in range(1, a.shape[1]):
            predictions, a_hidden, _ = decoder(a_input, a_hidden, q_output)
            loss_value += loss(a[:, vector], predictions)
            # teacher forcing: feed the ground-truth token as the next decoder input
            a_input = tf.expand_dims(a[:, vector], 1)
    batch_loss = loss_value / int(a.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    return batch_loss, tape.gradient(loss_value, variables)
def optimizer_loss(q, a, q_hidden, encoder, decoder, q_index, BATCH_SIZE, optimizer):
    batch_loss, grads = grad_loss(q, a, q_hidden, encoder, decoder, q_index, BATCH_SIZE)
    variables = encoder.trainable_variables + decoder.trainable_variables
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss
def source_data(source_path):
    convs = process_cut(source_path, None)
    questions, answers = question_answer(convs)
    return questions, answers
def train_model(q_hidden, encoder, decoder, q_index, BATCH_SIZE, dataset, steps_per_epoch, optimizer, checkpoint, checkpoint_prefix, summary_writer):
    i = 0
    EPOCHS = 200
    for epoch in range(EPOCHS):
        start = time.time()
        q_hidden = encoder.initialize_hidden_state()  # reset the encoder state at the start of each epoch
        total_loss = 0
        for (batch, (q, a)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = optimizer_loss(q, a, q_hidden, encoder, decoder, q_index, BATCH_SIZE, optimizer)
            total_loss += batch_loss
            with summary_writer.as_default():
                tf.summary.scalar("batch loss", batch_loss.numpy(), step=epoch)
            if batch % 100 == 0:
                print("Epoch {}, batch {}, loss: {:.4f}".format(epoch + 1, batch + 1, batch_loss.numpy()))
        with summary_writer.as_default():
            tf.summary.scalar("total loss", total_loss / steps_per_epoch, step=epoch)
        if (epoch + 1) % 100 == 0:
            i += 1
            print("==== Saving model checkpoint #{} ====".format(i))
            checkpoint.save(file_prefix=checkpoint_prefix)
        print("Epoch {}, total loss: {:.4f}".format(epoch + 1, total_loss / steps_per_epoch))
        print("Epoch time: {:.1f}s".format(time.time() - start))
def preprocess_question(question):
    question = "<start> " + " ".join(question) + " <end>"
    return question

def max_length(vectors):
    return max(len(vector) for vector in vectors)

def convert(index, vectors):
    for vector in vectors:
        if vector != 0:
            print("{}-->{}".format(vector, index.index_word[vector]))
if __name__ == "__main__":
    stamp = datetime.now().strftime("%Y%m%d-%H:%M:%S")
    source_path = "./chat/chat data.py"
    convs = process_cut(source_path, None)
    questions, answers = question_answer(convs)
    q_vec, q_index = tokenize(questions)
    a_vec, a_index = tokenize(answers)
    q_max_len = max_length(q_vec)
    a_max_len = max_length(a_vec)
    convert(q_index, q_vec[0])
    BUFFER_SIZE = len(q_vec)
    print("buffer size:", BUFFER_SIZE)
    BATCH_SIZE = 64
    steps_per_epoch = len(q_vec) // BATCH_SIZE
    embedding_dim = 256
    units = 1024
    q_vocab_size = len(q_index.word_index) + 1
    a_vocab_size = len(a_index.word_index) + 1
    # batch the dataset so each element has shape (BATCH_SIZE, seq_len); without .batch()
    # a single 2-D embedded sequence reaches the GRU and it raises "expected ndim=3, found ndim=2"
    dataset = tf.data.Dataset.from_tensor_slices((q_vec, a_vec)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    q_batch, a_batch = next(iter(dataset))
    print("question batch:", q_batch.shape)
    print("answer batch:", a_batch.shape)
    log_path = "logs3\\chat\\"
    summary_writer = tf.summary.create_file_writer(log_path)
    tf.summary.trace_on(graph=True, profiler=True)
    encoder = Encoder(
        q_vocab_size,
        embedding_dim,
        units,
        BATCH_SIZE)
    q_hidden = encoder.initialize_hidden_state()
    print(q_hidden)
    # invoke the model object itself (not .call()) so Keras builds the layers as usual
    q_output, q_hidden = encoder(q_batch, q_hidden)
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-en", step=0, profiler_outdir=log_path)
    tf.summary.trace_on(graph=True, profiler=True)
    attention_layer = BahdanauAttentionMechanism(10)
    attention_result, attention_weights = attention_layer(q_hidden, q_output)
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-atten", step=0, profiler_outdir=log_path)
    tf.summary.trace_on(graph=True, profiler=True)
    decoder = Decoder(
        a_vocab_size,
        embedding_dim,
        units,
        BATCH_SIZE
    )
    a_output, _, _ = decoder(
        tf.random.uniform((BATCH_SIZE, 1)),
        q_hidden,
        q_output
    )
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-dec", step=0, profiler_outdir=log_path)
    optimizer = tf.keras.optimizers.Adam()
    checkpoint_dir = "./models"
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(
        optimizer=optimizer,
        encoder=encoder,
        decoder=decoder
    )
Why am I getting this error: ValueError: Input 0 of layer "gru" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (17, 256)?

Read the message itself: expected ndim=3, found ndim=2. Full shape received: (17, 256). The GRU wants a 3-D input of shape (batch, timesteps, features), but it received a 2-D tensor. Here (17, 256) is a single padded question of length 17 after the 256-dimensional embedding, with no batch axis, because the dataset was never batched before next(iter(dataset)). Add dataset = dataset.batch(BATCH_SIZE, drop_remainder=True) after .shuffle(BUFFER_SIZE); then q_batch has shape (64, 17) and the embedding output becomes (64, 17, 256), which the GRU accepts.
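A minimal sketch of the shape rule, separate from the chatbot code above; the sizes 17, 64, 256, and 1024 mirror the values used there, while the vocabulary size 5000 is only a placeholder for illustration:

import tensorflow as tf

embedding = tf.keras.layers.Embedding(5000, 256)   # placeholder vocab size
gru = tf.keras.layers.GRU(1024, return_sequences=True, return_state=True)

single = tf.zeros((17,), dtype=tf.int32)           # one un-batched padded question
# gru(embedding(single))                           # embedding(single) is (17, 256), ndim=2 -> ValueError

batched = tf.zeros((64, 17), dtype=tf.int32)       # a batch of 64 padded questions
output, state = gru(embedding(batched), initial_state=tf.zeros((64, 1024)))
print(output.shape, state.shape)                   # (64, 17, 1024) (64, 1024)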