keras单独使用CNN和LSTM时训练和测试的准确率都很低
import os

# Configure GPU selection and TensorFlow log verbosity BEFORE any
# TensorFlow/Keras import, so the settings are guaranteed to be in place when
# the framework initialises its logging and CUDA runtime.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import keras
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Model, load_model, Sequential
from keras.layers import *
# NOTE: this rebinds the name `keras` to `tensorflow.keras` for the rest of
# the file (e.g. `keras.callbacks` below resolves to tensorflow.keras).
from tensorflow import keras
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from tensorflow.python.client import device_lib

# Show which devices TensorFlow can see with the settings above.
print(device_lib.list_local_devices())
# -------------------------- Data preprocessing --------------------------
# Load the two lncRNA annotation sheets (function / mechanism).
data = pd.read_excel(r".\data\lncRNA(function).xlsx")
# Normalise the join key to lower case. `str.lower()` returns a new string,
# so the result has to be assigned back to the column to have any effect.
data['cancer type'] = data['cancer type'].str.lower()
data2 = pd.read_excel(r".\data\lncRNA(mechanism).xlsx")
data2['cancer type'] = data2['cancer type'].str.lower()
# Inner-join the two tables on (name, cancer type).
data = pd.merge(data, data2, on=['name', 'cancer type'], how='inner')
# The cancer type of every merged row is the classification target.
output = data['cancer type']
print(len(output))
print(type(output))
# Use the lncRNA annotations to predict the cancer type.
# Input matrix: one row per sample, 10 annotation columns.
feature_columns = ['Immune', 'autophagy/Apoptosis', 'Cell Growth', 'EMT',
                   'Survival', 'TF', 'Enhancer', 'Variant', 'MiRNA',
                   'Methylation']
rows = [list(values) for values in zip(*(data[name] for name in feature_columns))]
print(type(rows))
# Binarise the first two columns: the string '0' becomes 0, anything else 1.
# NOTE(review): a numeric 0 cell is not equal to the string '0' and therefore
# maps to 1 -- confirm that these columns are read from Excel as text.
for row in rows:
    row[1] = 0 if row[1] == '0' else 1
    row[0] = 0 if row[0] == '0' else 1
data = np.array(rows)
input = data
mylist = set(output)
# Sort the class names so every run assigns the same id to the same class;
# iterating a set of strings directly can yield a different order per run.
mydata = sorted(mylist, key=str)
print("type:", len(mydata))
# Map every cancer-type name to its integer class id with one dict lookup per
# sample. The ids are already the contiguous range 0..len(mydata)-1, so no
# rescaling is applied: min-max scaling followed by an int cast can truncate
# a float such as 4.999... to 4 and silently merge or shift classes.
label_to_id = {label: idx for idx, label in enumerate(mydata)}
output = np.array([label_to_id[label] for label in output], dtype=int)
print("预处理完毕")
# ----------------------------- Model training -----------------------------
train_rate = 0.6
# Keep the training labels returned by the split. The split shuffles the
# samples, so the labels must come from the same call to stay aligned with
# `x` row by row and to have the same length as `x`.
x, test_X, y, test_y = train_test_split(input, output, train_size=train_rate,
                                        test_size=1 - train_rate,
                                        random_state=2, shuffle=True)
print(x.shape, y.shape)
# design network
# CNN
def _build_cnn_lstm(num_classes, pool_length=1, lstm_output_size=70):
    """Build the (uncompiled) Conv1D -> LSTM -> softmax classifier.

    Args:
        num_classes: Number of units of the softmax output layer.
        pool_length: Pool size of every MaxPooling1D layer.
        lstm_output_size: Number of units of the LSTM layer.

    Returns:
        A Keras ``Model`` that takes inputs of shape ``(10, 1)``.
    """
    inputs = Input(shape=(10, 1))  # 10 features, one channel each
    hidden = Dropout(0.25)(inputs)
    # Three Conv1D + max-pooling stages with 1, 2 and 4 filters.
    # With pool_length == 1 the pooling keeps the sequence length unchanged.
    for filters in (1, 2, 4):
        hidden = Convolution1D(filters, 3, strides=1, padding="same",
                               dilation_rate=1, activation='relu')(hidden)
        hidden = MaxPooling1D(pool_size=pool_length)(hidden)
    hidden = LSTM(lstm_output_size)(hidden)
    hidden = Dropout(0.25)(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=outputs)


def design_model():
    """Train, save, plot and evaluate the CNN + LSTM classifier.

    Reads the module-level arrays ``x``, ``y`` (training split) and
    ``test_X``, ``test_y`` (test split). Saves the trained model to
    ``my_model9.h5``, plots the training/validation loss curves and prints
    the test accuracy. Returns ``None``.

    Raises:
        ValueError: If ``x`` and ``y`` hold a different number of samples.
    """
    # Training parameters
    batch_size = 64
    nb_epoch = 10000  # upper bound; early stopping normally ends training sooner
    learning_rate = 0.0001

    # Give the data the (samples, 10, 1) layout declared by the Input layer.
    train_x = np.asarray(x, dtype="float32").reshape(-1, 10, 1)
    eval_x = np.asarray(test_X, dtype="float32").reshape(-1, 10, 1)
    train_y = np.asarray(y)
    eval_y = np.asarray(test_y)
    if len(train_x) != len(train_y):
        raise ValueError(
            "x and y must contain the same number of samples, got %d and %d"
            % (len(train_x), len(train_y)))

    # Size the softmax layer from the labels so every class id has a unit.
    num_classes = int(max(train_y.max(), eval_y.max())) + 1

    model = _build_cnn_lstm(num_classes)
    model.summary()
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])

    # Stop on the validation loss (not the training loss) and roll back to
    # the best weights seen, so training does not run on while overfitting.
    callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=100,
                                             restore_best_weights=True)
    history = model.fit(train_x, train_y, batch_size=batch_size,
                        epochs=nb_epoch, validation_split=0.2,
                        callbacks=[callback])
    # Persist the trained model; it can be reloaded later with load_model().
    model.save('my_model9.h5')

    # Plot the loss curves
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='val')
    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    score = model.evaluate(eval_x, eval_y, batch_size=32)[1]
    print(score)
    print("%s: %.2f%%" % (model.metrics_names[1], score * 100))
    plt.legend()
    plt.show()


design_model()
数据集下载链接
链接:https://pan.baidu.com/s/1Ce1-ADIo8NtkBt_zH81BNg?pwd=7av9
提取码:7av9
训练和验证准确率:
测试准确率:
1. 网络结构是否合理?
2. 关键参数是否合理?