使用keras进行深度学习时准确率很低的原因

keras单独使用CNN和LSTM时训练和测试的准确率都很低

import keras
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Model, load_model, Sequential
from keras.layers import *
from tensorflow import keras
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

# ——————————————————————————数据预处理————————————————————————————
data = pd.read_excel(r".\data\lncRNA(function).xlsx")  # lncRNA与其他分子的相互作用关系
for i in range(len(data)):
    data['cancer type'][i].lower()
data2 = pd.read_excel(r".\data\lncRNA(mechanism).xlsx")  # lncRNA与其他分子的相互作用关系
for i in range(len(data2)):
    data2['cancer type'][i].lower()
data = pd.merge(data, data2, on=['name', 'cancer type'], how='inner')  # 将两个表按cancer type进行内连接
output = data['cancer type']
print(len(output))
print(type(output))
# 用lncRNA的结构来判断该疾病产生原因
# 输入矩阵 10个RNA结构
data = [list(t) for t in zip(data['Immune'], data['autophagy/Apoptosis'], data['Cell Growth'], data['EMT'],
                             data['Survival'], data['TF'], data['Enhancer'], data['Variant'], data['MiRNA'],
                             data['Methylation'])]
# 将结构中的单词和字符串转换成数字便于后续处理
print(type(data))
for i in range(len(data)):
    if data[i][1] == '0':
        data[i][1] = 0
    else:
        data[i][1] = 1
    if data[i][0] == '0':
        data[i][0] = 0
    else:
        data[i][0] = 1
data = np.array(data)
input = data
mylist = set(output)
mydata = []
for item in mylist:
    mydata += [item]
print("type:", len(mydata))
# 将疾病名字标号成数字便于训练
for i in range(len(output)):
    output[i] = mydata.index(output[i])
output = list(output)
output = minmax_scale(output, feature_range=(0, 143))
output = np.array(output, dtype=int)
print("预处理完毕")
# ——————————————————————————模型训练————————————————————————————
train_rate = 0.6
y = output
x, test_X, _, test_y = train_test_split(input, output,  train_size=train_rate,test_size=1-train_rate,
                                                 random_state=2, shuffle=True)
print(x.shape, y.shape)

# design network
# CNN


def design_model():
    # 模型参数
    model = Sequential()
    # Convolution  卷积
    pool_length = 1  # 池化长度
    # LSTM
    lstm_output_size = 70  # LSTM 层输出尺寸
    # Training   训练参数
    batch_size = 64  # 批数据量大小
    nb_epoch = 10000  # 迭代次数
    learning_rate = 0.0001

    inputs = Input(shape=(10, 1))  # 输入特征接收维度
    a = Dropout(0.25)(inputs)
    conv1 = Convolution1D(1, 3, strides=1, padding="same", dilation_rate=1, activation='relu')(a)  # filters, kernel_size, strides=1
    max1 = MaxPooling1D(pool_size=pool_length)(conv1)
    conv2 = Convolution1D(2, 3, strides=1, padding="same", dilation_rate=1, activation='relu')(max1)  # filters, kernel_size, strides=1
    max2 = MaxPooling1D(pool_size=pool_length)(conv2)
    conv3 = Convolution1D(4, 3, strides=1, padding="same", dilation_rate=1, activation='relu')(max2)  # filters, kernel_size, strides=1
    max3 = MaxPooling1D(pool_size=pool_length)(conv3)
    lstm1 = LSTM(lstm_output_size)(max3)
    lstm1 = Dropout(0.25)(lstm1)
    output = Dense(144, activation='softmax')(lstm1)
    model = Model(inputs=inputs, outputs=output)
    model.summary()
    adam = Adam(lr=learning_rate)

    learningRate = 0.00001
    momentum = 0.9
    decay_rate = 0.01
    sgd = SGD(lr=learningRate, momentum=momentum, decay=decay_rate, nesterov=False)

    model.compile(optimizer=adam,
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    # sparse_categorical_crossentropy
    # accuracy
    # 训练
    callback = keras.callbacks.EarlyStopping(monitor='loss', patience=100)
    # 读取已保存的模型
    #model = load_model('my_model9.h5')
    history = model.fit(x, y, batch_size=batch_size, epochs=nb_epoch,
                        validation_split=0.2, callbacks=[callback])
    # 保存模型 17.3%
    model.save('my_model9.h5')
    # plot history
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='val')
    score = model.evaluate(test_X, test_y, batch_size=32)[1]
    print(score)  # [loss,accuracy]
    print("%s: %.2f%%" % (model.metrics_names[1], score * 100))

    plt.legend()
    plt.show()


design_model()




数据集下载链接
链接:https://pan.baidu.com/s/1Ce1-ADIo8NtkBt_zH81BNg?pwd=7av9
提取码:7av9
训练和验证准确率:

img

测试准确率:

img

1 网络结构是否合理
2 关键参数是否合理