CNN只加载了神经网络,还没开始训练,12G显存就占了12G,怎么回事?

# Fix the seed of NumPy's global random generator so that subsequent
# np.random calls produce the same sequence on every run.
# NOTE(review): this seeds NumPy only; Keras/TensorFlow weight initialisation
# presumably needs its own seed for full reproducibility -- confirm.
seed = 7
np.random.seed(seed)

def Conv2d_BN(x, nb_filter, kernel_size, strides=(1, 1), padding='same', name=None):
    """Apply a ReLU-activated Conv2D followed by BatchNormalization.

    Args:
        x: Input tensor fed to the convolution.
        nb_filter: Number of convolution filters (output channels).
        kernel_size: Convolution kernel size.
        strides: Convolution strides.
        padding: Convolution padding mode.
        name: Optional base name. When given, the layers are named
            ``name + '_conv'`` and ``name + '_bn'``; otherwise both layer
            names are left as ``None`` so Keras picks them.

    Returns:
        The batch-normalised output tensor.
    """
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'

    # The ReLU is applied inside the convolution, i.e. *before* normalisation.
    conv_out = Conv2D(
        nb_filter,
        kernel_size,
        strides=strides,
        padding=padding,
        activation='relu',
        name=conv_name,
    )(x)
    # Normalise over axis 3 (presumably the channel axis of a channels-last
    # tensor -- confirm against the backend's image data format).
    return BatchNormalization(axis=3, name=bn_name)(conv_out)

def Conv_Block(inpt, nb_filter, kernel_size, strides=(1, 1), with_conv_shortcut=False):
    """Build one bottleneck residual block (1x1 -> 3x3 -> 1x1 plus a skip path).

    Args:
        inpt: Input tensor of the block.
        nb_filter: Indexable of three filter counts, used for the first 1x1,
            the 3x3 and the last 1x1 convolution respectively.
        kernel_size: Kernel size of the shortcut convolution only; the main
            path always uses the fixed 1x1 / 3x3 / 1x1 kernels.
        strides: Strides of the first 1x1 convolution and of the shortcut
            convolution.
        with_conv_shortcut: If true, the skip path goes through a
            ``Conv2d_BN`` projection; otherwise ``inpt`` is added unchanged.

    Returns:
        The element-wise sum of the main path and the skip path.
    """
    main = Conv2d_BN(inpt, nb_filter=nb_filter[0], kernel_size=(1, 1), strides=strides, padding='same')
    main = Conv2d_BN(main, nb_filter=nb_filter[1], kernel_size=(3, 3), padding='same')
    main = Conv2d_BN(main, nb_filter=nb_filter[2], kernel_size=(1, 1), padding='same')

    if not with_conv_shortcut:
        # Identity skip: presumably requires inpt to already have the same
        # shape as the main path output -- confirm at the call site.
        return add([main, inpt])

    # Projection skip: bring the input to nb_filter[2] channels / new stride.
    projected = Conv2d_BN(inpt, nb_filter=nb_filter[2], strides=strides, kernel_size=kernel_size)
    return add([main, projected])

# Build a ResNet-50-style classifier with 25 output classes.
inpt = Input(shape=(224, 224, 3))
# Expected input: a batch of 224 x 224 x 3 images.

x = ZeroPadding2D((3, 3))(inpt)
# Stem: look for 64 kinds of features with a 7x7 kernel, as the original
# comment on this line intended. The kernel used to be (95, 95), which
# contradicts that comment; this is also the layer named in the reported
# "No algorithm worked!" error (model/conv2d/Relu).
x = Conv2d_BN(x, nb_filter=64, kernel_size=(7, 7), strides=(2, 2), padding='valid')
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# One entry per stage: (filters of the bottleneck block, strides of the first
# block, number of blocks). The first block of a stage uses a convolutional
# shortcut; the remaining blocks use an identity shortcut.
stages = [
    ([64, 64, 256], (1, 1), 3),
    ([128, 128, 512], (2, 2), 4),
    ([256, 256, 1024], (2, 2), 6),
    ([512, 512, 2048], (2, 2), 3),
]
for filters, first_strides, num_blocks in stages:
    x = Conv_Block(x, nb_filter=filters, kernel_size=(3, 3), strides=first_strides, with_conv_shortcut=True)
    for _ in range(num_blocks - 1):
        x = Conv_Block(x, nb_filter=filters, kernel_size=(3, 3))

# With the 7x7 stem the spatial size goes 230 -> 112 -> 56 -> 56 -> 28 -> 14
# -> 7, so a 7x7 average pool covers the whole final feature map. (The former
# 5x5 pool matched the 5x5 map produced by the 95x95 stem.)
x = AveragePooling2D(pool_size=(7, 7))(x)
# x = Dropout(0.9)(x)
x = Flatten()(x)
x = Dense(25, activation='softmax')(x)

model = Model(inputs=inpt, outputs=x)
sgd = SGD(decay=0.0001, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.summary()

神经网络如上所示,只运行到上面的这个步骤,我的12G显存就占了11G多。

然后下一步

print('Training ------------')

# Train the model, holding out 20% of the data for validation.
# shuffle=True reshuffles the training samples before every epoch (the
# original author added it as a guard against overfitting).
# Author's rule of thumb: as long as validation accuracy rises together with
# training accuracy, the model is fine.
# NOTE(review): per the Keras docs the validation split is taken from the
# last samples *before* shuffling -- confirm the data is not ordered by class.
# model_load('my_model_resnet.h5')
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=50,
    validation_split=0.2,
    shuffle=True,
)


NotFoundError:  No algorithm worked!
	 [[node model/conv2d/Relu (defined at <ipython-input-17-25b86b2dbd84>:5) ]] [Op:__inference_train_function_12948]

Function call stack:
train_function

 

在ubuntu系统里面。我用的是3060显卡,已经安装了显卡驱动,CUDA驱动,cuDNN驱动等。

为什么会出现这个情况,还没training显存就占满了。求助,麻烦各位大佬帮忙。

Keras(TensorFlow 后端)默认会预先占满显存,这并不代表模型真的用了这么多。下列代码改为动态使用,也就是按需分配:

import keras.backend.tensorflow_backend as KTF
import tensorflow as tf
import os
 
 
# Expose only the first GPU to this process. CUDA device indices start at 0,
# so "0" is the right value on a single-GPU machine; a value of "1" selects
# the second GPU and would hide the only GPU on such a machine.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# TensorFlow 1.x style session configuration (tf.ConfigProto / tf.Session).
# With TensorFlow 2.x use tf.config.experimental.set_memory_growth instead.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of grabbing all of it
sess = tf.Session(config=config)

# Make Keras run on the session configured above.
KTF.set_session(sess)


os.environ["CUDA_VISIBLE_DEVICES"] 指的是要使用的 GPU 编号(从 0 开始);allow_growth 表示按需动态申请显存。以上是 TensorFlow 1.x 的写法,TensorFlow 2.x 请改用下面的代码:

# TensorFlow 2.x: allocate GPU memory on demand instead of reserving it all.
# Must run before any GPU has been initialised (i.e. before building a model).
physical_devices = tf.config.list_physical_devices('GPU')
# Configure every visible GPU: indexing [0] would raise IndexError when no
# GPU is visible, and would leave other GPUs of a multi-GPU machine
# unconfigured.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

 

可以参考这篇文章https://blog.csdn.net/Zserendipity/article/details/105301983