在做垃圾分类的卷积神经网络训练时出现的问题

from keras.layers import Dense
from keras.models import Model
from keras.metrics import top_k_categorical_accuracy
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.preprocessing import image
from keras.applications.inception_resnet_v2 import preprocess_input
from keras.preprocessing.image import load_img
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
import os
import random
import numpy as np

# Maps each category name (also the name of its sub-directory under data_dir)
# to the integer label written to the list file.
class_names_to_ids = {'cardboard': 0, 'glass': 1, 'metal': 2, 'paper':3, 'plastic':4, 'trash':5}
data_dir = 'dataset/'
output_path = 'list.txt'
# Write one "<class>/<image> <label>" line per image found on disk.
# The context manager closes the file even if a class directory is missing.
with open(output_path, 'w') as fd:
    for class_name, class_id in class_names_to_ids.items():
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # list file (and therefore any seeded shuffle of it) reproducible.
        for image_name in sorted(os.listdir(os.path.join(data_dir, class_name))):
            fd.write('{}/{} {}\n'.format(class_name, image_name, class_id))

# Number of shuffled entries held out for validation; the rest is training data.
_NUM_VALIDATION = 505
_RANDOM_SEED = 0
list_path = 'list.txt'
train_list_path = 'list_train.txt'
val_list_path = 'list_val.txt'
with open(list_path) as fd:
    lines = fd.readlines()
# Seed before shuffling so the same input list always yields the same split.
random.seed(_RANDOM_SEED)
random.shuffle(lines)
# The first _NUM_VALIDATION shuffled lines form the validation list and the
# remainder the training list. Lines keep their trailing newline, so they are
# written back unchanged.
with open(train_list_path, 'w') as fd:
    fd.writelines(lines[_NUM_VALIDATION:])
with open(val_list_path, 'w') as fd:
    fd.writelines(lines[:_NUM_VALIDATION])

def get_train_test_data(list_file):
    """Read an image list file and return parallel lists of paths and labels.

    Each non-empty line must have the form ``<relative path> <label>``, where
    the label is the integer after the last space. The path itself may
    contain spaces, and the label may have more than one digit.

    Args:
        list_file: Path of the list file to read.

    Returns:
        A tuple ``(paths, labels)``: ``paths`` is a list of str and ``labels``
        a list of int, in file order.

    Raises:
        ValueError: If a non-empty line has no space separator or its label
            is not an integer.
    """
    image_paths = []
    labels = []
    # The context manager guarantees the file handle is released.
    with open(list_file) as list_fd:
        for raw_line in list_fd:
            entry = raw_line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the LAST space only, so the whole label is captured
            # rather than just its final character.
            path, label = entry.rsplit(' ', 1)
            image_paths.append(path)
            labels.append(int(label))
    return image_paths, labels
# Load the image paths and integer labels of both splits, reusing the path
# constants that were used to write the list files.
x_train, y_train = get_train_test_data(train_list_path)
x_test, y_test = get_train_test_data(val_list_path)

def process_train_test_data(x_path, target_size=None, data_root='dataset/'):
    """Load images from disk and apply the InceptionResNetV2 preprocessing.

    Args:
        x_path: Iterable of image paths relative to ``data_root``.
        target_size: Optional ``(height, width)`` passed to ``load_img`` so
            every image is resized on load. ``None`` keeps the original
            resolution.
        data_root: Directory that the relative paths are resolved against.

    Returns:
        A list with one preprocessed image array per input path, in order.
    """
    images = []
    for image_path in x_path:
        img_load = load_img(os.path.join(data_root, image_path), target_size=target_size)
        img = image.img_to_array(img_load)
        images.append(preprocess_input(img))
    return images


# Resize everything to one fixed (height, width). Full-resolution images held
# as float arrays are what exhaust system memory here, and images of differing
# sizes could not be stacked into a single batch array anyway. 299x299 is the
# documented default input size of InceptionResNetV2.
_INPUT_SIZE = (299, 299)
train_images = process_train_test_data(x_train, target_size=_INPUT_SIZE)
test_images = process_train_test_data(x_test, target_size=_INPUT_SIZE)

# Backbone without its classification head; pooling='avg' is requested so the
# backbone output can feed the Dense classifier directly.
base_model = InceptionResNetV2(include_top=False, pooling='avg')
# One softmax unit per class, derived from the label map so the head stays in
# sync if categories are added or removed.
outputs = Dense(len(class_names_to_ids), activation='softmax')(base_model.output)
model = Model(base_model.inputs, outputs)

save_dir='train_model'
# Create the output directory up front; otherwise the first checkpoint write
# fails only after a full epoch has been trained.
os.makedirs(save_dir, exist_ok=True)
# NOTE: the monitored key must match the metric name used in model.compile().
# With metrics=['accuracy'], Keras >= 2.3 / tf.keras 2.x reports 'val_accuracy'
# (older releases reported 'val_acc'). A mismatched key breaks both the
# filename formatting and the save_best_only comparison.
filepath="model_{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(os.path.join(save_dir, filepath), monitor='val_accuracy', verbose=1,
                             save_best_only=True)

def acc_top3(y_true, y_pred):
    """Top-3 categorical accuracy metric (wraps top_k_categorical_accuracy with k=3)."""
    top3_accuracy = top_k_categorical_accuracy(y_true, y_pred, k=3)
    return top3_accuracy
  
def acc_top5(y_true, y_pred):
    """Top-5 categorical accuracy metric (wraps top_k_categorical_accuracy with k=5)."""
    top5_accuracy = top_k_categorical_accuracy(y_true, y_pred, k=5)
    return top5_accuracy

# Metrics reported during training: plain accuracy plus the two top-k wrappers.
tracked_metrics = ['accuracy', acc_top3, acc_top5]
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=tracked_metrics,
)

# Fix the one-hot width explicitly. Without num_classes, to_categorical infers
# it from the largest label present, so a split that happens to lack the
# highest class id would produce targets narrower than the model output.
num_classes = len(class_names_to_ids)
model.fit(np.array(train_images), to_categorical(y_train, num_classes=num_classes),
          batch_size=4,
          epochs=5,
          shuffle=True,
          validation_data=(np.array(test_images), to_categorical(y_test, num_classes=num_classes)),
          callbacks=[checkpoint])

代码如上

2021-05-11 19:32:13.478926: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found
2021-05-11 19:32:13.485047: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-05-11 19:35:00.428481: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library nvcuda.dll
2021-05-11 19:35:00.708562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1050 computeCapability: 6.1
coreClock: 1.493GHz coreCount: 5 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 104.43GiB/s
2021-05-11 19:35:00.721166: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found
2021-05-11 19:35:00.728740: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cublas64_10.dll'; dlerror: cublas64_10.dll not found
2021-05-11 19:35:00.985090: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cufft64_10.dll'; dlerror: cufft64_10.dll not found
2021-05-11 19:35:00.991515: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'curand64_10.dll'; dlerror: curand64_10.dll not found  
2021-05-11 19:35:00.997949: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cusolver64_10.dll'; dlerror: cusolver64_10.dll not found
2021-05-11 19:35:01.771515: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cusparse64_10.dll'; dlerror: cusparse64_10.dll not found
2021-05-11 19:35:01.779146: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudnn64_7.dll'; dlerror: cudnn64_7.dll not found      
2021-05-11 19:35:01.878622: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1598] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are 
installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2021-05-11 19:35:02.402577: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2021-05-11 19:35:03.168393: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x270ae419cb0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-05-11 19:35:03.174235: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-05-11 19:35:03.181851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-05-11 19:35:03.186329: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]     
Epoch 1/5
2021-05-11 19:42:56.087465: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 49873920 exceeds 10% of free system memory.
2021-05-11 19:42:56.200571: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 49873920 exceeds 10% of free system memory.
2021-05-11 19:42:56.659807: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 97929216 exceeds 10% of free system memory.
2021-05-11 19:42:56.746821: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 97929216 exceeds 10% of free system memory.
2021-05-11 19:42:57.348089: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 70090752 exceeds 10% of free system memory.

终端输出如上,训练一直停在第一个 epoch,得不到结果,是内存太小了吗?

Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found

你的 CUDA 没有装好(日志提示找不到 cudart64_101.dll 等动态库),所以 TensorFlow 跳过了 GPU,改用 CPU 训练。最后的内存警告说明系统内存不足,可以把图像尺寸或者 batch size 再改小一点试试。

 

你好,我是有问必答小助手。为了技术专家团更好地为您解答问题,烦请您补充下(1)问题背景详情,(2)您想解决的具体问题,(3)问题相关代码图片或者报错信息。便于技术专家团更好地理解问题,并给出解决方案。

您可以点击问题下方的【编辑】,进行补充修改问题。