用tensorflow图片分割遇到:“Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode. ”
最近做毕设,在网上找到了一份 FCN 的代码,想用它来做图片分割。之前我已经跑通过 U-Net,但换成这个 FCN 模型后,训练时就出了问题,代码如下:
import sys
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.models import load_model
# from keras.optimizers import Adam
from keras.optimizer_v2.adam import Adam
from custom_loss import *
from custom_metrics import *
from data_gens import *
from models import *
sys.setrecursionlimit(1000)

# Script entry point: train an FCN-32s segmentation model on the Weizmann
# horse dataset, save it to 'fcn32s.h5', then reload it and dump the input
# image, the predicted mask and the ground-truth mask for the first 100
# validation samples into 'output/'.
if __name__ == "__main__":
    # Stdlib helpers used only by this entry point, imported locally so the
    # file-level import block stays untouched.
    import os
    from itertools import islice

    # Dataset root (Weizmann horse database, not VOC 2012).
    horse_path = 'weizmann_horse_db'
    batch_size = 2
    # Shapes shared by every generator and by the model.
    # NOTE(review): the last axis of mask_hw presumably equals class_no (2)
    # — confirm against horse_gen.
    input_hw = (299, 299, 3)
    mask_hw = (299, 299, 2)
    # Number of validation samples written out in the test phase.
    test_image_count = 100

    train_gen = horse_gen.get_horse_generator(
        horse_path, train_or_val='train', batch_size=batch_size,
        input_hw=input_hw, mask_hw=mask_hw)
    val_gen = horse_gen.get_horse_generator(
        horse_path, 'val', batch_size=batch_size,
        input_hw=input_hw, mask_hw=mask_hw)

    # Alternative architectures exposed by the project's `models` module:
    # model = FCN.get_fcn8s_model(input_shape=(299, 299, 3), class_no=2)
    # model = FCN.get_fcn16s_model(input_shape=(299, 299, 3), class_no=2)
    model = FCN.get_fcn32s_model(input_shape=input_hw, class_no=2)
    # model = Unet.get_unet_model(input_shape=(299, 299, 3), class_no=2)
    # model = DeepLabV3Plus.get_model(input_shape=(299, 299, 3), atrous_rate=(4, 8, 12), class_no=2)

    # NOTE(review): the OOM reported for this script was raised in the Adam
    # update of vgg_decoder/fc6/kernel (shape [7, 7, 512, 4096], roughly
    # 392 MiB as float32). Adam keeps two extra slot tensors per weight, so
    # this layer's weights + optimizer state are likely the dominant cost
    # rather than the batch size — TODO confirm. A slot-free optimizer
    # (e.g. 'sgd') or a smaller model variant are the knobs to try.
    # model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[mean_iou, 'acc'])
    model.compile(loss=categorical_focal_loss(alpha=None, gamma=2.),
                  optimizer='adam', metrics=[mean_iou, 'acc'])
    model.summary()

    checkpoint = ModelCheckpoint('fcn32s.h5', verbose=1, save_best_only=False, period=3)  # every 3 epoch
    tensor_board = TensorBoard(log_dir='log', histogram_freq=0, write_graph=True,
                               write_grads=True, write_images=True)
    # Halve the learning rate when the training loss has not improved for
    # 2 epochs, down to a floor of 1e-6.
    learning_rate_reduction = ReduceLROnPlateau(monitor='loss', patience=2, verbose=1,
                                                factor=0.5, min_lr=0.000001)

    model.fit(
        train_gen,
        steps_per_epoch=250,
        epochs=50,
        validation_data=val_gen,
        validation_steps=3,
        callbacks=[checkpoint, tensor_board, learning_rate_reduction],
    )

    print('Start saving model into h5 file')
    model.save('fcn32s.h5')
    # tf.contrib.saved_model.save_keras_model(model, 'output')  # available on tensorflow 1.12

    print('======== Start Test ===========')
    # compile=False: the reloaded model is only used for inference, so the
    # custom loss/metrics do not have to be registered; the custom resize
    # layer still does.
    model = load_model('fcn32s.h5', compile=False,
                       custom_objects={'BilinearResizeLayer2D': BilinearResizeLayer2D})

    # Take 100 images from the val set to check the results.
    val_gen2 = horse_gen.get_horse_generator(
        horse_path, 'val', batch_size=1, input_hw=input_hw, mask_hw=mask_hw)

    # Image.save() does not create missing directories.
    os.makedirs('output', exist_ok=True)

    # islice bounds the loop without pulling an extra batch from the
    # generator and lets the script finish normally (exit status 0)
    # instead of calling exit(1) on success.
    for i, (val_images, mask) in enumerate(islice(val_gen2, test_image_count)):
        # Rescale the network input back to 0..255 for saving.
        # NOTE(review): assumes the generator normalised pixels to [-1, 1]
        # — confirm against horse_gen.
        img_np = (val_images[0] + 1.) * 127.5
        im0 = Image.fromarray(np.uint8(img_np))
        im0.save('output/{}_img.jpg'.format(i))

        # Per-pixel class index from the channel axis; class 1 is painted
        # white (255) so the mask is visible as a grayscale image.
        res = model.predict(val_images)[0]
        pred_label = res.argmax(axis=2)
        pred_label[pred_label == 1] = 255
        im1 = Image.fromarray(np.uint8(pred_label))
        im1.save('output/{}_pred.png'.format(i))

        true_label = mask[0].argmax(axis=2)
        true_label[true_label == 1] = 255
        im2 = Image.fromarray(np.uint8(true_label))
        im2.save('output/{}_true.png'.format(i))

    print('End test')
2022-04-13 19:45:26.764431: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 572.75MiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
Traceback (most recent call last):
File "E:\anaconda3\envs\hr\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-2c9bf53164ed>", line 1, in <module>
runfile('E:/pycharm数据/AI/image_segmentation-master/main.py', wdir='E:/pycharm数据/AI/image_segmentation-master')
File "E:\PyCharm 2021.3\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "E:\PyCharm 2021.3\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "E:/pycharm数据/AI/image_segmentation-master/main.py", line 39, in <module>
model.fit(
File "E:\anaconda3\envs\hr\lib\site-packages\keras\engine\training_v1.py", line 777, in fit
return func.fit(
File "E:\anaconda3\envs\hr\lib\site-packages\keras\engine\training_generator_v1.py", line 570, in fit
return fit_generator(
File "E:\anaconda3\envs\hr\lib\site-packages\keras\engine\training_generator_v1.py", line 252, in model_iteration
batch_outs = batch_function(*batch_data)
File "E:\anaconda3\envs\hr\lib\site-packages\keras\engine\training_v1.py", line 1076, in train_on_batch
outputs = self.train_function(ins) # pylint: disable=not-callable
File "E:\anaconda3\envs\hr\lib\site-packages\keras\backend.py", line 4186, in __call__
fetched = self._callable_fn(*array_vals,
File "E:\anaconda3\envs\hr\lib\site-packages\tensorflow\python\client\session.py", line 1483, in __call__
ret = tf_session.TF_SessionRunCallable(self._session._session,
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[7,7,512,4096] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node training/Adam/Adam/update_vgg_decoder/fc6/kernel/ResourceApplyAdam}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
Process finished with exit code 0
我去网上搜了类似的问题,都说是 GPU 显存不足导致的。我按照那些解答修改了 batch size,但还是不行。各位能不能给我一些别的解决方案?