I have a single RTX 2080 Ti GPU and I get out of memory while training my model. I suspect the program is actually running on the CPU. How do I change the code so that it runs on the GPU?
```python
import torch
import sys
import time
import torchvision.utils as vutils
from models.transformer_model import TransformerModel
from options.transformer_options import TransformerOptions
from data.data_loader import CreateDataLoader
from util.util import create_path_list
from util.visualizer import Visualizer
#device = 'cuda'
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
opt = TransformerOptions().parse()
torch.manual_seed(opt.random_seed)
torch.cuda.manual_seed(opt.random_seed)
prefix = ['Emmanuel_Macron', 'Kathleen', 'Jack_Ma', 'Theresa_May', 'Donald_Trump']
#prefix = ['P_Emmanuel/video_0', 'P_Kathleen/video_0', 'P_MaYun/video_0', 'P_Theresa/video_0', 'P_Trump/video_0']
A_path, B_path = create_path_list(opt, prefix)
#init data loader
A_loader = CreateDataLoader(opt, A_path)
B_loader = [CreateDataLoader(opt, [pp]) for pp in B_path]
A_loader_iter = enumerate(A_loader.load_data())
B_loader_iter = [enumerate(bb.load_data()) for bb in B_loader]
print('A: {} B: {}\n'.format(A_loader.shape, ' '.join([str(ii.shape) for ii in B_loader])))
model = TransformerModel(opt, len(B_loader))
visualizer = Visualizer(opt)
#valid
valid_x_A = model._get_variable(next(A_loader_iter)[-1])
vutils.save_image(model._inver_trans(valid_x_A.detach()), '{}/face.jpg'.format(opt.model_dir), nrow=model.nrow)
with torch.no_grad():
    valid_x_A = model.Bound(valid_x_A)[1]
    valid_x_B = [model.Bound(model._get_variable(next(bb)[-1]))[1] for bb in B_loader_iter]
model._save_valid_pic(valid_x_A, valid_x_B)
# start training
for step in range(opt.max_step):
    try:
        x_A = next(A_loader_iter)[-1]
    except StopIteration:
        A_loader_iter = enumerate(A_loader.load_data())
        x_A = next(A_loader_iter)[-1]
    x_B = []
    for ii, bb in enumerate(B_loader_iter):
        try:
            x_B.append(next(bb)[-1])
        except StopIteration:
            B_loader_iter[ii] = enumerate(B_loader[ii].load_data())
            x_B.append(next(B_loader_iter[ii])[-1])
    if x_A.size(0) != x_B[0].size(0):
        print("[!] Sampled dataset from A and B have different # of data. Try resampling...")
        continue
    iter_start_time = time.time()
    visualizer.reset()
    model.set_input(x_A, x_B)
    model.optimize_parameters()
    if step % opt.display_freq == 0:
        save_result = step % opt.update_html_freq == 0
        Boundary, Channel, Edge = model.get_current_visuals([valid_x_A, valid_x_B], idx=step)
        visualizer.display_current_results([Boundary, Channel, Edge], step, save_result, transformer=True)
    if step % opt.print_freq == 0:
        errors = model.get_current_errors()
        t = (time.time() - iter_start_time) / opt.batchSize
        visualizer.print_current_errors(opt.max_step, step, errors, t)
    if step % opt.save_epoch_freq == 0:
        print("[*] Save models to {}...".format(opt.model_dir))
        model.save(step)
```
```shell
python train_Transformer.py --root_dir F:/CelebV/ --name_landmarks_list all_98pt.txt --checkpoints_dir ./checkpoints/ --component Transformer --batchSize 256 --nThreads 0 --drop_last --dataset_mode transformer --display_freq 2000 --save_epoch_freq 2000 --update_html_freq 2000 --gpu_ids 0 --rotate_range 5 --translate_range 10 --zoom_range 0.97 1.03 --mirror --normalise --normalisation_type regular --no_dropout --input_nc 15 --output_nc 15 --ngf 64 --ndf 64 --which_model_netD n_layers --which_model_netG resnet_9blocks --n_layers_D 3 --norm batch --init_type transformer --pretrain_root ./pretrained_models --which_target 1 --default_r 3 --pca_dim 3 --lam_align 10 --lam_pix 50 --bound_size 64 --max_step 500002 --lr 0.00005
```
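Before changing anything, it is worth confirming where the weights actually live: watch `nvidia-smi` while the script runs, or print the device of any network's parameters. A minimal sketch (here `net` stands for whichever `torch.nn.Module` the TransformerModel owns internally; the exact attribute name depends on that class):

```python
import torch

def report_device(net: torch.nn.Module) -> None:
    # Print where the module's weights currently live, e.g. "cpu" or "cuda:0".
    print(next(net.parameters()).device)

# Throwaway example; in the script above you would pass one of the model's networks.
report_device(torch.nn.Linear(4, 4))             # -> cpu
if torch.cuda.is_available():
    report_device(torch.nn.Linear(4, 4).cuda())  # -> cuda:0
```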
```python
import torch
import sys
import time
import torchvision.utils as vutils
from models.transformer_model import TransformerModel
from options.transformer_options import TransformerOptions
from data.data_loader import CreateDataLoader
from util.util import create_path_list
from util.visualizer import Visualizer
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
opt = TransformerOptions().parse()
torch.manual_seed(opt.random_seed)
torch.cuda.manual_seed(opt.random_seed)
prefix = ['Emmanuel_Macron', 'Kathleen', 'Jack_Ma', 'Theresa_May', 'Donald_Trump']
#prefix = ['P_Emmanuel/video_0', 'P_Kathleen/video_0', 'P_MaYun/video_0', 'P_Theresa/video_0', 'P_Trump/video_0']
A_path, B_path = create_path_list(opt, prefix)
#init data loader
A_loader = CreateDataLoader(opt, A_path)
B_loader = [CreateDataLoader(opt, [pp]) for pp in B_path]
A_loader_iter = enumerate(A_loader.load_data())
B_loader_iter = [enumerate(bb.load_data()) for bb in B_loader]
model = TransformerModel(opt, len(B_loader)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
criterion = torch.nn.CrossEntropyLoss()
```
In each batch, the forward pass and the loss are computed on the GPU, the backward pass and the optimizer step run there as well, and the outputs are produced on the GPU.
Note that this code is only an example and may need to be adjusted to your specific setup; before training starts, make sure the device is set correctly. The complete script with the device handling added looks like this:
```python
import torch
import sys
import time
import torchvision.utils as vutils
from models.transformer_model import TransformerModel
from options.transformer_options import TransformerOptions
from data.data_loader import CreateDataLoader
from util.util import create_path_list
from util.visualizer import Visualizer
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
opt = TransformerOptions().parse()
torch.manual_seed(opt.random_seed)
torch.cuda.manual_seed(opt.random_seed)
prefix = ['Emmanuel_Macron', 'Kathleen', 'Jack_Ma', 'Theresa_May', 'Donald_Trump']
A_path, B_path = create_path_list(opt, prefix)
#init data loader
A_loader = CreateDataLoader(opt, A_path)
B_loader = [CreateDataLoader(opt, [pp]) for pp in B_path]
A_loader_iter = enumerate(A_loader.load_data())
B_loader_iter = [enumerate(bb.load_data()) for bb in B_loader]
print('A: {} B: {}\n'.format(A_loader.shape, ' '.join([str(ii.shape) for ii in B_loader])))
model = TransformerModel(opt, len(B_loader)).to(device)
visualizer = Visualizer(opt)
#valid
valid_x_A = model._get_variable(next(A_loader_iter)[-1].to(device))
vutils.save_image(model._inver_trans(valid_x_A.detach()), '{}/face.jpg'.format(opt.model_dir), nrow=model.nrow)
with torch.no_grad():
    valid_x_A = model.Bound(valid_x_A)[1]
    valid_x_B = [model.Bound(model._get_variable(next(bb)[-1]).to(device))[1] for bb in B_loader_iter]
model._save_valid_pic(valid_x_A, valid_x_B)
# start training
for step in range(opt.max_step):
    try:
        x_A = next(A_loader_iter)[-1].to(device)
    except StopIteration:
        A_loader_iter = enumerate(A_loader.load_data())
        x_A = next(A_loader_iter)[-1].to(device)
    x_B = []
    for ii, bb in enumerate(B_loader_iter):
        try:
            x_B.append(next(bb)[-1].to(device))
        except StopIteration:
            B_loader_iter[ii] = enumerate(B_loader[ii].load_data())
            x_B.append(next(B_loader_iter[ii])[-1].to(device))
    if x_A.size(0) != x_B[0].size(0):
        print("[!] Sampled dataset from A and B have different # of data. Try resampling...")
        continue
    iter_start_time = time.time()
    visualizer.reset()
    model.set_input(x_A, x_B)
    model.optimize_parameters()
    if step % opt.display_freq == 0:
        save_result = step % opt.update_html_freq == 0
        Boundary, Channel, Edge = model.get_current_visuals([valid_x_A, valid_x_B], idx=step)
        visualizer.display_current_results([Boundary, Channel, Edge], step, save_result, transformer=True)
    if step % opt.print_freq == 0:
        errors = model.get_current_errors()
        t = (time.time() - iter_start_time) / opt.batchSize
        visualizer.print_current_errors(opt.max_step, step, errors, t)
    if step % opt.save_epoch_freq == 0:
        print("[*] Save models to {}...".format(opt.model_dir))
        model.save(step)
```
I don't know whether you have solved this yet. If not: first check that your environment has a GPU-enabled build of a deep learning framework installed (TensorFlow, PyTorch, etc.). If it does not, install and configure one for the framework you are using.
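For PyTorch, a quick sanity check looks like this (a minimal sketch; run it in the same Python environment used for training):

```python
import torch

print(torch.__version__)                  # installed PyTorch version
print(torch.cuda.is_available())          # True only if this PyTorch build can see a CUDA GPU
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # e.g. the RTX 2080 Ti
```

If `torch.cuda.is_available()` prints `False`, the issue is a CPU-only PyTorch build or the driver/CUDA setup, not the training script.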
Next, the model needs to be moved onto the GPU for training. If you are using TensorFlow, you can refer to the following snippet:
```python
import tensorflow as tf

# Place the model on the GPU
with tf.device('/gpu:0'):
    # Build the model, define the loss, etc.
    model = ...
    # Define the optimizer, etc.
    optimizer = ...
    # Define the training op
    train_op = optimizer.minimize(loss)

# Run the training op in a Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Iterate over the dataset
    for epoch in range(num_epochs):
        for batch_data in dataset:
            sess.run(train_op, feed_dict={input_data: batch_data})
```
If you are using PyTorch, you can refer to the following snippet:
```python
import torch

# Place the model on the GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# Define the optimizer, etc.
optimizer = ...

# Define the training step
def train_step(model, optimizer, input_data, label):
    input_data = input_data.to(device)
    label = label.to(device)
    optimizer.zero_grad()
    loss = model(input_data, label)
    loss.backward()
    optimizer.step()

# Iterate over the dataset
for epoch in range(num_epochs):
    for batch_data in dataset:
        input_data, label = batch_data
        train_step(model, optimizer, input_data, label)
```
Also, take care not to load too much data onto the GPU at once; choosing an appropriate batch size and similar optimizations will help avoid the out-of-memory problem.
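For example, with the launch command above, lowering `--batchSize` from 256 is usually the first thing to try on an 11 GB card. If the effective batch size matters, gradient accumulation keeps it while holding only a small micro-batch in GPU memory at a time. A minimal, generic sketch of the idea (the linear model, random tensors, and the `accum_steps` value are placeholders, not part of the original training script):

```python
import torch

# Hypothetical stand-ins: any nn.Module, optimizer and data source work the same way.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = torch.nn.Linear(128, 10).to(device)               # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()

accum_steps = 8   # 8 micro-batches of 32 behave like one batch of 256
micro_batches = [(torch.randn(32, 128), torch.randint(0, 10, (32,)))
                 for _ in range(accum_steps)]              # stand-in for a DataLoader

optimizer.zero_grad()
for i, (x, y) in enumerate(micro_batches):
    x, y = x.to(device), y.to(device)
    loss = criterion(model(x), y) / accum_steps            # scale so the summed gradient matches a full batch
    loss.backward()                                        # gradients accumulate across micro-batches
    if (i + 1) % accum_steps == 0:
        optimizer.step()                                   # one parameter update per effective batch
        optimizer.zero_grad()
```

Mixed-precision training with `torch.cuda.amp` is another common way to reduce activation memory.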