Error when running Faster R-CNN code

I'm running a deep-learning model in Python with PyTorch: Faster R-CNN.
The training set runs through fine,
but when I run the validation code I get the error `The size of tensor a (1800) must match the size of tensor b (300) at non-singleton dimension 1`.

```python
import os
import torch
import config as cfg
import numpy as np
import pickle
from tqdm import tqdm
from net.resnet import resnet
from torch.utils.data import DataLoader
from rpn.bbox_transform import bbox_transform_inv,clip_boxes
from data.pascal_voc import PASCAL_VOC
from torchvision.ops import nms
import xml.etree.ElementTree as ET

@torch.no_grad()
def evalue(check_point, cache_path='./result.pkl', class_agnostic=False, ovthresh=0.5, use_07_metric=False):

    ind_class = {v: k for k, v in cfg.class_to_ind.items()}
    class_result_dic = {k: [] for k in cfg.class_to_ind.keys()}  # store every class result

    imagenames = []

    if not os.path.exists(cache_path):

        test_set = PASCAL_VOC(cfg.testset_root_path, 'test')
        dataloader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=True, num_workers=4)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        fasterRCNN = resnet(cfg.backbone, is_training=False, pretrained=False, class_agnostic=class_agnostic)
        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (check_point))

        checkpoint = torch.load(check_point)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])

        print('load model successfully!')

        fasterRCNN.eval()
        fasterRCNN.to(device)

        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        gt_boxes = torch.FloatTensor(1)
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        gt_boxes = gt_boxes.cuda()

        # run detection and collect results
        for batch_data in tqdm(dataloader):
            # batch_data = dataloader.next()
            with torch.no_grad():
                im_data.resize_(batch_data['image'].size()).copy_(batch_data['image'])
                gt_boxes.resize_(batch_data['gt_boxes'].size()).copy_(batch_data['gt_boxes'])
                im_info.resize_(batch_data['im_info'].size()).copy_(batch_data['im_info'])

                image_name = os.path.basename(batch_data['imname'][0]).split('.')[0]
                imagenames.append(image_name)

                rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(im_data, gt_boxes, im_info)

                scores = cls_prob.data
                boxes = rois.data[:, :, 1:5]

                box_deltas = bbox_pred.data

                if cfg.bbox_normalize_targets_precomputed:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                                 + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                print(boxes.shape)
                print(box_deltas.shape)

                pred_boxes = bbox_transform_inv(boxes, box_deltas,1)
                pred_boxes = clip_boxes(pred_boxes, im_info, 1)
                pred_boxes = pred_boxes / batch_data['im_info'][0, 2]

                scores = scores.squeeze()
                pred_boxes = pred_boxes.squeeze()

                for j in range(1, len(cfg.class_to_ind)):
                    inds = torch.nonzero(scores[:, j] > 0).view(-1)
                    if inds.numel() > 0:
                        cls_scores = scores[:, j][inds]
                        _, order = torch.sort(cls_scores, 0, True)

                        if class_agnostic:
                            cls_boxes = pred_boxes[inds, :]
                        else:
                            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                        cls_dets = pred_boxes[order]
                        cls_scores = cls_scores[order]

                        keep = nms(cls_dets, cls_scores, cfg.test_nms_threshold)
                        cls_dets = cls_dets[keep.view(-1).long()]  # boxes kept for this class after NMS
                        cls_scores = cls_scores[keep.view(-1).long()]

                        for score, bbox in zip(cls_scores, cls_dets):
                            class_result_dic[ind_class[j]].append({
                                    'image_name': image_name,
                                    'score': score,
                                    'bbox': [bbox[0], bbox[1], bbox[2], bbox[3]]
                                })

        print('writing result cache ...')
        with open(cache_path, 'wb') as fp:
            pickle.dump(class_result_dic, fp)
    else:
        with open(os.path.join(cfg.testset_root_path, 'ImageSets', 'Main', 'test.txt')) as fp:
            for line in fp:
                imagenames.append(line.strip())
        with open(cache_path, 'rb') as fp:
            class_result_dic = pickle.load(fp)


    print('compute mAP ...')
    # compute mAP
    recs = {}
    for i, imagename in enumerate(imagenames):
        recs[imagename] = parse_rec(os.path.join(cfg.testset_root_path, 'Annotations', imagename + '.xml'))

    # extract gt objects for this class
    mAP = 0
    for classname in cfg.class_to_ind.keys():
        if classname == 'BG':
            continue
        print(classname, end=' ')
        class_recs = {}
        npos = 0
        for imagename in imagenames:
            R = [obj for obj in recs[imagename] if obj['name'] == classname]
            bbox = np.array([x['bbox'] for x in R])
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
            det = [False] * len(R)
            npos = npos + sum(~difficult)
            class_recs[imagename] = {'bbox': bbox,
                                     'difficult': difficult,
                                     'det': det}

        class_result = class_result_dic[classname]
        image_ids = [r['image_name'] for r in class_result]
        confidence = np.array([float(r['score']) for r in class_result])
        BB = np.array([r['bbox'] for r in class_result])

        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        fp[d] = 1.
            else:
                fp[d] = 1.

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print(ap)
        mAP += ap
    mAP = mAP / (len(cfg.class_to_ind) - 1)

    print('mAP:', mAP)


def parse_rec(filename):
    """Parse a PASCAL VOC xml file."""
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall. If use_07_metric is true, uses
    the VOC 07 11-point method (default:False).
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap



if __name__ == '__main__':
    check_point = './work_dir/fasterrcnn_r101-1.pth'
    evalue(check_point)
```







Console output:

```
C:\Users\w10\AppData\Local\Programs\Python\Python37\python.exe D:/pytorch-fasterrcnn-master/evalue.py
loading data......
over!
load checkpoint ./work_dir/fasterrcnn_r101-1.pth
load model successfully!
  0%|          | 0/175 [00:00<?, ?it/s]tensor([[6.0000e+02, 8.3400e+02, 6.5934e-01]], device='cuda:0')
torch.Size([1, 300, 4]) 
torch.Size([1, 1800, 4]) 
  0%|          | 0/175 [00:09<?, ?it/s]
Traceback (most recent call last):
  File "D:/pytorch-fasterrcnn-master/evalue.py", line 260, in <module>
    evalue(check_point)
  File "C:\Users\w10\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "D:/pytorch-fasterrcnn-master/evalue.py", line 74, in evalue
    pred_boxes = bbox_transform_inv(boxes, box_deltas,1)
  File "D:\pytorch-fasterrcnn-master\rpn\bbox_transform.py", line 131, in bbox_transform_inv
    pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
RuntimeError: The size of tensor a (1800) must match the size of tensor b (300) at non-singleton dimension 1

Process finished with exit code 1
```


While debugging on my own I added

```python
print(boxes.shape)
print(box_deltas.shape)
```

which prints

```
torch.Size([1, 300, 4])
torch.Size([1, 1800, 4])
```

but I don't know how to debug this further.

Training runs through fine, but I'm stuck on validation; I'd appreciate it if someone could point out my mistake.

The error is caused by the dimensions of two tensors not matching. In general, such errors come from a convolution layer whose output dimensions don't match what the next layer expects; check carefully whether one layer's out_channel equals the next layer's in_channel. Also watch out for dimension changes introduced by operations such as concatenation.
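In this particular case, though, the shapes you printed point directly at the `box_deltas` reshape. `boxes` is `[1, 300, 4]` (300 RoIs), while `box_deltas` is `[1, 1800, 4]`, and 1800 = 300 × 6: your `bbox_pred` evidently holds class-specific regressions (4 values for each of 6 classes, background included), so flattening it with `view(1, -1, 4)` turns the per-class axis into extra "boxes", and the broadcast against the 300 RoIs inside `bbox_transform_inv` fails. Below is a minimal sketch of the usual fix, assuming `len(cfg.class_to_ind)` is your class count including background (common PyTorch Faster R-CNN implementations handle it this way): reshape to `(1, -1, 4)` only in the class-agnostic case, and otherwise keep `4 * num_classes` in the last dimension.

```python
box_deltas = bbox_pred.data
if cfg.bbox_normalize_targets_precomputed:
    # undo the precomputed target normalization, one 4-tuple at a time
    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                 + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
if class_agnostic:
    # one box per RoI: shape (1, 300, 4)
    box_deltas = box_deltas.view(1, -1, 4)
else:
    # one box per RoI per class: shape (1, 300, 4 * num_classes)
    box_deltas = box_deltas.view(1, -1, 4 * len(cfg.class_to_ind))
```

With class-specific deltas, `pred_boxes` then comes out with `4 * num_classes` columns, which is exactly what your later slice `pred_boxes[inds][:, j * 4:(j + 1) * 4]` already assumes. While you're at it, note that `cls_boxes` is computed but never used: `cls_dets = pred_boxes[order]` indexes the full box array with an index into the `inds`-filtered scores, so it should presumably be `cls_dets = cls_boxes[order]` before the NMS call.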