Pytorch Dataloader

這個問題 RuntimeError: stack expects each tensor to be equal size, but got [10, 5] at entry 0 and [23, 5] at entry 1



```python
# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img

# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms


def convert_label(lab):
    result = []
    for str in lab:
        if str == 'leaf':
            result.append(0)
        elif str == 'stem':
            result.append(1)
        elif str == 'soil':
            result.append(2)
    return result


def normalize(img_numpy_array):  # gray scale
    return img_numpy_array / 255.0


def parse_xml(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    bounding_boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        bounding_boxes.append([xmin, ymin, xmax, ymax])
        labels.append(name)
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    return bounding_boxes, labels, width, height


""" def write_folder_names_to_text(folder_path, output_file):   #list all name of jpg into txt 
    # Get the list of file names in the folder
    file_names = os.listdir(folder_path)

    # Create or overwrite the output file
    with open(output_file, 'w') as f:
        # Write each file name to a new line in the text file
        for file_name in file_names:
            if file_name.endswith('.jpg'):
                f.write(file_name + '\n')

    print(f"File names written to {output_file} successfully.")


folder_path = "C:/Users/willi/OneDrive/桌面/Dataset/test"
output_file = "C:/Users/willi/OneDrive/桌面/Dataset/test/file_names.txt"
write_folder_names_to_text(folder_path, output_file) """

""" def xml_to_csv(root_dir):                      #XML to CSV format 
    bbox = []
    labels = []
    for xml_file in glob.glob(root_dir + '/*.xml'):
        if os.path.exists(xml_file):
            bboxes, lbls, width, height = parse_xml(xml_file)
            bbox.append(bboxes)
            labels.append(lbls)
            label_in_num = convert_label(lbls)

    print(len(bbox))
    print(len(labels))
    csv_file = os.path.join(root_dir, 'annotations.csv')
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['filename', 'width', 'height', 'label', 'xmin', 'ymin', 'xmax', 'ymax'])

        for i in range(len(bbox)):
            for j in range(len(bbox[i])):
                writer.writerow([os.path.basename(xml_file), width, height, labels[i][j], bbox[i][j][0], bbox[i][j][1],
                                 bbox[i][j][2], bbox[i][j][3]])

    print(f"CSV file saved at: {csv_file}")  """


def display_image_with_boxes(image, boxes):
    img_with_boxes = image.clone().permute(1, 2, 0).numpy()
    plt.imshow(img_with_boxes)
    ax = plt.gca()
    for bbox in boxes:
        xmin, ymin, xmax, ymax, lbls = bbox
        color = 'blue' if lbls == 0 else 'red'  # blus as leaf , red as stem
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=color, linewidth=2)
        ax.add_patch(rect)
    plt.show()


def pad_boxes(boxes, max_num_boxes):
    padded_boxes = np.zeros((max_num_boxes, 4), dtype=np.float32)
    for i, box in enumerate(boxes):
        if i >= max_num_boxes:
            break
            padded_boxes[i] = box
    return padded_boxes


voc_label = {'leaf,stem,soil'}
dict_labels = dict(zip(voc_label, range(len(voc_label))))


class Read_voc(Dataset):
    def __init__(self, root_path):
        super(Read_voc, self).__init__()
        self.root_path = root_path
        self.img_idx = []
        self.anno_idx = []
        self.bbox = []
        self.obj_name = []
        train_txt_path = self.root_path + "/file_names.txt"
        self.img_path = self.root_path
        self.anno_path = self.root_path

        train_txt = open(train_txt_path)
        lines = train_txt.readlines()
        for line in lines:
            name = line.strip().split()[0]
            name = name.rstrip('.jpg')
            self.img_idx.append(self.img_path + name + '.jpg')
            self.anno_idx.append(self.anno_path + name + '.xml')

    def __getitem__(self, item):
        img = Image.open(self.img_idx[item])
        img = transforms.ToTensor()(img)
        normalize(img)
        targets = ET.parse(self.anno_idx[item])
        res = []  # Store annotation information, i.e., coordinates of the bounding box's top left and bottom right
        # points and the target's class label
        result = []
        if os.path.exists(self.root_path):
            bboxes, labels, width, height = parse_xml(self.anno_idx[item])
            lbls = convert_label(labels)  # Convert label to number using convert_label() function
            res.append(bboxes)
            res.append(lbls)
            max_num_boxes = 100  # Determine the maximum number of boxes

            for bbox, label in zip(*res):
                num_boxes = len(bbox)
                bbox_padded = pad_boxes(bbox, max_num_boxes)  # Pad the bounding boxes with zeros
                result.append(bbox_padded + [label])

        else:
            raise Exception('Path does not Exist!')

        # Separate coordinates and labels
        boxes = [box[:-1] for box in result]  # Exclude the label from each bounding box
        labels = [box[-1] for box in result]

        # Convert coordinates and labels to NumPy arrays, then to PyTorch tensors
        boxes_array = np.array(boxes, dtype=np.float32)
        labels_array = np.array(labels, dtype=np.float32)
        boxes_tensor = torch.from_numpy(boxes_array)
        labels_tensor = torch.from_numpy(labels_array)

        return img, boxes_tensor

    def __len__(self):
        return len(self.img_idx)


def main():
    root_dir = "C:/Users/willi/OneDrive/桌面/Dataset/test/"
    image_size = (256, 256)
    train_data = Read_voc(root_path=root_dir)  # DataSet Preprocessing
    img, res = train_data[1]
    print(img.size())
    print(len(res))  # row
    print(len(res[0]))  # cols
    print(train_data.__len__())

    # display_image_with_boxes(img, res)
    # Display image and label.
    train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
    train_features, train_labels = train_dataloader
    print(train_features.size())


main()


```

【以下回答由 GPT 生成】

问题分析: 该错误是由于使用Dataloader加载数据时,输入的样本尺寸大小不一致导致的。具体来说,在这个例子中,有两个输入的tensor尺寸不匹配,一个尺寸为[10, 5],另一个尺寸为[23, 5]。为了解决这个问题,我们需要对数据进行预处理,确保所有的输入tensor具有相同的尺寸。

解决方案: 以下是解决该问题的步骤: 1. 首先,我们需要确定所有输入tensor的目标尺寸。在这个例子中,所有输入tensor的目标尺寸为[10, 5]。 2. 在parse_xml函数中,获取bounding box后,我们需要根据目标尺寸将其调整为相同的尺寸。可以使用cv2.resize函数将bounding box调整为目标尺寸。 3. 在parse_xml函数中,获取bounding box后,我们需要在labels列表中添加标签。同时,我们需要将标签字符串转换为相应的数字。可以使用convert_label函数将标签字符串转换为数字。 4. 修改Dataset类的实现,确保样本的尺寸都与目标尺寸一致。具体来说,修改__getitem__方法中的代码,将加载的图像和labels调整为目标尺寸。可以使用cv2.resize函数将图像调整为目标尺寸。同时,可以使用torch.Tensor将labels转换为tensor格式。 5. 创建并使用修改后的Dataset类来创建DataLoader实例。 6. 运行修改后的代码,确保加载器能够正常工作。

下面是具体的代码优化:

# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img

# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms


def convert_label(lab):
    result = []
    for str in lab:
        if str == 'leaf':
            result.append(0)
        elif str == 'stem':
            result.append(1)
        elif str == 'soil':
            result.append(2)
    return result


def normalize(img_numpy_array):  # gray scale
    return img_numpy_array / 255.0


def parse_xml(xml_path, target_size):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    bounding_boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        # Resize bounding box to target size
        xmin = int(xmin * target_size[1] / orig_img_size[1])
        ymin = int(ymin * target_size[0] / orig_img_size[0])
        xmax = int(xmax * target_size[1] / orig_img_size[1])
        ymax = int(ymax * target_size[0] / orig_img_size[0])
        bounding_boxes.append([xmin, ymin, xmax, ymax])

        labels.append(name)

    labels = convert_label(labels)

    return bounding_boxes, labels


class CustomDataset(Dataset):
    def __init__(self, data_folder, target_size):
        self.images = glob.glob(os.path.join(data_folder, '*.jpg'))
        self.annotations = glob.glob(os.path.join(data_folder, '*.xml'))
        self.target_size = target_size

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image_path = self.images[index]
        annotation_path = self.annotations[index]

        # Load and preprocess image
        img = Image.open(image_path)
        img = img.resize(self.target_size)  # Resize image to target size
        img_numpy_array = img_to_array(img)
        img_tensor = normalize(img_numpy_array)
        img_tensor = torch.from_numpy(img_tensor)

        # Parse XML and preprocess annotations
        bounding_boxes, labels = parse_xml(annotation_path, self.target_size)
        labels = torch.tensor(labels)

        return img_tensor, labels


# Set target size
target_size = (10, 5)

# Set original image size
orig_img_size = (100, 50)

# Set data folder
data_folder = 'path/to/your/data/folder'

# Create dataset instance
dataset = CustomDataset(data_folder, target_size)

# Create dataloader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Iterate over the dataloader
for images, labels in dataloader:
    # Perform training or evaluation
    pass

请注意,此代码中的parse_xml函数中的图像和bounding box尺寸调整是根据原始图像的大小和目标尺寸进行的。在实际情况中,请根据你的数据和需求进行相应调整。

希望这个解决方案能够解决你遇到的问题。如果还有其他问题,请随时提问。



【相关推荐】



如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^