這個問題 RuntimeError: stack expects each tensor to be equal size, but got [10, 5] at entry 0 and [23, 5] at entry 1
```python
# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img
# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
def convert_label(lab):
result = []
for str in lab:
if str == 'leaf':
result.append(0)
elif str == 'stem':
result.append(1)
elif str == 'soil':
result.append(2)
return result
def normalize(img_numpy_array): # gray scale
return img_numpy_array / 255.0
def parse_xml(xml_path):
tree = ET.parse(xml_path)
root = tree.getroot()
bounding_boxes = []
labels = []
for obj in root.findall('object'):
name = obj.find('name').text
bndbox = obj.find('bndbox')
xmin = int(bndbox.find('xmin').text)
ymin = int(bndbox.find('ymin').text)
xmax = int(bndbox.find('xmax').text)
ymax = int(bndbox.find('ymax').text)
bounding_boxes.append([xmin, ymin, xmax, ymax])
labels.append(name)
size = root.find('size')
width = int(size.find('width').text)
height = int(size.find('height').text)
return bounding_boxes, labels, width, height
""" def write_folder_names_to_text(folder_path, output_file): #list all name of jpg into txt
# Get the list of file names in the folder
file_names = os.listdir(folder_path)
# Create or overwrite the output file
with open(output_file, 'w') as f:
# Write each file name to a new line in the text file
for file_name in file_names:
if file_name.endswith('.jpg'):
f.write(file_name + '\n')
print(f"File names written to {output_file} successfully.")
folder_path = "C:/Users/willi/OneDrive/桌面/Dataset/test"
output_file = "C:/Users/willi/OneDrive/桌面/Dataset/test/file_names.txt"
write_folder_names_to_text(folder_path, output_file) """
""" def xml_to_csv(root_dir): #XML to CSV format
bbox = []
labels = []
for xml_file in glob.glob(root_dir + '/*.xml'):
if os.path.exists(xml_file):
bboxes, lbls, width, height = parse_xml(xml_file)
bbox.append(bboxes)
labels.append(lbls)
label_in_num = convert_label(lbls)
print(len(bbox))
print(len(labels))
csv_file = os.path.join(root_dir, 'annotations.csv')
with open(csv_file, 'w', newline='') as f:
writer = csv.writer(f)
writer.writerow(['filename', 'width', 'height', 'label', 'xmin', 'ymin', 'xmax', 'ymax'])
for i in range(len(bbox)):
for j in range(len(bbox[i])):
writer.writerow([os.path.basename(xml_file), width, height, labels[i][j], bbox[i][j][0], bbox[i][j][1],
bbox[i][j][2], bbox[i][j][3]])
print(f"CSV file saved at: {csv_file}") """
def display_image_with_boxes(image, boxes):
img_with_boxes = image.clone().permute(1, 2, 0).numpy()
plt.imshow(img_with_boxes)
ax = plt.gca()
for bbox in boxes:
xmin, ymin, xmax, ymax, lbls = bbox
color = 'blue' if lbls == 0 else 'red' # blus as leaf , red as stem
rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=color, linewidth=2)
ax.add_patch(rect)
plt.show()
def pad_boxes(boxes, max_num_boxes):
padded_boxes = np.zeros((max_num_boxes, 4), dtype=np.float32)
for i, box in enumerate(boxes):
if i >= max_num_boxes:
break
padded_boxes[i] = box
return padded_boxes
voc_label = {'leaf,stem,soil'}
dict_labels = dict(zip(voc_label, range(len(voc_label))))
class Read_voc(Dataset):
def __init__(self, root_path):
super(Read_voc, self).__init__()
self.root_path = root_path
self.img_idx = []
self.anno_idx = []
self.bbox = []
self.obj_name = []
train_txt_path = self.root_path + "/file_names.txt"
self.img_path = self.root_path
self.anno_path = self.root_path
train_txt = open(train_txt_path)
lines = train_txt.readlines()
for line in lines:
name = line.strip().split()[0]
name = name.rstrip('.jpg')
self.img_idx.append(self.img_path + name + '.jpg')
self.anno_idx.append(self.anno_path + name + '.xml')
def __getitem__(self, item):
img = Image.open(self.img_idx[item])
img = transforms.ToTensor()(img)
normalize(img)
targets = ET.parse(self.anno_idx[item])
res = [] # Store annotation information, i.e., coordinates of the bounding box's top left and bottom right
# points and the target's class label
result = []
if os.path.exists(self.root_path):
bboxes, labels, width, height = parse_xml(self.anno_idx[item])
lbls = convert_label(labels) # Convert label to number using convert_label() function
res.append(bboxes)
res.append(lbls)
max_num_boxes = 100 # Determine the maximum number of boxes
for bbox, label in zip(*res):
num_boxes = len(bbox)
bbox_padded = pad_boxes(bbox, max_num_boxes) # Pad the bounding boxes with zeros
result.append(bbox_padded + [label])
else:
raise Exception('Path does not Exist!')
# Separate coordinates and labels
boxes = [box[:-1] for box in result] # Exclude the label from each bounding box
labels = [box[-1] for box in result]
# Convert coordinates and labels to NumPy arrays, then to PyTorch tensors
boxes_array = np.array(boxes, dtype=np.float32)
labels_array = np.array(labels, dtype=np.float32)
boxes_tensor = torch.from_numpy(boxes_array)
labels_tensor = torch.from_numpy(labels_array)
return img, boxes_tensor
def __len__(self):
return len(self.img_idx)
def main():
root_dir = "C:/Users/willi/OneDrive/桌面/Dataset/test/"
image_size = (256, 256)
train_data = Read_voc(root_path=root_dir) # DataSet Preprocessing
img, res = train_data[1]
print(img.size())
print(len(res)) # row
print(len(res[0])) # cols
print(train_data.__len__())
# display_image_with_boxes(img, res)
# Display image and label.
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
train_features, train_labels = train_dataloader
print(train_features.size())
main()
```
【以下回答由 GPT 生成】
问题分析: 该错误是由于使用Dataloader加载数据时,输入的样本尺寸大小不一致导致的。具体来说,在这个例子中,有两个输入的tensor尺寸不匹配,一个尺寸为[10, 5],另一个尺寸为[23, 5]。为了解决这个问题,我们需要对数据进行预处理,确保所有的输入tensor具有相同的尺寸。
解决方案: 以下是解决该问题的步骤: 1. 首先,我们需要确定所有输入tensor的目标尺寸。在这个例子中,所有输入tensor的目标尺寸为[10, 5]。 2. 在parse_xml
函数中,获取bounding box后,我们需要根据目标尺寸将其调整为相同的尺寸。可以使用cv2.resize
函数将bounding box调整为目标尺寸。 3. 在parse_xml
函数中,获取bounding box后,我们需要在labels列表中添加标签。同时,我们需要将标签字符串转换为相应的数字。可以使用convert_label
函数将标签字符串转换为数字。 4. 修改Dataset
类的实现,确保样本的尺寸都与目标尺寸一致。具体来说,修改__getitem__
方法中的代码,将加载的图像和labels调整为目标尺寸。可以使用cv2.resize
函数将图像调整为目标尺寸。同时,可以使用torch.Tensor
将labels转换为tensor格式。 5. 创建并使用修改后的Dataset
类来创建DataLoader
实例。 6. 运行修改后的代码,确保加载器能够正常工作。
下面是具体的代码优化:
# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img
# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
def convert_label(lab):
result = []
for str in lab:
if str == 'leaf':
result.append(0)
elif str == 'stem':
result.append(1)
elif str == 'soil':
result.append(2)
return result
def normalize(img_numpy_array): # gray scale
return img_numpy_array / 255.0
def parse_xml(xml_path, target_size):
tree = ET.parse(xml_path)
root = tree.getroot()
bounding_boxes = []
labels = []
for obj in root.findall('object'):
name = obj.find('name').text
bndbox = obj.find('bndbox')
xmin = int(bndbox.find('xmin').text)
ymin = int(bndbox.find('ymin').text)
xmax = int(bndbox.find('xmax').text)
ymax = int(bndbox.find('ymax').text)
# Resize bounding box to target size
xmin = int(xmin * target_size[1] / orig_img_size[1])
ymin = int(ymin * target_size[0] / orig_img_size[0])
xmax = int(xmax * target_size[1] / orig_img_size[1])
ymax = int(ymax * target_size[0] / orig_img_size[0])
bounding_boxes.append([xmin, ymin, xmax, ymax])
labels.append(name)
labels = convert_label(labels)
return bounding_boxes, labels
class CustomDataset(Dataset):
def __init__(self, data_folder, target_size):
self.images = glob.glob(os.path.join(data_folder, '*.jpg'))
self.annotations = glob.glob(os.path.join(data_folder, '*.xml'))
self.target_size = target_size
def __len__(self):
return len(self.images)
def __getitem__(self, index):
image_path = self.images[index]
annotation_path = self.annotations[index]
# Load and preprocess image
img = Image.open(image_path)
img = img.resize(self.target_size) # Resize image to target size
img_numpy_array = img_to_array(img)
img_tensor = normalize(img_numpy_array)
img_tensor = torch.from_numpy(img_tensor)
# Parse XML and preprocess annotations
bounding_boxes, labels = parse_xml(annotation_path, self.target_size)
labels = torch.tensor(labels)
return img_tensor, labels
# Set target size
target_size = (10, 5)
# Set original image size
orig_img_size = (100, 50)
# Set data folder
data_folder = 'path/to/your/data/folder'
# Create dataset instance
dataset = CustomDataset(data_folder, target_size)
# Create dataloader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Iterate over the dataloader
for images, labels in dataloader:
# Perform training or evaluation
pass
请注意,此代码中的parse_xml
函数中的图像和bounding box尺寸调整是根据原始图像的大小和目标尺寸进行的。在实际情况中,请根据你的数据和需求进行相应调整。
希望这个解决方案能够解决你遇到的问题。如果还有其他问题,请随时提问。
【相关推荐】