使用imgaug库可以方便地进行图像增强操作,包括调整图像大小。以下是使用imgaug库调整图像大小并修改对应的XML标签框的示例脚本:
- 注意:请将输入文件夹路径、输出文件夹路径和目标尺寸修改为自己的实际值。
# Folder to process; the script reads images from its `images` sub-folder
# and annotations from its `xml` sub-folder.
input_folder = "path/to/your/input_folder"
# Folder that receives the results; `images` and `xml` sub-folders are
# created inside it.
output_folder = "path/to/your/output_folder"
# Target size as (height, width): index 0 is used as the height and
# index 1 as the width when resizing.
target_size = (640, 640)
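- 输入文件夹结构:输入文件夹下需要包含 `images`(存放 .jpg 图像)和 `xml`(存放与图像同名的 .xml 标注文件)两个子文件夹;脚本会在输出文件夹下生成同名的 `images` 和 `xml` 子文件夹。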
全部代码:
import os
import xml.etree.ElementTree as ET
from PIL import Image
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import numpy as np
def resize_and_augment(image_path, xml_folder, output_folder, target_size):
    """Resize one image and rescale the bounding boxes of its XML annotation.

    The annotation is looked up in ``xml_folder`` under the image's base name
    with an ``.xml`` extension.  The resized image is written to
    ``<output_folder>/images`` under its original file name, and the updated
    annotation is written to ``<output_folder>/xml`` by ``update_xml``.

    Args:
        image_path: Path of the image file to resize.
        xml_folder: Folder containing the annotation files.
        output_folder: Root folder for the results; the ``images`` and
            ``xml`` sub-folders are created if they do not exist.
        target_size: ``(height, width)`` of the resized image.

    Raises:
        FileNotFoundError: If the image or its annotation file is missing.
        AttributeError: If an ``object`` element has no ``bndbox`` element or
            the ``bndbox`` lacks one of ``xmin``/``ymin``/``xmax``/``ymax``.
    """
    # Open inside a context manager so the file handle is always released,
    # even if decoding fails.
    with Image.open(image_path) as image:
        # PIL reports the size as (width, height).
        original_size = image.size
        # Palette images would be resized on palette indices (producing
        # garbage colours) and CMYK arrays come back from ``fromarray`` as
        # "RGBA", which cannot be saved as JPEG.  Convert both to RGB first;
        # every other mode is passed through untouched.
        pixels = image.convert("RGB") if image.mode in ("P", "CMYK") else image
        image_np = np.array(pixels)

    # The annotation shares the image's base name.
    xml_filename = os.path.splitext(os.path.basename(image_path))[0] + '.xml'
    xml_path = os.path.join(xml_folder, xml_filename)
    root = ET.parse(xml_path).getroot()

    # Collect one BoundingBox per <object>, in document order.  update_xml
    # pairs the augmented boxes with the <object> elements by position, so
    # no object may be skipped here.
    bounding_boxes = []
    for object_tag in root.iter('object'):
        bbox_tag = object_tag.find('bndbox')
        bounding_boxes.append(BoundingBox(
            x1=float(bbox_tag.find('xmin').text),
            y1=float(bbox_tag.find('ymin').text),
            x2=float(bbox_tag.find('xmax').text),
            y2=float(bbox_tag.find('ymax').text),
        ))
    bbs = BoundingBoxesOnImage(bounding_boxes, shape=image_np.shape)

    # Resize the image and the boxes together so they stay aligned.
    seq = iaa.Sequential([
        iaa.Resize({"height": target_size[0], "width": target_size[1]}),
    ])
    augmented_image, bbs_aug = seq(image=image_np, bounding_boxes=bbs)

    # Make sure both output sub-folders exist before writing.
    output_images_folder = os.path.join(output_folder, 'images')
    output_xml_folder = os.path.join(output_folder, 'xml')
    os.makedirs(output_images_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    # Save the resized image under its original file name.
    augmented_image_path = os.path.join(
        output_images_folder, os.path.basename(image_path)
    )
    Image.fromarray(augmented_image).save(augmented_image_path)

    # Write the annotation with the new image size and box coordinates.
    update_xml(xml_path, bbs_aug, original_size, target_size, output_xml_folder)
def update_xml(xml_path, bbs_aug, original_size, new_size, output_folder):
    """Write a copy of an XML annotation with a new image size and boxes.

    The file at ``xml_path`` is parsed, every ``size`` element gets the new
    width/height, and the ``bndbox`` of each ``object`` element is overwritten
    with the matching box from ``bbs_aug``.  Objects and boxes are paired by
    position (document order), stopping at the shorter of the two sequences.
    The result is saved in ``output_folder`` under the same file name; the
    source file is not modified.

    Args:
        xml_path: Path of the annotation file to read.
        bbs_aug: Object exposing a ``bounding_boxes`` sequence whose items
            have ``x1``, ``y1``, ``x2`` and ``y2`` attributes.
        original_size: Unused; kept so existing callers keep working.
        new_size: ``(height, width)`` of the resized image.
        output_folder: Folder that receives the updated file; created if it
            does not exist.

    Raises:
        AttributeError: If a ``size`` element lacks ``width``/``height`` or
            an ``object`` lacks ``bndbox`` or one of its coordinate tags.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # new_size is (height, width), matching how the resize step reads it.
    new_height, new_width = new_size[0], new_size[1]
    for size_tag in root.iter('size'):
        size_tag.find('width').text = str(new_width)
        size_tag.find('height').text = str(new_height)

    for object_tag, bbox_aug in zip(root.iter('object'), bbs_aug.bounding_boxes):
        bbox_tag = object_tag.find('bndbox')
        new_coords = (
            ('xmin', bbox_aug.x1),
            ('ymin', bbox_aug.y1),
            ('xmax', bbox_aug.x2),
            ('ymax', bbox_aug.y2),
        )
        for coord, coord_value in new_coords:
            # Round to the nearest pixel: plain int() truncates toward zero
            # and would systematically shift every coordinate.  float() first
            # so numpy scalars are handled the same as Python floats.
            bbox_tag.find(coord).text = str(int(round(float(coord_value))))

    # Do not rely on the caller having created the destination folder.
    os.makedirs(output_folder, exist_ok=True)
    updated_xml_path = os.path.join(output_folder, os.path.basename(xml_path))
    tree.write(updated_xml_path)
if __name__ == "__main__":
    # Input folder, output folder and target size (height, width).
    # Edit these three values before running the script.
    input_folder = "path/to/your/input_folder"
    output_folder = "path/to/your/output_folder"
    target_size = (640, 640)

    images_folder = os.path.join(input_folder, 'images')
    xml_folder = os.path.join(input_folder, 'xml')

    # Walk the image folder in sorted order so runs are reproducible.
    for filename in sorted(os.listdir(images_folder)):
        # Compare the extension case-insensitively so files such as
        # "photo.JPG" or "photo.jpeg" are not silently ignored.
        if not filename.lower().endswith(('.jpg', '.jpeg')):
            continue

        # One image without an annotation should not abort the whole batch:
        # report it and carry on with the remaining files.
        xml_path = os.path.join(xml_folder, os.path.splitext(filename)[0] + '.xml')
        if not os.path.isfile(xml_path):
            print("Skipping {}: annotation file not found: {}".format(filename, xml_path))
            continue

        image_path = os.path.join(images_folder, filename)
        resize_and_augment(image_path, xml_folder, output_folder, target_size)