一、标签转换
我们在使用labelme标签工具标注完数据后,会获得json文件。标注结束后,我们需要通过标签转换操作,生成jpg格式的原始图片和png格式的mask标签图。
1.1 使用img_b64_to_arr将json标签中以base64编码的二进制图像数据转换成numpy格式数据,再保存为jpg图像
/************ Anaconda3\Lib\site-packages\labelme\utils\image.py *************/
def img_b64_to_arr(img_b64):
    """Decode a base64-encoded image and convert it to an array.

    Args:
        img_b64: Base64-encoded image data, e.g. the ``imageData`` field
            read from a labelme JSON file.

    Returns:
        The result of ``img_data_to_arr`` applied to the decoded bytes
        (described as numpy-format data in the accompanying text).
    """
    # Undo the base64 text encoding to recover the raw encoded image bytes.
    img_data = base64.b64decode(img_b64)
    # Delegate the actual image decoding to the sibling helper.
    return img_data_to_arr(img_data)
# Save the decoded image array as a JPEG named after the part of
# ``file_name`` that precedes its first ".".
PIL.Image.fromarray(img).save(
    osp.join(out_jpgs_path, file_name.split(".")[0] + '.jpg')
)
1.2 使用shapes_to_label将分割标签数据变成单通道png mask标签图
/************ Anaconda3\Lib\site-packages\labelme\utils\shape.py *************/
def shapes_to_label(img_shape, shapes, label_name_to_value):
    """Rasterize annotated shapes into class-id and instance-id label maps.

    Args:
        img_shape: Shape of the source image; only the first two entries
            (height, width) are used.
        shapes: Iterable of shape dicts. Each must provide ``"points"`` and
            ``"label"``; ``"group_id"`` and ``"shape_type"`` are optional.
        label_name_to_value: Mapping from label name to integer class id.

    Returns:
        A ``(cls, ins)`` tuple of ``int32`` arrays of shape
        ``img_shape[:2]``. ``cls`` holds the class id per pixel and ``ins``
        the 1-based instance id; pixels covered by no shape stay 0.

    Raises:
        KeyError: If a shape's label is missing from ``label_name_to_value``.
    """
    cls = np.zeros(img_shape[:2], dtype=np.int32)
    ins = np.zeros_like(cls)
    instances = []
    for shape in shapes:
        points = shape["points"]
        label = shape["label"]
        group_id = shape.get("group_id")
        if group_id is None:
            # Shapes without a group get a unique id, so each one becomes
            # its own instance.
            group_id = uuid.uuid1()
        shape_type = shape.get("shape_type", None)

        cls_name = label
        instance = (cls_name, group_id)

        # Instance ids are assigned in order of first appearance, from 1.
        if instance not in instances:
            instances.append(instance)
        ins_id = instances.index(instance) + 1
        cls_id = label_name_to_value[cls_name]

        # Shapes are painted in order, so later shapes overwrite earlier
        # ones wherever their masks overlap.
        mask = shape_to_mask(img_shape[:2], points, shape_type)
        cls[mask] = cls_id
        ins[mask] = ins_id

    return cls, ins
1.3 语义分割标签转换完整代码
def json2mask(json_file, img_file, filter_cls: list, out_jpgs_path, out_mask_path):
    """Convert one labelme JSON annotation into a JPEG image and a PNG mask.

    Args:
        json_file: Path to the labelme ``.json`` annotation file.
        img_file: Path to the source image; read only when the JSON has no
            embedded ``imageData``.
        filter_cls: Label names to drop before building the mask. An empty
            list (or ``None``) keeps every shape.
        out_jpgs_path: Directory that receives ``<name>.jpg``.
        out_mask_path: Directory that receives ``<name>.png``.

    Returns:
        None. Nothing is written when ``json_file`` is not an existing file.

    Raises:
        AssertionError: If ``json_file`` does not end with ``".json"``.
    """
    assert json_file.endswith(".json")
    if not os.path.isfile(json_file):
        return

    # Output files are named after the part of the stem before its first ".".
    base_name = Path(json_file).stem.split(".")[0]

    # Use a context manager so the annotation file handle is always closed.
    with open(json_file) as f:
        data = json.load(f)

    # Prefer the image embedded in the JSON; fall back to the image file on
    # disk and base64-encode it so both paths share the same decoder.
    imageData = data.get("imageData")
    if not imageData:
        with open(img_file, "rb") as f:
            imageData = base64.b64encode(f.read()).decode("utf-8")

    # Decode the base64 image data into an array.
    img = utils.img_b64_to_arr(imageData)

    if filter_cls:
        data["shapes"] = [
            shape for shape in data["shapes"] if shape["label"] not in filter_cls
        ]

    # Rasterize the remaining shapes into a single-channel class-id mask.
    lbl, _ = utils.shapes_to_label(img.shape, data["shapes"], label_name_2_id_sample)

    # Save the original image and its mask under the same base name.
    PIL.Image.fromarray(img).save(osp.join(out_jpgs_path, base_name + '.jpg'))
    utils.lblsave(osp.join(out_mask_path, "%s.png" % base_name), lbl)
二、验证、训练数据集划分
在经过标签转换后,会生成原始jpg图片和png mask标签图。我们还需要进一步进行验证集、训练集划分。
def split_mask(mask_path_list, Base_mask, trainval_percent, train_percent):
    """Randomly split mask names into trainval/train/val/test list files.

    Writes ``trainval.txt``, ``test.txt``, ``train.txt`` and ``val.txt``
    into ``Base_mask``, one name per line. Each name is the text of the
    entry before its first ``"."``.

    Args:
        mask_path_list: Sequence of mask file names.
        Base_mask: Directory in which the four txt files are created.
        trainval_percent: Fraction of all entries placed in trainval; the
            remainder goes to test.
        train_percent: Fraction of trainval placed in train; the remainder
            goes to val.

    Returns:
        None.

    Raises:
        ValueError: From ``random.sample`` if a requested sample size is
            negative or larger than its population.
    """
    num = len(mask_path_list)
    indexes = range(num)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)

    # Sample from sequences in the original order so that results under a
    # fixed random seed are unchanged.
    trainval = random.sample(indexes, tv)
    train = random.sample(trainval, tr)
    print("train and val size", tv)
    print("train size", tr)

    # Sets give O(1) membership tests in the write loop below.
    trainval_ids = set(trainval)
    train_ids = set(train)

    # The with-statement closes all four files, including test.txt, even
    # if a write fails.
    with open(os.path.join(Base_mask, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(Base_mask, 'test.txt'), 'w') as ftest, \
            open(os.path.join(Base_mask, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(Base_mask, 'val.txt'), 'w') as fval:
        for i in indexes:
            name = mask_path_list[i].split('.')[0] + '\n'
            if i not in trainval_ids:
                ftest.write(name)
                continue
            ftrainval.write(name)
            if i in train_ids:
                ftrain.write(name)
            else:
                fval.write(name)
生成的验证集、训练集路径txt文件