训练yolov8+SAM的过程记录

1-首先将拿到的数据集进行重新命名（dataset1：是经过校色之后裁剪的图片；dataset2：原图）
图片文件从1.jpg开始命名的代码：

import os
import shutil

# Source folder with the original .jpg images.
# NOTE(review): this path ends in "\.jpg" -- confirm it really is a directory
# and not a leftover file pattern.
folder_path = r'C:\Users\23608\Desktop\Luli_work\data\fanStudent\tongueseg\Fan\Fan\.jpg'
# Destination folder that receives the renamed copies (1.jpg, 2.jpg, ...).
new_folder = r'C:\Users\23608\Desktop\Luli_work\data\fanStudent\tongueseg\imgOrig'

# shutil.copy() does not create missing directories.
os.makedirs(new_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sorting (lexicographically)
# makes the numbering reproducible between runs and between datasets.
jpg_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.jpg'))

for n, jpg_file in enumerate(jpg_files, start=1):
    new_filename = f'{n}.jpg'

    # Build the full source and destination paths.
    original_path = os.path.join(folder_path, jpg_file)
    new_path = os.path.join(new_folder, new_filename)

    # Copy (not move) the file into the new folder under its new name.
    shutil.copy(original_path, new_path)

print("重命名完成！")

2-将预处理之后的数据上传到服务器，接着使用yolov8+SAM代码将图片中的舌体掩码跑出来：
数据存放位置
imgOrig：/share1/luli/tongueseg/data/dataset2/imgOrig/
imgCrop：/share1/luli/tongueseg/data/dataset1/imgCrop/

微调SAM的查找

How to Fine-Tune Segment Anything

1. How to Fine-Tune Segment Anything

We gave an overview of the SAM architecture in the introduction section. The image encoder has a complex architecture with many parameters. To fine-tune the model, it makes sense for us to focus on the mask decoder which is lightweight and therefore easier, faster and more memory efficient to fine-tune.

In order to fine tune SAM, we need to extract the underlying pieces of its architecture (image and prompt encoders, mask decoder). We cannot use SamPredictor.predict (link) for two reasons:

· We want to fine tune only the mask decoder
· This function calls SamPredictor.predict_torch which has the @torch.no_grad() decorator (link), which prevents us from computing gradients

Thus, we need to examine the SamPredictor.predict function and call the appropriate functions with gradient calculation enabled on the part we want to fine tune (the mask decoder). Doing this is also a good way to learn more about how SAM works.

2. Creating a Custom Dataset

We need three things to fine tune our model:(这里其实并没有说GT、dataset的具体类型)

· Images on which to draw segmentations
· Segmentation ground truth masks
· Prompts to feed into the model

后续在代码里面可以看到图片代码里面是png格式，mask掩码是黑白二值图。

我现在使用的是labelme标记的舌头，json文件，需要把json文件转化成二值图。

这里涉及到3个数据类型的转化：

· png图片转json文件（也就是使用yolov8+SAM对数据集进行简单的分割之后再人工进行微调，此时的微调使用的是labelme格式是json，这里使用到TongueSAM里马赛克json里面的代码）
· json文件转成png图片，这里使用到labelme里面自带的代码（参考链接）[labelme] json格式批量转换为mask.png，步骤如下：
1.使用labelme制作语义分割数据集，生成.json格式文件，将所有放置于一个文件夹下。
2.找到labelme安装位置的json_to_dataset.py文件，（可以使用Everything软件）
用下面的代码替换里面的代码：

import argparse
import json
import os
import os.path as osp
import warnings
import copy
import numpy as np
import PIL.Image
from skimage import io
import yaml
from labelme import utils

def main():
    """Batch-convert the labelme ``.json`` files of one folder to PNG images.

    Command line arguments:
        json_file   folder that contains the labelme annotation files
        -o, --out   optional root folder for the results; when omitted the
                    result folders are created relative to the current
                    working directory (the previous behaviour)

    For every ``<name>.json`` a folder ``<name>_json`` is created holding
    ``<name>_source.png``, ``<name>_mask.png``, ``label_names.txt`` and
    ``info.yaml``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('json_file')   # folder containing the labelme json annotations
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args()

    json_dir = args.json_file
    # An empty root keeps the output relative to the current working directory.
    out_root = args.out if args.out is not None else ''

    # Sorted only to make the processing order (and the log) deterministic.
    for json_name in sorted(os.listdir(json_dir)):
        path = os.path.join(json_dir, json_name)
        if not json_name.endswith('.json') or not os.path.isfile(path):
            continue

        # Name without the ".json" suffix; reused for the output files.
        filename = json_name[:-5]

        with open(path) as f:
            data = json.load(f)

        # The 'imageData' field holds the base64-encoded source image.
        img = utils.image.img_b64_to_arr(data['imageData'])
        # lbl is the label image, lbl_names the label names; data['shapes']
        # holds the annotated shapes and their labels.
        # NOTE(review): labelme_shapes_to_label may be missing from recent
        # labelme releases -- confirm against the installed version.
        lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

        out_dir = osp.join(out_root, filename + '_json')
        if not osp.exists(out_dir):
            os.makedirs(out_dir)

        PIL.Image.fromarray(img).save(osp.join(out_dir, '{}_source.png'.format(filename)))
        PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_mask.png'.format(filename)))

        with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
            for lbl_name in lbl_names:
                f.write(lbl_name + '\n')

        warnings.warn('info.yaml is being replaced by label_names.txt')
        info = dict(label_names=lbl_names)
        with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
            yaml.safe_dump(info, f, default_flow_style=False)

        print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()

在这里插入图片描述

找到 labelme_json_to_dataset的文件夹，在文件夹路径里面输入cmd
执行命令： labelme_json_to_dataset path/to/json
这样就可以生成以下四个文件（可以使用Everything软件找一下在哪）

上面的1_mask.png就是转化成功的了，看起来都是黑色的，但是没错（掩码里的像素值是类别编号，例如1，数值很小，所以肉眼看起来是全黑的）。因为我的是单个目标检测，所以我又使用了下面的代码将目标区域变成白色，也就是重新将这图片转化成一个二值图。

# 这是转换一张图片的代码==从全黑变成了黑白二值图==
import cv2
import numpy as np
from PIL import Image

# Path of the labelme mask image to convert.
image_path = r"C:\Users\23608\Desktop\Luli_work\data\fanStudent\dataset1\GTpng\GTpng\0000_mask.png"
# Output file name (written to the current working directory).
filename = '0.png'
image = Image.open(image_path)
img_arr = np.array(image)
# The unpacking below requires a 2-D (single-channel) array.
height, width = img_arr.shape
print(height, width)

# Vectorised replacement for the per-pixel loop: every non-zero pixel
# becomes 255 (white), everything else stays 0 (black). uint8 is stored
# explicitly instead of relying on cv2.imwrite converting a float array.
mask = np.where(img_arr != 0, 255, 0).astype(np.uint8)
h, w = mask.shape
print(h, w)

cv2.imwrite(filename, mask)

# 这是一个文件夹的情况
import os
import cv2
import numpy as np
from PIL import Image

def mask2binary(input_folder, output_folder):
    """Convert every ``.png`` mask in ``input_folder`` to a black/white image.

    Each non-zero pixel is written as 255 and each zero pixel as 0. The
    result is saved in ``output_folder`` as ``<first 4 characters of the
    input file stem>.png``.

    Args:
        input_folder: folder that is scanned (non-recursively) for ``.png`` files.
        output_folder: folder that receives the binary masks; it must exist.

    Raises:
        ValueError: if an input image is not single-channel (2-D).
    """
    # Only PNG files are processed; anything else in the folder is ignored.
    input_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for input_file in input_files:
        input_path = os.path.join(input_folder, input_file)
        # Close the image file as soon as the pixels are copied into an array.
        with Image.open(input_path) as image:
            img_arr = np.array(image)

        img_name = os.path.splitext(os.path.basename(input_file))[0]
        print(img_name)
        # Keep only the first 4 characters of the stem as the output name.
        filename = img_name[:4]

        if img_arr.ndim != 2:
            raise ValueError(
                f"expected a single-channel mask, got shape {img_arr.shape}: {input_path}"
            )

        # Vectorised replacement for the per-pixel loop: non-zero -> 255.
        mask = np.where(img_arr != 0, 255, 0).astype(np.uint8)

        # Save the binary mask.
        output_path = os.path.join(output_folder, f"{filename}.png")
        cv2.imwrite(output_path, mask)

if __name__ == "__main__":
    # Folder with the labelme mask PNGs to convert.
    src_dir = r"C:\Users\23608\Desktop\Luli_work\data\fanStudent\dataset1\GTpng\GTpng"
    # Folder that receives the black/white masks; created when missing.
    dst_dir = r"C:\Users\23608\Desktop\Luli_work\data\fanStudent\dataset1\GTpng\mask"
    os.makedirs(dst_dir, exist_ok=True)
    mask2binary(src_dir, dst_dir)

将二值图掩码转化成json文件，代码（需要这个pycococreator-master，应该是在github上直接下载的）

# mask2json.py
import os
import io
import json
import numpy as np
from pycococreatortools import pycococreatortools
from PIL import Image
import base64

def img_tobyte(img_pil):
    """Return the image as a base64 string of its PNG encoding.

    :param img_pil: image object whose ``save(file, format=...)`` method is
        used to write the PNG bytes (a PIL ``Image`` at the call site)
    :return: base64 text, decoded as UTF-8
    """
    buffer = io.BytesIO()
    img_pil.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode('utf-8')

# ROOT_DIR = '/home/luli/TongueSAM/mask2json/'  # alternative root directory
# Folder that receives the generated labelme .json files.
ROOT_DIR = '/home/luli/TongueSAM/fanStudent/jsonOutput/dataset2'
# Image_DIR = os.path.join(ROOT_DIR, 'Images')  # alternative: images below ROOT_DIR
# Folder with the .jpg images the masks belong to.
Image_DIR = '/home/luli/TongueSAM/fanStudent/imgInput/dataset2'
# Folder with the mask images that are turned into polygons.
Label_DIR = '/home/luli/TongueSAM/fanStudent/imgOutput/dataset2Output/output1'

# Image_DIR = '/home/luli/TongueSAM/mask2json/Images'
# Label_DIR = '/home/luli/TongueSAM/mask2json/GT'

# Label_DIR = os.path.join(ROOT_DIR, "GT")     # alternative: label files below ROOT_DIR
Label_files = os.listdir(Label_DIR)          # every mask file in the label folder
class_names = ['_background_', 'tongue']     # label name for each index in the png

# open(..., 'w') below does not create missing directories.
os.makedirs(ROOT_DIR, exist_ok=True)

for Label_filename in Label_files:
    Json_output = {
        "version": "3.16.7",
        "flags": {},
        "fillColor": [255, 0, 0, 128],
        "lineColor": [0, 255, 0, 128],
        "imagePath": {},
        "shapes": [],
        "imageData": {}}
    print(Label_filename)

    # The matching image shares the mask's name up to the first dot, with a
    # .jpg extension.
    name = Label_filename.split('.', 3)[0]
    name1 = name+'.jpg'
    Json_output['imagePath'] = name1
    with Image.open(os.path.join(Image_DIR, name1)) as image:
        Json_output['imageData'] = img_tobyte(image)

    # Load the annotation mask.
    with Image.open(os.path.join(Label_DIR, Label_filename)) as label_image:
        binary_mask = np.array(label_image).astype(np.uint8)

    # Accept both single-channel (H, W) masks and multi-channel (H, W, C)
    # masks; for the latter only channel 0 is used, as before.
    mask_2d = binary_mask if binary_mask.ndim == 2 else binary_mask[:, :, 0]

    # Trace the outline of every non-zero value found in the mask.
    for i in np.unique(mask_2d):
        if i == 0:
            continue
        temp_mask = np.where(mask_2d == i, 1, 0)
        # Convert the binary mask to polygons (flat [x0, y0, x1, y1, ...] lists).
        segmentation = pycococreatortools.binary_mask_to_polygon(temp_mask, tolerance = 7)
        for item in segmentation:
            # Polygons with 10 or fewer coordinates are skipped.
            if len(item) > 10:
                points = [[item[j], item[j+1]] for j in range(0, len(item), 2)]
                # Every region is labelled 'tongue', whatever its mask value.
                label = class_names[1]
                seg_info = {'points': points, "fill_color": None, "line_color": None, "label": label,
                            "shape_type": "polygon", "flags": {}}
                Json_output['shapes'].append(seg_info)
    Json_output['imageHeight'] = binary_mask.shape[0]
    Json_output['imageWidth'] = binary_mask.shape[1]

    # Write <name>.json into ROOT_DIR. os.path.join avoids str.format
    # misreading braces in the file name.
    full_path = os.path.join(ROOT_DIR, name + '.json')
    with open(full_path, 'w') as output_json_file:
        json.dump(Json_output, output_json_file)

现在有了mask.png， img.jpg

在运行finetunne_and_inference_tutorial_D_data.ipynb这个代码之前，需要先执行pre_grey_rgb2D.py制作数据集
创建mytrain.sh执行文件，制作数据集

# Run the script file directly: "-m" expects a module name, not a ".py" file.
python pre_grey_rgb2D.py \
    --img_path /share1/luli/MedSAM/data/samfune/train/images \
    --gt_path /share1/luli/MedSAM/data/samfune/train/labels \
    --npz_path /share1/luli/MedSAM/data/my_npz_path \
    --data_name mytrain \
    --img_name_suffix .jpg