import torch
from ptsemseg.models import get_model
# Export the PSPNet segmentation network to ONNX.
# Select the device: the first CUDA GPU when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Tensor names recorded in the exported graph. The original call passed the
# builtin `input` and an undefined `output`, which fails before exporting.
input_names = ['input']
output_names = ['output']
# Build the model (PSPNet, 150 classes, ADE20K variant) and switch to eval mode.
# NOTE(review): no checkpoint is loaded in this script, so the exported graph
# carries whatever weights get_model() initialises - confirm this is intended.
model = get_model('pspnet', 150, version='ade20k')
model = model.eval().to(device)
# Dummy input tensor x used to trace the network (batch 1, 3 channels, 256x256).
x = torch.randn(1, 3, 256, 256).to(device)
# Export to ONNX.
# NOTE(review): the output file name is kept as in the original; confirm
# whether "pspnet256.onnx" was meant.
torch.onnx.export(model, x, "pspnet.onnx256", verbose=True,
                  input_names=input_names, output_names=output_names,
                  opset_version=11)
import numpy as np
from skimage.transform import resize
import cv2
import torch
import time
import onnx
import onnxruntime as ort
from ptsemseg.models import get_model
from ptsemseg.utils import convert_state_dict
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Checkpoint file loaded with torch.load in the __main__ section
# (relative path, resolved against the current working directory).
weight_path = "../../pspnet_50_ade20k.pth"
def color_map(N=256, normalized=False):
    """Return a color map in PASCAL VOC format (RGB).

    Args:
        N (int): number of classes, i.e. number of rows in the map.
        normalized (bool): when True the colors are float32 values in
            [0, 1]; otherwise they are uint8 values in [0, 255].

    Returns:
        numpy.ndarray: array of shape (N, 3); row i is the RGB color of
        class i.
    """
    def bitget(byteval, idx):
        # 1 when bit `idx` of `byteval` is set, otherwise 0.
        return (byteval >> idx) & 1

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        # Consume the class id three bits at a time (one bit per channel),
        # filling each channel from its most significant bit downwards.
        for j in range(8):
            shift = 7 - j
            r |= bitget(c, 0) << shift
            g |= bitget(c, 1) << shift
            b |= bitget(c, 2) << shift
            c >>= 3
        cmap[i] = (r, g, b)
    return cmap / 255.0 if normalized else cmap
def decode_segmap(temp, n_classes, cmap):
    """Turn an image of class predictions into a BGR uint8 color image.

    Args:
        temp (numpy.ndarray): 2-D integer array of class ids.
        n_classes (int): number of classes; ids 0 .. n_classes - 1 are
            colored, any other value is left as it is in each channel.
        cmap (numpy.ndarray): (N, 3) color map whose rows are RGB colors.

    Returns:
        numpy.ndarray: array of shape (H, W, 3), dtype uint8, channels in
        BGR order.
    """
    r = temp.copy()
    g = temp.copy()
    b = temp.copy()
    for label in range(n_classes):
        # Masks are taken from the untouched input so that colors already
        # written into r/g/b are never mistaken for class ids.
        mask = temp == label
        r[mask] = cmap[label, 0]
        g[mask] = cmap[label, 1]
        b[mask] = cmap[label, 2]
    bgr = np.zeros((temp.shape[0], temp.shape[1], 3))
    bgr[:, :, 0] = b
    bgr[:, :, 1] = g
    bgr[:, :, 2] = r
    return bgr.astype(np.uint8)
def predict_max(img, output_size=(375, 500)):
    """Run semantic prediction and keep the best class per pixel (max fusion).

    Reads the module-level ``cmap`` color map and calls ``predict``.

    Args:
        img (numpy.ndarray): input image, forwarded unchanged to
            ``predict`` (which documents it as a BGR uint8 OpenCV image).
        output_size (tuple): (height, width) the label map and the
            confidence map are resized to. Defaults to (375, 500), the
            size that was hard-coded originally.

    Returns:
        tuple: ``(semantic_color, pred_confidence)`` where
        ``semantic_color`` is the BGR uint8 image produced by
        ``decode_segmap`` and ``pred_confidence`` is the resized map of
        the per-pixel maximum class probability.
    """
    class_probs = predict(img)
    # Take the best prediction and its confidence along the class axis.
    pred_confidence, pred_label = class_probs.max(1)
    pred_confidence = pred_confidence.squeeze(0).cpu().numpy()
    pred_label = pred_label.squeeze(0).cpu().numpy()
    # order=0 is nearest neighbour, so class ids are never interpolated.
    pred_label = resize(pred_label, output_size, order=0, mode='reflect',
                        anti_aliasing=False, preserve_range=True)
    pred_label = pred_label.astype(np.int64)
    # Add semantic color.
    semantic_color = decode_segmap(pred_label, 150, cmap)
    pred_confidence = resize(pred_confidence, output_size, mode='reflect',
                             anti_aliasing=True, preserve_range=True)
    return (semantic_color, pred_confidence)
def predict(img):
    """Run semantic segmentation on one image and return class probabilities.

    Reads the module-level globals ``cnn_input_size``, ``mean``, ``model``
    and ``device``, which the ``__main__`` section sets up.

    Args:
        img (numpy.ndarray): the input OpenCV image (BGR, uint8).

    Returns:
        torch.Tensor: softmax over dimension 1 of the network output,
        left on ``device``.
    """
    # Work on a copy so the caller's image is never modified.
    img = img.copy()
    # Prepare the image: resize to the CNN input size, then subtract the
    # dataset mean. No further normalization. resize() returns float64.
    img = resize(img, cnn_input_size, mode='reflect', anti_aliasing=True,
                 preserve_range=True)
    img = img.astype(np.float32)
    img -= mean
    # Convert HWC -> CHW.
    img = img.transpose(2, 0, 1)
    # Convert to a tensor and add the batch dimension required by the CNN.
    img = torch.tensor(img, dtype=torch.float32)
    img = img.unsqueeze(0)
    with torch.no_grad():
        img = img.to(device)
        # Do inference.
        # NOTE(review): the original comment gave the output layout as
        # N,C,W,H; for a CHW input it is presumably N,C,H,W - confirm.
        outputs = model(img)
        # Apply softmax over the class axis to get normalized probabilities.
        outputs = torch.nn.functional.softmax(outputs, 1)
    return outputs
if __name__ == '__main__':
    # Globals read by predict() / predict_max(): cmap, cnn_input_size, mean, model.
    cmap = color_map(N=150, normalized=False)
    image_path = "./include/2007_000661.jpg"
    color_img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None.
    if color_img is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    cnn_input_size = (473, 473)
    mean = np.array([104.00699, 116.66877, 122.67892])  # Mean value of dataset
    model = get_model("pspnet", 150, version='ade20k')
    # Load the checkpoint once (the original read the same file twice).
    state = torch.load(weight_path, map_location={'cuda:0': 'cpu'})
    # convert_state_dict removes 'module' from the dictionary keys.
    model.load_state_dict(convert_state_dict(state['model_state']))
    # eval() before inference, matching the export script in this file;
    # without it the network runs in training mode.
    model = model.eval().to(device)
    semantic_color, pred_confidence = predict_max(color_img)
    cv2.imshow("semantic_color", semantic_color)
    # The body of this `if` was missing in the original; closing the
    # window is the presumed intent.
    if cv2.waitKey(0) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
import numpy as np
from skimage.transform import resize
import cv2
import onnxruntime as ort
import torch
import time
# Torch device handle (first CUDA GPU if available, else CPU).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the ONNX model
onnx_model_path = "./pspnet473.onnx"
ort_session = ort.InferenceSession(onnx_model_path)
if __name__ == '__main__':
    cmap = color_map(N=150, normalized=False)
    image_path = "./2007_000661.jpg"
    color_img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None.
    if color_img is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    # Resize image to match the model's input size
    input_size = (473, 473)  # 473 256
    color_img_resized = cv2.resize(color_img, input_size)
    # Preprocess the image: float32, subtract the dataset mean, HWC -> NCHW.
    img = color_img_resized.astype(np.float32)
    img -= np.array([104.00699, 116.66877, 122.67892], dtype=np.float32)  # Mean value of dataset
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, axis=0)
    # Perform inference. The input name is read from the model instead of
    # being hard-coded, so it works whatever name was used at export time.
    input_name = ort_session.get_inputs()[0].name
    outputs = ort_session.run(None, {input_name: img})
    logits = outputs[0]
    # The network is exported without the softmax that the PyTorch path
    # applies afterwards, so apply it here (max-subtracted for stability)
    # to make the confidence a probability.
    shifted = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    class_probs = shifted / np.sum(shifted, axis=1, keepdims=True)
    # Process the output to get semantic color and confidence.
    # Both maps drop the batch axis; left in place, resize() would treat
    # the confidence map as multichannel and return a 3-D array.
    pred_confidence = np.max(class_probs, axis=1).squeeze(0)
    pred_label = np.argmax(class_probs, axis=1).squeeze(0)
    pred_label = resize(pred_label, (375, 500), order=0, mode='reflect',
                        anti_aliasing=False, preserve_range=True)
    pred_label = pred_label.astype(np.int64)
    semantic_color = decode_segmap(pred_label, 150, cmap)
    pred_confidence = resize(pred_confidence, (375, 500), mode='reflect',
                             anti_aliasing=True, preserve_range=True)
    # Display the result
    cv2.imshow("semantic_color", semantic_color)
    # The body of this `if` was missing in the original; closing the
    # window is the presumed intent.
    if cv2.waitKey(0) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
def detect_image(self, image, count=False, name_classes=None):
    """Segment ``image`` and return it blended with the colored class mask.

    Reads ``self.input_shape`` (indexed as [0] = height, [1] = width),
    ``self.cuda``, ``self.net`` and ``self.colors``.

    Args:
        image: input image; presumably a PIL image, since the copy of it
            is passed to ``Image.blend`` - confirm against callers.
        count: not used in this body.
        name_classes: not used in this body.

    Returns:
        PIL.Image.Image: blend of the input and the mask with alpha 0.7.
    """
    # Convert the image to RGB so that grayscale input does not fail at
    # prediction time; only RGB images are supported.
    image = cvtColor(image)
    # Keep a copy of the input image, used later for drawing.
    old_img = copy.deepcopy(image)
    original_h = np.array(image).shape[0]
    original_w = np.array(image).shape[1]
    # Pad the image with gray bars to resize without distortion
    # (a direct resize would also work for recognition).
    image_data, nw, nh = resize_image(image, (self.input_shape[1], self.input_shape[0]))
    # Preprocess, go HWC -> CHW and add the batch dimension.
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        # Feed the image through the network.
        pr = self.net(images)[0]  # tensor: [21,512,512]
        # Move the class axis last, take per-pixel class probabilities and
        # convert to numpy.
        pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()  # numpy: [512,512,21]
        # Cut off the gray-bar region.
        top = int((self.input_shape[0] - nh) // 2)
        left = int((self.input_shape[1] - nw) // 2)
        pr = pr[top:int((self.input_shape[0] - nh) // 2 + nh),
                left:int((self.input_shape[1] - nw) // 2 + nw)]
        # Resize back to the original image size.
        pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
        # Pick the class of every pixel.
        pr = pr.argmax(axis=-1)

    # Look up the color of every pixel's class and restore the image shape.
    seg_img = np.reshape(np.array(self.colors, np.uint8)[np.reshape(pr, [-1])],
                         [original_h, original_w, -1])
    # Convert the new picture to a PIL Image.
    image = Image.fromarray(np.uint8(seg_img))
    # Blend the new picture with the original one.
    image = Image.blend(old_img, image, 0.7)
    return image
import time
import copy
import colorsys
import onnx
import onnxruntime as ort
import cv2
import numpy as np
from PIL import Image
from utils.utils import cvtColor, preprocess_input, resize_image, show_config
import torch
from torch import nn
import torch.nn.functional as F
# PASCAL VOC class names, background first (21 entries). The original list
# was cut off after "train" and never closed; "tvmonitor" is the missing
# last VOC class.
name_classes = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                "tvmonitor"]
# Torch device handle: first CUDA GPU if available, otherwise CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the ONNX model
onnx_model_path = "./model_data/models.onnx"
# Session created once at import time and reused by onnx_detect().
ort_session = ort.InferenceSession(onnx_model_path)
# NOTE(review): mix_type is not read anywhere in the visible code; it
# presumably selects how the mask is mixed with the input - confirm.
mix_type = 0
# RGB color per class id, following the PASCAL VOC palette. The last entry
# was (128, 64, 12) in the original; the VOC palette value for id 21 is
# (128, 64, 128).
colors = [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
          (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
          (64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
          (128, 64, 128)]
def onnx_detect(image, input_shape=(512, 512)):
    """Segment ``image`` with the ONNX Runtime session and blend the result.

    Uses the module-level ``ort_session`` and ``colors``.

    Args:
        image: input image; presumably a PIL image, since the copy of it
            is passed to ``Image.blend`` - confirm against callers.
        input_shape (tuple): (height, width) of the network input.
            Defaults to (512, 512), the size hard-coded originally.

    Returns:
        PIL.Image.Image: blend of the input and the colored class mask
        with alpha 0.7.
    """
    in_h, in_w = input_shape
    # Convert to RGB and keep a copy of the input for the final blend.
    image = cvtColor(image)
    old_img = copy.deepcopy(image)
    original_h = np.array(image).shape[0]
    original_w = np.array(image).shape[1]
    # Pad-and-resize to the network input, then preprocess, go HWC -> CHW
    # and add the batch dimension.
    image_data, nw, nh = resize_image(image, (in_w, in_h))
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)

    outputs = ort_session.run(None, {'images': image_data})
    # Take the first output.
    pr = outputs[0]  # numpy: [1,21,512,512]
    # Drop the batch dimension.
    pr = np.squeeze(pr, axis=0)  # numpy: [21,512,512]
    # Move the class axis to the last position.
    pr = np.transpose(pr, (1, 2, 0))  # numpy: [512,512,21]
    # Softmax over the class axis; subtracting the per-pixel maximum first
    # keeps np.exp from overflowing on large values.
    pr = np.exp(pr - np.max(pr, axis=-1, keepdims=True))
    pr = pr / np.sum(pr, axis=-1, keepdims=True)
    # Cut off the gray-bar region added by resize_image.
    pr = pr[int((in_h - nh) // 2): int((in_h - nh) // 2 + nh),
            int((in_w - nw) // 2): int((in_w - nw) // 2 + nw)]
    # Resize back to the original image size.
    pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
    # Pick the class of every pixel.
    pr = pr.argmax(axis=-1)
    # Look up the color of every pixel's class and restore the image shape.
    seg_img = np.reshape(np.array(colors, np.uint8)[np.reshape(pr, [-1])],
                         [original_h, original_w, -1])
    # Convert the new picture to a PIL Image.
    image = Image.fromarray(np.uint8(seg_img))
    # Blend the new picture with the original one.
    image = Image.blend(old_img, image, 0.7)
    return image
# Demo loop: run detection on the same picture until 'q' is pressed.
image_path = "/home/pc/文档/deeplabv3-plus-pytorch-main/img/street.jpg"
while True:
    frame = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None.
    if frame is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Convert to a PIL Image.
    frame = Image.fromarray(np.uint8(frame))
    # Run detection.
    frame = np.array(onnx_detect(frame))
    # RGB -> BGR, the channel order OpenCV expects for display.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    cv2.imshow("video", frame)
    # The body of this `if` was missing in the original, so the loop could
    # never exit.
    if cv2.waitKey(1) & 0xff == ord('q'):
        break
cv2.destroyAllWindows()
image_data, nw, nh = resize_image(image, (512, 512))
- np.transpose(..., (2, 0, 1)):将预处理后的图像数据的维度进行转置。原始图像数据的维度可能是(高度,宽度,通道数),而模型通常要求输入数据的维度为(通道数,高度,宽度)。通过转置操作,可以将数据维度调整为符合模型要求的格式。
- np.expand_dims(..., 0):在最前面添加batch_size维度,使数据维度变为(1,通道数,高度,宽度)。
image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)
outputs = ort_session.run(None, {'images': image_data})
pr = outputs[0]
4. 对pr维度进行调整,去掉多余的第一维(batch),此时pr为[21,512,512],对应[类别数,高,宽]
pr = np.squeeze(pr, axis=0)
5. 进行维度变换,把[类别数,高,宽]改为[高,宽,类别数]
pr = np.transpose(pr,(1,2,0))
6. 归一化
pr = np.exp(pr) / np.sum(np.exp(pr), axis=-1, keepdims=True)
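将灰条部分截取掉,各个下标的含义如下:
- int((512 - nh) // 2):计算裁剪区域在高度方向上的起始位置。nh为resize_image返回的高度(应为不含灰条的有效图像高度),模型要求的输入高度为512,灰条在上下两侧对称分布,所以起始位置是(512 - nh)的一半;这里使用整除//和int来确保结果为整数。
- int((512 - nh) // 2 + nh):计算裁剪区域在高度方向上的结束位置。通过起始位置加上有效图像的高度nh,可以得到结束位置。
- int((512 - nw) // 2):计算裁剪区域在宽度方向上的起始位置。同样地,有效图像的宽度为nw,而模型要求的输入宽度为512,需要在宽度方向上居中裁剪,使得裁剪后的图像宽度为nw。
- int((512 - nw) // 2 + nw):计算裁剪区域在宽度方向上的结束位置,即起始位置加上有效图像的宽度nw。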
pr = pr[int((512 - nh) // 2): int((512 - nh) // 2 + nh), int((512 - nw) // 2): int((512 - nw) // 2 + nw)]
pr = cv2.resize(pr, (orininal_w, orininal_h), interpolation=cv2.INTER_LINEAR)
9. 取出每一个像素的类别,因为axis=-1代表最后一维,也就是类别这一维,然后对每一个像素取argmax,得到对应的置信度最大的类别作为这个像素的类别
pr = pr.argmax(axis=-1)
- np.reshape(pr, [-1]):将预测结果pr展平为一维数组,-1表示自动计算展平后的维度大小,即原始数组的元素总数。
- np.array(colors, np.uint8):将颜色列表(colors)转换为NumPy数组,并将数据类型设置为uint8。
- np.array(colors, np.uint8)[np.reshape(pr, [-1])]:使用展平后的预测结果作为索引,从颜色数组中获取对应的颜色值。这样可以得到与预测结果元素个数相同的颜色值数组。
- np.reshape(..., [orininal_h, orininal_w, -1]):将颜色值数组重新调整为原始图像的高度和宽度,以及通道数(这里使用-1表示自动计算通道数)。
综上所述,这段代码的目的是将预测结果转换为彩色图像,以便可视化分割结果。
seg_img = np.reshape(np.array(colors, np.uint8)[np.reshape(pr, [-1])], [orininal_h, orininal_w, -1])
image = Image.fromarray(np.uint8(seg_img))
image = Image.blend(old_img, image, 0.7)
import time
import copy
import colorsys
import numpy as np
from PIL import Image
from utils.utils import cvtColor, preprocess_input, resize_image, show_config
import torch
from torch import nn
import torch.nn.functional as F
import openvino
from openvino.inference_engine import IECore
import cv2
from openvino.inference_engine import IECore
# PASCAL VOC class names, background first (21 entries). The original list
# was cut off after "train" and never closed; "tvmonitor" is the missing
# last VOC class.
name_classes = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                "tvmonitor"]
# Torch device handle: first CUDA GPU if available, otherwise CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the OpenVINO IR model
ir_model_path = "./model_data/models.xml"
# NOTE(review): openvino.inference_engine.IECore looks like the legacy
# Inference Engine API - confirm it exists in the installed OpenVINO version.
ie = IECore()
# `net` describes the network (its input/output names are read in
# openvino_detect); `exec_net` is the instance loaded on the CPU device
# that actually runs inference.
net = ie.read_network(model=ir_model_path)
exec_net = ie.load_network(network=net, device_name="CPU")
# NOTE(review): mix_type is not read anywhere in the visible code; it
# presumably selects how the mask is mixed with the input - confirm.
mix_type = 0
# RGB color per class id, following the PASCAL VOC palette. The last entry
# was (128, 64, 12) in the original; the VOC palette value for id 21 is
# (128, 64, 128).
colors = [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
          (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
          (64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
          (128, 64, 128)]
def openvino_detect(image, input_shape=(512, 512)):
    """Segment ``image`` with the OpenVINO network and return the class mask.

    Uses the module-level ``net``, ``exec_net`` and ``colors``. The mask
    is returned on its own; it is not blended with the input image.

    Args:
        image: input image, passed through ``cvtColor`` and
            ``resize_image``; presumably a PIL image - confirm against
            callers.
        input_shape (tuple): (height, width) of the network input.
            Defaults to (512, 512), the size hard-coded originally.

    Returns:
        PIL.Image.Image: the colored segmentation mask at the size of the
        input image.
    """
    in_h, in_w = input_shape
    image = cvtColor(image)
    original_h = np.array(image).shape[0]
    original_w = np.array(image).shape[1]
    # Pad-and-resize to the network input, then preprocess, go HWC -> CHW
    # and add the batch dimension.
    image_data, nw, nh = resize_image(image, (in_w, in_h))
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)

    # Use the first input and the first output declared by the network.
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))
    res = exec_net.infer(inputs={input_blob: image_data})
    pr = res[output_blob]
    # Drop the batch dimension.
    pr = np.squeeze(pr, axis=0)  # numpy: [21,512,512]
    # Move the class axis to the last position.
    pr = np.transpose(pr, (1, 2, 0))  # numpy: [512,512,21]
    # Softmax over the class axis; subtracting the per-pixel maximum first
    # keeps np.exp from overflowing on large values.
    pr = np.exp(pr - np.max(pr, axis=-1, keepdims=True))
    pr = pr / np.sum(pr, axis=-1, keepdims=True)
    # Cut off the gray-bar region added by resize_image.
    pr = pr[int((in_h - nh) // 2): int((in_h - nh) // 2 + nh),
            int((in_w - nw) // 2): int((in_w - nw) // 2 + nw)]
    # Resize back to the original image size and pick the class per pixel.
    pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
    pr = pr.argmax(axis=-1)
    # Look up the color of every pixel's class and restore the image shape.
    seg_img = np.reshape(np.array(colors, np.uint8)[np.reshape(pr, [-1])],
                         [original_h, original_w, -1])
    image = Image.fromarray(np.uint8(seg_img))
    return image
if __name__ == "__main__":
    # Demo loop: run detection on the same picture and print the
    # per-call latency until 'q' is pressed.
    image_path = "/home/pc/文档/deeplabv3-plus-pytorch-main/img/street.jpg"
    while True:
        frame = cv2.imread(image_path)
        # cv2.imread signals a missing or unreadable file by returning None.
        if frame is None:
            raise FileNotFoundError(f"could not read image: {image_path}")
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(np.uint8(frame))
        # Time only the detection call.
        start_time = time.time()
        frame = np.array(openvino_detect(frame))
        end_time = time.time()
        print(f"推理耗时:{end_time - start_time} 秒")
        # RGB -> BGR, the channel order OpenCV expects for display.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("video", frame)
        # The body of this `if` was missing in the original, so the loop
        # could never exit.
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
    cv2.destroyAllWindows()
在使用pth进行预测时,主要的操作方法是通过torch自带的推理得到tensor形式的输出,转换维度后进行softmax,再转为numpy,最后得出结果;而在使用onnx进行预测时,主要的操作方法是调用onnxruntime的函数outputs = ort_session.run(None, {'images': image_data}),这里的'images'为转换为onnx时使用的input_name,image_data为输入图片。得到outputs后,通过pr = outputs[0]取出想要的信息,此时的pr为numpy数组,再参照pth预测时的操作,使用numpy的相关函数完成后续处理即可。