targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
## 把正样本所对应的锚框所对应的类别的列置为1
# aim = torch. randint ( 0 , 1 , ( 1 , 80 ) )
# tensor ( [ [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ] )
# aim[ 0 , 12 ] = 1
# tensor ( [ [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ] )
result = torch.where(condition, x, y)
import torch
condition = torch. tensor ( [ True, False, True, False] )
x = torch. tensor ( [ 1 , 2 , 3 , 4 ] )
y = torch. tensor ( [ 10 , 20 , 30 , 40 ] )
result = torch. where ( condition, x, y)
print ( result)
tensor ( [ 1 , 20 , 3 , 40 ] )
targets = torch. tensor ( [ 1 , 0 , 1 , 0 , 1 ] )
torch. eq ( targets, 1. )
Out[ 20 ] : tensor ( [ True, False, True, False, True] )
retinaNet FocalLoss源码详解
class FocalLoss ( nn. Module) :
#def __init__ ( self) :
def forward ( self, classifications, regressions, anchors, annotations) :
alpha = 0.25
gamma = 2.0
batch_size = classifications. shape[ 0 ]
classification_losses = [ ]
regression_losses = [ ]
anchor = anchors[ 0 , : , : ]
anchor_widths = anchor[ : , 2 ] - anchor[ : , 0 ]
anchor_heights = anchor[ : , 3 ] - anchor[ : , 1 ]
anchor_ctr_x = anchor[ : , 0 ] + 0.5 * anchor_widths
anchor_ctr_y = anchor[ : , 1 ] + 0.5 * anchor_heights
for j in range ( batch_size) :
classification = classifications[ j, : , : ]
regression = regressions[ j, : , : ]
bbox_annotation = annotations[ j, : , : ]
bbox_annotation = bbox_annotation[ bbox_annotation[ : , 4 ] != - 1 ]
classification = torch. clamp ( classification, 1e-4 , 1.0 - 1e-4 )
if bbox_annotation. shape[ 0 ] == 0 :
if torch. cuda. is_available ( ) :
alpha_factor = torch. ones ( classification. shape) . cuda ( ) * alpha
alpha_factor = 1. - alpha_factor
focal_weight = classification
focal_weight = alpha_factor * torch. pow ( focal_weight, gamma)
bce = - ( torch. log ( 1.0 - classification) )
# cls_loss = focal_weight * torch. pow ( bce, gamma)
cls_loss = focal_weight * bce
classification_losses. append ( cls_loss. sum ( ) )
regression_losses. append ( torch. tensor ( 0 ) . float ( ) . cuda ( ) )
else :
alpha_factor = torch. ones ( classification. shape) * alpha
alpha_factor = 1. - alpha_factor
focal_weight = classification
focal_weight = alpha_factor * torch. pow ( focal_weight, gamma)
bce = - ( torch. log ( 1.0 - classification) )
# cls_loss = focal_weight * torch. pow ( bce, gamma)
cls_loss = focal_weight * bce
classification_losses. append ( cls_loss. sum ( ) )
regression_losses. append ( torch. tensor ( 0 ) . float ( ) )
continue
IoU = calc_iou ( anchors[ 0 , : , : ] , bbox_annotation[ : , : 4 ] ) # num_anchors x num_annotations
IoU_max, IoU_argmax = torch. max ( IoU, dim= 1 ) # num_anchors x 1
#Iou_max 每一行的最大值,即锚框与标注框iou的最大值,iou_argmax代表是第几个标注框
#import pdb
#pdb. set_trace ( )
# compute the loss for classification
targets = torch. ones ( classification. shape) * - 1
if torch. cuda. is_available ( ) :
targets = targets. cuda ( )
targets[ torch. lt ( IoU_max, 0.4 ) , : ] = 0
#把锚框与目标框iou低于0.4 的targets值置为0
positive_indices = torch. ge ( IoU_max, 0.5 )
#iou_max 大于0.5 的置为True
num_positive_anchors = positive_indices. sum ( )
assigned_annotations = bbox_annotation[ IoU_argmax, : ]
##assigned_annotations 存放与锚框 iou最大的标注框
targets[ positive_indices, : ] = 0
targets[ positive_indices, assigned_annotations[ positive_indices, 4 ] . long ( ) ] = 1
## 把正样本所对应的锚框所对应的类别的列置为1
# aim = torch. randint ( 0 , 1 , ( 1 , 80 ) )
# tensor ( [ [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ] )
# aim[ 0 , 12 ] = 1
# tensor ( [ [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
# 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ] )
if torch. cuda. is_available ( ) :
alpha_factor = torch. ones ( targets. shape) . cuda ( ) * alpha
else :
alpha_factor = torch. ones ( targets. shape) * alpha
alpha_factor = torch. where ( torch. eq ( targets, 1. ) , alpha_factor, 1. - alpha_factor)
#对应为1 的位置设置为alpha_factor
focal_weight = torch. where ( torch. eq ( targets, 1. ) , 1. - classification, classification)
focal_weight = alpha_factor * torch. pow ( focal_weight, gamma)
bce = - ( targets * torch. log ( classification) + ( 1.0 - targets) * torch. log ( 1.0 - classification) )
#注意这里的log是以e为底
#二元交叉熵损失(Binary Cross Entropy, BCE)
# cls_loss = focal_weight * torch. pow ( bce, gamma)
# 80 中为一位都使用该公式进行计算
cls_loss = focal_weight * bce
if torch. cuda. is_available ( ) :
cls_loss = torch. where ( torch. ne ( targets, - 1.0 ) , cls_loss, torch. zeros ( cls_loss. shape) . cuda ( ) )
else :
cls_loss = torch. where ( torch. ne ( targets, - 1.0 ) , cls_loss, torch. zeros ( cls_loss. shape) )
temp = cls_loss. sum ( ) / torch. clamp ( num_positive_anchors. float ( ) , min= 1.0 )
classification_losses. append ( cls_loss. sum ( ) / torch. clamp ( num_positive_anchors. float ( ) , min= 1.0 ) )
# compute the loss for regression
if positive_indices. sum ( ) > 0 :
assigned_annotations = assigned_annotations[ positive_indices, : ]
anchor_widths_pi = anchor_widths[ positive_indices]
#把正样本的anchor_widths拿出来
#注意理解anchor_widths[ positive_indices]
anchor_heights_pi = anchor_heights[ positive_indices]
anchor_ctr_x_pi = anchor_ctr_x[ positive_indices]
anchor_ctr_y_pi = anchor_ctr_y[ positive_indices]
gt_widths = assigned_annotations[ : , 2 ] - assigned_annotations[ : , 0 ]
gt_heights = assigned_annotations[ : , 3 ] - assigned_annotations[ : , 1 ]
gt_ctr_x = assigned_annotations[ : , 0 ] + 0.5 * gt_widths
gt_ctr_y = assigned_annotations[ : , 1 ] + 0.5 * gt_heights
# clip widths to 1
gt_widths = torch. clamp ( gt_widths, min= 1 )
gt_heights = torch. clamp ( gt_heights, min= 1 )
targets_dx = ( gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
# 整个表达式
# targets_dx = ( gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
# 的意义是计算目标框和锚框在x方向上的相对位移,并将其归一化到锚框的宽度上。
#
# 具体地说:
# ( gt_ctr_x - anchor_ctr_x_pi)
# 计算了真实目标框和锚框在x方向上的中心点的差值。
# 除以anchor_widths_pi 将这个差值归一化到锚框的宽度上。
targets_dy = ( gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
targets_dw = torch. log ( gt_widths / anchor_widths_pi)
targets_dh = torch. log ( gt_heights / anchor_heights_pi)
# 表达式gt_widths / anchor_widths_pi
# 计算了真实目标框宽度和锚框宽度之间的比例。然后,对这个比例取自然对数(torch. log),得到的结果
# targets_dw是对数空间中的相对宽度差。
# 这样的计算通常在目标检测任务中用于计算宽度方向上的损失。对数变换有助于处理不同尺度的宽度,因为当宽度差异很大时,
# 对数尺度上的差异会变得更加均匀。此外,对数变换还有助于模型更好地学习如何调整锚框的宽度以匹配真实的目标框。
# 例如,如果gt_widths是100 ,而anchor_widths_pi是50 ,那么gt_widths / anchor_widths_pi将是2 ,而
# torch. log ( 2 ) 大约是0.693 。这意味着目标框的宽度是锚框宽度的两倍,而在对数尺度上,这个差异被表示为大约0.693 。
targets = torch. stack ( ( targets_dx, targets_dy, targets_dw, targets_dh) )
targets = targets. t ( )
if torch. cuda. is_available ( ) :
targets = targets/ torch. Tensor ( [ [ 0.1 , 0.1 , 0.2 , 0.2 ] ] ) . cuda ( )
else :
targets = targets/ torch. Tensor ( [ [ 0.1 , 0.1 , 0.2 , 0.2 ] ] )
negative_indices = 1 + ( ~ positive_indices)
regression_diff = torch. abs ( targets - regression[ positive_indices, : ] )
regression_loss = torch. where (
torch. le ( regression_diff, 1.0 / 9.0 ) ,
0.5 * 9.0 * torch. pow ( regression_diff, 2 ) ,
regression_diff - 0.5 / 9.0
)
regression_losses. append ( regression_loss. mean ( ) )
else :
if torch. cuda. is_available ( ) :
regression_losses. append ( torch. tensor ( 0 ) . float ( ) . cuda ( ) )
else :
regression_losses. append ( torch. tensor ( 0 ) . float ( ) )
return torch. stack ( classification_losses) . mean ( dim= 0 , keepdim= True) , torch. stack ( regression_losses) . mean ( dim= 0 , keepdim= True)