# Source code for super_gradients.training.utils.ssd_utils

import itertools
from math import sqrt
import numpy as np
import torch
from torch.nn import functional as F

from super_gradients.training.utils.detection_utils import non_max_suppression, NMS_Type, \
    matrix_non_max_suppression, DetectionPostPredictionCallback


class DefaultBoxes(object):
    """
    Default Boxes (aka anchor boxes or prior boxes) used by the SSD model.

    :param fig_size:      input image size (assumed square), e.g. 300
    :param feat_size:     list of square feature-map sizes, one per prediction head
    :param steps:         stride of each feature map w.r.t. the input image
    :param scales:        len(feat_size) + 1 anchor scales in pixels; scales[i] and
                          scales[i + 1] together define the boxes of feature map i
    :param aspect_ratios: per-feature-map list of extra aspect ratios; each ratio
                          `ar` adds two boxes (ar and 1/ar) on top of the two
                          square boxes every cell gets
    :param scale_xy:      encoding variance applied to box-center offsets
    :param scale_wh:      encoding variance applied to box width/height offsets
    """

    def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0.1, scale_wh=0.2):
        self.feat_size = feat_size
        self.fig_size = fig_size
        self.scale_xy_ = scale_xy
        self.scale_wh_ = scale_wh

        # According to https://github.com/weiliu89/caffe
        # Calculation method slightly different from paper
        self.steps = steps
        self.scales = scales
        # fk[idx] = number of cells of feature map idx along one axis (fig_size / stride)
        fk = fig_size / np.array(steps)
        self.aspect_ratios = aspect_ratios

        self.default_boxes = []
        # size of feature and number of feature
        for idx, sfeat in enumerate(self.feat_size):
            # Relative (0..1) scales of the two square boxes for this feature map
            sk1 = scales[idx] / fig_size
            sk2 = scales[idx + 1] / fig_size
            sk3 = sqrt(sk1 * sk2)
            all_sizes = [(sk1, sk1), (sk3, sk3)]

            # Two rectangular boxes (ar and 1/ar) per extra aspect ratio
            for alpha in aspect_ratios[idx]:
                w, h = sk1 * sqrt(alpha), sk1 / sqrt(alpha)
                all_sizes.append((w, h))
                all_sizes.append((h, w))

            # One box of each size centered on every feature-map cell
            for w, h in all_sizes:
                for i, j in itertools.product(range(sfeat), repeat=2):
                    cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
                    self.default_boxes.append((cx, cy, w, h))

        self.dboxes = torch.tensor(self.default_boxes, dtype=torch.float)
        self.dboxes.clamp_(min=0, max=1)

        # For IoU calculation: same boxes in corner (x1, y1, x2, y2) format
        self.dboxes_xyxy = self.dboxes.clone()
        self.dboxes_xyxy[:, 0] = self.dboxes[:, 0] - 0.5 * self.dboxes[:, 2]
        self.dboxes_xyxy[:, 1] = self.dboxes[:, 1] - 0.5 * self.dboxes[:, 3]
        self.dboxes_xyxy[:, 2] = self.dboxes[:, 0] + 0.5 * self.dboxes[:, 2]
        self.dboxes_xyxy[:, 3] = self.dboxes[:, 1] + 0.5 * self.dboxes[:, 3]

    @property
    def scale_xy(self):
        return self.scale_xy_

    @property
    def scale_wh(self):
        return self.scale_wh_

    def __call__(self, order="xyxy"):
        """Return the default boxes; "xyxy" = corners, "xywh" = center/size.
        NOTE: any other `order` value falls through and returns None (original
        behavior, preserved)."""
        if order == "xyxy":
            return self.dboxes_xyxy
        if order == "xywh":
            return self.dboxes

    @staticmethod
    def dboxes300_coco():
        """Canonical SSD300 anchor configuration (8732 boxes)."""
        figsize = 300
        feat_size = [38, 19, 10, 5, 3, 1]
        steps = [8, 16, 32, 64, 100, 300]
        # use the scales here: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
        scales = [21, 45, 99, 153, 207, 261, 315]
        aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
        return DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios)

    @staticmethod
    def dboxes300_coco_from19():
        """
        This dbox configuration is a bit different from the original dboxes300_coco
        It is suitable for a network taking the first skip connection from a 19x19 layer (instead of 38x38 in the
        original paper).
        This offers less coverage for small objects but more aspect ratios options to larger objects (the original
        paper supports object starting from size 21 pixels, while this config support objects starting from 60 pixels)
        """
        # https://github.com/qfgaohao/pytorch-ssd/blob/f61ab424d09bf3d4bb3925693579ac0a92541b0d/vision/ssd/config/mobilenetv1_ssd_config.py
        figsize = 300
        feat_size = [19, 10, 5, 3, 2, 1]
        steps = [16, 32, 64, 100, 150, 300]
        scales = [60, 105, 150, 195, 240, 285, 330]
        aspect_ratios = [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
        return DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios)

    @staticmethod
    def dboxes256_coco():
        """SSD anchor configuration for 256x256 inputs (6132 boxes)."""
        figsize = 256
        feat_size = [32, 16, 8, 4, 2, 1]
        steps = [8, 16, 32, 64, 128, 256]
        # use the scales here: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
        # FIX: was 1177 (typo) — scales must be monotonically increasing (~+46 px per level);
        # 1177 made the 4x4 level's upper scale 4.6x the image size.
        scales = [18, 38, 84, 131, 177, 223, 269]
        aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
        return DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios)
class SSDPostPredictCallback(DetectionPostPredictionCallback):
    """
    post prediction callback module to convert and filter predictions coming from the SSD net to a format
    used by all other detection models
    """

    def __init__(self, conf: float = 0.1, iou: float = 0.45, classes: list = None,
                 max_predictions: int = 300,
                 nms_type: NMS_Type = NMS_Type.ITERATIVE,
                 dboxes: DefaultBoxes = None,
                 device='cuda'):
        """
        :param conf:            confidence threshold
        :param iou:             IoU threshold
        :param classes:         (optional list) filter by class
        :param max_predictions: maximum number of detections kept by matrix NMS
        :param nms_type:        the type of nms to use (iterative or matrix)
        :param dboxes:          anchor configuration; defaults to DefaultBoxes.dboxes300_coco().
                                FIX: the default used to be evaluated once at class-definition
                                time (building all 8732 anchors at import even when unused, and
                                sharing one object across instances); it is now built lazily.
        :param device:          device the anchor tensor is moved to
        """
        super(SSDPostPredictCallback, self).__init__()
        self.conf = conf
        self.iou = iou
        self.nms_type = nms_type
        self.classes = classes
        self.max_predictions = max_predictions

        if dboxes is None:
            dboxes = DefaultBoxes.dboxes300_coco()
        self.dboxes_xywh = dboxes('xywh').to(device)
        self.scale_xy = dboxes.scale_xy
        self.scale_wh = dboxes.scale_wh
        self.img_size = dboxes.fig_size

    def forward(self, x, device=None):
        """
        Decode raw SSD head outputs into absolute boxes and run NMS.

        :param x: sequence of (bboxes_in, scores_in); both are permuted with
                  (0, 2, 1) below, so presumably (batch, 4, num_anchors) and
                  (batch, num_classes, num_anchors) with class 0 = background
                  — TODO confirm against the SSD head.
        :param device: unused, kept for interface compatibility with the base class
        :return: per-image NMS results; column 5 is converted to a 1-based class label
        NOTE(review): decoding is done in-place, so the tensors inside `x` are modified.
        """
        bboxes_in = x[0]
        scores_in = x[1]

        # (batch, C, anchors) -> (batch, anchors, C)
        bboxes_in = bboxes_in.permute(0, 2, 1)
        scores_in = scores_in.permute(0, 2, 1)

        # Undo the encoding variances
        bboxes_in[:, :, :2] *= self.scale_xy
        bboxes_in[:, :, 2:] *= self.scale_wh

        # CONVERT RELATIVE LOCATIONS INTO ABSOLUTE LOCATION (OUTPUT LOCATIONS ARE RELATIVE TO THE DBOXES)
        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, 2:] + self.dboxes_xywh[:, :2]
        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, 2:]

        scores_in = F.softmax(scores_in, dim=-1)  # TODO softmax without first item?

        # REPLACE THE CONFIDENCE OF CLASS NONE WITH OBJECT CONFIDENCE
        # SSD DOES NOT OUTPUT OBJECT CONFIDENCE, REQUIRED FOR THE NMS
        scores_in[:, :, 0] = torch.max(scores_in[:, :, 1:], dim=2)[0]

        # Anchors are normalized to [0, 1]; scale boxes back to pixel coordinates
        bboxes_in *= self.img_size

        nms_input = torch.cat((bboxes_in, scores_in), dim=2)

        if self.nms_type == NMS_Type.ITERATIVE:
            nms_res = non_max_suppression(nms_input, conf_thres=self.conf, iou_thres=self.iou,
                                          classes=self.classes)
        else:
            nms_res = matrix_non_max_suppression(nms_input, conf_thres=self.conf,
                                                 max_num_of_detections=self.max_predictions)

        # NMS OUTPUT A 0-BASED CLASS LABEL, BUT SSD WORKS WITH 1-BASED CLASS LABEL
        for t in nms_res:
            if t is not None:
                t[:, 5] += 1
        return nms_res