Source code for super_gradients.training.models.csp_darknet53

"""
CSP Darknet

credits: https://github.com/ultralytics
"""
import torch
import torch.nn as nn
from super_gradients.training.utils.utils import get_param, HpmStruct
from super_gradients.training.models.sg_module import SgModule


def autopad(kernel, padding=None):
    # PAD TO 'SAME'
    if padding is None:
        padding = kernel // 2 if isinstance(kernel, int) else [x // 2 for x in kernel]
    return padding
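
# Example (illustrative): autopad(3) -> 1, autopad((3, 5)) -> [1, 2] -- 'same' padding for stride-1 convolutions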


def width_multiplier(original, factor):
    return int(original * factor)


class NumClassesMissingException(Exception):
    pass


class Conv(nn.Module):
    # STANDARD CONVOLUTION
    def __init__(self, input_channels, output_channels, kernel=1, stride=1, padding=None, groups=1,
                 activation_func_type: type = nn.Hardswish):
        super().__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, kernel, stride, autopad(kernel, padding),
                              groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(output_channels)
        self.act = activation_func_type()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class Bottleneck(nn.Module):
    # STANDARD BOTTLENECK
    def __init__(self, input_channels, output_channels, shortcut=True, groups=1, width_mult_factor: float = 1.0,
                 activation_func_type: type = nn.Hardswish):
        super().__init__()
        input_channels = width_multiplier(input_channels, width_mult_factor)
        output_channels = width_multiplier(output_channels, width_mult_factor)
        hidden_channels = output_channels
        self.cv1 = Conv(input_channels, hidden_channels, 1, 1, activation_func_type=activation_func_type)
        self.cv2 = Conv(hidden_channels, output_channels, 3, 1, groups=groups,
                        activation_func_type=activation_func_type)
        self.add = shortcut and input_channels == output_channels

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions https://github.com/ultralytics/yolov5
    def __init__(self, input_channels, output_channels, bottleneck_blocks_num=1, shortcut=True, groups=1,
                 expansion=0.5, width_mult_factor: float = 1.0, depth_mult_factor: float = 1.0,
                 activation_func_type: type = nn.SiLU):
        super().__init__()
        input_channels = width_multiplier(input_channels, width_mult_factor)
        output_channels = width_multiplier(output_channels, width_mult_factor)
        hidden_channels = int(output_channels * expansion)
        bottleneck_blocks_num = (max(round(bottleneck_blocks_num * depth_mult_factor), 1)
                                 if bottleneck_blocks_num > 1 else bottleneck_blocks_num)
        self.cv1 = Conv(input_channels, hidden_channels, 1, 1, activation_func_type=activation_func_type)
        self.cv2 = Conv(input_channels, hidden_channels, 1, 1, activation_func_type=activation_func_type)
        self.cv3 = Conv(2 * hidden_channels, output_channels, 1, activation_func_type=activation_func_type)
        self.m = nn.Sequential(*[Bottleneck(hidden_channels, hidden_channels, shortcut, groups,
                                            activation_func_type=activation_func_type)
                                 for _ in range(bottleneck_blocks_num)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, input_channels, output_channels, bottleneck_blocks_num=1, shortcut=True, groups=1,
                 expansion=0.5, width_mult_factor: float = 1.0, depth_mult_factor: float = 1.0):
        super().__init__()
        input_channels = width_multiplier(input_channels, width_mult_factor)
        output_channels = width_multiplier(output_channels, width_mult_factor)
        hidden_channels = int(output_channels * expansion)
        bottleneck_blocks_num = (max(round(bottleneck_blocks_num * depth_mult_factor), 1)
                                 if bottleneck_blocks_num > 1 else bottleneck_blocks_num)
        self.cv1 = Conv(input_channels, hidden_channels, 1, 1)
        self.cv2 = nn.Conv2d(input_channels, hidden_channels, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden_channels, hidden_channels, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden_channels, output_channels, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden_channels)  # APPLIED TO CAT(CV2, CV3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(hidden_channels, hidden_channels, shortcut, groups)
                                 for _ in range(bottleneck_blocks_num)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class SPP(nn.Module):
    # SPATIAL PYRAMID POOLING LAYER USED IN YOLOV3-SPP
    def __init__(self, input_channels, output_channels, k=(5, 9, 13), width_mult_factor: float = 1.0):
        super().__init__()
        input_channels = width_multiplier(input_channels, width_mult_factor)
        output_channels = width_multiplier(output_channels, width_mult_factor)
        hidden_channels = input_channels // 2
        self.cv1 = Conv(input_channels, hidden_channels, 1, 1)
        self.cv2 = Conv(hidden_channels * (len(k) + 1), output_channels, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher https://github.com/ultralytics/yolov5
    # equivalent to SPP(k=(5, 9, 13))
    def __init__(self, input_channels, output_channels, k: int = 5, width_mult_factor: float = 1.0,
                 activation_func_type: type = nn.SiLU):
        super().__init__()
        input_channels = width_multiplier(input_channels, width_mult_factor)
        output_channels = width_multiplier(output_channels, width_mult_factor)
        hidden_channels = input_channels // 2  # hidden channels
        self.cv1 = Conv(input_channels, hidden_channels, 1, 1, activation_func_type=activation_func_type)
        self.cv2 = Conv(hidden_channels * 4, output_channels, 1, 1, activation_func_type=activation_func_type)
        self.maxpool = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.maxpool(x)
        y2 = self.maxpool(y1)
        return self.cv2(torch.cat([x, y1, y2, self.maxpool(y2)], 1))
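
# Note: two chained stride-1 5x5 max-pools act like a single 9x9, and three like a single 13x13
# (window sizes compose as k1 + k2 - 1), which is why SPPF reproduces SPP(k=(5, 9, 13)) while
# reusing the intermediate pooling results.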


class Focus(nn.Module):
    # FOCUS WH INFORMATION INTO C-SPACE
    def __init__(self, input_channels, output_channels, kernel=1, stride=1, padding=None, groups=1,
                 width_mult_factor: float = 1.0):
        super().__init__()
        output_channels = width_multiplier(output_channels, width_mult_factor)
        self.conv = Conv(input_channels * 4, output_channels, kernel, stride, padding, groups)

    def forward(self, x):
        # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
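
# Shape sketch (illustrative): for x of shape (b, c, h, w), each of the four strided slices above is
# (b, c, h/2, w/2); concatenating them channel-wise gives (b, 4c, h/2, w/2), which self.conv then
# projects to `output_channels`. This is why the Conv above takes `input_channels * 4` input channels.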


class ViewModule(nn.Module):
    """
    Returns a reshaped version of the input, to be used in non-backbone mode
    """
    def __init__(self, features=1024):
        super(ViewModule, self).__init__()
        self.features = features

    def forward(self, x):
        return x.view(-1, self.features)


class CSPDarknet53(SgModule):
    def __init__(self, arch_params: HpmStruct):
        super().__init__()
        self.num_classes = arch_params.num_classes
        self.backbone_mode = get_param(arch_params, 'backbone_mode', False)
        self.depth_mult_factor = get_param(arch_params, 'depth_mult_factor', 1.)
        self.width_mult_factor = get_param(arch_params, 'width_mult_factor', 1.)
        self.channels_in = get_param(arch_params, 'channels_in', 3)
        self.struct = get_param(arch_params, 'backbone_struct', [3, 9, 9, 3])
        width_mult = lambda channels: width_multiplier(channels, self.width_mult_factor)

        # THE MODULES LIST IS ACCESSIBLE FROM OUTSIDE THE CLASS, SO ITS STRUCTURE CAN BE CHANGED
        self._modules_list = nn.ModuleList()

        self._modules_list.append(Focus(self.channels_in, 64, 3, width_mult_factor=self.width_mult_factor))  # 0
        self._modules_list.append(Conv(width_mult(64), width_mult(128), 3, 2))  # 1
        self._modules_list.append(BottleneckCSP(128, 128, self.struct[0], width_mult_factor=self.width_mult_factor,
                                                depth_mult_factor=self.depth_mult_factor))  # 2
        self._modules_list.append(Conv(width_mult(128), width_mult(256), 3, 2))  # 3
        self._modules_list.append(BottleneckCSP(256, 256, self.struct[1], width_mult_factor=self.width_mult_factor,
                                                depth_mult_factor=self.depth_mult_factor))  # 4
        self._modules_list.append(Conv(width_mult(256), width_mult(512), 3, 2))  # 5
        self._modules_list.append(BottleneckCSP(512, 512, self.struct[2], width_mult_factor=self.width_mult_factor,
                                                depth_mult_factor=self.depth_mult_factor))  # 6
        self._modules_list.append(Conv(width_mult(512), width_mult(1024), 3, 2))  # 7
        self._modules_list.append(SPP(1024, 1024, k=(5, 9, 13), width_mult_factor=self.width_mult_factor))  # 8
        self._modules_list.append(BottleneckCSP(1024, 1024, self.struct[3], False,
                                                width_mult_factor=self.width_mult_factor,
                                                depth_mult_factor=self.depth_mult_factor))  # 9

        if not self.backbone_mode:
            # IF NOT USED AS A BACKBONE BUT AS A CLASSIFIER, ADD THE CLASSIFICATION LAYERS
            self._modules_list.append(nn.AdaptiveAvgPool2d((1, 1)))
            self._modules_list.append(ViewModule(1024))
            self._modules_list.append(nn.Linear(1024, self.num_classes))

    def forward(self, x):
        # nn.ModuleList is not callable, so feed the input through its modules sequentially
        for module in self._modules_list:
            x = module(x)
        return x
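

if __name__ == '__main__':
    # Minimal usage sketch (illustrative): builds the classifier variant and runs a forward pass.
    # Assumes HpmStruct accepts its fields as keyword arguments; only `num_classes` is required here,
    # the remaining hyper-parameters fall back to the defaults read via get_param above.
    arch_params = HpmStruct(num_classes=10)
    model = CSPDarknet53(arch_params).eval()
    dummy_input = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(dummy_input)
    print(logits.shape)  # expected: torch.Size([1, 10])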