Source code for super_gradients.training.models.resnet

"""ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

Code adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
"""

import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

from super_gradients.training.models import SgModule


def width_multiplier(original, factor):
    return int(original * factor)
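
# Illustrative note (not in the original module): int() truncates toward zero,
# so e.g. width_multiplier(64, 0.5) == 32 and width_multiplier(64, 0.33) == 21.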


class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1, expansion=1, final_relu=True):
        super(BasicBlock, self).__init__()
        self.expansion = expansion
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.final_relu = final_relu

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        if self.final_relu:
            out = F.relu(out)
        return out
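
    # Usage sketch (illustrative, not part of the original module): with stride=2 and a
    # channel change, the 1x1 projection shortcut matches the residual branch's shape.
    #
    #   >>> import torch
    #   >>> block = BasicBlock(in_planes=64, planes=128, stride=2)
    #   >>> block(torch.randn(1, 64, 32, 32)).shape
    #   torch.Size([1, 128, 16, 16])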


class Bottleneck(nn.Module):
    def __init__(self, in_planes, planes, stride=1, expansion=4, final_relu=True):
        super(Bottleneck, self).__init__()
        self.expansion = expansion
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)
        self.final_relu = final_relu

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        if self.final_relu:
            out = F.relu(out)
        return out
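
    # Usage sketch (illustrative): with the default expansion=4 the block widens the
    # output channels, so even at stride=1 the shortcut projects 64 -> 256 channels.
    #
    #   >>> import torch
    #   >>> block = Bottleneck(in_planes=64, planes=64, stride=1)
    #   >>> block(torch.randn(1, 64, 32, 32)).shape
    #   torch.Size([1, 256, 32, 32])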


class CifarResNet(SgModule):
    def __init__(self, block, num_blocks, num_classes=10, width_mult=1, expansion=1):
        super(CifarResNet, self).__init__()
        self.expansion = expansion
        self.structure = [num_blocks, width_mult]
        self.in_planes = width_multiplier(64, width_mult)
        self.conv1 = nn.Conv2d(3, width_multiplier(64, width_mult), kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width_multiplier(64, width_mult))
        self.layer1 = self._make_layer(block, width_multiplier(64, width_mult), num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, width_multiplier(128, width_mult), num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, width_multiplier(256, width_mult), num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, width_multiplier(512, width_mult), num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(width_multiplier(512, width_mult) * self.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []

        if num_blocks == 0:
            # WHEN THE NUMBER OF BLOCKS IS ZERO BUT THE SPATIAL DIMENSION AND/OR THE NUMBER OF FILTERS IS ABOUT TO
            # CHANGE, WE INSERT A SINGLE 3x3 CONV LAYER TO PERFORM THAT CHANGE TO THE NEW DIMENSIONS
            if stride != 1 or self.in_planes != planes:
                layers.append(nn.Sequential(
                    nn.Conv2d(self.in_planes, planes, kernel_size=3, stride=stride, bias=False, padding=1),
                    nn.BatchNorm2d(planes))
                )
                self.in_planes = planes
        else:
            for stride in strides:
                layers.append(block(self.in_planes, planes, stride))
                self.in_planes = planes * self.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
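
    # Usage sketch (illustrative): the stride-1, 3x3 stem (no maxpool) is what makes this
    # variant suitable for 32x32 CIFAR inputs, unlike the ImageNet-style ResNet below.
    #
    #   >>> import torch
    #   >>> model = CifarResNet(BasicBlock, [2, 2, 2, 2], num_classes=10)
    #   >>> model(torch.randn(1, 3, 32, 32)).shape
    #   torch.Size([1, 10])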


class ResNet(SgModule):
    def __init__(self, block, num_blocks: list, num_classes: int = 10, width_mult: float = 1,
                 expansion: int = 1, input_batchnorm: bool = False, backbone_mode: bool = False):
        super(ResNet, self).__init__()
        self.expansion = expansion
        self.backbone_mode = backbone_mode
        self.structure = [num_blocks, width_mult]
        self.in_planes = width_multiplier(64, width_mult)
        self.input_batchnorm = input_batchnorm
        if self.input_batchnorm:
            self.bn0 = nn.BatchNorm2d(3)

        self.conv1 = nn.Conv2d(3, width_multiplier(64, width_mult), kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(width_multiplier(64, width_mult))
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, width_multiplier(64, width_mult), num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, width_multiplier(128, width_mult), num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, width_multiplier(256, width_mult), num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, width_multiplier(512, width_mult), num_blocks[3], stride=2)

        if not self.backbone_mode:
            # IF RESNET IS IN BACKBONE MODE WE DON'T NEED THE FINAL CLASSIFIER LAYERS, ONLY THE NET BLOCK STRUCTURE
            self.linear = nn.Linear(width_multiplier(512, width_mult) * self.expansion, num_classes)
            self.avgpool = nn.AdaptiveAvgPool2d(1)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []

        if num_blocks == 0:
            # WHEN THE NUMBER OF BLOCKS IS ZERO BUT THE SPATIAL DIMENSION AND/OR THE NUMBER OF FILTERS IS ABOUT TO
            # CHANGE, WE INSERT A SINGLE 3x3 CONV LAYER TO PERFORM THAT CHANGE TO THE NEW DIMENSIONS
            if stride != 1 or self.in_planes != planes:
                layers.append(nn.Sequential(
                    nn.Conv2d(self.in_planes, planes, kernel_size=3, stride=stride, bias=False, padding=1),
                    nn.BatchNorm2d(planes))
                )
                self.in_planes = planes
        else:
            for stride in strides:
                layers.append(block(self.in_planes, planes, stride))
                self.in_planes = planes * self.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        if self.input_batchnorm:
            x = self.bn0(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        if not self.backbone_mode:
            # IF RESNET IS *NOT* IN BACKBONE MODE WE NEED THE FINAL CLASSIFIER LAYERS' OUTPUTS
            out = self.avgpool(out)
            out = out.squeeze(dim=2).squeeze(dim=2)
            out = self.linear(out)
        return out

    def load_state_dict(self, state_dict, strict=True):
        """
        load_state_dict - Overloads the base method and calls it to load a modified dict for usage as a backbone
        :param state_dict: The state_dict to load
        :param strict:     strict loading (see super() docs)
        """
        pretrained_model_weights_dict = state_dict.copy()

        if self.backbone_mode:
            # FIRST LET'S POP THE LAST TWO LAYERS - NO NEED TO LOAD THEIR VALUES SINCE THEY ARE IRRELEVANT AS A BACKBONE
            pretrained_model_weights_dict.popitem()
            pretrained_model_weights_dict.popitem()

            pretrained_backbone_weights_dict = OrderedDict()
            for layer_name, weights in pretrained_model_weights_dict.items():
                # GET THE LAYER NAME WITHOUT THE 'module.' PREFIX
                name_without_module_prefix = layer_name.split('module.')[1]

                # MAKE SURE THESE ARE NOT THE FINAL LAYERS
                pretrained_backbone_weights_dict[name_without_module_prefix] = weights

            # LOAD THE MODIFIED STATE DICT, STRIPPED OF THE CLASSIFIER HEAD
            super().load_state_dict(pretrained_backbone_weights_dict, strict)
        else:
            # LOAD THE UNMODIFIED STATE DICT
            super().load_state_dict(pretrained_model_weights_dict, strict)
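
    # Loading sketch (illustrative; the checkpoint path is hypothetical): checkpoints saved
    # from a DataParallel-wrapped classifier carry a 'module.' key prefix and end with the
    # classifier's 'linear.weight' / 'linear.bias' entries - exactly what the backbone_mode
    # branch above strips before delegating to the base implementation.
    #
    #   >>> import torch
    #   >>> backbone = ResNet(BasicBlock, [2, 2, 2, 2], backbone_mode=True)
    #   >>> backbone.load_state_dict(torch.load('checkpoint.pth'))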


def ResNet18(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode)


def ResNet18Cifar(arch_params, num_classes=None):
    return CifarResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes or arch_params.num_classes)


def ResNet34(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode)


def ResNet50(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode, expansion=4)


def ResNet50_3343(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(Bottleneck, [3, 3, 4, 3], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode, expansion=4)


def ResNet101(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode, expansion=4)


def ResNet152(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes or arch_params.num_classes,
                  backbone_mode=backbone_mode, expansion=4)


def CustomizedResnetCifar(arch_params, num_classes=None):
    return CifarResNet(BasicBlock, arch_params.structure, width_mult=arch_params.width_mult,
                       num_classes=num_classes or arch_params.num_classes)


def CustomizedResnet50Cifar(arch_params, num_classes=None):
    return CifarResNet(Bottleneck, arch_params.structure, width_mult=arch_params.width_mult,
                       num_classes=num_classes or arch_params.num_classes, expansion=4)


def CustomizedResnet(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(BasicBlock, arch_params.structure, width_mult=arch_params.width_mult,
                  num_classes=num_classes or arch_params.num_classes, backbone_mode=backbone_mode)


def CustomizedResnet50(arch_params, num_classes=None, backbone_mode=None):
    return ResNet(Bottleneck, arch_params.structure, width_mult=arch_params.width_mult,
                  num_classes=num_classes or arch_params.num_classes, backbone_mode=backbone_mode, expansion=4)
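

# Minimal smoke test (illustrative, not part of the original module). arch_params is
# bypassed by passing num_classes explicitly, so no arch_params object is required;
# only a local torch install is assumed.
if __name__ == "__main__":
    import torch

    # Full classifier: the 7x7 stride-2 stem plus maxpool downsamples 224 -> 56 before layer1.
    model = ResNet50(arch_params=None, num_classes=1000)
    print(model(torch.randn(2, 3, 224, 224)).shape)  # torch.Size([2, 1000])

    # Backbone mode: no avgpool/linear head, so the raw layer4 feature map comes back.
    backbone = ResNet50(arch_params=None, num_classes=1000, backbone_mode=True)
    print(backbone(torch.randn(2, 3, 224, 224)).shape)  # torch.Size([2, 2048, 7, 7])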