Source code for super_gradients.training.models.laddernet

import math

import torch
import torch.nn as nn
import torch.nn.functional as F

up_kwargs = {'mode': 'bilinear', 'align_corners': True}


# from encoding.nn import SyncBatchNorm # FIXME - ORIGINAL CODE TORCH-ENCODING


class LadderBottleneck(nn.Module):
    """ResNet Bottleneck"""

    # pylint: disable=unused-argument
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, previous_dilation=1, norm_layer=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = norm_layer(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=dilation, dilation=dilation, bias=False)
        self.bn2 = norm_layer(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = norm_layer(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride

    def _sum_each(self, x, y):
        assert len(x) == len(y)
        z = []
        for i in range(len(x)):
            z.append(x[i] + y[i])
        return z
    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
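# Editor's sketch (not part of the original module): a minimal shape check for
# LadderBottleneck. norm_layer has no default, so nn.BatchNorm2d is passed
# explicitly; the downsample branch must match the 4x channel expansion.
def _demo_ladder_bottleneck():
    downsample = nn.Sequential(
        nn.Conv2d(64, 256, kernel_size=1, bias=False),
        nn.BatchNorm2d(256),
    )
    block = LadderBottleneck(inplanes=64, planes=64, downsample=downsample, norm_layer=nn.BatchNorm2d)
    out = block(torch.randn(1, 64, 56, 56))
    assert out.shape == (1, 256, 56, 56)  # spatial size kept, channels expanded 4x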
class LadderResNet(nn.Module):
    """Dilated Pre-trained ResNet Model, which produces stride-8 feature maps at conv5.

    Parameters
    ----------
    block : Block
        Class for the residual block. Options are BasicBlockV1, BottleneckV1.
    layers : list of int
        Numbers of layers in each block
    num_classes : int, default 1000
        Number of classification classes.
    dilated : bool, default False
        Applying dilation strategy to pretrained ResNet yielding a stride-8 model,
        typically used in Semantic Segmentation.
    norm_layer : object
        Normalization layer used in backbone network (default: :class:`torch.nn.BatchNorm2d`;
        pass a synchronized cross-GPU BatchNormalization layer for multi-GPU training).

    Reference:
        - He, Kaiming, et al. "Deep residual learning for image recognition."
          Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
        - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
    """

    # pylint: disable=unused-variable
    # def __init__(self, block, layers, num_classes=1000, dilated=False, norm_layer=SyncBatchNorm):  # FIXME - ORIGINAL CODE
    def __init__(self, block, layers, num_classes=1000, dilated=False, norm_layer=nn.BatchNorm2d):  # FIXME - TIME MEASUREMENT CODE
        self.inplanes = 64
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
        if dilated:
            self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer)
            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, norm_layer=norm_layer)
        else:
            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, norm_layer):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                norm_layer(planes * block.expansion),
            )

        layers = []
        if dilation == 1 or dilation == 2:
            layers.append(block(self.inplanes, planes, stride, dilation=1,
                                downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer))
        elif dilation == 4:
            layers.append(block(self.inplanes, planes, stride, dilation=2,
                                downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer))
        else:
            raise RuntimeError("=> unknown dilation size: {}".format(dilation))

        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation=dilation,
                                previous_dilation=dilation, norm_layer=norm_layer))

        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
class LadderNetBackBone503433(LadderResNet):
    def __init__(self, num_classes: int):
        super().__init__(LadderBottleneck, [3, 4, 3, 3], num_classes=num_classes)
class LadderNetBackBone50(LadderResNet):
    def __init__(self, num_classes: int):
        super().__init__(LadderBottleneck, [3, 4, 6, 3], num_classes=num_classes)
class LadderNetBackBone101(LadderResNet):
    def __init__(self, num_classes: int):
        super().__init__(LadderBottleneck, [3, 4, 23, 3], num_classes=num_classes)
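# Editor's sketch: the three backbones differ only in their per-stage block
# counts ([3, 4, 3, 3] vs. [3, 4, 6, 3] vs. [3, 4, 23, 3]). Because the
# classifier path ends in a fixed AvgPool2d(7), it assumes 224x224 inputs.
def _demo_backbone():
    model = LadderNetBackBone50(num_classes=1000).eval()
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    assert logits.shape == (1, 1000)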
class BaseNet(nn.Module):
    def __init__(self, nclass, backbone, aux, se_loss, dilated=True, norm_layer=None,
                 base_size=576, crop_size=608, mean=[.485, .456, .406],
                 std=[.229, .224, .225], root='~/.encoding/models'):
        super(BaseNet, self).__init__()
        self.nclass = nclass
        self.aux = aux
        self.se_loss = se_loss
        self.mean = mean
        self.std = std
        self.base_size = base_size
        self.crop_size = crop_size
        self.image_size = self.crop_size
        # copying modules from pretrained models
        if backbone == 'resnet50':
            self.backbone = LadderNetBackBone50(num_classes=1000)
        elif backbone == 'resnet50_3433':
            self.backbone = LadderNetBackBone503433(num_classes=1000)
        elif backbone == 'resnet101':
            self.backbone = LadderNetBackBone101(num_classes=1000)
        # elif backbone == 'resnet152':
        #     self.pretrained = resnet.resnet152(pretrained=True, dilated=dilated,
        #                                        norm_layer=norm_layer, root=root)
        # elif backbone == 'resnet18':
        #     self.pretrained = resnet.resnet18(pretrained=True, dilated=dilated,
        #                                       norm_layer=norm_layer, root=root)
        # elif backbone == 'resnet34':
        #     self.pretrained = resnet.resnet34(pretrained=True, dilated=dilated,
        #                                       norm_layer=norm_layer, root=root)
        else:
            raise RuntimeError('unknown backbone: {}'.format(backbone))
        # bilinear upsample options
        self._up_kwargs = up_kwargs
    def base_forward(self, x):
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)
        c1 = self.backbone.layer1(x)
        c2 = self.backbone.layer2(c1)
        c3 = self.backbone.layer3(c2)
        c4 = self.backbone.layer4(c3)
        return c1, c2, c3, c4
    # def evaluate(self, x, target=None):
    #     pred = self.forward(x)
    #     if isinstance(pred, (tuple, list)):
    #         pred = pred[0]
    #     if target is None:
    #         return pred
    #     correct, labeled = batch_pix_accuracy(pred.data, target.data)
    #     inter, union = batch_intersection_union(pred.data, target.data, self.nclass)
    #     return correct, labeled, inter, union


# dropout probability shared by all BasicBlock instances below
drop = 0.25
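# Editor's sketch: base_forward skips the classifier head and returns the four
# residual stages, i.e. feature maps at strides 4, 8, 16 and 32 (the backbones
# above are built with the default dilated=False).
def _demo_base_forward():
    net = BaseNet(nclass=21, backbone='resnet50', aux=False, se_loss=False).eval()
    with torch.no_grad():
        c1, c2, c3, c4 = net.base_forward(torch.randn(1, 3, 224, 224))
    assert c1.shape == (1, 256, 56, 56)
    assert c2.shape == (1, 512, 28, 28)
    assert c3.shape == (1, 1024, 14, 14)
    assert c4.shape == (1, 2048, 7, 7)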
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, rate=1, downsample=None):
        super(BasicBlock, self).__init__()
        if inplanes != planes:
            self.conv0 = conv3x3(inplanes, planes, rate)

        self.inplanes = inplanes
        self.planes = planes

        self.conv1 = conv3x3(planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        # conv2 is intentionally unused: forward() applies conv1 twice, so both
        # convolutions in the residual branch share the same weights
        # self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        self.drop = nn.Dropout2d(p=drop)
    def forward(self, x):
        if self.inplanes != self.planes:
            x = self.conv0(x)
            x = F.relu(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.drop(out)

        out1 = self.conv1(out)  # conv1 reused: shared-weight residual branch
        out1 = self.bn2(out1)
        # out1 = self.relu(out1)

        out2 = out1 + x

        return F.relu(out2)
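# Editor's sketch: BasicBlock preserves spatial size, inserts conv0 only when
# the channel count changes, and reuses conv1 for both convolutions (apparently
# the shared-weights residual block described in the LadderNet paper).
def _demo_basic_block():
    block = BasicBlock(inplanes=8, planes=16).eval()  # conv0 maps 8 -> 16 channels
    with torch.no_grad():
        out = block(torch.randn(1, 8, 32, 32))
    assert out.shape == (1, 16, 32, 32)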
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class Initial_LadderBlock(nn.Module):
    def __init__(self, planes, layers, kernel=3, block=BasicBlock, inplanes=3):
        super().__init__()
        self.planes = planes
        self.layers = layers
        self.kernel = kernel
        self.padding = int((kernel - 1) / 2)
        self.inconv = nn.Conv2d(in_channels=inplanes, out_channels=planes,
                                kernel_size=3, stride=1, padding=1, bias=True)
        self.in_bn = nn.BatchNorm2d(planes)

        # create module list for down branch
        self.down_module_list = nn.ModuleList()
        for i in range(0, layers):
            self.down_module_list.append(block(planes * (2 ** i), planes * (2 ** i)))

        # use strided conv instead of pooling
        self.down_conv_list = nn.ModuleList()
        for i in range(0, layers):
            self.down_conv_list.append(
                nn.Conv2d(planes * 2 ** i, planes * 2 ** (i + 1),
                          stride=2, kernel_size=kernel, padding=self.padding))

        # create module for bottom block
        self.bottom = block(planes * (2 ** layers), planes * (2 ** layers))

        # create module list for up branch
        self.up_conv_list = nn.ModuleList()
        self.up_dense_list = nn.ModuleList()
        for i in range(0, layers):
            self.up_conv_list.append(
                nn.ConvTranspose2d(in_channels=planes * 2 ** (layers - i),
                                   out_channels=planes * 2 ** max(0, layers - i - 1),
                                   kernel_size=3, stride=2, padding=1,
                                   output_padding=1, bias=True))
            self.up_dense_list.append(
                block(planes * 2 ** max(0, layers - i - 1), planes * 2 ** max(0, layers - i - 1)))
    def forward(self, x):
        out = self.inconv(x)
        out = self.in_bn(out)
        out = F.relu(out)

        down_out = []
        # down branch
        for i in range(0, self.layers):
            out = self.down_module_list[i](out)
            down_out.append(out)
            out = self.down_conv_list[i](out)
            out = F.relu(out)

        # bottom branch
        out = self.bottom(out)
        bottom = out

        # up branch
        up_out = []
        up_out.append(bottom)

        for j in range(0, self.layers):
            out = self.up_conv_list[j](out) + down_out[self.layers - j - 1]
            # out = F.relu(out)
            out = self.up_dense_list[j](out)
            up_out.append(out)

        return up_out
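# Editor's sketch: Initial_LadderBlock is a U-shaped encoder-decoder. For
# `layers` levels it returns layers + 1 tensors ordered bottom-up: the coarsest
# (bottom) feature map first, the full-resolution map last.
def _demo_initial_ladder_block():
    net = Initial_LadderBlock(planes=16, layers=3).eval()
    with torch.no_grad():
        outs = net(torch.randn(1, 3, 64, 64))
    shapes = [tuple(o.shape) for o in outs]
    assert shapes == [(1, 128, 8, 8), (1, 64, 16, 16), (1, 32, 32, 32), (1, 16, 64, 64)]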
class Decoder(nn.Module):
    def __init__(self, planes, layers, kernel=3, block=BasicBlock):
        super().__init__()
        self.planes = planes
        self.layers = layers
        self.kernel = kernel
        self.padding = int((kernel - 1) / 2)
        self.inconv = block(planes, planes)

        # create module for bottom block
        self.bottom = block(planes * (2 ** (layers - 1)), planes * (2 ** (layers - 1)))

        # create module list for up branch
        self.up_conv_list = nn.ModuleList()
        self.up_dense_list = nn.ModuleList()
        for i in range(0, layers - 1):
            self.up_conv_list.append(
                nn.ConvTranspose2d(planes * 2 ** (layers - 1 - i), planes * 2 ** max(0, layers - i - 2),
                                   kernel_size=3, stride=2, padding=1, output_padding=1, bias=True))
            self.up_dense_list.append(
                block(planes * 2 ** max(0, layers - i - 2), planes * 2 ** max(0, layers - i - 2)))
    def forward(self, x):
        # bottom branch
        out = self.bottom(x[-1])
        bottom = out

        # up branch
        up_out = []
        up_out.append(bottom)

        for j in range(0, self.layers - 1):
            out = self.up_conv_list[j](out) + x[self.layers - j - 2]
            # out = F.relu(out)
            out = self.up_dense_list[j](out)
            up_out.append(out)

        return up_out
class LadderBlock(nn.Module):
    def __init__(self, planes, layers, kernel=3, block=BasicBlock):
        super().__init__()
        self.planes = planes
        self.layers = layers
        self.kernel = kernel
        self.padding = int((kernel - 1) / 2)
        self.inconv = block(planes, planes)

        # create module list for down branch
        self.down_module_list = nn.ModuleList()
        for i in range(0, layers - 1):
            self.down_module_list.append(block(planes * (2 ** i), planes * (2 ** i)))

        # use strided conv instead of pooling
        self.down_conv_list = nn.ModuleList()
        for i in range(0, layers - 1):
            self.down_conv_list.append(
                nn.Conv2d(planes * 2 ** i, planes * 2 ** (i + 1),
                          stride=2, kernel_size=kernel, padding=self.padding))

        # create module for bottom block
        self.bottom = block(planes * (2 ** (layers - 1)), planes * (2 ** (layers - 1)))

        # create module list for up branch
        self.up_conv_list = nn.ModuleList()
        self.up_dense_list = nn.ModuleList()
        for i in range(0, layers - 1):
            self.up_conv_list.append(
                nn.ConvTranspose2d(planes * 2 ** (layers - i - 1), planes * 2 ** max(0, layers - i - 2),
                                   kernel_size=3, stride=2, padding=1, output_padding=1, bias=True))
            self.up_dense_list.append(
                block(planes * 2 ** max(0, layers - i - 2), planes * 2 ** max(0, layers - i - 2)))
    def forward(self, x):
        out = self.inconv(x[-1])

        down_out = []
        # down branch
        for i in range(0, self.layers - 1):
            out = out + x[-i - 1]
            out = self.down_module_list[i](out)
            down_out.append(out)
            out = self.down_conv_list[i](out)
            out = F.relu(out)

        # bottom branch
        out = self.bottom(out)
        bottom = out

        # up branch
        up_out = []
        up_out.append(bottom)

        for j in range(0, self.layers - 1):
            out = self.up_conv_list[j](out) + down_out[self.layers - j - 2]
            # out = F.relu(out)
            out = self.up_dense_list[j](out)
            up_out.append(out)

        return up_out
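# Editor's sketch: Decoder and LadderBlock chained the way LadderHead uses
# them. Decoder consumes a pyramid ordered fine-to-coarse and emits a list
# ordered coarse-to-fine; LadderBlock then runs a full down-and-up pass over
# that list, restoring the finest level as its last output.
def _demo_decoder_ladder():
    feats = [torch.randn(1, 16 * 2 ** i, 64 // 2 ** i, 64 // 2 ** i) for i in range(4)]
    decoder = Decoder(planes=16, layers=4).eval()
    ladder = LadderBlock(planes=16, layers=4).eval()
    with torch.no_grad():
        out = ladder(decoder(feats))
    assert out[-1].shape == (1, 16, 64, 64)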
class Final_LadderBlock(nn.Module):
    def __init__(self, planes, layers, kernel=3, block=BasicBlock, inplanes=3):
        super().__init__()
        self.block = LadderBlock(planes, layers, kernel=kernel, block=block)
    def forward(self, x):
        out = self.block(x)
        return out[-1]
class FCNHead(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer):
        super(FCNHead, self).__init__()
        inter_channels = in_channels // 4
        self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),
                                   norm_layer(inter_channels),
                                   nn.ReLU(),
                                   nn.Dropout2d(0.1, False),
                                   nn.Conv2d(inter_channels, out_channels, 1))
    def forward(self, x):
        return self.conv5(x)
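# Editor's sketch: FCNHead squeezes channels 4x before a 1x1 classifier, so it
# maps the (N, 1024, H, W) stage-3 feature map used for the auxiliary loss to
# (N, nclass, H, W) at the same resolution.
def _demo_fcn_head():
    head = FCNHead(1024, 21, nn.BatchNorm2d).eval()
    with torch.no_grad():
        out = head(torch.randn(1, 1024, 28, 28))
    assert out.shape == (1, 21, 28, 28)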
class LadderNet(BaseNet):
    def __init__(self, nclass, backbone, aux=True, se_loss=True, lateral=False, arch_params=None,
                 # norm_layer=SyncBatchNorm, dilated=False, **kwargs):  # FIXME - ORIGINAL CODE TORCH-ENCODING
                 norm_layer=nn.BatchNorm2d, dilated=False, **kwargs):  # FIXME - TIME MEASUREMENT CODE
        super().__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, dilated=dilated, **kwargs)
        self.head = LadderHead(base_inchannels=256, base_outchannels=64, out_channels=nclass,
                               norm_layer=norm_layer, se_loss=se_loss, nclass=nclass,
                               up_kwargs=self._up_kwargs)
        if aux:
            self.auxlayer = FCNHead(1024, nclass, norm_layer=norm_layer)
    def forward(self, x):
        imsize = x.size()[2:]
        features = self.base_forward(x)

        x = list(self.head(features))
        # bilinear upsample back to the input resolution
        x[0] = F.interpolate(x[0], imsize, **self._up_kwargs)
        if self.aux:
            auxout = self.auxlayer(features[2])
            auxout = F.interpolate(auxout, imsize, **self._up_kwargs)
            x.append(auxout)
        return tuple(x)
class LadderHead(nn.Module):
    def __init__(self, base_inchannels, base_outchannels, out_channels, norm_layer, se_loss, nclass, up_kwargs):
        super(LadderHead, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=base_inchannels, out_channels=base_outchannels, kernel_size=1, bias=False)
        self.conv2 = nn.Conv2d(in_channels=base_inchannels * 2, out_channels=base_outchannels * 2, kernel_size=1, bias=False)
        self.conv3 = nn.Conv2d(in_channels=base_inchannels * 2 ** 2, out_channels=base_outchannels * 2 ** 2, kernel_size=1, bias=False)
        self.conv4 = nn.Conv2d(in_channels=base_inchannels * 2 ** 3, out_channels=base_outchannels * 2 ** 3, kernel_size=1, bias=False)

        self.bn1 = norm_layer(base_outchannels)
        self.bn2 = norm_layer(base_outchannels * 2)
        self.bn3 = norm_layer(base_outchannels * 2 ** 2)
        self.bn4 = norm_layer(base_outchannels * 2 ** 3)

        self.decoder = Decoder(planes=base_outchannels, layers=4)
        self.ladder = LadderBlock(planes=base_outchannels, layers=4)
        self.final = nn.Conv2d(base_outchannels, out_channels, 1)

        self.se_loss = se_loss

        if self.se_loss:
            self.selayer = nn.Linear(base_outchannels * 2 ** 3, nclass)
    def forward(self, x):
        x1, x2, x3, x4 = x

        out1 = self.conv1(x1)
        out1 = self.bn1(out1)
        out1 = F.relu(out1)

        out2 = self.conv2(x2)
        out2 = self.bn2(out2)
        out2 = F.relu(out2)

        out3 = self.conv3(x3)
        out3 = self.bn3(out3)
        out3 = F.relu(out3)

        out4 = self.conv4(x4)
        out4 = self.bn4(out4)
        out4 = F.relu(out4)

        out = self.decoder([out1, out2, out3, out4])
        out = self.ladder(out)

        pred = [self.final(out[-1])]

        if self.se_loss:
            enc = F.max_pool2d(out[0], kernel_size=out[0].size()[2:])
            enc = torch.squeeze(enc, -1)
            enc = torch.squeeze(enc, -1)
            se = self.selayer(enc)
            pred.append(se)

        return pred
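# Editor's sketch: LadderHead projects the four backbone stages down to a
# shared base width, runs Decoder then LadderBlock over the pyramid, and
# returns [stride-4 segmentation logits] plus, when se_loss is set, a
# per-image class vector pooled from the coarsest decoder map.
def _demo_ladder_head():
    head = LadderHead(base_inchannels=256, base_outchannels=64, out_channels=21,
                      norm_layer=nn.BatchNorm2d, se_loss=True, nclass=21,
                      up_kwargs=up_kwargs).eval()
    feats = (torch.randn(1, 256, 56, 56), torch.randn(1, 512, 28, 28),
             torch.randn(1, 1024, 14, 14), torch.randn(1, 2048, 7, 7))
    with torch.no_grad():
        pred = head(feats)
    assert pred[0].shape == (1, 21, 56, 56)  # upsampled to input size by LadderNet.forward
    assert pred[1].shape == (1, 21)          # SE-loss logits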
class LadderNet50(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(backbone='resnet50', nclass=21, *args, **kwargs)
class LadderNet503433(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(backbone='resnet50_3433', nclass=21, *args, **kwargs)
class LadderNet101(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(backbone='resnet101', nclass=21, *args, **kwargs)
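# Editor's sketch: these wrappers fix nclass to 21. With the default aux=True
# and se_loss=True, the model returns a 3-tuple of (full-resolution logits,
# SE-loss vector, auxiliary FCN logits).
def _demo_laddernet50():
    model = LadderNet50().eval()
    with torch.no_grad():
        main_out, se_out, aux_out = model(torch.randn(1, 3, 224, 224))
    assert main_out.shape == (1, 21, 224, 224)
    assert se_out.shape == (1, 21)
    assert aux_out.shape == (1, 21, 224, 224)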