import math

import torch
import torch.nn as nn
import torch.nn.functional as F

up_kwargs = {'mode': 'bilinear', 'align_corners': True}

# from encoding.nn import SyncBatchNorm  # FIXME - ORIGINAL CODE TORCH-ENCODING
class LadderBottleneck(nn.Module):
    """ResNet bottleneck residual block (1x1 -> 3x3 -> 1x1 convolutions) with configurable dilation."""
# pylint: disable=unused-argument
expansion = 4
def __init__(self, inplanes, planes, stride=1, dilation=1,
downsample=None, previous_dilation=1, norm_layer=None):
super().__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = norm_layer(planes)
self.conv2 = nn.Conv2d(
planes, planes, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation, bias=False)
self.bn2 = norm_layer(planes)
self.conv3 = nn.Conv2d(
planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = norm_layer(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dilation = dilation
self.stride = stride
    def _sum_each(self, x, y):
        assert len(x) == len(y)
        return [xi + yi for xi, yi in zip(x, y)]
    def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class LadderResNet(nn.Module):
    """Dilated pre-trained ResNet, which produces stride-8 feature maps at conv5
    when ``dilated=True``.

    Parameters
    ----------
    block : Block
        Class for the residual block, e.g. :class:`LadderBottleneck`.
    layers : list of int
        Number of residual blocks in each of the four stages.
    num_classes : int, default 1000
        Number of classification classes.
    dilated : bool, default False
        Apply the dilation strategy to the pretrained ResNet, yielding a
        stride-8 model, as typically used in semantic segmentation.
    norm_layer : object
        Normalization layer used in the backbone network (default:
        :class:`torch.nn.BatchNorm2d`; substitute a synchronized variant for
        cross-GPU batch normalization).

    Reference:
        - He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.
        - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions." ICLR 2016.
    """
# pylint: disable=unused-variable
# def __init__(self, block, layers, num_classes=1000, dilated=False, norm_layer=SyncBatchNorm): # FIXME - ORIGINAL CODE
    def __init__(self, block, layers, num_classes=1000, dilated=False,
                 norm_layer=nn.BatchNorm2d):  # FIXME - TIME MEASUREMENT CODE
        super().__init__()
        self.inplanes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
if dilated:
self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
dilation=2, norm_layer=norm_layer)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=4, norm_layer=norm_layer)
else:
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
norm_layer=norm_layer)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
norm_layer=norm_layer)
        self.avgpool = nn.AvgPool2d(7)  # classification head; assumes 7x7 final maps (224x224 input)
self.fc = nn.Linear(512 * block.expansion, num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, norm_layer):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
norm_layer(planes * block.expansion),
)
        layers = []
        if dilation in (1, 2):
            layers.append(block(self.inplanes, planes, stride, dilation=1,
                                downsample=downsample, previous_dilation=dilation,
                                norm_layer=norm_layer))
        elif dilation == 4:
            layers.append(block(self.inplanes, planes, stride, dilation=2,
                                downsample=downsample, previous_dilation=dilation,
                                norm_layer=norm_layer))
        else:
            raise RuntimeError("=> unknown dilation size: {}".format(dilation))
self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation=dilation,
                                previous_dilation=dilation, norm_layer=norm_layer))
return nn.Sequential(*layers)
    def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class LadderNetBackBone503433(LadderResNet):
def __init__(self, num_classes: int):
super().__init__(LadderBottleneck, [3, 4, 3, 3], num_classes=num_classes)
class LadderNetBackBone50(LadderResNet):
def __init__(self, num_classes: int):
super().__init__(LadderBottleneck, [3, 4, 6, 3], num_classes=num_classes)
class LadderNetBackBone101(LadderResNet):
def __init__(self, num_classes: int):
super().__init__(LadderBottleneck, [3, 4, 23, 3], num_classes=num_classes)
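# A small sanity sketch (illustration only, not part of the original code):
# the three backbone wrappers differ only in stage depths, mirroring the usual
# ResNet-50/101 layer configurations (the "3433" variant shrinks stages 3-4).
def _demo_backbone_configs():
    for cls, layers in [(LadderNetBackBone50, [3, 4, 6, 3]),
                        (LadderNetBackBone503433, [3, 4, 3, 3]),
                        (LadderNetBackBone101, [3, 4, 23, 3])]:
        net = cls(num_classes=1000)
        # each stage is an nn.Sequential holding layers[i] LadderBottleneck blocks
        assert [len(s) for s in (net.layer1, net.layer2, net.layer3, net.layer4)] == layers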
class BaseNet(nn.Module):
    def __init__(self, nclass, backbone, aux, se_loss, dilated=True, norm_layer=None,
                 base_size=576, crop_size=608, mean=(.485, .456, .406),
                 std=(.229, .224, .225), root='~/.encoding/models'):
        super().__init__()
self.nclass = nclass
self.aux = aux
self.se_loss = se_loss
self.mean = mean
self.std = std
self.base_size = base_size
self.crop_size = crop_size
self.image_size = self.crop_size
        # construct the backbone (no pretrained weights are loaded here)
if backbone == 'resnet50':
self.backbone = LadderNetBackBone50(num_classes=1000)
elif backbone == 'resnet50_3433':
self.backbone = LadderNetBackBone503433(num_classes=1000)
elif backbone == 'resnet101':
self.backbone = LadderNetBackBone101(num_classes=1000)
# elif backbone == 'resnet152':
# self.pretrained = resnet.resnet152(pretrained=True, dilated=dilated,
# norm_layer=norm_layer, root=root)
# elif backbone == 'resnet18':
# self.pretrained = resnet.resnet18(pretrained=True, dilated=dilated,
# norm_layer=norm_layer, root=root)
# elif backbone == 'resnet34':
# self.pretrained = resnet.resnet34(pretrained=True, dilated=dilated,
# norm_layer=norm_layer, root=root)
else:
raise RuntimeError('unknown backbone: {}'.format(backbone))
# bilinear upsample options
self._up_kwargs = up_kwargs
    def base_forward(self, x):
x = self.backbone.conv1(x)
x = self.backbone.bn1(x)
x = self.backbone.relu(x)
x = self.backbone.maxpool(x)
c1 = self.backbone.layer1(x)
c2 = self.backbone.layer2(c1)
c3 = self.backbone.layer3(c2)
c4 = self.backbone.layer4(c3)
return c1, c2, c3, c4
# def evaluate(self, x, target=None):
# pred = self.forward(x)
# if isinstance(pred, (tuple, list)):
# pred = pred[0]
# if target is None:
# return pred
# correct, labeled = batch_pix_accuracy(pred.data, target.data)
# inter, union = batch_intersection_union(pred.data, target.data, self.nclass)
# return correct, labeled, inter, union
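# A quick shape check for base_forward (a hedged sketch; the 224x224 input is
# an assumption). With the non-dilated backbones constructed above, the
# pyramid comes out at strides 4/8/16/32:
def _demo_base_forward():
    net = BaseNet(nclass=21, backbone='resnet50', aux=False, se_loss=False).eval()
    with torch.no_grad():
        c1, c2, c3, c4 = net.base_forward(torch.randn(1, 3, 224, 224))
    # c1: (1, 256, 56, 56), c2: (1, 512, 28, 28),
    # c3: (1, 1024, 14, 14), c4: (1, 2048, 7, 7)
    return c1.shape, c2.shape, c3.shape, c4.shape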
drop = 0.25  # dropout probability used inside BasicBlock
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=True)
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, inplanes, planes, stride=1, rate=1, downsample=None):
        super().__init__()
        if inplanes != planes:
            # project the input to `planes` channels when widths differ
            # (note that `rate` is passed as this projection's stride)
            self.conv0 = conv3x3(inplanes, planes, rate)
        self.inplanes = inplanes
        self.planes = planes
self.conv1 = conv3x3(planes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
# self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
self.drop = nn.Dropout2d(p=drop)
    def forward(self, x):
if self.inplanes != self.planes:
x = self.conv0(x)
x = F.relu(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.drop(out)
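        # Note: conv1 is deliberately applied a second time below (self.conv2
        # above is commented out); this matches LadderNet's shared-weights
        # residual block design.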
out1 = self.conv1(out)
out1 = self.bn2(out1)
# out1 = self.relu(out1)
out2 = out1 + x
return F.relu(out2)
class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
    def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Initial_LadderBlock(nn.Module):
def __init__(self, planes, layers, kernel=3, block=BasicBlock, inplanes=3):
super().__init__()
self.planes = planes
self.layers = layers
self.kernel = kernel
        self.padding = (kernel - 1) // 2
self.inconv = nn.Conv2d(in_channels=inplanes, out_channels=planes,
kernel_size=3, stride=1, padding=1, bias=True)
self.in_bn = nn.BatchNorm2d(planes)
# create module list for down branch
self.down_module_list = nn.ModuleList()
for i in range(0, layers):
self.down_module_list.append(block(planes * (2 ** i), planes * (2 ** i)))
        # use strided conv instead of pooling
self.down_conv_list = nn.ModuleList()
for i in range(0, layers):
self.down_conv_list.append(
nn.Conv2d(planes * 2 ** i, planes * 2 ** (i + 1), stride=2, kernel_size=kernel,
padding=self.padding))
# create module for bottom block
self.bottom = block(planes * (2 ** layers), planes * (2 ** layers))
# create module list for up branch
self.up_conv_list = nn.ModuleList()
self.up_dense_list = nn.ModuleList()
for i in range(0, layers):
self.up_conv_list.append(nn.ConvTranspose2d(in_channels=planes * 2 ** (layers - i),
out_channels=planes * 2 ** max(0, layers - i - 1),
kernel_size=3,
stride=2, padding=1, output_padding=1, bias=True))
self.up_dense_list.append(
block(planes * 2 ** max(0, layers - i - 1), planes * 2 ** max(0, layers - i - 1)))
    def forward(self, x):
out = self.inconv(x)
out = self.in_bn(out)
out = F.relu(out)
down_out = []
# down branch
for i in range(0, self.layers):
out = self.down_module_list[i](out)
down_out.append(out)
out = self.down_conv_list[i](out)
out = F.relu(out)
# bottom branch
out = self.bottom(out)
bottom = out
# up branch
up_out = []
up_out.append(bottom)
for j in range(0, self.layers):
out = self.up_conv_list[j](out) + down_out[self.layers - j - 1]
# out = F.relu(out)
out = self.up_dense_list[j](out)
up_out.append(out)
return up_out
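# Note (a reading aid, not original commentary): Initial_LadderBlock is a
# self-contained U-shaped encoder that starts from the raw image. It is not
# wired into LadderNet below, which builds its pyramid from the ResNet
# backbone instead. Its output list is ordered coarse-to-fine: up_out[0] is
# the bottom (coarsest) map and up_out[-1] the full-resolution one.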
class Decoder(nn.Module):
def __init__(self, planes, layers, kernel=3, block=BasicBlock):
super().__init__()
self.planes = planes
self.layers = layers
self.kernel = kernel
        self.padding = (kernel - 1) // 2
self.inconv = block(planes, planes)
# create module for bottom block
self.bottom = block(planes * (2 ** (layers - 1)), planes * (2 ** (layers - 1)))
# create module list for up branch
self.up_conv_list = nn.ModuleList()
self.up_dense_list = nn.ModuleList()
for i in range(0, layers - 1):
self.up_conv_list.append(
nn.ConvTranspose2d(planes * 2 ** (layers - 1 - i), planes * 2 ** max(0, layers - i - 2),
kernel_size=3,
stride=2, padding=1, output_padding=1, bias=True))
self.up_dense_list.append(
block(planes * 2 ** max(0, layers - i - 2), planes * 2 ** max(0, layers - i - 2)))
    def forward(self, x):
# bottom branch
out = self.bottom(x[-1])
bottom = out
# up branch
up_out = []
up_out.append(bottom)
for j in range(0, self.layers - 1):
out = self.up_conv_list[j](out) + x[self.layers - j - 2]
# out = F.relu(out)
out = self.up_dense_list[j](out)
up_out.append(out)
return up_out
class LadderBlock(nn.Module):
def __init__(self, planes, layers, kernel=3, block=BasicBlock):
super().__init__()
self.planes = planes
self.layers = layers
self.kernel = kernel
        self.padding = (kernel - 1) // 2
self.inconv = block(planes, planes)
# create module list for down branch
self.down_module_list = nn.ModuleList()
for i in range(0, layers - 1):
self.down_module_list.append(block(planes * (2 ** i), planes * (2 ** i)))
# use strided conv instead of pooling
self.down_conv_list = nn.ModuleList()
for i in range(0, layers - 1):
self.down_conv_list.append(
nn.Conv2d(planes * 2 ** i, planes * 2 ** (i + 1), stride=2, kernel_size=kernel,
padding=self.padding))
# create module for bottom block
self.bottom = block(planes * (2 ** (layers - 1)), planes * (2 ** (layers - 1)))
# create module list for up branch
self.up_conv_list = nn.ModuleList()
self.up_dense_list = nn.ModuleList()
for i in range(0, layers - 1):
self.up_conv_list.append(
nn.ConvTranspose2d(planes * 2 ** (layers - i - 1), planes * 2 ** max(0, layers - i - 2),
kernel_size=3,
stride=2, padding=1, output_padding=1, bias=True))
self.up_dense_list.append(
block(planes * 2 ** max(0, layers - i - 2), planes * 2 ** max(0, layers - i - 2)))
    def forward(self, x):
out = self.inconv(x[-1])
down_out = []
# down branch
for i in range(0, self.layers - 1):
out = out + x[-i - 1]
out = self.down_module_list[i](out)
down_out.append(out)
out = self.down_conv_list[i](out)
out = F.relu(out)
# bottom branch
out = self.bottom(out)
bottom = out
# up branch
up_out = []
up_out.append(bottom)
for j in range(0, self.layers - 1):
out = self.up_conv_list[j](out) + down_out[self.layers - j - 2]
# out = F.relu(out)
out = self.up_dense_list[j](out)
up_out.append(out)
return up_out
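# A minimal shape sketch of the Decoder -> LadderBlock hand-off (illustration
# only; the batch size, 64 channels and 64-pixel resolution are assumptions).
# Decoder consumes a fine-to-coarse pyramid and both modules return lists
# ordered coarse-to-fine, so out[-1] is the highest-resolution map:
def _demo_decoder_ladder():
    feats = [torch.randn(1, 64 * 2 ** i, 64 // 2 ** i, 64 // 2 ** i) for i in range(4)]
    up = Decoder(planes=64, layers=4)(feats)    # up[0]: (1, 512, 8, 8) ... up[-1]: (1, 64, 64, 64)
    out = LadderBlock(planes=64, layers=4)(up)  # same coarse-to-fine ordering
    return out[-1].shape                        # torch.Size([1, 64, 64, 64])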
class Final_LadderBlock(nn.Module):
    # `inplanes` is kept for interface parity with Initial_LadderBlock but is
    # unused here.
    def __init__(self, planes, layers, kernel=3, block=BasicBlock, inplanes=3):
        super().__init__()
        self.block = LadderBlock(planes, layers, kernel=kernel, block=block)
    def forward(self, x):
out = self.block(x)
return out[-1]
class FCNHead(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer):
        super().__init__()
inter_channels = in_channels // 4
self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),
norm_layer(inter_channels),
nn.ReLU(),
nn.Dropout2d(0.1, False),
nn.Conv2d(inter_channels, out_channels, 1))
    def forward(self, x):
return self.conv5(x)
class LadderNet(BaseNet):
def __init__(self, nclass, backbone, aux=True, se_loss=True, lateral=False, arch_params=None,
# norm_layer=SyncBatchNorm, dilated=False, **kwargs): # FIXME - ORIGINAL CODE TORCH-ENCODING
norm_layer=nn.BatchNorm2d, dilated=False, **kwargs): # FIXME - TIME MEASUREMENT CODE
super().__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, dilated=dilated, **kwargs)
self.head = LadderHead(base_inchannels=256, base_outchannels=64, out_channels=nclass,
norm_layer=norm_layer, se_loss=se_loss, nclass=nclass, up_kwargs=self._up_kwargs)
if aux:
self.auxlayer = FCNHead(1024, nclass, norm_layer=norm_layer)
    def forward(self, x):
        imsize = x.size()[2:]
        features = self.base_forward(x)
        x = list(self.head(features))
        # F.upsample is deprecated; F.interpolate is its replacement
        x[0] = F.interpolate(x[0], imsize, **self._up_kwargs)
        if self.aux:
            auxout = self.auxlayer(features[2])
            auxout = F.interpolate(auxout, imsize, **self._up_kwargs)
            x.append(auxout)
        return tuple(x)
class LadderHead(nn.Module):
    def __init__(self, base_inchannels, base_outchannels, out_channels, norm_layer, se_loss, nclass, up_kwargs):
        super().__init__()
self.conv1 = nn.Conv2d(in_channels=base_inchannels, out_channels=base_outchannels, kernel_size=1, bias=False)
self.conv2 = nn.Conv2d(in_channels=base_inchannels * 2, out_channels=base_outchannels * 2, kernel_size=1,
bias=False)
self.conv3 = nn.Conv2d(in_channels=base_inchannels * 2 ** 2, out_channels=base_outchannels * 2 ** 2,
kernel_size=1, bias=False)
self.conv4 = nn.Conv2d(in_channels=base_inchannels * 2 ** 3, out_channels=base_outchannels * 2 ** 3,
kernel_size=1, bias=False)
self.bn1 = norm_layer(base_outchannels)
self.bn2 = norm_layer(base_outchannels * 2)
self.bn3 = norm_layer(base_outchannels * 2 ** 2)
self.bn4 = norm_layer(base_outchannels * 2 ** 3)
self.decoder = Decoder(planes=base_outchannels, layers=4)
self.ladder = LadderBlock(planes=base_outchannels, layers=4)
self.final = nn.Conv2d(base_outchannels, out_channels, 1)
self.se_loss = se_loss
if self.se_loss:
self.selayer = nn.Linear(base_outchannels * 2 ** 3, nclass)
    def forward(self, x):
x1, x2, x3, x4 = x
out1 = self.conv1(x1)
out1 = self.bn1(out1)
out1 = F.relu(out1)
out2 = self.conv2(x2)
out2 = self.bn2(out2)
out2 = F.relu(out2)
out3 = self.conv3(x3)
out3 = self.bn3(out3)
out3 = F.relu(out3)
out4 = self.conv4(x4)
out4 = self.bn4(out4)
out4 = F.relu(out4)
out = self.decoder([out1, out2, out3, out4])
out = self.ladder(out)
pred = [self.final(out[-1])]
if self.se_loss:
enc = F.max_pool2d(out[0], kernel_size=out[0].size()[2:])
enc = torch.squeeze(enc, -1)
enc = torch.squeeze(enc, -1)
se = self.selayer(enc)
pred.append(se)
return pred
class LadderNet50(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, backbone='resnet50', nclass=21, **kwargs)
class LadderNet503433(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, backbone='resnet50_3433', nclass=21, **kwargs)
class LadderNet101(LadderNet):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, backbone='resnet101', nclass=21, **kwargs)
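# A hedged end-to-end smoke test (an illustration, not part of the original
# training pipeline): with the default aux=True and se_loss=True, the forward
# pass returns (segmentation logits, SE-loss logits, auxiliary FCN logits).
def _demo_laddernet50():
    net = LadderNet50().eval()
    with torch.no_grad():
        pred, se, aux = net(torch.randn(1, 3, 224, 224))
    # pred: (1, 21, 224, 224), se: (1, 21), aux: (1, 21, 224, 224)
    return pred.shape, se.shape, aux.shape

if __name__ == '__main__':
    print(_demo_laddernet50())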