Pytorch框架下的语义分割实战（二，网络搭建），超详细讲解！！

科技2024-11-21 19

接上一篇

Pytorch框架下的语义分割实战（数据集处理）

我们接着聊聊FCN.py中的小细节吧……

先附上代码

# -*- coding: utf-8 -*- """ FCN.py """ import torch.nn as nn from torchvision.models.vgg import VGG #继承nn.Module，撰写自己的网络层 class FCNs(nn.Module): ''' 类FCNs：将最后一个特征图直接上采样32倍(5次步长为2、卷积核为3*3的反卷积操作)得到的最终分割结果。 ''' def __init__(self, pretrained_net, n_class): super().__init__() self.n_class = n_class self.pretrained_net = pretrained_net self.relu = nn.ReLU(inplace=True) self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) self.bn1 = nn.BatchNorm2d(512) self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) self.bn2 = nn.BatchNorm2d(256) self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) self.bn3 = nn.BatchNorm2d(128) self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) self.bn4 = nn.BatchNorm2d(64) self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) self.bn5 = nn.BatchNorm2d(32) #分类器是1*1大小的卷积，将channel个数从32减小到n_class self.classifier = nn.Conv2d(32, n_class, kernel_size=1) def forward(self, x): output = self.pretrained_net(x) x5 = output['x5'] x4 = output['x4'] x3 = output['x3'] x2 = output['x2'] x1 = output['x1'] score = self.bn1(self.relu(self.deconv1(x5))) score = score + x4 score = self.bn2(self.relu(self.deconv2(score))) score = score + x3 score = self.bn3(self.relu(self.deconv3(score))) score = score + x2 score = self.bn4(self.relu(self.deconv4(score))) score = score + x1 score = self.bn5(self.relu(self.deconv5(score))) score = self.classifier(score) return score class VGGNet(VGG): def __init__(self, pretrained=False, model='vgg16', requires_grad=True, remove_fc=True, show_params=False): super().__init__(make_layers(cfg[model])) self.ranges = ranges[model] if pretrained: exec("self.load_state_dict(models.%s(pretrained=False).state_dict())" % model) if not requires_grad: for param in 
super().parameters(): param.requires_grad = False # 删除多余的全连接层参数，以节省内存。 # 去掉vgg最后的全连接层(classifier) if remove_fc: del self.classifier if show_params: for name, param in self.named_parameters(): print(name, param.size()) def forward(self, x): output = {} #得到每个最大池化层的输出，VGG网络有5个最大池化层。 for idx, (begin, end) in enumerate(self.ranges): #self.ranges = ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)) (vgg16 examples) for layer in range(begin, end): x = self.features[layer](x) output["x%d"%(idx+1)] = x return output ranges = { 'vgg11': ((0, 3), (3, 6), (6, 11), (11, 16), (16, 21)), 'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)), 'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)), 'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37)) } # Vgg网络结构配置 cfg = { 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], } # make layers using Vgg-Net config(cfg) # 由cfg构建vgg-Net def make_layers(cfg, batch_norm=False): layers = [] in_channels = 3 for v in cfg: if v == 'M': layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v return nn.Sequential(*layers) if __name__ == "__main__": pass

①nn.ReLU

nn.ReLU(inplace=True)：inplace为True时，会直接在输入张量上修改数据；为False时不会改变原输入，只会产生新的输出。举个栗子吧（下面的例子使用inplace=False，因此可以看到输入保持不变）...

import torch
from torch import nn

# inplace=False: the activation returns a new tensor and leaves `input` as is.
m = nn.ReLU(inplace=False)
input = torch.randn(7)
print('Original size: \n', input)

output = m(input)
print('ReLU output：\n', output)
print('Output size: \n', output.shape)
print("Let's see the input whether change... \n", input)

# Sample run (the input is random, so the numbers differ between runs):
#
# Original size:
#  tensor([ 1.2788,  1.4480, -0.4185,  0.9940, -0.3533,  0.4067, -0.1551])
# ReLU output：
#  tensor([1.2788, 1.4480, 0.0000, 0.9940, 0.0000, 0.4067, 0.0000])
# Output size:
#  torch.Size([7])
# Let's see the input whether change...
#  tensor([ 1.2788,  1.4480, -0.4185,  0.9940, -0.3533,  0.4067, -0.1551])

②nn.Conv2d & nn.ConvTranspose2d

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)

nn.Conv2d：对由多个输入平面组成的输入信号进行二维卷积。输入数据格式为(N,C,H,W)。参数说明： N：表示batch_size大小 C：表示channel个数 H,W：分别表示特征图的高和宽 stride：步长，控制cross-correlation的步长，默认为1。可设为一个整数，或一个(int,int)型的元组。 kernel_size：卷积核尺寸，单个整数（各空间维度使用相同长度）或由两个整数组成的list或tuple。 padding：zero-padding个数，注意，在卷积之前补零。 dilation：扩张，控制kernel点的间距。 groups：分组数，控制输入通道与输出通道之间的连接方式，通常为1；取值范围为[1, in_channels]，且in_channels和out_channels都必须能被groups整除。 bias：添加偏置，为输出设定可学习的偏差。图片经一次卷积后的尺寸：$\lfloor (\text{original\_size} + 2\times\text{padding} - \text{dilation}\times(\text{kernel\_size}-1) - 1)/\text{stride} \rfloor + 1$；当padding=0、dilation=1、stride=1时，即为original_size-kernel_size+1。

下面举一个例子，来看看吧...

import torch

# Input batch: batch_size=2, channels=1, in_h=7, in_w=3
x = torch.randn(2, 1, 7, 3)

# 1 input channel, 8 output channels, filter_h=2, filter_w=3
conv = torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(2, 3))
res = conv(x)

# out_batch_size = batch_size = 2, out_channels = 8,
# out_h = in_h - filter_h + 1 = 6, out_w = in_w - filter_w + 1 = 1
print(res.shape)

# Prints:
# torch.Size([2, 8, 6, 1])

nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1)

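ConvTranspose2d是进行反卷积（转置卷积）操作。参数说明： in_channels(int)：输入图像的通道个数。 out_channels(int)：经卷积后，输出的通道个数。 kernel_size(int or tuple)：卷积核大小。 stride(int or tuple, optional)：步长。 padding(int or tuple, optional)：相当于在输入的每一条边补充 dilation*(kernel_size-1)-padding 层0；从效果上看，输出的高和宽各减少2*padding。 output_padding(int or tuple, optional)：在输出每个维度的一侧额外增加的尺寸，高和宽各增加output_padding。 groups(int, optional)：从输入通道到输出通道的分组（阻塞）连接数。 bias(bool, optional)：若为True，添加偏置。 dilation(int or tuple, optional)：卷积核元素之间的间距。输出尺寸：$H_{out} = (H_{in}-1)\times\text{stride} - 2\times\text{padding} + \text{dilation}\times(\text{kernel\_size}-1) + \text{output\_padding} + 1$；例如kernel_size=3、stride=2、padding=1、output_padding=1时，$H_{out}=2H_{in}$。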

③nn.BatchNorm2d

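作用是将数据归一化，不会因为数据太大而导致网络性能不稳定。其数学原理如下：

$$y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} \cdot \gamma + \beta$$

其中均值$\mathrm{E}[x]$和方差$\mathrm{Var}[x]$按通道在(N,H,W)维度上统计（方差为有偏估计），$\gamma$和$\beta$即下文的weight和bias。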

参数说明： num_features：特征数量。 eps：为了计算的稳定性，默认值为1e-5 momentum：运行过程中均值和方差的估计参数（相当于稳定系数，类似SGD中的momentum的系数）,默认值为0.1。 affine：当为True时，会给定可学习的系数矩阵gamma和beta，weight和bias会被使用。

举个栗子看看吧...

import torch
from torch import nn

# Two feature channels; affine=True gives learnable weight (gamma) and
# bias (beta).
m = nn.BatchNorm2d(num_features=2, affine=True)
input = torch.randn(1, 2, 3, 4)
output = m(input)

print('Original input is \n', input)
print('\n The weight of BN2d is \n', m.weight)
print('\n The bias of BN2d is \n', m.bias)
print('\n The output is \n', output)
print('\n The output size is \n', output.shape)

# Sample run (the input is random, so the numbers differ between runs):
#
# Original input is
#  tensor([[[[ 0.7190,  0.6217,  2.4163, -1.2585],
#            [-0.0488,  0.8982, -0.0727, -0.8649],
#            [-1.6953,  0.0204,  1.1490,  0.2674]],
#
#           [[-0.1274,  0.5982, -3.3383,  3.4249],
#            [-1.1584, -0.7943, -1.8608,  1.2922],
#            [-1.4878, -0.2357, -0.7568,  0.4004]]]])
#
#  The weight of BN2d is
#  Parameter containing:
# tensor([1., 1.], requires_grad=True)
#
#  The bias of BN2d is
#  Parameter containing:
# tensor([0., 0.], requires_grad=True)
#
#  The output is
#  tensor([[[[ 0.5039,  0.4130,  2.0883, -1.3423],
#            [-0.2130,  0.6711, -0.2352, -0.9749],
#            [-1.7501, -0.1483,  0.9052,  0.0822]],
#
#           [[ 0.1283,  0.5727, -1.8381,  2.3039],
#            [-0.5031, -0.2801, -0.9332,  0.9978],
#            [-0.7048,  0.0620, -0.2571,  0.4516]]]],
#         grad_fn=<NativeBatchNormBackward>)
#
#  The output size is
#  torch.Size([1, 2, 3, 4])

分析：输入是一个1*2*3*4的四维矩阵。gamma和beta是一维数组，分别针对input[0][0]、input[0][1]这两个3*4的二维矩阵进行处理。不妨将input[0][0]按照上述公式进行计算，看看结果是否和output[0][0]中的数据相对应。

# First print the data in input[0][0] and compute its mean and variance.
# NOTE: relies on `torch`, `input` and `m` from the BatchNorm2d example above.
print('The first dimention of input \n', input[0][0])
FDMean = input[0][0].mean()
# unbiased=False: do not apply Bessel's correction (biased variance), which
# is what batch norm uses to normalise during training.
FDVar = input[0][0].var(unbiased=False)
print('m:', m)
print('m.eps:', m.eps)
print('First dimention mean is', FDMean)
print('First dimention var is', FDVar)

# Sample run:
#
# The first dimention of input
#  tensor([[ 0.7190,  0.6217,  2.4163, -1.2585],
#          [-0.0488,  0.8982, -0.0727, -0.8649],
#          [-1.6953,  0.0204,  1.1490,  0.2674]])
# m: BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
# m.eps: 1e-05
# First dimention mean is tensor(0.1793)
# First dimention var is tensor(1.1474)

# Plug the values into the batch-norm formula.  eps is added to the variance
# *inside* the square root: (x - mean) / sqrt(var + eps) * weight + bias.
BatchNormOne = ((input[0][0][0][0] - FDMean)
                / torch.sqrt(FDVar + m.eps)) * m.weight[0] + m.bias[0]
print('BatchMormOne is', BatchNormOne)

# Sample run:
#
# BatchMormOne is tensor(0.5039, grad_fn=<AddBackward0>)

What a surprise!!! 按照公式计算的结果等于函数得到的output[0][0][0][0]=0.5039 ps:参考网址https://blog.csdn.net/bigFatCat_Tom/article/details/91619977

嗝~~，今儿栗子吃饱了，下篇我们再见吧(๑╹◡╹)ﾉ"""

Processed: 0.009, SQL: 8