4.2 使用pytorch搭建VGG网络

科技2022-09-11 120

文章目录

将VGG分成两部分 · 提取特征网络结构 · 分类网络结构 · model · 输入：非关键字参数或有序字典 · [Python-非关键字参数和关键字参数(*args **kw)](https://blog.csdn.net/weixin_44023658/article/details/105925199?utm_medium=distribute.wap_relevant.none-task-blog-title-1) · predict · 很多人会在RGB减去这三个值，是IMAGENET的三个通道上的均值，迁移学习可能要减 · train

将VGG分成两部分

另外，这个网络的参数量很大，训练和推理都比较慢，对训练数据量的要求也比较高。

提取特征网络结构

分类网络结构

model

import torch.nn as nn
import torch


class VGG(nn.Module):
    """VGG network split into a feature extractor and a classifier head.

    Args:
        features: Module that turns the input batch into feature maps.
            With the configurations used in this tutorial it maps
            ``N x 3 x 224 x 224`` images to ``N x 512 x 7 x 7``.
        num_classes: Width of the final linear layer.
        init_weights: When true, conv and linear layers are re-initialised
            with Xavier-uniform weights and zero biases.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        # The convolutional part is built by the caller and injected here.
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(512 * 7 * 7, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, num_classes),
        )
        # Only re-initialise the parameters when the caller asks for it.
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor, flatten, then classify."""
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        # Dimension 0 is the batch dimension, so flattening starts at 1.
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-initialise every conv/linear layer and zero its bias."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                # Alternatives that were tried: kaiming_normal_ for conv
                # layers and normal_(0, 0.01) for linear layers.
                nn.init.xavier_uniform_(m.weight)
                # Layers created with bias=False have no bias tensor.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """Build the convolutional feature extractor from a configuration list.

    Args:
        cfg: Sequence in which an integer adds a 3x3 convolution with that
            many output channels followed by ReLU, and the string ``"M"``
            adds a 2x2 max-pooling layer with stride 2.

    Returns:
        An ``nn.Sequential`` holding the layers in order.
    """
    layers = []
    in_channels = 3  # RGB input
    for v in cfg:
        if v == "M":
            # Pooling kernel size and stride are both 2.
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            # stride defaults to 1, so it is not spelled out.
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            # The next convolution consumes this layer's output depth.
            in_channels = v
    # Sequential takes the layers as positional (non-keyword) arguments.
    return nn.Sequential(*layers)

输入：非关键字参数或有序字典

Python-非关键字参数和关键字参数(*args **kw)

# Model configuration table.
# Each integer is the number of output channels of a 3x3 convolution layer;
# 'M' stands for a max-pooling (down-sampling) layer.
cfgs = {
    # configuration A
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    # configuration B
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M',
              512, 512, 'M', 512, 512, 'M'],
    # configuration D
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
              512, 512, 512, 'M', 512, 512, 512, 'M'],
    # configuration E
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M',
              512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", **kwargs):
    """Build a VGG model from a named configuration.

    Args:
        model_name: Key into ``cfgs`` ('vgg11', 'vgg13', 'vgg16', 'vgg19').
        **kwargs: Forwarded unchanged to ``VGG`` (for example
            ``num_classes`` and ``init_weights``).

    Returns:
        The constructed ``VGG`` instance.

    Raises:
        SystemExit: With code -1, after printing a warning, when
            ``model_name`` is not a key of ``cfgs``.
    """
    try:
        cfg = cfgs[model_name]
    except KeyError:
        # Only an unknown name is handled here; anything else propagates.
        print("Warning: model number {} not in cfgs dict!".format(model_name))
        # Same effect as exit(-1) without relying on the site-injected builtin.
        raise SystemExit(-1) from None
    # features is the first positional argument; the rest are keyword options.
    return VGG(make_features(cfg), **kwargs)

predict

import torch
from model import vgg
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

# Preprocessing applied to the single image being classified.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# load image
# Force three channels so grayscale or RGBA files still fit the 3-channel
# Normalize above.
img = Image.open("../tulip.jpg").convert("RGB")
plt.imshow(img)
# [C, H, W]
img = data_transform(img)
# expand batch dimension -> [N, C, H, W]
img = torch.unsqueeze(img, dim=0)

# read class_indict
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except (OSError, ValueError) as e:
    # OSError: file missing or unreadable; ValueError: malformed JSON.
    print(e)
    raise SystemExit(-1) from e

# create model
# The output width follows the class-index file instead of a fixed 5.
model = vgg(model_name="vgg16", num_classes=len(class_indict))

# load model weights
model_weight_path = "./vgg16Net.pth"
# map_location lets a checkpoint saved from a GPU run load on a CPU-only host.
model.load_state_dict(torch.load(model_weight_path, map_location="cpu"))
model.eval()

with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).item()

# The JSON keys are strings, so the predicted index is converted before lookup.
print(class_indict[str(predict_cla)])
plt.show()

很多人会在预处理时从RGB三个通道上分别减去三个固定值，这三个值是ImageNet数据集在三个通道上的均值（约为123.68、116.78、103.94）；如果使用在ImageNet上预训练的权重做迁移学习，通常也需要减去这组均值。

train

import torch.nn as nn
from torchvision import transforms, datasets
import json
import os
import torch.optim as optim
from model import vgg
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Random crop and flip for training; a plain resize for validation.
data_transform = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
}

# get data root path (two directories above the working directory)
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
# flower data set path
image_path = os.path.join(data_root, "data_set", "flower_data")

train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# Invert the mapping to index -> class name.
cla_dict = {val: key for key, val in flower_list.items()}
# write dict into json file
with open('class_indices.json', 'w') as json_file:
    json.dump(cla_dict, json_file, indent=4)

batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=0)

validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=0)

# use the VGG-16 configuration
model_name = "vgg16"
# The output width follows the number of class folders instead of a fixed 5.
net = vgg(model_name=model_name, num_classes=len(flower_list),
          init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

best_acc = 0.0
save_path = './{}Net.pth'.format(model_name)
num_batches = len(train_loader)

for epoch in range(30):
    # train
    net.train()
    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        # print train process as a 50-character progress bar
        rate = (step + 1) / num_batches
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
            int(rate * 100), a, b, batch_loss), end="")
    print()

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        # No gradients exist here, so there is nothing to zero.
        for val_images, val_labels in validate_loader:
            outputs = net(val_images.to(device))
            predict_y = outputs.argmax(dim=1)
            acc += (predict_y == val_labels.to(device)).sum().item()
    val_accurate = acc / val_num
    # Keep only the checkpoint with the best validation accuracy so far.
    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)
    # Average over the number of batches, not the last zero-based index.
    print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
          (epoch + 1, running_loss / num_batches, val_accurate))

print('Finished Training')

Processed: 0.008, SQL: 10