https://www.emperinter.info/2020/08/05/change-leaning-rate-by-reducelronplateau-in-pytorch/
缘由
自己之前写过一篇关于PyTorch学习率更新的文章,其中依据loss连续未降低(或acc连续未提高)的次数来动态更新学习率的做法,感觉是个挺好玩的东西。自己弄了好久都设置错误,今天算是搞出来了!
解析
说明
torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
在发现loss不再降低或者acc不再提高之后,降低学习率。各参数意义如下:
参数含义
- mode:'min' 模式检测 metric 是否不再减小,'max' 模式检测 metric 是否不再增大;
- factor:触发条件后 lr *= factor;
- patience:不再减小(或增大)的累计次数;
- verbose:触发条件后 print;
- threshold:只关注超过阈值的显著变化;
- threshold_mode:有 rel 和 abs 两种阈值计算模式。rel 规则:max 模式下如果超过 best × (1 + threshold) 为显著,min 模式下如果低于 best × (1 − threshold) 为显著;abs 规则:max 模式下如果超过 best + threshold 为显著,min 模式下如果低于 best − threshold 为显著;
- cooldown:触发一次条件后,等待一定 epoch 再进行检测,避免 lr 下降过速;
- min_lr:最小的允许 lr;
- eps:如果新旧 lr 之间的差异小于 eps(默认 1e-8),则忽略此次更新。
例子:如图所示,y 轴为 lr,x 轴为调整的次序。初始的学习率为 0.0009575,则学习率的方程为:$lr = 0.0009575 \times 0.35^{x}$
import math

import matplotlib.pyplot as plt

# Tabulate and plot lr = 0.0009575 * 0.35 ** x for x = 0 .. 8.
# `o` holds the reduction index (x axis) and `p` the learning rate after that
# many reductions (y axis); entry 0 is the initial learning rate.
o = [0]
p = [0.0009575]

for x in range(1, 9):
    y = 0.0009575 * math.pow(0.35, x)
    o.append(x)
    p.append(y)
    # 50 decimal places so that even the very small late values are visible.
    print('%d: %.50f' % (x, y))

plt.plot(o, p, c='red', label='test')
plt.legend(loc='best')
plt.show()
难点
我感觉这里面最难的是这几个参数的选择。第一个是初始的学习率(我目前接触的MNIST和下面的图像分类貌似都是0.001,我这里训练调整时才发现自己设置的为0.0009575,这个值是上一个实验忘更改了,但发现结果不错,第一次运行该代码损失值就接近到0.001这么小);另外,乘积系数以及判断多少次没有减少(增加)后才变换学习率,都是难以估计的。我自己的最好方法是先按默认不变的0.001来训练一下(结合**TensorBoard**),观察从哪里开始出现问题,就可以从这里来确定次数;而乘积系数,个人感觉还是用上面的代码来获取一个较为平滑且变化极小的数字来作为选择。建议在做这种测试时可以把模型先备份一下,以免浪费过多的时间!
例子
该例子初始学习率为0.0009575,乘积项系数为0.35,在我的例子中x变化的条件是:累计125次没有减小则x加1。自己训练在第一次lr变化后(从0.0009575变化到0.00011729)损失值慢慢趋向于0.001(如第一张图所示),准确率达到69%。(注:按上面的方程,x=1时lr为0.000335125,0.00011729对应的是x=2,此处的数值或次数请以实际训练日志为准。)
# Standard library
from datetime import datetime

# Numerics and plotting
import matplotlib.pyplot as plt
import numpy as np

# PyTorch / torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
# NOTE(review): wildcard import kept from the original and deliberately left
# last so that it shadows exactly the same names as before.
from torch.optim import *
# Path of the model checkpoint file.
# NOTE(review): the code that reads/writes this path is not part of this excerpt.
PATH = './cifar_net_tensorboard_net_width_200_and_chang_lr_by_decrease_0_35^x.pth'

# Convert each image to a tensor, then normalise every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training split, downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=0)

# CIFAR-10 test split; not shuffled.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=0)

# Class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

print("获取一些随机训练数据")
dataiter = iter(trainloader)
# Use the built-in next(): `.next()` is not part of the Python 3 iterator
# protocol and is not available on current PyTorch DataLoader iterators.
images, labels = next(dataiter)
def imshow(img):
    """Un-normalise an image tensor and display it with matplotlib.

    The tensor is mapped back with ``img / 2 + 0.5``, converted to a NumPy
    array, its axes are reordered from (0, 1, 2) to (1, 2, 0), and the
    result is shown with a blocking ``plt.show()``.

    Args:
        img: image tensor; presumably laid out as (channels, height, width)
            on the CPU, since ``.numpy()`` is called on it directly --
            TODO confirm against callers.
    """
    unnormalised = img / 2 + 0.5
    pixels = unnormalised.numpy()
    plt.imshow(np.transpose(pixels, (1, 2, 0)))
    plt.show()
# Display the sampled training batch as one image grid.
imshow(torchvision.utils.make_grid(images))
# Print the class name of each of the 4 images, right-aligned to width 5.
print(' '.join(['%5s' % classes[labels[j]] for j in range(4)]))
print("**********************")
def matplotlib_imshow(img, one_channel=False):
    """Draw an image tensor on the current matplotlib axes (no ``plt.show()``).

    Args:
        img: image tensor; it is un-normalised with ``img / 2 + 0.5`` and
            moved to the CPU before conversion to NumPy.
        one_channel: when true, the tensor is first averaged over dim 0 and
            the result is drawn with the "Greys" colour map; otherwise the
            axes are reordered from (0, 1, 2) to (1, 2, 0) before drawing.
    """
    if one_channel:
        img = img.mean(dim=0)
    npimg = (img / 2 + 0.5).cpu().numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
        return
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
# TensorBoard writer logging under runs/train.
writer = SummaryWriter('runs/train')

# Draw a fresh batch of training images.
dataiter = iter(trainloader)
# Use the built-in next(): `.next()` is not part of the Python 3 iterator
# protocol and is not available on current PyTorch DataLoader iterators.
images, labels = next(dataiter)

# Arrange the batch into a single grid image and display it.
img_grid = torchvision.utils.make_grid(images)
imshow(img_grid)
def images_to_probs(net, images):
    """Generate predictions and their probabilities from a network.

    Args:
        net: callable applied to ``images``; its output is treated as one
            row of class scores per image (arg-max is taken over dim 1).
        images: batch of inputs passed to ``net`` unchanged.

    Returns:
        A tuple ``(preds, probs)`` where ``preds`` is a NumPy array holding
        the predicted class index of each image and ``probs`` is a list of
        floats holding the softmax probability of that predicted class.
    """
    output = net(images)
    _, preds_tensor = torch.max(output, 1)
    # np.squeeze turns a single-image batch into a 0-d array, which cannot be
    # iterated by zip() below; atleast_1d keeps one entry per image.
    preds = np.atleast_1d(np.squeeze(preds_tensor.cpu().numpy()))
    probs = [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]
    return preds, probs
def plot_classes_preds(net, images, labels):
    """Build a figure showing the first 4 images with predicted and true labels.

    Each subplot title reads "<predicted class>, <probability>%" followed by
    "(label: <true class>)" on a second line; the title is green when the
    prediction matches the label and red otherwise.

    Args:
        net: network passed on to ``images_to_probs``.
        images: batch of images; entries 0..3 are drawn.
        labels: labels for the batch; entries 0..3 are read.

    Returns:
        The matplotlib figure containing the 1 x 4 row of subplots.
    """
    preds, probs = images_to_probs(net, images)
    fig = plt.figure(figsize=(12, 48))
    for idx in range(4):
        ax = fig.add_subplot(1, 4, idx + 1, xticks=[], yticks=[])
        matplotlib_imshow(images[idx], one_channel=True)
        title = "{0}, {1:.1f}%\n(label: {2})".format(
            classes[preds[idx]],
            probs[idx] * 100.0,
            classes[labels[idx]],
        )
        is_correct = preds[idx] == labels[idx].item()
        ax.set_title(title, color=("green" if is_correct else "red"))
    return fig
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    Layers: ``conv1`` 3 -> 200 channels (5x5 kernel), ``conv2`` 200 -> 16
    channels (5x5 kernel), a shared 2x2 max-pool after each convolution, then
    fully connected layers 16*5*5 -> 120 -> 84 -> 10.

    The 16*5*5 input size of ``fc1`` matches 32x32 input images
    (32 -> 28 -> 14 -> 10 -> 5 along each spatial axis).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 200, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(200, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in the batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. A fixed ``view(-1, 16 * 5 * 5)`` would silently
        # fold surplus features into the batch dimension for wrong-sized
        # inputs; keeping the batch dimension makes fc1 raise a shape error.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
# Instantiate the network and log its graph to TensorBoard, using the sampled
# batch as the example input. This happens before the move to `device`.
net = Net()
writer.add_graph(net, images)
# Move the model parameters to the selected device.
net.to(device)
·······
·······
·······
如需了解完整代码请跳转到:
https://www.emperinter.info/2020/08/05/change-leaning-rate-by-reducelronplateau-in-pytorch/