Module ktrain.lroptimize.lrfinder
from ..imports import *
from .. import utils as U
class LRFinder:
"""
```
Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
Used to visually identify a good learning rate, given model and data.
Reference:
Original Paper: https://arxiv.org/abs/1506.01186
```
"""
def __init__(self, model, stop_factor=4):
self.model = model
self.losses = []
self.lrs = []
self.best_loss = 1e9
self._weightfile = None
self.stop_factor = stop_factor
self.avg_loss = 0
self.batch_num = 0
self.beta = 0.98
# stats computed by _compute_stats
self.mg = None # index of minimum numerical gradient
self.ml = None # index of minimum loss
def on_batch_end(self, batch, logs):
# Log the learning rate
lr = K.get_value(self.model.optimizer.lr)
self.lrs.append(lr)
# Log the loss
loss = logs['loss']
self.batch_num += 1
self.avg_loss = self.beta * self.avg_loss + (1 - self.beta) * loss
smoothed_loss = self.avg_loss / (1 - self.beta**self.batch_num)
self.losses.append(smoothed_loss)
# Stop if the smoothed loss got too large or became NaN
# (NaN comparisons are always False, so NaN must be checked explicitly)
if self.batch_num > 1 and (np.isnan(smoothed_loss) or smoothed_loss > self.stop_factor * self.best_loss):
self.model.stop_training = True
return
# record best loss
if smoothed_loss < self.best_loss or self.batch_num==1:
self.best_loss = smoothed_loss
# Increase the learning rate for the next batch
lr *= self.lr_mult
K.set_value(self.model.optimizer.lr, lr)
# stop if LR grows too large
if lr > 10.:
self.model.stop_training = True
return
def find(self, train_data, steps_per_epoch, use_gen=False, class_weight=None,
start_lr=1e-7, lr_mult=1.01, max_epochs=None,
batch_size=U.DEFAULT_BS, workers=1, use_multiprocessing=False, verbose=1):
"""
```
Track loss as learning rate is increased.
NOTE: batch_size is ignored when train_data is an instance of an Iterator.
```
"""
# check arguments and initialize
if train_data is None:
raise ValueError('train_data is required')
#U.data_arg_check(train_data=train_data, train_required=True)
self.lrs = []
self.losses = []
# compute steps_per_epoch
#num_samples = U.nsamples_from_data(train_data)
#if U.is_iter(train_data):
#use_gen = True
#steps_per_epoch = num_samples // train_data.batch_size
#else:
#use_gen = False
#steps_per_epoch = np.ceil(num_samples/batch_size)
# if both max_epochs and lr_mult are None, set max_epochs
# using a budget of 1500 batches
if max_epochs is None and lr_mult is None:
max_epochs = int(np.ceil(1500./steps_per_epoch))
if max_epochs:
epochs = max_epochs
num_batches = epochs * steps_per_epoch
end_lr = 10 if start_lr < 10 else start_lr * 10
self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)
else:
epochs = 1024
self.lr_mult = lr_mult
# Save weights into a file
new_file, self._weightfile = tempfile.mkstemp()
self.model.save_weights(self._weightfile)
# Remember the original learning rate
original_lr = K.get_value(self.model.optimizer.lr)
# Set the initial learning rate
K.set_value(self.model.optimizer.lr, start_lr)
callback = LambdaCallback(on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs))
if use_gen:
# *_generator methods are deprecated from TF 2.1.0
fit_fn = self.model.fit
fit_fn(train_data, steps_per_epoch=steps_per_epoch,
epochs=epochs, class_weight=class_weight,
workers=workers, use_multiprocessing=use_multiprocessing,
verbose=verbose,
callbacks=[callback])
else:
self.model.fit(train_data[0], train_data[1],
batch_size=batch_size, epochs=epochs, class_weight=class_weight,
verbose=verbose,
callbacks=[callback])
# Restore the weights to the state before model fitting
self.model.load_weights(self._weightfile)
self._weightfile=None
# Restore the original learning rate
K.set_value(self.model.optimizer.lr, original_lr)
# compute stats for numerical estimates of lr
self._compute_stats()
return
def plot_loss(self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False):
"""
```
Plots the loss.
Args:
n_skip_beginning(int): number of batches to skip on the left.
n_skip_end(int): number of batches to skip on the right.
suggest(bool): if True, highlights numerical estimates
of the best lr (methods adapted from fastai)
return_fig(bool): If True, return matplotlib.figure.Figure
Returns:
matplotlib.figure.Figure if return_fig else None
```
"""
if not self.find_called(): raise ValueError('Please call find first.')
fig, ax = plt.subplots()
plt.ylabel("loss")
plt.xlabel("learning rate (log scale)")
ax.plot(self.lrs[n_skip_beginning:-n_skip_end], self.losses[n_skip_beginning:-n_skip_end])
plt.xscale('log')
fig = None
if suggest:
# this code was adapted from fastai: https://github.com/fastai/fastai
if self.mg is None:
print("Failed to compute the gradients, there might not be enough points.\n" +\
"Plot displayed without suggestion.")
else:
mg = self.mg
ml = self.ml
print('Two possible suggestions for LR from plot:')
print(f"\tMin numerical gradient: {self.lrs[mg]:.2E}")
print(f"\tMin loss divided by 10: {self.lrs[ml]/10:.2E}")
ax.plot(self.lrs[mg],self.losses[mg], markersize=10,marker='o',color='red')
fig = plt.gcf()
plt.show()
if return_fig: return fig
return
def _compute_stats(self):
"""
```
Generates the index associated with the minimum numerical gradient and the
index associated with the minimum loss.
Stored as mg and ml, respectively.
```
"""
# this code was adapted from fastai: https://github.com/fastai/fastai
self.ml = np.argmin(self.losses)
try:
self.mg = (np.gradient(np.array(self.losses[32:self.ml]))).argmin()
except Exception:
self.mg = None
return
def estimate_lr(self):
"""
```
Generates two numerical estimates of lr:
1. lr associated with the minimum numerical gradient (None if gradient computation fails)
2. lr associated with the minimum loss, divided by 10
Returns:
tuple: (float, float)
If gradient computation fails, the first element of the tuple will be None.
```
"""
if not self.find_called(): raise ValueError('Please call find first.')
lr1 = None
lr2 = None
if self.mg is not None:
lr1 = self.lrs[self.mg]
lr2 = self.lrs[self.ml]/10
return (lr1, lr2)
def find_called(self):
return self.ml is not None
def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)):
"""
```
Plots rate of change of the loss function.
Parameters:
sma - number of batches for simple moving average to smooth out the curve.
n_skip_beginning - number of batches to skip on the left.
n_skip_end - number of batches to skip on the right.
y_lim - limits for the y axis.
```
"""
assert sma >= 1
derivatives = [0] * sma
for i in range(sma, len(self.lrs)):
derivative = (self.losses[i] - self.losses[i - sma]) / sma
derivatives.append(derivative)
plt.ylabel("rate of loss change")
plt.xlabel("learning rate (log scale)")
plt.plot(self.lrs[n_skip_beginning:-n_skip_end], derivatives[n_skip_beginning:-n_skip_end])
plt.xscale('log')
plt.ylim(y_lim)
Classes
class LRFinder (model, stop_factor=4)
Tracks (and plots) the change in loss of a Keras model as the learning rate is gradually increased. Used to visually identify a good learning rate for a given model and dataset. Reference: the original paper, https://arxiv.org/abs/1506.01186
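A minimal usage sketch, not taken from the ktrain docs: it assumes a compiled tf.keras `model` and in-memory NumPy arrays `x_train`, `y_train` already exist.

```
# Hypothetical usage sketch: run the LR range test on a compiled Keras model.
import numpy as np
from ktrain.lroptimize.lrfinder import LRFinder

lrf = LRFinder(model)                      # `model` is a compiled tf.keras model (assumed)
lrf.find((x_train, y_train),               # tuple of arrays => use_gen=False path
         steps_per_epoch=int(np.ceil(len(x_train) / 32)),
         max_epochs=5,                     # lr_mult is derived from the epoch budget
         batch_size=32)
lrf.plot_loss(suggest=True)                # plot loss vs. LR and print suggestions
lr_grad, lr_min10 = lrf.estimate_lr()      # numerical estimates of a good LR
```

Within ktrain itself this class is normally driven through a Learner's lr_find()/lr_plot() helpers rather than instantiated directly, so the sketch above is mainly for illustration.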
Methods
def estimate_lr(self)
Generates two numerical estimates of the learning rate: (1) the lr associated with the minimum numerical gradient (None if gradient computation fails), and (2) the lr associated with the minimum loss, divided by 10. Returns a tuple (float, float); if gradient computation fails, the first element is None.
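A short sketch of the return convention (assuming `lrf` is an LRFinder instance on which find has already been run):

```
# Sketch: choose a learning rate from the two estimates returned by estimate_lr().
lr_grad, lr_min10 = lrf.estimate_lr()                      # (min-gradient lr, min-loss lr / 10)
chosen_lr = lr_grad if lr_grad is not None else lr_min10   # fall back if gradient computation failed
print(f"selected learning rate: {chosen_lr:.2E}")
```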
def find(self, train_data, steps_per_epoch, use_gen=False, class_weight=None, start_lr=1e-07, lr_mult=1.01, max_epochs=None, batch_size=32, workers=1, use_multiprocessing=False, verbose=1)
Track loss as the learning rate is increased. NOTE: batch_size is ignored when train_data is an instance of an Iterator.
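When max_epochs is given, find derives lr_mult so the learning rate sweeps geometrically from start_lr to end_lr over epochs * steps_per_epoch batches; otherwise it multiplies the rate by the fixed lr_mult each batch until the loss diverges or the rate exceeds 10. A small self-contained sketch of that schedule, using illustrative values:

```
import numpy as np

# Geometric LR sweep used when max_epochs is supplied.
start_lr, end_lr = 1e-7, 10.0              # end_lr is 10 when start_lr < 10
steps_per_epoch, epochs = 300, 5           # illustrative values only
num_batches = epochs * steps_per_epoch
lr_mult = (end_lr / start_lr) ** (1.0 / num_batches)

lrs = start_lr * lr_mult ** np.arange(num_batches + 1)
assert np.isclose(lrs[-1], end_lr)         # the sweep ends at end_lr
```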
def find_called(self)
Returns True if find has been called (i.e., stats have been computed), False otherwise.
def on_batch_end(self, batch, logs)
Callback invoked at the end of each training batch: records the current learning rate and the smoothed loss, multiplies the learning rate by lr_mult for the next batch, and stops training once the smoothed loss exceeds stop_factor times the best loss seen so far or the learning rate grows beyond 10.
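The raw batch loss is smoothed with an exponentially weighted moving average plus bias correction (beta=0.98 from __init__), so early batches are not dragged toward the zero initialization. A standalone sketch of the same update:

```
# Bias-corrected exponential moving average, as used for the smoothed loss.
beta = 0.98
avg_loss, batch_num = 0.0, 0

def smoothed(loss):
    global avg_loss, batch_num
    batch_num += 1
    avg_loss = beta * avg_loss + (1 - beta) * loss
    return avg_loss / (1 - beta ** batch_num)   # divide out the startup bias

print(smoothed(2.0))   # 2.0   -- first batch passes through unchanged
print(smoothed(1.0))   # ~1.49 -- weighted toward the most recent batches
```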
def plot_loss(self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False)
Plots the loss versus learning rate.
Args:
n_skip_beginning(int): number of batches to skip on the left.
n_skip_end(int): number of batches to skip on the right.
suggest(bool): if True, highlights numerical estimates of the best lr (methods adapted from fastai).
return_fig(bool): if True, returns the matplotlib.figure.Figure.
Returns:
matplotlib.figure.Figure if return_fig else None
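A brief usage sketch (it assumes `lrf.find(...)` has already been run):

```
# Sketch: show the curve, print the two LR suggestions, and save the figure.
fig = lrf.plot_loss(n_skip_beginning=10, n_skip_end=5, suggest=True, return_fig=True)
fig.savefig('lr_finder_loss.png', bbox_inches='tight')
```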
def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01))
Plots the rate of change of the loss function.
Parameters:
sma - number of batches for the simple moving average used to smooth out the curve.
n_skip_beginning - number of batches to skip on the left.
n_skip_end - number of batches to skip on the right.
y_lim - limits for the y axis.
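Because per-batch derivatives are noisy, a larger sma window usually gives a more readable curve; a usage sketch, again assuming find has been run on `lrf`:

```
# Sketch: smooth the derivative over 20 batches and widen the y-limits slightly.
lrf.plot_loss_change(sma=20, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.02, 0.01))
```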