Module ktrain.lroptimize.lrfinder

Expand source code
from ..imports import *
from .. import utils as U


class LRFinder:
    """
    ```
    Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
    Used to visually identify a good learning rate, given model and data.
    Reference:
        Original Paper: https://arxiv.org/abs/1506.01186
    ```
    """
    def __init__(self, model, stop_factor=4):
        self.model = model
        self.losses = []
        self.lrs = []
        self.best_loss = 1e9
        self._weightfile = None
        self.stop_factor = stop_factor

        self.avg_loss = 0
        self.batch_num = 0
        self.beta = 0.98

        # stats computed by _compute_stats
        self.mg = None # index of minimum numerical gradient
        self.ml = None # index of minimum loss

    def on_batch_end(self, batch, logs):
        # Log the learning rate
        lr = K.get_value(self.model.optimizer.lr)
        self.lrs.append(lr)

        # Log the loss
        loss = logs['loss']
        self.batch_num += 1
        self.avg_loss = self.beta * self.avg_loss + (1 - self.beta) * loss
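        # bias-correct the running average so the first few batches are not underestimated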
        smoothed_loss = self.avg_loss / (1 - self.beta**self.batch_num)
        self.losses.append(smoothed_loss)


        # Check whether the loss got too large or NaN
        #print("\n%s:%s\n" % (smoothed_loss, self.stop_factor * self.best_loss))
        if self.batch_num > 1 and smoothed_loss > self.stop_factor * self.best_loss:
            self.model.stop_training = True
            return

        # record best loss
        if smoothed_loss < self.best_loss or self.batch_num==1:
            self.best_loss = smoothed_loss

        # Increase the learning rate for the next batch
        lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, lr)

        # stop if LR grows too large
        if lr > 10.:
            self.model.stop_training = True
            return


    def find(self, train_data, steps_per_epoch, use_gen=False, class_weight=None,
             start_lr=1e-7, lr_mult=1.01, max_epochs=None, 
             batch_size=U.DEFAULT_BS, workers=1, use_multiprocessing=False, verbose=1):
        """
        ```
        Track loss as learning rate is increased.
        NOTE: batch_size is ignored when train_data is an instance of Iterator.
        ```
        """

        # check arguments and initialize
        if train_data is None:
            raise ValueError('train_data is required')
        #U.data_arg_check(train_data=train_data, train_required=True)
        self.lrs = []
        self.losses = []

         # compute steps_per_epoch
        #num_samples = U.nsamples_from_data(train_data)
        #if U.is_iter(train_data):
            #use_gen = True
            #steps_per_epoch = num_samples // train_data.batch_size
        #else:
            #use_gen = False
            #steps_per_epoch = np.ceil(num_samples/batch_size)

        # if both max_epochs and lr_mult are None, set max_epochs
        # using a sample size of 1500 batches
        if max_epochs is None and lr_mult is None:
            max_epochs = int(np.ceil(1500./steps_per_epoch))

        if max_epochs:
            epochs = max_epochs
            num_batches = epochs * steps_per_epoch
            end_lr = 10 if start_lr < 10 else start_lr * 10
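            # geometric schedule: multiplying by lr_mult once per batch reaches end_lr after num_batches batches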
            self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)
        else:
            epochs = 1024
            self.lr_mult = lr_mult

        # Save weights into a file
        new_file, self._weightfile = tempfile.mkstemp()
        self.model.save_weights(self._weightfile)

        # Remember the original learning rate
        original_lr = K.get_value(self.model.optimizer.lr)

        # Set the initial learning rate
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = LambdaCallback(on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs))


        if use_gen:
            # *_generator methods are deprecated as of TF 2.1.0
            fit_fn = self.model.fit
            fit_fn(train_data, steps_per_epoch=steps_per_epoch, 
                   epochs=epochs, class_weight=class_weight,
                   workers=workers, use_multiprocessing=use_multiprocessing,
                   verbose=verbose,
                   callbacks=[callback])
        else:
            self.model.fit(train_data[0], train_data[1],
                            batch_size=batch_size, epochs=epochs, class_weight=class_weight, 
                            verbose=verbose,
                            callbacks=[callback])


        # Restore the weights to the state before model fitting
        self.model.load_weights(self._weightfile)
        self._weightfile=None

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)

        # compute stats for numerical estimates of lr
        self._compute_stats()


        return 


    def plot_loss(self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False):
        """
        ```
        Plots the loss.
        Args:
            n_skip_beginning(int): number of batches to skip on the left.
            n_skip_end(int):  number of batches to skip on the right.
            suggest(bool): if True, highlight numerical estimates of
                           good learning rates (methods adapted from fastai)
            return_fig(bool):  If True, return matplotlib.figure.Figure
        Returns:
          matplotlib.figure.Figure if return_fig else None
        ```
        """
        if not self.find_called(): raise ValueError('Please call find first.')
        
        fig, ax = plt.subplots()
        plt.ylabel("loss")
        plt.xlabel("learning rate (log scale)")
        ax.plot(self.lrs[n_skip_beginning:-n_skip_end], self.losses[n_skip_beginning:-n_skip_end])
        plt.xscale('log')

        if suggest:
            # this code was adapted from fastai: https://github.com/fastai/fastai
            if self.mg is None:
                print("Failed to compute the gradients, there might not be enough points.\n" +\
                       "Plot displayed without suggestion.")
            else:
                valley = self.valley(self.lrs, self.losses)
                mg = self.mg
                ml = self.ml
                print('Three possible suggestions for LR from plot:')
                print(f"\tLongest valley (red): {self.lrs[valley]:.2E}")
                print(f"\tMin numerical gradient (purple): {self.lrs[mg]:.2E}")
                print(f"\tMin loss divided by 10 (omitted from plot): {self.lrs[ml]/10:.2E}")
                ax.plot(self.lrs[valley],self.losses[valley], markersize=10,marker='o',color='red')
                ax.plot(self.lrs[mg],self.losses[mg], markersize=10,marker='o',color='purple')
        fig = plt.gcf()
        plt.show()
        if return_fig: return fig
        return


    def valley(self, lrs, losses):
        """
        valley method for LR suggestions:
        https://github.com/fastai/fastai/pull/3377
        """
        n = len(losses)
        max_start, max_end = 0,0

        # find the longest valley
        lds = [1]*n
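        # lds[i]: length of the longest strictly decreasing subsequence of losses ending at index i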
        for i in range(1,n):
            for j in range(0,i):
                if (losses[i] < losses[j]) and (lds[i] < lds[j] + 1):
                    lds[i] = lds[j] + 1
                if lds[max_end] < lds[i]:
                    max_end = i
                    max_start = max_end - lds[max_end]

        sections = (max_end - max_start) / 3
        idx = max_start + int(sections) + int(sections/2)

        #return lrs[idx], (lrs[idx], losses[idx])
        return idx


    def _compute_stats(self):
        """
        ```
        Generates the index associated with the minimum numerical gradient and the
        index associated with the minimum loss.
        Stored as mg and ml, respectively.
        ```
        """
        # this code was adapted from fastai: https://github.com/fastai/fastai
        self.ml = np.argmin(self.losses)
        try: 
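            # only consider losses after the first 32 (noisy) batches and before the minimum loss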
            self.mg = (np.gradient(np.array(self.losses[32:self.ml]))).argmin()
        except Exception as e:
            self.mg = None
            warnings.warn(e)
        return


    def estimate_lr(self):
        """
        ```
        Generates three numerical estimates of lr:
          1. lr associated with minimum numerical gradient (None if gradient computation fails)
          2. lr associated with minimum loss divided by 10
          3. lr associated with longest valley
        Returns:
          tuple: (float, float, float)

          If gradient computation fails, first element of tuple will be None.
        ```
        """
        if not self.find_called(): raise ValueError('Please call find first.')
        lr1 = None
        lr2 = None
        if self.mg is not None:
            lr1 = self.lrs[self.mg]
        lr2 = self.lrs[self.ml]/10
        lr3 = self.lrs[self.valley(self.lrs, self.losses)]

        return (lr1, lr2, lr3)


    def find_called(self):
        return self.ml is not None


        
    def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)):
        """
        ```
        Plots rate of change of the loss function.
        Parameters:
            sma - number of batches for simple moving average to smooth out the curve.
            n_skip_beginning - number of batches to skip on the left.
            n_skip_end - number of batches to skip on the right.
            y_lim - limits for the y axis.
        ```
        """
        assert sma >= 1
        derivatives = [0] * sma
        for i in range(sma, len(self.lrs)):
            derivative = (self.losses[i] - self.losses[i - sma]) / sma
            derivatives.append(derivative)

        plt.ylabel("rate of loss change")
        plt.xlabel("learning rate (log scale)")
        plt.plot(self.lrs[n_skip_beginning:-n_skip_end], derivatives[n_skip_beginning:-n_skip_end])
        plt.xscale('log')
        plt.ylim(y_lim)

Classes

class LRFinder (model, stop_factor=4)
Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
Used to visually identify a good learning rate, given model and data.
Reference:
    Original Paper: https://arxiv.org/abs/1506.01186
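
A minimal usage sketch (not part of the ktrain sources): it assumes a compiled Keras model named model and NumPy training arrays x_train/y_train, which are placeholders here. Within ktrain this class is normally driven indirectly through a Learner, but it can also be used directly:

import numpy as np
from ktrain.lroptimize.lrfinder import LRFinder

lrf = LRFinder(model)                                   # model: an already-compiled Keras model
steps = int(np.ceil(len(x_train) / 32))                 # batches per epoch at batch_size=32
lrf.find((x_train, y_train), steps_per_epoch=steps,
         max_epochs=5, batch_size=32, verbose=1)        # sweep LRs over at most 5 epochs; weights are restored afterward
lrf.plot_loss(suggest=True)                             # loss vs. learning rate with suggested LRs highlighted
print(lrf.estimate_lr())                                # (min-gradient LR or None, min-loss/10 LR, valley LR)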

Methods

def estimate_lr(self)
Generates three numerical estimates of lr:
  1. lr associated with minimum numerical gradient (None if gradient computation fails)
  2. lr associated with minimum loss divided by 10
  3. lr associated with longest valley
Returns:
  tuple: (float, float, float)

  If gradient computation fails, first element of tuple will be None.
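
For illustration (lrf is assumed to be an LRFinder on which find has already been run), the returned tuple might be consumed as follows:

lr_grad, lr_loss10, lr_valley = lrf.estimate_lr()
chosen_lr = lr_grad if lr_grad is not None else lr_valley   # fall back if the gradient estimate failed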
def find(self, train_data, steps_per_epoch, use_gen=False, class_weight=None, start_lr=1e-07, lr_mult=1.01, max_epochs=None, batch_size=32, workers=1, use_multiprocessing=False, verbose=1)
Track loss as learning rate is increased.
NOTE: batch_size is ignored when train_data is an instance of Iterator.
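
A sketch of the two calling conventions, using placeholder names (x_train/y_train as NumPy arrays, train_gen as a Keras Iterator or Sequence):

# arrays: pass an (x, y) tuple; batch_size is honored
lrf.find((x_train, y_train), steps_per_epoch=len(x_train) // 32, batch_size=32)

# generator/Iterator: set use_gen=True; the generator's own batch size is used
lrf.find(train_gen, steps_per_epoch=len(train_gen), use_gen=True)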
def find_called(self)
def on_batch_end(self, batch, logs)
def plot_loss(self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False)
Plots the loss.
Args:
    n_skip_beginning(int): number of batches to skip on the left.
    n_skip_end(int):  number of batches to skip on the right.
    suggest(bool): if True, highlight numerical estimates of
                   good learning rates (methods adapted from fastai)
    return_fig(bool):  If True, return matplotlib.figure.Figure
Returns:
  matplotlib.figure.Figure if return_fig else None
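
For example (assuming find has already been called on lrf), the figure can be captured and saved rather than only displayed:

fig = lrf.plot_loss(n_skip_beginning=10, n_skip_end=1, suggest=True, return_fig=True)
fig.savefig('lr_plot.png', dpi=150)   # hypothetical output path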
def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01))
Plots rate of change of the loss function.
Parameters:
    sma - number of batches for simple moving average to smooth out the curve.
    n_skip_beginning - number of batches to skip on the left.
    n_skip_end - number of batches to skip on the right.
    y_lim - limits for the y axis.
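
For example (placeholder values), a derivative curve smoothed over 20 batches with a wider y-axis window:

lrf.plot_loss_change(sma=20, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.05, 0.05))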
def valley(self, lrs, losses)

valley method for LR suggestions: https://github.com/fastai/fastai/pull/3377
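
A toy demonstration on a synthetic loss curve (values are illustrative; lrf is an existing LRFinder instance):

import numpy as np

lrs = list(np.logspace(-6, 1, 30))
losses = list(np.concatenate([np.linspace(1.0, 0.2, 20),     # descending stretch (the valley)
                              np.linspace(0.25, 3.0, 10)]))  # loss blows up as the LR grows too large
idx = lrf.valley(lrs, losses)   # index partway down the longest descending stretch
print(lrs[idx])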
