Package ktrain
Expand source code
from .version import __version__
from . import imports as I
from .core import ArrayLearner, GenLearner, get_predictor, load_predictor, release_gpu_memory
from .vision.learner import ImageClassLearner
from .text.learner import BERTTextClassLearner, TransformerTextClassLearner
from .text.ner.learner import NERLearner
from .graph.learner import NodeClassLearner, LinkPredLearner
from . import utils as U
# Public names exported by `from ktrain import *`: get_learner is defined in
# this module; the other three are imported from .core above.
__all__ = ['get_learner', 'get_predictor', 'load_predictor', 'release_gpu_memory' ]
def get_learner(model, train_data=None, val_data=None,
                batch_size=U.DEFAULT_BS, eval_batch_size=U.DEFAULT_BS,
                workers=1, use_multiprocessing=False):
    """
    ```
    Returns a Learner instance that can be used to tune and train Keras models.

    Args:
      model (Model): A compiled instance of keras.engine.training.Model
      train_data (tuple or generator): Either a:
                                       1) tuple of (x_train, y_train), where x_train and
                                          y_train are numpy.ndarrays or
                                       2) Iterator
      val_data (tuple or generator): Either a:
                                     1) tuple of (x_test, y_test), where x_test and
                                        y_test are numpy.ndarrays or
                                     2) Iterator
                                     Note: Should be same type as train_data.
      batch_size (int): Batch size to use in training. default:32
      eval_batch_size(int): batch size used by learner.predict
                            only applies to validation data during training if
                            val_data is instance of utils.Sequence.
                            default:32
      workers (int): number of cpu processes used to load data.
                     This is ignored unless train_data/val_data is an instance of
                     tf.keras.preprocessing.image.DirectoryIterator or
                     tf.keras.preprocessing.image.DataFrameIterator.
                     Values that are not an int >= 1 are silently replaced by 1.
      use_multiprocessing(bool): whether or not to use multiprocessing for workers
                     This is ignored unless train_data/val_data is an instance of
                     tf.keras.preprocessing.image.DirectoryIterator or
                     tf.keras.preprocessing.image.DataFrameIterator.
                     Forced to True (with a warning) when workers > 1 and
                     train_data is not on disk.
    Returns:
      An instance of the Learner class selected from the model and data.
    Raises:
      ValueError: if model is not an instance of keras Model
    ```
    """
    # check arguments
    if not isinstance(model, I.keras.Model):
        raise ValueError('model must be of instance Model')
    U.data_arg_check(train_data=train_data, val_data=val_data)
    # exact type test (not isinstance) so that bools are still reset to 1
    if type(workers) is not int or workers < 1:
        workers = 1

    # check for NumpyArrayIterator
    if train_data and not U.ondisk(train_data):
        if workers > 1 and not use_multiprocessing:
            use_multiprocessing = True
            wrn_msg = 'Changed use_multiprocessing to True because NumpyArrayIterator with workers>1'
            wrn_msg += ' is slow when use_multiprocessing=False.'
            wrn_msg += ' If you experience issues with this, please set workers=1 and use_multiprocessing=False.'
            I.warnings.warn(wrn_msg)

    # verify BERT
    is_bert = U.bert_data_tuple(train_data)
    if is_bert:
        maxlen = U.shape_from_data(train_data)[1]
        msg = (
            "For a GPU with 12GB of RAM, the following maxima apply:\n"
            "sequence len=64, max_batch_size=64\n"
            "sequence len=128, max_batch_size=32\n"
            "sequence len=256, max_batch_size=16\n"
            "sequence len=320, max_batch_size=14\n"
            "sequence len=384, max_batch_size=12\n"
            "sequence len=512, max_batch_size=6\n"
            "You've exceeded these limits.\n"
            "If using a GPU with <=12GB of memory, you may run out of memory during training.\n"
            "If necessary, adjust sequence length or batch size based on above."
        )
        # (sequence length, max batch size) rows of the table quoted in msg.
        # maxlen is compared with >= so that a sequence length sitting exactly
        # on a table row (e.g. 128 or 512) is checked against that row's limit;
        # with a strict > the row for 512 could never trigger.
        bert_limits = ((64, 64), (128, 32), (256, 16), (320, 14), (384, 12), (512, 6))
        if any(maxlen >= seq_len and batch_size > max_bs
               for seq_len, max_bs in bert_limits):
            I.warnings.warn(msg)

    # return the appropriate trainer
    if U.is_iter(train_data):
        if U.is_ner(model=model, data=train_data):
            learner = NERLearner
        elif U.is_imageclass_from_data(train_data):
            learner = ImageClassLearner
        elif U.is_nodeclass(data=train_data):
            learner = NodeClassLearner
        # This branch used to repeat the is_nodeclass test and was unreachable.
        # NOTE(review): relies on U.is_linkpred being the link-prediction
        # counterpart of U.is_nodeclass in ktrain.utils - confirm it exists.
        elif U.is_linkpred(data=train_data):
            learner = LinkPredLearner
        elif U.is_huggingface(data=train_data):
            learner = TransformerTextClassLearner
        else:
            learner = GenLearner
    else:
        if is_bert:
            learner = BERTTextClassLearner
        else:  # vanilla text classifiers use standard ArrayLearners
            learner = ArrayLearner
    return learner(model, train_data=train_data, val_data=val_data,
                   batch_size=batch_size, eval_batch_size=eval_batch_size,
                   workers=workers, use_multiprocessing=use_multiprocessing)
# keys
# currently_unsupported: unsupported or disabled features (e.g., xai graph neural networks have not been implemented)
# dep_fix: a fix to address a problem in a dependency
# TODO: things to change
# NOTES: As of 0.30.x, TensorFlow is optional (its installation is no longer forced), which allows for use of pretrained PyTorch or sklearn models.
# In core, lroptimize imports were localized to allow for optional TF
# References to ktrain.dataset (keras.utils) and anago (keras.Callback) were also localized (from module-level) for optional TF
Sub-modules
ktrain.core
ktrain.dataset
ktrain.graph
ktrain.imports
ktrain.lroptimize
ktrain.models
ktrain.predictor
ktrain.preprocessor
ktrain.tabular
ktrain.text
ktrain.torch_base
ktrain.utils
ktrain.version
ktrain.vision
Functions
def get_learner(model, train_data=None, val_data=None, batch_size=32, eval_batch_size=32, workers=1, use_multiprocessing=False)
-
Returns a Learner instance that can be used to tune and train Keras models. model (Model): A compiled instance of keras.engine.training.Model train_data (tuple or generator): Either a: 1) tuple of (x_train, y_train), where x_train and y_train are numpy.ndarrays or 2) Iterator val_data (tuple or generator): Either a: 1) tuple of (x_test, y_test), where x_test and y_test are numpy.ndarrays or 2) Iterator Note: Should be same type as train_data. batch_size (int): Batch size to use in training. default:32 eval_batch_size(int): batch size used by learner.predict only applies to validation data during training if val_data is instance of utils.Sequence. default:32 workers (int): number of cpu processes used to load data. This is ignored unless train_data/val_data is an instance of tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator. use_multiprocessing(bool): whether or not to use multiprocessing for workers This is ignored unless train_data/val_data is an instance of tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.
Expand source code
def get_learner(model, train_data=None, val_data=None,
                batch_size=U.DEFAULT_BS, eval_batch_size=U.DEFAULT_BS,
                workers=1, use_multiprocessing=False):
    """
    ```
    Returns a Learner instance that can be used to tune and train Keras models.

    Args:
      model (Model): A compiled instance of keras.engine.training.Model
      train_data (tuple or generator): Either a:
                                       1) tuple of (x_train, y_train), where x_train and
                                          y_train are numpy.ndarrays or
                                       2) Iterator
      val_data (tuple or generator): Either a:
                                     1) tuple of (x_test, y_test), where x_test and
                                        y_test are numpy.ndarrays or
                                     2) Iterator
                                     Note: Should be same type as train_data.
      batch_size (int): Batch size to use in training. default:32
      eval_batch_size(int): batch size used by learner.predict
                            only applies to validation data during training if
                            val_data is instance of utils.Sequence.
                            default:32
      workers (int): number of cpu processes used to load data.
                     This is ignored unless train_data/val_data is an instance of
                     tf.keras.preprocessing.image.DirectoryIterator or
                     tf.keras.preprocessing.image.DataFrameIterator.
                     Values that are not an int >= 1 are silently replaced by 1.
      use_multiprocessing(bool): whether or not to use multiprocessing for workers
                     This is ignored unless train_data/val_data is an instance of
                     tf.keras.preprocessing.image.DirectoryIterator or
                     tf.keras.preprocessing.image.DataFrameIterator.
                     Forced to True (with a warning) when workers > 1 and
                     train_data is not on disk.
    Returns:
      An instance of the Learner class selected from the model and data.
    Raises:
      ValueError: if model is not an instance of keras Model
    ```
    """
    # check arguments
    if not isinstance(model, I.keras.Model):
        raise ValueError('model must be of instance Model')
    U.data_arg_check(train_data=train_data, val_data=val_data)
    # exact type test (not isinstance) so that bools are still reset to 1
    if type(workers) is not int or workers < 1:
        workers = 1

    # check for NumpyArrayIterator
    if train_data and not U.ondisk(train_data):
        if workers > 1 and not use_multiprocessing:
            use_multiprocessing = True
            wrn_msg = 'Changed use_multiprocessing to True because NumpyArrayIterator with workers>1'
            wrn_msg += ' is slow when use_multiprocessing=False.'
            wrn_msg += ' If you experience issues with this, please set workers=1 and use_multiprocessing=False.'
            I.warnings.warn(wrn_msg)

    # verify BERT
    is_bert = U.bert_data_tuple(train_data)
    if is_bert:
        maxlen = U.shape_from_data(train_data)[1]
        msg = (
            "For a GPU with 12GB of RAM, the following maxima apply:\n"
            "sequence len=64, max_batch_size=64\n"
            "sequence len=128, max_batch_size=32\n"
            "sequence len=256, max_batch_size=16\n"
            "sequence len=320, max_batch_size=14\n"
            "sequence len=384, max_batch_size=12\n"
            "sequence len=512, max_batch_size=6\n"
            "You've exceeded these limits.\n"
            "If using a GPU with <=12GB of memory, you may run out of memory during training.\n"
            "If necessary, adjust sequence length or batch size based on above."
        )
        # (sequence length, max batch size) rows of the table quoted in msg.
        # maxlen is compared with >= so that a sequence length sitting exactly
        # on a table row (e.g. 128 or 512) is checked against that row's limit;
        # with a strict > the row for 512 could never trigger.
        bert_limits = ((64, 64), (128, 32), (256, 16), (320, 14), (384, 12), (512, 6))
        if any(maxlen >= seq_len and batch_size > max_bs
               for seq_len, max_bs in bert_limits):
            I.warnings.warn(msg)

    # return the appropriate trainer
    if U.is_iter(train_data):
        if U.is_ner(model=model, data=train_data):
            learner = NERLearner
        elif U.is_imageclass_from_data(train_data):
            learner = ImageClassLearner
        elif U.is_nodeclass(data=train_data):
            learner = NodeClassLearner
        # This branch used to repeat the is_nodeclass test and was unreachable.
        # NOTE(review): relies on U.is_linkpred being the link-prediction
        # counterpart of U.is_nodeclass in ktrain.utils - confirm it exists.
        elif U.is_linkpred(data=train_data):
            learner = LinkPredLearner
        elif U.is_huggingface(data=train_data):
            learner = TransformerTextClassLearner
        else:
            learner = GenLearner
    else:
        if is_bert:
            learner = BERTTextClassLearner
        else:  # vanilla text classifiers use standard ArrayLearners
            learner = ArrayLearner
    return learner(model, train_data=train_data, val_data=val_data,
                   batch_size=batch_size, eval_batch_size=eval_batch_size,
                   workers=workers, use_multiprocessing=use_multiprocessing)
def get_predictor(model, preproc, batch_size=32)
-
Returns a Predictor instance that can be used to make predictions on unlabeled examples. Can be saved to disk and reloaded as part of a larger application. Args model (Model): A compiled instance of keras.engine.training.Model preproc(Preprocessor): An instance of TextPreprocessor,ImagePreprocessor, or NERPreprocessor. These instances are returned from the data loading functions in the ktrain vision and text modules: ktrain.vision.images_from_folder ktrain.vision.images_from_csv ktrain.vision.images_from_array ktrain.text.texts_from_folder ktrain.text.texts_from_csv ktrain.text.ner.entities_from_csv batch_size(int): batch size to use. default:32
Expand source code
def get_predictor(model, preproc, batch_size=U.DEFAULT_BS):
    """
    ```
    Wraps a model and its preprocessor in the matching Predictor, which can be
    used to make predictions on unlabeled examples. Can be saved to disk and
    reloaded as part of a larger application.

    Args
        model (Model):         A compiled instance of keras.engine.training.Model
        preproc(Preprocessor): An instance of TextPreprocessor, ImagePreprocessor,
                               NERPreprocessor, NodePreprocessor, LinkPreprocessor
                               or TabularPreprocessor.
                               These instances are returned from the data loading
                               functions in the ktrain vision and text modules:
                               ktrain.vision.images_from_folder
                               ktrain.vision.images_from_csv
                               ktrain.vision.images_from_array
                               ktrain.text.texts_from_folder
                               ktrain.text.texts_from_csv
                               ktrain.text.ner.entities_from_csv
        batch_size(int):       batch size to use.  default:32
    Returns
        The Predictor instance that corresponds to the type of preproc.
    Raises
        ValueError: if model is not a keras.Model or preproc is not one of the
                    supported preprocessor types
    ```
    """
    # check arguments
    if not isinstance(model, keras.Model):
        raise ValueError('model must be of instance keras.Model')
    supported_preprocs = (ImagePreprocessor, TextPreprocessor, NERPreprocessor,
                          NodePreprocessor, LinkPreprocessor, TabularPreprocessor)
    if not isinstance(preproc, supported_preprocs):
        raise ValueError('preproc must be instance of ktrain.preprocessor.Preprocessor')

    # pick the predictor that goes with the preprocessor type (first match wins)
    if isinstance(preproc, ImagePreprocessor):
        return ImagePredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, TextPreprocessor):
        return TextPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, NERPreprocessor):
        return NERPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, NodePreprocessor):
        return NodePredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, LinkPreprocessor):
        return LinkPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, TabularPreprocessor):
        return TabularPredictor(model, preproc, batch_size=batch_size)
    raise Exception('preproc of type %s not currently supported' % (type(preproc)))
def load_predictor(fpath, batch_size=32, custom_objects=None)
-
Loads a previously saved Predictor instance Args fpath(str): predictor path name (value supplied to predictor.save) From v0.16.x, this is always the path to a folder. Pre-v0.16.x, this is the base name used to save model and .preproc instance. batch_size(int): batch size to use for predictions. default:32 custom_objects(dict): custom objects required to load model. This is useful if you compiled the model with a custom loss function, for example. For models included with ktrain as is, this is populated automatically and can be disregarded.
Expand source code
def load_predictor(fpath, batch_size=U.DEFAULT_BS, custom_objects=None):
    """
    ```
    Loads a previously saved Predictor instance

    Args
      fpath(str): predictor path name (value supplied to predictor.save)
                  From v0.16.x, this is always the path to a folder.
                  Pre-v0.16.x, this is the base name used to save model and .preproc instance.
      batch_size(int): batch size to use for predictions. default:32
      custom_objects(dict): custom objects required to load model.
                            This is useful if you compiled the model with a custom loss function, for example.
                            For models included with ktrain as is, this is populated automatically
                            and can be disregarded.
    Returns
      The Predictor instance built from the loaded model and preprocessor.
    Raises
      Exception: if the .preproc file cannot be loaded from either location, or if
                 the stored image preprocessing function name is not recognized
      ValueError: if the loaded model or preprocessor is of an unsupported type
    ```
    """
    # load the preprocessor
    # NOTE(security): pickle.load executes arbitrary code contained in the file;
    # only load predictors that come from a trusted source.
    preproc = None
    try:
        preproc_name = os.path.join(fpath, U.PREPROC_NAME)
        with open(preproc_name, 'rb') as f:
            preproc = pickle.load(f)
    except Exception:
        # fall back to the pre-v0.16.x location; `except Exception` (rather than a
        # bare except) lets KeyboardInterrupt/SystemExit propagate
        try:
            preproc_name = fpath + '.preproc'
            with open(preproc_name, 'rb') as f:
                preproc = pickle.load(f)
        except Exception as err:
            raise Exception('Failed to load .preproc file in either the post v0.16.x loction (%s) or pre v0.16.x location (%s)' % (os.path.join(fpath, U.PREPROC_NAME), fpath+'.preproc')) from err

    # load the model
    model = _load_model(fpath, preproc=preproc, custom_objects=custom_objects)

    # preprocessing functions in ImageDataGenerators are not pickable
    # so, we must reconstruct
    if hasattr(preproc, 'datagen') and hasattr(preproc.datagen, 'ktrain_preproc'):
        preproc_name = preproc.datagen.ktrain_preproc
        # stored name -> keras.applications submodule providing preprocess_input;
        # the submodule is looked up lazily so only the requested one is touched
        app_modules = {
            'resnet50': 'resnet50',
            'mobilenet': 'mobilenet',
            'mobilenetv3': 'mobilenet_v3',
            'inception': 'inception_v3',
            'efficientnet': 'efficientnet',
        }
        if preproc_name not in app_modules:
            raise Exception('Uknown preprocessing_function name: %s' % (preproc_name))
        app_module = getattr(keras.applications, app_modules[preproc_name])
        preproc.datagen.preprocessing_function = app_module.preprocess_input

    # return the appropriate predictor; get_predictor performs the same
    # model/preproc validation and type dispatch that was duplicated here
    return get_predictor(model, preproc, batch_size=batch_size)
def release_gpu_memory(device=0)
-
Release GPU memory allocated by Tensorflow Source: https://stackoverflow.com/questions/51005147/keras-release-memory-after-finish-training-process
Expand source code
def release_gpu_memory(device=0):
    """
    ```
    Release GPU memory allocated by Tensorflow
    Source:
    https://stackoverflow.com/questions/51005147/keras-release-memory-after-finish-training-process

    Args
      device(int): index passed to numba's cuda.select_device. default:0
    ```
    """
    # imported here so numba is only needed when this function is actually called
    from numba import cuda as numba_cuda

    K.clear_session()
    numba_cuda.select_device(device)
    numba_cuda.close()