# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model
class SequenceTagger(object):
"""
A sequence tagging model for POS and Chunks written in Tensorflow (and Keras) based on the
paper 'Deep multi-task learning with low level tasks supervised at lower layers'.
The model has 3 Bi-LSTM layers and outputs POS and Chunk tags.
Args:
use_cudnn (bool, optional): use GPU based model (CUDNNA cells)
"""
def __init__(self, use_cudnn=False):
self.vocabulary_size = None
self.num_pos_labels = None
self.num_chunk_labels = None
self.char_vocab_size = None
self.feature_size = None
self.dropout = None
self.max_word_len = None
self.classifier = None
self.optimizer = None
self.model = None
self.use_cudnn = use_cudnn
    def build(self,
vocabulary_size,
num_pos_labels,
num_chunk_labels,
char_vocab_size=None,
max_word_len=25,
feature_size=100,
dropout=0.5,
classifier='softmax',
optimizer=None):
"""
Build a chunker/POS model
Args:
vocabulary_size (int): the size of the input vocabulary
            num_pos_labels (int): the number of POS labels
            num_chunk_labels (int): the number of chunk labels
char_vocab_size (int, optional): character vocabulary size
max_word_len (int, optional): max characters in a word
feature_size (int, optional): feature size - determines the embedding/LSTM layer \
hidden state size
dropout (float, optional): dropout rate
classifier (str, optional): classifier layer, 'softmax' for softmax or 'crf' for \
conditional random fields classifier. default is 'softmax'.
            optimizer (tensorflow.python.training.optimizer.Optimizer, optional): optimizer; \
                if None, Adam with gradient clipping (clipnorm=5) is used
"""
self.vocabulary_size = vocabulary_size
self.char_vocab_size = char_vocab_size
self.num_pos_labels = num_pos_labels
self.num_chunk_labels = num_chunk_labels
self.max_word_len = max_word_len
self.feature_size = feature_size
self.dropout = dropout
self.classifier = classifier
word_emb_layer = tf.keras.layers.Embedding(self.vocabulary_size, self.feature_size,
name='embedding', mask_zero=False)
word_input = tf.keras.layers.Input(shape=(None,))
word_embedding = word_emb_layer(word_input)
input_src = word_input
features = word_embedding
# add char input if present
if self.char_vocab_size is not None:
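            # character features: embed each character, run a 1D convolution over the
            # characters of each word, then max-pool to a fixed-size word representation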
char_input = tf.keras.layers.Input(shape=(None, self.max_word_len))
char_emb_layer = tf.keras.layers.Embedding(self.char_vocab_size, 30,
name='char_embedding',
mask_zero=False)
char_embedding = char_emb_layer(char_input)
char_embedding = tf.keras.layers.TimeDistributed(
tf.keras.layers.Conv1D(30, 3, padding='same'))(char_embedding)
char_embedding = tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalMaxPooling1D())(
char_embedding)
input_src = [input_src, char_input]
features = tf.keras.layers.concatenate([word_embedding, char_embedding])
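        # 3 stacked Bi-LSTM layers shared by both tasks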
rnn_layer_1 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
features)
rnn_layer_2 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
rnn_layer_1)
rnn_layer_3 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
rnn_layer_2)
# outputs
pos_out = tf.keras.layers.Dense(self.num_pos_labels, activation='softmax',
name='pos_output')(rnn_layer_1)
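        # the POS head is attached to the first Bi-LSTM layer (a low-level task
        # supervised at a lower layer, per the paper); chunking uses the third layer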
losses = {'pos_output': 'categorical_crossentropy'}
metrics = {'pos_output': 'categorical_accuracy'}
if 'crf' in self.classifier:
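            # the CRF classifier is explicitly placed on the CPU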
with tf.device('/cpu:0'):
chunk_crf = CRF(self.num_chunk_labels, name='chunk_crf')
rnn_layer_3_dense = tf.keras.layers.Dense(self.num_chunk_labels)(
tf.keras.layers.Dropout(self.dropout)(rnn_layer_3))
chunks_out = chunk_crf(rnn_layer_3_dense)
losses['chunk_crf'] = chunk_crf.loss
metrics['chunk_crf'] = chunk_crf.viterbi_accuracy
else:
chunks_out = tf.keras.layers.TimeDistributed(
tf.keras.layers.Dense(self.num_chunk_labels,
activation='softmax'),
name='chunk_out')(rnn_layer_3)
losses['chunk_out'] = 'categorical_crossentropy'
metrics['chunk_out'] = 'categorical_accuracy'
model = tf.keras.Model(input_src, [pos_out, chunks_out])
if optimizer is None:
self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)
else:
self.optimizer = optimizer
model.compile(optimizer=self.optimizer,
loss=losses,
metrics=metrics)
self.model = model
    def load_embedding_weights(self, weights):
"""
Load word embedding weights into the model embedding layer
Args:
weights (numpy.ndarray): 2D matrix of word weights
"""
        assert self.model is not None, 'Cannot assign weights, apply build() before trying ' \
                                       'to load embedding weights'
emb_layer = self.model.get_layer(name='embedding')
assert emb_layer.output_dim == weights.shape[1], 'embedding vectors shape mismatch'
emb_layer.set_weights([weights])
def _rnn_cell(self, **kwargs):
if self.use_cudnn:
rnn_cell = tf.keras.layers.CuDNNLSTM(self.feature_size, **kwargs)
else:
rnn_cell = tf.keras.layers.LSTM(self.feature_size, **kwargs)
return rnn_cell
    def fit(self, x, y, batch_size=1, epochs=1, validation_data=None, callbacks=None):
"""
        Fit the model on the provided x and y samples
Args:
x: x samples
y: y samples
            batch_size (int, optional): batch size
            epochs (int, optional): number of training epochs
            validation_data (optional): x and y samples to validate at the end of each epoch
callbacks (optional): additional callbacks to run with fitting
"""
self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs,
validation_data=validation_data, callbacks=callbacks)
    def predict(self, x, batch_size=1):
"""
Predict labels given x.
Args:
x: samples for inference
batch_size (int, optional): forward pass batch size
Returns:
            tuple of numpy arrays of POS and chunk labels
"""
return self.model.predict(x=x, batch_size=batch_size)
    def save(self, filepath):
"""
Save the model to disk
Args:
filepath (str): file name to save model
"""
topology = {k: v for k, v in self.__dict__.items()}
topology.pop('model')
topology.pop('optimizer')
topology.pop('use_cudnn')
save_model(self.model, topology, filepath)
    def load(self, filepath):
"""
Load model from disk
Args:
filepath (str): file name of model
"""
load_model(filepath, self)
class SequenceChunker(SequenceTagger):
    """
    A sequence chunker model, written in TensorFlow (Keras), based on the SequenceTagger
    model. This model uses only the chunk output of the base model.
    """
    def predict(self, x, batch_size=1):
"""
Predict labels given x.
Args:
x: samples for inference
batch_size (int, optional): forward pass batch size
Returns:
            numpy array of chunk labels
"""
model = tf.keras.Model(self.model.input, self.model.output[-1])
return model.predict(x=x, batch_size=batch_size)
class SequencePOSTagger(SequenceTagger):
    """
    A sequence POS tagger model, written in TensorFlow (Keras), based on the SequenceTagger
    model. This model uses only the POS output of the base model.
    """
    def predict(self, x, batch_size=1):
"""
Predict labels given x.
Args:
x: samples for inference
batch_size (int, optional): forward pass batch size
Returns:
            numpy array of POS labels
"""
model = tf.keras.Model(self.model.input, self.model.output[0])
return model.predict(x=x, batch_size=batch_size)
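# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the library): trains the
# tagger on random data to show the expected input/output shapes. The vocabulary
# size, label counts, and tensor shapes below are assumed values for the demo.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np

    num_sentences, sentence_len = 8, 20
    vocab_size, n_pos, n_chunk = 1000, 17, 23

    tagger = SequenceTagger()
    tagger.build(vocabulary_size=vocab_size,
                 num_pos_labels=n_pos,
                 num_chunk_labels=n_chunk)

    # random word-id input, shape (batch, time)
    x = np.random.randint(1, vocab_size, size=(num_sentences, sentence_len))
    # one-hot targets per timestep, shape (batch, time, num_labels)
    y_pos = np.eye(n_pos)[np.random.randint(0, n_pos, size=(num_sentences, sentence_len))]
    y_chunk = np.eye(n_chunk)[np.random.randint(0, n_chunk,
                                                size=(num_sentences, sentence_len))]

    tagger.fit(x, [y_pos, y_chunk], batch_size=4, epochs=1)
    pos_probs, chunk_probs = tagger.predict(x, batch_size=4)
    print(pos_probs.shape, chunk_probs.shape)  # (8, 20, 17) (8, 20, 23)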