# Source code for sptm.inference

# -*- coding: utf-8 -*-

"""
    Inferencer functions
"""

#######################################

import numpy as np

import sptm.preprocess

__author__ = "Rochan Avlur Venkat"
__credits__ = ["Anupam Mediratta"]
__license__ = "MIT"
__version__ = "1.0"
__maintainer__ = "Rochan Avlur Venkat"
__email__ = "rochan170543@mechyd.ac.in"

#######################################

class Inferencer:
    """Inferencer object that runs topic inference on new text.

    The query text is preprocessed with ``sptm.preprocess.Corpus``,
    converted to a bag-of-words with ``dictionary`` and passed to
    ``model.get_document_topics``.

    Attributes:
        model: Trained topic model (LDA Mallet model); it must provide a
            ``get_document_topics`` method.
        dictionary: Dictionary used to train the LDA model; it must provide
            a ``doc2bow`` method.
    """

    def __init__(self, model, dictionary):
        """Init Inferencer with model and vocabulary dictionary.

        Args:
            model: sptm.Model object
            dictionary: sptm.Model.id2word
        """
        self.model = model
        self.dictionary = dictionary

    def infer(self, query, sentence_ml=2, token_ml=1):
        """Run an inference on the query.

        NOTE: use the same minimum lengths here as used during
        preprocessing, so the query is tokenized the same way as the
        training data.

        Args:
            query: List of reviews
            sentence_ml: Minimum length of the sentence in words
            token_ml: Minimum length of the tokens in characters

        Returns:
            List of topics with their probability, exactly as returned by
            ``self.model.get_document_topics``.
        """
        # Only the raw-text argument is supplied; the other three Corpus
        # constructor arguments are passed as None.
        query_corpus = sptm.preprocess.Corpus(None, query, None, None)

        # Same two preprocessing steps, in the same order, with the
        # caller-supplied minimum lengths.
        query_corpus.split_sentence(min_len=sentence_ml)
        query_corpus.tokenize_custom(min_len=token_ml)

        # NOTE(review): ``tokens`` is handed to doc2bow unchanged --
        # presumably it is a flat list of token strings; confirm against
        # sptm.preprocess.Corpus.
        query_bow = self.dictionary.doc2bow(query_corpus.tokens)
        return self.model.get_document_topics(query_bow)