# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import re
from typing import Dict
from nlp_architect.data.cdc_resources.data_types.wiki.wikipedia_page_extracted_relations import \
WikipediaPageExtractedRelations, DISAMBIGUATION_TITLE
from nlp_architect.utils.string_utils import StringUtils
class WikipediaPage(object):
    """A Wikipedia page together with the search phrase that led to it.

    Two pages compare equal (and hash equally) when their ``orig_phrase``,
    ``wiki_title`` and ``pageid`` all match; the remaining fields do not take
    part in equality.
    """

    def __init__(self, orig_phrase: str = None, orig_phrase_norm: str = None,
                 wiki_title: str = None, wiki_title_norm: str = None,
                 score: int = 0, pageid: int = 0, description: str = None,
                 relations: 'WikipediaPageExtractedRelations' = None) -> None:
        """
        Object represent a Wikipedia Page and extracted fields.

        Args:
            orig_phrase (str): original search phrase
            orig_phrase_norm (str): original search phrase normalized; when
                omitted it is computed with ``StringUtils.normalize_str``
            wiki_title (str): page title; any ``DISAMBIGUATION_TITLE`` marker
                is removed before it is stored
            wiki_title_norm (str): page title normalized; when omitted it is
                computed with ``StringUtils.normalize_str`` from the title as
                passed in (i.e. before the disambiguation marker is removed)
            score (int): score for getting wiki_title from orig_phrase
            pageid (int): the unique page identifier (coerced with ``int()``)
            description (str, optional): the page description
            relations (WikipediaPageExtractedRelations): Object that represent all
                extracted Wikipedia relations

        Note:
            When a phrase/title is ``None`` and its normalized form is not
            supplied, ``None`` is still handed to ``StringUtils.normalize_str``
            -- NOTE(review): confirm that helper accepts ``None``.
        """
        self.orig_phrase = orig_phrase
        if orig_phrase_norm is None:
            orig_phrase_norm = StringUtils.normalize_str(orig_phrase)
        self.orig_phrase_norm = orig_phrase_norm

        # wiki_title defaults to None, so only strip the marker from a real
        # title instead of failing on None.replace(...).
        if wiki_title is None:
            self.wiki_title = None
        else:
            self.wiki_title = wiki_title.replace(DISAMBIGUATION_TITLE, '')
        if wiki_title_norm is None:
            wiki_title_norm = StringUtils.normalize_str(wiki_title)
        self.wiki_title_norm = wiki_title_norm

        self.score = score
        self.pageid = int(pageid)
        self.description = description
        self.relations = relations

    def toJson(self) -> Dict:
        """Return the page fields as a plain dictionary.

        The ``relations`` entry holds ``self.relations.toJson()``, or ``None``
        when no relations object was attached to this page.
        """
        relations_json = None
        if self.relations is not None:
            relations_json = self.relations.toJson()
        return {
            'orig_phrase': self.orig_phrase,
            'orig_phrase_norm': self.orig_phrase_norm,
            'wiki_title': self.wiki_title,
            'wiki_title_norm': self.wiki_title_norm,
            'score': self.score,
            'pageid': self.pageid,
            'description': self.description,
            'relations': relations_json,
        }

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # comparison for foreign types instead of raising AttributeError.
        if not isinstance(other, WikipediaPage):
            return NotImplemented
        return (self.orig_phrase == other.orig_phrase
                and self.wiki_title == other.wiki_title
                and self.pageid == other.pageid)

    def __hash__(self):
        # Hash exactly the fields compared in __eq__; a tuple hash is
        # order-sensitive, unlike a plain sum of the individual hashes.
        return hash((self.orig_phrase, self.pageid, self.wiki_title))

    def __str__(self) -> str:
        """Return a comma separated, single line summary of the page.

        Typographic single quotes (U+2018 / U+2019) in the phrase and title
        are replaced by a plain apostrophe. Formatting is best effort: any
        failure yields the fixed text ``'error in to_string()'``.
        """
        try:
            phrase_plain = re.sub(u'(\u2018|\u2019)', '\'', self.orig_phrase)
            title_plain = re.sub(u'(\u2018|\u2019)', '\'', self.wiki_title)
            fields = (phrase_plain, title_plain, self.score, self.pageid,
                      self.description, self.relations)
            return ', '.join(str(field) for field in fields)
        except Exception:
            # Deliberately broad: __str__ must never raise, e.g. when the
            # phrase or title is None or a field's own str() fails.
            return 'error in to_string()'