Source code for visions.application.summaries.series.text_summary

from collections import Counter
from typing import Dict

import pandas as pd

from tangled_up_in_unicode import category, category_long, script, block, block_abbr


def get_character_counts(series) -> Dict[str, int]:
    """Function to return the character counts

    Args:
        series: the Series to process

    Returns:
        A dict with character counts
    """
    return dict(Counter(series.str.cat()))


[docs]def text_summary(series: pd.Series) -> dict: """ Args: series: series to summarize Returns: """ # Distribution of length summary = {"length": series.map(lambda x: len(str(x))).value_counts().to_dict()} # Unicode Character Summaries (category and script name) character_counts = get_character_counts(series) summary["category_short_values"] = { key: category(key) for key in character_counts.keys() } summary["category_alias_values"] = { key: category_long(value) for key, value in summary["category_short_values"].items() } summary["script_values"] = {key: script(key) for key in character_counts.keys()} summary["block_values"] = {key: block(key) for key in character_counts.keys()} summary["block_alias_values"] = { key: block_abbr(value) for key, value in summary["block_values"].items() } return summary