# Source code for visions.application.summaries.series.numerical_summary

import numpy as np
import pandas as pd


def numerical_summary(series: pd.Series) -> dict:
    """Compute descriptive statistics for a numerical series.

    Args:
        series: series to summarize

    Returns:
        A dict containing, in this order:

        * ``mean``, ``std``, ``var``, ``max``, ``min``, ``median``, ``kurt``,
          ``skew``, ``sum``: the pandas aggregations of the same name.
        * ``mad``: mean absolute deviation around the mean.
        * ``quantile_5``, ``quantile_25``, ``quantile_50``, ``quantile_75``,
          ``quantile_95``: the corresponding quantiles.
        * ``iqr``: ``quantile_75 - quantile_25``.
        * ``range``: ``max - min``.
        * ``cv``: ``std / mean``, or NaN when the mean is zero.
        * ``monotonic_increase`` / ``monotonic_decrease``: whether the series
          is monotonically non-decreasing / non-increasing.
    """
    aggregates = [
        "mean",
        "std",
        "var",
        "max",
        "min",
        "median",
        "kurt",
        "skew",
        "sum",
    ]
    summary = series.agg(aggregates).to_dict()

    # ``Series.mad`` was deprecated in pandas 1.5 and removed in pandas 2.0,
    # so it can no longer be requested through ``agg``. Compute the mean
    # absolute deviation explicitly; this works on every pandas version.
    summary["mad"] = (series - series.mean()).abs().mean()

    quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]
    for percentile, value in series.quantile(quantiles).to_dict().items():
        # Round before converting: ``percentile * 100`` is subject to float
        # error (e.g. 0.29 * 100 == 28.999...), which ``int`` would truncate.
        summary["quantile_{:d}".format(int(round(percentile * 100)))] = value

    summary["iqr"] = summary["quantile_75"] - summary["quantile_25"]
    summary["range"] = summary["max"] - summary["min"]
    # A zero mean would divide by zero. ``np.nan`` is used because the
    # ``np.NaN`` alias was removed in NumPy 2.0.
    summary["cv"] = summary["std"] / summary["mean"] if summary["mean"] else np.nan

    summary["monotonic_increase"] = series.is_monotonic_increasing
    summary["monotonic_decrease"] = series.is_monotonic_decreasing

    # TODO: need access to n_unique to derive the strict variants
    #   (monotonic_increase_strict / monotonic_decrease_strict), i.e. the
    #   matching monotonic flag combined with all values being unique.
    # TODO: only calculations for histogram, not the plotting

    return summary