Module pandas_profiling.report.presentation.frequency_table_utils
Expand source code
from typing import List, Dict
def freq_table(freqtable, n: int, max_number_to_print: int) -> List[Dict]:
    """Build the display rows (value, count) of a frequency table.

    Args:
        freqtable: The frequency table. Its first entry is used as the
            largest listed frequency (presumably sorted by descending
            count; verify against the caller).
        n: The total number of values.
        max_number_to_print: The maximum number of observations to print;
            capped at ``n``.

    Returns:
        One dict per listed value, optionally followed by an
        "Other values" row and a "(Missing)" row. Empty when the table has
        no entries or every frequency is zero.
    """

    def make_row(label, count, share, css_class):
        # Bar width is relative to the largest frequency being displayed.
        return {
            "label": label,
            "width": count / max_freq,
            "count": count,
            "percentage": share,
            "n": n,
            "extra_class": css_class,
        }

    # TODO: replace '' by '(Empty)' ?
    limit = min(max_number_to_print, n)

    # Everything past the cut-off is folded into a single "other" bucket.
    truncated = limit < len(freqtable)
    freq_other = sum(freqtable.iloc[limit:]) if truncated else 0
    min_freq = freqtable.values[limit] if truncated else 0

    # Whatever the table does not account for is reported as missing.
    freq_missing = n - sum(freqtable)

    # No values
    if len(freqtable) == 0:
        return []

    max_freq = max(freqtable.values[0], freq_other, freq_missing)

    # TODO: Correctly sort missing and other
    # No values
    if max_freq == 0:
        return []

    rows = [
        make_row(label, freq, float(freq) / n, "")
        for label, freq in freqtable.iloc[0:limit].items()
    ]

    if freq_other > min_freq:
        hidden = str(freqtable.count() - limit)
        # Hack for tables with combined...
        other_share = min(float(freq_other) / n, 1.0)
        rows.append(
            make_row(
                "Other values ({})".format(hidden),
                freq_other,
                other_share,
                "other",
            )
        )

    if freq_missing > min_freq:
        rows.append(
            make_row("(Missing)", freq_missing, float(freq_missing) / n, "missing")
        )

    return rows
def extreme_obs_table(freqtable, number_to_print, n, ascending=True) -> list:
    """Build the rows of an extreme-observations table.

    Similar to the frequency table, but the entries are ordered by their
    index (the observed value) instead of by frequency.

    Args:
        freqtable: The frequency table. It is not modified.
        number_to_print: The number of observations to print.
        n: The total number of observations.
        ascending: The ordering of the observations (Default value = True)

    Returns:
        A list with one dict per printed observation (keys ``label``,
        ``width``, ``count``, ``percentage``, ``extra_class``, ``n``).
        Empty when there is nothing to print.
    """
    # If it's mixed between base types (str, int) convert to str. Pure "mixed"
    # types are filtered during type discovery
    # TODO: should be in cast?
    if "mixed" in freqtable.index.inferred_type:
        # Work on a copy so the caller's index is not rewritten as a side effect.
        freqtable = freqtable.copy()
        freqtable.index = freqtable.index.astype(str)

    obs_to_print = freqtable.sort_index(ascending=ascending).iloc[:number_to_print]

    # max() raises on an empty selection (empty table or number_to_print == 0).
    if len(obs_to_print) == 0:
        return []

    max_freq = max(obs_to_print.values)

    return [
        {
            "label": label,
            "width": freq / max_freq if max_freq != 0 else 0,
            "count": freq,
            "percentage": float(freq) / n,
            "extra_class": "",
            "n": n,
        }
        for label, freq in obs_to_print.items()
    ]
Functions
def extreme_obs_table(freqtable, number_to_print, n, ascending=True)
-
Similar to the frequency table, for extreme observations.
Args
freqtable
- The frequency table.
number_to_print
- The number of observations to print.
n
- The total number of observations.
ascending
- The ordering of the observations (Default value = True)
Returns
The rows of the extreme observation table (a list of dicts, one per printed observation).
Expand source code
def extreme_obs_table(freqtable, number_to_print, n, ascending=True) -> list:
    """Build the rows of an extreme-observations table.

    Similar to the frequency table, but the entries are ordered by their
    index (the observed value) instead of by frequency.

    Args:
        freqtable: The frequency table. It is not modified.
        number_to_print: The number of observations to print.
        n: The total number of observations.
        ascending: The ordering of the observations (Default value = True)

    Returns:
        A list with one dict per printed observation (keys ``label``,
        ``width``, ``count``, ``percentage``, ``extra_class``, ``n``).
        Empty when there is nothing to print.
    """
    # If it's mixed between base types (str, int) convert to str. Pure "mixed"
    # types are filtered during type discovery
    # TODO: should be in cast?
    if "mixed" in freqtable.index.inferred_type:
        # Work on a copy so the caller's index is not rewritten as a side effect.
        freqtable = freqtable.copy()
        freqtable.index = freqtable.index.astype(str)

    sorted_freqtable = freqtable.sort_index(ascending=ascending)
    obs_to_print = sorted_freqtable.iloc[:number_to_print]

    # max() raises on an empty selection (empty table or number_to_print == 0).
    if len(obs_to_print) == 0:
        return []

    max_freq = max(obs_to_print.values)

    rows = []
    for label, freq in obs_to_print.items():
        rows.append(
            {
                "label": label,
                "width": freq / max_freq if max_freq != 0 else 0,
                "count": freq,
                "percentage": float(freq) / n,
                "extra_class": "",
                "n": n,
            }
        )
    return rows
def freq_table(freqtable, n, max_number_to_print)
-
Render the rows for a frequency table (value, count).
Args
freqtable
- The frequency table.
n
- The total number of values.
max_number_to_print
- The maximum number of observations to print.
Returns
The rows of the frequency table.
Expand source code
def freq_table(freqtable, n: int, max_number_to_print: int) -> List[Dict]:
    """Build the display rows (value, count) of a frequency table.

    Args:
        freqtable: The frequency table. Its first entry is used as the
            largest listed frequency (presumably sorted by descending
            count; verify against the caller).
        n: The total number of values.
        max_number_to_print: The maximum number of observations to print;
            capped at ``n``.

    Returns:
        One dict per listed value, optionally followed by an
        "Other values" row and a "(Missing)" row. Empty when the table has
        no entries or every frequency is zero.
    """

    def make_row(label, count, share, css_class):
        # Bar width is relative to the largest frequency being displayed.
        return {
            "label": label,
            "width": count / max_freq,
            "count": count,
            "percentage": share,
            "n": n,
            "extra_class": css_class,
        }

    # TODO: replace '' by '(Empty)' ?
    limit = min(max_number_to_print, n)

    # Everything past the cut-off is folded into a single "other" bucket.
    truncated = limit < len(freqtable)
    freq_other = sum(freqtable.iloc[limit:]) if truncated else 0
    min_freq = freqtable.values[limit] if truncated else 0

    # Whatever the table does not account for is reported as missing.
    freq_missing = n - sum(freqtable)

    # No values
    if len(freqtable) == 0:
        return []

    max_freq = max(freqtable.values[0], freq_other, freq_missing)

    # TODO: Correctly sort missing and other
    # No values
    if max_freq == 0:
        return []

    rows = [
        make_row(label, freq, float(freq) / n, "")
        for label, freq in freqtable.iloc[0:limit].items()
    ]

    if freq_other > min_freq:
        hidden = str(freqtable.count() - limit)
        # Hack for tables with combined...
        other_share = min(float(freq_other) / n, 1.0)
        rows.append(
            make_row(
                "Other values ({})".format(hidden),
                freq_other,
                other_share,
                "other",
            )
        )

    if freq_missing > min_freq:
        rows.append(
            make_row("(Missing)", freq_missing, float(freq_missing) / n, "missing")
        )

    return rows