Module pandas_profiling.report.structure.variables.render_real
Expand source code
from pandas_profiling.config import config
from pandas_profiling.report.formatters import fmt_array
from pandas_profiling.visualisation.plot import mini_histogram, histogram
from pandas_profiling.report.presentation.core import (
Table,
Sequence,
Image,
FrequencyTable,
VariableInfo,
)
from pandas_profiling.report.structure.variables.render_common import render_common
def render_real(summary):
    """Assemble the report template variables for a real-valued variable.

    Starts from the variables produced by ``render_common(summary)`` and adds
    two entries to them:

    * ``"top"``: a grid holding the variable info, two overview tables and a
      mini histogram;
    * ``"bottom"``: tabs holding the statistics tables, the histogram(s), the
      common-values table and the extreme-values tables.

    Args:
        summary: Per-variable statistics indexed by string keys (for example
            ``"min"``, ``"mean"``, ``"varid"``, ``"warn_fields"`` and
            ``"histogram_data"``). When the key
            ``"histogram_bins_bayesian_blocks"`` is present, a second
            histogram using those bins is added.

    Returns:
        The template variables obtained from ``render_common`` with the
        ``"top"`` and ``"bottom"`` entries set.
    """
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def anchor(template):
        # Fill this variable's id into an anchor-id template.
        return template.format(varid=summary["varid"])

    def flagged_row(label, key, fmt):
        # Row whose "alert" flag is set when ``key`` is listed in
        # summary["warn_fields"].
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": key in summary["warn_fields"],
        }

    def plain_row(label, key, fmt):
        # Row whose "alert" flag is always off.
        return {"name": label, "value": summary[key], "fmt": fmt, "alert": False}

    def numeric_row(label, key):
        # Row formatted with "fmt_numeric"; carries no "alert" entry.
        return {"name": label, "value": summary[key], "fmt": "fmt_numeric"}

    # `>= 0` is kept as written: a NaN minimum compares False and therefore
    # falls through to the unrestricted label.
    type_label = (
        "Real number (ℝ<sub>≥0</sub>)" if summary["min"] >= 0 else "Real number (ℝ)"
    )

    # ---- Top section -----------------------------------------------------
    header = VariableInfo(
        summary["varid"], summary["varname"], type_label, summary["warnings"]
    )

    count_specs = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
        ("Infinite", "n_infinite", "fmt"),
        ("Infinite (%)", "p_infinite", "fmt_percent"),
    )
    counts_table = Table([flagged_row(*spec) for spec in count_specs])

    overview_table = Table(
        [
            plain_row("Mean", "mean", "fmt"),
            plain_row("Minimum", "min", "fmt"),
            plain_row("Maximum", "max", "fmt"),
            flagged_row("Zeros", "n_zeros", "fmt"),
            flagged_row("Zeros (%)", "p_zeros", "fmt_percent"),
            plain_row("Memory size", "memory_size", "fmt_bytesize"),
        ]
    )

    # Bin count shared by the mini histogram and the fixed-size histogram.
    histogram_bins = 10

    # TODO: replace with SmallImage...
    mini_plot = mini_histogram(summary["histogram_data"], summary, histogram_bins)
    mini_histo = Image(mini_plot, image_format=image_format, alt="Mini histogram")

    template_variables["top"] = Sequence(
        [header, counts_table, overview_table, mini_histo], sequence_type="grid"
    )

    # ---- Bottom section: statistics ---------------------------------------
    quantile_specs = (
        ("Minimum", "min"),
        ("5-th percentile", "5%"),
        ("Q1", "25%"),
        ("median", "50%"),
        ("Q3", "75%"),
        ("95-th percentile", "95%"),
        ("Maximum", "max"),
        ("Range", "range"),
        ("Interquartile range (IQR)", "iqr"),
    )
    quantile_statistics = Table(
        [numeric_row(label, key) for label, key in quantile_specs],
        name="Quantile statistics",
    )

    descriptive_rows = [
        numeric_row("Standard deviation", "std"),
        numeric_row("Coefficient of variation (CV)", "cv"),
        numeric_row("Kurtosis", "kurtosis"),
        numeric_row("Mean", "mean"),
        numeric_row("Median Absolute Deviation (MAD)", "mad"),
        # NOTE(review): this row carries a "class" entry whereas the top
        # tables use an "alert" flag - confirm the table renderer honours
        # "class".
        {
            "name": "Skewness",
            "value": summary["skewness"],
            "fmt": "fmt_numeric",
            "class": "alert" if "skewness" in summary["warn_fields"] else "",
        },
        numeric_row("Sum", "sum"),
        numeric_row("Variance", "variance"),
    ]
    descriptive_statistics = Table(descriptive_rows, name="Descriptive statistics")

    statistics = Sequence(
        [quantile_statistics, descriptive_statistics],
        anchor_id=anchor("{varid}statistics"),
        name="Statistics",
        sequence_type="grid",
    )

    # ---- Bottom section: histograms ---------------------------------------
    fixed_plot = histogram(summary["histogram_data"], summary, histogram_bins)
    fixed_caption = "<strong>Histogram with fixed size bins</strong> (bins={})".format(
        histogram_bins
    )
    seqs = [
        Image(
            fixed_plot,
            image_format=image_format,
            alt="Histogram",
            caption=fixed_caption,
            name="Histogram",
            anchor_id=anchor("{varid}histogram"),
        )
    ]

    # ---- Bottom section: frequency tables ---------------------------------
    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=anchor("{varid}common_values"),
    )

    smallest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="Minimum 5 values",
        anchor_id=anchor("{varid}firstn"),
    )
    largest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="Maximum 5 values",
        anchor_id=anchor("{varid}lastn"),
    )
    evs = Sequence(
        [smallest_values, largest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=anchor("{varid}extreme_values"),
    )

    # Optional second histogram, only when variable-size bins were supplied.
    if "histogram_bins_bayesian_blocks" in summary:
        dynamic_plot = histogram(
            summary["histogram_data"],
            summary,
            summary["histogram_bins_bayesian_blocks"],
        )
        dynamic_caption = '<strong>Histogram with variable size bins</strong> (bins={}, <a href="https://ui.adsabs.harvard.edu/abs/2013ApJ...764..167S/abstract" target="_blank">"bayesian blocks"</a> binning strategy used)'.format(
            fmt_array(summary["histogram_bins_bayesian_blocks"], threshold=5)
        )
        seqs.append(
            Image(
                dynamic_plot,
                image_format=image_format,
                alt="Histogram",
                caption=dynamic_caption,
                name="Dynamic Histogram",
                anchor_id=anchor("{varid}dynamic_histogram"),
            )
        )

    histogram_tabs = Sequence(
        seqs,
        sequence_type="tabs",
        name="Histogram(s)",
        anchor_id=anchor("{varid}histograms"),
    )

    template_variables["bottom"] = Sequence(
        [statistics, histogram_tabs, fq, evs],
        sequence_type="tabs",
        anchor_id=anchor("{varid}bottom"),
    )

    return template_variables
Functions
def render_real(summary)
-
Expand source code
def render_real(summary):
    """Assemble the report template variables for a real-valued variable.

    Starts from the variables produced by ``render_common(summary)`` and adds
    two entries to them:

    * ``"top"``: a grid holding the variable info, two overview tables and a
      mini histogram;
    * ``"bottom"``: tabs holding the statistics tables, the histogram(s), the
      common-values table and the extreme-values tables.

    Args:
        summary: Per-variable statistics indexed by string keys (for example
            ``"min"``, ``"mean"``, ``"varid"``, ``"warn_fields"`` and
            ``"histogram_data"``). When the key
            ``"histogram_bins_bayesian_blocks"`` is present, a second
            histogram using those bins is added.

    Returns:
        The template variables obtained from ``render_common`` with the
        ``"top"`` and ``"bottom"`` entries set.
    """
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def anchor(template):
        # Fill this variable's id into an anchor-id template.
        return template.format(varid=summary["varid"])

    def flagged_row(label, key, fmt):
        # Row whose "alert" flag is set when ``key`` is listed in
        # summary["warn_fields"].
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": key in summary["warn_fields"],
        }

    def plain_row(label, key, fmt):
        # Row whose "alert" flag is always off.
        return {"name": label, "value": summary[key], "fmt": fmt, "alert": False}

    def numeric_row(label, key):
        # Row formatted with "fmt_numeric"; carries no "alert" entry.
        return {"name": label, "value": summary[key], "fmt": "fmt_numeric"}

    # `>= 0` is kept as written: a NaN minimum compares False and therefore
    # falls through to the unrestricted label.
    type_label = (
        "Real number (ℝ<sub>≥0</sub>)" if summary["min"] >= 0 else "Real number (ℝ)"
    )

    # ---- Top section -----------------------------------------------------
    header = VariableInfo(
        summary["varid"], summary["varname"], type_label, summary["warnings"]
    )

    count_specs = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
        ("Infinite", "n_infinite", "fmt"),
        ("Infinite (%)", "p_infinite", "fmt_percent"),
    )
    counts_table = Table([flagged_row(*spec) for spec in count_specs])

    overview_table = Table(
        [
            plain_row("Mean", "mean", "fmt"),
            plain_row("Minimum", "min", "fmt"),
            plain_row("Maximum", "max", "fmt"),
            flagged_row("Zeros", "n_zeros", "fmt"),
            flagged_row("Zeros (%)", "p_zeros", "fmt_percent"),
            plain_row("Memory size", "memory_size", "fmt_bytesize"),
        ]
    )

    # Bin count shared by the mini histogram and the fixed-size histogram.
    histogram_bins = 10

    # TODO: replace with SmallImage...
    mini_plot = mini_histogram(summary["histogram_data"], summary, histogram_bins)
    mini_histo = Image(mini_plot, image_format=image_format, alt="Mini histogram")

    template_variables["top"] = Sequence(
        [header, counts_table, overview_table, mini_histo], sequence_type="grid"
    )

    # ---- Bottom section: statistics ---------------------------------------
    quantile_specs = (
        ("Minimum", "min"),
        ("5-th percentile", "5%"),
        ("Q1", "25%"),
        ("median", "50%"),
        ("Q3", "75%"),
        ("95-th percentile", "95%"),
        ("Maximum", "max"),
        ("Range", "range"),
        ("Interquartile range (IQR)", "iqr"),
    )
    quantile_statistics = Table(
        [numeric_row(label, key) for label, key in quantile_specs],
        name="Quantile statistics",
    )

    descriptive_rows = [
        numeric_row("Standard deviation", "std"),
        numeric_row("Coefficient of variation (CV)", "cv"),
        numeric_row("Kurtosis", "kurtosis"),
        numeric_row("Mean", "mean"),
        numeric_row("Median Absolute Deviation (MAD)", "mad"),
        # NOTE(review): this row carries a "class" entry whereas the top
        # tables use an "alert" flag - confirm the table renderer honours
        # "class".
        {
            "name": "Skewness",
            "value": summary["skewness"],
            "fmt": "fmt_numeric",
            "class": "alert" if "skewness" in summary["warn_fields"] else "",
        },
        numeric_row("Sum", "sum"),
        numeric_row("Variance", "variance"),
    ]
    descriptive_statistics = Table(descriptive_rows, name="Descriptive statistics")

    statistics = Sequence(
        [quantile_statistics, descriptive_statistics],
        anchor_id=anchor("{varid}statistics"),
        name="Statistics",
        sequence_type="grid",
    )

    # ---- Bottom section: histograms ---------------------------------------
    fixed_plot = histogram(summary["histogram_data"], summary, histogram_bins)
    fixed_caption = "<strong>Histogram with fixed size bins</strong> (bins={})".format(
        histogram_bins
    )
    seqs = [
        Image(
            fixed_plot,
            image_format=image_format,
            alt="Histogram",
            caption=fixed_caption,
            name="Histogram",
            anchor_id=anchor("{varid}histogram"),
        )
    ]

    # ---- Bottom section: frequency tables ---------------------------------
    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=anchor("{varid}common_values"),
    )

    smallest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="Minimum 5 values",
        anchor_id=anchor("{varid}firstn"),
    )
    largest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="Maximum 5 values",
        anchor_id=anchor("{varid}lastn"),
    )
    evs = Sequence(
        [smallest_values, largest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=anchor("{varid}extreme_values"),
    )

    # Optional second histogram, only when variable-size bins were supplied.
    if "histogram_bins_bayesian_blocks" in summary:
        dynamic_plot = histogram(
            summary["histogram_data"],
            summary,
            summary["histogram_bins_bayesian_blocks"],
        )
        dynamic_caption = '<strong>Histogram with variable size bins</strong> (bins={}, <a href="https://ui.adsabs.harvard.edu/abs/2013ApJ...764..167S/abstract" target="_blank">"bayesian blocks"</a> binning strategy used)'.format(
            fmt_array(summary["histogram_bins_bayesian_blocks"], threshold=5)
        )
        seqs.append(
            Image(
                dynamic_plot,
                image_format=image_format,
                alt="Histogram",
                caption=dynamic_caption,
                name="Dynamic Histogram",
                anchor_id=anchor("{varid}dynamic_histogram"),
            )
        )

    histogram_tabs = Sequence(
        seqs,
        sequence_type="tabs",
        name="Histogram(s)",
        anchor_id=anchor("{varid}histograms"),
    )

    template_variables["bottom"] = Sequence(
        [statistics, histogram_tabs, fq, evs],
        sequence_type="tabs",
        anchor_id=anchor("{varid}bottom"),
    )

    return template_variables