Module pandas_profiling.report.structure.correlations
Expand source code
from typing import Optional, List
from pandas_profiling.config import config
from pandas_profiling.report.presentation.abstract.renderable import Renderable
from pandas_profiling.report.presentation.core import (
Sequence,
HTML,
Image,
ToggleButton,
Collapse,
)
from pandas_profiling.visualisation import plot
def get_items() -> List[Renderable]:
    """Return the initial list of correlation renderables.

    The list is always empty and freshly created on every call, so callers
    may append to it without affecting other callers.

    Returns:
        A new, empty list.
    """
    return []
def get_correlation_items(summary) -> Optional[Renderable]:
    """Build the "Correlations" section of the report.

    Every entry of ``summary["correlations"]`` is rendered as a correlation
    matrix image. Correlations that have an explanatory text (Pearson,
    Spearman and Kendall) are rendered as a grid holding the image next to
    an HTML description; the others are rendered as the image alone.

    Args:
        summary: mapping with a ``"correlations"`` entry that maps a
            correlation key (one of the keys of ``key_to_data`` below) to the
            data handed to ``plot.correlation_matrix``.

    Returns:
        A ``Collapse`` wrapping the tabbed correlation items and a toggle
        button, or ``None`` when there are no items to show.

    Raises:
        KeyError: if a correlation key is not present in ``key_to_data``
            (or, for a plain dict, if ``"correlations"`` is missing).
    """
    items = get_items()

    pearson_description = (
        "The Pearson's correlation coefficient (<em>r</em>) is a measure of linear correlation "
        "between two variables. It's value lies between -1 and +1, -1 indicating total negative "
        "linear correlation, 0 indicating no linear correlation and 1 indicating total positive "
        "linear correlation. Furthermore, <em>r</em> is invariant under separate changes in location "
        "and scale of the two variables, implying that for a linear function the angle to the "
        "x-axis does not affect <em>r</em>.<br /><br />To calculate <em>r</em> for two "
        "variables <em>X</em> and <em>Y</em>, one divides the covariance of <em>X</em> and "
        "<em>Y</em> by the product of their standard deviations. "
    )

    # The embedded "\n" characters reproduce the line breaks of the original
    # multi-line literals, so the emitted HTML text is unchanged.
    spearman_description = (
        "The Spearman's rank correlation coefficient (<em>ρ</em>) is a measure of monotonic\n"
        "correlation between two variables, and is therefore better in catching nonlinear monotonic correlations than\n"
        "Pearson's <em>r</em>. It's value lies between -1 and +1, -1 indicating total negative monotonic correlation,\n"
        "0 indicating no monotonic correlation and 1 indicating total positive monotonic correlation.<br /><br />To\n"
        "calculate <em>ρ</em> for two variables <em>X</em> and <em>Y</em>, one divides the covariance of the rank\n"
        "variables of <em>X</em> and <em>Y</em> by the product of their standard deviations. "
    )

    kendall_description = (
        "Similarly to Spearman's rank correlation coefficient, the Kendall rank correlation\n"
        "coefficient (<em>τ</em>) measures ordinal association between two variables. It's value lies between -1 and +1,\n"
        "-1 indicating total negative correlation, 0 indicating no correlation and 1 indicating total positive correlation.\n"
        "<br /><br />To calculate <em>τ</em> for two variables <em>X</em> and <em>Y</em>, one determines the number of\n"
        "concordant and discordant pairs of observations. <em>τ</em> is given by the number of concordant pairs minus the\n"
        "discordant pairs divided by the total number of pairs."
    )

    # key -> (vmin passed to the plot, display name, description HTML).
    # An empty description means the image is shown without a text panel.
    key_to_data = {
        "pearson": (-1, "Pearson's r", pearson_description),
        "spearman": (-1, "Spearman's ρ", spearman_description),
        "kendall": (-1, "Kendall's τ", kendall_description),
        "phi_k": (0, "Phik (φk)", ""),
        "cramers": (0, "Cramér's V (φc)", ""),
        "recoded": (0, "Recoded", ""),
    }

    # Loop-invariant: read the configured image format once.
    image_format = config["plot"]["image_format"].get(str)

    for key, item in summary["correlations"].items():
        vmin, name, description = key_to_data[key]

        diagram = Image(
            plot.correlation_matrix(item, vmin=vmin),
            image_format=image_format,
            alt=name,
            anchor_id="{key}_diagram".format(key=key),
            name=name,
            classes="correlation-diagram",
        )

        if not description:
            # No explanatory text: the image is the whole tab.
            items.append(diagram)
            continue

        desc = HTML(
            '<div style="padding:20px" class="text-muted"><h3>{name}</h3>{description}</div>'.format(
                description=description, name=name
            ),
            anchor_id="{key}_html".format(key=key),
            classes="correlation-description",
        )
        items.append(
            Sequence([diagram, desc], anchor_id=key, name=name, sequence_type="grid")
        )

    # Nothing to display: skip building the tab container altogether.
    if not items:
        return None

    corr = Sequence(
        items,
        sequence_type="tabs",
        name="Correlations Tab",
        anchor_id="correlations_tab",
    )
    btn = ToggleButton(
        "Toggle correlation descriptions",
        anchor_id="toggle-correlation-description",
        name="Toggle correlation descriptions",
    )
    return Collapse(
        name="Correlations", anchor_id="correlations", button=btn, item=corr
    )
Functions
def get_correlation_items(summary)
-
Create the list of correlation items
Args
summary
- dict of correlations
Returns
A Collapse renderable wrapping the tabbed correlation items and a toggle button, or None when there are no correlation items to show.
Expand source code
def get_correlation_items(summary) -> Optional[Renderable]:
    """Build the "Correlations" section of the report.

    Every entry of ``summary["correlations"]`` is rendered as a correlation
    matrix image. Correlations that have an explanatory text (Pearson,
    Spearman and Kendall) are rendered as a grid holding the image next to
    an HTML description; the others are rendered as the image alone.

    Args:
        summary: mapping with a ``"correlations"`` entry that maps a
            correlation key (one of the keys of ``key_to_data`` below) to the
            data handed to ``plot.correlation_matrix``.

    Returns:
        A ``Collapse`` wrapping the tabbed correlation items and a toggle
        button, or ``None`` when there are no items to show.

    Raises:
        KeyError: if a correlation key is not present in ``key_to_data``
            (or, for a plain dict, if ``"correlations"`` is missing).
    """
    items = get_items()

    pearson_description = (
        "The Pearson's correlation coefficient (<em>r</em>) is a measure of linear correlation "
        "between two variables. It's value lies between -1 and +1, -1 indicating total negative "
        "linear correlation, 0 indicating no linear correlation and 1 indicating total positive "
        "linear correlation. Furthermore, <em>r</em> is invariant under separate changes in location "
        "and scale of the two variables, implying that for a linear function the angle to the "
        "x-axis does not affect <em>r</em>.<br /><br />To calculate <em>r</em> for two "
        "variables <em>X</em> and <em>Y</em>, one divides the covariance of <em>X</em> and "
        "<em>Y</em> by the product of their standard deviations. "
    )

    # The embedded "\n" characters reproduce the line breaks of the
    # multi-line literals in the module source, so the emitted HTML text is
    # unchanged.
    spearman_description = (
        "The Spearman's rank correlation coefficient (<em>ρ</em>) is a measure of monotonic\n"
        "correlation between two variables, and is therefore better in catching nonlinear monotonic correlations than\n"
        "Pearson's <em>r</em>. It's value lies between -1 and +1, -1 indicating total negative monotonic correlation,\n"
        "0 indicating no monotonic correlation and 1 indicating total positive monotonic correlation.<br /><br />To\n"
        "calculate <em>ρ</em> for two variables <em>X</em> and <em>Y</em>, one divides the covariance of the rank\n"
        "variables of <em>X</em> and <em>Y</em> by the product of their standard deviations. "
    )

    kendall_description = (
        "Similarly to Spearman's rank correlation coefficient, the Kendall rank correlation\n"
        "coefficient (<em>τ</em>) measures ordinal association between two variables. It's value lies between -1 and +1,\n"
        "-1 indicating total negative correlation, 0 indicating no correlation and 1 indicating total positive correlation.\n"
        "<br /><br />To calculate <em>τ</em> for two variables <em>X</em> and <em>Y</em>, one determines the number of\n"
        "concordant and discordant pairs of observations. <em>τ</em> is given by the number of concordant pairs minus the\n"
        "discordant pairs divided by the total number of pairs."
    )

    # key -> (vmin passed to the plot, display name, description HTML).
    # An empty description means the image is shown without a text panel.
    key_to_data = {
        "pearson": (-1, "Pearson's r", pearson_description),
        "spearman": (-1, "Spearman's ρ", spearman_description),
        "kendall": (-1, "Kendall's τ", kendall_description),
        "phi_k": (0, "Phik (φk)", ""),
        "cramers": (0, "Cramér's V (φc)", ""),
        "recoded": (0, "Recoded", ""),
    }

    # Loop-invariant: read the configured image format once.
    image_format = config["plot"]["image_format"].get(str)

    for key, item in summary["correlations"].items():
        vmin, name, description = key_to_data[key]

        diagram = Image(
            plot.correlation_matrix(item, vmin=vmin),
            image_format=image_format,
            alt=name,
            anchor_id="{key}_diagram".format(key=key),
            name=name,
            classes="correlation-diagram",
        )

        if not description:
            # No explanatory text: the image is the whole tab.
            items.append(diagram)
            continue

        desc = HTML(
            '<div style="padding:20px" class="text-muted"><h3>{name}</h3>{description}</div>'.format(
                description=description, name=name
            ),
            anchor_id="{key}_html".format(key=key),
            classes="correlation-description",
        )
        items.append(
            Sequence([diagram, desc], anchor_id=key, name=name, sequence_type="grid")
        )

    # Nothing to display: skip building the tab container altogether.
    if not items:
        return None

    corr = Sequence(
        items,
        sequence_type="tabs",
        name="Correlations Tab",
        anchor_id="correlations_tab",
    )
    btn = ToggleButton(
        "Toggle correlation descriptions",
        anchor_id="toggle-correlation-description",
        name="Toggle correlation descriptions",
    )
    return Collapse(
        name="Correlations", anchor_id="correlations", button=btn, item=corr
    )
def get_items()
-
Expand source code
def get_items() -> List[Renderable]:
    """Produce the starting collection of correlation renderables.

    Returns:
        A brand-new empty list on each call.
    """
    initial_items = []
    return initial_items