Module pandas_profiling.report.structure.report

Generate the report.

Expand source code
"""Generate the report."""
from datetime import datetime
from typing import List

from pandas_profiling.config import config
from pandas_profiling.model.base import (
    Boolean,
    Real,
    Count,
    Complex,
    Date,
    Categorical,
    Url,
    AbsolutePath,
    ExistingPath,
    ImagePath,
    Generic,
)
from pandas_profiling.model.messages import MessageType
from pandas_profiling.report.structure.correlations import get_correlation_items
from pandas_profiling.report.structure.overview import (
    get_dataset_overview,
    get_dataset_reproduction,
    get_dataset_warnings,
)
from pandas_profiling.report.structure.variables import (
    render_boolean,
    render_categorical,
    render_complex,
    render_date,
    render_real,
    render_path,
    render_path_image,
    render_url,
    render_generic,
)
from pandas_profiling.report.presentation.abstract.renderable import Renderable
from pandas_profiling.report.presentation.core import (
    Image,
    Sequence,
    Sample,
    Variable,
    Collapse,
    ToggleButton,
)


def get_missing_items(summary) -> list:
    """Build one image item per entry of ``summary["missing"]``.

    Args:
        summary: Mapping with a ``"missing"`` entry; each of its values
            provides a ``"matrix"`` and a ``"name"``.

    Returns:
        A list of ``Image`` items, in the iteration order of
        ``summary["missing"]``.
    """
    fmt = config["plot"]["image_format"].get(str)

    # TODO: Add informative caption
    return [
        Image(
            diagram["matrix"],
            image_format=fmt,
            alt=diagram["name"],
            name=diagram["name"],
            anchor_id=anchor,
        )
        for anchor, diagram in summary["missing"].items()
    ]


# TODO: split in per variable function
def render_variables_section(dataframe_summary: dict) -> list:
    """Build the report item for every variable in the summary.

    Args:
        dataframe_summary: Summary dict. Its ``"variables"`` entry maps each
            variable name to that variable's statistics (including a
            ``"type"``), and its ``"messages"`` entry holds the warnings.

    Returns:
        A list with one ``Variable`` item per entry of
        ``dataframe_summary["variables"]``, in iteration order.
    """
    renderers = {
        Boolean: render_boolean,
        Real: render_real,
        Count: render_real,
        Complex: render_complex,
        Date: render_date,
        Categorical: render_categorical,
        Url: render_url,
        AbsolutePath: render_path,
        ExistingPath: render_path,
        # ImagePath: render_path_image,
        Generic: render_generic,
    }

    rendered = []

    for column, stats in dataframe_summary["variables"].items():
        # Only the messages raised for this variable are relevant below.
        column_messages = [
            message
            for message in dataframe_summary["messages"]
            if message.column_name == column
        ]

        formatted = [message.fmt() for message in column_messages]
        flagged_fields = {
            field for message in column_messages for field in message.fields
        }
        message_types = {message.message_type for message in column_messages}

        # Common template variables, overlaid with the variable's statistics
        context = {
            "varname": column,
            "varid": hash(column),
            "warnings": formatted,
            "warn_fields": flagged_fields,
        }
        context.update(stats)

        # Per type template variables
        render = renderers[stats["type"]]
        context.update(render(context))

        # Rejected variables are only ignored when the config asks for it
        ignore = (
            MessageType.REJECTED in message_types
            if config["reject_variables"].get(bool)
            else False
        )

        # The collapsible details are optional
        details = context.get("bottom")
        bottom = None
        if details is not None:
            toggle = ToggleButton("Toggle details", anchor_id=context["varid"])
            bottom = Collapse(toggle, details)

        rendered.append(
            Variable(
                context["top"],
                bottom=bottom,
                anchor_id=context["varid"],
                name=column,
                ignore=ignore,
            )
        )

    return rendered


def get_sample_items(sample: dict):
    """Wrap every sample table in a ``Sample`` item.

    Args:
        sample: dict of samples keyed by ``"head"`` and/or ``"tail"``; each
            value must provide a ``to_html`` method.

    Returns:
        List of ``Sample`` items to show in the interface, in the iteration
        order of ``sample``.

    Raises:
        KeyError: If ``sample`` contains a key other than ``"head"`` or
            ``"tail"``.
    """
    titles = {"head": "First rows", "tail": "Last rows"}
    return [
        Sample(
            sample=table.to_html(classes="sample table table-striped"),
            name=titles[position],
            anchor_id=position,
        )
        for position, table in sample.items()
    ]


def get_scatter_matrix(scatter_matrix):
    """Build the tabbed interaction plots.

    Args:
        scatter_matrix: Nested mapping of the form
            ``{x_col: {y_col: plot}}``.

    Returns:
        A list with one "tabs" ``Sequence`` per x column; each sequence holds
        one ``Image`` per y column of that x column.
    """
    fmt = config["plot"]["image_format"].get(str)

    tabs = []
    for x_col, plots in scatter_matrix.items():
        # One image per (x, y) pair, named after the y column
        images = [
            Image(
                plot,
                image_format=fmt,
                alt="{x_col} x {y_col}".format(x_col=x_col, y_col=y_col),
                anchor_id="interactions_{x_col}_{y_col}".format(
                    x_col=x_col, y_col=y_col
                ),
                name=y_col,
            )
            for y_col, plot in plots.items()
        ]
        tabs.append(
            Sequence(
                images,
                sequence_type="tabs",
                name=x_col,
                anchor_id="interactions_{x_col}".format(x_col=x_col),
            )
        )

    return tabs


def get_dataset_items(summary, date_start, date_end, warnings):
    """Collect the items of the dataset overview.

    Args:
        summary: Summary dict, passed on to the overview and reproduction
            helpers.
        date_start: Passed on to ``get_dataset_reproduction``.
        date_end: Passed on to ``get_dataset_reproduction``.
        warnings: Iterable of messages, each exposing ``message_type``.

    Returns:
        A list holding the overview item and the reproduction item, followed
        by a warnings item when at least one counted warning exists.
    """
    overview = get_dataset_overview(summary)
    reproduction = get_dataset_reproduction(summary, date_start, date_end)
    items = [overview, reproduction]

    # These message types do not count towards the number of warnings
    uncounted = (
        MessageType.UNIFORM,
        MessageType.UNIQUE,
        MessageType.REJECTED,
        MessageType.CONSTANT,
    )
    count = sum(1 for warning in warnings if warning.message_type not in uncounted)

    if count:
        items.append(get_dataset_warnings(warnings, count))

    return items


def get_section_items() -> List[Renderable]:
    """Return a new, empty list of section items."""
    return list()


def get_report_structure(
    date_start: datetime, date_end: datetime, sample: dict, summary: dict
) -> Renderable:
    """Assemble the report structure from summary statistics and a sample.

    Args:
      date_start: Passed on to the dataset overview items.
      date_end: Passed on to the dataset overview items.
      sample: A dict containing the samples to print.
      summary: Statistics to use for the overview, variables, interactions,
        correlations and missing values.

    Returns:
      A "sections" ``Sequence`` named "Report" holding, in order: overview,
      variables, interactions, correlations (only when available), missing
      values and sample.
    """
    messages = summary["messages"]

    report_sections = get_section_items()

    report_sections.extend(
        [
            Sequence(
                get_dataset_items(summary, date_start, date_end, messages),
                sequence_type="tabs",
                name="Overview",
                anchor_id="overview",
            ),
            Sequence(
                render_variables_section(summary),
                sequence_type="accordion",
                name="Variables",
                anchor_id="variables",
            ),
            Sequence(
                get_scatter_matrix(summary["scatter"]),
                sequence_type="tabs",
                name="Interactions",
                anchor_id="interactions",
            ),
        ]
    )

    # The correlation section is optional
    correlations = get_correlation_items(summary)
    if correlations is not None:
        report_sections.append(correlations)

    report_sections.extend(
        [
            Sequence(
                get_missing_items(summary),
                sequence_type="tabs",
                name="Missing values",
                anchor_id="missing",
            ),
            Sequence(
                get_sample_items(sample),
                sequence_type="list",
                name="Sample",
                anchor_id="sample",
            ),
        ]
    )

    return Sequence(report_sections, name="Report", sequence_type="sections")

Functions

def get_dataset_items(summary, date_start, date_end, warnings)
Expand source code
def get_dataset_items(summary, date_start, date_end, warnings):
    """Collect the items of the dataset overview.

    Args:
        summary: Summary dict, passed on to the overview and reproduction
            helpers.
        date_start: Passed on to ``get_dataset_reproduction``.
        date_end: Passed on to ``get_dataset_reproduction``.
        warnings: Iterable of messages, each exposing ``message_type``.

    Returns:
        A list holding the overview item and the reproduction item, followed
        by a warnings item when at least one counted warning exists.
    """
    overview = get_dataset_overview(summary)
    reproduction = get_dataset_reproduction(summary, date_start, date_end)
    items = [overview, reproduction]

    # These message types do not count towards the number of warnings
    uncounted = (
        MessageType.UNIFORM,
        MessageType.UNIQUE,
        MessageType.REJECTED,
        MessageType.CONSTANT,
    )
    count = sum(1 for warning in warnings if warning.message_type not in uncounted)

    if count:
        items.append(get_dataset_warnings(warnings, count))

    return items
def get_missing_items(summary)
Expand source code
def get_missing_items(summary) -> list:
    """Build one image item per entry of ``summary["missing"]``.

    Args:
        summary: Mapping with a ``"missing"`` entry; each of its values
            provides a ``"matrix"`` and a ``"name"``.

    Returns:
        A list of ``Image`` items, in the iteration order of
        ``summary["missing"]``.
    """
    fmt = config["plot"]["image_format"].get(str)

    # TODO: Add informative caption
    return [
        Image(
            diagram["matrix"],
            image_format=fmt,
            alt=diagram["name"],
            name=diagram["name"],
            anchor_id=anchor,
        )
        for anchor, diagram in summary["missing"].items()
    ]
def get_report_structure(date_start, date_end, sample, summary)

Generate an HTML report from summary statistics and a given sample.

Args

sample
A dict containing the samples to print.
summary
Statistics to use for the overview, variables, correlations and missing values.

Returns

The profile report in HTML format
 
Expand source code
def get_report_structure(
    date_start: datetime, date_end: datetime, sample: dict, summary: dict
) -> Renderable:
    """Assemble the report structure from summary statistics and a sample.

    Args:
      date_start: Passed on to the dataset overview items.
      date_end: Passed on to the dataset overview items.
      sample: A dict containing the samples to print.
      summary: Statistics to use for the overview, variables, interactions,
        correlations and missing values.

    Returns:
      A "sections" ``Sequence`` named "Report" holding, in order: overview,
      variables, interactions, correlations (only when available), missing
      values and sample.
    """
    messages = summary["messages"]

    report_sections = get_section_items()

    report_sections.extend(
        [
            Sequence(
                get_dataset_items(summary, date_start, date_end, messages),
                sequence_type="tabs",
                name="Overview",
                anchor_id="overview",
            ),
            Sequence(
                render_variables_section(summary),
                sequence_type="accordion",
                name="Variables",
                anchor_id="variables",
            ),
            Sequence(
                get_scatter_matrix(summary["scatter"]),
                sequence_type="tabs",
                name="Interactions",
                anchor_id="interactions",
            ),
        ]
    )

    # The correlation section is optional
    correlations = get_correlation_items(summary)
    if correlations is not None:
        report_sections.append(correlations)

    report_sections.extend(
        [
            Sequence(
                get_missing_items(summary),
                sequence_type="tabs",
                name="Missing values",
                anchor_id="missing",
            ),
            Sequence(
                get_sample_items(sample),
                sequence_type="list",
                name="Sample",
                anchor_id="sample",
            ),
        ]
    )

    return Sequence(report_sections, name="Report", sequence_type="sections")
def get_sample_items(sample)

Create the list of sample items

Args

sample
dict of samples

Returns

List of sample items to show in the interface.

Expand source code
def get_sample_items(sample: dict):
    """Wrap every sample table in a ``Sample`` item.

    Args:
        sample: dict of samples keyed by ``"head"`` and/or ``"tail"``; each
            value must provide a ``to_html`` method.

    Returns:
        List of ``Sample`` items to show in the interface, in the iteration
        order of ``sample``.

    Raises:
        KeyError: If ``sample`` contains a key other than ``"head"`` or
            ``"tail"``.
    """
    titles = {"head": "First rows", "tail": "Last rows"}
    return [
        Sample(
            sample=table.to_html(classes="sample table table-striped"),
            name=titles[position],
            anchor_id=position,
        )
        for position, table in sample.items()
    ]
def get_scatter_matrix(scatter_matrix)
Expand source code
def get_scatter_matrix(scatter_matrix):
    """Build the tabbed interaction plots.

    Args:
        scatter_matrix: Nested mapping of the form
            ``{x_col: {y_col: plot}}``.

    Returns:
        A list with one "tabs" ``Sequence`` per x column; each sequence holds
        one ``Image`` per y column of that x column.
    """
    fmt = config["plot"]["image_format"].get(str)

    tabs = []
    for x_col, plots in scatter_matrix.items():
        # One image per (x, y) pair, named after the y column
        images = [
            Image(
                plot,
                image_format=fmt,
                alt="{x_col} x {y_col}".format(x_col=x_col, y_col=y_col),
                anchor_id="interactions_{x_col}_{y_col}".format(
                    x_col=x_col, y_col=y_col
                ),
                name=y_col,
            )
            for y_col, plot in plots.items()
        ]
        tabs.append(
            Sequence(
                images,
                sequence_type="tabs",
                name=x_col,
                anchor_id="interactions_{x_col}".format(x_col=x_col),
            )
        )

    return tabs
def get_section_items()
Expand source code
def get_section_items() -> List[Renderable]:
    """Return a new, empty list of section items."""
    return list()
def render_variables_section(dataframe_summary)

Render the HTML for each of the variables in the DataFrame.

Args

dataframe_summary
The statistics for each variable.

Returns

The rendered HTML, where each row represents a variable.

Expand source code
def render_variables_section(dataframe_summary: dict) -> list:
    """Build the report item for every variable in the summary.

    Args:
        dataframe_summary: Summary dict. Its ``"variables"`` entry maps each
            variable name to that variable's statistics (including a
            ``"type"``), and its ``"messages"`` entry holds the warnings.

    Returns:
        A list with one ``Variable`` item per entry of
        ``dataframe_summary["variables"]``, in iteration order.
    """
    renderers = {
        Boolean: render_boolean,
        Real: render_real,
        Count: render_real,
        Complex: render_complex,
        Date: render_date,
        Categorical: render_categorical,
        Url: render_url,
        AbsolutePath: render_path,
        ExistingPath: render_path,
        # ImagePath: render_path_image,
        Generic: render_generic,
    }

    rendered = []

    for column, stats in dataframe_summary["variables"].items():
        # Only the messages raised for this variable are relevant below.
        column_messages = [
            message
            for message in dataframe_summary["messages"]
            if message.column_name == column
        ]

        formatted = [message.fmt() for message in column_messages]
        flagged_fields = {
            field for message in column_messages for field in message.fields
        }
        message_types = {message.message_type for message in column_messages}

        # Common template variables, overlaid with the variable's statistics
        context = {
            "varname": column,
            "varid": hash(column),
            "warnings": formatted,
            "warn_fields": flagged_fields,
        }
        context.update(stats)

        # Per type template variables
        render = renderers[stats["type"]]
        context.update(render(context))

        # Rejected variables are only ignored when the config asks for it
        ignore = (
            MessageType.REJECTED in message_types
            if config["reject_variables"].get(bool)
            else False
        )

        # The collapsible details are optional
        details = context.get("bottom")
        bottom = None
        if details is not None:
            toggle = ToggleButton("Toggle details", anchor_id=context["varid"])
            bottom = Collapse(toggle, details)

        rendered.append(
            Variable(
                context["top"],
                bottom=bottom,
                anchor_id=context["varid"],
                name=column,
                ignore=ignore,
            )
        )

    return rendered