Module pandas_profiling.report.structure.variables.render_url

Expand source code
from pandas_profiling.config import config
from pandas_profiling.report.presentation.frequency_table_utils import freq_table

from pandas_profiling.report.structure.variables import render_common
from pandas_profiling.report.presentation.core import (
    Sequence,
    Table,
    Variable,
    FrequencyTable,
    FrequencyTableSmall,
    VariableInfo,
)


def render_url(summary):
    """Build the report template variables for a URL variable.

    Starts from the mapping returned by ``render_common(summary)`` and adds:

    * ``freqtable_<part>`` for each of scheme, netloc, path, query and
      fragment, computed by ``freq_table`` from ``summary["<part>_counts"]``;
    * ``bottom``: a "tabs" ``Sequence`` holding the full frequency table
      followed by one frequency table per URL part;
    * ``top``: a "grid" ``Sequence`` with the variable info, a table of
      overview statistics and a small frequency table.

    Args:
        summary: Mapping of statistics for the variable. The keys read here
            are ``value_counts``, ``n``, ``<part>_counts``, ``varid``,
            ``varname``, ``warnings``, ``n_unique``, ``p_unique``,
            ``n_missing``, ``p_missing`` and ``memory_size``.

    Returns:
        The template-variable mapping described above.
    """
    max_rows_full = config["n_freq_table_max"].get(int)
    max_rows_mini = config["vars"]["cat"]["n_obs"].get(int)

    # TODO: merge with boolean/categorical
    mini_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=max_rows_mini,
    )
    template_variables = render_common(summary)

    # One set of frequency rows per URL component, stored under its own key.
    for part in ("scheme", "netloc", "path", "query", "fragment"):
        counts_key = "{}_counts".format(part)
        rows_key = "freqtable_{}".format(part)
        template_variables[rows_key] = freq_table(
            freqtable=summary[counts_key],
            n=summary["n"],
            max_number_to_print=max_rows_full,
        )

    # (rows key, tab title, anchor template) for each tab, in display order.
    # "freq_table_rows" is not set in this function; it is expected to come
    # from render_common -- confirm against that helper.
    tab_specs = (
        ("freq_table_rows", "Full", "{varid}full_frequency"),
        ("freqtable_scheme", "Scheme", "{varid}scheme_frequency"),
        ("freqtable_netloc", "Netloc", "{varid}netloc_frequency"),
        ("freqtable_path", "Path", "{varid}path_frequency"),
        ("freqtable_query", "Query", "{varid}query_frequency"),
        ("freqtable_fragment", "Fragment", "{varid}fragment_frequency"),
    )
    tabs = [
        FrequencyTable(
            template_variables[rows_key],
            name=title,
            anchor_id=anchor.format(varid=summary["varid"]),
        )
        for rows_key, title, anchor in tab_specs
    ]
    template_variables["bottom"] = Sequence(
        tabs,
        sequence_type="tabs",
        name="url stats",
        anchor_id="{varid}urlstats".format(varid=summary["varid"]),
    )

    # Element composition
    info = VariableInfo(
        summary["varid"], summary["varname"], "URL", summary["warnings"]
    )

    # (label, summary key, formatter name) for each overview row; no row
    # is flagged as an alert.
    overview_specs = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
        ("Memory size", "memory_size", "fmt_bytesize"),
    )
    overview = Table(
        [
            {"name": label, "value": summary[key], "fmt": fmt, "alert": False}
            for label, key, fmt in overview_specs
        ]
    )

    mini_table = FrequencyTableSmall(mini_rows)

    template_variables["top"] = Sequence(
        [info, overview, mini_table], sequence_type="grid"
    )

    return template_variables

Functions

def render_url(summary)
Expand source code
def render_url(summary):
    """Build the report template variables for a URL variable.

    Starts from the mapping returned by ``render_common(summary)`` and adds:

    * ``freqtable_<part>`` for each of scheme, netloc, path, query and
      fragment, computed by ``freq_table`` from ``summary["<part>_counts"]``;
    * ``bottom``: a "tabs" ``Sequence`` holding the full frequency table
      followed by one frequency table per URL part;
    * ``top``: a "grid" ``Sequence`` with the variable info, a table of
      overview statistics and a small frequency table.

    Args:
        summary: Mapping of statistics for the variable. The keys read here
            are ``value_counts``, ``n``, ``<part>_counts``, ``varid``,
            ``varname``, ``warnings``, ``n_unique``, ``p_unique``,
            ``n_missing``, ``p_missing`` and ``memory_size``.

    Returns:
        The template-variable mapping described above.
    """
    max_rows_full = config["n_freq_table_max"].get(int)
    max_rows_mini = config["vars"]["cat"]["n_obs"].get(int)

    # TODO: merge with boolean/categorical
    mini_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=max_rows_mini,
    )
    template_variables = render_common(summary)

    # One set of frequency rows per URL component, stored under its own key.
    for part in ("scheme", "netloc", "path", "query", "fragment"):
        counts_key = "{}_counts".format(part)
        rows_key = "freqtable_{}".format(part)
        template_variables[rows_key] = freq_table(
            freqtable=summary[counts_key],
            n=summary["n"],
            max_number_to_print=max_rows_full,
        )

    # (rows key, tab title, anchor template) for each tab, in display order.
    # "freq_table_rows" is not set in this function; it is expected to come
    # from render_common -- confirm against that helper.
    tab_specs = (
        ("freq_table_rows", "Full", "{varid}full_frequency"),
        ("freqtable_scheme", "Scheme", "{varid}scheme_frequency"),
        ("freqtable_netloc", "Netloc", "{varid}netloc_frequency"),
        ("freqtable_path", "Path", "{varid}path_frequency"),
        ("freqtable_query", "Query", "{varid}query_frequency"),
        ("freqtable_fragment", "Fragment", "{varid}fragment_frequency"),
    )
    tabs = [
        FrequencyTable(
            template_variables[rows_key],
            name=title,
            anchor_id=anchor.format(varid=summary["varid"]),
        )
        for rows_key, title, anchor in tab_specs
    ]
    template_variables["bottom"] = Sequence(
        tabs,
        sequence_type="tabs",
        name="url stats",
        anchor_id="{varid}urlstats".format(varid=summary["varid"]),
    )

    # Element composition
    info = VariableInfo(
        summary["varid"], summary["varname"], "URL", summary["warnings"]
    )

    # (label, summary key, formatter name) for each overview row; no row
    # is flagged as an alert.
    overview_specs = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
        ("Memory size", "memory_size", "fmt_bytesize"),
    )
    overview = Table(
        [
            {"name": label, "value": summary[key], "fmt": fmt, "alert": False}
            for label, key, fmt in overview_specs
        ]
    )

    mini_table = FrequencyTableSmall(mini_rows)

    template_variables["top"] = Sequence(
        [info, overview, mini_table], sequence_type="grid"
    )

    return template_variables