Source code for visions.functional

from typing import Dict, List, Tuple, Type

import pandas as pd

from visions.types.type import VisionsBaseType
from visions.typesets.typeset import VisionsTypeset


[docs]def cast_frame(df: pd.DataFrame, typeset: VisionsTypeset) -> pd.DataFrame: """Casts a DataFrame into a typeset by first performing column wise type inference against a provided typeset Args: df: the DataFrame to cast typeset: the Typeset in which we cast Returns: A tuple of the casted DataFrame and the types to which the columns were cast """ return typeset.cast_frame(df)
[docs]def cast_series(series: pd.Series, typeset: VisionsTypeset) -> pd.Series: """Casts the series Args: series: the Series to infer the type of typeset: the Typeset that provides the type context Returns: The converted series """ return typeset.cast_series(series)
[docs]def cast_and_infer_frame( df: pd.DataFrame, typeset: VisionsTypeset ) -> Tuple[pd.DataFrame, dict]: """Casts a DataFrame into a typeset by first performing column wise type inference against a provided typeset Args: df: the DataFrame to cast typeset: the Typeset in which we cast Returns: A tuple of the casted DataFrame and the types to which the columns were cast """ return typeset.cast_and_infer_frame(df)
[docs]def cast_and_infer_series( series: pd.Series, typeset: VisionsTypeset ) -> Tuple[Type[VisionsBaseType], pd.Series]: """Cast the series and perform type inference Args: series: the Series to infer the type of typeset: the Typeset that provides the type context Returns: The inferred type and the converted series """ return typeset.cast_and_infer_series(series)
[docs]def infer_frame_type( df: pd.DataFrame, typeset: VisionsTypeset ) -> Dict[str, Type[VisionsBaseType]]: """Infer the current types of each column in the DataFrame given the typeset. Args: df: the DataFrame to infer types on typeset: the Typeset that provides the type context Returns: A dictionary with a mapping from column name to type """ return typeset.infer_frame_type(df)
[docs]def infer_series_type( series: pd.Series, typeset: VisionsTypeset ) -> Type[VisionsBaseType]: """Infer the current type of the series given the typeset Args: series: the Series to infer the type of typeset: the Typeset that provides the type context Returns: The inferred type of the series """ return typeset.infer_series_type(series)
[docs]def detect_frame_type( df: pd.DataFrame, typeset: VisionsTypeset ) -> Dict[str, Type[VisionsBaseType]]: """Detect the type in the base graph Args: df: the DataFrame to detect types on typeset: the Typeset that provides the type context Returns: A dictionary with a mapping from column name to type """ return typeset.detect_frame_type(df)
[docs]def detect_series_type( series: pd.Series, typeset: VisionsTypeset ) -> Type[VisionsBaseType]: """Detect the type in the base graph Args: series: the Series to detect the type of typeset: the Typeset that provides the type context Returns: The detected type """ return typeset.detect_series_type(series)
[docs]def compare_detect_inference_frame( df: pd.DataFrame, typeset: VisionsTypeset ) -> List[Tuple[str, Type[VisionsBaseType], Type[VisionsBaseType]]]: """Compare the types given by inference on the base graph and the relational graph Args: df: the DataFrame to detect types on typeset: the Typeset that provides the type context Examples: >>> for column, type_before, type_after in compare_detect_inference_frame(df, typeset): >>> print(f"{column} was {type_before} is {type_after}") See Also: :doc:`type_inference_report_frame <visions.functional.type_inference_report_frame>`: Formatted report of the output of this function """ comparisons = [] detected_types = detect_frame_type(df, typeset) inferred_types = infer_frame_type(df, typeset) for key in detected_types.keys() & inferred_types.keys(): comparisons.append((key, detected_types[key], inferred_types[key])) return comparisons
[docs]def type_inference_report_frame(df, typeset) -> str: """Return formatted report of the output of `compare_detect_inference_frame`. Args: df: the DataFrame to detect types on typeset: the Typeset that provides the type context Returns: Text-based comparative type inference report Examples: >>> import pandas as pd >>> from visions.functional import type_inference_report_frame >>> from visions.typesets import StandardSet >>> >>> typeset = StandardSet() >>> df = pd.read_csv('dataset.csv') >>> >>> report = type_inference_report_frame(df, typeset) >>> print(report) """ padding = 5 max_column_length = max([len(column) for column in df.columns]) + padding max_type_length = 30 report = "" change_count = 0 for column, type_before, type_after in compare_detect_inference_frame(df, typeset): changed = type_before != type_after if changed: fill = "!=" change_count += 1 else: fill = "==" report += "{column: <{max_column_length}} {type_before: <{max_type_length}} {fill} {type_after: <{max_type_length}} \n".format( column=column, max_column_length=max_column_length, type_before=str(type_before), type_after=str(type_after), max_type_length=max_type_length, fill=fill, ) report += "In total {change_count} out of {type_count} types were changed.\n".format( change_count=change_count, type_count=len(df.columns) ) return report