Source code for visions.types.url

from typing import Sequence
from urllib.parse import ParseResult, urlparse

import pandas as pd

from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
from visions.types.type import VisionsBaseType
from visions.utils.series_utils import (
    func_nullable_series_contains,
    isinstance_attrs,
    nullable_series_contains,
    series_not_empty,
)


@func_nullable_series_contains
def string_is_url(series, state: dict) -> bool:
    """Check whether every value in ``series`` parses as a URL.

    A value counts as a URL when :func:`urllib.parse.urlparse` yields both a
    non-empty ``scheme`` and a non-empty ``netloc`` for it.

    Args:
        series: The values to test.
        state: Passed through unchanged to :func:`to_url`.

    Returns:
        ``True`` if all values parse with a scheme and a network location,
        ``False`` otherwise, including when a value cannot be parsed at all.
    """
    try:
        parsed = to_url(series, state)
        # Coerce per element: ``netloc and scheme`` evaluates to a string, and
        # reducing an object-dtype series is not guaranteed to give a bool.
        has_parts = parsed.apply(lambda url: bool(url.netloc and url.scheme))
        return bool(has_parts.all())
    except (AttributeError, ValueError):
        # ``urlparse`` raises AttributeError for values that are neither str
        # nor bytes-like, and ValueError for malformed network locations such
        # as unmatched square brackets. Neither kind of value is a URL.
        return False


def to_url(series: pd.Series, state: dict) -> pd.Series:
    """Parse every element of ``series`` with :func:`urllib.parse.urlparse`.

    Args:
        series: The values to parse.
        state: Not used by this function.

    Returns:
        A series holding the ``urlparse`` result for each input element.
    """
    parsed = series.apply(urlparse)
    return parsed


def _get_relations(cls) -> Sequence[TypeRelation]:
    """Build the type relations for ``cls``.

    Returns:
        A list with two entries, in order: an identity relation from
        ``Object`` and an inference relation from ``String`` that uses
        ``string_is_url`` as the check and ``to_url`` as the transformer.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from visions.types import Object, String

    identity = IdentityRelation(cls, Object)
    inferred = InferenceRelation(
        cls,
        String,
        relationship=string_is_url,
        transformer=to_url,
    )
    return [identity, inferred]


class URL(VisionsBaseType):
    """**Url** implementation of :class:`visions.types.type.VisionsBaseType`.

    Examples:
        >>> import pandas as pd
        >>> import visions
        >>> from urllib.parse import urlparse
        >>> urls = ['http://www.cwi.nl:80/%7Eguido/Python.html', 'https://github.com/pandas-profiling/pandas-profiling']
        >>> x = pd.Series([urlparse(url) for url in urls])
        >>> x in visions.URL
        True
    """

    @classmethod
    def get_relations(cls) -> Sequence[TypeRelation]:
        """Return the type relations for this type via ``_get_relations``."""
        return _get_relations(cls)

    @classmethod
    @series_not_empty
    @nullable_series_contains
    def contains_op(cls, series: pd.Series, state: dict) -> bool:
        """Check whether ``series`` holds parsed URLs.

        Delegates to ``isinstance_attrs`` with :class:`urllib.parse.ParseResult`
        and the attribute names ``netloc`` and ``scheme``.
        """
        return isinstance_attrs(series, ParseResult, ["netloc", "scheme"])