pytermgui.highlighters

This module provides the Highlighter class, and some pre-configured instances.

View Source
  0"""This module provides the `Highlighter` class, and some pre-configured instances."""
  1
  2from __future__ import annotations
  3import re
  4import keyword
  5import builtins
  6from dataclasses import dataclass, field
  7from typing import Pattern, Match, Protocol, Callable
  8
  9from .regex import RE_MARKUP
 10
 11__all__ = [
 12    "Highlighter",
 13    "RegexHighlighter",
 14    "highlight_python",
 15]
 16
 17
 18class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
 19    """The protocol for highlighters."""
 20
 21    def __call__(self, text: str, cache: bool = True) -> str:
 22        """Highlights the given text.
 23
 24        Args:
 25            text: The text to highlight.
 26            cache: If set (default), results will be stored, keyed by their respective
 27                inputs, and retrieved the next time the same key is given.
 28        """
 29
 30
 31@dataclass
 32class RegexHighlighter(Highlighter):
 33    """A class to highlight strings using regular expressions.
 34
 35    This class must be provided with a list of styles. These styles are really just a
 36    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 37    in the instance use the same prefix, it can be given under the `prefix` key and
 38    ommitted from the style names.
 39
 40    On construction, the instance will combine all of its patterns into a monster regex
 41    including named capturing groups. The general format is something like:
 42
 43        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 44
 45    Calling this instance will then replace all matches, going in the order of
 46    definition, with style-injected versions. These follow the format:
 47
 48        [{prefix?}{name}]{content}[/{prefix}{name}]
 49
 50    Oddities to keep in mind:
 51    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 52        groups cannot match the same text.
 53    - Because of how capturing groups work, everything within the patterns will be
 54        matched. To look for context around a match, look-around assertions can be used.
 55    """
 56
 57    styles: list[tuple[str, str]]
 58    """A list of tuples of (style_alias, pattern_str)."""
 59
 60    prefix: str = ""
 61    """Some string to insert before each style alias."""
 62
 63    pre_formatter: Callable[[str], str] | None = None
 64    """A callable that formats the input string, before any highlighting is done to it."""
 65
 66    match_formatter: Callable[[Match, str], str] | None = None
 67    """A callable of (match, content) that gets called on every match.
 68
 69    Its return value will be used as the content that the already set highlighting will apply
 70    to. Useful to trim text, or apply other transformations before inserting it back.
 71    """
 72
 73    re_flags: int = 0
 74    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 75
 76    _pattern: Pattern = field(init=False)
 77    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 78
 79    def __post_init__(self) -> None:
 80        """Combines all styles into one pattern."""
 81
 82        pattern = ""
 83        names: list[str] = []
 84        for name, ptrn in self.styles:
 85            pattern += f"(?P<{name}>{ptrn})|"
 86            names.append(name)
 87
 88        pattern = pattern[:-1]
 89
 90        self._pattern = re.compile(pattern, flags=self.re_flags)
 91
 92    def __call__(self, text: str, cache: bool = True) -> str:
 93        """Highlights the given text, using the combined regex pattern."""
 94
 95        if self.pre_formatter is not None:
 96            text = self.pre_formatter(text)
 97
 98        if cache and text in self._highlight_cache:
 99            return self._highlight_cache[text]
100
101        cache_key = text
102
103        def _insert_style(matchobj: Match) -> str:
104            """Returns the match inserted into a markup style."""
105
106            groups = matchobj.groupdict()
107
108            name = matchobj.lastgroup
109            content = groups.get(str(name), None)
110
111            # Literalize "[" characters to avoid TIM parsing them
112            if name == "str":
113                if len(RE_MARKUP.findall(content)) > 0:
114                    content = content.replace("[", r"\[")
115
116            if self.match_formatter is not None:
117                content = self.match_formatter(matchobj, content)
118
119                if content == "":
120                    return ""
121
122            tag = f"{self.prefix}{name}"
123            style = f"[{tag}]{{}}[/{tag}]"
124
125            return style.format(content)
126
127        text = self._pattern.sub(_insert_style, text)
128        self._highlight_cache[cache_key] = text
129
130        return text
131
132
# Alternations of every name in the `builtins` module and of every reserved keyword,
# each wrapped in its own non-capturing group.
_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(f"(?:{keyw})" for keyw in keyword.kwlist)

# Triple quotes come first: alternatives are tried left to right, so a lone quote
# listed earlier would always win and split a `"""` delimiter apart.
_STR_DELIMS = "|".join(('"""', "'''", '"', "'"))

# Regex-based highlighter for Python source. No DOTALL flag is set, so `.` stops at
# line ends and strings spanning several lines are not matched as one unit.
highlight_python = RegexHighlighter(
    prefix="code.",
    styles=[
        (
            "str",
            # Lazy body, closed by the first unescaped occurrence of the opening
            # delimiter. This keeps `"a" + "b"` as two strings and allows `""`.
            rf"[frbu]*?(?P<str_start>{_STR_DELIMS}).*?(?<!\\)(?P=str_start)",
        ),
        ("comment", "(#.*)"),
        ("keyword", rf"\b(?:{_KEYWORD_NAMES})\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
#   class Highlighter(typing.Protocol):
View Source
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural type for any callable object that highlights text."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Returns a highlighted version of `text`.

        Args:
            text: The string that should be highlighted.
            cache: When set (the default), the result is stored under its input as
                key, and handed back the next time the same key is given.
        """
        ...

The protocol for highlighters.

#   Highlighter(*args, **kwargs)
View Source
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder `__init__` that swaps itself out for a real one on first use.

    Raises TypeError when the instance's class is itself a protocol. Otherwise,
    if the class still has this placeholder as its `__init__`, the first
    `__init__` found along the MRO that is not this placeholder is installed on
    the class and then called with the given arguments.
    """
    klass = type(self)

    if klass._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # A class that already has a custom `__init__` needs no lookup; skipping it
    # also avoids a possible RecursionError. See bpo-45121.
    if klass.__init__ is _no_init_or_replace_init:
        # The first instantiation of a protocol subclass lands here. The found
        # `__init__` replaces the placeholder on the class, so subsequent
        # instantiations use it directly and never reach this function again.
        inherited = (
            base.__dict__.get('__init__', _no_init_or_replace_init)
            for base in klass.__mro__
        )
        klass.__init__ = next(
            (init for init in inherited if init is not _no_init_or_replace_init),
            object.__init__,  # should not happen
        )

        klass.__init__(self, *args, **kwargs)
#  
@dataclass
class RegexHighlighter(Highlighter):
View Source
 32@dataclass
 33class RegexHighlighter(Highlighter):
 34    """A class to highlight strings using regular expressions.
 35
 36    This class must be provided with a list of styles. These styles are really just a
 37    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 38    in the instance use the same prefix, it can be given under the `prefix` key and
 39    ommitted from the style names.
 40
 41    On construction, the instance will combine all of its patterns into a monster regex
 42    including named capturing groups. The general format is something like:
 43
 44        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 45
 46    Calling this instance will then replace all matches, going in the order of
 47    definition, with style-injected versions. These follow the format:
 48
 49        [{prefix?}{name}]{content}[/{prefix}{name}]
 50
 51    Oddities to keep in mind:
 52    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 53        groups cannot match the same text.
 54    - Because of how capturing groups work, everything within the patterns will be
 55        matched. To look for context around a match, look-around assertions can be used.
 56    """
 57
 58    styles: list[tuple[str, str]]
 59    """A list of tuples of (style_alias, pattern_str)."""
 60
 61    prefix: str = ""
 62    """Some string to insert before each style alias."""
 63
 64    pre_formatter: Callable[[str], str] | None = None
 65    """A callable that formats the input string, before any highlighting is done to it."""
 66
 67    match_formatter: Callable[[Match, str], str] | None = None
 68    """A callable of (match, content) that gets called on every match.
 69
 70    Its return value will be used as the content that the already set highlighting will apply
 71    to. Useful to trim text, or apply other transformations before inserting it back.
 72    """
 73
 74    re_flags: int = 0
 75    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 76
 77    _pattern: Pattern = field(init=False)
 78    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 79
 80    def __post_init__(self) -> None:
 81        """Combines all styles into one pattern."""
 82
 83        pattern = ""
 84        names: list[str] = []
 85        for name, ptrn in self.styles:
 86            pattern += f"(?P<{name}>{ptrn})|"
 87            names.append(name)
 88
 89        pattern = pattern[:-1]
 90
 91        self._pattern = re.compile(pattern, flags=self.re_flags)
 92
 93    def __call__(self, text: str, cache: bool = True) -> str:
 94        """Highlights the given text, using the combined regex pattern."""
 95
 96        if self.pre_formatter is not None:
 97            text = self.pre_formatter(text)
 98
 99        if cache and text in self._highlight_cache:
100            return self._highlight_cache[text]
101
102        cache_key = text
103
104        def _insert_style(matchobj: Match) -> str:
105            """Returns the match inserted into a markup style."""
106
107            groups = matchobj.groupdict()
108
109            name = matchobj.lastgroup
110            content = groups.get(str(name), None)
111
112            # Literalize "[" characters to avoid TIM parsing them
113            if name == "str":
114                if len(RE_MARKUP.findall(content)) > 0:
115                    content = content.replace("[", r"\[")
116
117            if self.match_formatter is not None:
118                content = self.match_formatter(matchobj, content)
119
120                if content == "":
121                    return ""
122
123            tag = f"{self.prefix}{name}"
124            style = f"[{tag}]{{}}[/{tag}]"
125
126            return style.format(content)
127
128        text = self._pattern.sub(_insert_style, text)
129        self._highlight_cache[cache_key] = text
130
131        return text

A class to highlight strings using regular expressions.

This class must be provided with a list of styles. These styles are really just a tuple of the markup alias name, and their associated RE patterns. If all aliases in the instance use the same prefix, it can be given under the prefix key and omitted from the style names.

On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:

(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:

[{prefix?}{name}]{content}[/{prefix}{name}]

Oddities to keep in mind:

  • Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
  • Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
#   RegexHighlighter( styles: list[tuple[str, str]], prefix: str = '', pre_formatter: Optional[Callable[[str], str]] = None, match_formatter: Optional[Callable[[Match, str], str]] = None, re_flags: int = 0 )
#   styles: list[tuple[str, str]]

A list of tuples of (style_alias, pattern_str).

#   prefix: str = ''

Some string to insert before each style alias.

#   pre_formatter: Optional[Callable[[str], str]] = None

A callable that formats the input string, before any highlighting is done to it.

#   match_formatter: Optional[Callable[[Match, str], str]] = None

A callable of (match, content) that gets called on every match.

Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.

#   re_flags: int = 0

All regex flags to apply when compiling the generated pattern, OR-d (|) together.

#   highlight_python = RegexHighlighter(styles=[('str', '[frbu]*?(?P<str_start>(?:(?:"|("""))|(?:\'|(\'\'\')))).+(?P=str_start)'), ('comment', '(#.*)'), ('keyword', '(\\b)((?:False)|(?:None)|(?:True)|(?:and)|(?:as)|(?:assert)|(?:async)|(?:await)|(?:break)|(?:class)|(?:continue)|(?:def)|(?:del)|(?:elif)|(?:else)|(?:except)|(?:finally)|(?:for)|(?:from)|(?:global)|(?:if)|(?:import)|(?:in)|(?:is)|(?:lambda)|(?:nonlocal)|(?:not)|(?:or)|(?:pass)|(?:raise)|(?:return)|(?:try)|(?:while)|(?:with)|(?:yield)+)\\b'), ('builtin', '\\b(?<!\\.)((?:ArithmeticError)|(?:AssertionError)|(?:AttributeError)|(?:BaseException)|(?:BlockingIOError)|(?:BrokenPipeError)|(?:BufferError)|(?:BytesWarning)|(?:ChildProcessError)|(?:ConnectionAbortedError)|(?:ConnectionError)|(?:ConnectionRefusedError)|(?:ConnectionResetError)|(?:DeprecationWarning)|(?:EOFError)|(?:Ellipsis)|(?:EncodingWarning)|(?:EnvironmentError)|(?:Exception)|(?:False)|(?:FileExistsError)|(?:FileNotFoundError)|(?:FloatingPointError)|(?:FutureWarning)|(?:GeneratorExit)|(?:IOError)|(?:ImportError)|(?:ImportWarning)|(?:IndentationError)|(?:IndexError)|(?:InterruptedError)|(?:IsADirectoryError)|(?:KeyError)|(?:KeyboardInterrupt)|(?:LookupError)|(?:MemoryError)|(?:ModuleNotFoundError)|(?:NameError)|(?:None)|(?:NotADirectoryError)|(?:NotImplemented)|(?:NotImplementedError)|(?:OSError)|(?:OverflowError)|(?:PendingDeprecationWarning)|(?:PermissionError)|(?:ProcessLookupError)|(?:RecursionError)|(?:ReferenceError)|(?:ResourceWarning)|(?:RuntimeError)|(?:RuntimeWarning)|(?:StopAsyncIteration)|(?:StopIteration)|(?:SyntaxError)|(?:SyntaxWarning)|(?:SystemError)|(?:SystemExit)|(?:TabError)|(?:TimeoutError)|(?:True)|(?:TypeError)|(?:UnboundLocalError)|(?:UnicodeDecodeError)|(?:UnicodeEncodeError)|(?:UnicodeError)|(?:UnicodeTranslateError)|(?:UnicodeWarning)|(?:UserWarning)|(?:ValueError)|(?:Warning)|(?:ZeroDivisionError)|(?:__build_class__)|(?:__debug__)|(?:__doc__)|(?:__import__)|(?:__loader__)|(?:__name__)|(?:__package__)|(?:__spec__)|(?:abs
)|(?:aiter)|(?:all)|(?:anext)|(?:any)|(?:ascii)|(?:bin)|(?:bool)|(?:breakpoint)|(?:bytearray)|(?:bytes)|(?:callable)|(?:chr)|(?:classmethod)|(?:compile)|(?:complex)|(?:copyright)|(?:credits)|(?:delattr)|(?:dict)|(?:dir)|(?:divmod)|(?:enumerate)|(?:eval)|(?:exec)|(?:exit)|(?:filter)|(?:float)|(?:format)|(?:frozenset)|(?:getattr)|(?:globals)|(?:hasattr)|(?:hash)|(?:help)|(?:hex)|(?:id)|(?:input)|(?:int)|(?:isinstance)|(?:issubclass)|(?:iter)|(?:len)|(?:license)|(?:list)|(?:locals)|(?:map)|(?:max)|(?:memoryview)|(?:min)|(?:next)|(?:object)|(?:oct)|(?:open)|(?:ord)|(?:pow)|(?:print)|(?:property)|(?:quit)|(?:range)|(?:repr)|(?:reversed)|(?:round)|(?:set)|(?:setattr)|(?:slice)|(?:sorted)|(?:staticmethod)|(?:str)|(?:sum)|(?:super)|(?:tuple)|(?:type)|(?:vars)|(?:zip))\\b'), ('identifier', '([^ \\.]+)(?=\\()'), ('global', '(?<=\\b)([A-Z]\\w+)'), ('number', '((?:0x[\\da-zA-Z]+)|(?:\\d+))')], prefix='code.', pre_formatter=None, match_formatter=None, re_flags=0, _pattern=re.compile('(?P<str>[frbu]*?(?P<str_start>(?:(?:"|("""))|(?:\'|(\'\'\')))).+(?P=str_start))|(?P<comment>(#.*))|(?P<keyword>(\\b)((?:False)|(?:None)|(?:True)|(?:and)|(?:as)|(?:assert)|(?:async)|(?:await)|(?:break), _highlight_cache={})