pytermgui.highlighters

This module provides the Highlighter class, and some pre-configured instances.

  1"""This module provides the `Highlighter` class, and some pre-configured instances."""
  2
  3from __future__ import annotations
  4
  5import re
  6import keyword
  7import builtins
  8from dataclasses import dataclass, field
  9from typing import Pattern, Match, Protocol, Callable, Generator, TYPE_CHECKING
 10
 11from .regex import RE_MARKUP
 12
 13if TYPE_CHECKING:
 14    from .fancy_repr import FancyYield
 15
 16__all__ = [
 17    "Highlighter",
 18    "RegexHighlighter",
 19    "highlight_python",
 20]
 21
 22
 23class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
 24    """The protocol for highlighters."""
 25
 26    def __call__(self, text: str, cache: bool = True) -> str:
 27        """Highlights the given text.
 28
 29        Args:
 30            text: The text to highlight.
 31            cache: If set (default), results will be stored, keyed by their respective
 32                inputs, and retrieved the next time the same key is given.
 33        """
 34
 35
 36@dataclass
 37class RegexHighlighter:
 38    """A class to highlight strings using regular expressions.
 39
 40    This class must be provided with a list of styles. These styles are really just a
 41    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 42    in the instance use the same prefix, it can be given under the `prefix` key and
 43    ommitted from the style names.
 44
 45    On construction, the instance will combine all of its patterns into a monster regex
 46    including named capturing groups. The general format is something like:
 47
 48        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 49
 50    Calling this instance will then replace all matches, going in the order of
 51    definition, with style-injected versions. These follow the format:
 52
 53        [{prefix?}{name}]{content}[/{prefix}{name}]
 54
 55    Oddities to keep in mind:
 56    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 57        groups cannot match the same text.
 58    - Because of how capturing groups work, everything within the patterns will be
 59        matched. To look for context around a match, look-around assertions can be used.
 60    """
 61
 62    styles: list[tuple[str, str]]
 63    """A list of tuples of (style_alias, pattern_str)."""
 64
 65    prefix: str = ""
 66    """Some string to insert before each style alias."""
 67
 68    pre_formatter: Callable[[str], str] | None = None
 69    """A callable that formats the input string, before any highlighting is done to it."""
 70
 71    match_formatter: Callable[[Match, str], str] | None = None
 72    """A callable of (match, content) that gets called on every match.
 73
 74    Its return value will be used as the content that the already set highlighting will apply
 75    to. Useful to trim text, or apply other transformations before inserting it back.
 76    """
 77
 78    re_flags: int = 0
 79    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 80
 81    _pattern: Pattern = field(init=False)
 82    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 83
 84    def __post_init__(self) -> None:
 85        """Combines all styles into one pattern."""
 86
 87        pattern = ""
 88        names: list[str] = []
 89        for name, ptrn in self.styles:
 90            pattern += f"(?P<{name}>{ptrn})|"
 91            names.append(name)
 92
 93        pattern = pattern[:-1]
 94
 95        self._pattern = re.compile(pattern, flags=self.re_flags)
 96
 97    def __call__(self, text: str, cache: bool = True) -> str:
 98        """Highlights the given text, using the combined regex pattern."""
 99
100        if self.pre_formatter is not None:
101            text = self.pre_formatter(text)
102
103        if cache and text in self._highlight_cache:
104            return self._highlight_cache[text]
105
106        cache_key = text
107
108        def _insert_style(matchobj: Match) -> str:
109            """Returns the match inserted into a markup style."""
110
111            groups = matchobj.groupdict()
112
113            name = matchobj.lastgroup
114            content = groups.get(str(name), None)
115
116            # Literalize "[" characters to avoid TIM parsing them
117            if name in ["str", "multiline_str"]:
118                if len(RE_MARKUP.findall(content)) > 0:
119                    content = content.replace("[", r"\[")
120
121            if self.match_formatter is not None:
122                content = self.match_formatter(matchobj, content)
123
124                if content == "":
125                    return ""
126
127            tag = f"{self.prefix}{name}"
128            style = f"[{tag}]{{}}[/{tag}]"
129
130            return style.format(content)
131
132        text = self._pattern.sub(_insert_style, text)
133        self._highlight_cache[cache_key] = text
134
135        return text
136
137    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
138        """Yields some fancy looking repl text."""
139
140        preview = self("highlight_python()") + "\x1b[0m"
141        pattern = self._pattern.pattern
142
143        if len(pattern) > 40:
144            pattern = pattern[:38] + "..."
145
146        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
147        yield {"text": str(preview), "highlight": False}
148
149        yield ">"
150
151
# Alternations of non-capturing groups: one per name exposed by `builtins`, and
# one per Python keyword plus the extra words "builtin", "function" and "module".
_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(
    f"(?:{keyw})" for keyw in keyword.kwlist + ["builtin", "function", "module"]
)

# Pre-configured highlighter for Python source. Style order matters: earlier
# styles win when several could match at the same position.
highlight_python = RegexHighlighter(
    prefix="code.",
    styles=[
        ("multiline_str", r"([frbu]*)\"{3}([\s\S]*?)(?<!\\)\"{3}"),
        (
            "str",
            # The string prefix applies to both quote styles, so the alternation
            # between them is grouped separately from the prefix.
            r"([frbu]*(?:\".*?(?<!\\)\"|\'.*?(?<!\\)\'))",
        ),
        ("comment", "(#.*)"),
        # The alternation is wrapped as a whole; a trailing quantifier here would
        # only bind to the last keyword in the list.
        ("keyword", rf"\b(?<![\.\-])({_KEYWORD_NAMES})\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.=]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"(?<=\b)((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
class Highlighter(typing.Protocol):
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural type for any callable that highlights a piece of text."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Returns a highlighted version of the given text.

        Args:
            text: The string that should be highlighted.
            cache: When set (the default), results are stored, keyed by their
                respective inputs, and handed back the next time the same key
                is given.
        """

        ...

The protocol for highlighters.

Highlighter(*args, **kwargs)
def _no_init_or_replace_init(self, *args, **kwargs):
    """Refuse to instantiate a protocol, or install and run the real ``__init__``.

    Raises ``TypeError`` when the instance's class has a truthy ``_is_protocol``.
    Returns without doing anything when the class already has an ``__init__``
    other than this function. Otherwise the first ``__init__`` found in the MRO
    that is not this function is assigned to ``cls.__init__`` (falling back to
    ``object.__init__``) and then called with the given arguments.
    """
    cls = type(self)

    if cls._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # Already using a custom `__init__`. No need to calculate correct
    # `__init__` to call. This can lead to RecursionError. See bpo-45121.
    if cls.__init__ is not _no_init_or_replace_init:
        return

    # Initially, `__init__` of a protocol subclass is set to `_no_init_or_replace_init`.
    # The first instantiation of the subclass will call `_no_init_or_replace_init` which
    # searches for a proper new `__init__` in the MRO. The new `__init__`
    # replaces the subclass' old `__init__` (ie `_no_init_or_replace_init`). Subsequent
    # instantiation of the protocol subclass will thus use the new
    # `__init__` and no longer call `_no_init_or_replace_init`.
    for base in cls.__mro__:
        # Look in each class's own namespace only, so inherited entries are not
        # picked up twice; a missing entry counts as "still the placeholder".
        init = base.__dict__.get('__init__', _no_init_or_replace_init)
        if init is not _no_init_or_replace_init:
            cls.__init__ = init
            break
    else:
        # should not happen
        cls.__init__ = object.__init__

    cls.__init__(self, *args, **kwargs)
@dataclass
class RegexHighlighter:
 37@dataclass
 38class RegexHighlighter:
 39    """A class to highlight strings using regular expressions.
 40
 41    This class must be provided with a list of styles. These styles are really just a
 42    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 43    in the instance use the same prefix, it can be given under the `prefix` key and
 44    ommitted from the style names.
 45
 46    On construction, the instance will combine all of its patterns into a monster regex
 47    including named capturing groups. The general format is something like:
 48
 49        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 50
 51    Calling this instance will then replace all matches, going in the order of
 52    definition, with style-injected versions. These follow the format:
 53
 54        [{prefix?}{name}]{content}[/{prefix}{name}]
 55
 56    Oddities to keep in mind:
 57    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 58        groups cannot match the same text.
 59    - Because of how capturing groups work, everything within the patterns will be
 60        matched. To look for context around a match, look-around assertions can be used.
 61    """
 62
 63    styles: list[tuple[str, str]]
 64    """A list of tuples of (style_alias, pattern_str)."""
 65
 66    prefix: str = ""
 67    """Some string to insert before each style alias."""
 68
 69    pre_formatter: Callable[[str], str] | None = None
 70    """A callable that formats the input string, before any highlighting is done to it."""
 71
 72    match_formatter: Callable[[Match, str], str] | None = None
 73    """A callable of (match, content) that gets called on every match.
 74
 75    Its return value will be used as the content that the already set highlighting will apply
 76    to. Useful to trim text, or apply other transformations before inserting it back.
 77    """
 78
 79    re_flags: int = 0
 80    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 81
 82    _pattern: Pattern = field(init=False)
 83    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 84
 85    def __post_init__(self) -> None:
 86        """Combines all styles into one pattern."""
 87
 88        pattern = ""
 89        names: list[str] = []
 90        for name, ptrn in self.styles:
 91            pattern += f"(?P<{name}>{ptrn})|"
 92            names.append(name)
 93
 94        pattern = pattern[:-1]
 95
 96        self._pattern = re.compile(pattern, flags=self.re_flags)
 97
 98    def __call__(self, text: str, cache: bool = True) -> str:
 99        """Highlights the given text, using the combined regex pattern."""
100
101        if self.pre_formatter is not None:
102            text = self.pre_formatter(text)
103
104        if cache and text in self._highlight_cache:
105            return self._highlight_cache[text]
106
107        cache_key = text
108
109        def _insert_style(matchobj: Match) -> str:
110            """Returns the match inserted into a markup style."""
111
112            groups = matchobj.groupdict()
113
114            name = matchobj.lastgroup
115            content = groups.get(str(name), None)
116
117            # Literalize "[" characters to avoid TIM parsing them
118            if name in ["str", "multiline_str"]:
119                if len(RE_MARKUP.findall(content)) > 0:
120                    content = content.replace("[", r"\[")
121
122            if self.match_formatter is not None:
123                content = self.match_formatter(matchobj, content)
124
125                if content == "":
126                    return ""
127
128            tag = f"{self.prefix}{name}"
129            style = f"[{tag}]{{}}[/{tag}]"
130
131            return style.format(content)
132
133        text = self._pattern.sub(_insert_style, text)
134        self._highlight_cache[cache_key] = text
135
136        return text
137
138    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
139        """Yields some fancy looking repl text."""
140
141        preview = self("highlight_python()") + "\x1b[0m"
142        pattern = self._pattern.pattern
143
144        if len(pattern) > 40:
145            pattern = pattern[:38] + "..."
146
147        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
148        yield {"text": str(preview), "highlight": False}
149
150        yield ">"

A class to highlight strings using regular expressions.

This class must be provided with a list of styles. These styles are really just tuples of the markup alias name and its associated RE pattern. If all aliases in the instance use the same prefix, it can be given under the prefix key and omitted from the style names.

On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:

(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:

[{prefix?}{name}]{content}[/{prefix}{name}]

Oddities to keep in mind:

  • Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
  • Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
RegexHighlighter( styles: list[tuple[str, str]], prefix: str = '', pre_formatter: Optional[Callable[[str], str]] = None, match_formatter: Optional[Callable[[Match, str], str]] = None, re_flags: int = 0)
styles: list[tuple[str, str]]

A list of tuples of (style_alias, pattern_str).

prefix: str = ''

Some string to insert before each style alias.

pre_formatter: Optional[Callable[[str], str]] = None

A callable that formats the input string, before any highlighting is done to it.

match_formatter: Optional[Callable[[Match, str], str]] = None

A callable of (match, content) that gets called on every match.

Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.

re_flags: int = 0

All regex flags to apply when compiling the generated pattern, OR-d (|) together.

highlight_python = RegexHighlighter(styles=[('multiline_str', '([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3}'), ('str', '([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\'))'), ('comment', '(#.*)'), ('keyword', '\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:True)|(?:and)|(?:as)|(?:assert)|(?:async)|(?:await)|(?:break)|(?:class)|(?:continue)|(?:def)|(?:del)|(?:elif)|(?:else)|(?:except)|(?:finally)|(?:for)|(?:from)|(?:global)|(?:if)|(?:import)|(?:in)|(?:is)|(?:lambda)|(?:nonlocal)|(?:not)|(?:or)|(?:pass)|(?:raise)|(?:return)|(?:try)|(?:while)|(?:with)|(?:yield)|(?:builtin)|(?:function)|(?:module)+)\\b'), ('builtin', '\\b(?<!\\.)((?:ArithmeticError)|(?:AssertionError)|(?:AttributeError)|(?:BaseException)|(?:BlockingIOError)|(?:BrokenPipeError)|(?:BufferError)|(?:BytesWarning)|(?:ChildProcessError)|(?:ConnectionAbortedError)|(?:ConnectionError)|(?:ConnectionRefusedError)|(?:ConnectionResetError)|(?:DeprecationWarning)|(?:EOFError)|(?:Ellipsis)|(?:EncodingWarning)|(?:EnvironmentError)|(?:Exception)|(?:False)|(?:FileExistsError)|(?:FileNotFoundError)|(?:FloatingPointError)|(?:FutureWarning)|(?:GeneratorExit)|(?:IOError)|(?:ImportError)|(?:ImportWarning)|(?:IndentationError)|(?:IndexError)|(?:InterruptedError)|(?:IsADirectoryError)|(?:KeyError)|(?:KeyboardInterrupt)|(?:LookupError)|(?:MemoryError)|(?:ModuleNotFoundError)|(?:NameError)|(?:None)|(?:NotADirectoryError)|(?:NotImplemented)|(?:NotImplementedError)|(?:OSError)|(?:OverflowError)|(?:PendingDeprecationWarning)|(?:PermissionError)|(?:ProcessLookupError)|(?:RecursionError)|(?:ReferenceError)|(?:ResourceWarning)|(?:RuntimeError)|(?:RuntimeWarning)|(?:StopAsyncIteration)|(?:StopIteration)|(?:SyntaxError)|(?:SyntaxWarning)|(?:SystemError)|(?:SystemExit)|(?:TabError)|(?:TimeoutError)|(?:True)|(?:TypeError)|(?:UnboundLocalError)|(?:UnicodeDecodeError)|(?:UnicodeEncodeError)|(?:UnicodeError)|(?:UnicodeTranslateError)|(?:UnicodeWarning)|(?:UserWarning)|(?:ValueError)|(?:Warning)|(?:ZeroDivisionError)|(?:__build_class__)|(?:__debug__
)|(?:__doc__)|(?:__import__)|(?:__loader__)|(?:__name__)|(?:__package__)|(?:__spec__)|(?:abs)|(?:aiter)|(?:all)|(?:anext)|(?:any)|(?:ascii)|(?:bin)|(?:bool)|(?:breakpoint)|(?:bytearray)|(?:bytes)|(?:callable)|(?:chr)|(?:classmethod)|(?:compile)|(?:complex)|(?:copyright)|(?:credits)|(?:delattr)|(?:dict)|(?:dir)|(?:divmod)|(?:enumerate)|(?:eval)|(?:exec)|(?:exit)|(?:filter)|(?:float)|(?:format)|(?:frozenset)|(?:getattr)|(?:globals)|(?:hasattr)|(?:hash)|(?:help)|(?:hex)|(?:id)|(?:input)|(?:int)|(?:isinstance)|(?:issubclass)|(?:iter)|(?:len)|(?:license)|(?:list)|(?:locals)|(?:map)|(?:max)|(?:memoryview)|(?:min)|(?:next)|(?:object)|(?:oct)|(?:open)|(?:ord)|(?:pow)|(?:print)|(?:property)|(?:quit)|(?:range)|(?:repr)|(?:reversed)|(?:round)|(?:set)|(?:setattr)|(?:slice)|(?:sorted)|(?:staticmethod)|(?:str)|(?:sum)|(?:super)|(?:tuple)|(?:type)|(?:vars)|(?:zip))\\b'), ('identifier', '([^ \\.=]+)(?=\\()'), ('global', '(?<=\\b)([A-Z]\\w+)'), ('number', '(?<=\\b)((?:0x[\\da-zA-Z]+)|(?:\\d+))')], prefix='code.', pre_formatter=None, match_formatter=None, re_flags=0, _pattern=re.compile('(?P<multiline_str>([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3})|(?P<str>([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\')))|(?P<comment>(#.*))|(?P<keyword>\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:Tru), _highlight_cache={})