pytermgui.highlighters

This module provides the Highlighter class, and some pre-configured instances.

  1"""This module provides the `Highlighter` class, and some pre-configured instances."""
  2
  3from __future__ import annotations
  4
  5import re
  6import keyword
  7import builtins
  8from dataclasses import dataclass, field
  9from typing import Pattern, Match, Protocol, Callable, Generator, TYPE_CHECKING
 10
 11from .regex import RE_MARKUP
 12
 13if TYPE_CHECKING:
 14    from .fancy_repr import FancyYield
 15
 16__all__ = [
 17    "Highlighter",
 18    "RegexHighlighter",
 19    "highlight_python",
 20]
 21
 22
 23class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
 24    """The protocol for highlighters."""
 25
 26    def __call__(self, text: str, cache: bool = True) -> str:
 27        """Highlights the given text.
 28
 29        Args:
 30            text: The text to highlight.
 31            cache: If set (default), results will be stored, keyed by their respective
 32                inputs, and retrieved the next time the same key is given.
 33        """
 34
 35
 36@dataclass
 37class RegexHighlighter:
 38    """A class to highlight strings using regular expressions.
 39
 40    This class must be provided with a list of styles. These styles are really just a
 41    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 42    in the instance use the same prefix, it can be given under the `prefix` key and
 43    ommitted from the style names.
 44
 45    On construction, the instance will combine all of its patterns into a monster regex
 46    including named capturing groups. The general format is something like:
 47
 48        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 49
 50    Calling this instance will then replace all matches, going in the order of
 51    definition, with style-injected versions. These follow the format:
 52
 53        [{prefix?}{name}]{content}[/{prefix}{name}]
 54
 55    Oddities to keep in mind:
 56    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 57        groups cannot match the same text.
 58    - Because of how capturing groups work, everything within the patterns will be
 59        matched. To look for context around a match, look-around assertions can be used.
 60    """
 61
 62    styles: list[tuple[str, str]]
 63    """A list of tuples of (style_alias, pattern_str)."""
 64
 65    prefix: str = ""
 66    """Some string to insert before each style alias."""
 67
 68    pre_formatter: Callable[[str], str] | None = None
 69    """A callable that formats the input string, before any highlighting is done to it."""
 70
 71    match_formatter: Callable[[Match, str], str] | None = None
 72    """A callable of (match, content) that gets called on every match.
 73
 74    Its return value will be used as the content that the already set highlighting will apply
 75    to. Useful to trim text, or apply other transformations before inserting it back.
 76    """
 77
 78    re_flags: int = 0
 79    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 80
 81    _pattern: Pattern = field(init=False)
 82    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 83
 84    def __post_init__(self) -> None:
 85        """Combines all styles into one pattern."""
 86
 87        pattern = ""
 88        names: list[str] = []
 89        for name, ptrn in self.styles:
 90            pattern += f"(?P<{name}>{ptrn})|"
 91            names.append(name)
 92
 93        pattern = pattern[:-1]
 94
 95        self._pattern = re.compile(pattern, flags=self.re_flags)
 96
 97    def __call__(self, text: str, cache: bool = True) -> str:
 98        """Highlights the given text, using the combined regex pattern."""
 99
100        if self.pre_formatter is not None:
101            text = self.pre_formatter(text)
102
103        if cache and text in self._highlight_cache:
104            return self._highlight_cache[text]
105
106        cache_key = text
107
108        def _insert_style(matchobj: Match) -> str:
109            """Returns the match inserted into a markup style."""
110
111            groups = matchobj.groupdict()
112
113            name = matchobj.lastgroup
114            content = groups.get(str(name), None)
115
116            # Literalize "[" characters to avoid TIM parsing them
117            if name in ["str", "multiline_str"]:
118                if len(RE_MARKUP.findall(content)) > 0:
119                    content = content.replace("[", r"\[")
120
121                content = content.replace("\\n", "\n")
122
123            if self.match_formatter is not None:
124                content = self.match_formatter(matchobj, content)
125
126                if content == "":
127                    return ""
128
129            tag = f"{self.prefix}{name}"
130            style = f"[{tag}]{{}}[/{tag}]"
131
132            return style.format(content)
133
134        text = self._pattern.sub(_insert_style, text)
135        self._highlight_cache[cache_key] = text
136
137        return text
138
139    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
140        """Yields some fancy looking repl text."""
141
142        preview = self("highlight_python()") + "\x1b[0m"
143        pattern = self._pattern.pattern
144
145        if len(pattern) > 40:
146            pattern = pattern[:38] + "..."
147
148        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
149        yield {"text": str(preview), "highlight": False}
150
151        yield ">"
152
153
# Every name reported by `dir(builtins)`, each wrapped in its own non-capturing
# group and OR-ed together.
_BUILTIN_NAMES = "|".join([f"(?:{builtin_name})" for builtin_name in dir(builtins)])

# Python's keyword list plus three extra words, joined the same way as above.
_KEYWORD_NAMES = "|".join(
    [f"(?:{word})" for word in [*keyword.kwlist, "builtin", "function", "module"]]
)

# Pre-configured highlighter for Python source. Every style alias is emitted under
# the `code.` prefix. The order of the styles is significant: the alternatives of
# the combined pattern are tried left to right at each position.
highlight_python = RegexHighlighter(
    styles=[
        # Triple-double-quoted strings, with optional prefix letters.
        ("multiline_str", r"([frbu]*)\"{3}([\s\S]*?)(?<!\\)\"{3}"),
        # Double- or single-quoted strings.
        ("str", r"([frbu]*(\".*?(?<!\\)\")|(\'.*?(?<!\\)\'))"),
        # A `#` and whatever follows it on the line.
        ("comment", "(#.*)"),
        # NOTE(review): the trailing `+` binds only to the last alternative of
        # `_KEYWORD_NAMES`, and the `()` group is empty -- confirm both are intended.
        ("keyword", rf"\b(?<![\.\-])()({_KEYWORD_NAMES}+)\b"),
        # Builtin names that are not preceded by a dot.
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        # Anything directly followed by an opening parenthesis.
        ("identifier", r"([^ \.=]+)(?=\()"),
        # Words starting with an uppercase ASCII letter.
        ("global", r"(?<=\b)([A-Z]\w+)"),
        # `0x`-prefixed literals or plain digit runs.
        ("number", r"(?<=\b)((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
    prefix="code.",
)
class Highlighter(typing.Protocol):
24class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
25    """The protocol for highlighters."""
26
27    def __call__(self, text: str, cache: bool = True) -> str:
28        """Highlights the given text.
29
30        Args:
31            text: The text to highlight.
32            cache: If set (default), results will be stored, keyed by their respective
33                inputs, and retrieved the next time the same key is given.
34        """

The protocol for highlighters.

Highlighter(*args, **kwargs)
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder `__init__` that rejects protocols or installs the real `__init__`.

    Raises `TypeError` when the instance's class has a truthy `_is_protocol`.
    Otherwise it looks up the first `__init__` in the MRO that is not this
    function, stores it on the class, and calls it with the given arguments.
    """
    cls = type(self)

    if cls._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # Already using a custom `__init__`. No need to calculate correct
    # `__init__` to call. This can lead to RecursionError. See bpo-45121.
    if cls.__init__ is not _no_init_or_replace_init:
        return

    # Initially, `__init__` of a protocol subclass is set to `_no_init_or_replace_init`.
    # The first instantiation of the subclass will call `_no_init_or_replace_init` which
    # searches for a proper new `__init__` in the MRO. The new `__init__`
    # replaces the subclass' old `__init__` (ie `_no_init_or_replace_init`). Subsequent
    # instantiation of the protocol subclass will thus use the new
    # `__init__` and no longer call `_no_init_or_replace_init`.
    for base in cls.__mro__:
        # Look only at what each base defines itself; a missing entry is treated
        # the same as this placeholder.
        init = base.__dict__.get('__init__', _no_init_or_replace_init)
        if init is not _no_init_or_replace_init:
            cls.__init__ = init
            break
    else:
        # should not happen
        cls.__init__ = object.__init__

    # Delegate to the `__init__` that was just installed on the class.
    cls.__init__(self, *args, **kwargs)
@dataclass
class RegexHighlighter:
 37@dataclass
 38class RegexHighlighter:
 39    """A class to highlight strings using regular expressions.
 40
 41    This class must be provided with a list of styles. These styles are really just a
 42    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 43    in the instance use the same prefix, it can be given under the `prefix` key and
  44    omitted from the style names.
 45
 46    On construction, the instance will combine all of its patterns into a monster regex
 47    including named capturing groups. The general format is something like:
 48
 49        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 50
 51    Calling this instance will then replace all matches, going in the order of
 52    definition, with style-injected versions. These follow the format:
 53
 54        [{prefix?}{name}]{content}[/{prefix}{name}]
 55
 56    Oddities to keep in mind:
 57    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 58        groups cannot match the same text.
 59    - Because of how capturing groups work, everything within the patterns will be
 60        matched. To look for context around a match, look-around assertions can be used.
 61    """
 62
 63    styles: list[tuple[str, str]]
 64    """A list of tuples of (style_alias, pattern_str)."""
 65
 66    prefix: str = ""
 67    """Some string to insert before each style alias."""
 68
 69    pre_formatter: Callable[[str], str] | None = None
 70    """A callable that formats the input string, before any highlighting is done to it."""
 71
 72    match_formatter: Callable[[Match, str], str] | None = None
 73    """A callable of (match, content) that gets called on every match.
 74
 75    Its return value will be used as the content that the already set highlighting will apply
 76    to. Useful to trim text, or apply other transformations before inserting it back.
 77    """
 78
 79    re_flags: int = 0
 80    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 81
 82    _pattern: Pattern = field(init=False)
 83    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 84
 85    def __post_init__(self) -> None:
 86        """Combines all styles into one pattern."""
 87
 88        pattern = ""
 89        names: list[str] = []
 90        for name, ptrn in self.styles:
 91            pattern += f"(?P<{name}>{ptrn})|"
 92            names.append(name)
 93
 94        pattern = pattern[:-1]
 95
 96        self._pattern = re.compile(pattern, flags=self.re_flags)
 97
 98    def __call__(self, text: str, cache: bool = True) -> str:
 99        """Highlights the given text, using the combined regex pattern."""
100
101        if self.pre_formatter is not None:
102            text = self.pre_formatter(text)
103
104        if cache and text in self._highlight_cache:
105            return self._highlight_cache[text]
106
107        cache_key = text
108
109        def _insert_style(matchobj: Match) -> str:
110            """Returns the match inserted into a markup style."""
111
112            groups = matchobj.groupdict()
113
114            name = matchobj.lastgroup
115            content = groups.get(str(name), None)
116
117            # Literalize "[" characters to avoid TIM parsing them
118            if name in ["str", "multiline_str"]:
119                if len(RE_MARKUP.findall(content)) > 0:
120                    content = content.replace("[", r"\[")
121
122                content = content.replace("\\n", "\n")
123
124            if self.match_formatter is not None:
125                content = self.match_formatter(matchobj, content)
126
127                if content == "":
128                    return ""
129
130            tag = f"{self.prefix}{name}"
131            style = f"[{tag}]{{}}[/{tag}]"
132
133            return style.format(content)
134
135        text = self._pattern.sub(_insert_style, text)
136        self._highlight_cache[cache_key] = text
137
138        return text
139
140    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
141        """Yields some fancy looking repl text."""
142
143        preview = self("highlight_python()") + "\x1b[0m"
144        pattern = self._pattern.pattern
145
146        if len(pattern) > 40:
147            pattern = pattern[:38] + "..."
148
149        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
150        yield {"text": str(preview), "highlight": False}
151
152        yield ">"

A class to highlight strings using regular expressions.

This class must be provided with a list of styles. These styles are really just a tuple of the markup alias name, and their associated RE patterns. If all aliases in the instance use the same prefix, it can be given under the prefix key and omitted from the style names.

On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:

(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:

[{prefix?}{name}]{content}[/{prefix}{name}]

Oddities to keep in mind:

  • Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
  • Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
RegexHighlighter( styles: list[tuple[str, str]], prefix: str = '', pre_formatter: Optional[Callable[[str], str]] = None, match_formatter: Optional[Callable[[Match, str], str]] = None, re_flags: int = 0)
styles: list[tuple[str, str]]

A list of tuples of (style_alias, pattern_str).

prefix: str = ''

Some string to insert before each style alias.

pre_formatter: Optional[Callable[[str], str]] = None

A callable that formats the input string, before any highlighting is done to it.

match_formatter: Optional[Callable[[Match, str], str]] = None

A callable of (match, content) that gets called on every match.

Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.

re_flags: int = 0

All regex flags to apply when compiling the generated pattern, OR-d (|) together.

highlight_python = RegexHighlighter(styles=[('multiline_str', '([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3}'), ('str', '([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\'))'), ('comment', '(#.*)'), ('keyword', '\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:True)|(?:and)|(?:as)|(?:assert)|(?:async)|(?:await)|(?:break)|(?:class)|(?:continue)|(?:def)|(?:del)|(?:elif)|(?:else)|(?:except)|(?:finally)|(?:for)|(?:from)|(?:global)|(?:if)|(?:import)|(?:in)|(?:is)|(?:lambda)|(?:nonlocal)|(?:not)|(?:or)|(?:pass)|(?:raise)|(?:return)|(?:try)|(?:while)|(?:with)|(?:yield)|(?:builtin)|(?:function)|(?:module)+)\\b'), ('builtin', '\\b(?<!\\.)((?:ArithmeticError)|(?:AssertionError)|(?:AttributeError)|(?:BaseException)|(?:BlockingIOError)|(?:BrokenPipeError)|(?:BufferError)|(?:BytesWarning)|(?:ChildProcessError)|(?:ConnectionAbortedError)|(?:ConnectionError)|(?:ConnectionRefusedError)|(?:ConnectionResetError)|(?:DeprecationWarning)|(?:EOFError)|(?:Ellipsis)|(?:EncodingWarning)|(?:EnvironmentError)|(?:Exception)|(?:False)|(?:FileExistsError)|(?:FileNotFoundError)|(?:FloatingPointError)|(?:FutureWarning)|(?:GeneratorExit)|(?:IOError)|(?:ImportError)|(?:ImportWarning)|(?:IndentationError)|(?:IndexError)|(?:InterruptedError)|(?:IsADirectoryError)|(?:KeyError)|(?:KeyboardInterrupt)|(?:LookupError)|(?:MemoryError)|(?:ModuleNotFoundError)|(?:NameError)|(?:None)|(?:NotADirectoryError)|(?:NotImplemented)|(?:NotImplementedError)|(?:OSError)|(?:OverflowError)|(?:PendingDeprecationWarning)|(?:PermissionError)|(?:ProcessLookupError)|(?:RecursionError)|(?:ReferenceError)|(?:ResourceWarning)|(?:RuntimeError)|(?:RuntimeWarning)|(?:StopAsyncIteration)|(?:StopIteration)|(?:SyntaxError)|(?:SyntaxWarning)|(?:SystemError)|(?:SystemExit)|(?:TabError)|(?:TimeoutError)|(?:True)|(?:TypeError)|(?:UnboundLocalError)|(?:UnicodeDecodeError)|(?:UnicodeEncodeError)|(?:UnicodeError)|(?:UnicodeTranslateError)|(?:UnicodeWarning)|(?:UserWarning)|(?:ValueError)|(?:Warning)|(?:ZeroDivisionError)|(?:__build_class__)|(?:__debug__
)|(?:__doc__)|(?:__import__)|(?:__loader__)|(?:__name__)|(?:__package__)|(?:__spec__)|(?:abs)|(?:aiter)|(?:all)|(?:anext)|(?:any)|(?:ascii)|(?:bin)|(?:bool)|(?:breakpoint)|(?:bytearray)|(?:bytes)|(?:callable)|(?:chr)|(?:classmethod)|(?:compile)|(?:complex)|(?:copyright)|(?:credits)|(?:delattr)|(?:dict)|(?:dir)|(?:divmod)|(?:enumerate)|(?:eval)|(?:exec)|(?:exit)|(?:filter)|(?:float)|(?:format)|(?:frozenset)|(?:getattr)|(?:globals)|(?:hasattr)|(?:hash)|(?:help)|(?:hex)|(?:id)|(?:input)|(?:int)|(?:isinstance)|(?:issubclass)|(?:iter)|(?:len)|(?:license)|(?:list)|(?:locals)|(?:map)|(?:max)|(?:memoryview)|(?:min)|(?:next)|(?:object)|(?:oct)|(?:open)|(?:ord)|(?:pow)|(?:print)|(?:property)|(?:quit)|(?:range)|(?:repr)|(?:reversed)|(?:round)|(?:set)|(?:setattr)|(?:slice)|(?:sorted)|(?:staticmethod)|(?:str)|(?:sum)|(?:super)|(?:tuple)|(?:type)|(?:vars)|(?:zip))\\b'), ('identifier', '([^ \\.=]+)(?=\\()'), ('global', '(?<=\\b)([A-Z]\\w+)'), ('number', '(?<=\\b)((?:0x[\\da-zA-Z]+)|(?:\\d+))')], prefix='code.', pre_formatter=None, match_formatter=None, re_flags=0, _pattern=re.compile('(?P<multiline_str>([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3})|(?P<str>([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\')))|(?P<comment>(#.*))|(?P<keyword>\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:Tru), _highlight_cache={})