pytermgui.highlighters
This module provides the `Highlighter` class, and some pre-configured instances.
"""This module provides the `Highlighter` class, and some pre-configured instances."""

from __future__ import annotations

import builtins
import keyword
import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Callable, Generator, Match, Pattern, Protocol

from .regex import RE_MARKUP

if TYPE_CHECKING:
    from .fancy_repr import FancyYield

__all__ = [
    "Highlighter",
    "RegexHighlighter",
    "highlight_python",
]


class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """The protocol for highlighters."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text.

        Args:
            text: The text to highlight.
            cache: If set (default), results will be stored, keyed by their respective
                inputs, and retrieved the next time the same key is given.
        """


@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern of `|`-separated named groups."""

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                when one is set.
            cache: If set (default), the result is stored keyed by the pre-formatted
                text, and returned directly the next time the same key is given. When
                unset, the cache is neither read nor written.

        Returns:
            The text with every match wrapped in its style's markup tag.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        highlighted = self._pattern.sub(self._insert_style, text)

        # Only remember results the caller asked to have cached
        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def _insert_style(self, matchobj: Match) -> str:
        """Returns the match inserted into a markup style.

        Args:
            matchobj: A match of the combined pattern.

        Returns:
            The matched content wrapped in `[{prefix}{name}]...[/{prefix}{name}]`, an
            empty string if `match_formatter` reduced the content to nothing, or the
            matched text unchanged when no named group took part in the match.
        """

        name = matchobj.lastgroup

        # Happens when `styles` is empty: the pattern is "" and has no groups
        if name is None:
            return matchobj.group(0)

        content = matchobj.group(name)

        # Literalize "[" characters to avoid TIM parsing them
        if name in ("str", "multiline_str"):
            if RE_MARKUP.search(content) is not None:
                content = content.replace("[", r"\[")

            content = content.replace("\\n", "\n")

        if self.match_formatter is not None:
            content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

        tag = f"{self.prefix}{name}"

        return f"[{tag}]{content}[/{tag}]"

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repl text."""

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        # Keep the repr short; the combined pattern can get very long
        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"


# Alternations of non-capturing groups, one per name
_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(
    f"(?:{keyw})" for keyw in list(keyword.kwlist) + ["builtin", "function", "module"]
)

highlight_python = RegexHighlighter(
    prefix="code.",
    styles=[
        ("multiline_str", r"([frbu]*)\"{3}([\s\S]*?)(?<!\\)\"{3}"),
        (
            "str",
            r"([frbu]*(\".*?(?<!\\)\")|(\'.*?(?<!\\)\'))",
        ),
        ("comment", "(#.*)"),
        # NOTE(review): the trailing `+` binds only to the last alternative of
        # `_KEYWORD_NAMES`; kept as-is to preserve the existing matching behavior.
        ("keyword", rf"\b(?<![\.\-])()({_KEYWORD_NAMES}+)\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.=]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"(?<=\b)((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural interface that every highlighter callable conforms to."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Returns a highlighted version of `text`.

        Args:
            text: The text to highlight.
            cache: When set (the default), results are stored keyed by their
                respective inputs, and retrieved the next time the same key is given.
        """
The protocol for highlighters.
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder `__init__` that rejects protocols, else installs the real one.

    Raises TypeError when the instance's class has `_is_protocol` set. Otherwise,
    if this function is still the class' `__init__`, the first `__init__` found
    along the MRO that is not this function gets assigned to the class and is
    called with the given arguments.
    """
    klass = type(self)

    if klass._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # A custom `__init__` is already in place, so there is nothing to work out.
    # Looking one up regardless can lead to RecursionError. See bpo-45121.
    if klass.__init__ is not _no_init_or_replace_init:
        return

    # A protocol subclass starts out with this function as its `__init__`. Its
    # first instantiation lands here, where the MRO is searched for a proper
    # `__init__` to put in this function's place. Later instantiations then go
    # straight to that `__init__` and never reach this function again.
    inherited_inits = (
        ancestor.__dict__.get('__init__', _no_init_or_replace_init)
        for ancestor in klass.__mro__
    )
    klass.__init__ = next(
        (found for found in inherited_inits if found is not _no_init_or_replace_init),
        object.__init__,  # fallback; should not happen
    )

    klass.__init__(self, *args, **kwargs)
@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern of `|`-separated named groups."""

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                when one is set.
            cache: If set (default), the result is stored keyed by the pre-formatted
                text, and returned directly the next time the same key is given. When
                unset, the cache is neither read nor written.

        Returns:
            The text with every match wrapped in its style's markup tag.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        highlighted = self._pattern.sub(self._insert_style, text)

        # Only remember results the caller asked to have cached
        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def _insert_style(self, matchobj: Match) -> str:
        """Returns the match inserted into a markup style.

        Args:
            matchobj: A match of the combined pattern.

        Returns:
            The matched content wrapped in `[{prefix}{name}]...[/{prefix}{name}]`, an
            empty string if `match_formatter` reduced the content to nothing, or the
            matched text unchanged when no named group took part in the match.
        """

        name = matchobj.lastgroup

        # Happens when `styles` is empty: the pattern is "" and has no groups
        if name is None:
            return matchobj.group(0)

        content = matchobj.group(name)

        # Literalize "[" characters to avoid TIM parsing them
        if name in ("str", "multiline_str"):
            if RE_MARKUP.search(content) is not None:
                content = content.replace("[", r"\[")

            content = content.replace("\\n", "\n")

        if self.match_formatter is not None:
            content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

        tag = f"{self.prefix}{name}"

        return f"[{tag}]{content}[/{tag}]"

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repl text."""

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        # Keep the repr short; the combined pattern can get very long
        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"
A class to highlight strings using regular expressions.
This class must be provided with a list of styles. Each style is really just a tuple of the markup alias name and its associated RE pattern. If *all* aliases in the instance use the same prefix, it can be given under the `prefix` key and omitted from the style names.
On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:
(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:
[{prefix?}{name}]{content}[/{prefix}{name}]
Oddities to keep in mind:
- Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
- Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
A callable that formats the input string, before any highlighting is done to it.