pytermgui.highlighters
This module provides the Highlighter
class, and some pre-configured instances.
View Source
"""This module provides the `Highlighter` class, and some pre-configured instances."""

from __future__ import annotations
import re
import keyword
import builtins
from dataclasses import dataclass, field
from typing import Pattern, Match, Protocol, Callable

from .regex import RE_MARKUP

__all__ = [
    "Highlighter",
    "RegexHighlighter",
    "highlight_python",
]


class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """The protocol for highlighters."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text.

        Args:
            text: The text to highlight.
            cache: If set (default), results will be stored, keyed by their respective
                inputs, and retrieved the next time the same key is given.
        """


@dataclass
class RegexHighlighter(Highlighter):
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern.

        Every `(name, pattern)` style becomes a `(?P<name>pattern)` alternative, and
        the alternatives are joined with `|` in the order they were given.
        """

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)
        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                if one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. If unset, the cache
                is neither read nor written.

        Returns:
            The text with every match wrapped as `[{prefix}{name}]...[/{prefix}{name}]`.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # Without a named group there is no style to apply (this happens for the
            # empty pattern produced by an empty `styles` list), so leave the matched
            # text untouched instead of emitting a bogus "None" tag.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            # Literalize "[" characters to avoid TIM parsing them
            if name == "str" and RE_MARKUP.findall(content):
                content = content.replace("[", r"\[")

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

            tag = f"{self.prefix}{name}"

            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        # Only store the result when caching was requested.
        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted


_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(f"(?:{keyw})" for keyw in keyword.kwlist)
_STR_DELIMS = "|".join(('(?:"|("""))', "(?:'|('''))"))

highlight_python = RegexHighlighter(
    prefix="code.",
    styles=[
        ("str", rf"[frbu]*?(?P<str_start>(?:{_STR_DELIMS})).+(?P=str_start)"),
        ("comment", "(#.*)"),
        ("keyword", rf"(\b)({_KEYWORD_NAMES}+)\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
View Source
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural interface describing a callable that highlights text."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Return a highlighted version of `text`.

        Args:
            text: The string to apply highlighting to.
            cache: When true (the default), the result is stored under its input and
                handed back the next time the same input is given.
        """
The protocol for highlighters.
View Source
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder `__init__` that refuses protocols and otherwise installs the real one.

    Raises TypeError when the instance's class has a truthy `_is_protocol`. If the
    class already has some other `__init__`, nothing is done. Otherwise the first
    `__init__` in the MRO that is not this function is stored on the class and
    called with the given arguments.
    """
    klass = type(self)

    if klass._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # A custom `__init__` is already in place, so there is nothing to resolve.
    # Returning here also avoids a RecursionError (see bpo-45121).
    if klass.__init__ is not _no_init_or_replace_init:
        return

    # Walk the MRO for the first `__init__` that is not this placeholder; classes
    # that define none count as still using the placeholder. `object.__init__`
    # is the fallback, which should not actually be needed.
    candidates = (
        base.__dict__.get('__init__', _no_init_or_replace_init)
        for base in klass.__mro__
    )
    klass.__init__ = next(
        (init for init in candidates if init is not _no_init_or_replace_init),
        object.__init__,
    )

    # Later instantiations use the stored `__init__` directly and skip this function.
    klass.__init__(self, *args, **kwargs)
View Source
@dataclass
class RegexHighlighter(Highlighter):
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern.

        Every `(name, pattern)` style becomes a `(?P<name>pattern)` alternative, and
        the alternatives are joined with `|` in the order they were given.
        """

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)
        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                if one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. If unset, the cache
                is neither read nor written.

        Returns:
            The text with every match wrapped as `[{prefix}{name}]...[/{prefix}{name}]`.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # Without a named group there is no style to apply (this happens for the
            # empty pattern produced by an empty `styles` list), so leave the matched
            # text untouched instead of emitting a bogus "None" tag.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            # Literalize "[" characters to avoid TIM parsing them
            if name == "str" and RE_MARKUP.findall(content):
                content = content.replace("[", r"\[")

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

            tag = f"{self.prefix}{name}"

            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        # Only store the result when caching was requested.
        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted
A class to highlight strings using regular expressions.
This class must be provided with a list of styles. These styles are really just a
tuple of the markup alias name, and their associated RE patterns. If all aliases
in the instance use the same prefix, it can be given under the prefix
key and
omitted from the style names.
On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:
(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:
[{prefix?}{name}]{content}[/{prefix}{name}]
Oddities to keep in mind:
- Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
- Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
A list of tuples of (style_alias, pattern_str).
Some string to insert before each style alias.
A callable that formats the input string, before any highlighting is done to it.
A callable of (match, content) that gets called on every match.
Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.
All regex flags to apply when compiling the generated pattern, OR-d (|) together.