pytermgui.regex

This module contains all of the regex-related names and utilities.

"""This module contains all of the regex-related names and utilities."""

import re
from functools import lru_cache

# Matches any of three escape-sequence shapes, each capturing its payload in its own group:
#   group 1: `\x1b[` ... terminated by `m` or `H`
#   group 2: `\x1b]` ... terminated by `\x1b\`
#   group 3: `\x1b_G` ... terminated by `\x1b\`
RE_ANSI = re.compile(r"(?:\x1b\[(.*?)[mH])|(?:\x1b\](.*?)\x1b\\)|(?:\x1b_G(.*?)\x1b\\)")
# Matches `\x1b]8;;<group 1>\x1b\<group 2>\x1b]8;;\x1b\`.
# NOTE(review): presumably group 1 is a hyperlink target and group 2 its label - confirm.
RE_LINK = re.compile(r"(?:\x1b]8;;(.*?)\x1b\\(.*?)\x1b]8;;\x1b\\)")
# Matches `!name` (lowercase letters, digits, underscores; group 1), optionally followed
# by a parenthesized argument (group 2).
RE_MACRO = re.compile(r"(![a-z0-9_]+)(?:\(([\w\/\.?\-=:]+)\))?")
# Matches a `[...]` tag that contains no square brackets. Group 1 is the whole match,
# group 2 the run of backslashes directly before the tag, group 3 the tag's content.
RE_MARKUP = re.compile(r"((\\*)\[([^\[\]]+)\])")
# Matches `\x1b[<digits>;<digits>H`, capturing both numbers.
RE_POSITION = re.compile(r"\x1b\[(\d+);(\d+)H")
# Matches `\x1b[4;<digits>;<digits>t`, capturing both numbers.
# NOTE(review): looks like a terminal's pixel-size report - confirm.
RE_PIXEL_SIZE = re.compile(r"\x1b\[4;([\d]+);([\d]+)t")

# Matches a string made up of one to three digits only.
RE_256 = re.compile(r"^([\d]{1,3})$")
# Matches six hexadecimal digits with an optional leading `#`; the pattern is unanchored.
RE_HEX = re.compile(r"(?:#)?([0-9a-fA-F]{6})")
# Matches three semicolon-separated numbers of one to three digits each, as one group.
RE_RGB = re.compile(r"(\d{1,3};\d{1,3};\d{1,3})")

# Names exported by `from <module> import *`.
__all__ = [
    "strip_ansi",
    "strip_markup",
    "real_length",
]
 22
 23
 24@lru_cache()
 25def strip_ansi(text: str) -> str:
 26    """Removes ANSI sequences from text.
 27
 28    Args:
 29        text: A string or bytes object containing 0 or more ANSI sequences.
 30
 31    Returns:
 32        The text without any ANSI sequences.
 33    """
 34
 35    if hasattr(text, "plain"):
 36        return text.plain  # type: ignore
 37
 38    return RE_ANSI.sub("", text)
 39
 40
 41@lru_cache()
 42def strip_markup(text: str) -> str:
 43    """Removes markup tags from text.
 44
 45    Args:
 46        text: A string or bytes object containing 0 or more markup tags.
 47
 48    Returns:
 49        The text without any markup tags.
 50    """
 51
 52    return RE_MARKUP.sub("", text)
 53
 54
 55@lru_cache(maxsize=1024)
 56def real_length(text: str) -> int:
 57    """Gets the display-length of text.
 58
 59    This length means no ANSI sequences are counted. This method is a convenience wrapper
 60    for `len(strip_ansi(text))`.
 61
 62    Args:
 63        text: The text to calculate the length of.
 64
 65    Returns:
 66        The display-length of text.
 67    """
 68
 69    return len(strip_ansi(text))
 70
 71
 72@lru_cache(maxsize=1024)
 73def has_open_sequence(text: str) -> bool:
 74    """Figures out if the given text has any unclosed ANSI sequences.
 75
 76    It supports standard SGR (`\\x1b[1mHello`), OSC (`\\x1b[30;2ST\\x1b\\\\`) and Kitty APC codes
 77    (`\x1b_Garguments;hex_data\\x1b\\\\`). It also recognizes incorrect syntax; it only considers
 78    a tag closed when it is using the right closing sequence, e.g. `m` or `H` for SGR, `\\x1b\\\\`
 79    for OSC and APC types.
 80
 81    Args:
 82        text: The text to test.
 83
 84    Returns:
 85        True if there is at least one tag that hasn't been closed, False otherwise.
 86    """
 87
 88    is_osc = False
 89    is_sgr = False
 90    is_apc = False
 91
 92    open_count = 0
 93    sequence = ""
 94
 95    for char in text:
 96        if char == "\x1b":
 97            open_count += 1
 98            sequence += char
 99            continue
100
101        if len(sequence) == 0:
102            continue
103
104        # Ignore OSC and APC closers as new openers
105        if char == "\\" and sequence[-1] == "\x1b":
106            open_count -= 1
107
108        is_osc = is_osc or sequence[:2] == "\x1b]"
109        is_sgr = is_sgr or sequence[:2] == "\x1b["
110        is_apc = is_apc or sequence[:3] == "\x1b_G"
111
112        sequence += char
113        if (is_osc or is_apc) and sequence[-2:] == "\x1b\\":
114            sequence = ""
115            open_count -= 1
116
117        elif is_sgr and char in {"m", "H"}:
118            sequence = ""
119            open_count -= 1
120
121    return len(sequence) != 0 or open_count != 0
@lru_cache()
def strip_ansi(text: str) -> str:
@lru_cache()
def strip_ansi(text: str) -> str:
    """Returns text with every `RE_ANSI` match deleted.

    Results are memoized with `lru_cache`, so `text` has to be hashable.

    Args:
        text: The string to clean. If the given object has a `plain` attribute,
            that attribute is returned as-is and no substitution takes place.

    Returns:
        The text without any ANSI sequences.
    """

    if not hasattr(text, "plain"):
        return RE_ANSI.sub("", text)

    # The object already carries its own plain representation; hand that back.
    return text.plain  # type: ignore

Removes ANSI sequences from text.

Args
  • text: A string containing 0 or more ANSI sequences.
Returns

The text without any ANSI sequences.

@lru_cache()
def strip_markup(text: str) -> str:
@lru_cache()
def strip_markup(text: str) -> str:
    """Returns text with every `RE_MARKUP` match deleted.

    Results are memoized with `lru_cache`, so `text` has to be hashable.

    Args:
        text: A string containing 0 or more markup tags.

    Returns:
        The text without any markup tags.
    """

    without_tags = RE_MARKUP.sub("", text)
    return without_tags

Removes markup tags from text.

Args
  • text: A string containing 0 or more markup tags.
Returns

The text without any markup tags.

@lru_cache(maxsize=1024)
def real_length(text: str) -> int:
@lru_cache(maxsize=1024)
def real_length(text: str) -> int:
    """Measures text once its ANSI sequences have been removed.

    Shorthand for `len(strip_ansi(text))`; up to 1024 results are memoized.

    Args:
        text: The text to calculate the length of.

    Returns:
        The number of characters left after `strip_ansi` has processed text.
    """

    visible = strip_ansi(text)
    return len(visible)

Gets the display-length of text.

This length means no ANSI sequences are counted. This method is a convenience wrapper for len(strip_ansi(text)).

Args
  • text: The text to calculate the length of.
Returns

The display-length of text.