pytermgui.regex

This module contains all of the regex-related names and utilities.

View Source
  0"""This module contains all of the regex-related names and utilities."""
  1
  2import re
  3from functools import lru_cache
  4
# Matches one escape sequence in any of three shapes, capturing its payload lazily:
#   group 1: `ESC [ ... m` or `ESC [ ... H`
#   group 2: `ESC ] ... ESC \`
#   group 3: `ESC _G ... ESC \`
RE_ANSI = re.compile(r"(?:\x1b\[(.*?)[mH])|(?:\x1b\](.*?)\x1b\\)|(?:\x1b_G(.*?)\x1b\\)")
# Matches `ESC ]8;; <group 1> ESC \ <group 2> ESC ]8;; ESC \`.
# NOTE(review): presumably group 1 is the link target and group 2 the visible label — confirm.
RE_LINK = re.compile(r"(?:\x1b]8;;(.*?)\x1b\\(.*?)\x1b]8;;\x1b\\)")
# Matches `!name` (lowercase letters, digits, underscores; group 1 includes the `!`),
# optionally followed by a parenthesized argument (group 2) made of word characters
# and any of `/ . ? - = :`.
RE_MACRO = re.compile(r"(![a-z0-9_]+)(?:\(([\w\/\.?\-=:]+)\))?")
# Matches a bracketed tag without nested brackets. Group 1 is the whole match,
# group 2 the run of backslashes directly before `[` (possibly empty), group 3 the
# text between the brackets.
RE_MARKUP = re.compile(r"((\\*)\[([^\[\]]+)\])")
# Matches `ESC [ <digits> ; <digits> H`, capturing both numbers.
RE_POSITION = re.compile(r"\x1b\[(\d+);(\d+)H")
# Matches `ESC [ 4 ; <digits> ; <digits> t`, capturing both numbers.
# NOTE(review): looks like a terminal's pixel-size report — confirm against the caller.
RE_PIXEL_SIZE = re.compile(r"\x1b\[4;([\d]+);([\d]+)t")

# Matches a string that consists solely of 1 to 3 digits (anchored at both ends).
RE_256 = re.compile(r"^([\d]{1,3})$")
# Matches six hexadecimal digits with an optional leading `#`; the pattern is not anchored.
RE_HEX = re.compile(r"(?:#)?([0-9a-fA-F]{6})")
# Matches three 1-3 digit numbers joined by `;`, captured together as one group.
RE_RGB = re.compile(r"(\d{1,3};\d{1,3};\d{1,3})")

# Names exported by a star-import of this module.
# NOTE(review): `has_open_sequence` is defined in this module but is not listed here —
# confirm whether that is intentional.
__all__ = [
    "strip_ansi",
    "strip_markup",
    "real_length",
]
 21
 22
 23@lru_cache
 24def strip_ansi(text: str) -> str:
 25    """Removes ANSI sequences from text.
 26
 27    Args:
 28        text: A string or bytes object containing 0 or more ANSI sequences.
 29
 30    Returns:
 31        The text without any ANSI sequences.
 32    """
 33
 34    if hasattr(text, "plain"):
 35        return text.plain  # type: ignore
 36
 37    return RE_ANSI.sub("", text)
 38
 39
 40@lru_cache
 41def strip_markup(text: str) -> str:
 42    """Removes markup tags from text.
 43
 44    Args:
 45        text: A string or bytes object containing 0 or more markup tags.
 46
 47    Returns:
 48        The text without any markup tags.
 49    """
 50
 51    return RE_MARKUP.sub("", text)
 52
 53
 54@lru_cache(maxsize=1024)
 55def real_length(text: str) -> int:
 56    """Gets the display-length of text.
 57
 58    This length means no ANSI sequences are counted. This method is a convenience wrapper
 59    for `len(strip_ansi(text))`.
 60
 61    Args:
 62        text: The text to calculate the length of.
 63
 64    Returns:
 65        The display-length of text.
 66    """
 67
 68    return len(strip_ansi(text))
 69
 70
 71@lru_cache(maxsize=1024)
 72def has_open_sequence(text: str) -> bool:
 73    """Figures out if the given text has any unclosed ANSI sequences.
 74
 75    It supports standard SGR (`\\x1b[1mHello`), OSC (`\\x1b[30;2ST\\x1b\\\\`) and Kitty APC codes
 76    (`\x1b_Garguments;hex_data\\x1b\\\\`). It also recognizes incorrect syntax; it only considers
 77    a tag closed when it is using the right closing sequence, e.g. `m` or `H` for SGR, `\\x1b\\\\`
 78    for OSC and APC types.
 79
 80    Args:
 81        text: The text to test.
 82
 83    Returns:
 84        True if there is at least one tag that hasn't been closed, False otherwise.
 85    """
 86
 87    is_osc = False
 88    is_sgr = False
 89    is_apc = False
 90
 91    open_count = 0
 92    sequence = ""
 93
 94    for char in text:
 95        if char == "\x1b":
 96            open_count += 1
 97            sequence += char
 98            continue
 99
100        if len(sequence) == 0:
101            continue
102
103        # Ignore OSC and APC closers as new openers
104        if char == "\\" and sequence[-1] == "\x1b":
105            open_count -= 1
106
107        is_osc = is_osc or sequence[:2] == "\x1b]"
108        is_sgr = is_sgr or sequence[:2] == "\x1b["
109        is_apc = is_apc or sequence[:3] == "\x1b_G"
110
111        sequence += char
112        if (is_osc or is_apc) and sequence[-2:] == "\x1b\\":
113            sequence = ""
114            open_count -= 1
115
116        elif is_sgr and char in {"m", "H"}:
117            sequence = ""
118            open_count -= 1
119
120    return len(sequence) != 0 or open_count != 0
#  
@lru_cache
def strip_ansi(text: str) -> str:
View Source
@lru_cache
def strip_ansi(text: str) -> str:
    """Returns `text` with every ANSI sequence removed.

    Args:
        text: A string containing 0 or more ANSI sequences. An object exposing a
            `plain` attribute is accepted as well; that attribute is returned as-is.

    Returns:
        The text without any ANSI sequences.
    """

    try:
        # Objects exposing `plain` are answered from that attribute, without
        # running the regex.
        return text.plain  # type: ignore
    except AttributeError:
        pass

    return RE_ANSI.sub("", text)

Removes ANSI sequences from text.

Args
  • text: A string containing 0 or more ANSI sequences. Objects exposing a plain attribute are also accepted; that attribute is returned unchanged.
Returns

The text without any ANSI sequences.

#  
@lru_cache
def strip_markup(text: str) -> str:
View Source
@lru_cache
def strip_markup(text: str) -> str:
    """Returns `text` with every markup tag removed.

    Args:
        text: A string containing 0 or more markup tags.

    Returns:
        The text without any markup tags.
    """

    # Every match of the tag pattern is replaced by the empty string.
    return re.sub(RE_MARKUP, "", text)

Removes markup tags from text.

Args
  • text: A string containing 0 or more markup tags.
Returns

The text without any markup tags.

#  
@lru_cache(maxsize=1024)
def real_length(text: str) -> int:
View Source
@lru_cache(maxsize=1024)
def real_length(text: str) -> int:
    """Returns the length of `text` once its ANSI sequences are stripped.

    ANSI sequences do not contribute to the result. This function is a convenience
    wrapper for `len(strip_ansi(text))`.

    Args:
        text: The text to calculate the length of.

    Returns:
        The display-length of text.
    """

    visible = strip_ansi(text)
    return len(visible)

Gets the display-length of text.

ANSI sequences are not counted towards this length. This function is a convenience wrapper for len(strip_ansi(text)).

Args
  • text: The text to calculate the length of.
Returns

The display-length of text.