pytermgui.markup.parsing

The internals of the TIM engine.
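
A quick orientation before the source: the sketch below pushes a small piece of TIM markup through parse, the highest-level entry point in this module. The tag names are assumptions ("bold" as a style tag, "141" as a 256-color tag); the exact escape sequences in the output depend on the style and color maps.

    from pytermgui.markup.parsing import parse

    # Convert TIM markup into the ANSI-coded string it represents.
    # A closing [/] reset is appended automatically when missing.
    ansi = parse("[bold 141]Hello[/] world")
    print(repr(ansi))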

  1"""The internals of the TIM engine."""
  2
  3from __future__ import annotations
  4
  5import json
  6from typing import Callable, Iterator, Protocol, TypedDict
  7from warnings import filterwarnings, warn
  8
  9from ..colors import str_to_color
 10from ..exceptions import ColorSyntaxError, MarkupSyntaxError
 11from ..regex import RE_ANSI_NEW as RE_ANSI
 12from ..regex import RE_MACRO, RE_MARKUP, RE_POSITION
 13from .style_maps import CLEARERS, REVERSE_CLEARERS, REVERSE_STYLES, STYLES
 14from .tokens import (
 15    AliasToken,
 16    ClearToken,
 17    ColorToken,
 18    CursorToken,
 19    HLinkToken,
 20    MacroToken,
 21    PlainToken,
 22    StyleToken,
 23    Token,
 24)
 25
 26# TODO: Improve first-run performance.
 27
 28filterwarnings("always")
 29
 30
 31LINK_TEMPLATE = "\x1b]8;;{uri}\x1b\\{label}\x1b]8;;\x1b\\"
 32
 33__all__ = [
 34    "ContextDict",
 35    "create_context_dict",
 36    "consume_tag",
 37    "tokenize_markup",
 38    "tokenize_ansi",
 39    "optimize_tokens",
 40    "optimize_markup",
 41    "tokens_to_markup",
 42    "get_markup",
 43    "parse",
 44    "parse_tokens",
 45]
 46
 47
 48class MacroType(Protocol):  # pylint: disable=too-few-public-methods
 49    """A protocol for TIM macros."""
 50
 51    def __call__(*args: str) -> str:  # pylint: disable=no-method-argument
 52        """Applies the macro."""
 53
 54
 55class ContextDict(TypedDict):
 56    """A dictionary to hold context about a markup language's environment.
 57
 58    It has two sub-dicts:
 59
 60    - aliases
 61    - macros
 62
 63    For information about what they do and contain, see the
 64    [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage).
 65    """
 66
 67    aliases: dict[str, str]
 68    macros: dict[str, MacroType]
 69
 70
 71def create_context_dict() -> ContextDict:
 72    """Creates a new context dictionary, initializing its sub-dicts.
 73
 74    Returns:
 75        A dictionary with `aliases` and `macros` defined as empty sub-dicts.
 76    """
 77
 78    return {"aliases": {}, "macros": {}}
 79
 80
 81def consume_tag(tag: str) -> Token:  # pylint: disable=too-many-return-statements
 82    """Consumes a tag text, returns the associated Token."""
 83
 84    if tag in STYLES:
 85        return StyleToken(tag)
 86
 87    if tag.startswith("/"):
 88        return ClearToken(tag)
 89
 90    if tag.startswith("!"):
 91        matchobj = RE_MACRO.match(tag)
 92
 93        if matchobj is not None:
 94            name, args = matchobj.groups()
 95
 96            if args is None:
 97                return MacroToken(name, tuple())
 98
 99            return MacroToken(name, tuple(args.split(":")))
100
101    if tag.startswith("~"):
102        return HLinkToken(tag[1:])
103
104    if tag.startswith("(") and tag.endswith(")"):
105        values = tag[1:-1].split(";")
106        if len(values) != 2:
107            raise ValueError(
108                f"Cursor tags must have exactly 2 values delimited by `;`, got {tag!r}."
109            )
110
111        return CursorToken(tag[1:-1], *map(int, values))
112
113    token: Token
114    try:
115        token = ColorToken(tag, str_to_color(tag))
116
117    except ColorSyntaxError:
118        token = AliasToken(tag)
119
120    finally:
121        return token  # pylint: disable=lost-exception
122
123
124def tokenize_markup(text: str) -> Iterator[Token]:
125    """Converts some markup text into a stream of tokens.
126
127    Args:
128        text: Any valid markup.
129
130    Yields:
131        The generated tokens, in the order they occur within the markup.
132    """
133
134    cursor = 0
135    length = len(text)
136    has_inverse = False
137    for matchobj in RE_MARKUP.finditer(text):
138        full, escapes, content = matchobj.groups()
139        start, end = matchobj.span()
140
141        if cursor < start:
142            yield PlainToken(text[cursor:start])
143
144        if not escapes == "":
145            _, remaining = divmod(len(escapes), 2)
146
147            yield PlainToken(full[max(1 - remaining, 1) :])
148            cursor = end
149
150            continue
151
152        for tag in content.split():
153            if tag == "inverse":
154                has_inverse = True
155
156            if tag == "/inverse":
157                has_inverse = False
158
159            consumed = consume_tag(tag)
160            if has_inverse:
161                if consumed.markup == "/fg":
162                    consumed = ClearToken("/fg")
163
164                elif consumed.markup == "/bg":
165                    consumed = ClearToken("/bg")
166
167            yield consumed
168
169        cursor = end
170
171    if cursor < length:
172        yield PlainToken(text[cursor:length])
173
174
175def tokenize_ansi(  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
176    text: str,
177) -> Iterator[Token]:
178    """Converts some ANSI-coded text into a stream of tokens.
179
180    Args:
181        text: Any valid ANSI-coded text.
182
183    Yields:
184        The generated tokens, in the order they occur within the text.
185    """
186
187    cursor = 0
188
189    for matchobj in RE_ANSI.finditer(text):
190        start, end = matchobj.span()
191
192        csi = matchobj.groups()[0:2]
193        link_osc = matchobj.groups()[2:4]
194
195        if link_osc != (None, None):
196            cursor = end
197            uri, label = link_osc
198
199            yield HLinkToken(uri)
200            yield PlainToken(label)
201
202            continue
203
204        full, content = csi
205
206        if cursor < start:
207            yield PlainToken(text[cursor:start])
208
209        cursor = end
210
211        code = ""
212
213        # Position
214        posmatch = RE_POSITION.match(full)
215
216        if posmatch is not None:
217            ypos, xpos = posmatch.groups()
218            if not ypos and not xpos:
219                raise ValueError(
220                    f"Cannot parse cursor when no position is supplied. Match: {posmatch!r}"
221                )
222
223            yield CursorToken(content, int(ypos) or None, int(xpos) or None)
224            continue
225
226        parts = content.split(";")
227
228        state = None
229        color_code = ""
230        for part in parts:
231            if state is None:
232                if part in REVERSE_STYLES:
233                    yield StyleToken(REVERSE_STYLES[part])
234                    continue
235
236                if part in REVERSE_CLEARERS:
237                    yield ClearToken(REVERSE_CLEARERS[part])
238                    continue
239
240                if part in ("38", "48"):
241                    state = "COLOR"
242                    color_code += part + ";"
243                    continue
244
245                # standard colors
246                try:
247                    yield ColorToken(part, str_to_color(part))
248                    continue
249
250                except ColorSyntaxError as exc:
251                    raise ValueError(f"Could not parse color tag {part!r}.") from exc
252
253            if state != "COLOR":
254                continue
255
256            color_code += part + ";"
257
258            # Ignore incomplete RGB colors
259            if (
260                color_code.startswith(("38;2;", "48;2;"))
261                and len(color_code.split(";")) != 6
262            ):
263                continue
264
265            try:
266                code = color_code
267
268                if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")):
269                    stripped = code[5:-1]
270
271                    if code.startswith("4"):
272                        stripped = "@" + stripped
273
274                    code = stripped
275
276                yield ColorToken(code, str_to_color(code))
277
278            except ColorSyntaxError:
279                continue
280
281            state = None
282            color_code = ""
283
284    remaining = text[cursor:]
285    if len(remaining) > 0:
286        yield PlainToken(remaining)
287
288
289def eval_alias(text: str, context: ContextDict) -> str:
290    """Evaluates a space-delimited string of alias tags into their underlying value.
291
292    Args:
293        text: A space-separated string containing the aliases.
294
295    Returns:
296        The space-separated string that the input aliases represent.
297    """
298
299    aliases = context["aliases"]
300
301    evaluated = ""
302    for tag in text.split():
303        if tag not in aliases:
304            evaluated += tag + " "
305            continue
306
307        evaluated += eval_alias(aliases[tag], context)
308
309    return evaluated.rstrip(" ")
310
311
312def parse_plain(token: PlainToken, _: ContextDict, __: Callable[[], str]) -> str:
313    """Parses a plain token."""
314
315    return token.value
316
317
318def parse_color(token: ColorToken, _: ContextDict, __: Callable[[], str]) -> str:
319    """Parses a color token."""
320
321    return token.color.sequence
322
323
324def parse_style(token: StyleToken, _: ContextDict, __: Callable[[], str]) -> str:
325    """Parses a style token."""
326
327    index = STYLES[token.value]
328
329    return f"\x1b[{index}m"
330
331
332def parse_macro(
333    token: MacroToken, context: ContextDict, get_full: Callable[[], str]
334) -> tuple[MacroType, tuple[str, ...]]:
335    """Parses a macro token.
336
337    Returns:
338        A tuple containing the callable bound to the name, as well as the arguments
339        passed to it.
340    """
341
342    func = context["macros"].get(token.value)
343
344    if func is None:
345        dump = json.dumps(context["macros"], indent=2, default=str)
346
347        raise MarkupSyntaxError(
348            token.value, f"not defined in macro context: {dump}", get_full()
349        )
350
351    return func, token.arguments
352
353
354def parse_alias(
355    token: AliasToken, context: ContextDict, get_full: Callable[[], str]
356) -> str:
357    """Parses an alias token."""
358
359    if token.value not in context["aliases"]:
360        dump = json.dumps(context["aliases"], indent=2, default=str)
361
362        raise MarkupSyntaxError(
363            token.value, f"not defined in alias context: {dump}", get_full()
364        )
365
366    meaning = context["aliases"][token.value]
367
368    return eval_alias(meaning, context).rstrip(" ")
369
370
371def parse_clear(token: ClearToken, _: ContextDict, get_full: Callable[[], str]) -> str:
372    """Parses a clearer token."""
373
374    index = CLEARERS.get(token.value)
375    if index is None:
376        raise MarkupSyntaxError(
377            token.value, "not a recognized clearer or alias", get_full()
378        )
379
380    return f"\x1b[{index}m"
381
382
383def parse_cursor(token: CursorToken, _: ContextDict, __: Callable[[], str]) -> str:
384    """Parses a cursor token."""
385
386    ypos, xpos = map(lambda i: "" if i is None else i, token)
387
388    return f"\x1b[{ypos};{xpos}H"
389
390
391def optimize_tokens(tokens: list[Token]) -> Iterator[Token]:
392    """Optimizes a stream of tokens, only yielding functionally relevant ones.
393
394    Args:
395        tokens: Any list of Token objects. Usually obtained from `tokenize_markup`
396            or `tokenize_ansi`.
397
398    Yields:
399        All those tokens within the input iterator that are functionally relevant,
400            keeping their order.
401    """
402
403    previous: list[Token] = []
404    current_tag_group: list[Token] = []
405
406    def _diff_previous() -> Iterator[Token]:
407        """Find difference from the previously active list of tokens."""
408
409        applied = previous.copy()
410
411        for tkn in current_tag_group:
412            targets = []
413
414            clearer = Token.is_clear(tkn)
415            if Token.is_clear(tkn):
416                targets = [tkn.targets(tag) for tag in applied]
417
418            if tkn in previous and not clearer:
419                continue
420
421            if clearer and not any(targets):
422                continue
423
424            applied.append(tkn)
425            yield tkn
426
427    def _remove_redundant_color(token: Token) -> None:
428        """Removes non-functional colors.
429
430        These happen in the following ways:
431        - Multiple colors of the same channel (fg/bg) are present.
432        - A color is applied, then a clearer clears it.
433        """
434
435        for applied in current_tag_group.copy():
436            if Token.is_clear(applied) and applied.targets(token):
437                current_tag_group.remove(applied)
438
439            if not Token.is_color(applied):
440                continue
441
442            old = applied.color
443
444            if old.background == new.background:
445                current_tag_group.remove(applied)
446
447    for token in tokens:
448        if Token.is_plain(token):
449            yield from _diff_previous()
450            yield token
451
452            previous = current_tag_group.copy()
453
454            continue
455
456        if Token.is_color(token):
457            new = token.color
458
459            _remove_redundant_color(token)
460
461            if not any(token.markup == applied.markup for applied in current_tag_group):
462                current_tag_group.append(token)
463
464            continue
465
466        if token.is_style():
467            if not any(token == tag for tag in current_tag_group):
468                current_tag_group.append(token)
469
470            continue
471
472        if Token.is_clear(token):
473            applied = False
474            for tag in current_tag_group.copy():
475                if token.targets(tag) or token == tag:
476                    current_tag_group.remove(tag)
477                    applied = True
478
479            if not applied:
480                continue
481
482        current_tag_group.append(token)
483
484    yield from _diff_previous()
485
486
487def tokens_to_markup(tokens: list[Token]) -> str:
488    """Converts a token stream into the markup of its tokens.
489
490    Args:
491        tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or
492            `tokenize_ansi`.
493
494    Returns:
495        The markup the given tokens represent.
496    """
497
498    tags: list[Token] = []
499    markup = ""
500
501    for token in tokens:
502        if token.is_plain():
503            if len(tags) > 0:
504                markup += f"[{' '.join(tag.markup for tag in tags)}]"
505
506            markup += token.value
507            tags = []
508
509        else:
510            tags.append(token)
511
512    if len(tags) > 0:
513        markup += f"[{' '.join(tag.markup for tag in tags)}]"
514
515    return markup
516
517
518def get_markup(text: str) -> str:
519    """Gets the markup representing an ANSI-coded string."""
520
521    return tokens_to_markup(list(tokenize_ansi(text)))
522
523
524def optimize_markup(markup: str) -> str:
525    """Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup."""
526
527    return tokens_to_markup(list(optimize_tokens(list(tokenize_markup(markup)))))
528
529
530PARSERS = {
531    PlainToken: parse_plain,
532    ColorToken: parse_color,
533    StyleToken: parse_style,
534    MacroToken: parse_macro,
535    AliasToken: parse_alias,
536    ClearToken: parse_clear,
537    CursorToken: parse_cursor,
538}
539
540
541def _apply_macros(
542    text: str, macros: Iterator[tuple[MacroType, tuple[str, ...]]]
543) -> str:
544    """Applies macros to the given text.
545
546    Args:
547        text: The plain text the macros will apply to.
548        macros: Any iterator of MacroTokens that will be applied.
549
550    Returns:
551        The input plain text, with all macros applied to it. The macros will be applied
552        in the order they appear in.
553    """
554
555    for method, args in macros:
556        if len(args) > 0:
557            text = method(*args, text)
558            continue
559
560        text = method(text)
561
562    return text
563
564
565def _sub_aliases(tokens: list[Token], context: ContextDict) -> list[Token]:
566    """Substitutes all AliasTokens to their underlying values.
567
568    Args:
569        tokens: Any list of Tokens. When this iterator contains nothing
570            that can be interpreted as an alias, the same iterator turned into
571            a list will be returned.
572        context: The context that aliases will be searched in.
573    """
574
575    output: list[Token] = []
576
577    # It's more computationally efficient to create this lambda once and reuse it
578    # every time. There is no need to define a full function, as it just returns
579    # a function return.
580    get_full = (
581        lambda: tokens_to_markup(  # pylint: disable=unnecessary-lambda-assignment
582            tokens
583        )
584    )
585
586    for token in tokens:
587        if token.value in context["aliases"] and (
588            Token.is_clear(token) or Token.is_macro(token) or Token.is_alias(token)
589        ):
590            if Token.is_clear(token) or Token.is_macro(token):
591                token = AliasToken(token.value)
592
593            if Token.is_alias(token):
594                aliases_parsed = parse_alias(token, context, get_full)
595                output.extend(list(tokenize_markup(f"[{aliases_parsed}]")))
596
597            continue
598
599        if Token.is_macro(token) and token.value == "!link":
600            warn(
601                "Hyperlinks are no longer implemented as macros."
602                + " Prefer using the `~{uri}` syntax.",
603                DeprecationWarning,
604                stacklevel=4,
605            )
606
607            output.append(HLinkToken(":".join(token.arguments)))
608            continue
609
610        output.append(token)
611
612    return output
613
614
615def parse_tokens(  # pylint: disable=too-many-branches, too-many-locals
616    tokens: list[Token],
617    optimize: bool = False,
618    context: ContextDict | None = None,
619    append_reset: bool = True,
620    ignore_unknown_tags: bool = True,
621) -> str:
622    """Parses a stream of tokens into the ANSI-coded string they represent.
623
624    Args:
625        tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or
626            `tokenize_markup`.
627        optimize: If set, `optimize_tokens` will optimize the input iterator before
628            usage. This will incur a (minor) performance hit.
629        context: The context that aliases and macros found within the tokens will be
630            searched in.
631        append_reset: If set, `ClearToken("/")` will be appended to the token iterator,
632            clearing all styles.
633        ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags
634            will be silenced.
635
636    Returns:
637        The ANSI-coded string that the token stream represents.
638    """
639
640    if context is None:
641        context = create_context_dict()
642
643    token_list = list(_sub_aliases(tokens, context))
644
645    # It's more computationally efficient to create this lambda once and reuse it
646    # every time. There is no need to define a full function, as it just returns
647    # a function return.
648    get_full = (
649        lambda: tokens_to_markup(  # pylint: disable=unnecessary-lambda-assignment
650            tokens
651        )
652    )
653
654    if optimize:
655        token_list = list(optimize_tokens(tokens))
656
657    if append_reset:
658        token_list.append(ClearToken("/"))
659
660    link = None
661    output = ""
662    segment = ""
663    macros: list[MacroToken] = []
664    unknown_aliases: list[Token] = []
665
666    for token in token_list:
667        if token.is_plain():
668            value = _apply_macros(
669                token.value, (parse_macro(macro, context, get_full) for macro in macros)
670            )
671
672            if len(unknown_aliases) > 0:
673                output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]"
674                unknown_aliases = []
675
676            output += segment + (
677                value if link is None else LINK_TEMPLATE.format(uri=link, label=value)
678            )
679
680            segment = ""
681            continue
682
683        if token.is_hyperlink():
684            link = token.value
685            continue
686
687        if Token.is_macro(token):
688            macros.append(token)
689            continue
690
691        if Token.is_clear(token):
692            if token.value in ("/", "/~"):
693                link = None
694
695            found = False
696            for macro in macros.copy():
697                if token.targets(macro):
698                    macros.remove(macro)
699                    found = True
700                    break
701
702            if found and token.value != "/":
703                continue
704
705            if token.value.startswith("/!"):
706                raise ValueError(
707                    f"Cannot use clearer {token.value!r} with nothing to target."
708                )
709
710        try:
711            segment += PARSERS[type(token)](token, context, get_full)  # type: ignore
712
713        except MarkupSyntaxError:
714            if not ignore_unknown_tags:
715                raise
716
717            unknown_aliases.append(token)
718
719    if len(unknown_aliases) > 0:
720        output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]"
721
722    output += segment
723
724    return output
725
726
727def parse(
728    text: str,
729    optimize: bool = False,
730    context: ContextDict | None = None,
731    append_reset: bool = True,
732    ignore_unknown_tags: bool = True,
733) -> str:
734    """Parses markup into the ANSI-coded string it represents.
735
736    Args:
737        text: Any valid markup.
738        optimize: If set, `optimize_tokens` will optimize the tokens found within the
739            input markup before usage. This will incur a (minor) performance hit.
740        context: The context that aliases and macros found within the markup will be
741            searched in.
742        append_reset: If set, `[/]` will be appended to the token iterator, clearing all
743            styles.
744        ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags
745            will be silenced.
746
747    Returns:
748        The ANSI-coded string that the markup represents.
749    """
750
751    if context is None:
752        context = create_context_dict()
753
754    if append_reset and not text.endswith("/]"):
755        text += "[/]"
756
757    tokens = list(tokenize_markup(text))
758
759    return parse_tokens(
760        tokens,
761        optimize=optimize,
762        context=context,
763        append_reset=append_reset,
764        ignore_unknown_tags=ignore_unknown_tags,
765    )
class ContextDict(typing.TypedDict):

A dictionary to hold context about a markup language's environment.

It has two sub-dicts:

  • aliases
  • macros

For information about what they do and contain, see the [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage).
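
As a minimal sketch, a populated ContextDict might look like the following; the alias and macro shown here are hypothetical, and the "!"-prefixed macro key follows the convention used by MarkupLanguage.

    from pytermgui.markup.parsing import ContextDict

    # Hypothetical context: alias values are markup strings, macro values
    # are callables taking and returning str.
    context: ContextDict = {
        "aliases": {"title": "bold underline"},
        "macros": {"!upper": str.upper},
    }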

def create_context_dict() -> pytermgui.markup.parsing.ContextDict:

Creates a new context dictionary, initializing its sub-dicts.

Returns

A dictionary with aliases and macros defined as empty sub-dicts.
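
For illustration, a sketch that creates an empty context, registers a hypothetical alias and passes the result to parse; "bold 210" is assumed to be valid markup for the alias to expand into.

    from pytermgui.markup.parsing import create_context_dict, parse

    context = create_context_dict()
    context["aliases"]["error"] = "bold 210"  # hypothetical alias

    print(repr(parse("[error]Something went wrong[/]", context=context)))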

def consume_tag(tag: str) -> pytermgui.markup.tokens.Token:

Consumes a tag text, returns the associated Token.
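
A few hedged examples of the tag-to-token mapping, following the branches visible in the source above (token reprs may differ):

    from pytermgui.markup.parsing import consume_tag

    print(consume_tag("bold"))        # StyleToken, as "bold" is in STYLES
    print(consume_tag("/bold"))       # ClearToken, from the leading "/"
    print(consume_tag("!upper(a:b)")) # MacroToken, arguments split on ":"
    print(consume_tag("~https://example.com"))  # HLinkToken
    print(consume_tag("(3;5)"))       # CursorToken with the two position values
    print(consume_tag("141"))         # ColorToken; unknown tags fall back to
                                      # AliasToken when str_to_color raises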

def tokenize_markup(text: str) -> Iterator[pytermgui.markup.tokens.Token]:

Converts some markup text into a stream of tokens.

Args
  • text: Any valid markup.
Yields

The generated tokens, in the order they occur within the markup.
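
A small sketch of the tokenizer on a markup string (tag names assumed valid):

    from pytermgui.markup.parsing import tokenize_markup

    for token in tokenize_markup("[bold 141]Hello[/] world"):
        print(token)

    # Roughly: StyleToken("bold"), ColorToken("141", ...), PlainToken("Hello"),
    # ClearToken("/"), PlainToken(" world")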

def tokenize_ansi(text: str) -> Iterator[pytermgui.markup.tokens.Token]:

Converts some ANSI-coded text into a stream of tokens.

Args
  • text: Any valid ANSI-coded text.
Yields

The generated tokens, in the order they occur within the text.
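
A sketch of the inverse direction, feeding ANSI-coded text back in; the escape codes used are standard SGR sequences (1 for bold, 38;5;141 for a 256-color foreground, 0 for reset), and the resulting tag names depend on the style and clearer maps.

    from pytermgui.markup.parsing import tokenize_ansi

    for token in tokenize_ansi("\x1b[1m\x1b[38;5;141mHello\x1b[0m world"):
        print(token)

    # Roughly: StyleToken("bold"), ColorToken("141", ...), PlainToken("Hello"),
    # ClearToken("/"), PlainToken(" world")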

def optimize_tokens( tokens: list[pytermgui.markup.tokens.Token]) -> Iterator[pytermgui.markup.tokens.Token]:

Optimizes a stream of tokens, only yielding functionally relevant ones.

Args
  • tokens: Any list of Token objects. Usually obtained from tokenize_markup or tokenize_ansi.
Yields

All those tokens within the input iterator that are functionally relevant, keeping their order.
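
For instance, a repeated style tag and a clearer with nothing to target should not survive optimization; a sketch, assuming "bold" and "italic" are recognized style tags:

    from pytermgui.markup.parsing import (
        optimize_tokens,
        tokenize_markup,
        tokens_to_markup,
    )

    tokens = list(tokenize_markup("[bold][bold]Hello[/italic][/]"))
    print(tokens_to_markup(list(optimize_tokens(tokens))))
    # Expected to collapse to "[bold]Hello[/]"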

def optimize_markup(markup: str) -> str:

Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup.
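
A sketch of the markup-level wrapper on the same input as above:

    from pytermgui.markup.parsing import optimize_markup

    print(optimize_markup("[bold][bold]Hello[/italic][/]"))
    # Expected to collapse to "[bold]Hello[/]"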

def tokens_to_markup(tokens: list[pytermgui.markup.tokens.Token]) -> str:

Converts a token stream into the markup of its tokens.

Args
  • tokens: Any list of Token objects. Usually obtained from tokenize_markup or tokenize_ansi.
Returns

The markup the given tokens represent.
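
A round-trip sketch, markup to tokens and back (tag names assumed valid):

    from pytermgui.markup.parsing import tokenize_markup, tokens_to_markup

    tokens = list(tokenize_markup("[bold 141]Hello[/] world"))
    print(tokens_to_markup(tokens))
    # Expected to print the equivalent markup: "[bold 141]Hello[/] world"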

def get_markup(text: str) -> str:

Gets the markup representing an ANSI-coded string.
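
A sketch converting a bold ANSI string back into markup; the output tag names depend on the reverse style and clearer maps:

    from pytermgui.markup.parsing import get_markup

    print(get_markup("\x1b[1mHello\x1b[0m"))
    # Likely output: "[bold]Hello[/]"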

def parse( text: str, optimize: bool = False, context: pytermgui.markup.parsing.ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str:

Parses markup into the ANSI-coded string it represents.

Args
  • text: Any valid markup.
  • optimize: If set, optimize_tokens will optimize the tokens found within the input markup before usage. This will incur a (minor) performance hit.
  • context: The context that aliases and macros found within the markup will be searched in.
  • append_reset: If set, [/] will be appended to the token iterator, clearing all styles.
  • ignore_unknown_tags: If set, the MarkupSyntaxError coming from unknown tags will be silenced.
Returns

The ANSI-coded string that the markup represents.
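
A sketch of parse with a custom context; the alias and macro below are hypothetical, and the "!"-prefixed macro key follows the MarkupLanguage convention:

    from pytermgui.markup.parsing import create_context_dict, parse

    context = create_context_dict()
    context["aliases"]["title"] = "bold underline"  # hypothetical alias
    context["macros"]["!upper"] = str.upper         # hypothetical macro

    print(repr(parse("[title !upper]heading[/]", context=context)))
    # The plain text is uppercased by the macro and wrapped in the escape
    # sequences that the aliased styles resolve to.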

def parse_tokens( tokens: list[pytermgui.markup.tokens.Token], optimize: bool = False, context: pytermgui.markup.parsing.ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str:

Parses a stream of tokens into the ANSI-coded string they represent.

Args
  • tokens: Any list of Tokens, usually obtained from either tokenize_ansi or tokenize_markup.
  • optimize: If set, optimize_tokens will optimize the input iterator before usage. This will incur a (minor) performance hit.
  • context: The context that aliases and macros found within the tokens will be searched in.
  • append_reset: If set, ClearToken("/") will be appended to the token iterator, clearing all styles.
  • ignore_unknown_tags: If set, the MarkupSyntaxError coming from unknown tags will be silenced.
Returns

The ANSI-coded string that the token stream represents.
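
A sketch combining the tokenizer with parse_tokens directly, which is roughly what parse does under the hood:

    from pytermgui.markup.parsing import parse_tokens, tokenize_markup

    tokens = list(tokenize_markup("[bold 141]Hello[/] world"))
    ansi = parse_tokens(tokens, append_reset=True)
    print(repr(ansi))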