pytermgui.markup.parsing

The internals of the TIM engine.

  1"""The internals of the TIM engine."""
  2
  3from __future__ import annotations
  4
  5import json
  6from typing import Callable, Iterator, Protocol, TypedDict
  7from warnings import filterwarnings, warn
  8
  9from ..colors import str_to_color
 10from ..exceptions import ColorSyntaxError, MarkupSyntaxError
 11from ..regex import RE_ANSI_NEW as RE_ANSI
 12from ..regex import RE_MACRO, RE_MARKUP, RE_POSITION
 13from .style_maps import CLEARERS, REVERSE_CLEARERS, REVERSE_STYLES, STYLES
 14from .tokens import (
 15    AliasToken,
 16    ClearToken,
 17    ColorToken,
 18    CursorToken,
 19    HLinkToken,
 20    MacroToken,
 21    PlainToken,
 22    StyleToken,
 23    Token,
 24)
 25
 26# TODO: Improve first-run performance.
 27
 28filterwarnings("always")
 29
 30
 31LINK_TEMPLATE = "\x1b]8;;{uri}\x1b\\{label}\x1b]8;;\x1b\\"
 32
 33__all__ = [
 34    "ContextDict",
 35    "create_context_dict",
 36    "consume_tag",
 37    "tokenize_markup",
 38    "tokenize_ansi",
 39    "optimize_tokens",
 40    "optimize_markup",
 41    "tokens_to_markup",
 42    "get_markup",
 43    "parse",
 44    "parse_tokens",
 45]
 46
 47
 48class MacroType(Protocol):  # pylint: disable=too-few-public-methods
 49    """A protocol for TIM macros."""
 50
 51    def __call__(*args: str) -> str:  # pylint: disable=no-method-argument
 52        """Applies the macro."""
 53
 54
 55class ContextDict(TypedDict):
 56    """A dictionary to hold context about a markup language's environment.
 57
 58    It has two sub-dicts:
 59
 60    - aliases
 61    - macros
 62
 63    For information about what they do and contain, see the
 64    [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage).
 65    """
 66
 67    aliases: dict[str, str]
 68    macros: dict[str, MacroType]
 69
 70
 71def create_context_dict() -> ContextDict:
 72    """Creates a new context dictionary, initializing its sub-dicts.
 73
 74    Returns:
 75        A dictionary with `aliases` and `macros` defined as empty sub-dicts.
 76    """
 77
 78    return {"aliases": {}, "macros": {}}
 79
 80
 81def consume_tag(tag: str) -> Token:  # pylint: disable=too-many-return-statements
 82    """Consumes a tag text, returns the associated Token."""
 83
 84    if tag in STYLES:
 85        return StyleToken(tag)
 86
 87    if tag.startswith("/"):
 88        return ClearToken(tag)
 89
 90    if tag.startswith("!"):
 91        matchobj = RE_MACRO.match(tag)
 92
 93        if matchobj is not None:
 94            name, args = matchobj.groups()
 95
 96            if args is None:
 97                return MacroToken(name, tuple())
 98
 99            return MacroToken(name, tuple(args.split(":")))
100
101    if tag.startswith("~"):
102        return HLinkToken(tag[1:])
103
104    if tag.startswith("(") and tag.endswith(")"):
105        values = tag[1:-1].split(";")
106        if len(values) != 2:
107            raise MarkupSyntaxError(
108                tag,
109                f"should have exactly 2 values separated by `;`, not {len(values)}",
110                "",
111            )
112
113        return CursorToken(tag[1:-1], *map(int, values))
114
115    token: Token
116    try:
117        token = ColorToken(tag, str_to_color(tag))
118
119    except ColorSyntaxError:
120        token = AliasToken(tag)
121
122    finally:
123        return token  # pylint: disable=lost-exception
124
125
126def tokenize_markup(text: str) -> Iterator[Token]:
127    """Converts some markup text into a stream of tokens.
128
129    Args:
130        text: Any valid markup.
131
132    Yields:
133        The generated tokens, in the order they occur within the markup.
134    """
135
136    cursor = 0
137    length = len(text)
138    has_inverse = False
139    for matchobj in RE_MARKUP.finditer(text):
140        full, escapes, content = matchobj.groups()
141        start, end = matchobj.span()
142
143        if cursor < start:
144            yield PlainToken(text[cursor:start])
145
146        if not escapes == "":
147            _, remaining = divmod(len(escapes), 2)
148
149            yield PlainToken(full[max(1 - remaining, 1) :])
150            cursor = end
151
152            continue
153
154        for tag in content.split():
155            if tag == "inverse":
156                has_inverse = True
157
158            if tag == "/inverse":
159                has_inverse = False
160
161            consumed = consume_tag(tag)
162            if has_inverse:
163                if consumed.markup == "/fg":
164                    consumed = ClearToken("/fg")
165
166                elif consumed.markup == "/bg":
167                    consumed = ClearToken("/bg")
168
169            yield consumed
170
171        cursor = end
172
173    if cursor < length:
174        yield PlainToken(text[cursor:length])
175
176
177def tokenize_ansi(  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
178    text: str,
179) -> Iterator[Token]:
180    """Converts some ANSI-coded text into a stream of tokens.
181
182    Args:
183        text: Any valid ANSI-coded text.
184
185    Yields:
186        The generated tokens, in the order they occur within the text.
187    """
188
189    cursor = 0
190
191    for matchobj in RE_ANSI.finditer(text):
192        start, end = matchobj.span()
193
194        csi = matchobj.groups()[0:2]
195        link_osc = matchobj.groups()[2:4]
196
197        if link_osc != (None, None):
198            cursor = end
199            uri, label = link_osc
200
201            yield HLinkToken(uri)
202            yield PlainToken(label)
203
204            continue
205
206        full, content = csi
207
208        if cursor < start:
209            yield PlainToken(text[cursor:start])
210
211        cursor = end
212
213        code = ""
214
215        # Position
216        posmatch = RE_POSITION.match(full)
217
218        if posmatch is not None:
219            ypos, xpos = posmatch.groups()
220            if not ypos and not xpos:
221                raise ValueError(
222                    f"Cannot parse cursor when no position is supplied. Match: {posmatch!r}"
223                )
224
225            yield CursorToken(content, int(ypos) or None, int(xpos) or None)
226            continue
227
228        parts = content.split(";")
229
230        state = None
231        color_code = ""
232        for part in parts:
233            if state is None:
234                if part in REVERSE_STYLES:
235                    yield StyleToken(REVERSE_STYLES[part])
236                    continue
237
238                if part in REVERSE_CLEARERS:
239                    yield ClearToken(REVERSE_CLEARERS[part])
240                    continue
241
242                if part in ("38", "48"):
243                    state = "COLOR"
244                    color_code += part + ";"
245                    continue
246
247                # standard colors
248                try:
249                    yield ColorToken(part, str_to_color(part))
250                    continue
251
252                except ColorSyntaxError as exc:
253                    raise ValueError(f"Could not parse color tag {part!r}.") from exc
254
255            if state != "COLOR":
256                continue
257
258            color_code += part + ";"
259
260            # Ignore incomplete RGB colors
261            if (
262                color_code.startswith(("38;2;", "48;2;"))
263                and len(color_code.split(";")) != 6
264            ):
265                continue
266
267            try:
268                code = color_code
269
270                if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")):
271                    stripped = code[5:-1]
272
273                    if code.startswith("4"):
274                        stripped = "@" + stripped
275
276                    code = stripped
277
278                yield ColorToken(code, str_to_color(code))
279
280            except ColorSyntaxError:
281                continue
282
283            state = None
284            color_code = ""
285
286    remaining = text[cursor:]
287    if len(remaining) > 0:
288        yield PlainToken(remaining)
289
290
291def eval_alias(text: str, context: ContextDict) -> str:
292    """Evaluates a space-delimited string of alias tags into their underlying value.
293
294    Args:
295        text: A space-separated string containing the aliases.
296
297    Returns:
298        The space-separated string that the input aliases represent.
299    """
300
301    aliases = context["aliases"]
302
303    evaluated = ""
304    for tag in text.split():
305        if tag not in aliases:
306            evaluated += tag + " "
307            continue
308
309        evaluated += eval_alias(aliases[tag], context)
310
311    return evaluated.rstrip(" ")
312
313
314def parse_plain(token: PlainToken, _: ContextDict, __: Callable[[], str]) -> str:
315    """Parses a plain token."""
316
317    return token.value
318
319
320def parse_color(token: ColorToken, _: ContextDict, __: Callable[[], str]) -> str:
321    """Parses a color token."""
322
323    return token.color.sequence
324
325
326def parse_style(token: StyleToken, _: ContextDict, __: Callable[[], str]) -> str:
327    """Parses a style token."""
328
329    index = STYLES[token.value]
330
331    return f"\x1b[{index}m"
332
333
334def parse_macro(
335    token: MacroToken, context: ContextDict, get_full: Callable[[], str]
336) -> tuple[MacroType, tuple[str, ...]]:
337    """Parses a macro token.
338
339    Returns:
340        A tuple containing the callable bound to the name, as well as the arguments
341        passed to it.
342    """
343
344    func = context["macros"].get(token.value)
345
346    if func is None:
347        dump = json.dumps(context["macros"], indent=2, default=str)
348
349        raise MarkupSyntaxError(
350            token.value, f"not defined in macro context: {dump}", get_full()
351        )
352
353    return func, token.arguments
354
355
356def parse_alias(
357    token: AliasToken, context: ContextDict, get_full: Callable[[], str]
358) -> str:
359    """Parses an alias token."""
360
361    if token.value not in context["aliases"]:
362        dump = json.dumps(context["aliases"], indent=2, default=str)
363
364        raise MarkupSyntaxError(
365            token.value, f"not defined in alias context: {dump}", get_full()
366        )
367
368    meaning = context["aliases"][token.value]
369
370    return eval_alias(meaning, context).rstrip(" ")
371
372
373def parse_clear(token: ClearToken, _: ContextDict, get_full: Callable[[], str]) -> str:
374    """Parses a clearer token."""
375
376    index = CLEARERS.get(token.value)
377    if index is None:
378        raise MarkupSyntaxError(
379            token.value, "not a recognized clearer or alias", get_full()
380        )
381
382    return f"\x1b[{index}m"
383
384
385def parse_cursor(token: CursorToken, _: ContextDict, __: Callable[[], str]) -> str:
386    """Parses a cursor token."""
387
388    ypos, xpos = map(lambda i: "" if i is None else i, token)
389
390    return f"\x1b[{ypos};{xpos}H"
391
392
393def optimize_tokens(tokens: list[Token]) -> Iterator[Token]:
394    """Optimizes a stream of tokens, only yielding functionally relevant ones.
395
396    Args:
397        tokens: Any list of Token objects. Usually obtained from `tokenize_markup`
398            or `tokenize_ansi`.
399
400    Yields:
401        All those tokens within the input iterator that are functionally relevant,
402            keeping their order.
403    """
404
405    previous: list[Token] = []
406    current_tag_group: list[Token] = []
407
408    def _diff_previous() -> Iterator[Token]:
409        """Find difference from the previously active list of tokens."""
410
411        applied = previous.copy()
412
413        for tkn in current_tag_group:
414            targets = []
415
416            clearer = Token.is_clear(tkn)
417            if Token.is_clear(tkn):
418                targets = [tkn.targets(tag) for tag in applied]
419
420            if tkn in previous and not clearer:
421                continue
422
423            if clearer and not any(targets):
424                continue
425
426            applied.append(tkn)
427            yield tkn
428
429    def _remove_redundant_color(token: Token) -> None:
430        """Removes non-functional colors.
431
432        These happen in the following ways:
433        - Multiple colors of the same channel (fg/bg) are present.
434        - A color is applied, then a clearer clears it.
435        """
436
437        for applied in current_tag_group.copy():
438            if Token.is_clear(applied) and applied.targets(token):
439                current_tag_group.remove(applied)
440
441            if not Token.is_color(applied):
442                continue
443
444            old = applied.color
445
446            if old.background == new.background:
447                current_tag_group.remove(applied)
448
449    for token in tokens:
450        if Token.is_plain(token):
451            yield from _diff_previous()
452            yield token
453
454            previous = current_tag_group.copy()
455
456            continue
457
458        if Token.is_color(token):
459            new = token.color
460
461            _remove_redundant_color(token)
462
463            if not any(token.markup == applied.markup for applied in current_tag_group):
464                current_tag_group.append(token)
465
466            continue
467
468        if token.is_style():
469            if not any(token == tag for tag in current_tag_group):
470                current_tag_group.append(token)
471
472            continue
473
474        if Token.is_clear(token):
475            applied = False
476            for tag in current_tag_group.copy():
477                if token.targets(tag) or token == tag:
478                    current_tag_group.remove(tag)
479                    applied = True
480
481            if not applied:
482                continue
483
484        current_tag_group.append(token)
485
486    yield from _diff_previous()
487
488
489def tokens_to_markup(tokens: list[Token]) -> str:
490    """Converts a token stream into the markup of its tokens.
491
492    Args:
493        tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or
494            `tokenize_ansi`.
495
496    Returns:
497        The markup the given tokens represent.
498    """
499
500    tags: list[Token] = []
501    markup = ""
502
503    for token in tokens:
504        if token.is_plain():
505            if len(tags) > 0:
506                markup += f"[{' '.join(tag.markup for tag in tags)}]"
507
508            markup += token.value
509            tags = []
510
511        else:
512            tags.append(token)
513
514    if len(tags) > 0:
515        markup += f"[{' '.join(tag.markup for tag in tags)}]"
516
517    return markup
518
519
520def get_markup(text: str) -> str:
521    """Gets the markup representing an ANSI-coded string."""
522
523    return tokens_to_markup(list(tokenize_ansi(text)))
524
525
526def optimize_markup(markup: str) -> str:
527    """Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup."""
528
529    return tokens_to_markup(list(optimize_tokens(list(tokenize_markup(markup)))))
530
531
532PARSERS = {
533    PlainToken: parse_plain,
534    ColorToken: parse_color,
535    StyleToken: parse_style,
536    MacroToken: parse_macro,
537    AliasToken: parse_alias,
538    ClearToken: parse_clear,
539    CursorToken: parse_cursor,
540}
541
542
543def _apply_macros(
544    text: str, macros: Iterator[tuple[MacroType, tuple[str, ...]]]
545) -> str:
546    """Applies macros to the given text.
547
548    Args:
549        text: The plain text the macros will apply to.
550        macros: An iterator of (macro callable, arguments) pairs, as produced by `parse_macro`.
551
552    Returns:
553        The input plain text, with all macros applied to it. The macros will be applied
554        in the order they appear in.
555    """
556
557    for method, args in macros:
558        if len(args) > 0:
559            text = method(*args, text)
560            continue
561
562        text = method(text)
563
564    return text
565
566
567def _sub_aliases(tokens: list[Token], context: ContextDict) -> list[Token]:
568    """Substitutes all AliasTokens to their underlying values.
569
570    Args:
571        tokens: Any list of Tokens. When this iterator contains nothing
572            that can be interpreted as an alias, the same iterator turned into
573            a list will be returned.
574        context: The context that aliases will be searched in.
575    """
576
577    output: list[Token] = []
578
579    # It's more computationally efficient to create this lambda once and reuse it
580    # every time. There is no need to define a full function, as it just returns
581    # a function return.
582    get_full = (
583        lambda: tokens_to_markup(  # pylint: disable=unnecessary-lambda-assignment
584            tokens
585        )
586    )
587
588    for token in tokens:
589        if token.value in context["aliases"] and (
590            Token.is_clear(token) or Token.is_macro(token) or Token.is_alias(token)
591        ):
592            if Token.is_clear(token) or Token.is_macro(token):
593                token = AliasToken(token.value)
594
595            if Token.is_alias(token):
596                aliases_parsed = parse_alias(token, context, get_full)
597                output.extend(list(tokenize_markup(f"[{aliases_parsed}]")))
598
599            continue
600
601        if Token.is_macro(token) and token.value == "!link":
602            warn(
603                "Hyperlinks are no longer implemented as macros."
604                + " Prefer using the `~{uri}` syntax.",
605                DeprecationWarning,
606                stacklevel=4,
607            )
608
609            output.append(HLinkToken(":".join(token.arguments)))
610            continue
611
612        output.append(token)
613
614    return output
615
616
617def parse_tokens(  # pylint: disable=too-many-branches, too-many-locals
618    tokens: list[Token],
619    optimize: bool = False,
620    context: ContextDict | None = None,
621    append_reset: bool = True,
622    ignore_unknown_tags: bool = True,
623) -> str:
624    """Parses a stream of tokens into the ANSI-coded string they represent.
625
626    Args:
627        tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or
628            `tokenize_markup`.
629        optimize: If set, `optimize_tokens` will optimize the input iterator before
630            usage. This will incur a (minor) performance hit.
631        context: The context that aliases and macros found within the tokens will be
632            searched in.
633        append_reset: If set, `ClearToken("/")` will be appended to the token iterator,
634            clearing all styles.
635        ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags
636            will be silenced.
637
638    Returns:
639        The ANSI-coded string that the token stream represents.
640    """
641
642    if context is None:
643        context = create_context_dict()
644
645    token_list = list(_sub_aliases(tokens, context))
646
647    # It's more computationally efficient to create this lambda once and reuse it
648    # every time. There is no need to define a full function, as it just returns
649    # a function return.
650    get_full = (
651        lambda: tokens_to_markup(  # pylint: disable=unnecessary-lambda-assignment
652            tokens
653        )
654    )
655
656    if optimize:
657        token_list = list(optimize_tokens(tokens))
658
659    if append_reset:
660        token_list.append(ClearToken("/"))
661
662    link = None
663    output = ""
664    segment = ""
665    macros: list[MacroToken] = []
666    unknown_aliases: list[Token] = []
667
668    for token in token_list:
669        if token.is_plain():
670            value = _apply_macros(
671                token.value, (parse_macro(macro, context, get_full) for macro in macros)
672            )
673
674            if len(unknown_aliases) > 0:
675                output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]"
676                unknown_aliases = []
677
678            output += segment + (
679                value if link is None else LINK_TEMPLATE.format(uri=link, label=value)
680            )
681
682            segment = ""
683            continue
684
685        if token.is_hyperlink():
686            link = token.value
687            continue
688
689        if Token.is_macro(token):
690            macros.append(token)
691            continue
692
693        if Token.is_clear(token):
694            if token.value in ("/", "/~"):
695                link = None
696
697            found = False
698            for macro in macros.copy():
699                if token.targets(macro):
700                    macros.remove(macro)
701                    found = True
702                    break
703
704            if found and token.value != "/":
705                continue
706
707            if token.value.startswith("/!"):
708                raise ValueError(
709                    f"Cannot use clearer {token.value!r} with nothing to target."
710                )
711
712        try:
713            segment += PARSERS[type(token)](token, context, get_full)  # type: ignore
714
715        except MarkupSyntaxError:
716            if not ignore_unknown_tags:
717                raise
718
719            unknown_aliases.append(token)
720
721    if len(unknown_aliases) > 0:
722        output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]"
723
724    output += segment
725
726    return output
727
728
729def parse(
730    text: str,
731    optimize: bool = False,
732    context: ContextDict | None = None,
733    append_reset: bool = True,
734    ignore_unknown_tags: bool = True,
735) -> str:
736    """Parses markup into the ANSI-coded string it represents.
737
738    Args:
739        text: Any valid markup.
740        optimize: If set, `optimize_tokens` will optimize the tokens found within the
741            input markup before usage. This will incur a (minor) performance hit.
742        context: The context that aliases and macros found within the markup will be
743            searched in.
744        append_reset: If set, `[/]` will be appended to the token iterator, clearing all
745            styles.
746        ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags
747            will be silenced.
748
749    Returns:
750        The ANSI-coded string that the markup represents.
751    """
752
753    if context is None:
754        context = create_context_dict()
755
756    if append_reset and not text.endswith("/]"):
757        text += "[/]"
758
759    tokens = list(tokenize_markup(text))
760
761    return parse_tokens(
762        tokens,
763        optimize=optimize,
764        context=context,
765        append_reset=append_reset,
766        ignore_unknown_tags=ignore_unknown_tags,
767    )
class ContextDict(typing.TypedDict):

A dictionary to hold context about a markup language's environment.

It has two sub-dicts:

  • aliases
  • macros

For information about what they do and contain, see the MarkupLanguage docs (pytermgui.markup.language.MarkupLanguage).
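
A minimal sketch (not part of the original docs) of filling a context by hand; it assumes macro names are keyed with their leading "!", the convention used by MarkupLanguage:

from pytermgui.markup.parsing import create_context_dict, parse

context = create_context_dict()

# Aliases map a tag name to the markup it expands to.
context["aliases"]["error"] = "210 bold"

# Macros map a "!name" tag to a callable applied to the following plain text.
context["macros"]["!upper"] = lambda text: text.upper()

print(parse("[error]oops[/] [!upper]shout", context=context))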

def create_context_dict() -> pytermgui.markup.parsing.ContextDict:

Creates a new context dictionary, initializing its sub-dicts.

Returns

A dictionary with aliases and macros defined as empty sub-dicts.
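
A minimal usage sketch (not part of the original docs):

from pytermgui.markup.parsing import create_context_dict

context = create_context_dict()

# Both sub-dicts start out empty and are filled by the caller.
assert context == {"aliases": {}, "macros": {}}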

def consume_tag(tag: str) -> pytermgui.markup.tokens.Token:

Consumes a tag text, returns the associated Token.
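
An illustrative mapping of tag strings to token types, following the branches in the function's source above; comments show the expected return types:

from pytermgui.markup.parsing import consume_tag

print(consume_tag("bold"))                  # StyleToken
print(consume_tag("/bold"))                 # ClearToken
print(consume_tag("!upper(a:b)"))           # MacroToken with arguments ("a", "b")
print(consume_tag("~https://example.com"))  # HLinkToken
print(consume_tag("(10;20)"))               # CursorToken
print(consume_tag("210"))                   # ColorToken
print(consume_tag("my-alias"))              # AliasToken (anything that is not a color)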

def tokenize_markup(text: str) -> Iterator[pytermgui.markup.tokens.Token]:

Converts some markup text into a stream of tokens.

Args
  • text: Any valid markup.
Yields

The generated tokens, in the order they occur within the markup.
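
A quick sketch (not from the original docs) that prints each token produced for a small piece of markup:

from pytermgui.markup.parsing import tokenize_markup

for token in tokenize_markup("[bold 141]Hello [!upper]world[/]"):
    # Expect roughly: StyleToken, ColorToken, PlainToken, MacroToken,
    # PlainToken and ClearToken, in that order.
    print(type(token).__name__, repr(token.value))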

def tokenize_ansi(text: str) -> Iterator[pytermgui.markup.tokens.Token]:

Converts some ANSI-coded text into a stream of tokens.

Args
  • text: Any valid ANSI-coded text.
Yields

The generated tokens, in the order they occur within the text.
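
An illustrative round trip: parse markup into ANSI, then recover tokens from the ANSI output.

from pytermgui.markup.parsing import parse, tokenize_ansi

ansi = parse("[bold 141]Hello[/]")

for token in tokenize_ansi(ansi):
    print(type(token).__name__, repr(token.value))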

def optimize_tokens( tokens: list[pytermgui.markup.tokens.Token]) -> Iterator[pytermgui.markup.tokens.Token]:

Optimizes a stream of tokens, only yielding functionally relevant ones.

Args
  • tokens: Any list of Token objects. Usually obtained from tokenize_markup or tokenize_ansi.
Yields

All those tokens within the input iterator that are functionally relevant, keeping their order.
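
A sketch showing the optimizer dropping tags with no visible effect. The exact result depends on the token equality and targeting rules, but the duplicated bold and the immediately cleared italic should not survive:

from pytermgui.markup.parsing import (
    optimize_tokens,
    tokenize_markup,
    tokens_to_markup,
)

tokens = list(tokenize_markup("[bold italic /italic bold]Hello[/]"))

# Expected to collapse to something close to "[bold]Hello[/]".
print(tokens_to_markup(list(optimize_tokens(tokens))))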

def optimize_markup(markup: str) -> str:

Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup.
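
The same idea as a one-liner over markup strings (illustrative):

from pytermgui.markup.parsing import optimize_markup

print(optimize_markup("[bold bold italic /italic]Hello[/]"))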

def tokens_to_markup(tokens: list[pytermgui.markup.tokens.Token]) -> str:

Converts a token stream into the markup of its tokens.

Args
  • tokens: Any list of Token objects. Usually obtained from tokenize_markup or tokenize_ansi.
Returns

The markup the given tokens represent.
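
A sketch that builds tokens by hand and converts them back; the token classes come from pytermgui.markup.tokens:

from pytermgui.markup.parsing import tokens_to_markup
from pytermgui.markup.tokens import ClearToken, PlainToken, StyleToken

tokens = [StyleToken("bold"), PlainToken("Hello"), ClearToken("/")]

# Should produce "[bold]Hello[/]".
print(tokens_to_markup(tokens))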

def get_markup(text: str) -> str:

Gets the markup representing an ANSI-coded string.
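
An illustrative round trip from markup to ANSI and back:

from pytermgui.markup.parsing import get_markup, parse

ansi = parse("[bold 141]Hello[/]")

# Reconstructs markup from the ANSI sequences; it may differ cosmetically
# from the original string.
print(get_markup(ansi))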

def parse( text: str, optimize: bool = False, context: pytermgui.markup.parsing.ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str:

Parses markup into the ANSI-coded string it represents.

Args
  • text: Any valid markup.
  • optimize: If set, optimize_tokens will optimize the tokens found within the input markup before usage. This will incur a (minor) performance hit.
  • context: The context that aliases and macros found within the markup will be searched in.
  • append_reset: If set, [/] will be appended to the token iterator, clearing all styles.
  • ignore_unknown_tags: If set, the MarkupSyntaxError coming from unknown tags will be silenced.
Returns

The ANSI-coded string that the markup represents.
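
A usage sketch (not from the original docs) covering the default behaviour and the ignore_unknown_tags switch; the tag name no-such-tag is a made-up example:

from pytermgui.exceptions import MarkupSyntaxError
from pytermgui.markup.parsing import parse

print(repr(parse("[bold 141]Hello[/]")))

# By default, unknown tags are kept as literal markup in the output.
print(repr(parse("[no-such-tag]Hello")))

try:
    parse("[no-such-tag]Hello", ignore_unknown_tags=False)
except MarkupSyntaxError as error:
    print("unknown tag:", error)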

def parse_tokens( tokens: list[pytermgui.markup.tokens.Token], optimize: bool = False, context: pytermgui.markup.parsing.ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str:

Parses a stream of tokens into the ANSI-coded string they represent.

Args
  • tokens: Any list of Tokens, usually obtained from either tokenize_ansi or tokenize_markup.
  • optimize: If set, optimize_tokens will optimize the input iterator before usage. This will incur a (minor) performance hit.
  • context: The context that aliases and macros found within the tokens will be searched in.
  • append_reset: If set, ClearToken("/") will be appended to the token iterator, clearing all styles.
  • ignore_unknown_tags: If set, the MarkupSyntaxError coming from unknown tags will be silenced.
Returns

The ANSI-coded string that the token stream represents.
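
A sketch of the same pipeline parse uses, split into its two stages, with a hand-built alias context (illustrative names):

from pytermgui.markup.parsing import (
    create_context_dict,
    parse_tokens,
    tokenize_markup,
)

context = create_context_dict()
context["aliases"]["title"] = "bold underline"

tokens = list(tokenize_markup("[title]Header[/]"))
print(repr(parse_tokens(tokens, context=context)))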