pytermgui.markup.parsing
The internals of the TIM engine.
1"""The internals of the TIM engine.""" 2 3from __future__ import annotations 4 5import json 6from typing import Callable, Iterator, Protocol, TypedDict 7from warnings import filterwarnings, warn 8 9from ..colors import str_to_color 10from ..exceptions import ColorSyntaxError, MarkupSyntaxError 11from ..regex import RE_ANSI_NEW as RE_ANSI 12from ..regex import RE_MACRO, RE_MARKUP, RE_POSITION 13from .style_maps import CLEARERS, REVERSE_CLEARERS, REVERSE_STYLES, STYLES 14from .tokens import ( 15 AliasToken, 16 ClearToken, 17 ColorToken, 18 CursorToken, 19 HLinkToken, 20 MacroToken, 21 PlainToken, 22 StyleToken, 23 Token, 24) 25 26# TODO: Improve first-run performance. 27 28filterwarnings("always") 29 30 31LINK_TEMPLATE = "\x1b]8;;{uri}\x1b\\{label}\x1b]8;;\x1b\\" 32 33__all__ = [ 34 "ContextDict", 35 "create_context_dict", 36 "consume_tag", 37 "tokenize_markup", 38 "tokenize_ansi", 39 "optimize_tokens", 40 "optimize_markup", 41 "tokens_to_markup", 42 "get_markup", 43 "parse", 44 "parse_tokens", 45] 46 47 48class MacroType(Protocol): # pylint: disable=too-few-public-methods 49 """A protocol for TIM macros.""" 50 51 def __call__(*args: str) -> str: # pylint: disable=no-method-argument 52 """Applies the macro.""" 53 54 55class ContextDict(TypedDict): 56 """A dictionary to hold context about a markup language's environment. 57 58 It has two sub-dicts: 59 60 - aliases 61 - macros 62 63 For information about what they do and contain, see the 64 [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage). 65 """ 66 67 aliases: dict[str, str] 68 macros: dict[str, MacroType] 69 70 71def create_context_dict() -> ContextDict: 72 """Creates a new context dictionary, initializing its sub-dicts. 73 74 Returns: 75 A dictionary with `aliases` and `macros` defined as empty sub-dicts. 76 """ 77 78 return {"aliases": {}, "macros": {}} 79 80 81def consume_tag(tag: str) -> Token: # pylint: disable=too-many-return-statements 82 """Consumes a tag text, returns the associated Token.""" 83 84 if tag in STYLES: 85 return StyleToken(tag) 86 87 if tag.startswith("/"): 88 return ClearToken(tag) 89 90 if tag.startswith("!"): 91 matchobj = RE_MACRO.match(tag) 92 93 if matchobj is not None: 94 name, args = matchobj.groups() 95 96 if args is None: 97 return MacroToken(name, tuple()) 98 99 return MacroToken(name, tuple(args.split(":"))) 100 101 if tag.startswith("~"): 102 return HLinkToken(tag[1:]) 103 104 if tag.startswith("(") and tag.endswith(")"): 105 values = tag[1:-1].split(";") 106 if len(values) != 2: 107 raise ValueError( 108 f"Cursor tags must have exactly 2 values delimited by `;`, got {tag!r}." 109 ) 110 111 return CursorToken(tag[1:-1], *map(int, values)) 112 113 token: Token 114 try: 115 token = ColorToken(tag, str_to_color(tag)) 116 117 except ColorSyntaxError: 118 token = AliasToken(tag) 119 120 finally: 121 return token # pylint: disable=lost-exception 122 123 124def tokenize_markup(text: str) -> Iterator[Token]: 125 """Converts some markup text into a stream of tokens. 126 127 Args: 128 text: Any valid markup. 129 130 Yields: 131 The generated tokens, in the order they occur within the markup. 
132 """ 133 134 cursor = 0 135 length = len(text) 136 has_inverse = False 137 for matchobj in RE_MARKUP.finditer(text): 138 full, escapes, content = matchobj.groups() 139 start, end = matchobj.span() 140 141 if cursor < start: 142 yield PlainToken(text[cursor:start]) 143 144 if not escapes == "": 145 _, remaining = divmod(len(escapes), 2) 146 147 yield PlainToken(full[max(1 - remaining, 1) :]) 148 cursor = end 149 150 continue 151 152 for tag in content.split(): 153 if tag == "inverse": 154 has_inverse = True 155 156 if tag == "/inverse": 157 has_inverse = False 158 159 consumed = consume_tag(tag) 160 if has_inverse: 161 if consumed.markup == "/fg": 162 consumed = ClearToken("/fg") 163 164 elif consumed.markup == "/bg": 165 consumed = ClearToken("/bg") 166 167 yield consumed 168 169 cursor = end 170 171 if cursor < length: 172 yield PlainToken(text[cursor:length]) 173 174 175def tokenize_ansi( # pylint: disable=too-many-locals, too-many-branches, too-many-statements 176 text: str, 177) -> Iterator[Token]: 178 """Converts some ANSI-coded text into a stream of tokens. 179 180 Args: 181 text: Any valid ANSI-coded text. 182 183 Yields: 184 The generated tokens, in the order they occur within the text. 185 """ 186 187 cursor = 0 188 189 for matchobj in RE_ANSI.finditer(text): 190 start, end = matchobj.span() 191 192 csi = matchobj.groups()[0:2] 193 link_osc = matchobj.groups()[2:4] 194 195 if link_osc != (None, None): 196 cursor = end 197 uri, label = link_osc 198 199 yield HLinkToken(uri) 200 yield PlainToken(label) 201 202 continue 203 204 full, content = csi 205 206 if cursor < start: 207 yield PlainToken(text[cursor:start]) 208 209 cursor = end 210 211 code = "" 212 213 # Position 214 posmatch = RE_POSITION.match(full) 215 216 if posmatch is not None: 217 ypos, xpos = posmatch.groups() 218 if not ypos and not xpos: 219 raise ValueError( 220 f"Cannot parse cursor when no position is supplied. Match: {posmatch!r}" 221 ) 222 223 yield CursorToken(content, int(ypos) or None, int(xpos) or None) 224 continue 225 226 parts = content.split(";") 227 228 state = None 229 color_code = "" 230 for part in parts: 231 if state is None: 232 if part in REVERSE_STYLES: 233 yield StyleToken(REVERSE_STYLES[part]) 234 continue 235 236 if part in REVERSE_CLEARERS: 237 yield ClearToken(REVERSE_CLEARERS[part]) 238 continue 239 240 if part in ("38", "48"): 241 state = "COLOR" 242 color_code += part + ";" 243 continue 244 245 # standard colors 246 try: 247 yield ColorToken(part, str_to_color(part)) 248 continue 249 250 except ColorSyntaxError as exc: 251 raise ValueError(f"Could not parse color tag {part!r}.") from exc 252 253 if state != "COLOR": 254 continue 255 256 color_code += part + ";" 257 258 # Ignore incomplete RGB colors 259 if ( 260 color_code.startswith(("38;2;", "48;2;")) 261 and len(color_code.split(";")) != 6 262 ): 263 continue 264 265 try: 266 code = color_code 267 268 if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")): 269 stripped = code[5:-1] 270 271 if code.startswith("4"): 272 stripped = "@" + stripped 273 274 code = stripped 275 276 yield ColorToken(code, str_to_color(code)) 277 278 except ColorSyntaxError: 279 continue 280 281 state = None 282 color_code = "" 283 284 remaining = text[cursor:] 285 if len(remaining) > 0: 286 yield PlainToken(remaining) 287 288 289def eval_alias(text: str, context: ContextDict) -> str: 290 """Evaluates a space-delimited string of alias tags into their underlying value. 291 292 Args: 293 text: A space-separated string containing the aliases. 
294 295 Returns: 296 The space-separated string that the input aliases represent. 297 """ 298 299 aliases = context["aliases"] 300 301 evaluated = "" 302 for tag in text.split(): 303 if tag not in aliases: 304 evaluated += tag + " " 305 continue 306 307 evaluated += eval_alias(aliases[tag], context) 308 309 return evaluated.rstrip(" ") 310 311 312def parse_plain(token: PlainToken, _: ContextDict, __: Callable[[], str]) -> str: 313 """Parses a plain token.""" 314 315 return token.value 316 317 318def parse_color(token: ColorToken, _: ContextDict, __: Callable[[], str]) -> str: 319 """Parses a color token.""" 320 321 return token.color.sequence 322 323 324def parse_style(token: StyleToken, _: ContextDict, __: Callable[[], str]) -> str: 325 """Parses a style token.""" 326 327 index = STYLES[token.value] 328 329 return f"\x1b[{index}m" 330 331 332def parse_macro( 333 token: MacroToken, context: ContextDict, get_full: Callable[[], str] 334) -> tuple[MacroType, tuple[str, ...]]: 335 """Parses a macro token. 336 337 Returns: 338 A tuple containing the callable bound to the name, as well as the arguments 339 passed to it. 340 """ 341 342 func = context["macros"].get(token.value) 343 344 if func is None: 345 dump = json.dumps(context["macros"], indent=2, default=str) 346 347 raise MarkupSyntaxError( 348 token.value, f"not defined in macro context: {dump}", get_full() 349 ) 350 351 return func, token.arguments 352 353 354def parse_alias( 355 token: AliasToken, context: ContextDict, get_full: Callable[[], str] 356) -> str: 357 """Parses an alias token.""" 358 359 if token.value not in context["aliases"]: 360 dump = json.dumps(context["aliases"], indent=2, default=str) 361 362 raise MarkupSyntaxError( 363 token.value, f"not defined in alias context: {dump}", get_full() 364 ) 365 366 meaning = context["aliases"][token.value] 367 368 return eval_alias(meaning, context).rstrip(" ") 369 370 371def parse_clear(token: ClearToken, _: ContextDict, get_full: Callable[[], str]) -> str: 372 """Parses a clearer token.""" 373 374 index = CLEARERS.get(token.value) 375 if index is None: 376 raise MarkupSyntaxError( 377 token.value, "not a recognized clearer or alias", get_full() 378 ) 379 380 return f"\x1b[{index}m" 381 382 383def parse_cursor(token: CursorToken, _: ContextDict, __: Callable[[], str]) -> str: 384 """Parses a cursor token.""" 385 386 ypos, xpos = map(lambda i: "" if i is None else i, token) 387 388 return f"\x1b[{ypos};{xpos}H" 389 390 391def optimize_tokens(tokens: list[Token]) -> Iterator[Token]: 392 """Optimizes a stream of tokens, only yielding functionally relevant ones. 393 394 Args: 395 tokens: Any list of Token objects. Usually obtained from `tokenize_markup` 396 or `tokenize_ansi`. 397 398 Yields: 399 All those tokens within the input iterator that are functionally relevant, 400 keeping their order. 401 """ 402 403 previous: list[Token] = [] 404 current_tag_group: list[Token] = [] 405 406 def _diff_previous() -> Iterator[Token]: 407 """Find difference from the previously active list of tokens.""" 408 409 applied = previous.copy() 410 411 for tkn in current_tag_group: 412 targets = [] 413 414 clearer = Token.is_clear(tkn) 415 if Token.is_clear(tkn): 416 targets = [tkn.targets(tag) for tag in applied] 417 418 if tkn in previous and not clearer: 419 continue 420 421 if clearer and not any(targets): 422 continue 423 424 applied.append(tkn) 425 yield tkn 426 427 def _remove_redundant_color(token: Token) -> None: 428 """Removes non-functional colors. 
429 430 These happen in the following ways: 431 - Multiple colors of the same channel (fg/bg) are present. 432 - A color is applied, then a clearer clears it. 433 """ 434 435 for applied in current_tag_group.copy(): 436 if Token.is_clear(applied) and applied.targets(token): 437 current_tag_group.remove(applied) 438 439 if not Token.is_color(applied): 440 continue 441 442 old = applied.color 443 444 if old.background == new.background: 445 current_tag_group.remove(applied) 446 447 for token in tokens: 448 if Token.is_plain(token): 449 yield from _diff_previous() 450 yield token 451 452 previous = current_tag_group.copy() 453 454 continue 455 456 if Token.is_color(token): 457 new = token.color 458 459 _remove_redundant_color(token) 460 461 if not any(token.markup == applied.markup for applied in current_tag_group): 462 current_tag_group.append(token) 463 464 continue 465 466 if token.is_style(): 467 if not any(token == tag for tag in current_tag_group): 468 current_tag_group.append(token) 469 470 continue 471 472 if Token.is_clear(token): 473 applied = False 474 for tag in current_tag_group.copy(): 475 if token.targets(tag) or token == tag: 476 current_tag_group.remove(tag) 477 applied = True 478 479 if not applied: 480 continue 481 482 current_tag_group.append(token) 483 484 yield from _diff_previous() 485 486 487def tokens_to_markup(tokens: list[Token]) -> str: 488 """Converts a token stream into the markup of its tokens. 489 490 Args: 491 tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or 492 `tokenize_ansi`. 493 494 Returns: 495 The markup the given tokens represent. 496 """ 497 498 tags: list[Token] = [] 499 markup = "" 500 501 for token in tokens: 502 if token.is_plain(): 503 if len(tags) > 0: 504 markup += f"[{' '.join(tag.markup for tag in tags)}]" 505 506 markup += token.value 507 tags = [] 508 509 else: 510 tags.append(token) 511 512 if len(tags) > 0: 513 markup += f"[{' '.join(tag.markup for tag in tags)}]" 514 515 return markup 516 517 518def get_markup(text: str) -> str: 519 """Gets the markup representing an ANSI-coded string.""" 520 521 return tokens_to_markup(list(tokenize_ansi(text))) 522 523 524def optimize_markup(markup: str) -> str: 525 """Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup.""" 526 527 return tokens_to_markup(list(optimize_tokens(list(tokenize_markup(markup))))) 528 529 530PARSERS = { 531 PlainToken: parse_plain, 532 ColorToken: parse_color, 533 StyleToken: parse_style, 534 MacroToken: parse_macro, 535 AliasToken: parse_alias, 536 ClearToken: parse_clear, 537 CursorToken: parse_cursor, 538} 539 540 541def _apply_macros( 542 text: str, macros: Iterator[tuple[MacroType, tuple[str, ...]]] 543) -> str: 544 """Applies macros to the given text. 545 546 Args: 547 text: The plain text the macros will apply to. 548 macros: Any iterator of MacroTokens that will be applied. 549 550 Returns: 551 The input plain text, with all macros applied to it. The macros will be applied 552 in the order they appear in. 553 """ 554 555 for method, args in macros: 556 if len(args) > 0: 557 text = method(*args, text) 558 continue 559 560 text = method(text) 561 562 return text 563 564 565def _sub_aliases(tokens: list[Token], context: ContextDict) -> list[Token]: 566 """Substitutes all AliasTokens to their underlying values. 567 568 Args: 569 tokens: Any list of Tokens. When this iterator contains nothing 570 that can be interpreted as an alias, the same iterator turned into 571 a list will be returned. 
572 context: The context that aliases will be searched in. 573 """ 574 575 output: list[Token] = [] 576 577 # It's more computationally efficient to create this lambda once and reuse it 578 # every time. There is no need to define a full function, as it just returns 579 # a function return. 580 get_full = ( 581 lambda: tokens_to_markup( # pylint: disable=unnecessary-lambda-assignment 582 tokens 583 ) 584 ) 585 586 for token in tokens: 587 if token.value in context["aliases"] and ( 588 Token.is_clear(token) or Token.is_macro(token) or Token.is_alias(token) 589 ): 590 if Token.is_clear(token) or Token.is_macro(token): 591 token = AliasToken(token.value) 592 593 if Token.is_alias(token): 594 aliases_parsed = parse_alias(token, context, get_full) 595 output.extend(list(tokenize_markup(f"[{aliases_parsed}]"))) 596 597 continue 598 599 if Token.is_macro(token) and token.value == "!link": 600 warn( 601 "Hyperlinks are no longer implemented as macros." 602 + " Prefer using the `~{uri}` syntax.", 603 DeprecationWarning, 604 stacklevel=4, 605 ) 606 607 output.append(HLinkToken(":".join(token.arguments))) 608 continue 609 610 output.append(token) 611 612 return output 613 614 615def parse_tokens( # pylint: disable=too-many-branches, too-many-locals 616 tokens: list[Token], 617 optimize: bool = False, 618 context: ContextDict | None = None, 619 append_reset: bool = True, 620 ignore_unknown_tags: bool = True, 621) -> str: 622 """Parses a stream of tokens into the ANSI-coded string they represent. 623 624 Args: 625 tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or 626 `tokenize_markup`. 627 optimize: If set, `optimize_tokens` will optimize the input iterator before 628 usage. This will incur a (minor) performance hit. 629 context: The context that aliases and macros found within the tokens will be 630 searched in. 631 append_reset: If set, `ClearToken("/")` will be appended to the token iterator, 632 clearing all styles. 633 ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags 634 will be silenced. 635 636 Returns: 637 The ANSI-coded string that the token stream represents. 638 """ 639 640 if context is None: 641 context = create_context_dict() 642 643 token_list = list(_sub_aliases(tokens, context)) 644 645 # It's more computationally efficient to create this lambda once and reuse it 646 # every time. There is no need to define a full function, as it just returns 647 # a function return. 
648 get_full = ( 649 lambda: tokens_to_markup( # pylint: disable=unnecessary-lambda-assignment 650 tokens 651 ) 652 ) 653 654 if optimize: 655 token_list = list(optimize_tokens(tokens)) 656 657 if append_reset: 658 token_list.append(ClearToken("/")) 659 660 link = None 661 output = "" 662 segment = "" 663 macros: list[MacroToken] = [] 664 unknown_aliases: list[Token] = [] 665 666 for token in token_list: 667 if token.is_plain(): 668 value = _apply_macros( 669 token.value, (parse_macro(macro, context, get_full) for macro in macros) 670 ) 671 672 if len(unknown_aliases) > 0: 673 output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]" 674 unknown_aliases = [] 675 676 output += segment + ( 677 value if link is None else LINK_TEMPLATE.format(uri=link, label=value) 678 ) 679 680 segment = "" 681 continue 682 683 if token.is_hyperlink(): 684 link = token.value 685 continue 686 687 if Token.is_macro(token): 688 macros.append(token) 689 continue 690 691 if Token.is_clear(token): 692 if token.value in ("/", "/~"): 693 link = None 694 695 found = False 696 for macro in macros.copy(): 697 if token.targets(macro): 698 macros.remove(macro) 699 found = True 700 break 701 702 if found and token.value != "/": 703 continue 704 705 if token.value.startswith("/!"): 706 raise ValueError( 707 f"Cannot use clearer {token.value!r} with nothing to target." 708 ) 709 710 try: 711 segment += PARSERS[type(token)](token, context, get_full) # type: ignore 712 713 except MarkupSyntaxError: 714 if not ignore_unknown_tags: 715 raise 716 717 unknown_aliases.append(token) 718 719 if len(unknown_aliases) > 0: 720 output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]" 721 722 output += segment 723 724 return output 725 726 727def parse( 728 text: str, 729 optimize: bool = False, 730 context: ContextDict | None = None, 731 append_reset: bool = True, 732 ignore_unknown_tags: bool = True, 733) -> str: 734 """Parses markup into the ANSI-coded string it represents. 735 736 Args: 737 text: Any valid markup. 738 optimize: If set, `optimize_tokens` will optimize the tokens found within the 739 input markup before usage. This will incur a (minor) performance hit. 740 context: The context that aliases and macros found within the markup will be 741 searched in. 742 append_reset: If set, `[/]` will be appended to the token iterator, clearing all 743 styles. 744 ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags 745 will be silenced. 746 747 Returns: 748 The ANSI-coded string that the markup represents. 749 """ 750 751 if context is None: 752 context = create_context_dict() 753 754 if append_reset and not text.endswith("/]"): 755 text += "[/]" 756 757 tokens = list(tokenize_markup(text)) 758 759 return parse_tokens( 760 tokens, 761 optimize=optimize, 762 context=context, 763 append_reset=append_reset, 764 ignore_unknown_tags=ignore_unknown_tags, 765 )
```python
class ContextDict(TypedDict):
    aliases: dict[str, str]
    macros: dict[str, MacroType]
```
A dictionary to hold context about a markup language's environment.
It has two sub-dicts:
- aliases
- macros
For information about what they do and contain, see the [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage).
```python
def create_context_dict() -> ContextDict: ...
```
Creates a new context dictionary, initializing its sub-dicts.
Returns: A dictionary with `aliases` and `macros` defined as empty sub-dicts.
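A minimal sketch of filling in a context dictionary and handing it to `parse`; the alias value, macro name, and tags below are made up for illustration, and the assumption is that macro names are stored with their leading `!`, the way they are written in markup:

```python
from pytermgui.markup.parsing import create_context_dict, parse

context = create_context_dict()

# Aliases map a tag name to a space-delimited string of other tags (hypothetical value).
context["aliases"]["title"] = "bold 210"

# Macros are plain callables matching the MacroType protocol; the plain text they
# act on is passed as the last positional argument.
context["macros"]["!upper"] = lambda text: text.upper()

ansi = parse("[title !upper]hello", context=context)
# `ansi` should contain the bold and color sequences followed by "HELLO" and a
# trailing reset, since append_reset defaults to True.
```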
```python
def consume_tag(tag: str) -> Token: ...
```
Consumes a tag's text and returns the associated `Token`.
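Roughly how different tag texts are dispatched; the comments describe the expected token types, not exact reprs:

```python
from pytermgui.markup.parsing import consume_tag

consume_tag("bold")        # StyleToken, assuming "bold" is a known style
consume_tag("/bold")       # ClearToken("/bold")
consume_tag("!upper(a:b)")  # MacroToken with arguments ("a", "b")
consume_tag("~https://example.com")  # HLinkToken("https://example.com")
consume_tag("(3;4)")       # CursorToken("3;4", 3, 4)
consume_tag("141")         # ColorToken, if str_to_color accepts the tag
consume_tag("mystyle")     # unknown tags fall back to AliasToken("mystyle")
```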
```python
def tokenize_markup(text: str) -> Iterator[Token]: ...
```
Converts some markup text into a stream of tokens.
Args:
- text: Any valid markup.

Yields: The generated tokens, in the order they occur within the markup.
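A small sketch of the tokenizer in use; the token listing in the comment is indicative rather than exact:

```python
from pytermgui.markup.parsing import tokenize_markup

for token in tokenize_markup("[bold 141]hello [/fg]world"):
    print(token)

# Roughly: StyleToken("bold"), ColorToken("141", ...), PlainToken("hello "),
# ClearToken("/fg"), PlainToken("world"). Plain text between tag groups becomes
# PlainToken, and every tag inside [...] goes through consume_tag.
```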
```python
def tokenize_ansi(text: str) -> Iterator[Token]: ...
```
Converts some ANSI-coded text into a stream of tokens.
Args:
- text: Any valid ANSI-coded text.

Yields: The generated tokens, in the order they occur within the text.
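A sketch of tokenizing an ANSI string; the exact tokens depend on the REVERSE_STYLES and REVERSE_CLEARERS maps:

```python
from pytermgui.markup.parsing import tokenize_ansi

for token in tokenize_ansi("\x1b[1m\x1b[38;5;141mhello\x1b[0m"):
    print(token)

# Roughly: StyleToken("bold"), ColorToken("141", ...), PlainToken("hello"),
# ClearToken("/"). SGR parts are mapped back through the reverse style maps,
# and "38;5;141" is stripped down to the 8-bit color tag "141".
```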
```python
def optimize_tokens(tokens: list[Token]) -> Iterator[Token]: ...
```
Optimizes a stream of tokens, only yielding functionally relevant ones.
Args:
- tokens: Any list of Token objects, usually obtained from `tokenize_markup` or `tokenize_ansi`.

Yields: All those tokens within the input iterator that are functionally relevant, keeping their order.
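A sketch of what optimization removes, assuming duplicate styles and same-channel color overrides behave as the source above describes:

```python
from pytermgui.markup.parsing import (
    optimize_tokens,
    tokenize_markup,
    tokens_to_markup,
)

tokens = list(tokenize_markup("[bold bold 141 60]text[/]"))
optimized = list(optimize_tokens(tokens))

print(tokens_to_markup(optimized))
# Expected to be close to "[bold 60]text[/]": the repeated "bold" and the
# overridden foreground color "141" have no visible effect, so they are dropped.
```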
```python
def optimize_markup(markup: str) -> str: ...
```
Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup.
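The same idea expressed directly on markup strings; the output shown is indicative:

```python
from pytermgui.markup.parsing import optimize_markup

print(optimize_markup("[bold]hello [bold]world"))
# Expected to be close to "[bold]hello world": the second "bold" is already
# active when it appears, so the optimizer drops it.
```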
```python
def tokens_to_markup(tokens: list[Token]) -> str: ...
```
Converts a token stream into the markup of its tokens.
Args:
- tokens: Any list of Token objects, usually obtained from `tokenize_markup` or `tokenize_ansi`.

Returns: The markup the given tokens represent.
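For well-formed input this is close to a round trip with `tokenize_markup`:

```python
from pytermgui.markup.parsing import tokenize_markup, tokens_to_markup

tokens = list(tokenize_markup("[bold 141]hello[/] world"))

print(tokens_to_markup(tokens))
# Should print "[bold 141]hello[/] world" again: consecutive non-plain tokens
# are grouped back into a single [...] tag group before each plain segment.
```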
```python
def get_markup(text: str) -> str: ...
```
Gets the markup representing an ANSI-coded string.
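A sketch of recovering markup from ANSI text; the exact tag names depend on the reverse style maps:

```python
from pytermgui.markup.parsing import get_markup

print(get_markup("\x1b[1mhello\x1b[0m"))
# Likely "[bold]hello[/]": tokenize_ansi turns the sequences back into tokens,
# and tokens_to_markup renders them as tags.
```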
```python
def parse(
    text: str,
    optimize: bool = False,
    context: ContextDict | None = None,
    append_reset: bool = True,
    ignore_unknown_tags: bool = True,
) -> str: ...
```
Parses markup into the ANSI-coded string it represents.
Args:
- text: Any valid markup.
- optimize: If set, `optimize_tokens` will optimize the tokens found within the input markup before usage. This will incur a (minor) performance hit.
- context: The context that aliases and macros found within the markup will be searched in.
- append_reset: If set, `[/]` will be appended to the token iterator, clearing all styles.
- ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags will be silenced.

Returns: The ANSI-coded string that the markup represents.
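A minimal sketch of the top-level entry point; the exact escape sequences depend on STYLES, CLEARERS, and the configured color system:

```python
from pytermgui.markup.parsing import parse

ansi = parse("[bold 141]hello[/] world")
print(repr(ansi))
# Something like '\x1b[1m...hello\x1b[0m world\x1b[0m'; note the trailing reset
# added because append_reset defaults to True.

# Hyperlinks use the `~{uri}` tag and are emitted through LINK_TEMPLATE:
print(repr(parse("[~https://example.com]click here")))
```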
```python
def parse_tokens(
    tokens: list[Token],
    optimize: bool = False,
    context: ContextDict | None = None,
    append_reset: bool = True,
    ignore_unknown_tags: bool = True,
) -> str: ...
```
Parses a stream of tokens into the ANSI-coded string they represent.
Args:
- tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or `tokenize_markup`.
- optimize: If set, `optimize_tokens` will optimize the input iterator before usage. This will incur a (minor) performance hit.
- context: The context that aliases and macros found within the tokens will be searched in.
- append_reset: If set, `ClearToken("/")` will be appended to the token iterator, clearing all styles.
- ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags will be silenced.

Returns: The ANSI-coded string that the token stream represents.
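A sketch of driving `parse_tokens` directly with a prepared token list and context; the alias here is hypothetical:

```python
from pytermgui.markup.parsing import (
    create_context_dict,
    parse_tokens,
    tokenize_markup,
)

context = create_context_dict()
context["aliases"]["error"] = "bold 210"  # hypothetical alias

tokens = list(tokenize_markup("[error]something went wrong"))

ansi = parse_tokens(tokens, context=context, append_reset=True)
# The alias is substituted before parsing; unknown tags would be kept as
# literal [tag] text, since ignore_unknown_tags defaults to True.
```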