pytermgui.markup.parsing
The internals of the TIM engine.
1"""The internals of the TIM engine.""" 2 3from __future__ import annotations 4 5import json 6from typing import Callable, Iterator, Protocol, TypedDict 7from warnings import filterwarnings, warn 8 9from ..colors import str_to_color 10from ..exceptions import ColorSyntaxError, MarkupSyntaxError 11from ..regex import RE_ANSI_NEW as RE_ANSI 12from ..regex import RE_MACRO, RE_MARKUP, RE_POSITION 13from .style_maps import CLEARERS, REVERSE_CLEARERS, REVERSE_STYLES, STYLES 14from .tokens import ( 15 AliasToken, 16 ClearToken, 17 ColorToken, 18 CursorToken, 19 HLinkToken, 20 MacroToken, 21 PlainToken, 22 StyleToken, 23 Token, 24) 25 26# TODO: Improve first-run performance. 27 28filterwarnings("always") 29 30 31LINK_TEMPLATE = "\x1b]8;;{uri}\x1b\\{label}\x1b]8;;\x1b\\" 32 33__all__ = [ 34 "ContextDict", 35 "create_context_dict", 36 "consume_tag", 37 "tokenize_markup", 38 "tokenize_ansi", 39 "optimize_tokens", 40 "optimize_markup", 41 "tokens_to_markup", 42 "get_markup", 43 "parse", 44 "parse_tokens", 45] 46 47 48class MacroType(Protocol): # pylint: disable=too-few-public-methods 49 """A protocol for TIM macros.""" 50 51 def __call__(*args: str) -> str: # pylint: disable=no-method-argument 52 """Applies the macro.""" 53 54 55class ContextDict(TypedDict): 56 """A dictionary to hold context about a markup language's environment. 57 58 It has two sub-dicts: 59 60 - aliases 61 - macros 62 63 For information about what they do and contain, see the 64 [MarkupLanguage docs](pytermgui.markup.language.MarkupLanguage). 65 """ 66 67 aliases: dict[str, str] 68 macros: dict[str, MacroType] 69 70 71def create_context_dict() -> ContextDict: 72 """Creates a new context dictionary, initializing its sub-dicts. 73 74 Returns: 75 A dictionary with `aliases` and `macros` defined as empty sub-dicts. 76 """ 77 78 return {"aliases": {}, "macros": {}} 79 80 81def consume_tag(tag: str) -> Token: # pylint: disable=too-many-return-statements 82 """Consumes a tag text, returns the associated Token.""" 83 84 if tag in STYLES: 85 return StyleToken(tag) 86 87 if tag.startswith("/"): 88 return ClearToken(tag) 89 90 if tag.startswith("!"): 91 matchobj = RE_MACRO.match(tag) 92 93 if matchobj is not None: 94 name, args = matchobj.groups() 95 96 if args is None: 97 return MacroToken(name, tuple()) 98 99 return MacroToken(name, tuple(args.split(":"))) 100 101 if tag.startswith("~"): 102 return HLinkToken(tag[1:]) 103 104 if tag.startswith("(") and tag.endswith(")"): 105 values = tag[1:-1].split(";") 106 if len(values) != 2: 107 raise MarkupSyntaxError( 108 tag, 109 f"should have exactly 2 values separated by `;`, not {len(values)}", 110 "", 111 ) 112 113 return CursorToken(tag[1:-1], *map(int, values)) 114 115 token: Token 116 try: 117 token = ColorToken(tag, str_to_color(tag)) 118 119 except ColorSyntaxError: 120 token = AliasToken(tag) 121 122 finally: 123 return token # pylint: disable=lost-exception 124 125 126def tokenize_markup(text: str) -> Iterator[Token]: 127 """Converts some markup text into a stream of tokens. 128 129 Args: 130 text: Any valid markup. 131 132 Yields: 133 The generated tokens, in the order they occur within the markup. 
134 """ 135 136 cursor = 0 137 length = len(text) 138 has_inverse = False 139 for matchobj in RE_MARKUP.finditer(text): 140 full, escapes, content = matchobj.groups() 141 start, end = matchobj.span() 142 143 if cursor < start: 144 yield PlainToken(text[cursor:start]) 145 146 if not escapes == "": 147 _, remaining = divmod(len(escapes), 2) 148 149 yield PlainToken(full[max(1 - remaining, 1) :]) 150 cursor = end 151 152 continue 153 154 for tag in content.split(): 155 if tag == "inverse": 156 has_inverse = True 157 158 if tag == "/inverse": 159 has_inverse = False 160 161 consumed = consume_tag(tag) 162 if has_inverse: 163 if consumed.markup == "/fg": 164 consumed = ClearToken("/fg") 165 166 elif consumed.markup == "/bg": 167 consumed = ClearToken("/bg") 168 169 yield consumed 170 171 cursor = end 172 173 if cursor < length: 174 yield PlainToken(text[cursor:length]) 175 176 177def tokenize_ansi( # pylint: disable=too-many-locals, too-many-branches, too-many-statements 178 text: str, 179) -> Iterator[Token]: 180 """Converts some ANSI-coded text into a stream of tokens. 181 182 Args: 183 text: Any valid ANSI-coded text. 184 185 Yields: 186 The generated tokens, in the order they occur within the text. 187 """ 188 189 cursor = 0 190 191 for matchobj in RE_ANSI.finditer(text): 192 start, end = matchobj.span() 193 194 csi = matchobj.groups()[0:2] 195 link_osc = matchobj.groups()[2:4] 196 197 if link_osc != (None, None): 198 cursor = end 199 uri, label = link_osc 200 201 yield HLinkToken(uri) 202 yield PlainToken(label) 203 204 continue 205 206 full, content = csi 207 208 if cursor < start: 209 yield PlainToken(text[cursor:start]) 210 211 cursor = end 212 213 code = "" 214 215 # Position 216 posmatch = RE_POSITION.match(full) 217 218 if posmatch is not None: 219 ypos, xpos = posmatch.groups() 220 if not ypos and not xpos: 221 raise ValueError( 222 f"Cannot parse cursor when no position is supplied. Match: {posmatch!r}" 223 ) 224 225 yield CursorToken(content, int(ypos) or None, int(xpos) or None) 226 continue 227 228 parts = content.split(";") 229 230 state = None 231 color_code = "" 232 for part in parts: 233 if state is None: 234 if part in REVERSE_STYLES: 235 yield StyleToken(REVERSE_STYLES[part]) 236 continue 237 238 if part in REVERSE_CLEARERS: 239 yield ClearToken(REVERSE_CLEARERS[part]) 240 continue 241 242 if part in ("38", "48"): 243 state = "COLOR" 244 color_code += part + ";" 245 continue 246 247 # standard colors 248 try: 249 yield ColorToken(part, str_to_color(part)) 250 continue 251 252 except ColorSyntaxError as exc: 253 raise ValueError(f"Could not parse color tag {part!r}.") from exc 254 255 if state != "COLOR": 256 continue 257 258 color_code += part + ";" 259 260 # Ignore incomplete RGB colors 261 if ( 262 color_code.startswith(("38;2;", "48;2;")) 263 and len(color_code.split(";")) != 6 264 ): 265 continue 266 267 try: 268 code = color_code 269 270 if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")): 271 stripped = code[5:-1] 272 273 if code.startswith("4"): 274 stripped = "@" + stripped 275 276 code = stripped 277 278 yield ColorToken(code, str_to_color(code)) 279 280 except ColorSyntaxError: 281 continue 282 283 state = None 284 color_code = "" 285 286 remaining = text[cursor:] 287 if len(remaining) > 0: 288 yield PlainToken(remaining) 289 290 291def eval_alias(text: str, context: ContextDict) -> str: 292 """Evaluates a space-delimited string of alias tags into their underlying value. 293 294 Args: 295 text: A space-separated string containing the aliases. 
296 297 Returns: 298 The space-separated string that the input aliases represent. 299 """ 300 301 aliases = context["aliases"] 302 303 evaluated = "" 304 for tag in text.split(): 305 if tag not in aliases: 306 evaluated += tag + " " 307 continue 308 309 evaluated += eval_alias(aliases[tag], context) 310 311 return evaluated.rstrip(" ") 312 313 314def parse_plain(token: PlainToken, _: ContextDict, __: Callable[[], str]) -> str: 315 """Parses a plain token.""" 316 317 return token.value 318 319 320def parse_color(token: ColorToken, _: ContextDict, __: Callable[[], str]) -> str: 321 """Parses a color token.""" 322 323 return token.color.sequence 324 325 326def parse_style(token: StyleToken, _: ContextDict, __: Callable[[], str]) -> str: 327 """Parses a style token.""" 328 329 index = STYLES[token.value] 330 331 return f"\x1b[{index}m" 332 333 334def parse_macro( 335 token: MacroToken, context: ContextDict, get_full: Callable[[], str] 336) -> tuple[MacroType, tuple[str, ...]]: 337 """Parses a macro token. 338 339 Returns: 340 A tuple containing the callable bound to the name, as well as the arguments 341 passed to it. 342 """ 343 344 func = context["macros"].get(token.value) 345 346 if func is None: 347 dump = json.dumps(context["macros"], indent=2, default=str) 348 349 raise MarkupSyntaxError( 350 token.value, f"not defined in macro context: {dump}", get_full() 351 ) 352 353 return func, token.arguments 354 355 356def parse_alias( 357 token: AliasToken, context: ContextDict, get_full: Callable[[], str] 358) -> str: 359 """Parses an alias token.""" 360 361 if token.value not in context["aliases"]: 362 dump = json.dumps(context["aliases"], indent=2, default=str) 363 364 raise MarkupSyntaxError( 365 token.value, f"not defined in alias context: {dump}", get_full() 366 ) 367 368 meaning = context["aliases"][token.value] 369 370 return eval_alias(meaning, context).rstrip(" ") 371 372 373def parse_clear(token: ClearToken, _: ContextDict, get_full: Callable[[], str]) -> str: 374 """Parses a clearer token.""" 375 376 index = CLEARERS.get(token.value) 377 if index is None: 378 raise MarkupSyntaxError( 379 token.value, "not a recognized clearer or alias", get_full() 380 ) 381 382 return f"\x1b[{index}m" 383 384 385def parse_cursor(token: CursorToken, _: ContextDict, __: Callable[[], str]) -> str: 386 """Parses a cursor token.""" 387 388 ypos, xpos = map(lambda i: "" if i is None else i, token) 389 390 return f"\x1b[{ypos};{xpos}H" 391 392 393def optimize_tokens(tokens: list[Token]) -> Iterator[Token]: 394 """Optimizes a stream of tokens, only yielding functionally relevant ones. 395 396 Args: 397 tokens: Any list of Token objects. Usually obtained from `tokenize_markup` 398 or `tokenize_ansi`. 399 400 Yields: 401 All those tokens within the input iterator that are functionally relevant, 402 keeping their order. 403 """ 404 405 previous: list[Token] = [] 406 current_tag_group: list[Token] = [] 407 408 def _diff_previous() -> Iterator[Token]: 409 """Find difference from the previously active list of tokens.""" 410 411 applied = previous.copy() 412 413 for tkn in current_tag_group: 414 targets = [] 415 416 clearer = Token.is_clear(tkn) 417 if Token.is_clear(tkn): 418 targets = [tkn.targets(tag) for tag in applied] 419 420 if tkn in previous and not clearer: 421 continue 422 423 if clearer and not any(targets): 424 continue 425 426 applied.append(tkn) 427 yield tkn 428 429 def _remove_redundant_color(token: Token) -> None: 430 """Removes non-functional colors. 
431 432 These happen in the following ways: 433 - Multiple colors of the same channel (fg/bg) are present. 434 - A color is applied, then a clearer clears it. 435 """ 436 437 for applied in current_tag_group.copy(): 438 if Token.is_clear(applied) and applied.targets(token): 439 current_tag_group.remove(applied) 440 441 if not Token.is_color(applied): 442 continue 443 444 old = applied.color 445 446 if old.background == new.background: 447 current_tag_group.remove(applied) 448 449 for token in tokens: 450 if Token.is_plain(token): 451 yield from _diff_previous() 452 yield token 453 454 previous = current_tag_group.copy() 455 456 continue 457 458 if Token.is_color(token): 459 new = token.color 460 461 _remove_redundant_color(token) 462 463 if not any(token.markup == applied.markup for applied in current_tag_group): 464 current_tag_group.append(token) 465 466 continue 467 468 if token.is_style(): 469 if not any(token == tag for tag in current_tag_group): 470 current_tag_group.append(token) 471 472 continue 473 474 if Token.is_clear(token): 475 applied = False 476 for tag in current_tag_group.copy(): 477 if token.targets(tag) or token == tag: 478 current_tag_group.remove(tag) 479 applied = True 480 481 if not applied: 482 continue 483 484 current_tag_group.append(token) 485 486 yield from _diff_previous() 487 488 489def tokens_to_markup(tokens: list[Token]) -> str: 490 """Converts a token stream into the markup of its tokens. 491 492 Args: 493 tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or 494 `tokenize_ansi`. 495 496 Returns: 497 The markup the given tokens represent. 498 """ 499 500 tags: list[Token] = [] 501 markup = "" 502 503 for token in tokens: 504 if token.is_plain(): 505 if len(tags) > 0: 506 markup += f"[{' '.join(tag.markup for tag in tags)}]" 507 508 markup += token.value 509 tags = [] 510 511 else: 512 tags.append(token) 513 514 if len(tags) > 0: 515 markup += f"[{' '.join(tag.markup for tag in tags)}]" 516 517 return markup 518 519 520def get_markup(text: str) -> str: 521 """Gets the markup representing an ANSI-coded string.""" 522 523 return tokens_to_markup(list(tokenize_ansi(text))) 524 525 526def optimize_markup(markup: str) -> str: 527 """Optimizes markup by tokenizing it, optimizing the tokens and converting it back to markup.""" 528 529 return tokens_to_markup(list(optimize_tokens(list(tokenize_markup(markup))))) 530 531 532PARSERS = { 533 PlainToken: parse_plain, 534 ColorToken: parse_color, 535 StyleToken: parse_style, 536 MacroToken: parse_macro, 537 AliasToken: parse_alias, 538 ClearToken: parse_clear, 539 CursorToken: parse_cursor, 540} 541 542 543def _apply_macros( 544 text: str, macros: Iterator[tuple[MacroType, tuple[str, ...]]] 545) -> str: 546 """Applies macros to the given text. 547 548 Args: 549 text: The plain text the macros will apply to. 550 macros: Any iterator of MacroTokens that will be applied. 551 552 Returns: 553 The input plain text, with all macros applied to it. The macros will be applied 554 in the order they appear in. 555 """ 556 557 for method, args in macros: 558 if len(args) > 0: 559 text = method(*args, text) 560 continue 561 562 text = method(text) 563 564 return text 565 566 567def _sub_aliases(tokens: list[Token], context: ContextDict) -> list[Token]: 568 """Substitutes all AliasTokens to their underlying values. 569 570 Args: 571 tokens: Any list of Tokens. When this iterator contains nothing 572 that can be interpreted as an alias, the same iterator turned into 573 a list will be returned. 
574 context: The context that aliases will be searched in. 575 """ 576 577 output: list[Token] = [] 578 579 # It's more computationally efficient to create this lambda once and reuse it 580 # every time. There is no need to define a full function, as it just returns 581 # a function return. 582 get_full = ( 583 lambda: tokens_to_markup( # pylint: disable=unnecessary-lambda-assignment 584 tokens 585 ) 586 ) 587 588 for token in tokens: 589 if token.value in context["aliases"] and ( 590 Token.is_clear(token) or Token.is_macro(token) or Token.is_alias(token) 591 ): 592 if Token.is_clear(token) or Token.is_macro(token): 593 token = AliasToken(token.value) 594 595 if Token.is_alias(token): 596 aliases_parsed = parse_alias(token, context, get_full) 597 output.extend(list(tokenize_markup(f"[{aliases_parsed}]"))) 598 599 continue 600 601 if Token.is_macro(token) and token.value == "!link": 602 warn( 603 "Hyperlinks are no longer implemented as macros." 604 + " Prefer using the `~{uri}` syntax.", 605 DeprecationWarning, 606 stacklevel=4, 607 ) 608 609 output.append(HLinkToken(":".join(token.arguments))) 610 continue 611 612 output.append(token) 613 614 return output 615 616 617def parse_tokens( # pylint: disable=too-many-branches, too-many-locals 618 tokens: list[Token], 619 optimize: bool = False, 620 context: ContextDict | None = None, 621 append_reset: bool = True, 622 ignore_unknown_tags: bool = True, 623) -> str: 624 """Parses a stream of tokens into the ANSI-coded string they represent. 625 626 Args: 627 tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or 628 `tokenize_markup`. 629 optimize: If set, `optimize_tokens` will optimize the input iterator before 630 usage. This will incur a (minor) performance hit. 631 context: The context that aliases and macros found within the tokens will be 632 searched in. 633 append_reset: If set, `ClearToken("/")` will be appended to the token iterator, 634 clearing all styles. 635 ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags 636 will be silenced. 637 638 Returns: 639 The ANSI-coded string that the token stream represents. 640 """ 641 642 if context is None: 643 context = create_context_dict() 644 645 token_list = list(_sub_aliases(tokens, context)) 646 647 # It's more computationally efficient to create this lambda once and reuse it 648 # every time. There is no need to define a full function, as it just returns 649 # a function return. 
650 get_full = ( 651 lambda: tokens_to_markup( # pylint: disable=unnecessary-lambda-assignment 652 tokens 653 ) 654 ) 655 656 if optimize: 657 token_list = list(optimize_tokens(tokens)) 658 659 if append_reset: 660 token_list.append(ClearToken("/")) 661 662 link = None 663 output = "" 664 segment = "" 665 macros: list[MacroToken] = [] 666 unknown_aliases: list[Token] = [] 667 668 for token in token_list: 669 if token.is_plain(): 670 value = _apply_macros( 671 token.value, (parse_macro(macro, context, get_full) for macro in macros) 672 ) 673 674 if len(unknown_aliases) > 0: 675 output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]" 676 unknown_aliases = [] 677 678 output += segment + ( 679 value if link is None else LINK_TEMPLATE.format(uri=link, label=value) 680 ) 681 682 segment = "" 683 continue 684 685 if token.is_hyperlink(): 686 link = token.value 687 continue 688 689 if Token.is_macro(token): 690 macros.append(token) 691 continue 692 693 if Token.is_clear(token): 694 if token.value in ("/", "/~"): 695 link = None 696 697 found = False 698 for macro in macros.copy(): 699 if token.targets(macro): 700 macros.remove(macro) 701 found = True 702 break 703 704 if found and token.value != "/": 705 continue 706 707 if token.value.startswith("/!"): 708 raise ValueError( 709 f"Cannot use clearer {token.value!r} with nothing to target." 710 ) 711 712 try: 713 segment += PARSERS[type(token)](token, context, get_full) # type: ignore 714 715 except MarkupSyntaxError: 716 if not ignore_unknown_tags: 717 raise 718 719 unknown_aliases.append(token) 720 721 if len(unknown_aliases) > 0: 722 output += f"[{' '.join(tkn.value for tkn in unknown_aliases)}]" 723 724 output += segment 725 726 return output 727 728 729def parse( 730 text: str, 731 optimize: bool = False, 732 context: ContextDict | None = None, 733 append_reset: bool = True, 734 ignore_unknown_tags: bool = True, 735) -> str: 736 """Parses markup into the ANSI-coded string it represents. 737 738 Args: 739 text: Any valid markup. 740 optimize: If set, `optimize_tokens` will optimize the tokens found within the 741 input markup before usage. This will incur a (minor) performance hit. 742 context: The context that aliases and macros found within the markup will be 743 searched in. 744 append_reset: If set, `[/]` will be appended to the token iterator, clearing all 745 styles. 746 ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags 747 will be silenced. 748 749 Returns: 750 The ANSI-coded string that the markup represents. 751 """ 752 753 if context is None: 754 context = create_context_dict() 755 756 if append_reset and not text.endswith("/]"): 757 text += "[/]" 758 759 tokens = list(tokenize_markup(text)) 760 761 return parse_tokens( 762 tokens, 763 optimize=optimize, 764 context=context, 765 append_reset=append_reset, 766 ignore_unknown_tags=ignore_unknown_tags, 767 )
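Taken together, the public helpers convert between markup and ANSI in both directions. A minimal sketch; the `bold` and `141` tags are illustrative defaults, and the exact escape sequences depend on the color implementation:

```python
from pytermgui.markup.parsing import get_markup, parse

ansi = parse("[bold 141]Hello[/] world")  # markup -> ANSI sequences
markup = get_markup(ansi)                 # ANSI sequences -> markup
print(repr(ansi), markup)
```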
class ContextDict(TypedDict)
A dictionary to hold context about a markup language's environment.

It has two sub-dicts:

- aliases
- macros

For information about what they do and contain, see the MarkupLanguage docs (pytermgui.markup.language.MarkupLanguage).
def create_context_dict() -> ContextDict
Creates a new context dictionary, initializing its sub-dicts.

Returns
A dictionary with `aliases` and `macros` defined as empty sub-dicts.
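To make the two sub-dicts concrete, here is a small sketch of filling a fresh context and handing it to `parse`. The `title` alias and the `!upper` macro are made up for illustration; storing the macro under its `!`-prefixed name is an assumption based on the lookup in `parse_macro` above.

```python
from pytermgui.markup.parsing import create_context_dict, parse

context = create_context_dict()
context["aliases"]["title"] = "bold 210"                  # alias tag -> markup it expands to
context["macros"]["!upper"] = lambda text: text.upper()   # assumed key format: "!name"

print(parse("[title !upper]hello world", context=context))
```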
def consume_tag(tag: str) -> Token
Consumes a tag text, returns the associated Token.
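Roughly, each tag form maps to one token type. A sketch of the dispatch, with illustrative tag values and abbreviated results in comments:

```python
from pytermgui.markup.parsing import consume_tag

consume_tag("bold")                  # StyleToken: a known style name
consume_tag("/fg")                   # ClearToken: anything starting with "/"
consume_tag("!upper(a:b)")           # MacroToken: "!" prefix, arguments split on ":"
consume_tag("~https://example.com")  # HLinkToken: "~" prefix, value is the URI
consume_tag("(10;20)")               # CursorToken: "(y;x)" position
consume_tag("210")                   # ColorToken: anything str_to_color accepts
consume_tag("my-alias")              # AliasToken: fallback when color parsing fails
```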
def tokenize_markup(text: str) -> Iterator[Token]
Converts some markup text into a stream of tokens.
Args
- text: Any valid markup.
Yields
The generated tokens, in the order they occur within the markup.
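A quick sketch of tokenizing a markup string (the tags are illustrative):

```python
from pytermgui.markup.parsing import tokenize_markup

for token in tokenize_markup("[bold 141]Hello [!upper]world"):
    print(token)
# Yields a StyleToken, a ColorToken, a PlainToken, a MacroToken and another
# PlainToken, in that order.
```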
def tokenize_ansi(text: str) -> Iterator[Token]
Converts some ANSI-coded text into a stream of tokens.
Args
- text: Any valid ANSI-coded text.
Yields
The generated tokens, in the order they occur within the text.
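The inverse direction, sketched on a short ANSI string; this assumes the default style maps translate "1" to bold and "0" to the full reset:

```python
from pytermgui.markup.parsing import tokenize_ansi

for token in tokenize_ansi("\x1b[1m\x1b[38;5;141mHello\x1b[0m"):
    print(token)
# Roughly: a StyleToken (bold), a ColorToken (256-color index 141),
# a PlainToken ("Hello") and a ClearToken (reset).
```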
def optimize_tokens(tokens: list[Token]) -> Iterator[Token]
Optimizes a stream of tokens, only yielding functionally relevant ones.

Args
- tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or `tokenize_ansi`.

Yields
All those tokens within the input iterator that are functionally relevant, keeping their order.
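A sketch of the effect: duplicate styles and overridden colors of the same channel should be dropped.

```python
from pytermgui.markup.parsing import optimize_tokens, tokenize_markup, tokens_to_markup

tokens = list(tokenize_markup("[bold bold 141 210]hello"))
print(tokens_to_markup(list(optimize_tokens(tokens))))
# Expected: something like "[bold 210]hello" -- the duplicate bold and the
# overridden 141 are functionally irrelevant.
```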
def optimize_markup(markup: str) -> str
Optimizes markup by tokenizing it, optimizing the tokens, and converting them back to markup.
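The markup-level convenience wrapper, sketched on a string that re-applies the same color mid-word:

```python
from pytermgui.markup.parsing import optimize_markup

print(optimize_markup("[141]he[141]llo"))  # should collapse to roughly "[141]hello"
```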
def tokens_to_markup(tokens: list[Token]) -> str
Converts a token stream into the markup of its tokens.

Args
- tokens: Any list of Token objects. Usually obtained from `tokenize_markup` or `tokenize_ansi`.

Returns
The markup the given tokens represent.
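A short round-trip sketch; tag order within a group is preserved:

```python
from pytermgui.markup.parsing import tokenize_markup, tokens_to_markup

tokens = list(tokenize_markup("[bold 141]hello[/] world"))
print(tokens_to_markup(tokens))  # "[bold 141]hello[/] world"
```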
def get_markup(text: str) -> str
Gets the markup representing an ANSI-coded string.
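A sketch, again assuming the default style maps translate "1" to bold and "0" to the reset:

```python
from pytermgui.markup.parsing import get_markup

print(get_markup("\x1b[1mBold\x1b[0m text"))  # e.g. "[bold]Bold[/] text"
```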
def parse(text: str, optimize: bool = False, context: ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str
Parses markup into the ANSI-coded string it represents.

Args
- text: Any valid markup.
- optimize: If set, `optimize_tokens` will optimize the tokens found within the input markup before usage. This will incur a (minor) performance hit.
- context: The context that aliases and macros found within the markup will be searched in.
- append_reset: If set, `[/]` will be appended to the token iterator, clearing all styles.
- ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags will be silenced.

Returns
The ANSI-coded string that the markup represents.
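A minimal sketch of the markup-to-ANSI direction; the exact escape sequences depend on the color implementation:

```python
from pytermgui.markup.parsing import parse

ansi = parse("[bold 141]Hello[/fg] world")
print(repr(ansi))
# Roughly: "\x1b[1m" + the sequence for color 141 + "Hello" + a foreground
# clearer + " world", followed by the reset added by append_reset.
```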
def parse_tokens(tokens: list[Token], optimize: bool = False, context: ContextDict | None = None, append_reset: bool = True, ignore_unknown_tags: bool = True) -> str
Parses a stream of tokens into the ANSI-coded string they represent.

Args
- tokens: Any list of Tokens, usually obtained from either `tokenize_ansi` or `tokenize_markup`.
- optimize: If set, `optimize_tokens` will optimize the input iterator before usage. This will incur a (minor) performance hit.
- context: The context that aliases and macros found within the tokens will be searched in.
- append_reset: If set, `ClearToken("/")` will be appended to the token iterator, clearing all styles.
- ignore_unknown_tags: If set, the `MarkupSyntaxError` coming from unknown tags will be silenced.

Returns
The ANSI-coded string that the token stream represents.
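The lower-level sibling of `parse`, sketched with tokens produced by `tokenize_markup`:

```python
from pytermgui.markup.parsing import parse_tokens, tokenize_markup

tokens = list(tokenize_markup("[bold 141]Hello"))
print(repr(parse_tokens(tokens)))  # ANSI string for the tokens, reset appended by default
```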