1"""Join raw text tokens with the rest of the text
2
3This is set as a separate rule to provide an opportunity for plugins
4to run text replacements after text join, but before escape join.
5
6For example, `\\:)` shouldn't be replaced with an emoji.
7"""
8
9from __future__ import annotations
10
11from ..token import Token
12from .state_core import StateCore
13
14
def text_join(state: StateCore) -> None:
    """Merge runs of adjacent text children inside every inline token.

    Children of type ``text_special`` are retagged as ``text`` in place.
    Each maximal run of consecutive text children is then collapsed into
    the first token of the run, whose ``content`` becomes the concatenation
    of the whole run. Non-text children are kept as they are, in order.

    The ``children`` attribute of every ``inline`` token is replaced with
    the newly built list (an empty list when it was ``None`` or empty).
    Tokens of any other type are left untouched.
    """
    # Snapshot the inline tokens up front, then rewrite their children.
    inline_tokens = [tok for tok in state.tokens if tok.type == "inline"]

    for inline in inline_tokens:
        merged: list[Token] = []
        run_head = None  # first text token of the run being collected
        fragments: list[str] = []  # contents of every token in that run

        for child in inline.children or []:
            if child.type == "text_special":
                child.type = "text"

            if child.type == "text":
                if run_head is None:
                    # A new run starts: this token survives in the output.
                    run_head = child
                    fragments = [child.content]
                    merged.append(child)
                else:
                    # Absorbed into the run head; the token itself is dropped.
                    fragments.append(child.content)
                continue

            # A non-text child terminates the current run, if any. Content is
            # rewritten with a single join, and only when something was
            # actually merged into the head.
            if run_head is not None and len(fragments) > 1:
                run_head.content = "".join(fragments)
            run_head = None
            merged.append(child)

        # Flush a run that extends to the end of the children list.
        if run_head is not None and len(fragments) > 1:
            run_head.content = "".join(fragments)

        inline.children = merged