1"""Tokenizes paragraph content.
2"""
from __future__ import annotations

from typing import TYPE_CHECKING, Callable

from . import rules_inline
from .ruler import Ruler
from .rules_inline.state_inline import StateInline
from .token import Token
from .utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
19"""(state: StateInline, silent: bool) -> matched: bool)
20
21`silent` disables token generation, useful for lookahead.
22"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note: the `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing, and it may change in the future.
#
# Don't use it for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # Rules for pairs separate '**' into its own text tokens, which may be left
    # unused; the rule below merges unused segments back with the rest of the text.
    ("fragments_join", rules_inline.fragments_join),
]
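

# Illustrative sketch, not a shipped rule: a post-processing rule takes only
# the state and returns nothing, mutating `state.tokens` in place (pair-based
# rules typically walk `state.delimiters` instead). The behaviour here is
# invented purely to show the `RuleFuncInline2Type` contract.
def _example_uppercase_text(state: StateInline) -> None:
    for token in state.tokens:
        if token.type == "text":
            token.content = token.content.upper()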


class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler, used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        Rules only report whether they matched; `state.pos` is advanced past
        the token, and the result is cached.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do this here, because no tokens are created.
                # Ideally, though, we'd use a separate private state variable for
                # this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting: just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following
            # case, when the number of `[` is exactly `maxNesting + 1`:
            #
            #     [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when the CM standard allows nested
            # links (we could then prevent links from being parsed in
            # validation mode instead).
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for the input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules. On success, a rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return True

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            # No rule matched: treat the current character as pending text.
            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process the input string and push inline tokens into `tokens`."""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
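

# Usage sketch (illustrative): `parse` is normally driven by the core "inline"
# rule during `md.render`/`md.parseInline`, but it can be exercised directly.
# Run as `python -m markdown_it.parser_inline`; the module path is an assumption.
if __name__ == "__main__":
    from markdown_it import MarkdownIt

    md = MarkdownIt()
    collected: list[Token] = []
    md.inline.parse("*hello* `world`", md, {}, collected)
    print([t.type for t in collected])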