1"""Tokenizes paragraph content."""
2
3from __future__ import annotations
4
5from collections.abc import Callable
6from typing import TYPE_CHECKING
7
8from . import rules_inline
9from .ruler import Ruler
10from .rules_inline.state_inline import StateInline
11from .token import Token
12from .utils import EnvType
13
14if TYPE_CHECKING:
15 from markdown_it import MarkdownIt
16
17
18# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note: the `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use it for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # The pair rules split '**' into separate text tokens, which may be left
    # unused; the rule below merges unused segments back with the rest of the text.
    ("fragments_join", rules_inline.fragments_join),
]
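# Post-processing rules take only the state and return nothing; they run once
# over the whole token stream after `tokenize`. A minimal sketch (hypothetical;
# not registered here), walking the delimiters matched by `balance_pairs`:
#
#     def mark_pairs(state: StateInline) -> None:
#         for delim in state.delimiters:
#             if delim.marker == 0x3D and delim.end != -1:  # '=' with a match
#                 ...  # rewrite the opener/closer text tokens into mark tokens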


class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        No tokens are generated; `state.pos` is advanced past the token,
        and the resulting position is cached for repeated lookahead.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly when the number
            # of `[` is exactly equal to `maxNesting + 1`:
            #
            #   [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround if the CommonMark spec ever allows
            # nested links (we could then prevent links from being parsed in
            # validation mode instead).
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

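    # Usage sketch for `skipToken` (an assumption, modeled on how lookahead
    # helpers step over nested constructs without emitting tokens):
    #
    #     while state.pos < state.posMax:
    #         state.md.inline.skipToken(state)
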
    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for the input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules.
            # On success, a rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return True

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process the input string and push inline tokens into `tokens`."""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
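

# Usage sketch (an assumption; `parse` is normally invoked by the core "inline"
# rule, once per inline token, rather than called directly):
#
#     from markdown_it import MarkdownIt
#     from markdown_it.token import Token
#
#     md = MarkdownIt()
#     tokens: list[Token] = []
#     md.inline.parse("*hello*", md, {}, tokens)
#     # -> tokens now holds em_open, text("hello"), em_close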