Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown_it/parser_inline.py

1"""Tokenizes paragraph content."""

3from __future__ import annotations

5from collections.abc import Callable

6import functools

7import re

8from typing import TYPE_CHECKING

10from . import rules_inline

11from .ruler import Ruler

12from .rules_inline.state_inline import StateInline

13from .token import Token

14from .utils import EnvType

16if TYPE_CHECKING:

17 from markdown_it import MarkdownIt

# Default set of characters that terminate a text token and allow inline rules to fire.
# '{}$%@~+=:' reserved for extensions.
# Note: Don't confuse with "Markdown ASCII Punctuation" chars.
# http://spec.commonmark.org/0.15/#ascii-punctuation-character
#
# Immutable on purpose: the set is shared by every parser instance and feeds the
# cached default regex, so per-instance additions are tracked separately.
_DEFAULT_TERMINATORS: frozenset[str] = frozenset(
    {
        "\n",
        "!",
        "#",
        "$",
        "%",
        "&",
        "*",
        "+",
        "-",
        ":",
        "<",
        "=",
        ">",
        "@",
        "[",
        "\\",
        "]",
        "^",
        "_",
        "`",
        "{",
        "}",
        "~",
    }
)

# Lazily compiled regex for the default terminator set. The @cache ensures it is
# compiled at most once (on first ParserInline instantiation) and shared across all
# instances that have not added extra chars, keeping __init__ cost near zero.
@functools.cache
def _default_terminator_re() -> re.Pattern[str]:
    """Return the compiled character-class regex matching any default terminator.

    :return: a pattern of the form ``[...]`` built from ``_DEFAULT_TERMINATORS``;
        the same pattern object is returned on every call.
    """
    # Sorting only fixes the order of characters inside the class, so the pattern
    # text is the same in every process (set iteration order is not); the set of
    # matched characters is unaffected. re.escape() neutralises characters that
    # are special inside a class (``]``, ``^``, ``-``, ``\``), so any order is safe.
    return re.compile("[" + re.escape("".join(sorted(_DEFAULT_TERMINATORS))) + "]")

# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""
# Default inline rule chain as (name, rule) pairs, in the order they are pushed
# onto the first ruler.
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
# Default post-processing chain as (name, rule) pairs, in the order they are
# pushed onto the second ruler.
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # rules for pairs separate '**' into its own text tokens, which may be left unused,
    # rule below merges unused segments back with the rest of the text
    ("fragments_join", rules_inline.fragments_join),
]

class ParserInline:
    """Inline tokenizer.

    Holds two rule chains: ``ruler`` (rules tried position by position while
    scanning the source) and ``ruler2`` (rules run once over the finished state),
    plus the regex of characters that stop the ``text`` rule.
    """

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)
        # Characters that stop the text rule, allowing other inline rules to fire.
        # _extra_terminator_chars starts empty and only grows when
        # add_terminator_char() is called with a char outside the defaults.
        self._extra_terminator_chars: set[str] = set()
        # Pre-compiled regex shared with all default instances (no copy in the common path).
        self.terminator_re: re.Pattern[str] = _default_terminator_re()

    def add_terminator_char(self, ch: str) -> None:
        """Register a character that stops the ``text`` rule, allowing inline rules to fire.

        This lets plugins declare which characters their inline rules react to,
        mirroring the ``MARKER`` mechanism in the Rust markdown-it implementation.

        Calling this with a default terminator, or with a character that was
        already registered on this instance, is a no-op (no recompilation).

        :param ch: A single character to add to the terminator set.
        """
        if ch not in _DEFAULT_TERMINATORS and ch not in self._extra_terminator_chars:
            self._extra_terminator_chars.add(ch)
            # Rebuild a per-instance pattern; the cached default pattern is left
            # untouched so other instances are unaffected.
            self.terminator_re = re.compile(
                "["
                + re.escape(
                    "".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars)
                )
                + "]"
            )

    def skipToken(self, state: StateInline) -> None:
        """Skip single token by running all rules in validation mode.

        Nothing is returned: the outcome is the new ``state.pos``, which is also
        stored in ``state.cache`` keyed by the starting position so a repeated
        skip from the same position is answered from the cache.

        :param state: inline state whose ``pos`` is advanced in place.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            #       when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            #       [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            #       (we can replace it by preventing links from being parsed in
            #       validation mode)
            #
            state.pos = state.posMax

        # No rule matched (or rules were not tried): step one position forward.
        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range.

        :param state: inline state; ``pos``, ``pending`` and the token list are
            updated in place until ``pos`` reaches ``posMax``.
        """
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true

            # Reset on every iteration: when the nesting limit is reached the
            # rules are not tried, and a stale True from the previous position
            # would `continue` forever without advancing `state.pos`.
            ok = False

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            # Nothing matched here: treat the character as plain text.
            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`

        :param src: inline source text to tokenize.
        :param md: parser instance handed to the inline state.
        :param env: environment object handed to the inline state.
        :param tokens: output list handed to the inline state.
        :return: ``state.tokens`` after tokenizing and post-processing.
        """
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        # Post-processing pass (pair balancing, emphasis, fragment merging, ...).
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown_it/parser_inline.py: 95%

82 statements