1from __future__ import annotations
2
3from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
4from contextlib import contextmanager
5from typing import Any, Literal, overload
6
7from . import helpers, presets
8from .common import normalize_url, utils
9from .parser_block import ParserBlock
10from .parser_core import ParserCore
11from .parser_inline import ParserInline
12from .renderer import RendererHTML, RendererProtocol
13from .rules_core.state_core import StateCore
14from .token import Token
15from .utils import EnvType, OptionsDict, OptionsType, PresetType
16
17try:
18 import linkify_it
19except ModuleNotFoundError:
20 linkify_it = None
21
22
# Built-in preset configurations, keyed by the name that can be passed as a
# string to ``MarkdownIt(config=...)`` / ``MarkdownIt.configure(...)``.
# Each preset module's ``make()`` is called once, at import time.
_PRESETS: dict[str, PresetType] = {
    preset_name: preset_module.make()
    for preset_name, preset_module in (
        ("default", presets.default),
        ("js-default", presets.js_default),
        ("zero", presets.zero),
        ("commonmark", presets.commonmark),
        ("gfm-like", presets.gfm_like),
    )
}
30
31
class MarkdownIt:
    """Main parser class.

    Ties together the core, block and inline parser chains with a renderer,
    and exposes helpers to configure, enable and disable their rules.
    """

    def __init__(
        self,
        config: str | PresetType = "commonmark",
        options_update: Mapping[str, Any] | None = None,
        *,
        renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
    ):
        """Main parser class

        :param config: name of configuration to load or a pre-defined dictionary
        :param options_update: dictionary that will be merged into ``config["options"]``
        :param renderer_cls: the class to load as the renderer:
            ``self.renderer = renderer_cls(self)``
        :raises TypeError: if ``options_update`` is truthy but not a mapping
        """
        # add modules
        self.utils = utils
        self.helpers = helpers

        # initialise classes
        self.inline = ParserInline()
        self.block = ParserBlock()
        self.core = ParserCore()
        self.renderer = renderer_cls(self)
        # linkify-it is an optional dependency; ``None`` when it is not installed
        self.linkify = linkify_it.LinkifyIt() if linkify_it else None

        # set the configuration
        if options_update and not isinstance(options_update, Mapping):
            # catch signature change where renderer_cls was not used as a key-word
            raise TypeError(
                f"options_update should be a mapping: {options_update}"
                "\n(Perhaps you intended this to be the renderer_cls?)"
            )
        self.configure(config, options_update=options_update)

    def __repr__(self) -> str:
        return f"{self.__class__.__module__}.{self.__class__.__name__}()"

    @overload
    def __getitem__(self, name: Literal["inline"]) -> ParserInline: ...

    @overload
    def __getitem__(self, name: Literal["block"]) -> ParserBlock: ...

    @overload
    def __getitem__(self, name: Literal["core"]) -> ParserCore: ...

    @overload
    def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: ...

    @overload
    def __getitem__(self, name: str) -> Any: ...

    def __getitem__(self, name: str) -> Any:
        """Look up a component (``inline``, ``block``, ``core`` or ``renderer``) by name.

        :raises KeyError: if ``name`` is not one of the four component names
        """
        return {
            "inline": self.inline,
            "block": self.block,
            "core": self.core,
            "renderer": self.renderer,
        }[name]

    def set(self, options: OptionsType) -> None:
        """Set parser options (in the same format as in constructor).
        Probably, you will never need it, but you can change options after constructor call.

        __Note:__ To achieve the best possible performance, don't modify a
        `markdown-it` instance options on the fly. If you need multiple configurations
        it's best to create multiple instances and initialize each with separate config.
        """
        self.options = OptionsDict(options)

    def configure(
        self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
    ) -> MarkdownIt:
        """Batch load of all options and component settings.
        This is an internal method, and you probably will not need it.
        But if you will - see available presets and data structure
        [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

        We strongly recommend to use presets instead of direct config loads.
        That will give better compatibility with next versions.

        :param presets: name of a built-in preset, or a preset dictionary
        :param options_update: values merged over the preset's ``"options"``
        :returns: ``self`` (chainable)
        :raises KeyError: if ``presets`` is a string that names no known preset
        :raises ValueError: if the resolved configuration is empty
        """
        # NOTE: the ``presets`` parameter shadows the ``presets`` module here
        if isinstance(presets, str):
            if presets not in _PRESETS:
                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
            config = _PRESETS[presets]
        else:
            config = presets

        if not config:
            raise ValueError("Wrong `markdown-it` config, can't be empty")

        # ``or {}`` also covers an explicit ``"options": None`` entry
        options = config.get("options", {}) or {}
        if options_update:
            options = {**options, **options_update}  # type: ignore

        self.set(options)  # type: ignore

        if "components" in config:
            for name, component in config["components"].items():
                rules = component.get("rules", None)
                if rules:
                    self[name].ruler.enableOnly(rules)
                rules2 = component.get("rules2", None)
                if rules2:
                    self[name].ruler2.enableOnly(rules2)

        return self

    def get_all_rules(self) -> dict[str, list[str]]:
        """Return the names of all rules, as reported by each ruler's ``get_all_rules``.

        The result is keyed by chain name (``core``, ``block``, ``inline``), plus
        ``inline2`` for the second inline ruler.
        See :meth:`get_active_rules` for the currently active subset.
        """
        rules = {
            chain: self[chain].ruler.get_all_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_all_rules()
        return rules

    def get_active_rules(self) -> dict[str, list[str]]:
        """Return the names of all active rules.

        The result is keyed by chain name (``core``, ``block``, ``inline``), plus
        ``inline2`` for the second inline ruler.
        """
        rules = {
            chain: self[chain].ruler.get_active_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_active_rules()
        return rules

    def enable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """Enable list or rules. (chainable)

        :param names: rule name or list of rule names to enable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.
        :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set.

        It will automatically find appropriate components,
        containing rules with given names. If rule not found, and `ignoreInvalid`
        not set - throws exception.

        Example::

            md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')

        """
        if isinstance(names, str):
            names = [names]
        else:
            # Materialise once: ``names`` is iterated by every ruler and again
            # below, so a one-shot iterable (e.g. a generator) would be
            # exhausted after the first ruler.
            names = list(names)

        result: list[str] = []
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.enable(names, True))
        result.extend(self.inline.ruler2.enable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")

        return self

    def disable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)

        :param names: rule name or list of rule names to disable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.
        :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set.

        """
        if isinstance(names, str):
            names = [names]
        else:
            # Materialise once: ``names`` is iterated by every ruler and again
            # below, so a one-shot iterable (e.g. a generator) would be
            # exhausted after the first ruler.
            names = list(names)

        result: list[str] = []
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.disable(names, True))
        result.extend(self.inline.ruler2.disable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
        return self

    @contextmanager
    def reset_rules(self) -> Generator[None, None, None]:
        """A context manager, that will reset the current enabled rules on exit.

        The rules active on entry are restored even if the ``with`` body raises.
        """
        chain_rules = self.get_active_rules()
        try:
            yield
        finally:
            # restore in ``finally`` so an exception in the body cannot leave
            # the parser with a modified rule set
            for chain, rules in chain_rules.items():
                if chain != "inline2":
                    self[chain].ruler.enableOnly(rules)
            self.inline.ruler2.enableOnly(chain_rules["inline2"])

    def add_render_rule(
        self, name: str, function: Callable[..., Any], fmt: str = "html"
    ) -> None:
        """Add a rule for rendering a particular Token type.

        Only applied when ``renderer.__output__ == fmt``

        :param name: key under which the rule is stored in ``renderer.rules``
        :param function: plain function; it is bound to the renderer, so it
            receives the renderer as its first argument
        :param fmt: output format the rule applies to
        """
        if self.renderer.__output__ == fmt:
            # ``__get__`` turns the function into a method bound to the renderer
            self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore

    def use(
        self, plugin: Callable[..., None], *params: Any, **options: Any
    ) -> MarkdownIt:
        """Load specified plugin with given params into current parser instance. (chainable)

        It's just a sugar to call `plugin(md, params)` with currying.

        Example::

            def func(tokens, idx):
                tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
            md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)

        """
        plugin(self, *params, **options)
        return self

    def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
        """Parse the source string to a token stream

        :param src: source string
        :param env: environment sandbox
        :raises TypeError: if ``env`` is not a ``MutableMapping`` or ``src`` is not a string

        Parse input string and return list of block tokens (special token type
        "inline" will contain list of inline tokens).

        `env` is used to pass data between "distributed" rules and return additional
        metadata like reference info, needed for the renderer. It also can be used to
        inject data in specific cases. Usually, you will be ok to pass `{}`,
        and then pass updated object to renderer.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        self.core.process(state)
        return state.tokens

    def render(self, src: str, env: EnvType | None = None) -> Any:
        """Render markdown string into html. It does all magic for you :).

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        `env` can be used to inject additional metadata (`{}` by default).
        But you will not need it with high probability. See also comment
        in [[MarkdownIt.parse]].
        """
        # create the env here so the parser and the renderer share one object
        env = {} if env is None else env
        return self.renderer.render(self.parse(src, env), self.options, env)

    def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
        """The same as [[MarkdownIt.parse]] but skip all block rules.

        :param src: source string
        :param env: environment sandbox
        :raises TypeError: if ``env`` is not a ``MutableMapping`` or ``src`` is not a string

        It returns the
        block tokens list with the single `inline` element, containing parsed inline
        tokens in `children` property. Also updates `env` object.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        state.inlineMode = True
        self.core.process(state)
        return state.tokens

    def renderInline(self, src: str, env: EnvType | None = None) -> Any:
        """Similar to [[MarkdownIt.render]] but for single paragraph content.

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        Result will NOT be wrapped into `<p>` tags.
        """
        # create the env here so the parser and the renderer share one object
        env = {} if env is None else env
        return self.renderer.render(self.parseInline(src, env), self.options, env)

    # link methods

    def validateLink(self, url: str) -> bool:
        """Validate if the URL link is allowed in output.

        This validator can prohibit more than really needed to prevent XSS.
        It's a tradeoff to keep code simple and to be secure by default.

        Note: the url should be normalized at this point, and existing entities decoded.
        """
        return normalize_url.validateLink(url)

    def normalizeLink(self, url: str) -> str:
        """Normalize destination URLs in links

        ::

            [label]:   destination   'title'
                    ^^^^^^^^^^^
        """
        return normalize_url.normalizeLink(url)

    def normalizeLinkText(self, link: str) -> str:
        """Normalize autolink content

        ::

            <destination>
            ~~~~~~~~~~~
        """
        return normalize_url.normalizeLinkText(link)