Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/util.py: 77%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Python Markdown
3# A Python implementation of John Gruber's Markdown.
5# Documentation: https://python-markdown.github.io/
6# GitHub: https://github.com/Python-Markdown/markdown/
7# PyPI: https://pypi.org/project/Markdown/
9# Started by Manfred Stienstra (http://www.dwerg.net/).
10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
11# Currently maintained by Waylan Limberg (https://github.com/waylan),
12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
16# Copyright 2004 Manfred Stienstra (the original version)
18# License: BSD (see LICENSE.md for details).
20"""
This module contains various constants, classes and functions which get referenced and used
throughout the code base.
23"""
25from __future__ import annotations
27import re
28import sys
29import warnings
30from functools import wraps, lru_cache
31from itertools import count
32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
34if TYPE_CHECKING: # pragma: no cover
35 from markdown import Markdown
36 import xml.etree.ElementTree as etree
# Type variable for the kind of item a `Registry` holds.
_T = TypeVar('_T')
41"""
42Constants you might want to modify
43-----------------------------------------------------------------------------
44"""
BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'html', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
    'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video',
    'center',
]
"""
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
attribute on the class. This remains for compatibility with older extensions.
"""
# Placeholders
#
# Each template is wrapped in the STX/ETX control characters; the `%s` slot
# takes the identifier of the stashed item.
STX = '\u0002'
""" "Start of Text" marker for placeholder templates. """
ETX = '\u0003'
""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
""" Placeholder template for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = STX+"amp"+ETX
""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
""" Placeholder template for raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
""" Placeholder template for tags. """
87# Constants you probably do not need to change
88# -----------------------------------------------------------------------------
# Pairs of (first, last) characters delimiting blocks of right-to-left scripts.
RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F')  # Tifinagh
)
99# AUXILIARY GLOBAL FUNCTIONS
100# =============================================================================
@lru_cache(maxsize=None)  # Only load extension entry_points once.
def get_installed_extensions():
    """
    Return all entry_points in the `markdown.extensions` group.

    The result is cached, so the installed distributions are only queried on
    the first call; later calls return the same object.
    """
    if sys.version_info >= (3, 10):
        from importlib import metadata
    else:  # `<PY310` use backport
        import importlib_metadata as metadata
    return metadata.entry_points(group='markdown.extensions')
def deprecated(message: str, stacklevel: int = 2):
    """
    Raise a [`DeprecationWarning`][] when wrapped function/method is called.

    Arguments:
        message: Text appended to the warning after the standard
            "'<name>' is deprecated." prefix.
        stacklevel: Passed through to `warnings.warn` to select which frame
            the warning is attributed to.

    Returns:
        A decorator which wraps a callable so that each call emits the warning
        and then delegates to the original callable.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def wrapper(func):
        @wraps(func)
        def deprecated_func(*args, **kwargs):
            warnings.warn(
                f"'{func.__name__}' is deprecated. {message}",
                category=DeprecationWarning,
                stacklevel=stacklevel
            )
            return func(*args, **kwargs)
        return deprecated_func
    return wrapper
139def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
140 """Parses a string representing a boolean value. If parsing was successful,
141 returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`,
142 or `None`. If parsing was not successful, raises `ValueError`, or, if
143 `fail_on_errors=False`, returns `None`."""
144 if not isinstance(value, str):
145 if preserve_none and value is None:
146 return value
147 return bool(value)
148 elif preserve_none and value.lower() == 'none':
149 return None
150 elif value.lower() in ('true', 'yes', 'y', 'on', '1'):
151 return True
152 elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):
153 return False
154 elif fail_on_errors:
155 raise ValueError('Cannot parse bool value: %r' % value)
def code_escape(text: str) -> str:
    """
    HTML escape a string of code.

    Replaces `&`, `<` and `>` with their HTML entities and returns the result.
    """
    # The ampersand must be handled first; otherwise the `&` introduced by the
    # `&lt;`/`&gt;` replacements would itself be escaped.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text
def _get_stack_depth(size: int = 2) -> int:
    """
    Get current stack depth, performantly.

    Arguments:
        size: Number of frames to skip before counting starts. The frame that
            many levels up must exist, otherwise `sys._getframe` raises
            `ValueError`.

    Returns:
        The index of the outermost frame on the call stack, where this
        function's own frame has index 0.
    """
    frame = sys._getframe(size)

    # Walk towards the outermost frame, counting on from the frames skipped.
    for depth in count(size):
        frame = frame.f_back
        if frame is None:
            return depth
def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    remaining = sys.getrecursionlimit() - _get_stack_depth()
    return remaining < 100
185# MISC AUXILIARY CLASSES
186# =============================================================================
class AtomicString(str):
    """A string which should not be further processed."""
class Processor:
    """ The base class for all processors.

    Attributes:
        Processor.md: The `Markdown` instance passed in at initialization.

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """
    def __init__(self, md: Markdown | None = None):
        self.md = md
if TYPE_CHECKING:  # pragma: no cover
    class TagData(TypedDict):
        """Shape of one record in `HtmlStash.tag_data`, as built by `HtmlStash.store_tag`."""
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """ Clear the stash, including any stored tag data. """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        # Tag data is part of the stash as well; leaving it behind would let
        # it accumulate across resets.
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the HTML placeholder string for the given `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder
# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
class _PriorityItem(NamedTuple):
    name: str
    priority: float
class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        self._data: dict[str, _T] = {}
        self._priority: list[_PriorityItem] = []
        # Sorting is deferred until the registry is next read.
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        self._sort()
        if isinstance(key, slice):
            # A slice yields a new registry holding the selected items with
            # their original names and priorities.
            data: Registry[_T] = Registry()
            for name, priority in self._priority[key]:
                data.register(self._data[name], name, priority)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return f'<{self.__class__.__name__}({list(self)})>'

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises a [`ValueError`][] if no item is registered under `name`.
        """
        if name in self:
            self._sort()
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError(f'No item named "{name}" exists.')

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Tread carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            index = self.get_index_for_name(name)
            del self._priority[index]
            del self._data[name]
        except ValueError:
            if strict:
                raise

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True