Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/markdown/util.py: 56%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Python Markdown
3# A Python implementation of John Gruber's Markdown.
5# Documentation: https://python-markdown.github.io/
6# GitHub: https://github.com/Python-Markdown/markdown/
7# PyPI: https://pypi.org/project/Markdown/
9# Started by Manfred Stienstra (http://www.dwerg.net/).
10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
11# Currently maintained by Waylan Limberg (https://github.com/waylan),
12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
16# Copyright 2004 Manfred Stienstra (the original version)
18# License: BSD (see LICENSE.md for details).
20"""
21This module contains various constants, classes and functions which get referenced and used
22throughout the code base.
23"""
25from __future__ import annotations
27import re
28import sys
29import warnings
30from functools import wraps, lru_cache
31from itertools import count
32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
34if TYPE_CHECKING: # pragma: no cover
35 from markdown import Markdown
36 import xml.etree.ElementTree as etree
38_T = TypeVar('_T')
41"""
42Constants you might want to modify
43-----------------------------------------------------------------------------
44"""
BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    "address", "article", "aside", "blockquote", "details",
    "div", "dl", "fieldset", "figcaption", "figure",
    "footer", "form", "h1", "h2", "h3",
    "h4", "h5", "h6", "header", "hgroup",
    "hr", "main", "menu", "nav", "ol",
    "p", "pre", "section", "table", "ul",
    # Other elements which Markdown should not be mucking up the contents of.
    "canvas", "colgroup", "dd", "body", "dt",
    "group", "html", "iframe", "li", "legend",
    "math", "map", "noscript", "output", "object",
    "option", "progress", "script", "style", "summary",
    "tbody", "td", "textarea", "tfoot", "th",
    "thead", "tr", "video",
]
"""
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
attribute on the class. This remains for compatibility with older extensions.
"""
# Placeholders
# Every template below is wrapped in the STX/ETX control characters.
STX = '\u0002'
""" "Start of Text" marker for placeholder templates. """
ETX = '\u0003'
""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" Placeholder template for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" Placeholder template for raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" Placeholder template for tags. """
86# Constants you probably do not need to change
87# -----------------------------------------------------------------------------
# Each entry is a `(first, last)` pair of characters delimiting one range.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
98# AUXILIARY GLOBAL FUNCTIONS
99# =============================================================================
@lru_cache(maxsize=None)
def get_installed_extensions():
    """ Return all entry_points in the `markdown.extensions` group.

    The function takes no arguments and is wrapped in `lru_cache`, so the
    entry points are only loaded once; later calls return the cached result.
    """
    if sys.version_info < (3, 10):
        # `<PY310` use backport
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')
def deprecated(message: str, stacklevel: int = 2):
    """
    Raise a [`DeprecationWarning`][] when wrapped function/method is called.

    Arguments:
        message: Text appended to the generated "'<name>' is deprecated." notice.
        stacklevel: Passed through to `warnings.warn` as its `stacklevel`.

    Returns:
        A decorator. The decorated callable emits the warning on every call and
        then returns whatever the wrapped callable returns.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def decorate(target):
        @wraps(target)
        def warn_then_call(*args, **kwargs):
            # The warning is issued directly from this frame so that the
            # `stacklevel` keeps its meaning relative to the caller.
            notice = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)
        return warn_then_call
    return decorate
def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Parse a string representing a boolean value.

    Non-string input is converted with `bool()`, except that `None` is
    returned unchanged when `preserve_none=True`. Strings are compared
    case-insensitively against the known true/false spellings.

    Arguments:
        value: The value to parse.
        fail_on_errors: Whether an unrecognized string raises `ValueError`.
        preserve_none: Whether `None` and the string `'none'` yield `None`
            instead of `False`.

    Returns:
        `True` or `False`; or `None` when `preserve_none=True` applies, or when
        the string is unrecognized and `fail_on_errors=False`.

    Raises:
        ValueError: If the string is unrecognized and `fail_on_errors=True`.
    """
    if not isinstance(value, str):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # Must be checked before the "false" spellings, which also contain 'none'.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
def code_escape(text: str) -> str:
    """HTML escape a string of code.

    Replaces `&`, `<` and `>` with the entities `&amp;`, `&lt;` and `&gt;`.
    Quote characters are left untouched.

    Arguments:
        text: The code to escape.

    Returns:
        The escaped text.
    """
    # `&` has to be handled first; otherwise the ampersands introduced by the
    # `<` and `>` replacements would themselves be escaped a second time.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text
def _get_stack_depth(size: int = 2) -> int:
    """Get current stack depth, performantly.

    Starts at the frame `size` levels above this function (via
    `sys._getframe`) and follows `f_back` links until they run out, counting
    upward from `size` as it goes.

    Arguments:
        size: How many frames to skip before walking; also the initial count.

    Returns:
        The count reached when the outermost frame is hit.
    """
    current = sys._getframe(size)
    depth = size
    while True:
        current = current.f_back
        if not current:
            return depth
        depth += 1
def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    # `_get_stack_depth` must be called directly from this frame: its default
    # `size` is counted relative to the call site.
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100
184# MISC AUXILIARY CLASSES
185# =============================================================================
class AtomicString(str):
    """A string which should not be further processed.

    The class adds no behavior of its own; the subclass type itself is the marker.
    """
class Processor:
    """ The base class for all processors.

    Attributes:
        Processor.md: The `Markdown` instance passed in at initialization.

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """
    def __init__(self, md: Markdown | None = None):
        # Stored as given; stays `None` when no instance is supplied.
        self.md = md
if TYPE_CHECKING:  # pragma: no cover
    # Only defined for type checkers; the name does not exist at runtime.
    class TagData(TypedDict):
        # Shape of the dictionaries `HtmlStash.store_tag` appends to
        # `HtmlStash.tag_data`.
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int
class HtmlStash:
    """
    Holds HTML objects which are extracted at the beginning of processing
    and swapped for place-holders until they are reinserted.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Save an HTML segment for later reinsertion and hand back the
        placeholder string that needs to be inserted into the document
        in its place.

        Keyword arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        # The counter value before incrementing is this segment's key.
        key = self.html_counter
        placeholder = self.get_placeholder(key)
        self.html_counter = key + 1
        return placeholder

    def reset(self) -> None:
        """ Clear the stash. """
        # NOTE(review): only the raw HTML state is cleared; `tag_counter` and
        # `tag_data` are left as they are - confirm that is intended.
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key: int) -> str:
        """ Return the `HTML_PLACEHOLDER` template filled in with `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """Store tag data and return a placeholder."""
        record: TagData = {
            'tag': tag,
            'attrs': attrs,
            'left_index': left_index,
            'right_index': right_index,
        }
        self.tag_data.append(record)
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder
# Internal record type: `Registry` keeps one of these per registered item in
# its sorted list. Named fields keep later edits readable - `item.name` is
# clearer than `item[0]`.
class _PriorityItem(NamedTuple):
    name: str
    priority: float
class Registry(Generic[_T]):
    """
    A priority sorted registry.

    The data of a `Registry` instance is altered through two public methods:
    `register` adds items and `deregister` removes them. See each method for
    specifics.

    Every item is registered under a "name" and with a "priority". Items are
    automatically sorted by "priority" from highest to lowest. The "name" is
    used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Registered items keyed by name.
        self._data: dict[str, _T] = {}
        # One `(name, priority)` record per item; sorted lazily by `_sort`.
        self._priority: list[_PriorityItem] = []
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        # A string is treated as a registered name; anything else is
        # compared against the registered items themselves.
        candidates = self._data if isinstance(item, str) else self._data.values()
        return item in candidates

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        # Build the list up front so the iterator works on a snapshot.
        ordered = [self._data[entry.name] for entry in self._priority]
        return iter(ordered)

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        self._sort()
        if isinstance(key, slice):
            # A slice produces a new registry holding the selected entries.
            subset: Registry[_T] = Registry()
            for entry in self._priority[key]:
                subset.register(self._data[entry.name], entry.name, entry.priority)
            return subset
        # An integer is a position in priority order; otherwise `key` is a name.
        name = self._priority[key].name if isinstance(key, int) else key
        return self._data[name]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises a [`ValueError`][] if nothing is registered under `name`.
        """
        if name not in self:
            raise ValueError('No item named "{}" exists.'.format(name))
        self._sort()
        matches = [entry for entry in self._priority if entry.name == name]
        return self._priority.index(matches[0])

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        Registering under a "name" which already exists replaces the existing
        item with the new one. Tread carefully as the old item is lost with no
        way to recover it. The new item is sorted according to its own
        priority and does **not** retain the position of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        # The appended record lands at the end, so a re-sort is needed.
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name=name, priority=priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            position = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
            return
        del self._priority[position]
        del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if self._is_sorted:
            return
        self._priority.sort(key=lambda entry: entry.priority, reverse=True)
        self._is_sorted = True