Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/util.py: 77%

1# Python Markdown

3# A Python implementation of John Gruber's Markdown.

5# Documentation: https://python-markdown.github.io/

6# GitHub: https://github.com/Python-Markdown/markdown/

7# PyPI: https://pypi.org/project/Markdown/

9# Started by Manfred Stienstra (http://www.dwerg.net/).

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).

11# Currently maintained by Waylan Limberg (https://github.com/waylan),

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

18# License: BSD (see LICENSE.md for details).

20"""

21This module contains various constants, classes and functions which get referenced and used

22throughout the code base.

23"""

25from __future__ import annotations

27import re

28import sys

29import warnings

30from functools import wraps, lru_cache

31from itertools import count

32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload

34if TYPE_CHECKING: # pragma: no cover

35 from markdown import Markdown

36 import xml.etree.ElementTree as etree

38_T = TypeVar('_T')

41"""

42Constants you might want to modify

43-----------------------------------------------------------------------------

44"""

BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which may not be wrapped in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    "address", "article", "aside", "blockquote", "details",
    "div", "dl", "fieldset", "figcaption", "figure",
    "footer", "form", "h1", "h2", "h3",
    "h4", "h5", "h6", "header", "hgroup",
    "hr", "main", "menu", "nav", "ol",
    "p", "pre", "section", "table", "ul",
    # Further elements whose contents Markdown should leave alone.
    "canvas", "colgroup", "dd", "body", "dt",
    "group", "html", "iframe", "li", "legend",
    "math", "map", "noscript", "output", "object",
    "option", "progress", "script", "style", "summary",
    "tbody", "td", "textarea", "tfoot", "th",
    "thead", "tr", "video", "center",
]
"""
HTML tag names which are treated as block-level elements. This mirrors the
`block_level_elements` attribute of the [`Markdown`][markdown.Markdown] class, which is
generally the one to use. The module-level list is kept for compatibility with older
extensions.
"""

# Placeholders
STX = '\u0002'
""" "Start of Text" control character which opens every placeholder. """
ETX = '\u0003'
""" "End of Text" control character which closes every placeholder. """
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Leading part of the inline placeholder template. """
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" Template (`%s` receives the key) for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching an inline placeholder; group 1 is the numeric key. """
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder used for HTML entities. """
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" Template (`%s` receives the key) for stashed raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching a raw HTML placeholder; group 1 is the numeric key. """
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" Template (`%s` receives the key) for stashed tags. """

87# Constants you probably do not need to change

88# -----------------------------------------------------------------------------

# Inclusive (first, last) code point pairs for the listed right-to-left scripts.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)

99# AUXILIARY GLOBAL FUNCTIONS

100# =============================================================================

101

102

@lru_cache(maxsize=None)
def get_installed_extensions():
    """
    Return every entry point registered in the `markdown.extensions` group.

    The result is memoized by `lru_cache`, so the entry points are only
    looked up on the first call.
    """
    if sys.version_info < (3, 10):
        # Older interpreters rely on the `importlib_metadata` backport.
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')

112

113

def deprecated(message: str, stacklevel: int = 2):
    """
    Build a decorator which emits a [`DeprecationWarning`][] on every call
    of the decorated function/method and then runs it unchanged.

    Arguments:
        message: Text appended to the warning, after the function name.
        stacklevel: Passed through to `warnings.warn`.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def decorator(target):
        @wraps(target)
        def warn_then_call(*args, **kwargs):
            # The text is assembled per call, exactly when the warning fires.
            text = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(text, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)
        return warn_then_call
    return decorator

137

138

def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Interpret `value` as a boolean.

    Non-string values are converted with `bool()`, except that `None` is
    handed back as `None` when `preserve_none=True`. Strings are compared
    case-insensitively against the known true/false words; with
    `preserve_none=True` the string `'none'` yields `None` rather than `False`.

    An unrecognized string raises `ValueError`, or returns `None` when
    `fail_on_errors=False`."""
    if isinstance(value, str):
        lowered = value.lower()
        if preserve_none and lowered == 'none':
            return None
        if lowered in ('true', 'yes', 'y', 'on', '1'):
            return True
        if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
            return False
        if fail_on_errors:
            raise ValueError('Cannot parse bool value: %r' % value)
        return None
    if value is None and preserve_none:
        return None
    return bool(value)

156

157

def code_escape(text: str) -> str:
    """HTML escape a string of code.

    Replaces `&`, `<` and `>` with their HTML entities. Quotes are left
    untouched.

    Arguments:
        text: The raw code text.

    Returns:
        The text with the three characters above replaced by entities.
    """
    # The ampersand must be handled first; otherwise the `&` introduced by
    # the other two entities would be escaped a second time.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text

167

168

def _get_stack_depth(size: int = 2) -> int:
    """Get current stack depth, performantly.

    Starts at the frame `size` levels above this one (via `sys._getframe`)
    and follows `f_back` links to the outermost frame, counting upward from
    `size` on the way.
    """
    frame = sys._getframe(size)
    depth = size
    while True:
        frame = frame.f_back
        if frame is None:
            # Ran off the top of the stack: `depth` is the final tally.
            return depth
        depth += 1

178

179

def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100

183

184

185# MISC AUXILIARY CLASSES

186# =============================================================================

187

188

class AtomicString(str):
    """A `str` subclass used to mark text which should not be processed further."""

192

193

class Processor:
    """ Common base class of all processors.

    Attributes:
        Processor.md: The `Markdown` instance handed in at initialization.

    Arguments:
        md: The `Markdown` instance this processor belongs to (may be `None`).

    """
    def __init__(self, md: Markdown | None = None):
        # Keep a reference to the owning `Markdown` instance.
        self.md: Markdown | None = md

206

207

if TYPE_CHECKING:  # pragma: no cover
    # Exists for static type checkers only; never defined at runtime.
    class TagData(TypedDict):
        """Shape of one record in `HtmlStash.tag_data`."""
        # The tag name.
        tag: str
        # The tag's attributes, name -> value.
        attrs: dict[str, str]
        # Index pair recorded together with the tag.
        left_index: int
        right_index: int

214

215

class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.

    Raw HTML segments are kept in `rawHtmlBlocks` and tag records in
    `tag_data`; each has its own counter from which the next placeholder
    key is taken.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """
        Clear the stash.

        Both the raw HTML segments and the stored tag data are discarded and
        their counters restart at zero, so nothing accumulates across resets.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the raw HTML placeholder string for the given `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """
        Store tag data and return a placeholder.

        Arguments:
            tag: The tag name.
            attrs: The tag's attributes.
            left_index: Stored as-is under the `left_index` key.
            right_index: Stored as-is under the `right_index` key.

        Returns:
            A tag placeholder string keyed by the record's index in `tag_data`.
        """
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder

263

264

# One of these is kept by `Registry` for every entry in its sorted list.
# Named fields make later edits easier to read:
# `item.name` says more than `item[0]` does.
class _PriorityItem(NamedTuple):
    name: str
    priority: float

271

272

class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Maps each name to its registered item.
        self._data: dict[str, _T] = {}
        # One `(name, priority)` entry per item; sorted lazily by `_sort`.
        self._priority: list[_PriorityItem] = []
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        # Iterate over a snapshot list rather than the live containers.
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        """
        Return the item for a name or index, or a new `Registry` for a slice.

        A slice yields a fresh `Registry` holding the selected items under
        their original names and priorities.
        """
        self._sort()
        if isinstance(key, slice):
            data: Registry[_T] = Registry()
            for k, p in self._priority[key]:
                data.register(self._data[k], k, p)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return f'<{self.__class__.__name__}({list(self)})>'

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises:
            ValueError: If no item is registered under `name`.
        """
        if name in self:
            self._sort()
            # A single pass finds the position of the matching entry.
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError(f'No item named "{name}" exists.')

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Treat carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        # The new entry is appended unsorted; sorting happens on next read.
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            index = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
        else:
            del self._priority[index]
            del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True