Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/markdown/util.py: 56%

1# Python Markdown

3# A Python implementation of John Gruber's Markdown.

5# Documentation: https://python-markdown.github.io/

6# GitHub: https://github.com/Python-Markdown/markdown/

7# PyPI: https://pypi.org/project/Markdown/

9# Started by Manfred Stienstra (http://www.dwerg.net/).

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).

11# Currently maintained by Waylan Limberg (https://github.com/waylan),

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

18# License: BSD (see LICENSE.md for details).

20"""

21This module contains various constants, classes and functions which get referenced and used

22throughout the code base.

23"""

25from __future__ import annotations

27import re

28import sys

29import warnings

30from functools import wraps, lru_cache

31from itertools import count

32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload

34if TYPE_CHECKING: # pragma: no cover

35 from markdown import Markdown

36 import xml.etree.ElementTree as etree

# Generic type variable used by the container classes in this module.
_T = TypeVar('_T')


"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""


BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details',
    'div', 'dl', 'fieldset', 'figcaption', 'figure',
    'footer', 'form', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'header', 'hgroup',
    'hr', 'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements whose contents Markdown should leave alone.
    'canvas', 'colgroup', 'dd', 'body', 'dt',
    'group', 'html', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript', 'output', 'object',
    'option', 'progress', 'script', 'style', 'summary',
    'tbody', 'td', 'textarea', 'tfoot', 'th',
    'thead', 'tr', 'video'
]
"""
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
attribute on the class. This remains for compatibility with older extensions.
"""

# Placeholders
# Every template below is wrapped in the STX/ETX marker characters; the
# `%s` slot receives the index of the stashed item.
STX = '\u0002'
""" "Start of Text" marker for placeholder templates. """
ETX = '\u0003'
""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" Placeholder template for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" Placeholder template for raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" Placeholder template for tags. """

# Constants you probably do not need to change
# -----------------------------------------------------------------------------

# Each entry is an inclusive (first, last) pair of characters.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)

98# AUXILIARY GLOBAL FUNCTIONS

99# =============================================================================

100

101

@lru_cache(maxsize=None)
def get_installed_extensions():
    """
    Return all entry_points in the `markdown.extensions` group.

    The result is memoized by `lru_cache`, so the extension entry_points
    are only loaded once.
    """
    if sys.version_info < (3, 10):
        # `<PY310` use backport
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')

111

112

def deprecated(message: str, stacklevel: int = 2):
    """
    Build a decorator that emits a [`DeprecationWarning`][] on every call of
    the wrapped function/method.

    Arguments:
        message: Text appended to the "'<name>' is deprecated." prefix.
        stacklevel: Passed straight through to `warnings.warn`.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def decorator(target):
        @wraps(target)
        def warn_then_call(*args, **kwargs):
            # The text is built at call time, from the wrapped function's current name.
            text = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(text, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)
        return warn_then_call
    return decorator

136

137

def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Interpret a string as a boolean.

    Recognized strings (case-insensitive) yield `True` or `False`. With
    `preserve_none=True` the value `None` and the string `'none'` yield
    `None`. Non-string values are converted with `bool()`.

    An unrecognized string raises `ValueError`, or, if
    `fail_on_errors=False`, returns `None`."""
    if not isinstance(value, str):
        if preserve_none and value is None:
            return None
        return bool(value)

    lowered = value.lower()
    # This check must precede the false-words test, which also lists 'none'.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None

155

156

def code_escape(text: str) -> str:
    """HTML escape a string of code.

    Replaces `&`, `<` and `>` with `&amp;`, `&lt;` and `&gt;`.

    Arguments:
        text: The code to escape.

    Returns:
        The escaped text.
    """
    # `&` must go first, otherwise the ampersands introduced by the
    # `&lt;`/`&gt;` replacements would themselves be escaped again.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

166

167

def _get_stack_depth(size: int = 2) -> int:
    """Get current stack depth, performantly.

    Starts at the frame `size` levels above this function and follows
    `f_back` links until they run out, counting upward from `size`.
    """
    # `sys._getframe` must be called directly here: its argument is relative
    # to this function's own frame.
    frame = sys._getframe(size).f_back
    depth = size
    while frame:
        frame = frame.f_back
        depth += 1
    return depth

177

178

def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    # `_get_stack_depth` must be called directly from this function: its
    # default frame offset is relative to the call chain.
    remaining = sys.getrecursionlimit() - _get_stack_depth()
    return remaining < 100

182

183

184# MISC AUXILIARY CLASSES

185# =============================================================================

186

187

class AtomicString(str):
    """A `str` subclass marking text which should not be further processed."""

191

192

class Processor:
    """ Common base class shared by all processors.

    Attributes:
        Processor.md: The `Markdown` instance supplied at initialization.

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """
    def __init__(self, md: Markdown | None = None):
        # Stored as-is; `None` when no instance was supplied.
        self.md = md

205

206

if TYPE_CHECKING:  # pragma: no cover
    # Typing-only declaration: the keys and value types of one entry in
    # `HtmlStash.tag_data`. The class does not exist at runtime.
    class TagData(TypedDict):
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int

213

214

class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        # The counter value used here equals the segment's index in `self.rawHtmlBlocks`.
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """
        Clear the stash.

        Both the raw HTML blocks and the stored tag data are discarded and
        their counters return to zero, so nothing accumulates across resets.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the HTML placeholder string for the given index. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """
        Store tag data and return a placeholder.

        Arguments:
            tag: The tag name.
            attrs: The tag's attributes.
            left_index: Stored under the `left_index` key.
            right_index: Stored under the `right_index` key.

        Returns:
            A tag placeholder string carrying the entry's index.
        """
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder

262

263

# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
class _PriorityItem(NamedTuple):
    name: str
    priority: float


class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Maps each name to its registered item.
        self._data: dict[str, _T] = {}
        # (name, priority) records; only ordered after `_sort()` has run.
        self._priority: list[_PriorityItem] = []
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        # Iterate over a snapshot list rather than the live structures.
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        self._sort()
        if isinstance(key, slice):
            # A slice yields a new registry holding the selected items with
            # their original names and priorities.
            data: Registry[_T] = Registry()
            for entry in self._priority[key]:
                data.register(self._data[entry.name], entry.name, entry.priority)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises a [`ValueError`][] if no item is registered under `name`.
        """
        if name in self:
            self._sort()
            # Single pass over the sorted records.
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError('No item named "{}" exists.'.format(name))

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Treat carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        # Only the lookup is guarded; the deletions below cannot raise `ValueError`.
        try:
            index = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
            return
        del self._priority[index]
        del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True

408 self._is_sorted = True