Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/util.py: 77%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

155 statements  

1# Python Markdown 

2 

3# A Python implementation of John Gruber's Markdown. 

4 

5# Documentation: https://python-markdown.github.io/ 

6# GitHub: https://github.com/Python-Markdown/markdown/ 

7# PyPI: https://pypi.org/project/Markdown/ 

8 

9# Started by Manfred Stienstra (http://www.dwerg.net/). 

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). 

11# Currently maintained by Waylan Limberg (https://github.com/waylan), 

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). 

13 

14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) 

15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) 

16# Copyright 2004 Manfred Stienstra (the original version) 

17 

18# License: BSD (see LICENSE.md for details). 

19 

20""" 

21This module contains various constants, classes and functions which get referenced and used 

22throughout the code base. 

23""" 

24 

25from __future__ import annotations 

26 

27import re 

28import sys 

29import warnings 

30from functools import wraps, lru_cache 

31from itertools import count 

32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload 

33 

34if TYPE_CHECKING: # pragma: no cover 

35 from markdown import Markdown 

36 import xml.etree.ElementTree as etree 

37 

# Type variable for the kind of item stored in a generic container such as `Registry`.
_T = TypeVar("_T")

39 

40 

41""" 

42Constants you might want to modify 

43----------------------------------------------------------------------------- 

44""" 

45 

46 

BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which must never end up wrapped inside a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote',
    'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
    'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Further elements whose contents Markdown should leave untouched.
    'canvas', 'colgroup', 'dd', 'body', 'dt',
    'group', 'html', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
    'style', 'summary', 'tbody', 'td', 'textarea',
    'tfoot', 'th', 'thead', 'tr', 'video',
    'center',
]
"""
HTML tag names that are handled as block-level elements. This mirrors the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class, which is the preferred place to read or
change the list. The module-level copy is kept for compatibility with older extensions.
"""

65 

# Placeholders
#
# Every placeholder is a short token wrapped between the two control
# characters below. The templates containing `%s` are filled in with an
# identifier using `%` formatting.
STX = '\u0002'
""" "Start of Text" control character which opens every placeholder. """
ETX = '\u0003'
""" "End of Text" control character which closes every placeholder. """
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Leading part shared by all inline placeholders. """
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" Template for the placeholder of stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching an inline placeholder; group 1 captures its digits. """
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder used for HTML entities. """
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" Template for the placeholder of raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching a raw HTML placeholder; group 1 captures its digits. """
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" Template for the placeholder of a tag. """

85 

86 

87# Constants you probably do not need to change 

88# ----------------------------------------------------------------------------- 

89 

# Pairs of (first, last) code points delimiting right-to-left scripts.
RTL_BIDI_RANGES = (
    # U+0590 - U+07FF, one contiguous run covering:
    #   Hebrew (0590-05FF), Arabic (0600-06FF),
    #   Syriac (0700-074F), Arabic supplement (0750-077F),
    #   Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # U+2D30 - U+2D7F: Tifinagh
    ('\u2D30', '\u2D7F'),
)

97 

98 

99# AUXILIARY GLOBAL FUNCTIONS 

100# ============================================================================= 

101 

102 

@lru_cache(maxsize=None)
def get_installed_extensions():
    """
    Return all entry_points in the `markdown.extensions` group.

    The result is memoized by `lru_cache`, so the entry points are only
    looked up on the first call; later calls return the same object.
    """
    if sys.version_info < (3, 10):  # `<PY310` use backport
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')

112 

113 

def deprecated(message: str, stacklevel: int = 2):
    """
    Decorator factory: emit a [`DeprecationWarning`][] every time the decorated
    function/method is called, then call it as usual.

    Arguments:
        message: Text appended to the warning after the function's name.
        stacklevel: Passed through to `warnings.warn`.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def decorate(target):
        @wraps(target)
        def warn_then_call(*args, **kwargs):
            # The text is built on each call so it always reflects the
            # current `__name__` of the wrapped callable.
            notice = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)
        return warn_then_call
    return decorate

137 

138 

def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Interpret `value` as a boolean.

    Non-string values are converted with `bool()`, except that `None` is
    returned unchanged when `preserve_none=True`. Strings are compared
    case-insensitively against a fixed set of true and false words. The word
    `'none'` yields `None` when `preserve_none=True` and `False` otherwise.

    A string which matches none of the known words raises `ValueError`, or,
    if `fail_on_errors=False`, results in `None`.
    """
    if not isinstance(value, str):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None

156 

157 

def code_escape(text: str) -> str:
    """Return `text` with `&`, `<` and `>` replaced by their HTML entities."""
    # `&` has to be handled first; otherwise the ampersands introduced by
    # `&lt;` and `&gt;` would themselves be escaped.
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        if char in text:
            text = text.replace(char, entity)
    return text

167 

168 

def _get_stack_depth(size: int = 2) -> int:
    """Get current stack depth, performantly.

    The walk starts `size` frames above this function's own frame and follows
    `f_back` links until the outermost frame is passed. The skipped frames are
    included in the returned count.
    """
    frame = sys._getframe(size)
    depth = size
    while True:
        frame = frame.f_back
        if frame is None:
            return depth
        depth += 1

178 

179 

def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100

183 

184 

185# MISC AUXILIARY CLASSES 

186# ============================================================================= 

187 

188 

class AtomicString(str):
    """A `str` subclass marking text which should not be further processed."""

192 

193 

class Processor:
    """ Common base class of all processors.

    Attributes:
        Processor.md: The `Markdown` instance given at initialization, or `None`.

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """

    def __init__(self, md: Markdown | None = None):
        # Stored as given; the default leaves the processor without a `Markdown` instance.
        self.md = md

206 

207 

if TYPE_CHECKING:  # pragma: no cover
    # Only defined for type checkers: the shape of each dictionary which
    # `HtmlStash.store_tag` appends to `HtmlStash.tag_data`.
    class TagData(TypedDict):
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int

214 

215 

class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.

    Two independent stashes are kept: raw HTML segments (`rawHtmlBlocks`,
    counted by `html_counter`) and tag data (`tag_data`, counted by
    `tag_counter`). In both cases the counter equals the index the next
    stored entry will receive.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """
        Clear the stash.

        Both the raw HTML stash and the tag stash are emptied and their
        counters return to zero, so entries stored before the reset do not
        accumulate in a reused instance.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        # Counter and list are cleared together so a tag placeholder's
        # number keeps matching its index in `tag_data`.
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the raw HTML placeholder string for the given `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """
        Store tag data and return a placeholder.

        Arguments:
            tag: Stored under the `'tag'` key.
            attrs: Stored under the `'attrs'` key.
            left_index: Stored under the `'left_index'` key.
            right_index: Stored under the `'right_index'` key.

        Returns:
            A tag placeholder string carrying the entry's index in `tag_data`.
        """
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder

263 

264 

# One entry of the sorted list which `Registry` keeps internally.
# Named fields keep later edits readable: `item.name` says more
# than `item[0]` does.
class _PriorityItem(NamedTuple):
    # Name under which the item was registered.
    name: str
    # Value the registry sorts by.
    priority: float

271 

272 

class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Maps each registered name to its item.
        self._data: dict[str, _T] = {}
        # One `(name, priority)` entry per registered item; ordered lazily by `_sort`.
        self._priority: list[_PriorityItem] = []
        # `False` whenever `_priority` may be out of order.
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        """ Return whether `item` is a registered name (if a string) or a registered item. """
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        """ Iterate over the registered items from highest to lowest priority. """
        self._sort()
        # A list is built first, so the iterator walks a snapshot of the items.
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        """
        Return an item by name or by index, or a new `Registry` for a slice.

        Indexes and slices refer to the priority sorted order.
        """
        self._sort()
        if isinstance(key, slice):
            data: Registry[_T] = Registry()
            for k, p in self._priority[key]:
                data.register(self._data[k], k, p)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self) -> int:
        """ Return the number of registered items. """
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises:
            ValueError: If no item is registered under `name`.
        """
        if name in self:
            self._sort()
            # `name in self` is also true when `name` is a registered *item*
            # rather than a name. Scanning by name and falling through keeps
            # that case a `ValueError`.
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError('No item named "{}" exists.'.format(name))

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Treat carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        # Only an existing *name* triggers a replacement; stored items are
        # not consulted.
        if name in self._data:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            index = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
            return
        del self._priority[index]
        del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True

409 self._is_sorted = True