Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/markdown/util.py: 56%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

154 statements  

1# Python Markdown 

2 

3# A Python implementation of John Gruber's Markdown. 

4 

5# Documentation: https://python-markdown.github.io/ 

6# GitHub: https://github.com/Python-Markdown/markdown/ 

7# PyPI: https://pypi.org/project/Markdown/ 

8 

9# Started by Manfred Stienstra (http://www.dwerg.net/). 

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). 

11# Currently maintained by Waylan Limberg (https://github.com/waylan), 

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). 

13 

14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) 

15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) 

16# Copyright 2004 Manfred Stienstra (the original version) 

17 

18# License: BSD (see LICENSE.md for details). 

19 

20""" 

21This module contains various constants, classes and functions which get referenced and used 

22throughout the code base. 

23""" 

24 

25from __future__ import annotations 

26 

27import re 

28import sys 

29import warnings 

30from functools import wraps, lru_cache 

31from itertools import count 

32from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload 

33 

34if TYPE_CHECKING: # pragma: no cover 

35 from markdown import Markdown 

36 import xml.etree.ElementTree as etree 

37 

38_T = TypeVar('_T') 

39 

40 

41""" 

42Constants you might want to modify 

43----------------------------------------------------------------------------- 

44""" 

45 

46 

BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details',
    'div', 'dl', 'fieldset', 'figcaption', 'figure',
    'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'header', 'hgroup', 'hr', 'main', 'menu', 'nav',
    'ol', 'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'html',
    'iframe', 'li', 'legend', 'math', 'map', 'noscript', 'output',
    'object', 'option', 'progress', 'script', 'style', 'summary',
    'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video',
]
"""
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
attribute on the class. This remains for compatibility with older extensions.
"""

64 

# Placeholders
STX = '\u0002'
""" "Start of Text" marker for placeholder templates. """
ETX = '\u0003'
""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" Placeholder template for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" Placeholder template for raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" Placeholder template for tags. """

84 

85 

86# Constants you probably do not need to change 

87# ----------------------------------------------------------------------------- 

88 

RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)

96 

97 

98# AUXILIARY GLOBAL FUNCTIONS 

99# ============================================================================= 

100 

101 

@lru_cache(maxsize=None)
def get_installed_extensions():
    """ Return all entry_points in the `markdown.extensions` group.

    The result is cached by `lru_cache`, so the entry points are only
    looked up on the first call.
    """
    if sys.version_info < (3, 10):  # `<PY310` use backport
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')

111 

112 

def deprecated(message: str, stacklevel: int = 2):
    """
    Raise a [`DeprecationWarning`][] when wrapped function/method is called.

    Arguments:
        message: Text appended to the generated "'<name>' is deprecated." notice.
        stacklevel: Passed through to `warnings.warn`.

    Returns:
        A decorator which wraps the function so that the warning is issued on
        every call before the call is delegated to the original function.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def decorate(func):
        @wraps(func)
        def warn_then_call(*args, **kwargs):
            notice = f"'{func.__name__}' is deprecated. {message}"
            # `warnings.warn` must be called directly from this frame so that
            # `stacklevel` keeps counting from the wrapper itself.
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)
        return warn_then_call
    return decorate

136 

137 

def parseBoolValue(
    value: str | None,
    fail_on_errors: bool = True,
    preserve_none: bool = False
) -> bool | None:
    """Parse a string representing a boolean value.

    Non-string values are converted with `bool()`, except that `None` is
    returned unchanged when `preserve_none=True`. Strings are compared
    case-insensitively against the known true/false spellings; with
    `preserve_none=True` the string `'none'` yields `None` rather than `False`.

    Returns:
        `True` or `False` (or `None` when `preserve_none=True`), or `None`
        for an unrecognized string when `fail_on_errors=False`.

    Raises:
        ValueError: If the string is unrecognized and `fail_on_errors=True`.
    """
    if not isinstance(value, str):
        keep_none = preserve_none and value is None
        return None if keep_none else bool(value)

    lowered = value.lower()
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None

155 

156 

def code_escape(text: str) -> str:
    """HTML escape a string of code.

    Replaces `&`, `<` and `>` (in that order) with their HTML entities.
    The input object is returned untouched when it contains none of them.
    """
    # `&` must come first so the ampersands of the other entities are not re-escaped.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        if raw in text:
            text = text.replace(raw, entity)
    return text

166 

167 

def _get_stack_depth(size: int = 2) -> int:
    """Get current stack depth, performantly.

    Starts at the frame `size` levels above this one and follows `f_back`
    links until the outermost frame is passed, counting upward from `size`.
    """
    frame = sys._getframe(size)

    for depth in count(size):
        frame = frame.f_back
        if frame is None:
            return depth

177 

178 

def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    remaining = sys.getrecursionlimit() - _get_stack_depth()
    return remaining < 100

182 

183 

184# MISC AUXILIARY CLASSES 

185# ============================================================================= 

186 

187 

class AtomicString(str):
    """A string which should not be further processed.

    Adds no behavior of its own; the subclass only serves as a marker type.
    """

191 

192 

class Processor:
    """ The base class for all processors.

    Attributes:
        Processor.md: The `Markdown` instance passed in at initialization
            (`None` when none was given).

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """
    def __init__(self, md: Markdown | None = None):
        # Stored as given; no validation is performed here.
        self.md = md

205 

206 

if TYPE_CHECKING:  # pragma: no cover
    class TagData(TypedDict):
        """Typing-only description of the dictionaries kept in `HtmlStash.tag_data`."""
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int

213 

214 

class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.

    Two independent stores are kept: raw HTML segments (`rawHtmlBlocks`,
    counted by `html_counter`) and tag records (`tag_data`, counted by
    `tag_counter`).
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0  # for counting stored tags
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """ Clear the stash, including any stored tag data. """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        # Tag placeholders index into `tag_data`; clear both together so a
        # reused stash neither keeps growing nor hands out stale indexes.
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the raw HTML placeholder string for the given `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in `self.tag_data`
        return placeholder

262 

263 

# Entry type for the sorted list which `Registry` keeps internally.
# Named fields make later edits easier to read: `item.name` is more
# clear than `item[0]`.
class _PriorityItem(NamedTuple):
    """Pairs a registered name with the priority it is sorted by."""
    name: str
    priority: float

270 

271 

class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Items keyed by name.
        self._data: dict[str, _T] = {}
        # (name, priority) pairs; sorted lazily by `_sort`.
        self._priority: list[_PriorityItem] = []
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        # A string is always looked up as a name, anything else as an instance.
        if isinstance(item, str):
            return item in self._data
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        # Iterate over a snapshot list taken in priority order.
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:  # pragma: no cover
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:  # pragma: no cover
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        self._sort()
        if isinstance(key, slice):
            # A slice yields a new registry holding the selected entries.
            subset: Registry[_T] = Registry()
            for entry in self._priority[key]:
                subset.register(self._data[entry.name], entry.name, entry.priority)
            return subset
        # An integer is a position in priority order; anything else is a name.
        name = self._priority[key].name if isinstance(key, int) else key
        return self._data[name]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return f'<{self.__class__.__name__}({list(self)})>'

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises a `ValueError` if the registry does not contain `name`.
        """
        if name not in self:
            raise ValueError(f'No item named "{name}" exists.')
        self._sort()
        positions = [
            position for position, entry in enumerate(self._priority)
            if entry.name == name
        ]
        return positions[0]

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Treat carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        # The appended entry may be out of order; force a re-sort on next read.
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            position = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
            return
        del self._priority[position]
        del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if self._is_sorted:
            return
        self._priority.sort(key=lambda item: item.priority, reverse=True)
        self._is_sorted = True