Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/extensions/footnotes.py: 92%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

229 statements  

1# Footnotes Extension for Python-Markdown 

2# ======================================= 

3 

4# Adds footnote handling to Python-Markdown. 

5 

6# See https://Python-Markdown.github.io/extensions/footnotes 

7# for documentation. 

8 

9# Copyright The Python Markdown Project 

10 

11# License: [BSD](https://opensource.org/licenses/bsd-license.php) 

12 

13""" 

14Adds footnote handling to Python-Markdown. 

15 

16See the [documentation](https://Python-Markdown.github.io/extensions/footnotes) 

17for details. 

18""" 

19 

20from __future__ import annotations 

21 

22from . import Extension 

23from ..blockprocessors import BlockProcessor 

24from ..inlinepatterns import InlineProcessor 

25from ..treeprocessors import Treeprocessor 

26from ..postprocessors import Postprocessor 

27from .. import util 

28from collections import OrderedDict 

29import re 

30import copy 

31import xml.etree.ElementTree as etree 

32 

# Placeholder stored as the text of every generated back-link `<a>` element.
# `FootnotePostprocessor` swaps it for the `BACKLINK_TEXT` config value once
# the document has been serialized.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
# Placeholder appended to the text of a footnote's last paragraph, right
# before the back-link. `FootnotePostprocessor` replaces it in the serialized
# output.
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# Matches the `fnref` prefix of a reference id and any digits that follow it;
# `FootnoteExtension.unique_ref` uses the digits as a counter to increment.
RE_REF_ID = re.compile(r'(fnref)(\d+)')

36 

37 

class FootnoteExtension(Extension):
    """ Footnote Extension.

    Holds the footnote definitions collected while parsing, hands out the
    ids used to link references and definitions, and builds the footnote
    `div` that is inserted into the document.
    """

    def __init__(self, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER': [
                '///Footnotes Go Here///', 'The text string that marks where the footnotes go'
            ],
            'UNIQUE_IDS': [
                False, 'Avoid name collisions across multiple calls to `reset()`.'
            ],
            'BACKLINK_TEXT': [
                '↩', "The text string that links from the footnote to the reader's place."
            ],
            'SUPERSCRIPT_TEXT': [
                '{}', "The text string that links from the reader's place to the footnote."
            ],
            'BACKLINK_TITLE': [
                'Jump back to footnote %d in the text',
                'The text string used for the title HTML attribute of the backlink. '
                '%d will be replaced by the footnote number.'
            ],
            'SEPARATOR': [
                ':', 'Footnote separator.'
            ]
        }
        """ Default configuration options. """
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs: dict[str, int] = {}
        self.used_refs: set[str] = set()

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self) -> None:
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes: OrderedDict[str, str] = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference: str, found: bool = False) -> str:
        """ Get a unique reference if there are duplicates.

        When `found` is false the reference is returned untouched and nothing
        is recorded. Otherwise the counter embedded in the `fnref` prefix is
        bumped until the id is unused, the id is remembered in `used_refs`,
        and the number of uses of the original id is tallied in `found_refs`.
        """
        if not found:
            return reference

        original_ref = reference
        separator = self.get_separator()
        while reference in self.used_refs:
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                # Already numbered (`fnref2`): move on to the next number.
                reference = '%s%d%s%s' % (m.group(1), int(m.group(2)) + 1, separator, rest)
            else:
                # First duplicate of a plain `fnref`: start counting at 2.
                reference = '%s%d%s%s' % (ref, 2, separator, rest)

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(
        self, root: etree.Element
    ) -> tuple[etree.Element, etree.Element, bool] | None:
        """ Return ElementTree Element that contains Footnote placeholder.

        Returns:
            A `(child, parent, is_text)` tuple for the first element (depth
            first) whose text (`is_text` true) or tail (`is_text` false)
            contains the `PLACE_MARKER` string, or `None` if there is none.
        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id: str, text: str) -> None:
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self) -> str:
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id: str) -> str:
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn{}{}'.format(self.get_separator(), id)

    def makeFootnoteRefId(self, id: str, found: bool = False) -> str:
        """ Return footnote back-link id. """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
        else:
            return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)

    def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
        """ Return `div` of footnotes as `etree` Element.

        Returns `None` when no footnotes have been stored. `root` is accepted
        for API compatibility and is not used.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")

        # Backward compatibility with old '%d' placeholder
        backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}")

        # Parsing a footnote body runs the block processors again, and the
        # footnote block processor may store *new* footnotes while we are
        # rendering (e.g. a lazily indented definition nested in another one).
        # Iterating the live dict would then raise `RuntimeError`, so render
        # snapshot batches until no unrendered ids remain. New ids are
        # appended to the ordered dict, so `index` keeps matching its order.
        index = 0
        while index < len(self.footnotes):
            for fn_id in list(self.footnotes)[index:]:
                index += 1
                li = etree.SubElement(ol, "li")
                li.set("id", self.makeFootnoteId(fn_id))
                # Parse footnote with surrogate parent as `li` cannot be used.
                # List block handlers have special logic to deal with `li`.
                # When we are done parsing, we will copy everything over to `li`.
                self.parser.parseChunk(surrogate_parent, self.footnotes[fn_id])
                for el in list(surrogate_parent):
                    li.append(el)
                    surrogate_parent.remove(el)
                backlink = etree.Element("a")
                backlink.set("href", "#" + self.makeFootnoteRefId(fn_id))
                backlink.set("class", "footnote-backref")
                backlink.set(
                    "title",
                    backlink_title.format(index)
                )
                backlink.text = FN_BACKLINK_TEXT

                if len(li):
                    node = li[-1]
                    if node.tag == "p":
                        # `text` is `None` for a paragraph without leading text.
                        node.text = (node.text or '') + NBSP_PLACEHOLDER
                        node.append(backlink)
                    else:
                        p = etree.SubElement(li, "p")
                        p.append(backlink)
        return div

214 

215 

class FootnoteBlockProcessor(BlockProcessor):
    """ Collect footnote definitions and hand them to the extension. """

    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes: FootnoteExtension):
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent: etree.Element, block: str) -> bool:
        # Every block is inspected; `run` decides whether it holds a definition.
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Extract the first footnote definition of the next block.

        Returns `True` when a definition was found and stored, otherwise the
        block is put back untouched and `False` is returned.
        """
        block = blocks.pop(0)
        match = self.RE.search(block)
        if match is None:
            # Not ours: restore the block for the other processors.
            blocks.insert(0, block)
            return False

        label = match.group(1)
        first_line = match.group(2)

        # Whatever follows the definition line inside the same block.
        remainder = block[match.end():].lstrip('\n')
        following = self.RE.search(remainder)
        if following is not None:
            # A second definition starts later in this block. Lines before it
            # (possibly lazily indented) still belong to the current footnote.
            continuation = remainder[:following.start()].rstrip('\n')
            opening = (first_line + '\n' + self.detab(continuation)).lstrip('\n')
            # Queue the next definition for the following iteration.
            blocks.insert(0, remainder[following.start():])
        else:
            # The rest of the block is continuation text of this footnote.
            opening = (first_line + '\n' + self.detab(remainder)).strip('\n')

        # Indented follow-up blocks are children of this footnote.
        pieces = [opening]
        pieces.extend(self.detectTabbed(blocks))

        self.footnotes.setFootnote(label, "\n\n".join(pieces).rstrip())

        leading = block[:match.start()]
        if leading.strip():
            # Text ahead of the definition becomes a block of its own.
            blocks.insert(0, leading.rstrip('\n'))
        return True

    def detectTabbed(self, blocks: list[str]) -> list[str]:
        """ Consume the indented blocks that continue the current footnote.

        Returns:
            A list of blocks with indentation removed.
        """
        indent = ' ' * 4
        collected = []
        while blocks and blocks[0].startswith(indent):
            chunk = blocks.pop(0)
            found = self.RE.search(chunk)
            if found is None:
                # The whole block belongs to this footnote.
                collected.append(self.detab(chunk))
                continue
            # A new definition starts inside this block: keep what precedes
            # it, push the rest back, and stop - this footnote is complete.
            collected.append(self.detab(chunk[:found.start()].rstrip('\n')))
            blocks.insert(0, chunk[found.start():])
            break
        return collected

    def detab(self, block: str) -> str:
        """ Strip one indent level (four spaces) from every indented line.

        Lines without that indent are kept as they are, which preserves
        lazily indented continuation lines.
        """
        indent = ' ' * 4
        return '\n'.join(
            line[4:] if line.startswith(indent) else line
            for line in block.split('\n')
        )

303 

304 

class FootnoteInlineProcessor(InlineProcessor):
    """ Turn `[^label]` markers in body text into superscript links. """

    def __init__(self, pattern: str, footnotes: FootnoteExtension):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Build the `<sup><a>` reference for a known footnote label.

        Returns `(None, None, None)` when the label has no stored footnote,
        otherwise the `sup` element together with the span of the match.
        """
        extension = self.footnotes
        label = m.group(1)
        if label not in extension.footnotes:
            return None, None, None

        sup = etree.Element("sup")
        anchor = etree.SubElement(sup, "a")
        # `found=True` registers this use so repeated references get unique ids.
        sup.set('id', extension.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + extension.makeFootnoteId(label))
        anchor.set('class', 'footnote-ref')
        # The displayed number is the 1-based position of the definition.
        number = list(extension.footnotes).index(label) + 1
        anchor.text = extension.getConfig("SUPERSCRIPT_TEXT").format(number)
        return sup, m.start(0), m.end(0)

326 

327 

class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def add_duplicates(self, li: etree.Element, duplicates: int) -> None:
        """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc.

        Only the first `footnote-backref` link found in `li` is used as the
        template; the copies are appended to the last child of `li`.
        """
        separator = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') == 'footnote-backref':
                ref, rest = link.attrib['href'].split(separator, 1)
                # Duplicate the link the number of times we need to
                # and point each copy to the appropriate reference.
                links = []
                for index in range(2, duplicates + 1):
                    sib_link = copy.deepcopy(link)
                    sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, separator, rest)
                    links.append(sib_link)
                    self.offset += 1
                # Add all the new duplicate links.
                el = list(li)[-1]
                for link in links:
                    el.append(link)
                break

    def get_num_duplicates(self, li: etree.Element) -> int:
        """ Get the number of duplicate refs of the footnote.

        Returns 0 for a `li` that has no `id`, or whose `id` does not contain
        the separator, i.e. a list item this extension did not generate.
        """
        separator = self.footnotes.get_separator()
        fn, found, rest = li.attrib.get('id', '').partition(separator)
        if not found:
            # Unpacking `split()` here would raise `ValueError` on such items.
            return 0
        link_id = '{}ref{}{}'.format(fn, separator, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent: etree.Element) -> None:
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root: etree.Element) -> None:
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div. So once we find it, quit.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break

379 

380 

class FootnoteTreeprocessor(Treeprocessor):
    """ Place the rendered footnote div into the document tree. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Insert the footnote div at the place marker, or append it to `root`.

        Nothing happens when the extension has no footnotes to render.
        """
        notes_div = self.footnotes.makeFootnotesDiv(root)
        if notes_div is None:
            return

        located = self.footnotes.findFootnotesPlaceholder(root)
        if not located:
            # No marker in the document: footnotes go at the very end.
            root.append(notes_div)
            return

        marker_el, container, in_text = located
        position = list(container).index(marker_el)
        if not in_text:
            # Marker sits in the tail: put the div right after the element
            # and drop the tail that held the marker.
            container.insert(position + 1, notes_div)
            marker_el.tail = None
            return

        # Marker is the element's own text: the div takes the element's place.
        container.remove(marker_el)
        container.insert(position, notes_div)

402 

403 

class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, text: str) -> str:
        """ Substitute the footnote placeholders in the serialized document.

        `FN_BACKLINK_TEXT` becomes the configured `BACKLINK_TEXT`, and
        `NBSP_PLACEHOLDER` becomes a non-breaking space.
        """
        text = text.replace(
            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
        )
        # Emit the entity rather than a literal whitespace character: it cannot
        # be confused with (or normalized into) an ordinary, breaking space.
        return text.replace(NBSP_PLACEHOLDER, "&#160;")

414 

415 

def makeExtension(**kwargs):  # pragma: no cover
    """ Build a `FootnoteExtension` configured with the given keyword options. """
    extension = FootnoteExtension(**kwargs)
    return extension