1# Footnotes Extension for Python-Markdown
2# =======================================
3
4# Adds footnote handling to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/footnotes
7# for documentation.
8
9# Copyright The Python Markdown Project
10
11# License: [BSD](https://opensource.org/licenses/bsd-license.php)
12
13"""
14Adds footnote handling to Python-Markdown.
15
16See the [documentation](https://Python-Markdown.github.io/extensions/footnotes)
17for details.
18"""
19
20from __future__ import annotations
21
22from . import Extension
23from ..blockprocessors import BlockProcessor
24from ..inlinepatterns import InlineProcessor
25from ..treeprocessors import Treeprocessor
26from ..postprocessors import Postprocessor
27from .. import util
28from collections import OrderedDict
29import re
30import copy
31import xml.etree.ElementTree as etree
32
# Placeholder used as the text of every footnote backlink while the tree is built;
# `FootnotePostprocessor.run` swaps it for the `BACKLINK_TEXT` option.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
# Placeholder appended to the text of a footnote's last paragraph, ahead of the
# backlink; also replaced in `FootnotePostprocessor.run`.
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# Captures the literal `fnref` and the digits following it; `unique_ref` uses it to
# bump the numeric suffix of a reference id that is already taken.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
# Matches `[^label]` when it is not preceded by `!` and not followed by optional
# whitespace and a `:`; group 1 is the label.
RE_REFERENCE = re.compile(r'(?<!!)\[\^([^\]]*)\](?!\s*:)')
37
38
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Stores the footnote definitions and references found while a document is
    parsed and registers the processors that turn them into the footnote `div`.
    """

    def __init__(self, **kwargs):
        """ Setup configs.

        All keyword arguments are passed on unchanged to `Extension.__init__`.
        """

        self.config = {
            'PLACE_MARKER': [
                '///Footnotes Go Here///', 'The text string that marks where the footnotes go'
            ],
            'UNIQUE_IDS': [
                False, 'Avoid name collisions across multiple calls to `reset()`.'
            ],
            'BACKLINK_TEXT': [
                '↩', "The text string that links from the footnote to the reader's place."
            ],
            'SUPERSCRIPT_TEXT': [
                '{}', "The text string that links from the reader's place to the footnote."
            ],
            'BACKLINK_TITLE': [
                'Jump back to footnote %d in the text',
                'The text string used for the title HTML attribute of the backlink. '
                '%d will be replaced by the footnote number.'
            ],
            'SEPARATOR': [
                ':', 'Footnote separator.'
            ],
            'USE_DEFINITION_ORDER': [
                True,
                'Order footnote labels by definition order (True) or by document order (False). '
                'Default: True.'
            ]
        }
        """ Default configuration options. """
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs: dict[str, int] = {}
        self.used_refs: set[str] = set()

        # Backward compatibility with old '%d' placeholder
        self.setConfig('BACKLINK_TITLE', self.getConfig("BACKLINK_TITLE").replace("%d", "{}"))

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        # `FootnoteBlockProcessor.__init__` reads `self.parser`, so set it first.
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor to reorder the footnotes if necessary. This must be after
        # `inline` tree-processor so it can access the footnote reference order
        # (`self.footnote_order`) that gets populated by the `FootnoteInlineProcessor`.
        if not self.getConfig("USE_DEFINITION_ORDER"):
            md.treeprocessors.register(FootnoteReorderingProcessor(self), 'footnote-reorder', 19)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self) -> None:
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnote_order: list[str] = []
        self.footnotes: OrderedDict[str, str] = OrderedDict()
        # A fresh prefix per document keeps ids distinct when `UNIQUE_IDS` is on.
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference: str, found: bool = False) -> str:
        """ Get a unique reference if there are duplicates.

        When `found` is false the reference is returned untouched and nothing is
        recorded. Otherwise the numeric suffix of the part before the separator is
        bumped (`fnref` -> `fnref2` -> `fnref3` ...) until the id is unused; the
        result is added to `used_refs` and the count for the original reference
        in `found_refs` is incremented.
        """
        if not found:
            return reference

        separator = self.get_separator()
        original_ref = reference
        while reference in self.used_refs:
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                reference = f'{m.group(1)}{int(m.group(2)) + 1}{separator}{rest}'
            else:
                reference = f'{ref}2{separator}{rest}'

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(
        self, root: etree.Element
    ) -> tuple[etree.Element, etree.Element, bool] | None:
        """ Return ElementTree Element that contains Footnote placeholder.

        The result is `(child, parent, is_text)`: `is_text` is `True` when the
        marker was found in `child.text` and `False` when it was found in
        `child.tail`. `None` is returned when the marker is not in the tree.
        """
        # The marker does not change during the search, so read it only once.
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id: str, text: str) -> None:
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def addFootnoteRef(self, id: str) -> None:
        """ Store a footnote reference id in order of appearance. """
        if id not in self.footnote_order:
            self.footnote_order.append(id)

    def get_separator(self) -> str:
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id: str) -> str:
        """ Return footnote link id. """
        separator = self.get_separator()
        if self.getConfig("UNIQUE_IDS"):
            return f'fn{separator}{self.unique_prefix}-{id}'
        return f'fn{separator}{id}'

    def makeFootnoteRefId(self, id: str, found: bool = False) -> str:
        """ Return footnote back-link id. """
        separator = self.get_separator()
        if self.getConfig("UNIQUE_IDS"):
            reference = f'fnref{separator}{self.unique_prefix}-{id}'
        else:
            reference = f'fnref{separator}{id}'
        return self.unique_ref(reference, found)

    def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
        """ Return `div` of footnotes as `etree` Element.

        Returns `None` when no footnotes are stored. `root` is accepted but not
        used by this method.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")
        backlink_title = self.getConfig('BACKLINK_TITLE')

        for index, (id, text) in enumerate(self.footnotes.items(), start=1):
            li = etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(id))
            # Parse footnote with surrogate parent as `li` cannot be used.
            # List block handlers have special logic to deal with `li`.
            # When we are done parsing, we will copy everything over to `li`.
            self.parser.parseChunk(surrogate_parent, text)
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)
            backlink = etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(id))
            backlink.set("class", "footnote-backref")
            backlink.set("title", backlink_title.format(index))
            backlink.text = FN_BACKLINK_TEXT

            # Every footnote gets a backlink: reuse a trailing paragraph, otherwise
            # (no content at all, or content ending in a non-paragraph element)
            # give the backlink a paragraph of its own.
            node = li[-1] if len(li) else None
            if node is not None and node.tag == "p":
                # `text` is `None` for a paragraph without leading text.
                node.text = (node.text or '') + NBSP_PLACEHOLDER
            else:
                node = etree.SubElement(li, "p")
            node.append(backlink)
        return div
232
233
class FootnoteBlockProcessor(BlockProcessor):
    """ Find footnote definitions and store for later use. """

    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes: FootnoteExtension):
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent: etree.Element, block: str) -> bool:
        # Always accept the block; `run` does the matching and returns `False`
        # when no definition is present.
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Find, set, and remove footnote definitions.

        Returns `True` when a definition was found in the first block (and
        stored on the extension), `False` when the block was left untouched.
        """
        block = blocks.pop(0)
        match = self.RE.search(block)
        if match is None:
            # No match. Restore block.
            blocks.insert(0, block)
            return False

        label = match.group(1)
        first_line = match.group(2)

        # Handle rest of block
        remainder = block[match.end():].lstrip('\n')
        next_definition = self.RE.search(remainder)
        if next_definition is not None:
            # Another footnote exists in the rest of this block.
            # Anything before it continues this footnote and may be lazily indented.
            continuation = remainder[:next_definition.start()].rstrip('\n')
            parts = [(first_line + '\n' + self.detab(continuation)).lstrip('\n')]
            # Hand everything from the next definition onward back for the next iteration.
            blocks.insert(0, remainder[next_definition.start():])
        else:
            # All remaining lines of the block continue this footnote (possibly lazily indented).
            parts = [(first_line + '\n' + self.detab(remainder)).strip('\n')]
            # Check for child elements in remaining blocks.
            parts.extend(self.detectTabbed(blocks))

        self.footnotes.setFootnote(label, "\n\n".join(parts).rstrip())

        leading = block[:match.start()]
        if leading.strip():
            # Add any content before match back to blocks as separate block
            blocks.insert(0, leading.rstrip('\n'))
        return True

    def detectTabbed(self, blocks: list[str]) -> list[str]:
        """ Find indented text and remove indent before further processing.

        Consumes leading blocks from `blocks` for as long as they start with four
        spaces, stopping early when a block contains a new footnote definition.

        Returns:
            A list of blocks with indentation removed.
        """
        indent = ' ' * 4
        collected = []
        while blocks and blocks[0].startswith(indent):
            candidate = blocks.pop(0)
            # Check for new footnotes within this block and split at new footnote.
            found = self.RE.search(candidate)
            if found is None:
                # Entire block is part of this footnote.
                collected.append(self.detab(candidate))
                continue
            # Content before the new definition still belongs to this footnote.
            collected.append(self.detab(candidate[:found.start()].rstrip('\n')))
            # The new definition and what follows go back for the next iteration.
            blocks.insert(0, candidate[found.start():])
            break
        return collected

    def detab(self, block: str) -> str:
        """ Remove one level of indent from a block.

        Preserve lazily indented blocks by only removing indent from indented lines.
        """
        indent = ' ' * 4
        return '\n'.join(
            line[4:] if line.startswith(indent) else line
            for line in block.split('\n')
        )
322
323
class FootnoteInlineProcessor(InlineProcessor):
    """ `InlineProcessor` for footnote markers in a document's body text. """

    def __init__(self, pattern: str, footnotes: FootnoteExtension):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Build the `sup` reference element for a marker with a known definition.

        Returns `(sup, start, end)` for a defined footnote and
        `(None, None, None)` when the label has no stored definition.
        """
        extension = self.footnotes
        id = m.group(1)
        if id not in extension.footnotes:
            return None, None, None

        extension.addFootnoteRef(id)

        if extension.getConfig("USE_DEFINITION_ORDER"):
            # Order by definition
            footnote_num = list(extension.footnotes).index(id) + 1
        else:
            # Order by reference
            footnote_num = extension.footnote_order.index(id) + 1

        ref_id = extension.makeFootnoteRefId(id, found=True)
        target = '#' + extension.makeFootnoteId(id)
        label = extension.getConfig("SUPERSCRIPT_TEXT").format(footnote_num)

        sup = etree.Element("sup")
        sup.set('id', ref_id)
        a = etree.SubElement(sup, "a")
        a.set('href', target)
        a.set('class', 'footnote-ref')
        a.text = label
        return sup, m.start(0), m.end(0)
352
353
class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes
        # Running count of backlinks added; `run` resets it for each document.
        self.offset = 0

    def add_duplicates(self, li: etree.Element, duplicates: int) -> None:
        """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc.

        The first `a` of class `footnote-backref` inside `li` is copied once for
        each index from 2 to `duplicates`, and the copies are appended to the
        last child of `li`.
        """
        separator = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') != 'footnote-backref':
                continue
            ref, rest = link.attrib['href'].split(separator, 1)
            # Duplicate the link as many times as needed and point each
            # copy to the appropriate reference.
            copies = []
            for index in range(2, duplicates + 1):
                sib_link = copy.deepcopy(link)
                sib_link.attrib['href'] = f'{ref}{index}{separator}{rest}'
                copies.append(sib_link)
                self.offset += 1
            # Add all the new duplicate links.
            li[-1].extend(copies)
            break

    def get_num_duplicates(self, li: etree.Element) -> int:
        """ Get the number of duplicate refs of the footnote.

        Returns 0 for an `li` whose `id` is missing or does not contain the
        separator, since such an item cannot correspond to a recorded reference.
        """
        separator = self.footnotes.get_separator()
        fn, found, rest = li.attrib.get('id', '').partition(separator)
        if not found:
            return 0
        link_id = f'{fn}ref{separator}{rest}'
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent: etree.Element) -> None:
        """ Find duplicate footnotes and format and add the duplicates. """
        # Only descendants of each `li` are modified, never the children of
        # `parent`, so it can be iterated directly.
        for li in parent:
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root: etree.Element) -> None:
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div. So once we find it, quit.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break
405
406
class FootnoteTreeprocessor(Treeprocessor):
    """ Build and append footnote div to end of document. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Put the footnotes `div` at the place marker, or at the end of `root`. """
        footnotes_div = self.footnotes.makeFootnotesDiv(root)
        if footnotes_div is None:
            # Nothing to insert when the document defines no footnotes.
            return

        placeholder = self.footnotes.findFootnotesPlaceholder(root)
        if not placeholder:
            root.append(footnotes_div)
            return

        child, parent, is_text = placeholder
        position = list(parent).index(child)
        if is_text:
            # Marker is in the element's own text: the div takes the element's place.
            parent.remove(child)
            parent.insert(position, footnotes_div)
        else:
            # Marker is in the tail: the div goes right after the element and the tail is dropped.
            parent.insert(position + 1, footnotes_div)
            child.tail = None
428
429
class FootnoteReorderingProcessor(Treeprocessor):
    """ Reorder list items in the footnotes div. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Reorder the first footnote `div` when reference order differs from definition order. """
        if not self.footnotes.footnotes:
            return
        if self.footnotes.footnote_order != list(self.footnotes.footnotes):
            for div in root.iter('div'):
                if div.attrib.get('class', '') == 'footnote':
                    self.reorder_footnotes(div)
                    break

    def reorder_footnotes(self, parent: etree.Element) -> None:
        """ Replace the `ol` in `parent` with one sorted by order of first reference.

        Footnotes that were never referenced keep their relative order and are
        placed after all referenced ones. Backlink titles are renumbered to the
        new positions.
        """
        old_list = parent.find('ol')
        if old_list is None:
            # Nothing to reorder without a list.
            return
        parent.remove(old_list)
        items = old_list.findall('li')

        extension = self.footnotes
        separator = extension.get_separator()
        # `footnote_order` holds each label once, so a dict gives the same index.
        position = {label: i for i, label in enumerate(extension.footnote_order)}
        unreferenced = len(extension.footnotes)
        # With `UNIQUE_IDS` the `li` id is `fn<sep><unique_prefix>-<label>`; the
        # prefix has to be removed before the label can be looked up.
        prefix = f'{extension.unique_prefix}-' if extension.getConfig("UNIQUE_IDS") else ''

        def order_by_id(li) -> int:
            id = li.attrib.get('id', '').split(separator, 1)[-1]
            if prefix and id.startswith(prefix):
                id = id[len(prefix):]
            return position.get(id, unreferenced)

        items = sorted(items, key=order_by_id)

        new_list = etree.SubElement(parent, 'ol')
        backlink_title = extension.getConfig("BACKLINK_TITLE")

        for index, item in enumerate(items, start=1):
            backlink = item.find('.//a[@class="footnote-backref"]')
            # An item without a backlink is still moved; only the title update is skipped.
            if backlink is not None:
                backlink.set("title", backlink_title.format(index))
            new_list.append(item)
466
467
class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, text: str) -> str:
        """ Return `text` with both footnote placeholders substituted.

        `FN_BACKLINK_TEXT` becomes the `BACKLINK_TEXT` option and
        `NBSP_PLACEHOLDER` becomes the non-breaking-space entity.
        """
        text = text.replace(
            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
        )
        # Emit the entity rather than a literal whitespace character so the
        # backlink stays on the same line as the end of the footnote text.
        return text.replace(NBSP_PLACEHOLDER, "&#160;")
478
479
def makeExtension(**kwargs):  # pragma: no cover
    """ Build a `FootnoteExtension` from the given keyword arguments and return it. """
    extension = FootnoteExtension(**kwargs)
    return extension