1# Footnotes Extension for Python-Markdown
2# =======================================
3
4# Adds footnote handling to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/footnotes
7# for documentation.
8
9# Copyright The Python Markdown Project
10
11# License: [BSD](https://opensource.org/licenses/bsd-license.php)
12
13"""
14Adds footnote handling to Python-Markdown.
15
16See the [documentation](https://Python-Markdown.github.io/extensions/footnotes)
17for details.
18"""
19
20from __future__ import annotations
21
22from . import Extension
23from ..blockprocessors import BlockProcessor
24from ..inlinepatterns import InlineProcessor
25from ..treeprocessors import Treeprocessor
26from ..postprocessors import Postprocessor
27from .. import util
28from collections import OrderedDict
29import re
30import copy
31import xml.etree.ElementTree as etree
32
# Placeholders wrapped in `util.STX`/`util.ETX`. They stand in for the backlink
# text and the no-break space while the document is still an element tree and
# are swapped for their final text by `FootnotePostprocessor.run`.
FN_BACKLINK_TEXT = f"{util.STX}zz1337820767766393qq{util.ETX}"
NBSP_PLACEHOLDER = f"{util.STX}qq3936677670287331zz{util.ETX}"
# Splits a back-reference prefix such as `fnref2` into `fnref` and `2`.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
# A `[^label]` marker that is not preceded by `!` and is not followed by
# optional whitespace and a colon.
RE_REFERENCE = re.compile(r'(?<!!)\[\^([^\]]*)\](?!\s*:)')
37
38
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Keeps the footnote definitions and the reference bookkeeping for the
    document being converted, and registers the processors that collect,
    render, order and finalize the footnotes.
    """

    def __init__(self, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER': [
                '///Footnotes Go Here///', 'The text string that marks where the footnotes go'
            ],
            'UNIQUE_IDS': [
                False, 'Avoid name collisions across multiple calls to `reset()`.'
            ],
            'BACKLINK_TEXT': [
                '↩', "The text string that links from the footnote to the reader's place."
            ],
            'SUPERSCRIPT_TEXT': [
                '{}', "The text string that links from the reader's place to the footnote."
            ],
            'BACKLINK_TITLE': [
                'Jump back to footnote %d in the text',
                'The text string used for the title HTML attribute of the backlink. '
                '%d will be replaced by the footnote number.'
            ],
            'SEPARATOR': [
                ':', 'Footnote separator.'
            ],
            'USE_DEFINITION_ORDER': [
                False, 'Whether to order footnotes by footnote content rather than by footnote label.'
            ]
        }
        """ Default configuration options. """
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs: dict[str, int] = {}
        self.used_refs: set[str] = set()

        # Backward compatibility with old '%d' placeholder
        self.setConfig('BACKLINK_TITLE', self.getConfig("BACKLINK_TITLE").replace("%d", "{}"))

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor to reorder the footnotes if necessary. This must be after
        # `inline` tree-processor so it can access the footnote reference order
        # (`self.footnote_order`) that gets populated by the `FootnoteInlineProcessor`.
        if not self.getConfig("USE_DEFINITION_ORDER"):
            md.treeprocessors.register(FootnoteReorderingProcessor(self), 'footnote-reorder', 19)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self) -> None:
        """ Clear footnotes on reset, and prepare for distinct document.

        Every call also bumps `unique_prefix`, which is embedded in the
        generated ids when `UNIQUE_IDS` is enabled.
        """
        self.footnote_order: list[str] = []
        self.footnotes: OrderedDict[str, str] = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference: str, found: bool = False) -> str:
        """ Get a unique reference if there are duplicates.

        With `found` false the reference is returned untouched and nothing is
        recorded. Otherwise the number in front of the separator is increased
        (`fnref` -> `fnref2` -> `fnref3` ...) until the reference is unused,
        the result is remembered in `used_refs`, and the usage count of the
        original reference in `found_refs` is incremented.
        """
        if not found:
            return reference

        separator = self.get_separator()
        original_ref = reference
        while reference in self.used_refs:
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                reference = '%s%d%s%s' % (m.group(1), int(m.group(2)) + 1, separator, rest)
            else:
                reference = '%s%d%s%s' % (ref, 2, separator, rest)

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(
        self, root: etree.Element
    ) -> tuple[etree.Element, etree.Element, bool] | None:
        """ Return ElementTree Element that contains Footnote placeholder.

        The tree is searched depth-first. The result is `(child, parent,
        is_text)`, where `is_text` tells whether the marker was found in the
        child's `text` (true) or in its `tail` (false). `None` is returned
        when the marker does not occur.
        """
        # The marker is constant for the whole search; read it once.
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id: str, text: str) -> None:
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def addFootnoteRef(self, id: str) -> None:
        """ Store a footnote reference id in order of appearance. """
        if id not in self.footnote_order:
            self.footnote_order.append(id)

    def get_separator(self) -> str:
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id: str) -> str:
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn{}{}'.format(self.get_separator(), id)

    def makeFootnoteRefId(self, id: str, found: bool = False) -> str:
        """ Return footnote back-link id.

        `found` is passed on to `unique_ref`; when true the id is made unique
        and counted as one more reference to the footnote.
        """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
        else:
            return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)

    def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
        """ Return `div` of footnotes as `etree` Element.

        Returns `None` when no footnote has been defined. Otherwise the result
        is `<div class="footnote"><hr/><ol>...</ol></div>` with one `li` per
        footnote in definition order. `root` is not used.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")

        # Parsing a footnote body runs the block processors again, so a
        # definition nested inside a footnote adds a new key while we are
        # working. Iterating the live mapping would then raise `RuntimeError`
        # (or silently skip the new entry), so walk snapshots of the keys and
        # keep going until every footnote, including late ones, is rendered.
        rendered = 0
        while rendered < len(self.footnotes):
            for id in list(self.footnotes)[rendered:]:
                rendered += 1  # Doubles as the 1-based footnote number.
                li = etree.SubElement(ol, "li")
                li.set("id", self.makeFootnoteId(id))
                # Parse footnote with surrogate parent as `li` cannot be used.
                # List block handlers have special logic to deal with `li`.
                # When we are done parsing, we will copy everything over to `li`.
                self.parser.parseChunk(surrogate_parent, self.footnotes[id])
                for el in list(surrogate_parent):
                    li.append(el)
                    surrogate_parent.remove(el)
                backlink = etree.Element("a")
                backlink.set("href", "#" + self.makeFootnoteRefId(id))
                backlink.set("class", "footnote-backref")
                backlink.set(
                    "title",
                    self.getConfig('BACKLINK_TITLE').format(rendered)
                )
                backlink.text = FN_BACKLINK_TEXT

                # A footnote without any parsed content gets no backlink.
                if len(li):
                    node = li[-1]
                    if node.tag == "p":
                        # `text` is `None` for a paragraph without leading text.
                        node.text = (node.text or '') + NBSP_PLACEHOLDER
                        node.append(backlink)
                    else:
                        p = etree.SubElement(li, "p")
                        p.append(backlink)
        return div
230
231
class FootnoteBlockProcessor(BlockProcessor):
    """ Find footnote definitions and store for later use.

    Definitions are removed from the block list and handed to
    `FootnoteExtension.setFootnote`; nothing is added to the element tree here.
    """

    # A definition line: up to three leading spaces, `[^label]:`, optional
    # spaces, then the rest of the line as the first line of the footnote.
    # `re.MULTILINE` lets `^` and `$` anchor at every line of a block.
    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes: FootnoteExtension):
        """ Bind the processor to the extension's parser and keep the extension for storing footnotes. """
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` does the matching and returns `False` when there is no definition. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Find, set, and remove footnote definitions.

        Takes the first block off `blocks`. If it holds a definition, the
        footnote text (including continuation lines and following indented
        blocks) is stored, any text before the definition and any further
        definition are pushed back onto `blocks`, and `True` is returned.
        Otherwise the block is put back untouched and `False` is returned.
        """
        block = blocks.pop(0)

        m = self.RE.search(block)
        if m:
            id = m.group(1)
            fn_blocks = [m.group(2)]

            # Handle rest of block
            therest = block[m.end():].lstrip('\n')
            m2 = self.RE.search(therest)
            if m2:
                # Another footnote exists in the rest of this block.
                # Any content before match is continuation of this footnote, which may be lazily indented.
                before = therest[:m2.start()].rstrip('\n')
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
                # Add back to blocks everything from beginning of match forward for next iteration.
                blocks.insert(0, therest[m2.start():])
            else:
                # All remaining lines of block are continuation of this footnote, which may be lazily indented.
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')

                # Check for child elements in remaining blocks.
                fn_blocks.extend(self.detectTabbed(blocks))

            footnote = "\n\n".join(fn_blocks)
            self.footnotes.setFootnote(id, footnote.rstrip())

            if block[:m.start()].strip():
                # Add any content before match back to blocks as separate block
                # (inserted last, so it ends up in front of a pushed-back definition).
                blocks.insert(0, block[:m.start()].rstrip('\n'))
            return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False

    def detectTabbed(self, blocks: list[str]) -> list[str]:
        """ Find indented text and remove indent before further processing.

        Consumes leading blocks from `blocks` for as long as they start with
        four spaces. Stops early, pushing the remainder back, when a consumed
        block contains a new footnote definition.

        Returns:
            A list of blocks with indentation removed.
        """
        fn_blocks = []
        while blocks:
            if blocks[0].startswith(' '*4):
                block = blocks.pop(0)
                # Check for new footnotes within this block and split at new footnote.
                m = self.RE.search(block)
                if m:
                    # Another footnote exists in this block.
                    # Any content before match is continuation of this footnote, which may be lazily indented.
                    before = block[:m.start()].rstrip('\n')
                    fn_blocks.append(self.detab(before))
                    # Add back to blocks everything from beginning of match forward for next iteration.
                    blocks.insert(0, block[m.start():])
                    # End of this footnote.
                    break
                else:
                    # Entire block is part of this footnote.
                    fn_blocks.append(self.detab(block))
            else:
                # End of this footnote.
                break
        return fn_blocks

    def detab(self, block: str) -> str:
        """ Remove one level of indent from a block.

        Preserve lazily indented blocks by only removing indent from indented lines.
        One level is exactly four leading spaces; other lines are left as they are.
        """
        lines = block.split('\n')
        for i, line in enumerate(lines):
            if line.startswith(' '*4):
                lines[i] = line[4:]
        return '\n'.join(lines)
320
321
class FootnoteInlineProcessor(InlineProcessor):
    """ Turn footnote markers found in body text into superscript links. """

    def __init__(self, pattern: str, footnotes: FootnoteExtension):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Build the `<sup><a>` reference for a matched marker.

        Returns `(None, None, None)` when the label has no stored definition.
        Otherwise the reference is recorded and the element is returned
        together with the start and end of the match.
        """
        extension = self.footnotes
        label = m.group(1)
        if label not in extension.footnotes:
            return None, None, None

        extension.addFootnoteRef(label)

        # The visible number follows either definition order or the order in
        # which the labels were first referenced.
        if extension.getConfig("USE_DEFINITION_ORDER"):
            numbering = list(extension.footnotes)
        else:
            numbering = extension.footnote_order
        footnote_num = numbering.index(label) + 1

        sup = etree.Element("sup")
        anchor = etree.SubElement(sup, "a")
        sup.set('id', extension.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + extension.makeFootnoteId(label))
        anchor.set('class', 'footnote-ref')
        anchor.text = extension.getConfig("SUPERSCRIPT_TEXT").format(footnote_num)
        return sup, m.start(0), m.end(0)
350
351
class FootnotePostTreeprocessor(Treeprocessor):
    """ Give footnotes that are referenced several times one backlink per reference. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def add_duplicates(self, li: etree.Element, duplicates: int) -> None:
        """ Clone the backlink of `li` for references 2..`duplicates` (`fnref2`, `fnref3`, ...). """
        backref = next(
            (a for a in li.iter('a') if a.attrib.get('class', '') == 'footnote-backref'),
            None
        )
        if backref is None:
            # Nothing to clone in this item.
            return

        separator = self.footnotes.get_separator()
        ref, rest = backref.attrib['href'].split(separator, 1)
        clones = []
        for number in range(2, duplicates + 1):
            clone = copy.deepcopy(backref)
            clone.attrib['href'] = f'{ref}{number}{separator}{rest}'
            clones.append(clone)
            # Running total of links created during the current `run`.
            self.offset += 1
        # The clones go at the end of the item's last child element.
        list(li)[-1].extend(clones)

    def get_num_duplicates(self, li: etree.Element) -> int:
        """ Return how many references were recorded for the footnote in `li` (0 if none). """
        separator = self.footnotes.get_separator()
        fn, rest = li.attrib.get('id', '').split(separator, 1)
        # `fn<sep>x` -> `fnref<sep>x`, the key used in `found_refs`.
        return self.footnotes.found_refs.get(f'{fn}ref{separator}{rest}', 0)

    def handle_duplicates(self, parent: etree.Element) -> None:
        """ Add the extra backlinks to every item of `parent` that needs them. """
        for item in list(parent):
            references = self.get_num_duplicates(item)
            if references > 1:
                self.add_duplicates(item, references)

    def run(self, root: etree.Element) -> None:
        """ Visit each footnote div and complete the backlinks in its first ordered list. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') != 'footnote':
                continue
            # Only the first `ol` below the div holds the footnotes.
            first_list = next(div.iter('ol'), None)
            if first_list is not None:
                self.handle_duplicates(first_list)
403
404
class FootnoteTreeprocessor(Treeprocessor):
    """ Build the footnote div and put it at the place marker, or at the end of the document. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Insert the rendered footnotes into `root`; do nothing when there are none. """
        footnotes_div = self.footnotes.makeFootnotesDiv(root)
        if footnotes_div is None:
            return

        located = self.footnotes.findFootnotesPlaceholder(root)
        if not located:
            # No marker in the document: the footnotes go last.
            root.append(footnotes_div)
            return

        child, parent, marker_in_text = located
        position = list(parent).index(child)
        if marker_in_text:
            # The marker is the element's own text: the div takes its place.
            parent.remove(child)
            parent.insert(position, footnotes_div)
        else:
            # The marker trails the element: the div follows it and the
            # marker text is dropped.
            parent.insert(position + 1, footnotes_div)
            child.tail = None
426
427
class FootnoteReorderingProcessor(Treeprocessor):
    """ Reorder list items in the footnotes div.

    The items are created in definition order. This processor sorts them by
    the order in which the footnotes were first referenced and renumbers the
    backlink titles to match.
    """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Reorder the first footnote div when reference and definition order differ. """
        if not self.footnotes.footnotes:
            return
        if self.footnotes.footnote_order != list(self.footnotes.footnotes.keys()):
            for div in root.iter('div'):
                if div.attrib.get('class', '') == 'footnote':
                    self.reorder_footnotes(div)
                    break

    def reorder_footnotes(self, parent: etree.Element) -> None:
        """ Replace the `ol` of `parent` with one sorted by first reference.

        Footnotes that are never referenced keep their relative order and go
        last. A `parent` without an `ol` is left untouched.
        """
        old_list = parent.find('ol')
        if old_list is None:
            return
        parent.remove(old_list)

        # Key the positions by the full element id. Comparing only the part
        # after the separator misses every item once `UNIQUE_IDS` puts a
        # prefix in front of the label, which would leave the list unsorted.
        position_by_id = {
            self.footnotes.makeFootnoteId(label): position
            for position, label in enumerate(self.footnotes.footnote_order)
        }
        unreferenced = len(self.footnotes.footnotes)

        def order_by_id(li) -> int:
            return position_by_id.get(li.attrib.get('id', ''), unreferenced)

        items = sorted(old_list.findall('li'), key=order_by_id)

        new_list = etree.SubElement(parent, 'ol')

        for index, item in enumerate(items, start=1):
            backlink = item.find('.//a[@class="footnote-backref"]')
            # An item without a backlink has no title to renumber.
            if backlink is not None:
                backlink.set("title", self.footnotes.getConfig("BACKLINK_TITLE").format(index))
            new_list.append(item)
464
465
class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """
    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, text: str) -> str:
        """ Return `text` with the backlink and no-break-space placeholders filled in.

        The backlink placeholder becomes the configured `BACKLINK_TEXT`. The
        other placeholder becomes the numeric character reference for a
        no-break space (code point 160).
        """
        text = text.replace(
            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
        )
        # Spelled as an explicit reference: an invisible whitespace character
        # in the source cannot be told apart from an ordinary space.
        return text.replace(NBSP_PLACEHOLDER, "&#160;")
476
477
def makeExtension(**kwargs):  # pragma: no cover
    """ Create a `FootnoteExtension`, passing every keyword argument to its constructor. """
    extension = FootnoteExtension(**kwargs)
    return extension