1# Footnotes Extension for Python-Markdown
2# =======================================
3
4# Adds footnote handling to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/footnotes
7# for documentation.
8
9# Copyright The Python Markdown Project
10
11# License: [BSD](https://opensource.org/licenses/bsd-license.php)
12
13"""
14Adds footnote handling to Python-Markdown.
15
16See the [documentation](https://Python-Markdown.github.io/extensions/footnotes)
17for details.
18"""
19
20from __future__ import annotations
21
22from . import Extension
23from ..blockprocessors import BlockProcessor
24from ..inlinepatterns import InlineProcessor
25from ..treeprocessors import Treeprocessor
26from ..postprocessors import Postprocessor
27from .. import util
28from collections import OrderedDict
29import re
30import copy
31import xml.etree.ElementTree as etree
32
# Placeholder used as the text of every footnote backlink while the tree is being
# built; `FootnotePostprocessor` swaps it for the configured `BACKLINK_TEXT`.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
# Placeholder appended to the text of a footnote's last paragraph, ahead of the
# backlink; `FootnotePostprocessor` replaces it in the final output.
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# Matches a reference id prefix that already carries a duplicate counter
# (e.g. `fnref2`); used by `FootnoteExtension.unique_ref` to bump that counter.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
36
37
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Stores the footnote definitions found in a document, generates the ids that
    link references and definitions together, and builds the `div` element that
    lists the footnotes.
    """

    def __init__(self, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER': [
                '///Footnotes Go Here///', 'The text string that marks where the footnotes go'
            ],
            'UNIQUE_IDS': [
                False, 'Avoid name collisions across multiple calls to `reset()`.'
            ],
            'BACKLINK_TEXT': [
                '↩', "The text string that links from the footnote to the reader's place."
            ],
            'SUPERSCRIPT_TEXT': [
                '{}', "The text string that links from the reader's place to the footnote."
            ],
            'BACKLINK_TITLE': [
                'Jump back to footnote %d in the text',
                'The text string used for the title HTML attribute of the backlink. '
                '%d will be replaced by the footnote number.'
            ],
            'SEPARATOR': [
                ':', 'Footnote separator.'
            ]
        }
        """ Default configuration options. """
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        # How many times each reference id was requested with `found=True`.
        self.found_refs: dict[str, int] = {}
        # Every reference id already handed out by `unique_ref`.
        self.used_refs: set[str] = set()

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self) -> None:
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes: OrderedDict[str, str] = OrderedDict()
        # A fresh prefix per document keeps ids distinct when `UNIQUE_IDS` is on.
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference: str, found: bool = False) -> str:
        """ Get a unique reference if there are duplicates.

        When `found` is false, `reference` is returned untouched and nothing is
        recorded. Otherwise a counter is inserted before the separator (or an
        existing counter is incremented) until the id is unused; the returned
        id is recorded in `used_refs` and the count for the original
        `reference` in `found_refs` is incremented.
        """
        if not found:
            return reference

        original_ref = reference
        while reference in self.used_refs:
            separator = self.get_separator()
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                # Already numbered (e.g. `fnref2`): bump the counter.
                reference = f'{m.group(1)}{int(m.group(2)) + 1}{separator}{rest}'
            else:
                # First duplicate: start numbering at 2.
                reference = f'{ref}2{separator}{rest}'

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(
        self, root: etree.Element
    ) -> tuple[etree.Element, etree.Element, bool] | None:
        """ Return ElementTree Element that contains Footnote placeholder.

        The result is a `(child, parent, is_text)` tuple where `is_text` is
        `True` when the marker was found in `child.text` and `False` when it
        was found in `child.tail`. Returns `None` if the marker is not found.
        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            # Depth-first search; a child's own text and tail are checked
            # before descending into that child.
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id: str, text: str) -> None:
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self) -> str:
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id: str) -> str:
        """ Return footnote link id. """
        separator = self.get_separator()
        if self.getConfig("UNIQUE_IDS"):
            return f'fn{separator}{self.unique_prefix:d}-{id}'
        return f'fn{separator}{id}'

    def makeFootnoteRefId(self, id: str, found: bool = False) -> str:
        """ Return footnote back-link id.

        `found` is forwarded to `unique_ref`; pass `True` when the id is for a
        reference encountered in the text so duplicates get distinct ids.
        """
        separator = self.get_separator()
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref(f'fnref{separator}{self.unique_prefix:d}-{id}', found)
        return self.unique_ref(f'fnref{separator}{id}', found)

    def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
        """ Return `div` of footnotes as `etree` Element.

        Returns `None` when no footnotes have been stored. `root` is not used
        by this method.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")

        # Backward compatibility with old '%d' placeholder
        backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}")

        for index, fn_id in enumerate(self.footnotes, start=1):
            li = etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(fn_id))
            # Parse footnote with surrogate parent as `li` cannot be used.
            # List block handlers have special logic to deal with `li`.
            # When we are done parsing, we will copy everything over to `li`.
            self.parser.parseChunk(surrogate_parent, self.footnotes[fn_id])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)
            backlink = etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(fn_id))
            backlink.set("class", "footnote-backref")
            backlink.set("title", backlink_title.format(index))
            backlink.text = FN_BACKLINK_TEXT

            if len(li):
                node = li[-1]
                if node.tag == "p":
                    # `Element.text` is `None` when the paragraph has no
                    # leading text; treat that as an empty string.
                    node.text = (node.text or "") + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    # Last child is not a paragraph: give the backlink its own.
                    p = etree.SubElement(li, "p")
                    p.append(backlink)
        return div
214
215
class FootnoteBlockProcessor(BlockProcessor):
    """ Locate footnote definitions in blocks and hand them to the extension. """

    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes: FootnoteExtension):
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` decides whether it holds a definition. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Find, set, and remove footnote definitions.

        Returns `True` when a definition was consumed from the first block and
        `False` (with `blocks` restored) when the block holds no definition.
        """
        block = blocks.pop(0)
        match = self.RE.search(block)
        if match is None:
            # Nothing to do here: put the block back untouched.
            blocks.insert(0, block)
            return False

        label = match.group(1)
        first_line = match.group(2)

        # Whatever follows the definition line inside the same block.
        remainder = block[match.end():].lstrip('\n')
        next_def = self.RE.search(remainder)
        if next_def is not None:
            # A further definition starts later in this block. Text ahead of
            # it continues the current footnote (possibly lazily indented).
            continuation = remainder[:next_def.start()].rstrip('\n')
            fn_blocks = [(first_line + '\n' + self.detab(continuation)).lstrip('\n')]
            # Queue the next definition (and all that follows) for another pass.
            blocks.insert(0, remainder[next_def.start():])
        else:
            # The whole remainder continues this footnote (possibly lazily indented).
            fn_blocks = [(first_line + '\n' + self.detab(remainder)).strip('\n')]
            # Indented blocks that follow are children of this footnote.
            fn_blocks.extend(self.detectTabbed(blocks))

        self.footnotes.setFootnote(label, "\n\n".join(fn_blocks).rstrip())

        preceding = block[:match.start()]
        if preceding.strip():
            # Content ahead of the definition goes back as its own block.
            blocks.insert(0, preceding.rstrip('\n'))
        return True

    def detectTabbed(self, blocks: list[str]) -> list[str]:
        """ Consume leading indented blocks and strip one level of indent.

        Stops at the first block that is not indented, or at a block in which
        a new footnote definition starts (the definition is pushed back onto
        `blocks`).

        Returns:
            A list of blocks with indentation removed.
        """
        indent = ' ' * 4
        collected = []
        while blocks and blocks[0].startswith(indent):
            candidate = blocks.pop(0)
            found = self.RE.search(candidate)
            if found is None:
                # The block belongs to the current footnote in full.
                collected.append(self.detab(candidate))
                continue
            # A new definition begins inside this block: keep what precedes
            # it, requeue the rest, and finish the current footnote.
            collected.append(self.detab(candidate[:found.start()].rstrip('\n')))
            blocks.insert(0, candidate[found.start():])
            break
        return collected

    def detab(self, block: str) -> str:
        """ Remove one level of indent from a block.

        Lines that do not start with four spaces are left as they are, which
        keeps lazily indented lines intact.
        """
        indent = ' ' * 4
        return '\n'.join(
            line[4:] if line.startswith(indent) else line
            for line in block.split('\n')
        )
303
304
class FootnoteInlineProcessor(InlineProcessor):
    """ `InlineProcessor` that turns footnote markers in body text into links. """

    def __init__(self, pattern: str, footnotes: FootnoteExtension):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Build the `sup` link for a marker whose footnote is defined.

        Returns `(None, None, None)` when the marker names a footnote that has
        not been stored on the extension.
        """
        extension = self.footnotes
        label = m.group(1)
        if label not in extension.footnotes:
            return None, None, None

        sup = etree.Element("sup")
        anchor = etree.SubElement(sup, "a")
        sup.set('id', extension.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + extension.makeFootnoteId(label))
        anchor.set('class', 'footnote-ref')
        # Footnotes are numbered from 1 in the order they were stored.
        number = list(extension.footnotes).index(label) + 1
        anchor.text = extension.getConfig("SUPERSCRIPT_TEXT").format(number)
        return sup, m.start(0), m.end(0)
326
327
class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def add_duplicates(self, li: etree.Element, duplicates: int) -> None:
        """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc.

        Only the first usable backlink in `li` is duplicated; the copies are
        appended to the last child of `li`.
        """
        separator = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') != 'footnote-backref':
                continue
            ref, found, rest = link.attrib.get('href', '').partition(separator)
            if not found:
                # Not a backlink in the `<ref><separator><rest>` shape; skip it
                # rather than fail on unpacking.
                continue
            # Duplicate link the number of times we need to
            # and point them to the appropriate references.
            extra_links = []
            for index in range(2, duplicates + 1):
                sib_link = copy.deepcopy(link)
                sib_link.attrib['href'] = f'{ref}{index:d}{separator}{rest}'
                extra_links.append(sib_link)
                self.offset += 1
            # Add all the new duplicate links.
            last_child = list(li)[-1]
            for extra in extra_links:
                last_child.append(extra)
            break

    def get_num_duplicates(self, li: etree.Element) -> int:
        """ Get the number of duplicate refs of the footnote.

        Returns 0 for an `li` whose `id` is missing or does not contain the
        separator, so list items that are not footnotes are ignored.
        """
        separator = self.footnotes.get_separator()
        fn, found, rest = li.attrib.get('id', '').partition(separator)
        if not found:
            return 0
        link_id = '{}ref{}{}'.format(fn, separator, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent: etree.Element) -> None:
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root: etree.Element) -> None:
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div, so only that first list is processed.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break
379
380
class FootnoteTreeprocessor(Treeprocessor):
    """ Build the footnote div and place it at the marker or at the end of the document. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Insert the footnotes `div` into `root`, if there are any footnotes. """
        footnotes_div = self.footnotes.makeFootnotesDiv(root)
        if footnotes_div is None:
            return

        placeholder = self.footnotes.findFootnotesPlaceholder(root)
        if not placeholder:
            # No marker in the document: the footnotes go last.
            root.append(footnotes_div)
            return

        child, parent, is_text = placeholder
        position = list(parent).index(child)
        if is_text:
            # Marker is the element's own text: the div takes its place.
            parent.remove(child)
            parent.insert(position, footnotes_div)
        else:
            # Marker is in the tail: put the div right after the element
            # and discard the tail.
            parent.insert(position + 1, footnotes_div)
            child.tail = None
402
403
class FootnotePostprocessor(Postprocessor):
    """ Replace the footnote placeholders with their final text. """

    def __init__(self, footnotes: FootnoteExtension):
        self.footnotes = footnotes

    def run(self, text: str) -> str:
        """ Return `text` with both footnote placeholders substituted.

        `FN_BACKLINK_TEXT` becomes the configured `BACKLINK_TEXT`, and
        `NBSP_PLACEHOLDER` becomes a non-breaking space.
        """
        text = text.replace(
            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
        )
        # Written as an escape so the non-breaking space (U+00A0) is visible
        # in the source and cannot be mistaken for an ordinary space.
        return text.replace(NBSP_PLACEHOLDER, "\u00a0")
414
415
def makeExtension(**kwargs):  # pragma: no cover
    """ Build a `FootnoteExtension`, forwarding every keyword argument to it. """
    extension = FootnoteExtension(**kwargs)
    return extension