1# Abbreviation Extension for Python-Markdown
2# ==========================================
3
4# This extension adds abbreviation handling to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/abbreviations
7# for documentation.
8
9# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/)
10# and [Seemant Kulleen](http://www.kulleen.org/)
11
12# All changes Copyright 2008-2014 The Python Markdown Project
13
14# License: [BSD](https://opensource.org/licenses/bsd-license.php)
15
16"""
17This extension adds abbreviation handling to Python-Markdown.
18
19See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations)
20for details.
21"""
22
23from __future__ import annotations
24
25from . import Extension
26from ..blockprocessors import BlockProcessor
27from ..inlinepatterns import InlineProcessor
28from ..treeprocessors import Treeprocessor
29from ..util import AtomicString, deprecated
30from typing import TYPE_CHECKING
31import re
32import xml.etree.ElementTree as etree
33
34if TYPE_CHECKING: # pragma: no cover
35 from .. import Markdown
36 from ..blockparser import BlockParser
37
38
class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Initialize the extension and set up configs. """
        self.config = {
            'glossary': [
                {},
                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
                "Default: `{}`"
            ],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # `abbrs` holds the abbreviations active for the current document. The same dict object is
        # handed to both processors in `extendMarkdown`, so it must only ever be mutated in place.
        self.abbrs = {}
        # `glossary` holds the abbreviations which are restored into `abbrs` on every `reset`.
        self.glossary = {}

    def reset(self):
        """ Clear all previously defined abbreviations, then restore the glossary entries. """
        self.abbrs.clear()
        if self.glossary:
            self.abbrs.update(self.glossary)

    def reset_glossary(self):
        """ Clear all abbreviations from the glossary. """
        self.glossary.clear()

    def load_glossary(self, dictionary: dict[str, str]):
        """
        Merge `dictionary` into the glossary.

        Abbreviations which are already in the glossary keep their current definition; only
        abbreviations which are not yet in the glossary are added. As `extendMarkdown` loads the
        `glossary` config option through this method, anything loaded beforehand takes precedence
        over the config option.

        Arguments:
            dictionary: A mapping of abbreviation to definition. An empty mapping is ignored.
        """
        if dictionary:
            # Later unpacking wins, so existing glossary entries override those in `dictionary`.
            self.glossary = {**dictionary, **self.glossary}

    def extendMarkdown(self, md):
        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        if self.config['glossary'][0]:
            self.load_glossary(self.config['glossary'][0])
        self.abbrs.update(self.glossary)
        md.registerExtension(self)
        # Both processors receive the very same `abbrs` dict, so definitions collected by the
        # block processor are visible to the tree processor.
        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)
79
80
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        # Keep the caller's dict (even when empty) so that later additions to it are seen here.
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled in `run` from the abbreviations known at that time.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def create_element(self, title: str, text: str, tail: str) -> etree.Element:
        ''' Create an `abbr` element. '''
        abbr = etree.Element('abbr', {'title': title})
        # `AtomicString` marks the text so that `iter_element` does not process it again.
        abbr.text = AtomicString(text)
        abbr.tail = tail
        return abbr

    def _wrap_matches(self, text: str, target: etree.Element, index: int) -> str:
        '''
        Wrap each abbreviation found in `text` in an `abbr` element inserted into `target` at `index`.

        Matches are handled from last to first: every new element takes the text following its
        match as its tail and `text` is cut back to the start of the match. Inserting at the same
        `index` each time therefore leaves the elements in document order. Abbreviations with an
        empty title are left as plain text.

        Returns the text which remains in front of the first wrapped match.
        '''
        for m in reversed(list(self.RE.finditer(text))):
            title = self.abbrs[m.group(0)]
            if title:
                target.insert(index, self.create_element(title, m.group(0), text[m.end():]))
                text = text[:m.start()]
        return text

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        ''' Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
        # Walk the children back to front: handling a child's tail inserts new siblings after that
        # child, which leaves the positions of the children still to be visited untouched.
        for child in reversed(el):
            self.iter_element(child, el)
        if text := el.text:
            if not isinstance(text, AtomicString):
                # Matches in the element's text become its first children.
                el.text = self._wrap_matches(text, el, 0)
        if parent is not None and el.tail:
            tail = el.tail
            index = list(parent).index(el) + 1
            if not isinstance(tail, AtomicString):
                # Matches in the element's tail become the siblings immediately following it.
                el.tail = self._wrap_matches(tail, parent, index)

    def run(self, root: etree.Element) -> etree.Element | None:
        ''' Step through tree to find known abbreviations. '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return
        # Build and compile regex. The longest abbreviations are listed first so that they are
        # preferred over shorter abbreviations which are a prefix of them.
        abbr_list = sorted(self.abbrs, key=len, reverse=True)
        alternatives = '|'.join(re.escape(key) for key in abbr_list)
        self.RE = re.compile(f"\\b(?:{alternatives})\\b")
        # Step through tree and modify on matches
        self.iter_element(root)
129
130
class AbbrBlockprocessor(BlockProcessor):
    """ Parse text for abbreviation references. """

    # Matches `*[abbr]: title`, where the title may also start on the following line.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def __init__(self, parser: BlockParser, abbrs: dict):
        # The dict is stored as-is, not copied; `run` adds to and removes from it in place.
        self.abbrs: dict = abbrs
        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` reports whether a reference was actually found. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """
        Find and remove all abbreviation references from the text.
        Each reference is added to the abbreviation collection.

        A reference whose title is an empty pair of quotes (`''` or `""`) removes the abbreviation
        from the collection instead. Removing an abbreviation which was never defined is a no-op.

        Returns `True` when a reference was found in (and removed from) the first block and
        `False` when the block was left untouched.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            if title and abbr:
                if title == "''" or title == '""':
                    # Pass a default so that clearing an unknown abbreviation does not raise.
                    self.abbrs.pop(abbr, None)
                else:
                    self.abbrs[abbr] = title
            before = block[:m.start()]
            after = block[m.end():]
            # The trailing content is inserted first so that the leading content ends up in front
            # of it once both are back at the head of `blocks`.
            if after.strip():
                # Add any content after match back to blocks as separate block
                blocks.insert(0, after.lstrip('\n'))
            if before.strip():
                # Add any content before match back to blocks as separate block
                blocks.insert(0, before.rstrip('\n'))
            return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False
169
170
# Keep the former class name available: `AbbrPreprocessor` is `AbbrBlockprocessor` passed through
# the `deprecated` decorator with the message below.
# NOTE(review): presumably this emits a deprecation warning on use -- confirm in `..util.deprecated`.
AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)
172
173
@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
class AbbrInlineProcessor(InlineProcessor):
    """ Inline pattern which renders a single abbreviation as an `<abbr>` element. """

    def __init__(self, pattern: str, title: str):
        """ Store the `title` used for every match of `pattern`. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return an `abbr` element for the match along with the span of text it replaces. """
        element = etree.Element('abbr', {'title': self.title})
        # The `abbr` group of the pattern holds the abbreviation itself.
        element.text = AtomicString(m.group('abbr'))
        start, end = m.span(0)
        return element, start, end
187
188
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AbbrExtension` from the given keyword arguments. """
    extension = AbbrExtension(**kwargs)
    return extension