Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/extensions/abbr.py: 85%

1# Abbreviation Extension for Python-Markdown

2# ==========================================

4# This extension adds abbreviation handling to Python-Markdown.

6# See https://Python-Markdown.github.io/extensions/abbreviations

7# for documentation.

10# and [Seemant Kulleen](http://www.kulleen.org/)

14# License: [BSD](https://opensource.org/licenses/bsd-license.php)

16"""

17This extension adds abbreviation handling to Python-Markdown.

19See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations)

20for details.

21"""

23from __future__ import annotations

25from . import Extension

26from ..blockprocessors import BlockProcessor

27from ..inlinepatterns import InlineProcessor

28from ..treeprocessors import Treeprocessor

29from ..util import AtomicString, deprecated

30from typing import TYPE_CHECKING

31import re

32import xml.etree.ElementTree as etree

34if TYPE_CHECKING: # pragma: no cover

35 from .. import Markdown

36 from ..blockparser import BlockParser

class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Initiate Extension and set up configs. """
        # `self.config` is assigned before `super().__init__()` is called, as in
        # the original ordering, so the defaults exist when `kwargs` are handled.
        self.config = {
            'glossary': [
                {},
                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
                'Default: `{}`'
            ],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # Abbreviations currently in effect. This exact dict object is handed to
        # the processors in `extendMarkdown`, so it must only be mutated in place.
        self.abbrs = {}
        # Abbreviations that survive `reset()`.
        self.glossary = {}

    def reset(self):
        """ Clear all previously defined abbreviations, then restore the glossary entries. """
        # Mutate in place (never rebind): the processors hold a reference to this dict.
        self.abbrs.clear()
        self.abbrs.update(self.glossary)

    def reset_glossary(self):
        """ Clear all abbreviations from the glossary. """
        self.glossary.clear()

    def load_glossary(self, dictionary: dict[str, str]):
        """
        Merge `dictionary` into the glossary.

        Entries already present in the glossary take precedence: a key found in
        both keeps its existing definition, and only new keys are added. Call
        `reset_glossary` first to replace existing definitions. An empty or
        `None` `dictionary` leaves the glossary untouched.
        """
        if dictionary:
            # Later unpacking wins, so the existing glossary overrides `dictionary`.
            # A new dict is built; the caller's `dictionary` is not modified.
            self.glossary = {**dictionary, **self.glossary}

    def extendMarkdown(self, md):
        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        configured = self.config['glossary'][0]
        if configured:
            # Layered beneath anything already loaded through `load_glossary`.
            self.load_glossary(configured)
        self.abbrs.update(self.glossary)
        md.registerExtension(self)
        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)

class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        # Keep the caller's dict object (no copy) so later changes to it are seen here.
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled in `run()` from the abbreviations known at that moment.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def create_element(self, title: str, text: str, tail: str) -> etree.Element:
        ''' Create an `abbr` element with the given `title`, atomic `text` and `tail`. '''
        abbr = etree.Element('abbr', {'title': title})
        # `AtomicString` marks the text so the `isinstance` checks in
        # `iter_element` skip it and it is never wrapped a second time.
        abbr.text = AtomicString(text)
        abbr.tail = tail
        return abbr

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        '''
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        `self.RE` must already be compiled (see `run`). Abbreviations whose
        definition is empty are left as plain text, both in `el.text` and in
        `el.tail`.
        '''
        # Walk children last-to-first: `<abbr>` elements inserted after a child
        # land at positions that have already been visited.
        for child in reversed(el):
            self.iter_element(child, el)
        if text := el.text:
            if not isinstance(text, AtomicString):
                # Handle matches right-to-left so earlier offsets stay valid while
                # the string is truncated, and inserting at index 0 each time
                # leaves the new elements in document order.
                for m in reversed(list(self.RE.finditer(text))):
                    title = self.abbrs[m.group(0)]
                    if title:
                        abbr = self.create_element(title, m.group(0), text[m.end():])
                        el.insert(0, abbr)
                        text = text[:m.start()]
                el.text = text
        if parent is not None and el.tail:
            tail = el.tail
            # New elements go directly after `el` within its parent.
            index = list(parent).index(el) + 1
            if not isinstance(tail, AtomicString):
                for m in reversed(list(self.RE.finditer(tail))):
                    title = self.abbrs[m.group(0)]
                    # Same rule as for `el.text`: skip empty definitions.
                    if title:
                        abbr = self.create_element(title, m.group(0), tail[m.end():])
                        parent.insert(index, abbr)
                        tail = tail[:m.start()]
                el.tail = tail

    def run(self, root: etree.Element) -> etree.Element | None:
        ''' Step through tree to find known abbreviations. Modifies `root` in place; returns `None`. '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return None
        # Build and compile regex. Longest keys come first so that an abbreviation
        # is tried before any shorter abbreviation that is a prefix of it.
        keys = sorted(self.abbrs, key=len, reverse=True)
        alternation = '|'.join(re.escape(key) for key in keys)
        self.RE = re.compile(rf"\b(?:{alternation})\b")
        # Step through tree and modify on matches
        self.iter_element(root)
        return None

129

130

class AbbrBlockprocessor(BlockProcessor):
    """ Parse text for abbreviation references. """

    # Matches `*[ABBR]: Definition`, where the definition may instead start on the next line.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def __init__(self, parser: BlockParser, abbrs: dict):
        # Kept by reference (no copy): definitions found here are written into the caller's dict.
        self.abbrs: dict = abbrs
        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` decides whether it holds an abbreviation reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """
        Find and remove all abbreviation references from the text.
        Each reference is added to the abbreviation collection.

        Only the first reference in the block is consumed per call. Text before
        and after it is pushed back onto `blocks` as separate blocks, so any
        further references are handled when those blocks are processed.

        A definition of `''` or `""` removes the abbreviation from the
        collection; removing one that was never defined is a no-op.

        Returns `True` when a reference was consumed. Otherwise the block is
        restored to the front of `blocks` and `False` is returned.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            if title and abbr:
                if title == "''" or title == '""':
                    # Supply a default so clearing an unknown abbreviation does
                    # not raise `KeyError` and abort the whole conversion.
                    self.abbrs.pop(abbr, None)
                else:
                    self.abbrs[abbr] = title
                after = block[m.end():]
                if after.strip():
                    # Add any content after match back to blocks as separate block
                    blocks.insert(0, after.lstrip('\n'))
                before = block[:m.start()]
                if before.strip():
                    # Add any content before match back to blocks as separate block.
                    # Inserted last so that it ends up ahead of the trailing content.
                    blocks.insert(0, before.rstrip('\n'))
                return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False

169

170

# Former name of `AbbrBlockprocessor`, kept available but wrapped by `deprecated`.
AbbrPreprocessor = deprecated(
    "This class has been renamed to `AbbrBlockprocessor`."
)(AbbrBlockprocessor)

172

173

@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
class AbbrInlineProcessor(InlineProcessor):
    """ Inline pattern which wraps a matched abbreviation in an `<abbr>` element. """

    def __init__(self, pattern: str, title: str):
        """ Store the `title` applied to every element created from `pattern`. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return the `<abbr>` element for match `m` together with the span of the whole match. """
        element = etree.Element('abbr', {'title': self.title})
        # Atomic text, taken from the pattern's named group `abbr`.
        element.text = AtomicString(m.group('abbr'))
        begin, finish = m.span()
        return element, begin, finish

187

188

def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AbbrExtension`, forwarding all keyword arguments to its constructor. """
    extension = AbbrExtension(**kwargs)
    return extension