Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/extensions/abbr.py: 85%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

106 statements  

1# Abbreviation Extension for Python-Markdown 

2# ========================================== 

3 

4# This extension adds abbreviation handling to Python-Markdown. 

5 

6# See https://Python-Markdown.github.io/extensions/abbreviations 

7# for documentation. 

8 

9# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) 

10# and [Seemant Kulleen](http://www.kulleen.org/) 

11 

12# All changes Copyright 2008-2014 The Python Markdown Project 

13 

14# License: [BSD](https://opensource.org/licenses/bsd-license.php) 

15 

16""" 

17This extension adds abbreviation handling to Python-Markdown. 

18 

19See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations) 

20for details. 

21""" 

22 

23from __future__ import annotations 

24 

25from . import Extension 

26from ..blockprocessors import BlockProcessor 

27from ..inlinepatterns import InlineProcessor 

28from ..treeprocessors import Treeprocessor 

29from ..util import AtomicString, deprecated 

30from typing import TYPE_CHECKING 

31import re 

32import xml.etree.ElementTree as etree 

33 

34if TYPE_CHECKING: # pragma: no cover 

35 from .. import Markdown 

36 from ..blockparser import BlockParser 

37 

38 

class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Initiate Extension and set up configs. """
        # Default configuration options.
        self.config = {
            'glossary': [
                {},
                # The two literals are concatenated; the trailing space keeps the
                # sentences from running together in the generated help text.
                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
                'Default: `{}`'
            ],
        }
        super().__init__(**kwargs)
        # Working set of abbreviations. This exact dict object is handed to the
        # processors in `extendMarkdown`, so it is only ever mutated in place.
        self.abbrs = {}
        # Abbreviations that are re-applied to `abbrs` on every `reset`.
        self.glossary = {}

    def reset(self):
        """ Clear all previously defined abbreviations, then restore the glossary entries. """
        self.abbrs.clear()
        if self.glossary:
            self.abbrs.update(self.glossary)

    def reset_glossary(self):
        """ Clear all abbreviations from the glossary. """
        self.glossary.clear()

    def load_glossary(self, dictionary: dict[str, str]):
        """
        Merge `dictionary` into the glossary.

        Entries already present in the glossary take precedence: an abbreviation
        from `dictionary` is only added when the glossary does not define it yet.
        As a result, a glossary loaded through this method before `extendMarkdown`
        runs wins over the `glossary` config option, which is merged in afterwards.
        An empty `dictionary` leaves the glossary untouched.
        """
        if dictionary:
            self.glossary = {**dictionary, **self.glossary}

    def extendMarkdown(self, md):
        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        configured_glossary = self.config['glossary'][0]
        if configured_glossary:
            self.load_glossary(configured_glossary)
        # Seed the working set so glossary terms apply to the first document as well.
        self.abbrs.update(self.glossary)
        md.registerExtension(self)
        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)

79 

80 

class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        # Mapping of abbreviation -> title; a fresh dict is used when none is supplied.
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled in `run` from the current keys of `abbrs`.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def create_element(self, title: str, text: str, tail: str) -> etree.Element:
        ''' Create an `abbr` element with the given `title` attribute, atomic `text` and `tail`. '''
        abbr = etree.Element('abbr', {'title': title})
        # `AtomicString` marks the text so `iter_element` does not scan it again.
        abbr.text = AtomicString(text)
        abbr.tail = tail
        return abbr

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        '''
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        Children are handled first. Matches are processed right to left so that the
        string offsets of earlier matches stay valid while the text is being cut, and
        so that the inserted elements end up in document order. Abbreviations whose
        title is empty are left as plain text, in both `el.text` and `el.tail`.
        '''
        for child in reversed(el):
            self.iter_element(child, el)
        if text := el.text:
            if not isinstance(text, AtomicString):
                for m in reversed(list(self.RE.finditer(text))):
                    title = self.abbrs[m.group(0)]
                    if title:
                        abbr = self.create_element(title, m.group(0), text[m.end():])
                        el.insert(0, abbr)
                        text = text[:m.start()]
                el.text = text
        if parent is not None and el.tail:
            tail = el.tail
            # New elements go directly after `el` inside its parent.
            index = list(parent).index(el) + 1
            if not isinstance(tail, AtomicString):
                for m in reversed(list(self.RE.finditer(tail))):
                    title = self.abbrs[m.group(0)]
                    # Same guard as for `el.text`: skip abbreviations without a title.
                    if title:
                        abbr = self.create_element(title, m.group(0), tail[m.end():])
                        parent.insert(index, abbr)
                        tail = tail[:m.start()]
                el.tail = tail

    def run(self, root: etree.Element) -> etree.Element | None:
        ''' Step through tree to find known abbreviations. The tree is modified in place. '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return
        # Build and compile regex. Longest keys come first so that an abbreviation
        # which is a prefix of another one does not shadow the longer match.
        abbr_list = list(self.abbrs.keys())
        abbr_list.sort(key=len, reverse=True)
        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in abbr_list) })\\b")
        # Step through tree and modify on matches
        self.iter_element(root)

129 

130 

class AbbrBlockprocessor(BlockProcessor):
    """ Parse text for abbreviation references. """

    # Matches a reference line such as `*[ABBR]: Title`. The title may also start
    # on the following line.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def __init__(self, parser: BlockParser, abbrs: dict):
        # Mapping of abbreviation -> title, updated in place by `run`.
        self.abbrs: dict = abbrs
        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` decides whether it holds a reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """
        Find and remove all abbreviation references from the text.
        Each reference is added to the abbreviation collection.

        A title of `''` or `""` removes the abbreviation from the collection instead.
        Text surrounding the reference is pushed back onto `blocks` as separate blocks.

        Returns `True` when a reference was found and consumed. Otherwise the block is
        restored to the front of `blocks` and `False` is returned.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            if title and abbr:
                if title == "''" or title == '""':
                    # Clearing an abbreviation that was never defined is a no-op
                    # rather than a `KeyError`.
                    self.abbrs.pop(abbr, None)
                else:
                    self.abbrs[abbr] = title
            if block[m.end():].strip():
                # Add any content after match back to blocks as separate block
                blocks.insert(0, block[m.end():].lstrip('\n'))
            if block[:m.start()].strip():
                # Add any content before match back to blocks as separate block.
                # Inserted last so it ends up ahead of the trailing content.
                blocks.insert(0, block[:m.start()].rstrip('\n'))
            return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False

169 

170 

# Deprecated alias that keeps the previous class name available for `AbbrBlockprocessor`.
AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)

172 

173 

@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
class AbbrInlineProcessor(InlineProcessor):
    """ Inline pattern that turns a matched abbreviation into an `<abbr>` element. """

    def __init__(self, pattern: str, title: str):
        """ Store `title`, which is used as the `title` attribute of every element built. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return the `<abbr>` element for match `m` together with the span of the whole match. """
        element = etree.Element('abbr')
        # Atomic text so the abbreviation itself is not processed any further.
        element.text = AtomicString(m.group('abbr'))
        element.set('title', self.title)
        match_start, match_end = m.span(0)
        return element, match_start, match_end

187 

188 

def makeExtension(**kwargs):  # pragma: no cover
    """ Create an `AbbrExtension`, forwarding all keyword arguments to its constructor. """
    extension = AbbrExtension(**kwargs)
    return extension