Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/extensions/attr_list.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

99 statements  

1# Attribute List Extension for Python-Markdown 

2# ============================================ 

3 

4# Adds attribute list syntax. Inspired by 

5# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s 

6# feature of the same name. 

7 

8# See https://Python-Markdown.github.io/extensions/attr_list 

9# for documentation. 

10 

11# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). 

12 

13# All changes Copyright 2011-2014 The Python Markdown Project 

14 

15# License: [BSD](https://opensource.org/licenses/bsd-license.php) 

16 

17""" 

18Adds attribute list syntax to Python-Markdown. 

19Inspired by 

20[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s 

21feature of the same name. 

22 

23See the [documentation](https://Python-Markdown.github.io/extensions/attr_list) 

24for details. 

25""" 

26 

27from __future__ import annotations 

28from typing import TYPE_CHECKING 

29 

30from . import Extension 

31from ..treeprocessors import Treeprocessor 

32import re 

33 

34if TYPE_CHECKING: # pragma: no cover 

35 from xml.etree.ElementTree import Element 

36 

37 

38def _handle_double_quote(s, t): 

39 k, v = t.split('=', 1) 

40 return k, v.strip('"') 

41 

42 

43def _handle_single_quote(s, t): 

44 k, v = t.split('=', 1) 

45 return k, v.strip("'") 

46 

47 

48def _handle_key_value(s, t): 

49 return t.split('=', 1) 

50 

51 

52def _handle_word(s, t): 

53 if t.startswith('.'): 

54 return '.', t[1:] 

55 if t.startswith('#'): 

56 return 'id', t[1:] 

57 return t, t 

58 

59 

# Token rules for the contents of an attribute list, listed from the most
# specific form to the least specific one. Each rule pairs a pattern with the
# callback that turns the matched text into a key/value pair.
_scanner = re.Scanner([
    # key="double quoted value"
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='single quoted value'
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value (unquoted; the value may not contain a space, `=` or `}`)
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word, including the `.class` and `#id` shorthands
    (r'[^ =}]+', _handle_word),
    # a single space separates tokens and produces no result
    (r' ', None),
])

67 

68 

def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse an attribute list string into attribute pairs plus leftover text.

    The first item of the returned tuple is the list of parsed attributes.
    The second item is the text starting at the first closing curly brace that
    the scanner could not consume, or an empty string when no such brace
    exists. A non-empty remainder typically means the input did not match the
    intended attribute list syntax.
    """
    attrs, unparsed = _scanner.scan(attrs_string)
    # To keep historic behavior, discard all unparsable text prior to '}'.
    brace_at = unparsed.find('}')
    if brace_at == -1:
        return attrs, ''
    return attrs, unparsed[brace_at:]

80 

81 

def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the parsed attribute pairs and drops the remainder text.
    """
    attrs, _ = get_attrs_and_remainder(str)
    return attrs

85 

86 

def isheader(elem: Element) -> bool:
    """ Return `True` when the tag of `elem` is one of `h1` through `h6`. """
    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in header_tags

89 

90 

class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that turns `{: ... }` attribute lists into element attributes.

    Attribute lists are looked up in the `text`/`tail` of elements, removed
    from that text and assigned to the element they belong to.
    """

    # Core pattern: `{` or `{:`, optional spaces, the attribute text (group 1),
    # optional spaces and a closing `}`. Group 1 is greedy up to the last `}`
    # on the line so that quoted values may themselves contain `}`.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list at the very end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Runs of characters that `sanitize_name` replaces with `_`.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Apply attribute lists to every element under `doc`.

        Block-level elements are handled strictly: when the candidate
        attribute list leaves an unparsed `}` behind, nothing is assigned and
        the text is left untouched. Inline elements are handled leniently: the
        parsed attributes are assigned and the over-matched text is put back
        into the element's tail.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attributes at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a `ul` or `ol`.
                    pos = None
                    # find the `ul` or `ol` position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no `ul` or `ol`.
                        m = RE.search(elem[-1].tail)
                        if m:
                            # An empty remainder means the attributes were assigned.
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before `ul` or `ol`
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. `ul` is first child.
                        m = RE.search(elem.text)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for `attrs` at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        remainder = self.assign_attrs(elem, m.group(1))
                        after_match = elem.tail[m.end():]
                        if remainder:
                            # `INLINE_RE` matched up to the last `}` on the line,
                            # while `remainder` begins at the `}` that actually
                            # closed the attribute list. Drop that brace, then
                            # restore the over-matched text, the final `}` the
                            # pattern consumed and the untouched rest of the
                            # tail -- in their original order.
                            elem.tail = remainder[1:] + '}' + after_match
                        else:
                            elem.tail = after_match

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
        The returned text starts at that closing curly brace; it is an empty
        string when there is none.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        With `strict=True` and a non-empty remainder, the element is left unchanged.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Every run of characters matched by `NAME_RE` is replaced with a single `_`.
        """
        return self.NAME_RE.sub('_', name)

194 

195 

class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """

    def extendMarkdown(self, md):
        """ Register the attribute list tree processor and this extension with `md`.

        The processor is registered under the name `attr_list` with priority 8.
        """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)

201 

202 

def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension