1# Attribute List Extension for Python-Markdown
2# ============================================
3
4# Adds attribute list syntax. Inspired by
5# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
6# feature of the same name.
7
8# See https://Python-Markdown.github.io/extensions/attr_list
9# for documentation.
10
11# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
12
13# All changes Copyright 2011-2014 The Python Markdown Project
14
15# License: [BSD](https://opensource.org/licenses/bsd-license.php)
16
17"""
18Adds attribute list syntax to Python-Markdown.
19Inspired by
20[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
21feature of the same name.
22
23See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
24for details.
25"""
26
27from __future__ import annotations
28from typing import TYPE_CHECKING
29
30from . import Extension
31from ..treeprocessors import Treeprocessor
32import re
33
34if TYPE_CHECKING: # pragma: no cover
35 from xml.etree.ElementTree import Element
36
37
38def _handle_double_quote(s, t):
39 k, v = t.split('=', 1)
40 return k, v.strip('"')
41
42
43def _handle_single_quote(s, t):
44 k, v = t.split('=', 1)
45 return k, v.strip("'")
46
47
48def _handle_key_value(s, t):
49 return t.split('=', 1)
50
51
52def _handle_word(s, t):
53 if t.startswith('.'):
54 return '.', t[1:]
55 if t.startswith('#'):
56 return 'id', t[1:]
57 return t, t
58
59
# Token table for the text inside an attribute list. Each entry pairs a
# pattern with the action that turns the matched token into a key/value pair.
# The quoted forms are listed before the bare `key=value` form, and the
# bare-word form comes last.
_lexicon = [
    # key="double quoted value"
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='single quoted value'
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value (no quotes, no spaces)
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # .class, #id, or a lone word
    (r'[^ =}]+', _handle_word),
    # A single space separates tokens and produces no result.
    (r' ', None),
]
_scanner = re.Scanner(_lexicon)
67
68
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse an attribute list into key/value pairs plus any leftover text.

    The first item returned is the list of attributes the scanner recognized.
    The second is the unscanned text starting at the first `}` it contains, or
    an empty string when the unscanned text has no `}`. In typical cases a
    non-empty remainder means the input does not match the intended attribute
    list syntax.
    """
    pairs, unscanned = _scanner.scan(attrs_string)
    # To keep historic behavior, anything unparsable before the '}' is dropped.
    # When no '}' is present, `partition` yields two empty strings here, so the
    # remainder collapses to ''.
    _discarded, brace, tail = unscanned.partition('}')
    return pairs, brace + tail
80
81
def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Return only the parsed attribute pairs; the remainder is discarded.
    """
    pairs, _remainder = get_attrs_and_remainder(str)
    return pairs
85
86
def isheader(elem: Element) -> bool:
    """ Return `True` when the tag of `elem` is one of `h1` through `h6`. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
89
90
class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that turns `{: ... }` attribute lists into element attributes.

    For block-level elements the attribute list is looked for at the end of the
    element's text; for all other elements it is looked for at the start of the
    element's tail.
    """

    # `{` or `{:`, optional spaces, the captured attribute text, optional
    # spaces, `}`. The capture is greedy up to the end of the line, so it can
    # run past an earlier `}`; `assign_attrs` reports such over-matched text.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list preceded by at least one space, at the end of the text.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list on its own (last) line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Runs of characters that are NOT accepted in an attribute name.
    # NOTE(review): `:` is in the accepted set here, although the
    # `sanitize_name` docstring says "minus the `:`" - confirm which is intended.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk `doc` and apply any attribute lists found to their elements.

        The tree is modified in place: matched attribute-list text is removed
        from the relevant `text`/`tail` and the attributes are set on the element.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                self._run_block(elem)
            elif elem.tail:
                # inline: check for `attrs` at start of tail
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    # Non-strict: attributes are always assigned, and any
                    # over-matched text is put back after the match.
                    remainder = self.assign_attrs(elem, m.group(1))
                    elem.tail = elem.tail[m.end():] + remainder

    def _run_block(self, elem: Element) -> None:
        """ Apply a trailing attribute list to the block-level element `elem`. """
        header = isheader(elem)
        if header or elem.tag in ['dt', 'td', 'th']:
            # header, def-term, or table cell: check for attributes at end of element
            pattern = self.HEADER_RE
        else:
            # Block level: check for `attrs` on last line of text
            pattern = self.BLOCK_RE

        if len(elem) and elem.tag == 'li':
            # special case list items. children may include a `ul` or `ol`.
            target = self._list_item_target(elem)
        elif len(elem) and elem[-1].tail:
            # has children. Get from tail of last child
            target = (elem[-1], 'tail')
        elif elem.text:
            # no children. Get from text.
            target = (elem, 'text')
        else:
            target = None
        if target is None:
            return

        node, field = target
        m = pattern.search(getattr(node, field))
        if not m:
            return
        # Strict: if stray text with a `}` remains, nothing is assigned and the
        # source text is left in place.
        if not self.assign_attrs(elem, m.group(1), strict=True):
            setattr(node, field, getattr(node, field)[:m.start()])
        if header:
            # clean up trailing #s
            setattr(node, field, getattr(node, field).rstrip('#').rstrip())

    def _list_item_target(self, elem: Element) -> tuple[Element, str] | None:
        """ Pick the `(node, 'text' | 'tail')` holding the text of an `li` with children.

        Returns `None` when no candidate has any text.
        """
        # find the `ul` or `ol` position
        pos = None
        for i, child in enumerate(elem):
            if child.tag in ['ul', 'ol']:
                pos = i
                break
        if pos is None and elem[-1].tail:
            # use tail of last child. no `ul` or `ol`.
            return elem[-1], 'tail'
        if pos is not None and pos > 0 and elem[pos-1].tail:
            # use tail of last child before `ul` or `ol`
            return elem[pos-1], 'tail'
        if elem.text:
            # Fallback: the item's own leading text (e.g. `ul` is first child).
            return elem, 'text'
        return None

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned;
        otherwise an empty string is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`:
        when `strict` is true and there is a remainder, nothing is assigned.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class, keeping any classes already present
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` is replaced with a single `_`.
        """
        return self.NAME_RE.sub('_', name)
194
195
class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """

    def extendMarkdown(self, md):
        """ Register the attribute-list tree processor and this extension on `md`. """
        processor = AttrListTreeprocessor(md)
        # Registered under the name 'attr_list' with priority 8.
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
201
202
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension