1from __future__ import annotations
2
3from functools import partial
4from typing import Any, Sequence
5
6from markdown_it import MarkdownIt
7from markdown_it.rules_block import StateBlock
8from markdown_it.rules_core import StateCore
9from markdown_it.rules_inline import StateInline
10from markdown_it.token import Token
11
12from mdit_py_plugins.utils import is_code_block
13
14from .parse import ParseError, parse
15
16
def attrs_plugin(
    md: MarkdownIt,
    *,
    after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
    spans: bool = False,
    span_after: str = "link",
    allowed: Sequence[str] | None = None,
) -> None:
    """Enable inline attribute blocks written directly after certain inline elements::

        {#id .a b=c}

    The syntax is inspired by
    `Djot spans
    <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.

    The following forms are recognised between the curly braces:

    - `.foo` adds ``foo`` as a class.
      Several classes may be given; they are combined.
    - `#foo` sets ``foo`` as the identifier.
      An element has a single identifier, so when several are given the last one wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      The quotes may be omitted when the value is made up only of
      ASCII alphanumeric characters, `_`, `:` or `-`.
      Backslash escapes may be used inside quoted values.
    - `%` starts a comment, which runs to the next `%` or to the end of the attribute (`}`).

    Multiple attribute blocks are merged.

    :param md: The MarkdownIt instance to modify.
    :param after: The names of inline elements after which attributes may be specified.
        Attributes after emphasis, strikethrough or text elements are not supported,
        since those all require post-parse processing.
        If empty, the attribute rule is not registered at all.
    :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
        Note Markdown link references take precedence over this syntax.
    :param span_after: The name of an inline rule after which the span rule is inserted.
    :param allowed: A list of allowed attribute names.
        If not ``None``, any attributes not in this list will be removed
        and placed in the token's meta under the key "insecure_attrs".
    """
    # The span rule is registered first so that the order of ruler mutations
    # is: span (optional), then attr (optional).
    if spans:
        md.inline.ruler.after(span_after, "span", _span_rule)

    if not after:
        return

    # Membership tests against the allow-list happen per attribute, so use a set.
    allowed_names = set(allowed) if allowed is not None else None
    inline_rule = partial(_attr_inline_rule, after=after, allowed=allowed_names)
    md.inline.ruler.push("attr", inline_rule)
71
72
def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
    """Enable block attributes.

    A block attribute is a line holding nothing but an attribute block.
    Its attributes are attached to the block that follows it::

        {.a #b c=1}
        A paragraph, that will be assigned the class ``a`` and the identifier ``b``.

    Attribute lines can be stacked; classes accumulate, while for every other
    attribute the lower line overrides the higher one::

        {#a .a c=1}
        {#b .b c=2}
        A paragraph, that will be assigned the classes ``a b``,
        the identifier ``b`` and the attribute ``c=2``.

    This syntax is inspired by Djot block attributes.

    :param md: The MarkdownIt instance to modify.
    :param allowed: A list of allowed attribute names.
        If not ``None``, any attributes not in this list will be removed
        and placed in the token's meta under the key "insecure_attrs".
    """
    # Block phase: recognise attribute lines and emit "attrs_block" tokens.
    md.block.ruler.before("fence", "attr", _attr_block_rule)

    # Core phase: fold each "attrs_block" token into the token that follows it.
    allowed_names = set(allowed) if allowed is not None else None
    resolver = partial(_attr_resolve_block_rule, allowed=allowed_names)
    md.core.ruler.after("block", "attr", resolver)
102
103
def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
    """Return the index of the token that opens the element ending at ``index``.

    A token that is not a closing token (``nesting != -1``) counts as its own
    opening, so ``index`` is returned unchanged.
    For a closing token, ``None`` is returned when no earlier token balances it.
    """
    if tokens[index].nesting != -1:
        return index

    # Scan backwards summing the nesting values; the running balance reaches
    # zero exactly at the opening token that matches the closing one.
    balance = 0
    for position in range(index, -1, -1):
        balance += tokens[position].nesting
        if balance == 0:
            return position
    return None
115
116
def _span_rule(state: StateInline, silent: bool) -> bool:
    """Inline rule matching a bracketed span followed by attributes: ``[text]{...}``.

    On a match the position is moved beyond the attribute block and, unless
    ``silent`` is set, ``span_open`` / ``span_close`` tokens are pushed around
    the tokenized bracket content, with the parsed attributes on ``span_open``.

    :return: True if a span was matched, False otherwise.
    """
    if state.src[state.pos] != "[":
        return False

    saved_max = state.posMax
    label_start = state.pos + 1
    label_end = state.md.helpers.parseLinkLabel(state, state.pos, False)

    # a negative result means no closing ']' was found, so this is not a span
    if label_end < 0:
        return False

    # something must follow the ']' for an attribute block to be possible
    attrs_start = label_end + 1
    if attrs_start >= saved_max:
        return False

    try:
        consumed, attrs = parse(state.src[attrs_start:])
    except ParseError:
        return False

    if not silent:
        # Temporarily narrow the state to the bracket content so that it is
        # tokenized as the children of the span.
        state.pos = label_start
        state.posMax = label_end
        span_open = state.push("span_open", "span", 1)
        span_open.attrs = attrs  # type: ignore[assignment]
        state.md.inline.tokenize(state)
        state.push("span_close", "span", -1)

    # Resume after the attribute block and restore the original upper bound.
    # NOTE(review): ``consumed`` is presumably the offset of the closing '}'
    # within the parsed slice, hence the +1 - confirm against ``parse``.
    state.pos = attrs_start + consumed + 1
    state.posMax = saved_max
    return True
153
154
def _attr_inline_rule(
    state: StateInline,
    silent: bool,
    after: Sequence[str],
    *,
    allowed: set[str] | None = None,
) -> bool:
    """Inline rule attaching an attribute block to the element just before it.

    The rule only fires when there is no pending text and the most recently
    pushed token has a type listed in ``after``.
    The attributes are stored on the opening token of that element
    (the token itself when it is not a closing token).

    :param state: The inline parser state.
    :param silent: If True, only advance the position; do not modify tokens.
    :param after: Token types after which an attribute block is accepted.
    :param allowed: Optional allow-list of attribute names, forwarded to ``_add_attrs``.
    :return: True if an attribute block was consumed, False otherwise.
    """
    if state.pending or not state.tokens:
        return False
    token = state.tokens[-1]
    if token.type not in after:
        return False
    try:
        new_pos, attrs = parse(state.src[state.pos :])
    except ParseError:
        return False
    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
    if token_index is None:
        return False
    state.pos += new_pos + 1
    if not silent:
        attr_token = state.tokens[token_index]
        # Classes accumulate across attribute blocks. The existing class must be
        # read from the token that actually carries the attributes: for closing
        # tokens (e.g. ``link_close``) that is the opening token, not ``token``.
        if "class" in attrs and "class" in attr_token.attrs:
            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
        _add_attrs(attr_token, attrs, allowed)
    return True
181
182
def _attr_block_rule(
    state: StateBlock, startLine: int, endLine: int, silent: bool
) -> bool:
    """Block rule recognising a line that consists solely of an attribute block.

    The line must not be a code block (as decided by ``is_code_block``),
    must begin with a `{` after its indentation,
    and must end with a `}` followed by any number of spaces or tabs.

    On a match (and unless ``silent``) an ``attrs_block`` token carrying the
    parsed attributes is pushed and the state advances by one line.

    :return: True if the line is an attribute block, False otherwise.
    """
    if is_code_block(state, startLine):
        return False

    src = state.src
    start = state.bMarks[startLine] + state.tShift[startLine]
    end = state.eMarks[startLine]

    # an attribute block has to open with a brace
    if src[start] != "{":
        return False

    # ignore trailing spaces/tabs, then require a closing brace at the end
    while end > start and src[end - 1] in (" ", "\t"):
        end -= 1
    if end <= start or src[end - 1] != "}":
        return False

    try:
        _, attrs = parse(src[start:end])
    except ParseError:
        return False

    # TODO: the offset returned by ``parse`` is intentionally not compared with
    # the end of the line; that check was not working in some instances and
    # has been disabled.

    if silent:
        return True

    attrs_token = state.push("attrs_block", "", 0)
    attrs_token.attrs = attrs  # type: ignore[assignment]
    attrs_token.map = [startLine, startLine + 1]

    state.line = startLine + 1
    return True
229
230
def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
    """Fold every ``attrs_block`` token into the token that follows it.

    Each ``attrs_block`` token is removed from ``state.tokens``.
    If it is followed by another ``attrs_block`` the two are merged
    (the later block wins, except for classes which are concatenated);
    otherwise its attributes are applied to the following token via ``_add_attrs``.
    An ``attrs_block`` that is the last token is simply dropped.
    """
    tokens = state.tokens
    remaining = len(tokens)
    idx = 0
    while idx < remaining:
        current = tokens[idx]
        if current.type != "attrs_block":
            idx += 1
            continue

        if idx + 1 < remaining:
            target = tokens[idx + 1]
            block_attrs = current.attrs

            # classes accumulate: this block's classes come first
            if "class" in block_attrs and "class" in target.attrs:
                block_attrs["class"] = f"{block_attrs['class']} {target.attrs['class']}"

            if target.type != "attrs_block":
                _add_attrs(target, block_attrs, allowed)
            else:
                # the following attribute block takes precedence when merging,
                # apart from the (already combined) class value
                for key, value in block_attrs.items():
                    if key != "class" and key in target.attrs:
                        continue
                    target.attrs[key] = value

        # Remove the attribute block; ``idx`` now points at the next token,
        # so it is deliberately not advanced.
        tokens.pop(idx)
        remaining -= 1
259
260
def _add_attrs(
    token: Token,
    attrs: dict[str, Any],
    allowed: set[str] | None,
) -> None:
    """Add attributes to a token, skipping any disallowed attributes.

    :param token: The token to update in place.
    :param attrs: The attributes to add; they override existing attributes of the same name.
    :param allowed: If not ``None``, only these attribute names are applied.
        The rejected ones are recorded in ``token.meta["insecure_attrs"]``.
    """
    if allowed is not None:
        disallowed = {k: v for k, v in attrs.items() if k not in allowed}
        if disallowed:
            # Several attribute blocks may target the same token, so merge with
            # what earlier calls recorded instead of overwriting it.
            token.meta["insecure_attrs"] = {
                **token.meta.get("insecure_attrs", {}),
                **disallowed,
            }
            attrs = {k: v for k, v in attrs.items() if k in allowed}

    # new attributes take precedence over existing attributes
    token.attrs.update(attrs)