1from __future__ import annotations
2
3from collections.abc import Sequence
4from functools import partial
5from typing import Any
6
7from markdown_it import MarkdownIt
8from markdown_it.rules_block import StateBlock
9from markdown_it.rules_core import StateCore
10from markdown_it.rules_inline import StateInline
11from markdown_it.token import Token
12
13from mdit_py_plugins.utils import is_code_block
14
15from .parse import ParseError, parse
16
17
def attrs_plugin(
    md: MarkdownIt,
    *,
    after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
    spans: bool = False,
    span_after: str = "link",
    allowed: Sequence[str] | None = None,
) -> None:
    """Enable inline attribute blocks placed directly after selected inline elements::

        {#id .a b=c}

    The syntax is modelled on
    `Djot spans
    <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.

    The following forms are accepted between the curly braces:

    - `.foo` adds foo as a class.
      Several classes may be listed; they are combined.
    - `#foo` sets foo as the identifier.
      Only one identifier is kept per element;
      when several are given, the last one wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      The quotes may be omitted when the value is made up solely of
      ASCII alphanumeric characters or `_` or `:` or `-`.
      Quoted values may contain backslash escapes.
    - `%` starts a comment, terminated by the next `%` or by the end of the attribute (`}`).

    Consecutive attribute blocks are merged.

    :param md: The MarkdownIt instance on which the rules are registered.
    :param after: Token types after which an attribute block is recognised.
        Attributes after emphasis, strikethrough or text elements are not supported,
        since those all need post-parse processing.
        When empty, the inline ``attr`` rule is not registered at all.
    :param spans: If True, additionally register the ``span`` rule, which parses
        attributes after a stretch of text wrapped in `[]`.
        Note Markdown link references take precedence over this syntax.
    :param span_after: Name of the inline rule after which the ``span`` rule is inserted.
    :param allowed: Attribute names that may be applied.
        If not ``None``, every attribute outside this collection is withheld
        and stored in the token's meta under the key "insecure_attrs".
    """
    if spans:
        md.inline.ruler.after(span_after, "span", _span_rule)

    if not after:
        return

    # Convert the allow-list to a set once, at registration time.
    allowed_names = None if allowed is None else set(allowed)
    inline_rule = partial(_attr_inline_rule, after=after, allowed=allowed_names)
    md.inline.ruler.push("attr", inline_rule)
72
73
def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
    """Enable block-level attribute lines.

    A block attribute line starts with ``{`` and ends with ``}``, on a line of its own.
    Its attributes are attached to the block that follows it::

        {.a #b c=1}
        A paragraph, that will be assigned the class ``a`` and the identifier ``b``.

    Attribute lines can be stacked; classes accumulate, while for every other
    key the lower line overrides the higher one::

        {#a .a c=1}
        {#b .b c=2}
        A paragraph, that will be assigned the classes ``a b``, the identifier ``b``
        and the attribute ``c=2``.

    This syntax is inspired by Djot block attributes.

    :param md: The MarkdownIt instance on which the rules are registered.
    :param allowed: Attribute names that may be applied.
        If not ``None``, every attribute outside this collection is withheld
        and stored in the token's meta under the key "insecure_attrs".
    """
    # Block rule: emits an "attrs_block" token for each attribute line.
    md.block.ruler.before("fence", "attr", _attr_block_rule)

    # Core rule: runs after block parsing and moves the attributes
    # from each "attrs_block" token onto the token that follows it.
    allowed_names = None if allowed is None else set(allowed)
    resolve_rule = partial(_attr_resolve_block_rule, allowed=allowed_names)
    md.core.ruler.after("block", "attr", resolve_rule)
103
104
def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
    """Return the index of the opening token that matches ``tokens[index]``.

    A token that is not a closing token (``nesting != -1``) is its own match,
    so ``index`` is returned unchanged. For a closing token the sequence is
    scanned backwards, summing ``nesting`` values, until the sum returns to
    zero. ``None`` is returned when no balancing opening token is found.
    """
    if tokens[index].nesting != -1:
        return index

    depth = 0
    for position in range(index, -1, -1):
        depth += tokens[position].nesting
        if depth == 0:
            return position
    return None
116
117
def _span_rule(state: StateInline, silent: bool) -> bool:
    """Inline rule: parse ``[text]{attrs}`` into a ``span`` element.

    The rule matches when the current character is ``[``, a closing ``]`` is
    found by ``parseLinkLabel``, and ``parse`` accepts the text that follows it.
    Unless ``silent`` is set, a ``span_open`` token carrying the parsed
    attributes is pushed, the bracketed text is tokenized as inline content,
    and a ``span_close`` token is pushed.

    :return: True if a span was recognised (``state.pos`` is then moved past
        the attribute block), otherwise False.
    """
    if state.src[state.pos] != "[":
        return False

    outer_max = state.posMax
    content_start = state.pos + 1
    content_end = state.md.helpers.parseLinkLabel(state, state.pos, False)

    # a negative result means no closing ']' was found, so this is not a span
    if content_end < 0:
        return False

    attrs_start = content_end + 1

    # nothing left after the ']' for an attribute block
    if attrs_start >= outer_max:
        return False

    try:
        consumed, attrs = parse(state.src[attrs_start:])
    except ParseError:
        return False

    if not silent:
        # temporarily narrow the parsing window to the bracketed text
        state.pos, state.posMax = content_start, content_end
        opening = state.push("span_open", "span", 1)
        opening.attrs = attrs  # type: ignore[assignment]
        state.md.inline.tokenize(state)
        state.push("span_close", "span", -1)

    # resume after the attribute block and restore the original window
    state.pos = attrs_start + consumed + 1
    state.posMax = outer_max
    return True
154
155
def _attr_inline_rule(
    state: StateInline,
    silent: bool,
    after: Sequence[str],
    *,
    allowed: set[str] | None = None,
) -> bool:
    """Inline rule: apply an attribute block to the element that precedes it.

    The rule matches only when there is no pending content, the most recently
    pushed token has a type listed in ``after``, and ``parse`` accepts the
    source from the current position. The attributes are applied to the
    opening token of the preceding element (the token itself when it is not a
    closing token).

    :param state: The inline parser state.
    :param silent: If True, only advance ``state.pos``; do not modify tokens.
    :param after: Token types after which an attribute block is recognised.
    :param allowed: Attribute names that may be applied, or ``None`` for no
        restriction; see ``_add_attrs``.
    :return: True if an attribute block was consumed, otherwise False.
    """
    if state.pending or not state.tokens:
        return False
    last_token = state.tokens[-1]
    if last_token.type not in after:
        return False
    try:
        new_pos, attrs = parse(state.src[state.pos :])
    except ParseError:
        return False
    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
    if token_index is None:
        return False
    # new_pos is relative to the slice passed to parse; step past the block
    state.pos += new_pos + 1
    if not silent:
        attr_token = state.tokens[token_index]
        # Classes accumulate rather than override. The existing classes live on
        # the token receiving the attributes (the opening token), not on the
        # closing token that triggered the rule, so that is the one to inspect.
        if "class" in attrs and "class" in attr_token.attrs:
            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
        _add_attrs(attr_token, attrs, allowed)
    return True
182
183
def _attr_block_rule(
    state: StateBlock, startLine: int, endLine: int, silent: bool
) -> bool:
    """Block rule: recognise a line consisting of an attribute block.

    The line must not be reported as a code block by ``is_code_block``, must
    begin with a `{` (after its leading indentation) and must end with a `}`,
    optionally followed by any number of spaces or tabs.

    On a match (and unless ``silent`` is set) an ``attrs_block`` token holding
    the parsed attributes is pushed and ``state.line`` is advanced by one line.

    :return: True if the line is an attribute block, otherwise False.
    """
    if is_code_block(state, startLine):
        return False

    start = state.bMarks[startLine] + state.tShift[startLine]
    end = state.eMarks[startLine]
    source = state.src

    # an attribute block has to open with a brace
    if source[start] != "{":
        return False

    # trim trailing spaces and tabs, then require a closing brace
    while end > start and source[end - 1] in (" ", "\t"):
        end -= 1
    if end <= start or source[end - 1] != "}":
        return False

    try:
        _, attrs = parse(source[start:end])
    except ParseError:
        return False

    # TODO: a check that the parsed block ends exactly at the final `}` was
    # disabled because it did not work in some instances.
    # NOTE(review): the disabled check compared `maximum - 1` (an offset into
    # `state.src`) with the position returned by `parse`, which other callers
    # treat as relative to the string passed in -- possibly the cause; confirm.

    if not silent:
        token = state.push("attrs_block", "", 0)
        token.attrs = attrs  # type: ignore[assignment]
        token.map = [startLine, startLine + 1]
        state.line = startLine + 1
    return True
230
231
def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
    """Core rule: move each ``attrs_block`` token's attributes onto the next token.

    Every ``attrs_block`` token is removed from ``state.tokens``. Before removal,
    its attributes are handed to the token that follows it:

    - if that token is another ``attrs_block``, the two are merged, with the
      following block's values winning for every key except ``class``;
    - otherwise the attributes are applied through ``_add_attrs``, subject to
      ``allowed``.

    An ``attrs_block`` with no following token is simply dropped.
    """
    tokens = state.tokens
    index = 0
    while index < len(tokens):
        current = tokens[index]
        if current.type != "attrs_block":
            index += 1
            continue

        if index + 1 < len(tokens):
            target = tokens[index + 1]
            block_attrs = current.attrs

            # classes accumulate: this block's classes first, then the target's
            if "class" in block_attrs and "class" in target.attrs:
                block_attrs["class"] = f"{block_attrs['class']} {target.attrs['class']}"

            if target.type != "attrs_block":
                _add_attrs(target, block_attrs, allowed)
            else:
                # stacked blocks: the later block keeps its own values,
                # except for the (already combined) class
                for key, value in block_attrs.items():
                    if key == "class" or key not in target.attrs:
                        target.attrs[key] = value

        # drop the attribute block; the next token slides into `index`
        del tokens[index]
260
261
def _add_attrs(
    token: Token,
    attrs: dict[str, Any],
    allowed: set[str] | None,
) -> None:
    """Add attributes to a token, withholding any disallowed attributes.

    :param token: The token whose ``attrs`` (and possibly ``meta``) are updated.
    :param attrs: The attributes to apply; this mapping is not modified.
    :param allowed: Attribute names that may be applied, or ``None`` to apply
        everything. Attributes outside this set are not added to ``token.attrs``;
        they are recorded in ``token.meta["insecure_attrs"]`` instead.
    """
    if allowed is not None:
        disallowed = {k: v for k, v in attrs.items() if k not in allowed}
        if disallowed:
            # Merge into any previously recorded entries, so that several
            # attribute blocks applied to the same token do not erase each
            # other's record of withheld attributes.
            token.meta.setdefault("insecure_attrs", {}).update(disallowed)
            attrs = {k: v for k, v in attrs.items() if k in allowed}

    # new attributes take precedence over existing attributes
    token.attrs.update(attrs)