Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mdit_py_plugins/attrs/parse.py: 99%
158 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
"""Parser for attributes::

    attributes { id = "foo", class = "bar baz",
                 key1 = "val1", key2 = "val2" }

Adapted from:
https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1

syntax:

attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
attribute <- identifier | class | keyval
identifier <- '#' name
class <- '.' name
name <- (nonspace, nonpunctuation other than ':', '_', '-')+
keyval <- key '=' val
key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
val <- bareval | quotedval
bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
quotedval <- '"' ([^"] | '\"')* '"'
"""
22from __future__ import annotations
24from enum import Enum
25import re
26from typing import Callable
class State(Enum):
    """States of the character-by-character attribute scanner."""

    # Nothing consumed yet; the opening "{" is expected.
    START = 0
    # Inside the braces, between attributes.
    SCANNING = 1
    # Reading the name that follows "#".
    SCANNING_ID = 2
    # Reading the name that follows ".".
    SCANNING_CLASS = 3
    # Reading a key, up to its "=".
    SCANNING_KEY = 4
    # Just past "=", deciding between a bare and a quoted value.
    SCANNING_VALUE = 5
    # Reading an unquoted value.
    SCANNING_BARE_VALUE = 6
    # Reading a double-quoted value.
    SCANNING_QUOTED_VALUE = 7
    # Inside a "%...%" comment.
    SCANNING_COMMENT = 8
    # Character immediately after a backslash inside a quoted value.
    SCANNING_ESCAPED = 9
    # The closing "}" has been consumed.
    DONE = 10
# A single whitespace character; used to detect the end of an id, class or
# bare value.
REGEX_SPACE = re.compile(r"\s")
# A single whitespace character or ASCII punctuation character, excluding
# ':', '_', '-' and the backslash; a match ends an id or class name.
REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]")
# A single character allowed in a key or bare value.  The digit range is
# written as ``0-9`` rather than ``\d`` because, on ``str`` patterns, ``\d``
# also matches non-ASCII Unicode decimal digits, whereas the grammar only
# allows ASCII alphanumerics.
REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z0-9_:-]")
class TokenState:
    """Collects the token spans found while scanning an attribute string.

    Each token is a ``(start, end, type)`` tuple of offsets into the scanned
    string plus one of the type tags ``"id"``, ``"class"``, ``"key"`` or
    ``"value"``.
    """

    def __init__(self) -> None:
        # Recorded spans, in the order they were appended.
        self._tokens: list[tuple[int, int, str]] = []
        # Offset at which the token currently being scanned began.
        self.start: int = 0

    def set_start(self, start: int) -> None:
        """Remember ``start`` as the offset of the token now being scanned."""
        self.start = start

    def append(self, start: int, end: int, ttype: str) -> None:
        """Record the span ``string[start:end]`` with type tag ``ttype``."""
        self._tokens.append((start, end, ttype))

    def compile(self, string: str) -> dict[str, str]:
        """Turn the recorded spans into an attribute dictionary.

        :param string: the text the recorded offsets refer to
        :returns: mapping of attribute name to value; every class (from
            ``.name`` tokens and ``class=...`` pairs) is merged into one
            space-separated ``"class"`` entry, added last
        """
        result: dict[str, str] = {}
        class_names: list[str] = []
        # Text of a "key" token that is still waiting for its value.
        pending_key: str | None = None

        for begin, stop, kind in self._tokens:
            text = string[begin:stop]

            if pending_key is not None:
                key, pending_key = pending_key, None
                if kind == "value":
                    if key == "class":
                        class_names.append(text)
                    else:
                        result[key] = text
                    continue
                # The key had no value: drop it and treat this token normally.

            if kind == "id":
                result["id"] = text
            elif kind == "class":
                class_names.append(text)
            elif kind == "key":
                pending_key = text
            # A "value" token that does not directly follow a key is ignored.

        if class_names:
            result["class"] = " ".join(class_names)
        return result

    def __str__(self) -> str:
        return str(self._tokens)

    def __repr__(self) -> str:
        return repr(self._tokens)
class ParseError(Exception):
    """Raised when the scanner meets a character it cannot accept.

    :param msg: description of the problem
    :param pos: offset of the offending character; kept on ``self.pos`` and
        appended to the exception message
    """

    def __init__(self, msg: str, pos: int) -> None:
        super().__init__(f"{msg} at position {pos}")
        self.pos = pos
def parse(string: str) -> tuple[int, dict[str, str]]:
    """Parse an attribute block from the start of ``string``.

    The string is fed one character at a time to the handler registered in
    ``HANDLERS`` for the current state, beginning in ``State.START``.

    :returns: ``(pos, attributes)``, where ``pos`` is the index of the
        character that moved the scanner to ``State.DONE``.  If the end of
        the string is reached first, no error is raised: ``pos`` is
        ``len(string)`` and the attributes are built from the tokens
        recorded so far.
    :raises ParseError: propagated from a handler that rejects a character
    """
    collected = TokenState()
    current: State = State.START
    for index, char in enumerate(string):
        current = HANDLERS[current](char, index, collected)
        if current is State.DONE:
            return index, collected.compile(string)

    # Input exhausted without reaching DONE.
    return len(string), collected.compile(string)
def handle_start(char: str, pos: int, tokens: TokenState) -> State:
    """Accept the opening ``{``; any other first character is an error.

    :raises ParseError: if ``char`` is not ``{``
    """
    if char != "{":
        raise ParseError("Attributes must start with '{'", pos)
    return State.SCANNING
def handle_scanning(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character found between attributes.

    Space, tab, newline and carriage return are skipped, ``}`` finishes the
    block, and ``#``, ``%``, ``.`` or a key character start an id, comment,
    class or key respectively, with ``pos`` stored as the token start.

    :raises ParseError: for any other character
    """
    if char in (" ", "\t", "\n", "\r"):
        return State.SCANNING
    if char == "}":
        return State.DONE

    if char == "#":
        following = State.SCANNING_ID
    elif char == "%":
        following = State.SCANNING_COMMENT
    elif char == ".":
        following = State.SCANNING_CLASS
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        following = State.SCANNING_KEY
    else:
        raise ParseError(f"Unexpected character whilst scanning: {char}", pos)

    # Every token-opening character records where the token begins.
    tokens.set_start(pos)
    return following
def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State:
    """Skip comment characters; a ``%`` ends the comment."""
    return State.SCANNING if char == "%" else State.SCANNING_COMMENT
def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State:
    """Consume one character of an id name (the text following ``#``).

    The name ends at ``}`` or at whitespace; a non-empty name is recorded as
    an ``"id"`` token that excludes the leading ``#``.

    :raises ParseError: if the name is interrupted by other punctuation
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_ID

    closing = char == "}"
    if not closing and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning id: {char}", pos)

    # tokens.start points at the "#"; the name begins one character later.
    name_start = tokens.start + 1
    if pos > name_start:
        tokens.append(name_start, pos, "id")
    return State.DONE if closing else State.SCANNING
def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State:
    """Consume one character of a class name (the text following ``.``).

    The name ends at ``}`` or at whitespace; a non-empty name is recorded as
    a ``"class"`` token that excludes the leading ``.``.

    :raises ParseError: if the name is interrupted by other punctuation
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_CLASS

    closing = char == "}"
    if not closing and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning class: {char}", pos)

    # tokens.start points at the "."; the name begins one character later.
    name_start = tokens.start + 1
    if pos > name_start:
        tokens.append(name_start, pos, "class")
    return State.DONE if closing else State.SCANNING
def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State:
    """Consume one character of a key; ``=`` ends it and records the token.

    :raises ParseError: if ``char`` is neither a key character nor ``=``
    """
    if REGEX_KEY_CHARACTERS.fullmatch(char):
        return State.SCANNING_KEY
    if char != "=":
        raise ParseError(f"Unexpected character whilst scanning key: {char}", pos)

    # The key spans from its first character up to, not including, the "=".
    tokens.append(tokens.start, pos, "key")
    return State.SCANNING_VALUE
def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State:
    """Decide, from the first character after ``=``, which value form follows.

    A double quote starts a quoted value and a key character starts a bare
    value; in both cases ``pos`` is stored as the token start.

    :raises ParseError: for any other character
    """
    if char == '"':
        following = State.SCANNING_QUOTED_VALUE
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        following = State.SCANNING_BARE_VALUE
    else:
        raise ParseError(f"Unexpected character whilst scanning value: {char}", pos)

    tokens.set_start(pos)
    return following
def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State:
    """Consume one character of an unquoted value.

    The value ends at ``}`` or at whitespace and is then recorded as a
    ``"value"`` token.

    :raises ParseError: for any other non-key character
    """
    if REGEX_KEY_CHARACTERS.fullmatch(char):
        return State.SCANNING_BARE_VALUE

    if char == "}":
        following = State.DONE
    elif REGEX_SPACE.fullmatch(char):
        following = State.SCANNING
    else:
        raise ParseError(f"Unexpected character whilst scanning bare value: {char}", pos)

    tokens.append(tokens.start, pos, "value")
    return following
def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State:
    """Accept whatever character follows a backslash in a quoted value.

    The character is not inspected and nothing is recorded; scanning simply
    resumes inside the quoted value.
    """
    return State.SCANNING_QUOTED_VALUE
def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State:
    """Consume one character inside a double-quoted value.

    A closing quote records the value (without the quotes) and returns to
    attribute scanning; a backslash hands the next character to the escape
    state; every other accepted character keeps scanning the value.

    :raises ParseError: if ``char`` is ``{`` or ``}``
    """
    if char == "\\":
        return State.SCANNING_ESCAPED

    if char in ("{", "}"):
        raise ParseError(
            f"Unexpected character whilst scanning quoted value: {char}", pos
        )

    # tokens.start points at the opening quote, so the text begins at +1.
    # NOTE(review): a newline also records a span but leaves tokens.start
    # unchanged, so the closing quote later records a second, longer span
    # from the same start -- confirm the token consumer expects this.
    if char in ('"', "\n"):
        tokens.append(tokens.start + 1, pos, "value")

    return State.SCANNING if char == '"' else State.SCANNING_QUOTED_VALUE
# Dispatch table: the handler to call for each scanner state.  Every handler
# takes (char, pos, tokens) and returns the next state.  State.DONE has no
# entry.
HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = {
    # entry point and the between-attributes state
    State.START: handle_start,
    State.SCANNING: handle_scanning,
    # "%...%" comments
    State.SCANNING_COMMENT: handle_scanning_comment,
    # "#id" and ".class"
    State.SCANNING_ID: handle_scanning_id,
    State.SCANNING_CLASS: handle_scanning_class,
    # key=value pairs
    State.SCANNING_KEY: handle_scanning_key,
    State.SCANNING_VALUE: handle_scanning_value,
    State.SCANNING_BARE_VALUE: handle_scanning_bare_value,
    State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value,
    State.SCANNING_ESCAPED: handle_scanning_escaped,
}