1"""Handwritten parser of dependency specifiers.
2
3The docstring for each __parse_* function contains EBNF-inspired grammar representing
4the implementation.
5"""
6
7from __future__ import annotations
8
9import ast
10from typing import List, Literal, NamedTuple, Sequence, Tuple, Union
11
12from ._tokenizer import DEFAULT_RULES, Tokenizer
13
14
class Node:
    """Base class for the three leaf types of a parsed marker expression."""

    __slots__ = ("value",)

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}({self.value!r})>"

    def serialize(self) -> str:
        """Render this node back into marker-expression syntax."""
        raise NotImplementedError

    def __getstate__(self) -> str:
        # Return just the value string for compactness and stability.
        return self.value

    def _restore_value(self, value: object) -> None:
        """Set ``self.value`` from unpickled data, rejecting non-strings."""
        if not isinstance(value, str):
            raise TypeError(
                f"Cannot restore {self.__class__.__name__} value from {value!r}"
            )
        self.value = value

    def __setstate__(self, state: object) -> None:
        """Restore from any of the three historical pickle formats."""
        # New format (26.2+): just the value string.
        if isinstance(state, str):
            self._restore_value(state)
            return
        # Old format (packaging <= 26.0, __slots__): (None, {slot: value}).
        if isinstance(state, tuple) and len(state) == 2:
            slots = state[1]
            if isinstance(slots, dict) and "value" in slots:
                self._restore_value(slots["value"])
                return
        # Old format (packaging <= 25.0, no __slots__): plain __dict__.
        if isinstance(state, dict) and "value" in state:
            self._restore_value(state["value"])
            return
        raise TypeError(f"Cannot restore {self.__class__.__name__} from {state!r}")
57
58
class Variable(Node):
    """A marker environment variable, e.g. ``os_name``."""

    __slots__ = ()

    def serialize(self) -> str:
        # Variables serialize as their bare name.
        return self.value
64
65
class Value(Node):
    """A literal string operand of a marker comparison."""

    __slots__ = ()

    def serialize(self) -> str:
        # Values serialize re-quoted, e.g. "3.11" -> '"3.11"'.
        return '"' + self.value + '"'
71
72
class Op(Node):
    """A comparison operator inside a marker, e.g. ``==`` or ``not in``."""

    __slots__ = ()

    def serialize(self) -> str:
        # Operators serialize verbatim.
        return self.value
78
79
# Type aliases describing the shape of a parsed marker expression.
MarkerLogical = Literal["and", "or"]
# One operand of a comparison: an environment variable or a quoted value.
MarkerVar = Union[Variable, Value]
# A single comparison, e.g. (Variable("os_name"), Op("=="), Value("nt")).
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# An atom is a single comparison or a parenthesized sub-expression.
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
# Flat list alternating atoms/sub-lists with "and"/"or" connectives.
MarkerList = List[Union["MarkerList", MarkerAtom, MarkerLogical]]
85
86
class ParsedRequirement(NamedTuple):
    """Structured result of parsing a dependency specifier."""

    # Package name, e.g. "requests".
    name: str
    # Direct-reference URL (the part after "@"), or "" when absent.
    url: str
    # Extra names from the bracketed list, e.g. ["security", "socks"].
    extras: list[str]
    # Raw version-specifier text, or "" when absent.
    specifier: str
    # Parsed environment marker, or None when no marker was given.
    marker: MarkerList | None
93
94
95# --------------------------------------------------------------------------------------
96# Recursive descent parser for dependency specifier
97# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse a complete dependency specifier string."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
100
101
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    package_name = token.text
    tokenizer.consume("WS")

    extra_names = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, version_spec, marker_expr = _parse_requirement_details(tokenizer)
    # Anything left over after the details is a syntax error.
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(package_name, url, extra_names, version_spec, marker_expr)
121
122
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """
    # URL form: "name @ https://..." — a URL and a version specifier are
    # mutually exclusive, so one slot of the result is always "".
    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_span_start = tokenizer.position
        url_text = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url_text, "", None)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url_text, "", None)

        url_marker = _parse_requirement_marker(
            tokenizer,
            span_start=url_span_start,
            expected="semicolon (after URL and whitespace)",
        )
        return (url_text, "", url_marker)

    # Specifier form: an (optionally empty) version specifier, then an
    # optional marker.
    spec_span_start = tokenizer.position
    spec_text = _parse_specifier(tokenizer)
    tokenizer.consume("WS")

    if tokenizer.check("END", peek=True):
        return ("", spec_text, None)

    spec_marker = _parse_requirement_marker(
        tokenizer,
        span_start=spec_span_start,
        expected=(
            "comma (within version specifier), semicolon (after version specifier)"
            if spec_text
            else "semicolon (after name with no version specifier)"
        ),
    )
    return ("", spec_text, spec_marker)
174
175
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, expected: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    # The marker must be introduced by ";"; report the error span from the
    # caller-supplied start position for a better message.
    if tokenizer.check("SEMICOLON"):
        tokenizer.read()
    else:
        tokenizer.raise_syntax_error(
            f"Expected {expected} or end",
            span_start=span_start,
            span_end=None,
        )

    result = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return result
195
196
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    # Extras are optional; no opening bracket means no extras at all.
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names
214
215
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    names: list[str] = []

    # The list may be empty ("pkg[]").
    if not tokenizer.check("IDENTIFIER"):
        return names
    names.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        # Two identifiers in a row means a separating comma is missing.
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        if not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")
        names.append(
            tokenizer.expect("IDENTIFIER", expected="extra name after comma").text
        )

    return names
241
242
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    # Per the grammar, enclosing parentheses are optional.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return specifiers
258
259
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    collected = ""
    while tokenizer.check("SPECIFIER"):
        start = tokenizer.position
        collected += tokenizer.read().text
        # A ".*" trail is only legal with ==/!=; reject it with a span
        # covering the offending specifier.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=start,
                span_end=tokenizer.position + 1,
            )
        # Same restriction applies to "+local" version labels.
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma in the collected text, then continue the list.
        collected += tokenizer.read().text
        tokenizer.consume("WS")

    return collected
287
288
289# --------------------------------------------------------------------------------------
290# Recursive descent parser for marker expression
291# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    """Parse a standalone environment-marker expression."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_full_marker(tokenizer)
294
295
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    """Parse a marker and require that the whole input is consumed."""
    result = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return result
300
301
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)+
    """
    # Build a flat list: atom, boolop text, atom, boolop text, atom, ...
    parts: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        op_token = tokenizer.read()
        parts.append(op_token.text)
        parts.append(_parse_marker_atom(tokenizer))
    return parts
312
313
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")

    # Simple case: a bare comparison, no grouping parentheses.
    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        atom: MarkerAtom = _parse_marker_item(tokenizer)
        tokenizer.consume("WS")
        return atom

    # Parenthesized sub-expression.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="marker expression",
    ):
        tokenizer.consume("WS")
        atom = _parse_marker(tokenizer)
        tokenizer.consume("WS")
    tokenizer.consume("WS")
    return atom
334
335
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    operator = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (lhs, operator, rhs)
348
349
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dotted variable names are normalized to underscores before lookup.
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    if tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    # Consistent with _parse_marker_op: return the NoReturn call explicitly
    # so every path visibly returns and no lint suppression is needed.
    return tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
362
363
def process_env_var(env_var: str) -> Variable:
    """Build a ``Variable``, canonicalizing known alias names."""
    # "python_implementation" maps onto "platform_python_implementation".
    if env_var in {"platform_python_implementation", "python_implementation"}:
        return Variable("platform_python_implementation")
    return Variable(env_var)
369
370
def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted Python string literal into a marker ``Value``."""
    literal = ast.literal_eval(python_str)
    return Value(str(literal))
374
375
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    if tokenizer.check("NOT"):
        # "not in" is two tokens separated by mandatory whitespace.
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )