1"""Handwritten parser of dependency specifiers.
2
3The docstring for each __parse_* function contains EBNF-inspired grammar representing
4the implementation.
5"""
6
7from __future__ import annotations
8
9import ast
10from typing import List, Literal, NamedTuple, Sequence, Tuple, Union
11
12from ._tokenizer import DEFAULT_RULES, Tokenizer
13
14
class Node:
    """Base class for a single node of a parsed marker expression.

    Holds the raw string value; subclasses override :meth:`serialize`
    to render the value back into marker syntax.
    """

    __slots__ = ("value",)

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return "<{}({!r})>".format(self.__class__.__name__, self.value)

    def serialize(self) -> str:
        # Subclasses must decide how the node is rendered.
        raise NotImplementedError
29
30
class Variable(Node):
    """A marker variable (e.g. an environment field name)."""

    __slots__ = ()

    def serialize(self) -> str:
        # Variables are rendered as their bare text, unquoted.
        return str(self)
36
37
class Value(Node):
    """A literal string value in a marker expression."""

    __slots__ = ()

    def serialize(self) -> str:
        # Values are rendered wrapped in double quotes.
        return '"{}"'.format(self)
43
44
class Op(Node):
    """A comparison operator in a marker expression."""

    __slots__ = ()

    def serialize(self) -> str:
        # Operators are rendered as their bare text.
        return str(self)
50
51
# Type aliases describing the shape of a parsed marker expression.
MarkerLogical = Literal["and", "or"]  # boolean connective between atoms
MarkerVar = Union[Variable, Value]  # either side of a comparison
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]  # one comparison: lhs op rhs
# An atom is a single comparison or a parenthesized sub-expression.
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
# A flat list alternating atoms/sub-lists with "and"/"or" connectives.
MarkerList = List[Union["MarkerList", MarkerAtom, MarkerLogical]]
57
58
class ParsedRequirement(NamedTuple):
    """Structured result of parsing a dependency specifier string."""

    # Distribution name.
    name: str
    # Direct-reference URL (after "@"), or "" when none was given.
    url: str
    # Extras listed in square brackets after the name.
    extras: list[str]
    # Raw version specifier text, or "" when none was given.
    specifier: str
    # Parsed environment marker, or None when absent.
    marker: MarkerList | None
65
66
67# --------------------------------------------------------------------------------------
68# Recursive descent parser for dependency specifier
69# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse a full dependency specifier string into its components."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
72
73
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    # The distribution name must come first.
    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    tokenizer.consume("WS")

    parsed_extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    # Everything after the extras: URL or specifier, plus optional marker.
    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name_token.text, url, parsed_extras, specifier, marker)
93
94
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """
    url = ""
    specifier = ""
    marker: MarkerList | None = None

    if tokenizer.check("AT"):
        # Direct-reference form: "@ <url>", optionally followed by a marker.
        tokenizer.read()
        tokenizer.consume("WS")

        span_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # A marker after a URL requires separating whitespace.
        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=span_start,
            expected="semicolon (after URL and whitespace)",
        )
        return (url, specifier, marker)

    # Version-specifier form (possibly empty), optionally followed by a marker.
    span_start = tokenizer.position
    specifier = _parse_specifier(tokenizer)
    tokenizer.consume("WS")

    if tokenizer.check("END", peek=True):
        return (url, specifier, marker)

    marker = _parse_requirement_marker(
        tokenizer,
        span_start=span_start,
        expected=(
            "comma (within version specifier), semicolon (after version specifier)"
            if specifier
            else "semicolon (after name with no version specifier)"
        ),
    )
    return (url, specifier, marker)
146
147
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, expected: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    # Anything other than ";" here is a syntax error, reported against
    # the span starting at the caller-supplied position.
    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected {expected} or end",
            span_start=span_start,
            span_end=None,
        )
    tokenizer.read()

    parsed_marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return parsed_marker
167
168
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    # Extras are optional: no "[" means an empty list.
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names
186
187
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    names: list[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return names

    names.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        # Two identifiers in a row means a comma was forgotten.
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        if not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        name_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        names.append(name_token.text)

    return names
213
214
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    # Parentheses around the specifier are optional per the grammar;
    # the enclosing_tokens context is used to pair them up when present.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        specifier_text = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return specifier_text
230
231
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    text = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        text += tokenizer.read().text
        # The tokenizer emits dedicated trailing tokens for ".*" suffixes
        # and local version labels used with the wrong operators.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma in the reconstructed specifier text.
        text += tokenizer.read().text
        tokenizer.consume("WS")

    return text
259
260
261# --------------------------------------------------------------------------------------
262# Recursive descent parser for marker expression
263# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    """Parse a standalone environment marker string."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_full_marker(tokenizer)
266
267
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    """Parse a marker expression and require the input to be fully consumed."""
    result = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return result
272
273
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    # Build a flat list alternating atoms and boolean operators:
    # [atom, "and"/"or", atom, ...]; a single atom yields a one-element list.
    result: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        op_token = tokenizer.read()
        result.append(op_token.text)
        result.append(_parse_marker_atom(tokenizer))
    return result
284
285
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")

    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        # Bare comparison item.
        atom: MarkerAtom = _parse_marker_item(tokenizer)
        tokenizer.consume("WS")
        return atom

    # Parenthesized sub-expression.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="marker expression",
    ):
        tokenizer.consume("WS")
        atom = _parse_marker(tokenizer)
        tokenizer.consume("WS")
    tokenizer.consume("WS")
    return atom
306
307
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    # A single comparison: <var-or-value> <op> <var-or-value>.
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (lhs, op, rhs)
320
321
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dots in variable names are normalized to underscores (a.b -> a_b).
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    if tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    # raise_syntax_error raises; the return mirrors _parse_marker_op's style.
    return tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
334
335
def process_env_var(env_var: str) -> Variable:
    """Wrap a normalized marker variable name in a ``Variable`` node."""
    # Both spellings map to the same canonical variable name.
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    return Variable(env_var)
341
342
def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted Python string literal into a ``Value`` node."""
    # literal_eval handles the quoting/escaping of the token text safely.
    return Value(str(ast.literal_eval(python_str)))
346
347
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    if tokenizer.check("NOT"):
        # "not in" is two tokens separated by mandatory whitespace.
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )