# Source: pip/_vendor/packaging/_parser.py

1"""Handwritten parser of dependency specifiers. 

2 

3The docstring for each __parse_* function contains EBNF-inspired grammar representing 

4the implementation. 

5""" 

6 

7from __future__ import annotations 

8 

9import ast 

10from typing import List, Literal, NamedTuple, Sequence, Tuple, Union 

11 

12from ._tokenizer import DEFAULT_RULES, Tokenizer 

13 

14 

15class Node: 

16 __slots__ = ("value",) 

17 

18 def __init__(self, value: str) -> None: 

19 self.value = value 

20 

21 def __str__(self) -> str: 

22 return self.value 

23 

24 def __repr__(self) -> str: 

25 return f"<{self.__class__.__name__}({self.value!r})>" 

26 

27 def serialize(self) -> str: 

28 raise NotImplementedError 

29 

30 def __getstate__(self) -> str: 

31 # Return just the value string for compactness and stability. 

32 return self.value 

33 

34 def _restore_value(self, value: object) -> None: 

35 if not isinstance(value, str): 

36 raise TypeError( 

37 f"Cannot restore {self.__class__.__name__} value from {value!r}" 

38 ) 

39 self.value = value 

40 

41 def __setstate__(self, state: object) -> None: 

42 if isinstance(state, str): 

43 # New format (26.2+): just the value string. 

44 self._restore_value(state) 

45 return 

46 if isinstance(state, tuple) and len(state) == 2: 

47 # Old format (packaging <= 26.0, __slots__): (None, {slot: value}). 

48 _, slot_dict = state 

49 if isinstance(slot_dict, dict) and "value" in slot_dict: 

50 self._restore_value(slot_dict["value"]) 

51 return 

52 if isinstance(state, dict) and "value" in state: 

53 # Old format (packaging <= 25.0, no __slots__): plain __dict__. 

54 self._restore_value(state["value"]) 

55 return 

56 raise TypeError(f"Cannot restore {self.__class__.__name__} from {state!r}") 
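
# For illustration only (not part of the vendored module): the pickle
# round-trip these hooks support. New pickles carry just the value string,
# while __setstate__ also accepts the (None, {"value": ...}) tuple and the
# plain-__dict__ states produced by the older class layouts noted above.
#
#     import pickle
#     v = Variable("os_name")
#     restored = pickle.loads(pickle.dumps(v))
#     assert isinstance(restored, Variable)
#     assert restored.value == "os_name"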


class Variable(Node):
    __slots__ = ()

    def serialize(self) -> str:
        return str(self)


class Value(Node):
    __slots__ = ()

    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    __slots__ = ()

    def serialize(self) -> str:
        return str(self)


MarkerLogical = Literal["and", "or"]
MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
MarkerList = List[Union["MarkerList", MarkerAtom, MarkerLogical]]
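
# For illustration (hypothetical input, not part of the module): markers
# parse into a flat list of item tuples joined by "and"/"or" strings, with
# parenthesized groups appearing as nested lists. For example,
#
#     parse_marker('os_name == "posix" or '
#                  '(sys_platform == "win32" and python_version < "3.9")')
#
# yields, schematically:
#
#     [
#         (Variable("os_name"), Op("=="), Value("posix")),
#         "or",
#         [
#             (Variable("sys_platform"), Op("=="), Value("win32")),
#             "and",
#             (Variable("python_version"), Op("<"), Value("3.9")),
#         ],
#     ]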


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: list[str]
    specifier: str
    marker: MarkerList | None


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
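
# For illustration (example input, not part of the module): a PEP 508 style
# source such as
#
#     parse_requirement(
#         'requests[security, tests]>=2.8.1,==2.8.*; python_version < "2.7"'
#     )
#
# returns a ParsedRequirement with name='requests', url='',
# extras=['security', 'tests'], specifier='>=2.8.1,==2.8.*', and a
# one-item marker list: [(Variable('python_version'), Op('<'), Value('2.7'))].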


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=url_start,
            expected="semicolon (after URL and whitespace)",
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            expected=(
                "comma (within version specifier), semicolon (after version specifier)"
                if specifier
                else "semicolon (after name with no version specifier)"
            ),
        )

    return (url, specifier, marker)
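
# For illustration (example inputs, not part of the module): the two forms
# accepted above are a direct URL reference, e.g.
#
#     name @ https://example.com/name-1.0.tar.gz ; python_version < "3.11"
#
# and a plain version specifier, e.g. name>=1.0. After a URL, the marker's
# semicolon must be separated from the URL by whitespace, since the URL
# token would otherwise swallow it.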


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, expected: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected {expected} or end",
            span_start=span_start,
            span_end=None,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: list[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
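
# For illustration (example input, not part of the module): for a source
# like 'name[extra_1 , extra_2]' the two helpers above produce
# ['extra_1', 'extra_2']; adjacent names without a comma, as in
# 'name[extra_1 extra_2]', raise "Expected comma between extra names".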


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
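
# For illustration (example input, not part of the module): version_many
# returns the specifier text with whitespace around commas dropped, so
# '>=1.0 , <2.0' comes back as '>=1.0,<2.0'. The two trail checks reject
# '.*' and local-version suffixes after operators other than == and !=,
# e.g. '>=1.0.*' or '>=1.0+local'.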


# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:  # noqa: RET503
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)
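
# For illustration (not part of the module): both spellings of the
# implementation variable collapse to the canonical one, e.g.
#
#     process_env_var("python_implementation").value
#     # -> "platform_python_implementation"
#
# Dotted names such as "platform.python_implementation" arrive here already
# underscored via the .replace(".", "_") in _parse_marker_var above.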


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
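
# For illustration (not part of the module): the quoted-string token is
# evaluated as a Python literal and stored back as plain text, e.g.
#
#     process_python_str("'3.10'").value   # -> "3.10"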


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )