Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pip/_vendor/packaging/_parser.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

181 statements  

1"""Handwritten parser of dependency specifiers. 

2 

3The docstring for each __parse_* function contains EBNF-inspired grammar representing 

4the implementation. 

5""" 

6 

7from __future__ import annotations 

8 

9import ast 

10from typing import List, Literal, NamedTuple, Sequence, Tuple, Union 

11 

12from ._tokenizer import DEFAULT_RULES, Tokenizer 

13 

14 

class Node:
    """Base class for nodes of a parsed marker expression.

    Wraps a single string value; subclasses decide how that value is
    rendered back into specifier syntax via ``serialize``.
    """

    __slots__ = ("value",)

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{type(self).__name__}({self.value!r})>"

    def serialize(self) -> str:
        # Subclasses must provide their own serialization.
        raise NotImplementedError

29 

30 

class Variable(Node):
    """A marker variable (e.g. ``python_version``); serialized unquoted."""

    __slots__ = ()

    def serialize(self) -> str:
        # Variables appear as bare names in marker syntax.
        return f"{self}"

36 

37 

class Value(Node):
    """A literal value; serialized wrapped in double quotes."""

    __slots__ = ()

    def serialize(self) -> str:
        return '"{}"'.format(self)

43 

44 

class Op(Node):
    """A comparison operator (e.g. ``==``, ``in``); serialized as-is."""

    __slots__ = ()

    def serialize(self) -> str:
        return f"{self}"

50 

51 

# Type aliases describing the shape of a parsed marker expression.
MarkerLogical = Literal["and", "or"]  # boolean connective between atoms
MarkerVar = Union[Variable, Value]  # either side of a single comparison
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]  # one comparison: (lhs, op, rhs)
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]  # item or parenthesized group
MarkerList = List[Union["MarkerList", MarkerAtom, MarkerLogical]]  # flat and/or list

57 

58 

class ParsedRequirement(NamedTuple):
    """Result of parsing one dependency specifier string."""

    # Distribution name exactly as written in the specifier.
    name: str
    # Direct-reference URL (the "@ <url>" form), or "" when absent.
    url: str
    # Extra names from the bracketed list; [] when no brackets were given.
    extras: list[str]
    # Raw version-specifier text, or "" for a URL requirement / bare name.
    specifier: str
    # Parsed environment-marker expression, or None when no ";" marker.
    marker: MarkerList | None

65 

66 

67# -------------------------------------------------------------------------------------- 

68# Recursive descent parser for dependency specifier 

69# -------------------------------------------------------------------------------------- 

def parse_requirement(source: str) -> ParsedRequirement:
    """Parse a dependency specifier string into a ParsedRequirement."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)

72 

73 

def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    # The package name must come first.
    token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    tokenizer.consume("WS")

    # Optional bracketed extras, then either a URL or a version specifier,
    # each optionally followed by an environment marker.
    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(token.text, url, extras, specifier, marker)

93 

94 

def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns a (url, specifier, marker) triple; exactly one of url/specifier
    is non-empty (or both empty for a bare name), marker may be None.
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        # URL form: "@ <url> [; marker]".
        tokenizer.read()
        tokenizer.consume("WS")

        # Remember where the URL begins so error spans can cover it.
        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # Whitespace is mandatory between the URL and anything that follows.
        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=url_start,
            expected="semicolon (after URL and whitespace)",
        )
    else:
        # Specifier form: "[(]<version specifiers>[)] [; marker]".
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # Tailor the error message to whether any specifier text was seen.
        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            expected=(
                "comma (within version specifier), semicolon (after version specifier)"
                if specifier
                else "semicolon (after name with no version specifier)"
            ),
        )

    return (url, specifier, marker)

146 

147 

def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, expected: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    # A marker must be introduced by a semicolon; anything else is an error.
    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected {expected} or end",
            span_start=span_start,
            span_end=None,
        )
    tokenizer.read()

    parsed = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return parsed

167 

168 

def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    # Extras are optional; nothing to do unless a '[' follows.
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names

186 

187 

def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    names: list[str] = []

    # An empty bracket pair is allowed: no identifier means no extras.
    if not tokenizer.check("IDENTIFIER"):
        return names
    names.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two identifiers in a row: the separating comma is missing.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        if not tokenizer.check("COMMA"):
            break  # anything other than a comma ends the list

        tokenizer.read()  # consume the comma
        tokenizer.consume("WS")

        token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        names.append(token.text)

    return names

213 

214 

def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    # The version specifier may be wrapped in parentheses (see grammar).
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        result = _parse_version_many(tokenizer)
        tokenizer.consume("WS")
    return result

230 

231 

def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the raw, comma-joined specifier text (may be empty).
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        # Track where this specifier starts so error spans cover it.
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        # A ".*" suffix is only legal with == / != ; reject it otherwise.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        # Likewise for a trailing "+<local>" version label.
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma in the raw text and continue with the next clause.
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers

259 

260 

261# -------------------------------------------------------------------------------------- 

262# Recursive descent parser for marker expression 

263# -------------------------------------------------------------------------------------- 

def parse_marker(source: str) -> MarkerList:
    """Parse a complete environment-marker expression string."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_full_marker(tokenizer)

266 

267 

def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    """Parse a marker and require that it consumes the whole input."""
    result = _parse_marker(tokenizer)
    # Trailing junk after a well-formed marker is a syntax error.
    tokenizer.expect("END", expected="end of marker expression")
    return result

272 

273 

def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)+
    """
    # Build a flat list in source order: [atom, "and"/"or", atom, ...].
    parts: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        op_token = tokenizer.read()
        parts.append(op_token.text)
        parts.append(_parse_marker_atom(tokenizer))
    return parts

284 

285 

def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")
    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        # No parentheses: a single comparison.
        atom: MarkerAtom = _parse_marker_item(tokenizer)
    else:
        # Parenthesized sub-expression, parsed recursively.
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            atom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    tokenizer.consume("WS")
    return atom

306 

307 

def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (lhs, op, rhs)

320 

321 

def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:  # noqa: RET503
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dotted variable names are normalized to underscores.
        name = tokenizer.read().text.replace(".", "_")
        return process_env_var(name)
    if tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    # Neither form matched; raise_syntax_error raises, so no return needed.
    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )

334 

335 

def process_env_var(env_var: str) -> Variable:
    """Wrap a marker variable name in a Variable node, canonicalizing it."""
    # Both spellings refer to the same environment datum; keep one form.
    canonical = (
        "platform_python_implementation"
        if env_var in {"platform_python_implementation", "python_implementation"}
        else env_var
    )
    return Variable(canonical)

341 

342 

def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted-string token and wrap the result in a Value node."""
    # literal_eval handles Python quoting/escaping rules safely.
    return Value(str(ast.literal_eval(python_str)))

346 

347 

def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    if tokenizer.check("NOT"):
        # "not" must be followed by whitespace and then "in".
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )