1"""Handwritten parser of dependency specifiers. 

2 

3The docstring for each __parse_* function contains ENBF-inspired grammar representing 

4the implementation. 

5""" 

import ast
from typing import Any, List, NamedTuple, Optional, Tuple, Union

from ._tokenizer import DEFAULT_RULES, Tokenizer


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)
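# For illustration, serialize() renders a node back to marker-source text:
#     Variable("os_name").serialize()  -> os_name
#     Value("3.8").serialize()         -> "3.8"   (Value re-quotes its contents)
#     Op("not in").serialize()         -> not in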

MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]
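# At runtime a MarkerList is a flat list mixing MarkerItem tuples, the BOOLOP
# strings "and"/"or", and nested sub-lists for parenthesized groups, e.g.
#     [(Variable, Op, Value), "and", [(Variable, Op, Value), "or", (Variable, Op, Value)]]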

class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: List[str]
    specifier: str
    marker: Optional[MarkerList]


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
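# A usage sketch (illustrative; reprs follow Node.__repr__ above):
#     parse_requirement("requests[security]>=2.8.1; python_version<'3.8'")
# returns roughly
#     ParsedRequirement(name='requests', url='', extras=['security'],
#                       specifier='>=2.8.1',
#                       marker=[(<Variable('python_version')>, <Op('<')>,
#                                <Value('3.8')>)])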

def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker

def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: List[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
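# Note: the IDENTIFIER peek in _parse_extras_list turns input like "pkg[a b]"
# into the specific error "Expected comma between extra names" rather than a
# generic parse failure.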

def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
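# For illustration: _parse_version_many concatenates the SPECIFIER and COMMA
# token texts and drops WS tokens, so ">=2.8.1, <3" comes back as ">=2.8.1,<3".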

# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES))
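# A usage sketch (illustrative; reprs follow Node.__repr__ above):
#     parse_marker("os_name == 'nt' and python_version >= '3.8'")
# returns roughly
#     [(<Variable('os_name')>, <Op('==')>, <Value('nt')>), 'and',
#      (<Variable('python_version')>, <Op('>=')>, <Value('3.8')>)]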

def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)+
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if (
        env_var == "platform_python_implementation"
        or env_var == "python_implementation"
    ):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
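# For illustration: process_python_str("'linux'") evaluates the QUOTED_STRING
# token with ast.literal_eval, so both quote styles and escape sequences are
# handled; the result here is Value('linux').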

def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )