Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/magics/line_arg_parser/parser.py: 62%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

215 statements  

1# Copyright 2020 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15from google.cloud.bigquery.magics.line_arg_parser import DuplicateQueryParamsError 

16from google.cloud.bigquery.magics.line_arg_parser import ParseError 

17from google.cloud.bigquery.magics.line_arg_parser import QueryParamsParseError 

18from google.cloud.bigquery.magics.line_arg_parser import TokenType 

19 

20 

class ParseNode:
    """Common ancestor of every node in the abstract syntax tree.

    The class carries no state or behavior of its own; it only gives all
    node types produced from the parsed input a shared base type.
    """

23 

24 

class InputLine(ParseNode):
    """Node holding the two parts of an argument line.

    Args:
        destination_var: Stored unchanged as ``self.destination_var``.
        option_list: Stored unchanged as ``self.option_list``.
    """

    def __init__(self, destination_var, option_list):
        self.destination_var, self.option_list = destination_var, option_list

29 

30 

class DestinationVar(ParseNode):
    """Node for the (optional) destination variable.

    Args:
        token: A token of type DEST_VAR, or ``None`` when there is no
            destination variable. Kept as ``self.token``.

    ``self.name`` is the token's lexeme, or ``None`` if no token was given.
    """

    def __init__(self, token):
        self.token = token

        if token is None:
            self.name = None
        else:
            self.name = token.lexeme

36 

37 

class CmdOptionList(ParseNode):
    """Node grouping a sequence of option nodes.

    Args:
        option_nodes: Iterable of option nodes. Its elements are copied
            (shallowly) into a new list stored as ``self.options``, so later
            changes to the caller's container do not affect this node.
    """

    def __init__(self, option_nodes):
        self.options = list(option_nodes)

41 

42 

class CmdOption(ParseNode):
    """Node for a single command line option.

    Args:
        name: The option name, a string.
        value: The option's value, a ``CmdOptionValue`` node.
    """

    def __init__(self, name, value):
        self.name, self.value = name, value

47 

48 

class ParamsOption(CmdOption):
    """A ``CmdOption`` whose name is always ``"params"``.

    Args:
        value: The option's value node, forwarded to ``CmdOption``.
    """

    def __init__(self, value):
        super().__init__("params", value)

52 

53 

class CmdOptionValue(ParseNode):
    """Node for the value given to a command line option.

    Args:
        token: A token of type OPT_VAL. Kept as ``self.token``; its lexeme
            is exposed as ``self.value``.
    """

    def __init__(self, token):
        self.token, self.value = token, token.lexeme

59 

60 

class PyVarExpansion(ParseNode):
    """Node for a ``$``-prefixed Python identifier.

    Args:
        token: The source token. Kept as ``self.token``; its lexeme is
            exposed as ``self.raw_value``.
    """

    def __init__(self, token):
        self.token, self.raw_value = token, token.lexeme

65 

66 

class PyDict(ParseNode):
    """Node for a dictionary literal.

    Args:
        dict_items: Iterable of dictionary item nodes. Its elements are
            copied (shallowly) into a new list stored as ``self.items``.
    """

    def __init__(self, dict_items):
        self.items = list(dict_items)

70 

71 

class PyDictItem(ParseNode):
    """Node pairing a dictionary key with its value.

    Args:
        key: The key node, stored as ``self.key``.
        value: The value node, stored as ``self.value``.
    """

    def __init__(self, key, value):
        self.key, self.value = key, value

76 

77 

class PyDictKey(ParseNode):
    """Node for a dictionary key.

    Args:
        token: The source token. Kept as ``self.token``; its lexeme is
            exposed as ``self.key_value``.
    """

    def __init__(self, token):
        self.token, self.key_value = token, token.lexeme

82 

83 

class PyScalarValue(ParseNode):
    """Node for a scalar value.

    Args:
        token: The source token, stored as ``self.token``.
        raw_value: The unprocessed value, stored as ``self.raw_value``.
            The parser in this module passes the token's lexeme here.
    """

    def __init__(self, token, raw_value):
        self.token, self.raw_value = token, raw_value

88 

89 

class PyTuple(ParseNode):
    """Node for a tuple literal.

    Args:
        tuple_items: Iterable of item nodes. Its elements are copied
            (shallowly) into a new list stored as ``self.items``.
    """

    def __init__(self, tuple_items):
        self.items = list(tuple_items)

93 

94 

class PyList(ParseNode):
    """Node for a list literal.

    Args:
        list_items: Iterable of item nodes. Its elements are copied
            (shallowly) into a new list stored as ``self.items``.
    """

    def __init__(self, list_items):
        self.items = list(list_items)

98 

99 

class Parser:
    """Recursive-descent parser for the tokenized cell magic input line.

    Only a simplified subset of Python grammar is recognized, namely the
    dictionary representation typically supplied through the "--params"
    option of the %%bigquery cell magic.

    The grammar (terminal symbols are CAPITALIZED)::

        input_line       : destination_var option_list
        destination_var  : DEST_VAR | EMPTY
        option_list      : (OPTION_SPEC [OPTION_EQ] option_value)*
                           (params_option | EMPTY)
                           (OPTION_SPEC [OPTION_EQ] option_value)*

        option_value     : OPT_VAL | EMPTY

        # DOLLAR_PY_ID can occur if a variable passed to --params does not
        # exist and is thus not expanded to a dict.
        params_option    : PARAMS_OPT_SPEC [PARAMS_OPT_EQ]
                           (DOLLAR_PY_ID | PY_STRING | py_dict)

        py_dict          : LCURL dict_items RCURL
        dict_items       : dict_item | (dict_item COMMA dict_items)
        dict_item        : (dict_key COLON py_value) | EMPTY

        # dict keys are actually @parameter names in the cell body (i.e. the
        # query), thus restricting them to strings.
        dict_key         : PY_STRING

        py_value         : PY_BOOL
                         | PY_NUMBER
                         | PY_STRING
                         | py_tuple
                         | py_list
                         | py_dict

        py_tuple         : LPAREN collection_items RPAREN
        py_list          : LSQUARE collection_items RSQUARE
        collection_items : collection_item
                         | (collection_item COMMA collection_items)
        collection_item  : py_value | EMPTY

    Args:
        lexer (line_arg_parser.lexer.Lexer):
            An iterable producing a tokenized cell magic argument line.
    """

    def __init__(self, lexer):
        self._lexer = lexer
        self._tokens_iter = iter(lexer)
        # Prime the parser so that a current token is always available.
        self.get_next_token()

    def get_next_token(self):
        """Advance the token stream by one and remember the new token.

        The fetched token becomes ``self._current_token``. No default is
        given to ``next()``, so an exhausted iterator raises
        ``StopIteration``.
        """
        self._current_token = next(self._tokens_iter)

    def consume(self, expected_type, exc_type=ParseError):
        """Accept the current token if it is of the expected type.

        On a match the parser moves on to the next token, except when the
        expected type is ``EOL``, in which case the position is left alone.

        Args:
            expected_type (lexer.TokenType): The token type that must be
                present at the current position.
            exc_type (Optional[ParseError]): The exception class to raise on
                a mismatch. Should be ``ParseError`` or one of its
                subclasses. Defaults to ``ParseError``.

        Raises:
            ParseError: If the current token is not of the expected type.
        """
        current = self._current_token

        if current.type_ == expected_type:
            if expected_type != TokenType.EOL:
                self.get_next_token()
            return

        # Mismatch: running out of input gets its own, clearer message.
        if current.type_ == TokenType.EOL:
            msg = "Unexpected end of input, expected {}.".format(expected_type)
        else:
            msg = "Expected token type {}, but found {} at position {}.".format(
                expected_type, current.lexeme, current.pos
            )

        self.error(message=msg, exc_type=exc_type)

    def error(self, message="Syntax error.", exc_type=ParseError):
        """Raise a parsing error.

        Args:
            message (str): The text passed to the exception constructor.
                Defaults to ``"Syntax error."``.
            exc_type (Optional[ParseError]): The exception class to
                instantiate and raise. Should be ``ParseError`` or one of
                its subclasses. Defaults to ``ParseError``.

        Raises:
            ParseError: Always (an instance of ``exc_type``).
        """
        raise exc_type(message)

    def input_line(self):
        """Parse the whole cell magic arguments line (the top level rule).

        Production:

            input_line : destination_var option_list

        Returns:
            InputLine: The root node of the parsed line.

        Raises:
            ParseError: If any input is left over after the option list.
        """
        dest_var = self.destination_var()
        options = self.option_list()

        # Both sub-rules are done; only the end-of-line marker may remain.
        leftover = self._current_token
        if leftover.type_ != TokenType.EOL:
            self.error(
                "Unexpected input at position {}: {}".format(
                    leftover.pos, leftover.lexeme
                )
            )

        return InputLine(dest_var, options)

    def destination_var(self):
        """Parse the ``destination_var`` grammar rule.

        Production:

            destination_var : DEST_VAR | EMPTY

        Returns:
            DestinationVar: Wraps the DEST_VAR token, or ``None`` if the
            rule matched the empty alternative.

        Raises:
            ParseError: If the current token is of type UNKNOWN.
        """
        token = self._current_token

        if token.type_ == TokenType.DEST_VAR:
            self.consume(TokenType.DEST_VAR)
            return DestinationVar(token)

        if token.type_ == TokenType.UNKNOWN:
            self.error(
                "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            )

        return DestinationVar(None)

    def option_list(self):
        """Parse the ``option_list`` grammar rule.

        Production:

            option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
                          (params_option | EMPTY)
                          (OPTION_SPEC [OPTION_EQ] option_value)*

        Returns:
            CmdOptionList: All parsed options in their order of appearance.

        Raises:
            DuplicateQueryParamsError: If a second ``--params`` option
                follows the trailing run of ordinary options.
        """
        collected = []

        def gather_plain_options():
            # Consumes a (possibly empty) run of non-params options.
            while self._current_token.type_ == TokenType.OPTION_SPEC:
                spec = self._current_token
                self.consume(TokenType.OPTION_SPEC)

                # The "=" between an option and its value is optional.
                if self._current_token.type_ == TokenType.OPTION_EQ:
                    self.consume(TokenType.OPTION_EQ)

                # lexeme[2:] drops the leading "--" from the option name.
                collected.append(CmdOption(spec.lexeme[2:], self.option_value()))

        gather_plain_options()

        if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC:
            collected.append(self.params_option())

        gather_plain_options()

        # A params option showing up at this point is a repeated one.
        if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC:
            self.error(
                message="Duplicate --params option", exc_type=DuplicateQueryParamsError
            )

        return CmdOptionList(collected)

    def option_value(self):
        """Parse the ``option_value`` grammar rule.

        Production:

            option_value : OPT_VAL | EMPTY

        Returns:
            Optional[CmdOptionValue]: The value node, or ``None`` if the
            rule matched the empty alternative.

        Raises:
            ParseError: If the current token is of type UNKNOWN.
        """
        token = self._current_token

        if token.type_ == TokenType.OPT_VAL:
            self.consume(TokenType.OPT_VAL)
            return CmdOptionValue(token)

        if token.type_ == TokenType.UNKNOWN:
            self.error(
                "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            )

        return None

    def params_option(self):
        """Parse the ``params_option`` grammar rule.

        Production:

            params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ]
                            (DOLLAR_PY_ID | PY_STRING | py_dict)

        Returns:
            ParamsOption: The option node wrapping the parsed value.
        """
        self.consume(TokenType.PARAMS_OPT_SPEC)

        # The "=" between "--params" and its value is optional.
        if self._current_token.type_ == TokenType.PARAMS_OPT_EQ:
            self.consume(TokenType.PARAMS_OPT_EQ)

        token = self._current_token

        if token.type_ == TokenType.DOLLAR_PY_ID:
            self.consume(TokenType.DOLLAR_PY_ID)
            opt_value = PyVarExpansion(token)
        elif token.type_ == TokenType.PY_STRING:
            self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
            opt_value = PyScalarValue(token, token.lexeme)
        else:
            # Anything else has to be a dict; py_dict() reports the error
            # if it is not.
            opt_value = self.py_dict()

        return ParamsOption(opt_value)

    def py_dict(self):
        """Parse the ``py_dict`` grammar rule.

        Production:

            py_dict : LCURL dict_items RCURL

        Returns:
            PyDict: Node containing the parsed dictionary items.

        Raises:
            QueryParamsParseError: If a curly bracket is missing.
        """
        self.consume(TokenType.LCURL, exc_type=QueryParamsParseError)
        entries = self.dict_items()
        self.consume(TokenType.RCURL, exc_type=QueryParamsParseError)

        return PyDict(entries)

    def _comma_separated(self, parse_one):
        """Collect the results of ``parse_one`` across a comma-separated run.

        ``parse_one`` is called once up front and once more after every
        COMMA token; ``None`` results (empty items) are left out.

        Args:
            parse_one (Callable[[], Optional[ParseNode]]): Parses one item.

        Returns:
            list: The non-empty items, in order.
        """
        gathered = []

        while True:
            node = parse_one()
            if node is not None:
                gathered.append(node)

            if self._current_token.type_ != TokenType.COMMA:
                break

            self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)

        return gathered

    def dict_items(self):
        """Parse the ``dict_items`` grammar rule.

        Production:

            dict_items : dict_item | (dict_item COMMA dict_items)

        Returns:
            list: The ``PyDictItem`` nodes found, empty items skipped.
        """
        return self._comma_separated(self.dict_item)

    def dict_item(self):
        """Parse the ``dict_item`` grammar rule.

        Production:

            dict_item : (dict_key COLON py_value) | EMPTY

        Returns:
            Optional[PyDictItem]: The key/value node, or ``None`` if the
            rule matched the empty alternative.

        Raises:
            QueryParamsParseError: If the current token is of type UNKNOWN,
                or if the colon or the value is malformed.
        """
        token = self._current_token

        if token.type_ == TokenType.PY_STRING:
            key = self.dict_key()
            self.consume(TokenType.COLON, exc_type=QueryParamsParseError)
            return PyDictItem(key, self.py_value())

        if token.type_ == TokenType.UNKNOWN:
            self.error(
                "Unknown input at position {}: {}".format(token.pos, token.lexeme),
                exc_type=QueryParamsParseError,
            )

        return None

    def dict_key(self):
        """Parse the ``dict_key`` grammar rule.

        Production:

            dict_key : PY_STRING

        Returns:
            PyDictKey: Node wrapping the string token.

        Raises:
            QueryParamsParseError: If the current token is not a PY_STRING.
        """
        key_token = self._current_token
        self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
        return PyDictKey(key_token)

    def py_value(self):
        """Parse the ``py_value`` grammar rule.

        Production:

            py_value : PY_BOOL | PY_NUMBER | PY_STRING | py_tuple | py_list | py_dict

        Returns:
            ParseNode: A ``PyScalarValue`` for the three scalar token types,
            otherwise the ``PyTuple``, ``PyList`` or ``PyDict`` node.

        Raises:
            QueryParamsParseError: If the current token cannot start a value.
        """
        token = self._current_token
        kind = token.type_

        # All scalars are handled alike: accept the token, keep its lexeme.
        if kind in (TokenType.PY_BOOL, TokenType.PY_NUMBER, TokenType.PY_STRING):
            self.consume(kind, exc_type=QueryParamsParseError)
            return PyScalarValue(token, token.lexeme)

        # Containers are recognized by their opening bracket.
        if kind == TokenType.LPAREN:
            return self.py_tuple()
        if kind == TokenType.LSQUARE:
            return self.py_list()
        if kind == TokenType.LCURL:
            return self.py_dict()

        self.error(
            "Unexpected token type {} at position {}.".format(kind, token.pos),
            exc_type=QueryParamsParseError,
        )

    def py_tuple(self):
        """Parse the ``py_tuple`` grammar rule.

        Production:

            py_tuple : LPAREN collection_items RPAREN

        Returns:
            PyTuple: Node containing the parsed items.

        Raises:
            QueryParamsParseError: If a parenthesis is missing.
        """
        self.consume(TokenType.LPAREN, exc_type=QueryParamsParseError)
        members = self.collection_items()
        self.consume(TokenType.RPAREN, exc_type=QueryParamsParseError)

        return PyTuple(members)

    def py_list(self):
        """Parse the ``py_list`` grammar rule.

        Production:

            py_list : LSQUARE collection_items RSQUARE

        Returns:
            PyList: Node containing the parsed items.

        Raises:
            QueryParamsParseError: If a square bracket is missing.
        """
        self.consume(TokenType.LSQUARE, exc_type=QueryParamsParseError)
        members = self.collection_items()
        self.consume(TokenType.RSQUARE, exc_type=QueryParamsParseError)

        return PyList(members)

    def collection_items(self):
        """Parse the ``collection_items`` grammar rule.

        Production:

            collection_items : collection_item | (collection_item COMMA collection_items)

        Returns:
            list: The value nodes found, empty items skipped.
        """
        return self._comma_separated(self.collection_item)

    def collection_item(self):
        """Parse the ``collection_item`` grammar rule.

        Production:

            collection_item : py_value | EMPTY

        Returns:
            Optional[ParseNode]: The value node, or ``None`` if the current
            token closes the surrounding list/tuple.
        """
        # A closing bracket means there are no more list/tuple items.
        if self._current_token.type_ in {TokenType.RPAREN, TokenType.RSQUARE}:
            return None

        return self.py_value()