Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/magics/line_arg_parser/parser.py: 62%
214 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
1# Copyright 2020 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15from google.cloud.bigquery.magics.line_arg_parser import DuplicateQueryParamsError
16from google.cloud.bigquery.magics.line_arg_parser import ParseError
17from google.cloud.bigquery.magics.line_arg_parser import QueryParamsParseError
18from google.cloud.bigquery.magics.line_arg_parser import TokenType
class ParseNode(object):
    """A base class for nodes in the input parsed to an abstract syntax tree."""
class InputLine(ParseNode):
    """Root node representing the whole parsed cell magic argument line.

    Args:
        destination_var: The node for the destination variable part of the
            line (``Parser.input_line`` passes a ``DestinationVar`` here).
        option_list: The node holding the parsed options
            (``Parser.input_line`` passes a ``CmdOptionList`` here).
    """

    def __init__(self, destination_var, option_list):
        self.destination_var = destination_var
        self.option_list = option_list
class DestinationVar(ParseNode):
    """Node for the (optional) destination variable.

    Args:
        token: The token of type DEST_VAR, or ``None`` when the input line
            does not specify a destination variable.
    """

    def __init__(self, token):
        # token type is DEST_VAR
        self.token = token
        # No token means no destination variable was given, hence no name.
        self.name = token.lexeme if token is not None else None
class CmdOptionList(ParseNode):
    """Node holding all options parsed from the input line.

    Args:
        option_nodes: An iterable of option nodes. It is copied (shallowly)
            into a new list, so later changes to the argument do not affect
            this node.
    """

    def __init__(self, option_nodes):
        self.options = list(option_nodes)  # shallow copy
class CmdOption(ParseNode):
    """Node for a single command line option.

    Args:
        name (str): The option name.
        value: The option value node (a ``CmdOptionValue`` node for regular
            options).
    """

    def __init__(self, name, value):
        self.name = name  # string
        self.value = value  # CmdOptionValue node
class ParamsOption(CmdOption):
    """Node for the ``--params`` option; its name is always ``"params"``.

    Args:
        value: The node representing the value given to the option.
    """

    def __init__(self, value):
        super(ParamsOption, self).__init__("params", value)
class CmdOptionValue(ParseNode):
    """Node for the value of a regular (non-params) option.

    Args:
        token: The token of type OPT_VAL; its lexeme becomes ``value``.
    """

    def __init__(self, token):
        # token type is OPT_VAL
        self.token = token
        self.value = token.lexeme
class PyVarExpansion(ParseNode):
    """Node for a ``$variable`` reference given as the params option value.

    Args:
        token: The token holding the reference; its lexeme is stored
            unchanged as ``raw_value``.
    """

    def __init__(self, token):
        self.token = token
        self.raw_value = token.lexeme
class PyDict(ParseNode):
    """Node for a dictionary literal.

    Args:
        dict_items: An iterable of dictionary item nodes. It is copied
            (shallowly) into a new list.
    """

    def __init__(self, dict_items):
        self.items = list(dict_items)  # shallow copy
class PyDictItem(ParseNode):
    """Node for a single ``key: value`` pair of a dictionary literal.

    Args:
        key: The node representing the item's key.
        value: The node representing the item's value.
    """

    def __init__(self, key, value):
        self.key = key
        self.value = value
class PyDictKey(ParseNode):
    """Node for the key of a dictionary item.

    Args:
        token: The token holding the key; its lexeme is stored as
            ``key_value``.
    """

    def __init__(self, token):
        self.token = token
        self.key_value = token.lexeme
class PyScalarValue(ParseNode):
    """Node for a scalar value (the parser uses it for bools, numbers, strings).

    Args:
        token: The token the value was read from.
        raw_value: The unprocessed textual value (the parser passes the
            token's lexeme).
    """

    def __init__(self, token, raw_value):
        self.token = token
        self.raw_value = raw_value
class PyTuple(ParseNode):
    """Node for a tuple literal.

    Args:
        tuple_items: An iterable of the tuple's item nodes. It is copied
            (shallowly) into a new list.
    """

    def __init__(self, tuple_items):
        self.items = list(tuple_items)  # shallow copy
class PyList(ParseNode):
    """Node for a list literal.

    Args:
        list_items: An iterable of the list's item nodes. It is copied
            (shallowly) into a new list.
    """

    def __init__(self, list_items):
        self.items = list(list_items)  # shallow copy
class Parser(object):
    """Parser for the tokenized cell magic input line.

    The parser recognizes a simplified subset of Python grammar, specifically
    a dictionary representation in typical use cases when the "--params" option
    is used with the %%bigquery cell magic.

    The grammar (terminal symbols are CAPITALIZED):

        input_line       : destination_var option_list
        destination_var  : DEST_VAR | EMPTY
        option_list      : (OPTION_SPEC [OPTION_EQ] option_value)*
                           (params_option | EMPTY)
                           (OPTION_SPEC [OPTION_EQ] option_value)*

        option_value     : OPT_VAL | EMPTY

        # DOLLAR_PY_ID can occur if a variable passed to --params does not exist
        # and is thus not expanded to a dict.
        params_option    : PARAMS_OPT_SPEC [PARAMS_OPT_EQ]
                           (DOLLAR_PY_ID | PY_STRING | py_dict)

        py_dict          : LCURL dict_items RCURL
        dict_items       : dict_item | (dict_item COMMA dict_items)
        dict_item        : (dict_key COLON py_value) | EMPTY

        # dict items are actually @parameter names in the cell body (i.e. the query),
        # thus restricting them to strings.
        dict_key         : PY_STRING

        py_value         : PY_BOOL
                         | PY_NUMBER
                         | PY_STRING
                         | py_tuple
                         | py_list
                         | py_dict

        py_tuple         : LPAREN collection_items RPAREN
        py_list          : LSQUARE collection_items RSQUARE
        collection_items : collection_item | (collection_item COMMA collection_items)
        collection_item  : py_value | EMPTY

    Args:
        lexer (line_arg_parser.lexer.Lexer):
            An iterable producing a tokenized cell magic argument line.
    """

    def __init__(self, lexer):
        self._lexer = lexer
        self._tokens_iter = iter(self._lexer)
        # Prime the parser so that ``_current_token`` is always available.
        self.get_next_token()

    def get_next_token(self):
        """Obtain the next token from the token stream and store it as current."""
        token = next(self._tokens_iter)
        self._current_token = token

    def consume(self, expected_type, exc_type=ParseError):
        """Move to the next token in token stream if it matches the expected type.

        Args:
            expected_type (lexer.TokenType): The expected token type to be consumed.
            exc_type (Optional[ParseError]): The type of the exception to raise. Should be
                the ``ParseError`` class or one of its subclasses. Defaults to
                ``ParseError``.

        Raises:
            ParseError: If the current token does not match the expected type.
        """
        current = self._current_token

        if current.type_ == expected_type:
            # A matching EOL token is accepted but never advanced past, so the
            # token stream is not read beyond the end of the input.
            if expected_type != TokenType.EOL:
                self.get_next_token()
            return

        if current.type_ == TokenType.EOL:
            msg = "Unexpected end of input, expected {}.".format(expected_type)
        else:
            msg = "Expected token type {}, but found {} at position {}.".format(
                expected_type, current.lexeme, current.pos
            )
        self.error(message=msg, exc_type=exc_type)

    def error(self, message="Syntax error.", exc_type=ParseError):
        """Raise an error with the given message.

        Args:
            message (str): The message to pass to the raised exception.
                Defaults to ``"Syntax error."``.
            exc_type (Optional[ParseError]): The type of the exception to raise. Should be
                the ``ParseError`` class or one of its subclasses. Defaults to
                ``ParseError``.

        Raises:
            ParseError: Always; the raised instance is of type ``exc_type``.
        """
        raise exc_type(message)

    def input_line(self):
        """The top level method for parsing the cell magic arguments line.

        Implements the following grammar production rule:

            input_line : destination_var option_list

        Returns:
            InputLine: The root node of the parsed input.

        Raises:
            ParseError: If any input is left over after the option list.
        """
        dest_var = self.destination_var()
        options = self.option_list()

        token = self._current_token

        # Everything must have been consumed by now, anything else is garbage.
        if token.type_ != TokenType.EOL:
            msg = "Unexpected input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)

        return InputLine(dest_var, options)

    def destination_var(self):
        """Implementation of the ``destination_var`` grammar production rule.

        Production:

            destination_var : DEST_VAR | EMPTY

        Returns:
            DestinationVar: A node wrapping the DEST_VAR token, or a node
            wrapping ``None`` if no destination variable is present.

        Raises:
            ParseError: If the current token is of an unknown type.
        """
        token = self._current_token

        if token.type_ == TokenType.DEST_VAR:
            self.consume(TokenType.DEST_VAR)
            result = DestinationVar(token)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)
        else:
            # EMPTY production - nothing is consumed.
            result = DestinationVar(None)

        return result

    def option_list(self):
        """Implementation of the ``option_list`` grammar production rule.

        Production:

            option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
                          (params_option | EMPTY)
                          (OPTION_SPEC [OPTION_EQ] option_value)*

        Returns:
            CmdOptionList: A node with all parsed options in input order.

        Raises:
            DuplicateQueryParamsError: If the params option is given again
                after it has already been parsed.
        """
        all_options = []

        def parse_nonparams_options():
            # Consume a (possibly empty) run of regular options, appending
            # each of them to ``all_options``.
            while self._current_token.type_ == TokenType.OPTION_SPEC:
                token = self._current_token
                self.consume(TokenType.OPTION_SPEC)

                opt_name = token.lexeme[2:]  # cut off the "--" prefix

                # skip the optional "=" character
                if self._current_token.type_ == TokenType.OPTION_EQ:
                    self.consume(TokenType.OPTION_EQ)

                opt_value = self.option_value()
                option = CmdOption(opt_name, opt_value)
                all_options.append(option)

        parse_nonparams_options()

        token = self._current_token

        if token.type_ == TokenType.PARAMS_OPT_SPEC:
            option = self.params_option()
            all_options.append(option)

        parse_nonparams_options()

        # A params option spec at this point can only be a repeated one.
        if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC:
            self.error(
                message="Duplicate --params option", exc_type=DuplicateQueryParamsError
            )

        return CmdOptionList(all_options)

    def option_value(self):
        """Implementation of the ``option_value`` grammar production rule.

        Production:

            option_value : OPT_VAL | EMPTY

        Returns:
            Optional[CmdOptionValue]: The value node, or ``None`` if the
            option has no value.

        Raises:
            ParseError: If the current token is of an unknown type.
        """
        token = self._current_token

        if token.type_ == TokenType.OPT_VAL:
            self.consume(TokenType.OPT_VAL)
            result = CmdOptionValue(token)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)
        else:
            # EMPTY production - the option was given without a value.
            result = None

        return result

    def params_option(self):
        """Implementation of the ``params_option`` grammar production rule.

        Production:

            params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ]
                            (DOLLAR_PY_ID | PY_STRING | py_dict)

        Returns:
            ParamsOption: The option node; its value is a ``PyVarExpansion``,
            a ``PyScalarValue`` or a ``PyDict`` node.

        Raises:
            QueryParamsParseError: If the option value is not any of the
                forms allowed by the production rule.
        """
        self.consume(TokenType.PARAMS_OPT_SPEC)

        # skip the optional "=" character
        if self._current_token.type_ == TokenType.PARAMS_OPT_EQ:
            self.consume(TokenType.PARAMS_OPT_EQ)

        if self._current_token.type_ == TokenType.DOLLAR_PY_ID:
            token = self._current_token
            self.consume(TokenType.DOLLAR_PY_ID)
            opt_value = PyVarExpansion(token)
        elif self._current_token.type_ == TokenType.PY_STRING:
            token = self._current_token
            self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
            opt_value = PyScalarValue(token, token.lexeme)
        else:
            # Anything else must be a dict, py_dict() errors out if it is not.
            opt_value = self.py_dict()

        result = ParamsOption(opt_value)

        return result

    def py_dict(self):
        """Implementation of the ``py_dict`` grammar production rule.

        Production:

            py_dict : LCURL dict_items RCURL

        Returns:
            PyDict: The node holding the parsed dictionary items.

        Raises:
            QueryParamsParseError: If the opening or closing curly bracket
                is missing.
        """
        self.consume(TokenType.LCURL, exc_type=QueryParamsParseError)
        dict_items = self.dict_items()
        self.consume(TokenType.RCURL, exc_type=QueryParamsParseError)

        return PyDict(dict_items)

    def dict_items(self):
        """Implementation of the ``dict_items`` grammar production rule.

        Production:

            dict_items : dict_item | (dict_item COMMA dict_items)

        Returns:
            List[PyDictItem]: The parsed items; empty items are left out.
        """
        result = []

        item = self.dict_item()
        if item is not None:
            result.append(item)

        while self._current_token.type_ == TokenType.COMMA:
            self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
            item = self.dict_item()
            if item is not None:
                result.append(item)

        return result

    def dict_item(self):
        """Implementation of the ``dict_item`` grammar production rule.

        Production:

            dict_item : (dict_key COLON py_value) | EMPTY

        Returns:
            Optional[PyDictItem]: The parsed item, or ``None`` for the EMPTY
            production.

        Raises:
            QueryParamsParseError: If the current token is of an unknown type
                or the item is malformed.
        """
        token = self._current_token

        if token.type_ == TokenType.PY_STRING:
            key = self.dict_key()
            self.consume(TokenType.COLON, exc_type=QueryParamsParseError)
            value = self.py_value()
            result = PyDictItem(key, value)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg, exc_type=QueryParamsParseError)
        else:
            # EMPTY production - nothing is consumed.
            result = None

        return result

    def dict_key(self):
        """Implementation of the ``dict_key`` grammar production rule.

        Production:

            dict_key : PY_STRING

        Returns:
            PyDictKey: The node wrapping the key token.

        Raises:
            QueryParamsParseError: If the current token is not a string.
        """
        token = self._current_token
        self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
        return PyDictKey(token)

    def py_value(self):
        """Implementation of the ``py_value`` grammar production rule.

        Production:

            py_value : PY_BOOL | PY_NUMBER | PY_STRING | py_tuple | py_list | py_dict

        Returns:
            Union[PyScalarValue, PyTuple, PyList, PyDict]: The parsed value node.

        Raises:
            QueryParamsParseError: If the current token cannot start a value.
        """
        token = self._current_token
        token_type = token.type_

        # All scalars are handled the same - consume the token and wrap it.
        scalar_types = (TokenType.PY_BOOL, TokenType.PY_NUMBER, TokenType.PY_STRING)
        if token_type in scalar_types:
            self.consume(token_type, exc_type=QueryParamsParseError)
            return PyScalarValue(token, token.lexeme)
        elif token_type == TokenType.LPAREN:
            return self.py_tuple()
        elif token_type == TokenType.LSQUARE:
            return self.py_list()
        elif token_type == TokenType.LCURL:
            return self.py_dict()
        else:
            msg = "Unexpected token type {} at position {}.".format(
                token.type_, token.pos
            )
            self.error(msg, exc_type=QueryParamsParseError)

    def py_tuple(self):
        """Implementation of the ``py_tuple`` grammar production rule.

        Production:

            py_tuple : LPAREN collection_items RPAREN

        Returns:
            PyTuple: The node holding the parsed tuple items.

        Raises:
            QueryParamsParseError: If the opening or closing parenthesis
                is missing.
        """
        self.consume(TokenType.LPAREN, exc_type=QueryParamsParseError)
        items = self.collection_items()
        self.consume(TokenType.RPAREN, exc_type=QueryParamsParseError)

        return PyTuple(items)

    def py_list(self):
        """Implementation of the ``py_list`` grammar production rule.

        Production:

            py_list : LSQUARE collection_items RSQUARE

        Returns:
            PyList: The node holding the parsed list items.

        Raises:
            QueryParamsParseError: If the opening or closing square bracket
                is missing.
        """
        self.consume(TokenType.LSQUARE, exc_type=QueryParamsParseError)
        items = self.collection_items()
        self.consume(TokenType.RSQUARE, exc_type=QueryParamsParseError)

        return PyList(items)

    def collection_items(self):
        """Implementation of the ``collection_items`` grammar production rule.

        Production:

            collection_items : collection_item | (collection_item COMMA collection_items)

        Returns:
            list: The parsed value nodes; empty items are left out.
        """
        result = []

        item = self.collection_item()
        if item is not None:
            result.append(item)

        while self._current_token.type_ == TokenType.COMMA:
            self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
            item = self.collection_item()
            if item is not None:
                result.append(item)

        return result

    def collection_item(self):
        """Implementation of the ``collection_item`` grammar production rule.

        Production:

            collection_item : py_value | EMPTY

        Returns:
            The parsed value node, or ``None`` if the current token closes
            the enclosing tuple or list.
        """
        if self._current_token.type_ not in {TokenType.RPAREN, TokenType.RSQUARE}:
            result = self.py_value()
        else:
            result = None  # end of list/tuple items

        return result