# jmespath/lexer.py: 139 statements, 100% line coverage (coverage.py v7.3.2, 2023-12-08)
import string
import warnings
from json import loads

from jmespath.exceptions import LexerError, EmptyExpressionError

class Lexer(object):
    START_IDENTIFIER = set(string.ascii_letters + '_')
    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER = set(string.digits)
    WHITESPACE = set(" \t\n\r")
    SIMPLE_TOKENS = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {'type': self.SIMPLE_TOKENS[self._current],
                       'value': self._current,
                       'start': self._position, 'end': self._position + 1}
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {'type': 'unquoted_identifier', 'value': buff,
                       'start': start, 'end': start + len(buff)}
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {'type': 'flatten', 'value': '[]',
                           'start': start, 'end': start + 2}
                elif next_char == '?':
                    self._next()
                    yield {'type': 'filter', 'value': '[?',
                           'start': start, 'end': start + 2}
                else:
                    yield {'type': 'lbracket', 'value': '[',
                           'start': start, 'end': start + 1}
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {'type': 'number', 'value': int(buff),
                       'start': start, 'end': start + len(buff)}
            elif self._current == '-':
                # Negative number.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {'type': 'number', 'value': int(buff),
                           'start': start, 'end': start + len(buff)}
                else:
                    raise LexerError(lexer_position=start,
                                     lexer_value=buff,
                                     message="Unknown token '%s'" % buff)
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                if self._next() == '=':
                    yield {'type': 'eq', 'value': '==',
                           'start': self._position - 1, 'end': self._position}
                    self._next()
                else:
                    if self._current is None:
                        # If we're at the EOF, we never advanced
                        # the position so we don't need to rewind
                        # it back one location.
                        position = self._position
                    else:
                        position = self._position - 1
                    raise LexerError(
                        lexer_position=position,
                        lexer_value='=',
                        message="Unknown token '='")
            else:
                raise LexerError(lexer_position=self._position,
                                 lexer_value=self._current,
                                 message="Unknown token %s" % self._current)
        yield {'type': 'eof', 'value': '',
               'start': self._length, 'end': self._length}

    def _consume_number(self):
        start = self._position
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        if not expression:
            raise EmptyExpressionError()
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
        # Consume until the delimiter is reached,
        # allowing for the delimiter to be escaped with "\".
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Unclosed %s delimiter" % delimiter)
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff
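
    # Illustrative trace (an annotation, not from the original source): for
    # the raw string token 'a\'b', _consume_until("'") starts at the opening
    # quote; the backslash branch copies the escape and steps past the
    # embedded quote, so the method returns the four characters a, \, ', b,
    # and the caller's .replace("\\'", "'") collapses them to a'b.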

    def _consume_literal(self):
        start = self._position
        lexeme = self._consume_until('`').replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted
                # JSON strings during the JEP-12 deprecation period.
                parsed_json = loads('"%s"' % lexeme.lstrip())
                warnings.warn("deprecated string literal syntax",
                              PendingDeprecationWarning)
            except ValueError:
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Bad token %s" % lexeme)
        token_len = self._position - start
        # Note: unlike the simple tokens above, 'end' here holds the token
        # length measured from 'start', not an absolute offset.
        return {'type': 'literal', 'value': parsed_json,
                'start': start, 'end': token_len}
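
    # Sketch of literal parsing (illustrative, not part of the original
    # file): the expression `[1, 2]` lexes to a 'literal' token whose value
    # is the parsed Python list [1, 2]; a bare `foo` fails JSON parsing and
    # falls back to the deprecated JEP-12 path, parsing as the JSON string
    # "foo" and emitting a PendingDeprecationWarning.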

    def _consume_quoted_identifier(self):
        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            token_len = self._position - start
            return {'type': 'quoted_identifier', 'value': loads(lexeme),
                    'start': start, 'end': token_len}
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(lexer_position=start,
                             lexer_value=lexeme,
                             message=error_message)

    def _consume_raw_string_literal(self):
        start = self._position
        lexeme = self._consume_until("'").replace("\\'", "'")
        token_len = self._position - start
        return {'type': 'literal', 'value': lexeme,
                'start': start, 'end': token_len}

    def _match_or_else(self, expected, match_type, else_type):
        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {'type': match_type, 'value': current + next_char,
                    'start': start, 'end': start + 1}
        return {'type': else_type, 'value': current,
                'start': start, 'end': start}
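

# Minimal usage sketch (an addition for illustration, not part of the
# upstream jmespath module): tokenize an expression that exercises the
# identifier, filter, comparator, and literal paths, and print each token.
if __name__ == '__main__':
    for token in Lexer().tokenize("foo[?bar > `1`].baz"):
        print(token)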