Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/jmespath/lexer.py: 100%

139 statements  

coverage.py v7.2.7, created at 2023-06-07 06:03 +0000

import string
import warnings
from json import loads

from jmespath.exceptions import LexerError, EmptyExpressionError


class Lexer(object):
    START_IDENTIFIER = set(string.ascii_letters + '_')
    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER = set(string.digits)
    WHITESPACE = set(" \t\n\r")
    SIMPLE_TOKENS = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
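        """Lex ``expression`` into a stream of token dicts.

        Each token has the shape {'type', 'value', 'start', 'end'},
        where 'start' and 'end' are character offsets; a final 'eof'
        token is always emitted. Unrecognized input raises LexerError.
        """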

        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {'type': self.SIMPLE_TOKENS[self._current],
                       'value': self._current,
                       'start': self._position, 'end': self._position + 1}
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {'type': 'unquoted_identifier', 'value': buff,
                       'start': start, 'end': start + len(buff)}
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {'type': 'flatten', 'value': '[]',
                           'start': start, 'end': start + 2}
                elif next_char == '?':
                    self._next()
                    yield {'type': 'filter', 'value': '[?',
                           'start': start, 'end': start + 2}
                else:
                    yield {'type': 'lbracket', 'value': '[',
                           'start': start, 'end': start + 1}
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {'type': 'number', 'value': int(buff),
                       'start': start, 'end': start + len(buff)}
            elif self._current == '-':
                # Negative number.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {'type': 'number', 'value': int(buff),
                           'start': start, 'end': start + len(buff)}
                else:
                    raise LexerError(lexer_position=start,
                                     lexer_value=buff,
                                     message="Unknown token '%s'" % buff)
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                if self._next() == '=':
                    yield {'type': 'eq', 'value': '==',
                           'start': self._position - 1, 'end': self._position}
                    self._next()
                else:
                    if self._current is None:
                        # If we're at the EOF, we never advanced
                        # the position so we don't need to rewind
                        # it back one location.
                        position = self._position
                    else:
                        position = self._position - 1
                    raise LexerError(
                        lexer_position=position,
                        lexer_value='=',
                        message="Unknown token '='")
            else:
                raise LexerError(lexer_position=self._position,
                                 lexer_value=self._current,
                                 message="Unknown token %s" % self._current)
        yield {'type': 'eof', 'value': '',
               'start': self._length, 'end': self._length}

    def _consume_number(self):
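        """Consume the current character plus any following digits.

        The current character may be a '-' sign; the caller is
        responsible for validating that the result is a number.
        """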

        start = self._position
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
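        """Reset the lexer state for a new expression.

        An empty (or None) expression raises EmptyExpressionError.
        """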

        if not expression:
            raise EmptyExpressionError()
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
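        """Advance one character and return it, or None at end of input."""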

        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
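        """Return everything up to ``delimiter``, preserving backslash
        escape sequences in the returned buffer; raise LexerError if
        the expression ends before the delimiter is found.
        """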

        # Consume until the delimiter is reached,
        # allowing for the delimiter to be escaped with "\".
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Unclosed %s delimiter" % delimiter)
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff

    def _consume_literal(self):
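        """Lex a backtick-delimited JSON literal.

        Values that fail to parse as JSON are retried as quoted JSON
        strings (deprecated JEP-12 behavior) before giving up.
        """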

        start = self._position
        lexeme = self._consume_until('`').replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted
                # JSON strings during the JEP-12 deprecation period.
                parsed_json = loads('"%s"' % lexeme.lstrip())
                warnings.warn("deprecated string literal syntax",
                              PendingDeprecationWarning)
            except ValueError:
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Bad token %s" % lexeme)
        token_len = self._position - start
        return {'type': 'literal', 'value': parsed_json,
                'start': start, 'end': token_len}

    def _consume_quoted_identifier(self):
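        """Lex a double-quoted identifier, reusing JSON string parsing
        so escape sequences behave exactly as they do in JSON.
        """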

        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            token_len = self._position - start
            return {'type': 'quoted_identifier', 'value': loads(lexeme),
                    'start': start, 'end': token_len}
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(lexer_position=start,
                             lexer_value=lexeme,
                             message=error_message)

    def _consume_raw_string_literal(self):
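        """Lex a single-quoted raw string literal; the value is taken
        verbatim except that escaped single quotes are unescaped.
        """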

        start = self._position
        lexeme = self._consume_until("'").replace("\\'", "'")
        token_len = self._position - start
        return {'type': 'literal', 'value': lexeme,
                'start': start, 'end': token_len}

    def _match_or_else(self, expected, match_type, else_type):
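        """Lex a one- or two-character operator: if the next character
        equals ``expected``, emit ``match_type`` for the pair (e.g.
        '==', '||'); otherwise emit ``else_type`` for the single
        character already consumed.
        """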

        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {'type': match_type, 'value': current + next_char,
                    'start': start, 'end': start + 1}
        return {'type': else_type, 'value': current,
                'start': start, 'end': start}
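

if __name__ == '__main__':
    # A minimal usage sketch, not part of the upstream module: tokenize
    # an arbitrary example expression and print each token dict. The
    # expression below is illustrative only.
    for token in Lexer().tokenize("foo[?bar > `1`].baz"):
        print(token)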