/src/glaze/include/glaze/json/jmespath.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Glaze Library |
2 | | // For the license information refer to glaze.hpp |
3 | | |
4 | | #pragma once |
5 | | |
6 | | #include <algorithm> |
7 | | #include <charconv> |
8 | | #include <optional> |
9 | | |
10 | | #include "glaze/core/seek.hpp" |
11 | | #include "glaze/json/read.hpp" |
12 | | #include "glaze/json/skip.hpp" |
13 | | #include "glaze/util/parse.hpp" |
14 | | #include "glaze/util/string_literal.hpp" |
15 | | |
16 | | namespace glz |
17 | | { |
18 | | namespace jmespath |
19 | | { |
// Error categories reported while splitting a JMESPath expression into tokens.
enum struct tokenization_error {
   none, // No error
   unbalanced_brackets, // Mismatched '[' and ']'
   unbalanced_parentheses, // Mismatched '(' and ')'
   unclosed_string, // String literal not properly closed
   invalid_escape_sequence, // Invalid escape sequence in string
   unexpected_delimiter, // Unexpected character encountered (e.g., consecutive delimiters)
};

// Outcome of splitting one token off the front of an expression.
struct tokenization_result
{
   std::string_view first; // The leading token (empty on error)
   std::string_view second; // The rest of the expression, starting at the delimiter (empty if none)
   tokenization_error error; // tokenization_error::none on success
};

/**
 * @brief Trims leading whitespace characters from a string_view.
 *
 * Whitespace is space, tab, line feed and carriage return.
 *
 * @param s The input string_view to trim.
 * @return A string_view with leading whitespace removed (empty if s is all whitespace).
 */
inline constexpr std::string_view trim_left(std::string_view s)
{
   // find_first_not_of yields npos for an all-whitespace input; clamping to size() gives an empty tail.
   return s.substr(std::min(s.find_first_not_of(" \t\n\r"), s.size()));
}

/**
 * @brief Splits a JMESPath expression into the first token and the remaining path with error handling.
 *
 * The split happens at the first '.' or '|' that is outside of string literals, brackets and
 * parentheses. When a split happens the scan stops there, so the remainder is not validated.
 *
 * @param s The JMESPath expression to tokenize.
 * @return tokenization_result containing:
 *         - first: The first token of the expression.
 *         - second: The remaining expression after the first token, beginning with the delimiter.
 *         - error: tokenization_error indicating if an error occurred.
 */
inline constexpr tokenization_result tokenize_jmes_path(std::string_view s)
{
   if (s.empty()) {
      return {"", "", tokenization_error::none};
   }

   const size_t len = s.size();
   size_t pos = 0;
   int bracket_level = 0;
   int parenthesis_level = 0;
   bool in_string = false;
   char string_delim = '\0';

   while (pos < len) {
      const char current = s[pos];

      if (in_string) {
         if (current == '\\') {
            // A backslash must be followed by one of the known escape characters.
            if (pos + 1 >= len) {
               return {"", "", tokenization_error::invalid_escape_sequence};
            }
            switch (s[pos + 1]) {
            case '"':
            case '\'':
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'u':
               break;
            default:
               return {"", "", tokenization_error::invalid_escape_sequence};
            }
            // Consume the backslash AND the escaped character. Advancing by one would re-examine
            // the escaped character, so "\\x" would be misread as the invalid escape "\x".
            pos += 2;
            continue;
         }
         if (current == string_delim) {
            // Escaped delimiters are consumed above, so this one closes the literal.
            in_string = false;
         }
         ++pos;
         continue;
      }

      switch (current) {
      case '"':
      case '\'':
         in_string = true;
         string_delim = current;
         break;
      case '[':
         ++bracket_level;
         break;
      case ']':
         if (bracket_level == 0) {
            return {"", "", tokenization_error::unbalanced_brackets};
         }
         --bracket_level;
         break;
      case '(':
         ++parenthesis_level;
         break;
      case ')':
         if (parenthesis_level == 0) {
            return {"", "", tokenization_error::unbalanced_parentheses};
         }
         --parenthesis_level;
         break;
      case '.':
      case '|':
         if (bracket_level == 0 && parenthesis_level == 0) {
            // Top-level delimiter: split here and keep the delimiter at the front of `second`.
            return {s.substr(0, pos), s.substr(pos), tokenization_error::none};
         }
         break;
      default:
         break;
      }
      ++pos;
   }

   if (in_string) {
      return {"", "", tokenization_error::unclosed_string};
   }

   if (bracket_level != 0) {
      return {"", "", tokenization_error::unbalanced_brackets};
   }

   if (parenthesis_level != 0) {
      return {"", "", tokenization_error::unbalanced_parentheses};
   }

   // If no delimiter found, return the whole string as first token
   return {s, "", tokenization_error::none};
}
167 | | |
168 | | inline constexpr tokenization_error finalize_tokens(std::vector<std::string_view>& tokens) |
169 | 0 | { |
170 | 0 | std::vector<std::string_view> final_tokens; |
171 | 0 | final_tokens.reserve(tokens.size()); // at least |
172 | 0 |
|
173 | 0 | for (auto token : tokens) { |
174 | 0 | size_t start = 0; |
175 | 0 | while (start < token.size()) { |
176 | 0 | // Find the next '[' |
177 | 0 | auto open = token.find('[', start); |
178 | 0 | if (open == std::string_view::npos) { |
179 | 0 | // No more bracketed segments |
180 | 0 | if (start < token.size()) { |
181 | 0 | // Add remaining part (if not empty) |
182 | 0 | final_tokens.push_back(token.substr(start)); |
183 | 0 | } |
184 | 0 | break; // move to next token |
185 | 0 | } |
186 | 0 | else { |
187 | 0 | // If there's a key part before the bracket, add it |
188 | 0 | if (open > start) { |
189 | 0 | final_tokens.push_back(token.substr(start, open - start)); |
190 | 0 | } |
191 | 0 | // Now find the closing bracket ']' |
192 | 0 | auto close = token.find(']', open + 1); |
193 | 0 | if (close == std::string_view::npos) { |
194 | 0 | // Mismatched bracket |
195 | 0 | return tokenization_error::unbalanced_brackets; |
196 | 0 | } |
197 | 0 | // Extract the bracketed token: e.g. [0] |
198 | 0 | final_tokens.push_back(token.substr(open, close - open + 1)); |
199 | 0 | start = close + 1; // continue after the ']' |
200 | 0 | } |
201 | 0 | } |
202 | 0 | } |
203 | 0 |
|
204 | 0 | tokens = std::move(final_tokens); |
205 | 0 | return tokenization_error::none; |
206 | 0 | } |
207 | | |
208 | | /** |
209 | | * @brief Recursively tokenizes a full JMESPath expression into all its tokens with error handling. |
210 | | * |
211 | | * @param expression The complete JMESPath expression to tokenize. |
212 | | * @param tokens A vector to store all tokens in the order they appear. |
213 | | * @param error An output parameter to capture any tokenization errors. |
214 | | * @return true if tokenization succeeded without errors, false otherwise. |
215 | | */ |
216 | | inline constexpr jmespath::tokenization_error tokenize_full_jmespath(std::string_view expression, |
217 | | std::vector<std::string_view>& tokens) |
218 | 0 | { |
219 | 0 | tokens.clear(); |
220 | 0 | auto remaining = expression; |
221 | 0 |
|
222 | 0 | while (!remaining.empty()) { |
223 | 0 | tokenization_result result = tokenize_jmes_path(remaining); |
224 | 0 | if (result.error != tokenization_error::none) { |
225 | 0 | return result.error; |
226 | 0 | } |
227 | 0 |
|
228 | 0 | if (result.first.empty()) { |
229 | 0 | return tokenization_error::unexpected_delimiter; |
230 | 0 | } |
231 | 0 |
|
232 | 0 | tokens.emplace_back(result.first); |
233 | 0 |
|
234 | 0 | if (!result.second.empty()) { |
235 | 0 | char delimiter = result.second.front(); |
236 | 0 | if (delimiter == '.' || delimiter == '|') { |
237 | 0 | remaining = result.second.substr(1); |
238 | 0 | remaining = trim_left(remaining); |
239 | 0 | if (!remaining.empty() && (remaining.front() == '.' || remaining.front() == '|')) { |
240 | 0 | return tokenization_error::unexpected_delimiter; |
241 | 0 | } |
242 | 0 | } |
243 | 0 | else { |
244 | 0 | return tokenization_error::unexpected_delimiter; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | else { |
248 | 0 | break; |
249 | 0 | } |
250 | 0 | } |
251 | 0 |
|
252 | 0 | // New step: finalize the tokens by splitting multiple bracket accesses |
253 | 0 | auto err = finalize_tokens(tokens); |
254 | 0 | if (err != jmespath::tokenization_error::none) { |
255 | 0 | return err; |
256 | 0 | } |
257 | 0 |
|
258 | 0 | return tokenization_error::none; |
259 | 0 | } |
260 | | |
261 | | template <const std::string_view& expression> |
262 | | consteval auto tokenize_as_array() |
263 | | { |
264 | | constexpr auto N = [] { |
265 | | std::vector<std::string_view> tokens; |
266 | | auto err = tokenize_full_jmespath(expression, tokens); |
267 | | if (err != tokenization_error::none) { |
268 | | std::abort(); |
269 | | } |
270 | | return tokens.size(); |
271 | | }(); |
272 | | |
273 | | std::vector<std::string_view> tokens; |
274 | | auto err = tokenize_full_jmespath(expression, tokens); |
275 | | if (err != tokenization_error::none) { |
276 | | std::abort(); |
277 | | } |
278 | | |
279 | | std::array<std::string_view, N> arr{}; |
280 | | for (std::size_t i = 0; i < N; ++i) { |
281 | | arr[i] = tokens[i]; |
282 | | } |
283 | | return arr; // Vector destroyed here, leaving only the array. |
284 | | } |
285 | | |
// Decomposition of a single JMESPath token into an optional key and an optional
// bracket expression (single index or slice).
struct ArrayParseResult
{
   bool is_array_access{false}; // Set when the token contains a '[' (i.e. "key[...]" or "[...]")
   bool error{false}; // Set when the bracket expression could not be parsed
   std::string_view key{}; // Text in front of the first '[' (the whole token if there is none)
   std::optional<int32_t> start{}; // Single index, or slice start when given
   std::optional<int32_t> end{}; // Slice end when given
   std::optional<int32_t> step{}; // Slice step when given
   size_t colon_count{0}; // Number of ':' characters found inside the brackets
};
296 | | |
297 | | inline constexpr std::optional<int> parse_int(std::string_view s) |
298 | 0 | { |
299 | 0 | if (s.empty()) { |
300 | 0 | return std::nullopt; |
301 | 0 | } |
302 | 0 | int value; |
303 | 0 | auto result = detail::from_chars(s.data(), s.data() + s.size(), value); |
304 | 0 | if (result.ec == std::errc()) { |
305 | 0 | return value; |
306 | 0 | } |
307 | 0 | return std::nullopt; |
308 | 0 | } |
309 | | |
310 | | // Parse a token that may have array indexing or slicing. |
311 | | inline constexpr ArrayParseResult parse_jmespath_token(std::string_view token) |
312 | 0 | { |
313 | 0 | ArrayParseResult result; |
314 | 0 |
|
315 | 0 | // Find the first '[' |
316 | 0 | auto open_pos = token.find('['); |
317 | 0 | if (open_pos == std::string_view::npos) { |
318 | 0 | // No array access, just a key. |
319 | 0 | result.key = token; |
320 | 0 | return result; |
321 | 0 | } |
322 | 0 |
|
323 | 0 | auto close_pos = token.rfind(']'); |
324 | 0 | if (close_pos == std::string_view::npos || close_pos < open_pos) { |
325 | 0 | // Mismatched brackets -> error |
326 | 0 | result.key = token.substr(0, open_pos); |
327 | 0 | result.is_array_access = true; |
328 | 0 | result.error = true; |
329 | 0 | return result; |
330 | 0 | } |
331 | 0 |
|
332 | 0 | result.is_array_access = true; |
333 | 0 | result.key = token.substr(0, open_pos); |
334 | 0 | auto inside = token.substr(open_pos + 1, close_pos - (open_pos + 1)); |
335 | 0 | if (inside.empty()) { |
336 | 0 | // Empty inside "[]" is invalid |
337 | 0 | result.error = true; |
338 | 0 | return result; |
339 | 0 | } |
340 | 0 |
|
341 | 0 | // Count colons to determine if it's a slice |
342 | 0 | size_t colon_count = 0; |
343 | 0 | for (char c : inside) { |
344 | 0 | if (c == ':') { |
345 | 0 | colon_count++; |
346 | 0 | } |
347 | 0 | } |
348 | 0 | result.colon_count = colon_count; |
349 | 0 |
|
350 | 0 | // Helper lambda to parse slice parts |
351 | 0 | auto parse_slice = [&](std::string_view inside) { |
352 | 0 | std::string_view parts[3]; |
353 | 0 | { |
354 | 0 | size_t start_idx = 0; |
355 | 0 | int idx = 0; |
356 | 0 | for (size_t i = 0; i <= inside.size(); ++i) { |
357 | 0 | if (i == inside.size() || inside[i] == ':') { |
358 | 0 | if (idx < 3) { |
359 | 0 | parts[idx] = inside.substr(start_idx, i - start_idx); |
360 | 0 | idx++; |
361 | 0 | } |
362 | 0 | start_idx = i + 1; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | 0 |
|
367 | 0 | // Parse start |
368 | 0 | if (!parts[0].empty()) { |
369 | 0 | auto val = parse_int(parts[0]); |
370 | 0 | if (!val.has_value()) { |
371 | 0 | result.error = true; |
372 | 0 | } |
373 | 0 | else { |
374 | 0 | result.start = val; |
375 | 0 | } |
376 | 0 | } |
377 | 0 |
|
378 | 0 | // Parse end |
379 | 0 | if (!parts[1].empty()) { |
380 | 0 | auto val = parse_int(parts[1]); |
381 | 0 | if (!val.has_value()) { |
382 | 0 | result.error = true; |
383 | 0 | } |
384 | 0 | else { |
385 | 0 | result.end = val; |
386 | 0 | } |
387 | 0 | } |
388 | 0 |
|
389 | 0 | // Parse step |
390 | 0 | if (colon_count == 2 && !parts[2].empty()) { |
391 | 0 | auto val = parse_int(parts[2]); |
392 | 0 | if (!val.has_value()) { |
393 | 0 | result.error = true; |
394 | 0 | } |
395 | 0 | else { |
396 | 0 | result.step = val; |
397 | 0 | } |
398 | 0 | } |
399 | 0 | }; |
400 | 0 |
|
401 | 0 | if (colon_count == 0) { |
402 | 0 | // single index |
403 | 0 | auto val = parse_int(inside); |
404 | 0 | if (!val.has_value()) { |
405 | 0 | result.error = true; |
406 | 0 | } |
407 | 0 | else { |
408 | 0 | result.start = val; |
409 | 0 | } |
410 | 0 | } |
411 | 0 | else if (colon_count == 1 || colon_count == 2) { |
412 | 0 | // slice |
413 | 0 | parse_slice(inside); |
414 | 0 | } |
415 | 0 | else { |
416 | 0 | // More than 2 colons is invalid |
417 | 0 | result.error = true; |
418 | 0 | } |
419 | 0 |
|
420 | 0 | return result; |
421 | 0 | } |
422 | | } |
423 | | |
424 | | namespace detail |
425 | | { |
426 | | template <auto Opts = opts{}, class T> |
427 | | requires(Opts.format == JSON && not readable_array_t<T>) |
428 | | inline void handle_slice(const jmespath::ArrayParseResult&, T&&, context& ctx, auto&&, auto&&) |
429 | | { |
430 | | ctx.error = error_code::syntax_error; |
431 | | } |
432 | | |
433 | | template <auto Opts = opts{}, class T> |
434 | | requires(Opts.format == JSON && readable_array_t<T>) |
435 | | inline void handle_slice(const jmespath::ArrayParseResult& decomposed_key, T&& value, context& ctx, auto&& it, |
436 | | auto&& end) |
437 | | { |
438 | | if (skip_ws<Opts>(ctx, it, end)) { |
439 | | return; |
440 | | } |
441 | | |
442 | | // Determine slice parameters |
443 | | int32_t step_idx = decomposed_key.step.value_or(1); |
444 | | bool has_negative_index = (decomposed_key.start.value_or(0) < 0) || (decomposed_key.end.value_or(0) < 0); |
445 | | |
446 | | // If we have negative indices or step != 1, fall back to the original method (read all then slice) |
447 | | if (step_idx != 1 || has_negative_index) { |
448 | | // Original fallback behavior: |
449 | | // Read entire array into value first |
450 | | value.clear(); |
451 | | if (*it == ']') { |
452 | | // empty array |
453 | | ++it; // consume ']' |
454 | | } |
455 | | else { |
456 | | while (true) { |
457 | | parse<Opts.format>::template op<Opts>(value.emplace_back(), ctx, it, end); |
458 | | if (bool(ctx.error)) [[unlikely]] |
459 | | return; |
460 | | |
461 | | if (skip_ws<Opts>(ctx, it, end)) { |
462 | | return; |
463 | | } |
464 | | if (*it == ']') { |
465 | | ++it; |
466 | | break; |
467 | | } |
468 | | if (*it != ',') { |
469 | | ctx.error = error_code::parse_error; |
470 | | return; |
471 | | } |
472 | | ++it; |
473 | | if (skip_ws<Opts>(ctx, it, end)) { |
474 | | return; |
475 | | } |
476 | | } |
477 | | } |
478 | | |
479 | | // Now do the slicing |
480 | | const int32_t size = static_cast<int32_t>(value.size()); |
481 | | auto wrap_index = [&](int32_t idx) { |
482 | | if (idx < 0) idx += size; |
483 | | return std::clamp(idx, int32_t{0}, size); |
484 | | }; |
485 | | |
486 | | const int32_t start_idx = wrap_index(decomposed_key.start.value_or(0)); |
487 | | const int32_t end_idx = wrap_index(decomposed_key.end.value_or(size)); |
488 | | |
489 | | if (step_idx == 1) { |
490 | | if (start_idx < end_idx) { |
491 | | if (start_idx > 0) { |
492 | | value.erase(value.begin(), value.begin() + start_idx); |
493 | | } |
494 | | if (static_cast<size_t>(end_idx - start_idx) < value.size()) { |
495 | | value.erase(value.begin() + (end_idx - start_idx), value.end()); |
496 | | } |
497 | | } |
498 | | else { |
499 | | value.clear(); |
500 | | } |
501 | | } |
502 | | else { |
503 | | // For steps != 1 (or negative steps), the fallback path was already chosen. |
504 | | // Just apply the same logic as before. |
505 | | std::size_t dest = 0; |
506 | | if (step_idx > 0) { |
507 | | for (int32_t i = start_idx; i < end_idx; i += step_idx) { |
508 | | value[dest++] = std::move(value[i]); |
509 | | } |
510 | | } |
511 | | else { |
512 | | for (int32_t i = start_idx; i > end_idx; i += step_idx) { |
513 | | value[dest++] = std::move(value[i]); |
514 | | } |
515 | | } |
516 | | value.resize(dest); |
517 | | } |
518 | | |
519 | | return; |
520 | | } |
521 | | |
522 | | // If we reach here, step == 1 and no negative indices, so we can do partial reading. |
523 | | value.clear(); |
524 | | const int32_t start_idx = decomposed_key.start.value_or(0); |
525 | | const int32_t end_idx = decomposed_key.end.value_or((std::numeric_limits<int32_t>::max)()); |
526 | | |
527 | | // If empty array |
528 | | if (*it == ']') { |
529 | | ++it; // consume ']' |
530 | | return; |
531 | | } |
532 | | |
533 | | // We'll read elements and track their index |
534 | | int32_t current_index = 0; |
535 | | while (true) { |
536 | | if (skip_ws<Opts>(ctx, it, end)) { |
537 | | return; |
538 | | } |
539 | | |
540 | | // Decide whether we read or skip this element |
541 | | if (current_index < start_idx) { |
542 | | // Skip this element |
543 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
544 | | if (bool(ctx.error)) [[unlikely]] |
545 | | return; |
546 | | } |
547 | | else if (current_index >= start_idx && current_index < end_idx) { |
548 | | // Read this element into value |
549 | | parse<Opts.format>::template op<Opts>(value.emplace_back(), ctx, it, end); |
550 | | if (bool(ctx.error)) [[unlikely]] |
551 | | return; |
552 | | } |
553 | | else { |
554 | | // current_index >= end_idx, we can skip reading into value |
555 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
556 | | if (bool(ctx.error)) [[unlikely]] |
557 | | return; |
558 | | } |
559 | | |
560 | | if (skip_ws<Opts>(ctx, it, end)) { |
561 | | return; |
562 | | } |
563 | | if (*it == ']') { |
564 | | ++it; // finished reading array |
565 | | break; |
566 | | } |
567 | | if (*it != ',') { |
568 | | ctx.error = error_code::parse_error; |
569 | | return; |
570 | | } |
571 | | ++it; // consume ',' |
572 | | if (skip_ws<Opts>(ctx, it, end)) { |
573 | | return; |
574 | | } |
575 | | |
576 | | ++current_index; |
577 | | } |
578 | | } |
579 | | } |
580 | | |
581 | | // Read into a C++ type given a path denoted by a JMESPath query |
582 | | template <string_literal Path, auto Options = opts{}, class T, contiguous Buffer> |
583 | | requires(Options.format == JSON) |
584 | | [[nodiscard]] inline error_ctx read_jmespath(T&& value, Buffer&& buffer) |
585 | | { |
586 | | static constexpr auto S = chars<Path>; |
587 | | static constexpr auto tokens = jmespath::tokenize_as_array<S>(); |
588 | | static constexpr auto N = tokens.size(); |
589 | | |
590 | | constexpr bool use_padded = resizable<Buffer> && non_const_buffer<Buffer> && !check_disable_padding(Options); |
591 | | |
592 | | static constexpr auto Opts = use_padded ? is_padded_on<Options>() : is_padded_off<Options>(); |
593 | | |
594 | | if constexpr (use_padded) { |
595 | | // Pad the buffer for SWAR |
596 | | buffer.resize(buffer.size() + padding_bytes); |
597 | | } |
598 | | auto p = read_iterators<Opts>(buffer); |
599 | | auto it = p.first; |
600 | | auto end = p.second; |
601 | | auto start = it; |
602 | | |
603 | | context ctx{}; |
604 | | |
605 | | if constexpr (N == 0) { |
606 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
607 | | } |
608 | | else { |
609 | | using namespace glz::detail; |
610 | | |
611 | | skip_ws<Opts>(ctx, it, end); |
612 | | |
613 | | for_each<N>([&]<auto I>() { |
614 | | if (bool(ctx.error)) [[unlikely]] { |
615 | | return; |
616 | | } |
617 | | |
618 | | static constexpr auto decomposed_key = jmespath::parse_jmespath_token(tokens[I]); |
619 | | static constexpr auto key = decomposed_key.key; |
620 | | |
621 | | if constexpr (decomposed_key.is_array_access) { |
622 | | // If we have a key, that means we're looking into an object like: key[0:5] |
623 | | if constexpr (key.empty()) { |
624 | | if (skip_ws<Opts>(ctx, it, end)) { |
625 | | return; |
626 | | } |
627 | | // We expect the JSON at this level to be an array |
628 | | if (match_invalid_end<'[', Opts>(ctx, it, end)) { |
629 | | return; |
630 | | } |
631 | | |
632 | | // If this is a slice (colon_count > 0) |
633 | | if constexpr (decomposed_key.colon_count > 0) { |
634 | | detail::handle_slice<Opts>(decomposed_key, value, ctx, it, end); |
635 | | } |
636 | | else { |
637 | | // SINGLE INDEX SCENARIO (no slice, just an index) |
638 | | if constexpr (decomposed_key.start.has_value()) { |
639 | | constexpr auto n = decomposed_key.start.value(); |
640 | | |
641 | | if constexpr (I == (N - 1)) { |
642 | | // Skip until we reach the target element n |
643 | | for (int32_t i = 0; i < n; ++i) { |
644 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
645 | | if (bool(ctx.error)) [[unlikely]] |
646 | | return; |
647 | | |
648 | | if (*it != ',') { |
649 | | ctx.error = error_code::array_element_not_found; |
650 | | return; |
651 | | } |
652 | | ++it; |
653 | | if (skip_ws<Opts>(ctx, it, end)) { |
654 | | return; |
655 | | } |
656 | | } |
657 | | |
658 | | // Now read the element at index n |
659 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
660 | | } |
661 | | else { |
662 | | // Not the last token. We must still parse the element at index n so the next indexing can |
663 | | // proceed. |
664 | | for (int32_t i = 0; i < n; ++i) { |
665 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
666 | | if (bool(ctx.error)) [[unlikely]] |
667 | | return; |
668 | | |
669 | | if (*it != ',') { |
670 | | ctx.error = error_code::array_element_not_found; |
671 | | return; |
672 | | } |
673 | | ++it; |
674 | | if (skip_ws<Opts>(ctx, it, end)) { |
675 | | return; |
676 | | } |
677 | | } |
678 | | } |
679 | | } |
680 | | else { |
681 | | ctx.error = error_code::array_element_not_found; |
682 | | return; |
683 | | } |
684 | | } |
685 | | |
686 | | // After handling the array access, we're done for this token |
687 | | return; |
688 | | } |
689 | | else { |
690 | | // Object scenario with a key, like: key[0:5] |
691 | | if (match_invalid_end<'{', Opts>(ctx, it, end)) { |
692 | | return; |
693 | | } |
694 | | |
695 | | while (true) { |
696 | | if (skip_ws<Opts>(ctx, it, end)) { |
697 | | return; |
698 | | } |
699 | | if (match<'"'>(ctx, it)) { |
700 | | return; |
701 | | } |
702 | | |
703 | | auto* start = it; |
704 | | skip_string_view<Opts>(ctx, it, end); |
705 | | if (bool(ctx.error)) [[unlikely]] |
706 | | return; |
707 | | const sv k = {start, size_t(it - start)}; |
708 | | ++it; |
709 | | |
710 | | if (key.size() == k.size() && comparitor<key>(k.data())) { |
711 | | if (skip_ws<Opts>(ctx, it, end)) { |
712 | | return; |
713 | | } |
714 | | if (match_invalid_end<':', Opts>(ctx, it, end)) { |
715 | | return; |
716 | | } |
717 | | if (skip_ws<Opts>(ctx, it, end)) { |
718 | | return; |
719 | | } |
720 | | if (match_invalid_end<'[', Opts>(ctx, it, end)) { |
721 | | return; |
722 | | } |
723 | | |
724 | | // Distinguish single index vs slice using colon_count |
725 | | if constexpr (decomposed_key.colon_count > 0) { |
726 | | detail::handle_slice<Opts, decomposed_key>(value, ctx, it, end); |
727 | | } |
728 | | else { |
729 | | // SINGLE INDEX SCENARIO (colon_count == 0) |
730 | | if constexpr (decomposed_key.start.has_value()) { |
731 | | // Skip until we reach the target element |
732 | | constexpr auto n = decomposed_key.start.value(); |
733 | | for (int32_t i = 0; i < n; ++i) { |
734 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
735 | | if (bool(ctx.error)) [[unlikely]] |
736 | | return; |
737 | | |
738 | | if (*it != ',') { |
739 | | ctx.error = error_code::array_element_not_found; |
740 | | return; |
741 | | } |
742 | | ++it; |
743 | | if (skip_ws<Opts>(ctx, it, end)) { |
744 | | return; |
745 | | } |
746 | | } |
747 | | |
748 | | if (skip_ws<Opts>(ctx, it, end)) { |
749 | | return; |
750 | | } |
751 | | |
752 | | if constexpr (I == (N - 1)) { |
753 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
754 | | } |
755 | | return; |
756 | | } |
757 | | else { |
758 | | ctx.error = error_code::array_element_not_found; |
759 | | return; |
760 | | } |
761 | | } |
762 | | } |
763 | | else { |
764 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
765 | | if (bool(ctx.error)) [[unlikely]] { |
766 | | return; |
767 | | } |
768 | | if (*it != ',') { |
769 | | ctx.error = error_code::key_not_found; |
770 | | return; |
771 | | } |
772 | | ++it; |
773 | | } |
774 | | } |
775 | | } |
776 | | } |
777 | | else { |
778 | | // If it's not array access, we are dealing with an object key |
779 | | if (match_invalid_end<'{', Opts>(ctx, it, end)) { |
780 | | return; |
781 | | } |
782 | | |
783 | | while (it < end) { |
784 | | if (skip_ws<Opts>(ctx, it, end)) { |
785 | | return; |
786 | | } |
787 | | if (match<'"'>(ctx, it)) { |
788 | | return; |
789 | | } |
790 | | |
791 | | auto* start = it; |
792 | | skip_string_view<Opts>(ctx, it, end); |
793 | | if (bool(ctx.error)) [[unlikely]] |
794 | | return; |
795 | | const sv k = {start, size_t(it - start)}; |
796 | | ++it; |
797 | | |
798 | | if (key.size() == k.size() && comparitor<key>(k.data())) { |
799 | | if (skip_ws<Opts>(ctx, it, end)) { |
800 | | return; |
801 | | } |
802 | | if (match_invalid_end<':', Opts>(ctx, it, end)) { |
803 | | return; |
804 | | } |
805 | | if (skip_ws<Opts>(ctx, it, end)) { |
806 | | return; |
807 | | } |
808 | | |
809 | | if constexpr (I == (N - 1)) { |
810 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
811 | | } |
812 | | return; |
813 | | } |
814 | | else { |
815 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
816 | | if (bool(ctx.error)) [[unlikely]] { |
817 | | return; |
818 | | } |
819 | | if (*it != ',') { |
820 | | ctx.error = error_code::key_not_found; |
821 | | return; |
822 | | } |
823 | | ++it; |
824 | | } |
825 | | } |
826 | | } |
827 | | }); |
828 | | } |
829 | | |
830 | | if constexpr (use_padded) { |
831 | | // Restore the original buffer state |
832 | | buffer.resize(buffer.size() - padding_bytes); |
833 | | } |
834 | | |
835 | | return {ctx.error, ctx.custom_error_message, size_t(it - start), ctx.includer_error}; |
836 | | } |
837 | | |
838 | | // A "compiled" jmespath expression, which can be pre-computed for efficient traversal |
839 | | struct jmespath_expression |
840 | | { |
841 | | std::string_view path{}; |
842 | | jmespath::tokenization_error error{}; |
843 | | std::vector<std::string_view> tokens{}; // evaluated tokens |
844 | | |
845 | | jmespath_expression(const std::string_view input_path) noexcept : path(input_path) |
846 | 0 | { |
847 | 0 | error = jmespath::tokenize_full_jmespath(path, tokens); |
848 | 0 | } |
849 | | |
850 | | template <size_t N> |
851 | | jmespath_expression(const char (&input_path)[N]) noexcept : path(input_path) |
852 | | { |
853 | | error = jmespath::tokenize_full_jmespath(path, tokens); |
854 | | } |
855 | | jmespath_expression(const jmespath_expression&) noexcept = default; |
856 | | jmespath_expression(jmespath_expression&&) noexcept = default; |
857 | | jmespath_expression& operator=(const jmespath_expression&) noexcept = default; |
858 | | jmespath_expression& operator=(jmespath_expression&&) noexcept = default; |
859 | | }; |
860 | | |
861 | | // Read into a C++ type given a path denoted by a JMESPath query |
862 | | // This version supports a runtime path |
863 | | template <auto Options = opts{}, class T, contiguous Buffer> |
864 | | requires(Options.format == JSON) |
865 | | [[nodiscard]] inline error_ctx read_jmespath(const jmespath_expression& expression, T&& value, Buffer&& buffer) |
866 | | { |
867 | | if (bool(expression.error)) { |
868 | | return {error_code::syntax_error, "JMESPath invalid expression"}; |
869 | | } |
870 | | |
871 | | const auto& tokens = expression.tokens; |
872 | | const auto N = tokens.size(); |
873 | | |
874 | | constexpr bool use_padded = resizable<Buffer> && non_const_buffer<Buffer> && !check_disable_padding(Options); |
875 | | static constexpr auto Opts = use_padded ? is_padded_on<Options>() : is_padded_off<Options>(); |
876 | | |
877 | | if constexpr (use_padded) { |
878 | | // Pad the buffer for SWAR |
879 | | buffer.resize(buffer.size() + padding_bytes); |
880 | | } |
881 | | auto p = read_iterators<Opts>(buffer); |
882 | | auto it = p.first; |
883 | | auto end = p.second; |
884 | | auto start = it; |
885 | | |
886 | | context ctx{}; |
887 | | |
888 | | if (N == 0) { |
889 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
890 | | } |
891 | | else { |
892 | | using namespace glz::detail; |
893 | | |
894 | | skip_ws<Opts>(ctx, it, end); |
895 | | |
896 | | for (size_t I = 0; I < N; ++I) { |
897 | | if (bool(ctx.error)) [[unlikely]] { |
898 | | break; |
899 | | } |
900 | | |
901 | | [&] { |
902 | | const auto decomposed_key = jmespath::parse_jmespath_token(tokens[I]); |
903 | | const auto& key = decomposed_key.key; |
904 | | |
905 | | if (decomposed_key.is_array_access) { |
906 | | if (key.empty()) { |
907 | | // Top-level array scenario |
908 | | if (skip_ws<Opts>(ctx, it, end)) { |
909 | | return; |
910 | | } |
911 | | if (match_invalid_end<'[', Opts>(ctx, it, end)) { |
912 | | return; |
913 | | } |
914 | | |
915 | | if (decomposed_key.colon_count > 0) { |
916 | | // Slice scenario |
917 | | detail::handle_slice(decomposed_key, value, ctx, it, end); |
918 | | return; |
919 | | } |
920 | | else { |
921 | | // Single index scenario |
922 | | if (decomposed_key.start.has_value()) { |
923 | | const int32_t n = decomposed_key.start.value(); |
924 | | |
925 | | if (I == (N - 1)) { |
926 | | // Skip until we reach the target element n |
927 | | for (int32_t i = 0; i < n; ++i) { |
928 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
929 | | if (bool(ctx.error)) [[unlikely]] |
930 | | return; |
931 | | |
932 | | if (*it != ',') { |
933 | | ctx.error = error_code::array_element_not_found; |
934 | | return; |
935 | | } |
936 | | ++it; |
937 | | if (skip_ws<Opts>(ctx, it, end)) { |
938 | | return; |
939 | | } |
940 | | } |
941 | | |
942 | | // Now read the element at index n |
943 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
944 | | } |
945 | | else { |
946 | | // Not the last token. We must still parse the element at index n so the next indexing can |
947 | | // proceed. |
948 | | for (int32_t i = 0; i < n; ++i) { |
949 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
950 | | if (bool(ctx.error)) [[unlikely]] |
951 | | return; |
952 | | |
953 | | if (*it != ',') { |
954 | | ctx.error = error_code::array_element_not_found; |
955 | | return; |
956 | | } |
957 | | ++it; |
958 | | if (skip_ws<Opts>(ctx, it, end)) { |
959 | | return; |
960 | | } |
961 | | } |
962 | | } |
963 | | } |
964 | | else { |
965 | | ctx.error = error_code::array_element_not_found; |
966 | | return; |
967 | | } |
968 | | return; |
969 | | } |
970 | | } |
971 | | else { |
972 | | // Object scenario: key[...] |
973 | | if (match_invalid_end<'{', Opts>(ctx, it, end)) { |
974 | | return; |
975 | | } |
976 | | |
977 | | while (true) { |
978 | | if (skip_ws<Opts>(ctx, it, end)) { |
979 | | return; |
980 | | } |
981 | | if (match<'"'>(ctx, it)) { |
982 | | return; |
983 | | } |
984 | | |
985 | | auto* start_pos = it; |
986 | | skip_string_view<Opts>(ctx, it, end); |
987 | | if (bool(ctx.error)) [[unlikely]] |
988 | | return; |
989 | | const sv k = {start_pos, size_t(it - start_pos)}; |
990 | | ++it; |
991 | | |
992 | | if (key.size() == k.size() && memcmp(key.data(), k.data(), key.size()) == 0) { |
993 | | if (skip_ws<Opts>(ctx, it, end)) { |
994 | | return; |
995 | | } |
996 | | if (match_invalid_end<':', Opts>(ctx, it, end)) { |
997 | | return; |
998 | | } |
999 | | if (skip_ws<Opts>(ctx, it, end)) { |
1000 | | return; |
1001 | | } |
1002 | | if (match_invalid_end<'[', Opts>(ctx, it, end)) { |
1003 | | return; |
1004 | | } |
1005 | | |
1006 | | if (decomposed_key.colon_count > 0) { |
1007 | | // Slice scenario |
1008 | | detail::handle_slice(decomposed_key, value, ctx, it, end); |
1009 | | return; |
1010 | | } |
1011 | | else { |
1012 | | // Single index scenario |
1013 | | if (decomposed_key.start.has_value()) { |
1014 | | int32_t n = decomposed_key.start.value(); |
1015 | | for (int32_t i = 0; i < n; ++i) { |
1016 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
1017 | | if (bool(ctx.error)) [[unlikely]] |
1018 | | return; |
1019 | | |
1020 | | if (*it != ',') { |
1021 | | ctx.error = error_code::array_element_not_found; |
1022 | | return; |
1023 | | } |
1024 | | ++it; |
1025 | | if (skip_ws<Opts>(ctx, it, end)) { |
1026 | | return; |
1027 | | } |
1028 | | } |
1029 | | |
1030 | | if (skip_ws<Opts>(ctx, it, end)) { |
1031 | | return; |
1032 | | } |
1033 | | |
1034 | | if (I == (N - 1)) { |
1035 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
1036 | | } |
1037 | | return; |
1038 | | } |
1039 | | else { |
1040 | | ctx.error = error_code::array_element_not_found; |
1041 | | return; |
1042 | | } |
1043 | | } |
1044 | | } |
1045 | | else { |
1046 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
1047 | | if (bool(ctx.error)) [[unlikely]] { |
1048 | | return; |
1049 | | } |
1050 | | if (*it != ',') { |
1051 | | ctx.error = error_code::key_not_found; |
1052 | | return; |
1053 | | } |
1054 | | ++it; |
1055 | | } |
1056 | | } |
1057 | | } |
1058 | | } |
1059 | | else { |
1060 | | // Non-array access: key-only navigation |
1061 | | if (match_invalid_end<'{', Opts>(ctx, it, end)) { |
1062 | | return; |
1063 | | } |
1064 | | |
1065 | | while (it < end) { |
1066 | | if (skip_ws<Opts>(ctx, it, end)) { |
1067 | | return; |
1068 | | } |
1069 | | if (match<'"'>(ctx, it)) { |
1070 | | return; |
1071 | | } |
1072 | | |
1073 | | auto* start_pos = it; |
1074 | | skip_string_view<Opts>(ctx, it, end); |
1075 | | if (bool(ctx.error)) [[unlikely]] |
1076 | | return; |
1077 | | const sv k = {start_pos, size_t(it - start_pos)}; |
1078 | | ++it; |
1079 | | |
1080 | | if (key.size() == k.size() && memcmp(key.data(), k.data(), key.size()) == 0) { |
1081 | | if (skip_ws<Opts>(ctx, it, end)) { |
1082 | | return; |
1083 | | } |
1084 | | if (match_invalid_end<':', Opts>(ctx, it, end)) { |
1085 | | return; |
1086 | | } |
1087 | | if (skip_ws<Opts>(ctx, it, end)) { |
1088 | | return; |
1089 | | } |
1090 | | |
1091 | | if (I == (N - 1)) { |
1092 | | parse<Opts.format>::template op<Opts>(value, ctx, it, end); |
1093 | | } |
1094 | | return; |
1095 | | } |
1096 | | else { |
1097 | | skip_value<JSON>::op<Opts>(ctx, it, end); |
1098 | | if (bool(ctx.error)) [[unlikely]] { |
1099 | | return; |
1100 | | } |
1101 | | if (*it != ',') { |
1102 | | ctx.error = error_code::key_not_found; |
1103 | | return; |
1104 | | } |
1105 | | ++it; |
1106 | | } |
1107 | | } |
1108 | | } |
1109 | | }(); |
1110 | | } |
1111 | | } |
1112 | | |
1113 | | if constexpr (use_padded) { |
1114 | | // Restore the original buffer state |
1115 | | buffer.resize(buffer.size() - padding_bytes); |
1116 | | } |
1117 | | |
1118 | | return {ctx.error, ctx.custom_error_message, size_t(it - start), ctx.includer_error}; |
1119 | | } |
1120 | | |
1121 | | } |