Coverage Report

Created: 2025-07-11 06:48

/src/glaze/include/glaze/json/jmespath.hpp
Line
Count
Source (jump to first uncovered line)
1
// Glaze Library
2
// For the license information refer to glaze.hpp
3
4
#pragma once
5
6
#include <algorithm>
7
#include <charconv>
8
#include <optional>
9
10
#include "glaze/core/seek.hpp"
11
#include "glaze/json/read.hpp"
12
#include "glaze/json/skip.hpp"
13
#include "glaze/util/parse.hpp"
14
#include "glaze/util/string_literal.hpp"
15
16
namespace glz
17
{
18
   namespace jmespath
19
   {
20
      /// Reasons tokenization of a JMESPath expression can fail.
      enum struct tokenization_error {
         none, // No error
         unbalanced_brackets, // Mismatched '[' and ']'
         unbalanced_parentheses, // Mismatched '(' and ')'
         unclosed_string, // String literal not properly closed
         invalid_escape_sequence, // Invalid escape sequence in string
         unexpected_delimiter, // Unexpected character encountered (e.g., consecutive delimiters)
      };

      /// Result of splitting an expression at its first top-level delimiter.
      struct tokenization_result
      {
         std::string_view first; // Text before the delimiter (the first token)
         std::string_view second; // Remainder, beginning with the delimiter itself; empty if none was found
         tokenization_error error; // tokenization_error::none on success
      };

      /**
       * @brief Trims leading whitespace characters from a string_view.
       *
       * Whitespace here means space, tab, line feed and carriage return.
       *
       * @param s The input string_view to trim.
       * @return A string_view with leading whitespace removed (empty if s is all whitespace).
       */
      inline constexpr std::string_view trim_left(std::string_view s)
      {
         const auto first_kept = s.find_first_not_of(" \t\n\r");
         s.remove_prefix(first_kept == std::string_view::npos ? s.size() : first_kept);
         return s;
      }

      /**
       * @brief Splits a JMESPath expression into the first token and the remaining path with error handling.
       *
       * The split happens at the first '.' or '|' that is outside of any quoted string, '[...]' or '(...)'.
       * Quoted strings may be delimited by '"' or '\''. Inside a string a backslash must be followed by one of
       * " ' \ / b f n r t u, and the two characters are consumed together as one escape.
       *
       * @param s The JMESPath expression to tokenize.
       * @return tokenization_result containing:
       *         - first: The first token of the expression (the whole input if no delimiter was found).
       *         - second: The remaining expression, starting at the delimiter; empty if no delimiter was found.
       *         - error: tokenization_error indicating if an error occurred. On error first and second are empty.
       */
      inline constexpr tokenization_result tokenize_jmes_path(std::string_view s)
      {
         if (s.empty()) {
            return {"", "", tokenization_error::none};
         }

         const size_t len = s.size();
         size_t pos = 0;
         int bracket_level = 0;
         int parenthesis_level = 0;
         bool in_string = false;
         char string_delim = '\0';

         while (pos < len) {
            const char current = s[pos];

            if (in_string) {
               if (current == '\\') {
                  // Validate escape sequence
                  if (pos + 1 >= len) {
                     return {"", "", tokenization_error::invalid_escape_sequence};
                  }
                  const char next_char = s[pos + 1];
                  // Simple validation: allow known escape characters
                  if (next_char != '"' && next_char != '\'' && next_char != '\\' && next_char != '/' &&
                      next_char != 'b' && next_char != 'f' && next_char != 'n' && next_char != 'r' &&
                      next_char != 't' && next_char != 'u') {
                     return {"", "", tokenization_error::invalid_escape_sequence};
                  }
                  // Consume the escape as a pair. Otherwise the escaped character would be looked at again:
                  // an escaped backslash would be mistaken for the start of another escape, and an escaped
                  // quote for the end of the string.
                  pos += 2;
                  continue;
               }
               if (current == string_delim) {
                  // Escapes were consumed above, so a delimiter seen here is always unescaped.
                  in_string = false;
               }
               ++pos;
               continue;
            }

            switch (current) {
            case '"':
            case '\'':
               in_string = true;
               string_delim = current;
               break;
            case '[':
               ++bracket_level;
               break;
            case ']':
               if (bracket_level == 0) {
                  return {"", "", tokenization_error::unbalanced_brackets};
               }
               --bracket_level;
               break;
            case '(':
               ++parenthesis_level;
               break;
            case ')':
               if (parenthesis_level == 0) {
                  return {"", "", tokenization_error::unbalanced_parentheses};
               }
               --parenthesis_level;
               break;
            case '.':
            case '|':
               if (bracket_level == 0 && parenthesis_level == 0) {
                  // Split here; the delimiter stays at the front of the remainder
                  return {s.substr(0, pos), s.substr(pos), tokenization_error::none};
               }
               break;
            default:
               break;
            }
            ++pos;
         }

         if (in_string) {
            return {"", "", tokenization_error::unclosed_string};
         }

         if (bracket_level != 0) {
            return {"", "", tokenization_error::unbalanced_brackets};
         }

         if (parenthesis_level != 0) {
            return {"", "", tokenization_error::unbalanced_parentheses};
         }

         // If no delimiter found, return the whole string as first token
         return {s, "", tokenization_error::none};
      }
167
168
      /**
       * @brief Splits every token into its key part and its individual bracketed parts.
       *
       * For example the token "a[0][1]" becomes the three tokens "a", "[0]" and "[1]". Tokens without a '['
       * are kept as they are. Brackets are located with a plain character search.
       *
       * @param tokens Tokens to refine; replaced by the refined list on success and left untouched on error.
       * @return tokenization_error::unbalanced_brackets if a '[' has no ']' after it, otherwise
       *         tokenization_error::none.
       */
      inline constexpr tokenization_error finalize_tokens(std::vector<std::string_view>& tokens)
      {
         std::vector<std::string_view> refined;
         refined.reserve(tokens.size()); // a lower bound: every token yields at least one entry

         for (std::string_view rest : tokens) {
            // Peel pieces off the front of the token until nothing is left.
            while (!rest.empty()) {
               const auto open = rest.find('[');
               if (open == std::string_view::npos) {
                  // No more bracketed segments: whatever remains is a single piece
                  refined.push_back(rest);
                  break;
               }

               if (open != 0) {
                  // Key part in front of the bracket
                  refined.push_back(rest.substr(0, open));
               }

               const auto close = rest.find(']', open + 1);
               if (close == std::string_view::npos) {
                  return tokenization_error::unbalanced_brackets;
               }

               // The bracketed piece including both brackets, e.g. [0]
               refined.push_back(rest.substr(open, close - open + 1));
               rest.remove_prefix(close + 1);
            }
         }

         tokens = std::move(refined);
         return tokenization_error::none;
      }
207
208
      /**
209
       * @brief Recursively tokenizes a full JMESPath expression into all its tokens with error handling.
210
       *
211
       * @param expression The complete JMESPath expression to tokenize.
212
       * @param tokens A vector to store all tokens in the order they appear.
213
       * @param error An output parameter to capture any tokenization errors.
214
       * @return true if tokenization succeeded without errors, false otherwise.
215
       */
216
      inline constexpr jmespath::tokenization_error tokenize_full_jmespath(std::string_view expression,
217
                                                                           std::vector<std::string_view>& tokens)
218
0
      {
219
0
         tokens.clear();
220
0
         auto remaining = expression;
221
0
222
0
         while (!remaining.empty()) {
223
0
            tokenization_result result = tokenize_jmes_path(remaining);
224
0
            if (result.error != tokenization_error::none) {
225
0
               return result.error;
226
0
            }
227
0
228
0
            if (result.first.empty()) {
229
0
               return tokenization_error::unexpected_delimiter;
230
0
            }
231
0
232
0
            tokens.emplace_back(result.first);
233
0
234
0
            if (!result.second.empty()) {
235
0
               char delimiter = result.second.front();
236
0
               if (delimiter == '.' || delimiter == '|') {
237
0
                  remaining = result.second.substr(1);
238
0
                  remaining = trim_left(remaining);
239
0
                  if (!remaining.empty() && (remaining.front() == '.' || remaining.front() == '|')) {
240
0
                     return tokenization_error::unexpected_delimiter;
241
0
                  }
242
0
               }
243
0
               else {
244
0
                  return tokenization_error::unexpected_delimiter;
245
0
               }
246
0
            }
247
0
            else {
248
0
               break;
249
0
            }
250
0
         }
251
0
252
0
         // New step: finalize the tokens by splitting multiple bracket accesses
253
0
         auto err = finalize_tokens(tokens);
254
0
         if (err != jmespath::tokenization_error::none) {
255
0
            return err;
256
0
         }
257
0
258
0
         return tokenization_error::none;
259
0
      }
260
261
      template <const std::string_view& expression>
262
      consteval auto tokenize_as_array()
263
      {
264
         constexpr auto N = [] {
265
            std::vector<std::string_view> tokens;
266
            auto err = tokenize_full_jmespath(expression, tokens);
267
            if (err != tokenization_error::none) {
268
               std::abort();
269
            }
270
            return tokens.size();
271
         }();
272
273
         std::vector<std::string_view> tokens;
274
         auto err = tokenize_full_jmespath(expression, tokens);
275
         if (err != tokenization_error::none) {
276
            std::abort();
277
         }
278
279
         std::array<std::string_view, N> arr{};
280
         for (std::size_t i = 0; i < N; ++i) {
281
            arr[i] = tokens[i];
282
         }
283
         return arr; // Vector destroyed here, leaving only the array.
284
      }
285
286
      /// Decomposition of a single JMESPath token into a key and an optional index or slice.
      struct ArrayParseResult
      {
         bool is_array_access{false}; // Set when the token contains a '[', as in "key[...]"
         bool error{false}; // Set when the bracketed part could not be parsed
         std::string_view key; // Text in front of the first '[' (the whole token if there is none)
         std::optional<int32_t> start; // Single index, or the start of a slice
         std::optional<int32_t> end; // End of a slice
         std::optional<int32_t> step; // Step of a slice
         size_t colon_count{0}; // How many ':' characters were found between the brackets
      };
296
297
      inline constexpr std::optional<int> parse_int(std::string_view s)
298
0
      {
299
0
         if (s.empty()) {
300
0
            return std::nullopt;
301
0
         }
302
0
         int value;
303
0
         auto result = detail::from_chars(s.data(), s.data() + s.size(), value);
304
0
         if (result.ec == std::errc()) {
305
0
            return value;
306
0
         }
307
0
         return std::nullopt;
308
0
      }
309
310
      // Parse a token that may have array indexing or slicing.
311
      inline constexpr ArrayParseResult parse_jmespath_token(std::string_view token)
312
0
      {
313
0
         ArrayParseResult result;
314
0
315
0
         // Find the first '['
316
0
         auto open_pos = token.find('[');
317
0
         if (open_pos == std::string_view::npos) {
318
0
            // No array access, just a key.
319
0
            result.key = token;
320
0
            return result;
321
0
         }
322
0
323
0
         auto close_pos = token.rfind(']');
324
0
         if (close_pos == std::string_view::npos || close_pos < open_pos) {
325
0
            // Mismatched brackets -> error
326
0
            result.key = token.substr(0, open_pos);
327
0
            result.is_array_access = true;
328
0
            result.error = true;
329
0
            return result;
330
0
         }
331
0
332
0
         result.is_array_access = true;
333
0
         result.key = token.substr(0, open_pos);
334
0
         auto inside = token.substr(open_pos + 1, close_pos - (open_pos + 1));
335
0
         if (inside.empty()) {
336
0
            // Empty inside "[]" is invalid
337
0
            result.error = true;
338
0
            return result;
339
0
         }
340
0
341
0
         // Count colons to determine if it's a slice
342
0
         size_t colon_count = 0;
343
0
         for (char c : inside) {
344
0
            if (c == ':') {
345
0
               colon_count++;
346
0
            }
347
0
         }
348
0
         result.colon_count = colon_count;
349
0
350
0
         // Helper lambda to parse slice parts
351
0
         auto parse_slice = [&](std::string_view inside) {
352
0
            std::string_view parts[3];
353
0
            {
354
0
               size_t start_idx = 0;
355
0
               int idx = 0;
356
0
               for (size_t i = 0; i <= inside.size(); ++i) {
357
0
                  if (i == inside.size() || inside[i] == ':') {
358
0
                     if (idx < 3) {
359
0
                        parts[idx] = inside.substr(start_idx, i - start_idx);
360
0
                        idx++;
361
0
                     }
362
0
                     start_idx = i + 1;
363
0
                  }
364
0
               }
365
0
            }
366
0
367
0
            // Parse start
368
0
            if (!parts[0].empty()) {
369
0
               auto val = parse_int(parts[0]);
370
0
               if (!val.has_value()) {
371
0
                  result.error = true;
372
0
               }
373
0
               else {
374
0
                  result.start = val;
375
0
               }
376
0
            }
377
0
378
0
            // Parse end
379
0
            if (!parts[1].empty()) {
380
0
               auto val = parse_int(parts[1]);
381
0
               if (!val.has_value()) {
382
0
                  result.error = true;
383
0
               }
384
0
               else {
385
0
                  result.end = val;
386
0
               }
387
0
            }
388
0
389
0
            // Parse step
390
0
            if (colon_count == 2 && !parts[2].empty()) {
391
0
               auto val = parse_int(parts[2]);
392
0
               if (!val.has_value()) {
393
0
                  result.error = true;
394
0
               }
395
0
               else {
396
0
                  result.step = val;
397
0
               }
398
0
            }
399
0
         };
400
0
401
0
         if (colon_count == 0) {
402
0
            // single index
403
0
            auto val = parse_int(inside);
404
0
            if (!val.has_value()) {
405
0
               result.error = true;
406
0
            }
407
0
            else {
408
0
               result.start = val;
409
0
            }
410
0
         }
411
0
         else if (colon_count == 1 || colon_count == 2) {
412
0
            // slice
413
0
            parse_slice(inside);
414
0
         }
415
0
         else {
416
0
            // More than 2 colons is invalid
417
0
            result.error = true;
418
0
         }
419
0
420
0
         return result;
421
0
      }
422
   }
423
424
   namespace detail
425
   {
426
      template <auto Opts = opts{}, class T>
427
         requires(Opts.format == JSON && not readable_array_t<T>)
428
      inline void handle_slice(const jmespath::ArrayParseResult&, T&&, context& ctx, auto&&, auto&&)
429
      {
430
         ctx.error = error_code::syntax_error;
431
      }
432
433
      template <auto Opts = opts{}, class T>
434
         requires(Opts.format == JSON && readable_array_t<T>)
435
      inline void handle_slice(const jmespath::ArrayParseResult& decomposed_key, T&& value, context& ctx, auto&& it,
436
                               auto&& end)
437
      {
438
         if (skip_ws<Opts>(ctx, it, end)) {
439
            return;
440
         }
441
442
         // Determine slice parameters
443
         int32_t step_idx = decomposed_key.step.value_or(1);
444
         bool has_negative_index = (decomposed_key.start.value_or(0) < 0) || (decomposed_key.end.value_or(0) < 0);
445
446
         // If we have negative indices or step != 1, fall back to the original method (read all then slice)
447
         if (step_idx != 1 || has_negative_index) {
448
            // Original fallback behavior:
449
            // Read entire array into value first
450
            value.clear();
451
            if (*it == ']') {
452
               // empty array
453
               ++it; // consume ']'
454
            }
455
            else {
456
               while (true) {
457
                  parse<Opts.format>::template op<Opts>(value.emplace_back(), ctx, it, end);
458
                  if (bool(ctx.error)) [[unlikely]]
459
                     return;
460
461
                  if (skip_ws<Opts>(ctx, it, end)) {
462
                     return;
463
                  }
464
                  if (*it == ']') {
465
                     ++it;
466
                     break;
467
                  }
468
                  if (*it != ',') {
469
                     ctx.error = error_code::parse_error;
470
                     return;
471
                  }
472
                  ++it;
473
                  if (skip_ws<Opts>(ctx, it, end)) {
474
                     return;
475
                  }
476
               }
477
            }
478
479
            // Now do the slicing
480
            const int32_t size = static_cast<int32_t>(value.size());
481
            auto wrap_index = [&](int32_t idx) {
482
               if (idx < 0) idx += size;
483
               return std::clamp(idx, int32_t{0}, size);
484
            };
485
486
            const int32_t start_idx = wrap_index(decomposed_key.start.value_or(0));
487
            const int32_t end_idx = wrap_index(decomposed_key.end.value_or(size));
488
489
            if (step_idx == 1) {
490
               if (start_idx < end_idx) {
491
                  if (start_idx > 0) {
492
                     value.erase(value.begin(), value.begin() + start_idx);
493
                  }
494
                  if (static_cast<size_t>(end_idx - start_idx) < value.size()) {
495
                     value.erase(value.begin() + (end_idx - start_idx), value.end());
496
                  }
497
               }
498
               else {
499
                  value.clear();
500
               }
501
            }
502
            else {
503
               // For steps != 1 (or negative steps), the fallback path was already chosen.
504
               // Just apply the same logic as before.
505
               std::size_t dest = 0;
506
               if (step_idx > 0) {
507
                  for (int32_t i = start_idx; i < end_idx; i += step_idx) {
508
                     value[dest++] = std::move(value[i]);
509
                  }
510
               }
511
               else {
512
                  for (int32_t i = start_idx; i > end_idx; i += step_idx) {
513
                     value[dest++] = std::move(value[i]);
514
                  }
515
               }
516
               value.resize(dest);
517
            }
518
519
            return;
520
         }
521
522
         // If we reach here, step == 1 and no negative indices, so we can do partial reading.
523
         value.clear();
524
         const int32_t start_idx = decomposed_key.start.value_or(0);
525
         const int32_t end_idx = decomposed_key.end.value_or((std::numeric_limits<int32_t>::max)());
526
527
         // If empty array
528
         if (*it == ']') {
529
            ++it; // consume ']'
530
            return;
531
         }
532
533
         // We'll read elements and track their index
534
         int32_t current_index = 0;
535
         while (true) {
536
            if (skip_ws<Opts>(ctx, it, end)) {
537
               return;
538
            }
539
540
            // Decide whether we read or skip this element
541
            if (current_index < start_idx) {
542
               // Skip this element
543
               skip_value<JSON>::op<Opts>(ctx, it, end);
544
               if (bool(ctx.error)) [[unlikely]]
545
                  return;
546
            }
547
            else if (current_index >= start_idx && current_index < end_idx) {
548
               // Read this element into value
549
               parse<Opts.format>::template op<Opts>(value.emplace_back(), ctx, it, end);
550
               if (bool(ctx.error)) [[unlikely]]
551
                  return;
552
            }
553
            else {
554
               // current_index >= end_idx, we can skip reading into value
555
               skip_value<JSON>::op<Opts>(ctx, it, end);
556
               if (bool(ctx.error)) [[unlikely]]
557
                  return;
558
            }
559
560
            if (skip_ws<Opts>(ctx, it, end)) {
561
               return;
562
            }
563
            if (*it == ']') {
564
               ++it; // finished reading array
565
               break;
566
            }
567
            if (*it != ',') {
568
               ctx.error = error_code::parse_error;
569
               return;
570
            }
571
            ++it; // consume ','
572
            if (skip_ws<Opts>(ctx, it, end)) {
573
               return;
574
            }
575
576
            ++current_index;
577
         }
578
      }
579
   }
580
581
   // Read into a C++ type given a path denoted by a JMESPath query
582
   template <string_literal Path, auto Options = opts{}, class T, contiguous Buffer>
583
      requires(Options.format == JSON)
584
   [[nodiscard]] inline error_ctx read_jmespath(T&& value, Buffer&& buffer)
585
   {
586
      static constexpr auto S = chars<Path>;
587
      static constexpr auto tokens = jmespath::tokenize_as_array<S>();
588
      static constexpr auto N = tokens.size();
589
590
      constexpr bool use_padded = resizable<Buffer> && non_const_buffer<Buffer> && !check_disable_padding(Options);
591
592
      static constexpr auto Opts = use_padded ? is_padded_on<Options>() : is_padded_off<Options>();
593
594
      if constexpr (use_padded) {
595
         // Pad the buffer for SWAR
596
         buffer.resize(buffer.size() + padding_bytes);
597
      }
598
      auto p = read_iterators<Opts>(buffer);
599
      auto it = p.first;
600
      auto end = p.second;
601
      auto start = it;
602
603
      context ctx{};
604
605
      if constexpr (N == 0) {
606
         parse<Opts.format>::template op<Opts>(value, ctx, it, end);
607
      }
608
      else {
609
         using namespace glz::detail;
610
611
         skip_ws<Opts>(ctx, it, end);
612
613
         for_each<N>([&]<auto I>() {
614
            if (bool(ctx.error)) [[unlikely]] {
615
               return;
616
            }
617
618
            static constexpr auto decomposed_key = jmespath::parse_jmespath_token(tokens[I]);
619
            static constexpr auto key = decomposed_key.key;
620
621
            if constexpr (decomposed_key.is_array_access) {
622
               // If we have a key, that means we're looking into an object like: key[0:5]
623
               if constexpr (key.empty()) {
624
                  if (skip_ws<Opts>(ctx, it, end)) {
625
                     return;
626
                  }
627
                  // We expect the JSON at this level to be an array
628
                  if (match_invalid_end<'[', Opts>(ctx, it, end)) {
629
                     return;
630
                  }
631
632
                  // If this is a slice (colon_count > 0)
633
                  if constexpr (decomposed_key.colon_count > 0) {
634
                     detail::handle_slice<Opts>(decomposed_key, value, ctx, it, end);
635
                  }
636
                  else {
637
                     // SINGLE INDEX SCENARIO (no slice, just an index)
638
                     if constexpr (decomposed_key.start.has_value()) {
639
                        constexpr auto n = decomposed_key.start.value();
640
641
                        if constexpr (I == (N - 1)) {
642
                           // Skip until we reach the target element n
643
                           for (int32_t i = 0; i < n; ++i) {
644
                              skip_value<JSON>::op<Opts>(ctx, it, end);
645
                              if (bool(ctx.error)) [[unlikely]]
646
                                 return;
647
648
                              if (*it != ',') {
649
                                 ctx.error = error_code::array_element_not_found;
650
                                 return;
651
                              }
652
                              ++it;
653
                              if (skip_ws<Opts>(ctx, it, end)) {
654
                                 return;
655
                              }
656
                           }
657
658
                           // Now read the element at index n
659
                           parse<Opts.format>::template op<Opts>(value, ctx, it, end);
660
                        }
661
                        else {
662
                           // Not the last token. We must still parse the element at index n so the next indexing can
663
                           // proceed.
664
                           for (int32_t i = 0; i < n; ++i) {
665
                              skip_value<JSON>::op<Opts>(ctx, it, end);
666
                              if (bool(ctx.error)) [[unlikely]]
667
                                 return;
668
669
                              if (*it != ',') {
670
                                 ctx.error = error_code::array_element_not_found;
671
                                 return;
672
                              }
673
                              ++it;
674
                              if (skip_ws<Opts>(ctx, it, end)) {
675
                                 return;
676
                              }
677
                           }
678
                        }
679
                     }
680
                     else {
681
                        ctx.error = error_code::array_element_not_found;
682
                        return;
683
                     }
684
                  }
685
686
                  // After handling the array access, we're done for this token
687
                  return;
688
               }
689
               else {
690
                  // Object scenario with a key, like: key[0:5]
691
                  if (match_invalid_end<'{', Opts>(ctx, it, end)) {
692
                     return;
693
                  }
694
695
                  while (true) {
696
                     if (skip_ws<Opts>(ctx, it, end)) {
697
                        return;
698
                     }
699
                     if (match<'"'>(ctx, it)) {
700
                        return;
701
                     }
702
703
                     auto* start = it;
704
                     skip_string_view<Opts>(ctx, it, end);
705
                     if (bool(ctx.error)) [[unlikely]]
706
                        return;
707
                     const sv k = {start, size_t(it - start)};
708
                     ++it;
709
710
                     if (key.size() == k.size() && comparitor<key>(k.data())) {
711
                        if (skip_ws<Opts>(ctx, it, end)) {
712
                           return;
713
                        }
714
                        if (match_invalid_end<':', Opts>(ctx, it, end)) {
715
                           return;
716
                        }
717
                        if (skip_ws<Opts>(ctx, it, end)) {
718
                           return;
719
                        }
720
                        if (match_invalid_end<'[', Opts>(ctx, it, end)) {
721
                           return;
722
                        }
723
724
                        // Distinguish single index vs slice using colon_count
725
                        if constexpr (decomposed_key.colon_count > 0) {
726
                           detail::handle_slice<Opts, decomposed_key>(value, ctx, it, end);
727
                        }
728
                        else {
729
                           // SINGLE INDEX SCENARIO (colon_count == 0)
730
                           if constexpr (decomposed_key.start.has_value()) {
731
                              // Skip until we reach the target element
732
                              constexpr auto n = decomposed_key.start.value();
733
                              for (int32_t i = 0; i < n; ++i) {
734
                                 skip_value<JSON>::op<Opts>(ctx, it, end);
735
                                 if (bool(ctx.error)) [[unlikely]]
736
                                    return;
737
738
                                 if (*it != ',') {
739
                                    ctx.error = error_code::array_element_not_found;
740
                                    return;
741
                                 }
742
                                 ++it;
743
                                 if (skip_ws<Opts>(ctx, it, end)) {
744
                                    return;
745
                                 }
746
                              }
747
748
                              if (skip_ws<Opts>(ctx, it, end)) {
749
                                 return;
750
                              }
751
752
                              if constexpr (I == (N - 1)) {
753
                                 parse<Opts.format>::template op<Opts>(value, ctx, it, end);
754
                              }
755
                              return;
756
                           }
757
                           else {
758
                              ctx.error = error_code::array_element_not_found;
759
                              return;
760
                           }
761
                        }
762
                     }
763
                     else {
764
                        skip_value<JSON>::op<Opts>(ctx, it, end);
765
                        if (bool(ctx.error)) [[unlikely]] {
766
                           return;
767
                        }
768
                        if (*it != ',') {
769
                           ctx.error = error_code::key_not_found;
770
                           return;
771
                        }
772
                        ++it;
773
                     }
774
                  }
775
               }
776
            }
777
            else {
778
               // If it's not array access, we are dealing with an object key
779
               if (match_invalid_end<'{', Opts>(ctx, it, end)) {
780
                  return;
781
               }
782
783
               while (it < end) {
784
                  if (skip_ws<Opts>(ctx, it, end)) {
785
                     return;
786
                  }
787
                  if (match<'"'>(ctx, it)) {
788
                     return;
789
                  }
790
791
                  auto* start = it;
792
                  skip_string_view<Opts>(ctx, it, end);
793
                  if (bool(ctx.error)) [[unlikely]]
794
                     return;
795
                  const sv k = {start, size_t(it - start)};
796
                  ++it;
797
798
                  if (key.size() == k.size() && comparitor<key>(k.data())) {
799
                     if (skip_ws<Opts>(ctx, it, end)) {
800
                        return;
801
                     }
802
                     if (match_invalid_end<':', Opts>(ctx, it, end)) {
803
                        return;
804
                     }
805
                     if (skip_ws<Opts>(ctx, it, end)) {
806
                        return;
807
                     }
808
809
                     if constexpr (I == (N - 1)) {
810
                        parse<Opts.format>::template op<Opts>(value, ctx, it, end);
811
                     }
812
                     return;
813
                  }
814
                  else {
815
                     skip_value<JSON>::op<Opts>(ctx, it, end);
816
                     if (bool(ctx.error)) [[unlikely]] {
817
                        return;
818
                     }
819
                     if (*it != ',') {
820
                        ctx.error = error_code::key_not_found;
821
                        return;
822
                     }
823
                     ++it;
824
                  }
825
               }
826
            }
827
         });
828
      }
829
830
      if constexpr (use_padded) {
831
         // Restore the original buffer state
832
         buffer.resize(buffer.size() - padding_bytes);
833
      }
834
835
      return {ctx.error, ctx.custom_error_message, size_t(it - start), ctx.includer_error};
836
   }
837
838
   // A "compiled" jmespath expression, which can be pre-computed for efficient traversal
839
   struct jmespath_expression
840
   {
841
      std::string_view path{};
842
      jmespath::tokenization_error error{};
843
      std::vector<std::string_view> tokens{}; // evaluated tokens
844
845
      jmespath_expression(const std::string_view input_path) noexcept : path(input_path)
846
0
      {
847
0
         error = jmespath::tokenize_full_jmespath(path, tokens);
848
0
      }
849
850
      template <size_t N>
851
      jmespath_expression(const char (&input_path)[N]) noexcept : path(input_path)
852
      {
853
         error = jmespath::tokenize_full_jmespath(path, tokens);
854
      }
855
      jmespath_expression(const jmespath_expression&) noexcept = default;
856
      jmespath_expression(jmespath_expression&&) noexcept = default;
857
      jmespath_expression& operator=(const jmespath_expression&) noexcept = default;
858
      jmespath_expression& operator=(jmespath_expression&&) noexcept = default;
859
   };
860
861
   // Read into a C++ type given a path denoted by a JMESPath query
862
   // This version supports a runtime path
863
   template <auto Options = opts{}, class T, contiguous Buffer>
864
      requires(Options.format == JSON)
865
   [[nodiscard]] inline error_ctx read_jmespath(const jmespath_expression& expression, T&& value, Buffer&& buffer)
866
   {
867
      if (bool(expression.error)) {
868
         return {error_code::syntax_error, "JMESPath invalid expression"};
869
      }
870
871
      const auto& tokens = expression.tokens;
872
      const auto N = tokens.size();
873
874
      constexpr bool use_padded = resizable<Buffer> && non_const_buffer<Buffer> && !check_disable_padding(Options);
875
      static constexpr auto Opts = use_padded ? is_padded_on<Options>() : is_padded_off<Options>();
876
877
      if constexpr (use_padded) {
878
         // Pad the buffer for SWAR
879
         buffer.resize(buffer.size() + padding_bytes);
880
      }
881
      auto p = read_iterators<Opts>(buffer);
882
      auto it = p.first;
883
      auto end = p.second;
884
      auto start = it;
885
886
      context ctx{};
887
888
      if (N == 0) {
889
         parse<Opts.format>::template op<Opts>(value, ctx, it, end);
890
      }
891
      else {
892
         using namespace glz::detail;
893
894
         skip_ws<Opts>(ctx, it, end);
895
896
         for (size_t I = 0; I < N; ++I) {
897
            if (bool(ctx.error)) [[unlikely]] {
898
               break;
899
            }
900
901
            [&] {
902
               const auto decomposed_key = jmespath::parse_jmespath_token(tokens[I]);
903
               const auto& key = decomposed_key.key;
904
905
               if (decomposed_key.is_array_access) {
906
                  if (key.empty()) {
907
                     // Top-level array scenario
908
                     if (skip_ws<Opts>(ctx, it, end)) {
909
                        return;
910
                     }
911
                     if (match_invalid_end<'[', Opts>(ctx, it, end)) {
912
                        return;
913
                     }
914
915
                     if (decomposed_key.colon_count > 0) {
916
                        // Slice scenario
917
                        detail::handle_slice(decomposed_key, value, ctx, it, end);
918
                        return;
919
                     }
920
                     else {
921
                        // Single index scenario
922
                        if (decomposed_key.start.has_value()) {
923
                           const int32_t n = decomposed_key.start.value();
924
925
                           if (I == (N - 1)) {
926
                              // Skip until we reach the target element n
927
                              for (int32_t i = 0; i < n; ++i) {
928
                                 skip_value<JSON>::op<Opts>(ctx, it, end);
929
                                 if (bool(ctx.error)) [[unlikely]]
930
                                    return;
931
932
                                 if (*it != ',') {
933
                                    ctx.error = error_code::array_element_not_found;
934
                                    return;
935
                                 }
936
                                 ++it;
937
                                 if (skip_ws<Opts>(ctx, it, end)) {
938
                                    return;
939
                                 }
940
                              }
941
942
                              // Now read the element at index n
943
                              parse<Opts.format>::template op<Opts>(value, ctx, it, end);
944
                           }
945
                           else {
946
                              // Not the last token. We must still parse the element at index n so the next indexing can
947
                              // proceed.
948
                              for (int32_t i = 0; i < n; ++i) {
949
                                 skip_value<JSON>::op<Opts>(ctx, it, end);
950
                                 if (bool(ctx.error)) [[unlikely]]
951
                                    return;
952
953
                                 if (*it != ',') {
954
                                    ctx.error = error_code::array_element_not_found;
955
                                    return;
956
                                 }
957
                                 ++it;
958
                                 if (skip_ws<Opts>(ctx, it, end)) {
959
                                    return;
960
                                 }
961
                              }
962
                           }
963
                        }
964
                        else {
965
                           ctx.error = error_code::array_element_not_found;
966
                           return;
967
                        }
968
                        return;
969
                     }
970
                  }
971
                  else {
972
                     // Object scenario: key[...]
973
                     if (match_invalid_end<'{', Opts>(ctx, it, end)) {
974
                        return;
975
                     }
976
977
                     while (true) {
978
                        if (skip_ws<Opts>(ctx, it, end)) {
979
                           return;
980
                        }
981
                        if (match<'"'>(ctx, it)) {
982
                           return;
983
                        }
984
985
                        auto* start_pos = it;
986
                        skip_string_view<Opts>(ctx, it, end);
987
                        if (bool(ctx.error)) [[unlikely]]
988
                           return;
989
                        const sv k = {start_pos, size_t(it - start_pos)};
990
                        ++it;
991
992
                        if (key.size() == k.size() && memcmp(key.data(), k.data(), key.size()) == 0) {
993
                           if (skip_ws<Opts>(ctx, it, end)) {
994
                              return;
995
                           }
996
                           if (match_invalid_end<':', Opts>(ctx, it, end)) {
997
                              return;
998
                           }
999
                           if (skip_ws<Opts>(ctx, it, end)) {
1000
                              return;
1001
                           }
1002
                           if (match_invalid_end<'[', Opts>(ctx, it, end)) {
1003
                              return;
1004
                           }
1005
1006
                           if (decomposed_key.colon_count > 0) {
1007
                              // Slice scenario
1008
                              detail::handle_slice(decomposed_key, value, ctx, it, end);
1009
                              return;
1010
                           }
1011
                           else {
1012
                              // Single index scenario
1013
                              if (decomposed_key.start.has_value()) {
1014
                                 int32_t n = decomposed_key.start.value();
1015
                                 for (int32_t i = 0; i < n; ++i) {
1016
                                    skip_value<JSON>::op<Opts>(ctx, it, end);
1017
                                    if (bool(ctx.error)) [[unlikely]]
1018
                                       return;
1019
1020
                                    if (*it != ',') {
1021
                                       ctx.error = error_code::array_element_not_found;
1022
                                       return;
1023
                                    }
1024
                                    ++it;
1025
                                    if (skip_ws<Opts>(ctx, it, end)) {
1026
                                       return;
1027
                                    }
1028
                                 }
1029
1030
                                 if (skip_ws<Opts>(ctx, it, end)) {
1031
                                    return;
1032
                                 }
1033
1034
                                 if (I == (N - 1)) {
1035
                                    parse<Opts.format>::template op<Opts>(value, ctx, it, end);
1036
                                 }
1037
                                 return;
1038
                              }
1039
                              else {
1040
                                 ctx.error = error_code::array_element_not_found;
1041
                                 return;
1042
                              }
1043
                           }
1044
                        }
1045
                        else {
1046
                           skip_value<JSON>::op<Opts>(ctx, it, end);
1047
                           if (bool(ctx.error)) [[unlikely]] {
1048
                              return;
1049
                           }
1050
                           if (*it != ',') {
1051
                              ctx.error = error_code::key_not_found;
1052
                              return;
1053
                           }
1054
                           ++it;
1055
                        }
1056
                     }
1057
                  }
1058
               }
1059
               else {
1060
                  // Non-array access: key-only navigation
1061
                  if (match_invalid_end<'{', Opts>(ctx, it, end)) {
1062
                     return;
1063
                  }
1064
1065
                  while (it < end) {
1066
                     if (skip_ws<Opts>(ctx, it, end)) {
1067
                        return;
1068
                     }
1069
                     if (match<'"'>(ctx, it)) {
1070
                        return;
1071
                     }
1072
1073
                     auto* start_pos = it;
1074
                     skip_string_view<Opts>(ctx, it, end);
1075
                     if (bool(ctx.error)) [[unlikely]]
1076
                        return;
1077
                     const sv k = {start_pos, size_t(it - start_pos)};
1078
                     ++it;
1079
1080
                     if (key.size() == k.size() && memcmp(key.data(), k.data(), key.size()) == 0) {
1081
                        if (skip_ws<Opts>(ctx, it, end)) {
1082
                           return;
1083
                        }
1084
                        if (match_invalid_end<':', Opts>(ctx, it, end)) {
1085
                           return;
1086
                        }
1087
                        if (skip_ws<Opts>(ctx, it, end)) {
1088
                           return;
1089
                        }
1090
1091
                        if (I == (N - 1)) {
1092
                           parse<Opts.format>::template op<Opts>(value, ctx, it, end);
1093
                        }
1094
                        return;
1095
                     }
1096
                     else {
1097
                        skip_value<JSON>::op<Opts>(ctx, it, end);
1098
                        if (bool(ctx.error)) [[unlikely]] {
1099
                           return;
1100
                        }
1101
                        if (*it != ',') {
1102
                           ctx.error = error_code::key_not_found;
1103
                           return;
1104
                        }
1105
                        ++it;
1106
                     }
1107
                  }
1108
               }
1109
            }();
1110
         }
1111
      }
1112
1113
      if constexpr (use_padded) {
1114
         // Restore the original buffer state
1115
         buffer.resize(buffer.size() - padding_bytes);
1116
      }
1117
1118
      return {ctx.error, ctx.custom_error_message, size_t(it - start), ctx.includer_error};
1119
   }
1120
1121
}