/src/tarantool/src/lib/json/json.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2010-2018 Tarantool AUTHORS: please see AUTHORS file. |
3 | | * |
4 | | * Redistribution and use in source and binary forms, with or |
5 | | * without modification, are permitted provided that the following |
6 | | * conditions are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above |
9 | | * copyright notice, this list of conditions and the |
10 | | * following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above |
13 | | * copyright notice, this list of conditions and the following |
14 | | * disclaimer in the documentation and/or other materials |
15 | | * provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
19 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
21 | | * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
22 | | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
25 | | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
26 | | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF |
28 | | * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | | * SUCH DAMAGE. |
30 | | */ |
31 | | |
32 | | #include "json.h" |
33 | | |
34 | | #include <assert.h> |
35 | | #include <ctype.h> |
36 | | #include <stdbool.h> |
37 | | #include <stddef.h> |
38 | | #include <stdint.h> |
39 | | #include <stdlib.h> |
40 | | #include <string.h> |
41 | | #include <unicode/uchar.h> |
42 | | #include <unicode/utf8.h> |
43 | | |
44 | | #include "trivia/util.h" |
45 | | #include <PMurHash.h> |
46 | | |
47 | | /** |
48 | | * Read a single symbol from a string starting from an offset. |
49 | | * @param lexer JSON path lexer. |
50 | | * @param[out] out Read symbol. |
51 | | * |
52 | | * @retval 0 Success. |
53 | | * @retval > 0 1-based position of a syntax error. |
54 | | */ |
55 | | static inline int |
56 | | json_read_symbol(struct json_lexer *lexer, UChar32 *out) |
57 | 0 | { |
58 | 0 | if (json_lexer_is_eof(lexer)) { |
59 | 0 | *out = U_SENTINEL; |
60 | 0 | return lexer->symbol_count + 1; |
61 | 0 | } |
62 | 0 | U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out); |
63 | 0 | if (*out == U_SENTINEL) |
64 | 0 | return lexer->symbol_count + 1; |
65 | 0 | ++lexer->symbol_count; |
66 | 0 | return 0; |
67 | 0 | } |
68 | | |
69 | | /** |
70 | | * Roll back the lexer by one symbol. |
71 | | * @param lexer JSON path lexer. |
72 | | * @param offset Offset to the previous symbol. |
73 | | */ |
74 | | static inline void |
75 | | json_revert_symbol(struct json_lexer *lexer, int offset) |
76 | 0 | { |
77 | 0 | lexer->offset = offset; |
78 | 0 | --lexer->symbol_count; |
79 | 0 | } |
80 | | |
81 | | /** Fast forward when the current symbol is known to be a 1-byte char. */ |
82 | | static inline void |
83 | | json_skip_char(struct json_lexer *lexer) |
84 | 0 | { |
85 | 0 | ++lexer->offset; |
86 | 0 | ++lexer->symbol_count; |
87 | 0 | } |
88 | | |
89 | | /** Get the current symbol as a 1-byte char. */ |
90 | | static inline char |
91 | | json_current_char(const struct json_lexer *lexer) |
92 | 0 | { |
93 | 0 | return *(lexer->src + lexer->offset); |
94 | 0 | } |
95 | | |
96 | | /** |
97 | | * Parse a string identifier in quotes. The lexer either stops |
98 | | * right after the closing quote or returns an error position. |
99 | | * @param lexer JSON path lexer. |
100 | | * @param[out] token JSON token to store result. |
101 | | * @param quote_type Quote character that must terminate the string. |
102 | | * |
103 | | * @retval 0 Success. |
104 | | * @retval > 0 1-based position of a syntax error. |
105 | | */ |
106 | | static inline int |
107 | | json_parse_string(struct json_lexer *lexer, struct json_token *token, |
108 | | UChar32 quote_type) |
109 | 0 | { |
110 | 0 | assert(lexer->offset < lexer->src_len); |
111 | 0 | assert(quote_type == json_current_char(lexer)); |
112 | | /* The first symbol is always a 1-byte char: ' or ". */ |
113 | 0 | json_skip_char(lexer); |
114 | 0 | int str_offset = lexer->offset; |
115 | 0 | UChar32 c; |
116 | 0 | int rc; |
117 | 0 | while ((rc = json_read_symbol(lexer, &c)) == 0) { |
118 | 0 | if (c == quote_type) { |
119 | 0 | int len = lexer->offset - str_offset - 1; |
120 | 0 | if (len == 0) |
121 | 0 | return lexer->symbol_count; |
122 | 0 | token->type = JSON_TOKEN_STR; |
123 | 0 | token->str = lexer->src + str_offset; |
124 | 0 | token->len = len; |
125 | 0 | return 0; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | return rc; |
129 | 0 | } |
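 | |
 | | /* |
 | | * Example (illustrative sketch of the behaviour above, using |
 | | * json_lexer_create() as it is used elsewhere in this file): for |
 | | * the path "[\"foo\"]" the lexer yields a JSON_TOKEN_STR token |
 | | * pointing straight into the source buffer. The string is not |
 | | * copied, escape sequences are not processed, an empty string is |
 | | * a syntax error, and single quotes work the same as double ones. |
 | | * |
 | | *     struct json_lexer lexer; |
 | | *     struct json_token token; |
 | | *     json_lexer_create(&lexer, "[\"foo\"]", 7, 0); |
 | | *     json_lexer_next_token(&lexer, &token); |
 | | *     assert(token.type == JSON_TOKEN_STR); |
 | | *     assert(token.len == 3 && memcmp(token.str, "foo", 3) == 0); |
 | | */ |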
130 | | |
131 | | /** |
132 | | * Parse a digit sequence into an integer until a non-digit is met. |
133 | | * The lexer stops right after the last digit. |
134 | | * @param lexer JSON lexer. |
135 | | * @param[out] token JSON token to store result. |
136 | | * |
137 | | * @retval 0 Success. |
138 | | * @retval > 0 1-based position of a syntax error. |
139 | | */ |
140 | | static inline int |
141 | | json_parse_integer(struct json_lexer *lexer, struct json_token *token) |
142 | 0 | { |
143 | 0 | const char *end = lexer->src + lexer->src_len; |
144 | 0 | const char *pos = lexer->src + lexer->offset; |
145 | 0 | assert(pos < end); |
146 | 0 | int len = 0; |
147 | 0 | int value = 0; |
148 | 0 | char c = *pos; |
149 | 0 | if (! isdigit(c)) |
150 | 0 | return lexer->symbol_count + 1; |
151 | 0 | do { |
152 | 0 | value = value * 10 + c - (int)'0'; |
153 | 0 | ++len; |
154 | 0 | } while (++pos < end && isdigit((c = *pos))); |
155 | 0 | if (value < lexer->index_base) |
156 | 0 | return lexer->symbol_count + 1; |
157 | 0 | lexer->offset += len; |
158 | 0 | lexer->symbol_count += len; |
159 | 0 | token->type = JSON_TOKEN_NUM; |
160 | 0 | token->num = value - lexer->index_base; |
161 | 0 | return 0; |
162 | 0 | } |
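 | |
 | | /* |
 | | * Example (illustrative sketch): index_base shifts the accepted |
 | | * range, so a caller using 1-based indexing passes index_base = 1; |
 | | * "[1]" then maps to token.num == 0 and "[0]" is rejected as a |
 | | * syntax error. |
 | | * |
 | | *     struct json_lexer lexer; |
 | | *     struct json_token token; |
 | | *     json_lexer_create(&lexer, "[7]", 3, 1); |
 | | *     json_lexer_next_token(&lexer, &token); |
 | | *     assert(token.type == JSON_TOKEN_NUM && token.num == 6); |
 | | */ |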
163 | | |
164 | | /** |
165 | | * Check whether a symbol can be part of a JSON path outside of |
166 | | * ["..."]. |
167 | | */ |
168 | | static inline bool |
169 | | json_is_valid_identifier_symbol(UChar32 c) |
170 | 0 | { |
171 | 0 | return u_isUAlphabetic(c) || c == (UChar32)'_' || u_isdigit(c); |
172 | 0 | } |
173 | | |
174 | | /** |
175 | | * Parse an unquoted identifier. It can contain only alphas, |
176 | | * digits and underscores, and can not start with a digit. |
177 | | * The lexer stops right after the last alpha, digit or |
178 | | * underscore symbol. |
179 | | * @param lexer JSON lexer. |
180 | | * @param[out] token JSON token to store result. |
181 | | * |
182 | | * @retval 0 Success. |
183 | | * @retval > 0 1-based position of a syntax error. |
184 | | */ |
185 | | static inline int |
186 | | json_parse_identifier(struct json_lexer *lexer, struct json_token *token) |
187 | 0 | { |
188 | 0 | assert(lexer->offset < lexer->src_len); |
189 | 0 | int str_offset = lexer->offset; |
190 | 0 | UChar32 c; |
191 | 0 | int rc = json_read_symbol(lexer, &c); |
192 | 0 | if (rc != 0) |
193 | 0 | return rc; |
194 | | /* The first symbol can not be a digit. */ |
195 | 0 | if (!u_isalpha(c) && c != (UChar32)'_') |
196 | 0 | return lexer->symbol_count; |
197 | 0 | int last_offset = lexer->offset; |
198 | 0 | while ((rc = json_read_symbol(lexer, &c)) == 0) { |
199 | 0 | if (! json_is_valid_identifier_symbol(c)) { |
200 | 0 | json_revert_symbol(lexer, last_offset); |
201 | 0 | break; |
202 | 0 | } |
203 | 0 | last_offset = lexer->offset; |
204 | 0 | } |
205 | 0 | token->type = JSON_TOKEN_STR; |
206 | 0 | token->str = lexer->src + str_offset; |
207 | 0 | token->len = lexer->offset - str_offset; |
208 | 0 | return 0; |
209 | 0 | } |
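 | |
 | | /* |
 | | * Example (illustrative sketch): unquoted identifiers may contain |
 | | * Unicode alphabetic symbols, digits and '_'; an identifier at the |
 | | * very start of a path needs no leading dot. |
 | | * |
 | | *     struct json_lexer lexer; |
 | | *     struct json_token token; |
 | | *     json_lexer_create(&lexer, ".foo_1.bar", 10, 0); |
 | | *     json_lexer_next_token(&lexer, &token); |
 | | *     assert(token.len == 5 && memcmp(token.str, "foo_1", 5) == 0); |
 | | *     json_lexer_next_token(&lexer, &token); |
 | | *     assert(token.len == 3 && memcmp(token.str, "bar", 3) == 0); |
 | | */ |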
210 | | |
211 | | int |
212 | | json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) |
213 | 0 | { |
214 | 0 | if (json_lexer_is_eof(lexer)) { |
215 | 0 | token->type = JSON_TOKEN_END; |
216 | 0 | return 0; |
217 | 0 | } |
218 | 0 | UChar32 c; |
219 | 0 | int last_offset = lexer->offset; |
220 | 0 | int rc = json_read_symbol(lexer, &c); |
221 | 0 | if (rc != 0) |
222 | 0 | return rc; |
223 | 0 | switch(c) { |
224 | 0 | case (UChar32)'[': |
225 | | /* Error for '[\0'. */ |
226 | 0 | if (json_lexer_is_eof(lexer)) |
227 | 0 | return lexer->symbol_count; |
228 | 0 | c = json_current_char(lexer); |
229 | 0 | if (c == '"' || c == '\'') { |
230 | 0 | rc = json_parse_string(lexer, token, c); |
231 | 0 | } else if (c == '*') { |
232 | 0 | json_skip_char(lexer); |
233 | 0 | token->type = JSON_TOKEN_ANY; |
234 | 0 | } else { |
235 | 0 | rc = json_parse_integer(lexer, token); |
236 | 0 | } |
237 | 0 | if (rc != 0) |
238 | 0 | return rc; |
239 | | /* |
240 | | * An expression that starts with [ must be finished |
241 | | * with ] regardless of its type. |
242 | | */ |
243 | 0 | if (json_lexer_is_eof(lexer) || |
244 | 0 | json_current_char(lexer) != ']') |
245 | 0 | return lexer->symbol_count + 1; |
246 | | /* Skip ] - one byte char. */ |
247 | 0 | json_skip_char(lexer); |
248 | 0 | return 0; |
249 | 0 | case (UChar32)'.': |
250 | 0 | if (json_lexer_is_eof(lexer)) |
251 | 0 | return lexer->symbol_count + 1; |
252 | 0 | return json_parse_identifier(lexer, token); |
253 | 0 | default: |
254 | 0 | if (last_offset != 0) |
255 | 0 | return lexer->symbol_count; |
256 | 0 | json_revert_symbol(lexer, last_offset); |
257 | 0 | return json_parse_identifier(lexer, token); |
258 | 0 | } |
259 | 0 | } |
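 | |
 | | /* |
 | | * Example (illustrative sketch): a typical tokenization loop. For |
 | | * "foo[0].bar[*]" with index_base = 0 the lexer yields STR "foo", |
 | | * NUM 0, STR "bar", ANY and finally JSON_TOKEN_END; a non-zero |
 | | * return value is the 1-based position of the first syntax error. |
 | | * |
 | | *     struct json_lexer lexer; |
 | | *     struct json_token token; |
 | | *     int rc; |
 | | *     json_lexer_create(&lexer, "foo[0].bar[*]", 13, 0); |
 | | *     while ((rc = json_lexer_next_token(&lexer, &token)) == 0 && |
 | | *            token.type != JSON_TOKEN_END) |
 | | *             ; |
 | | *     assert(rc == 0); |
 | | */ |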
260 | | |
261 | | /** |
262 | | * Compare JSON tokens as nodes of a JSON tree. That is, including |
263 | | * parent references. |
264 | | */ |
265 | | static int |
266 | | json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b) |
267 | 0 | { |
268 | 0 | if (a->parent != b->parent) |
269 | 0 | return a->parent - b->parent; |
270 | 0 | return json_token_cmp(a, b); |
271 | 0 | } |
272 | | |
273 | | int |
274 | | json_path_cmp(const char *a, int a_len, const char *b, int b_len, |
275 | | int index_base) |
276 | 0 | { |
277 | 0 | struct json_lexer lexer_a, lexer_b; |
278 | 0 | json_lexer_create(&lexer_a, a, a_len, index_base); |
279 | 0 | json_lexer_create(&lexer_b, b, b_len, index_base); |
280 | 0 | struct json_token token_a, token_b; |
281 | | /* For the sake of json_token_cmp_in_tree(). */ |
282 | 0 | token_a.parent = NULL; |
283 | 0 | token_b.parent = NULL; |
284 | 0 | int rc_a, rc_b; |
285 | 0 | while ((rc_a = json_lexer_next_token(&lexer_a, &token_a)) == 0 && |
286 | 0 | (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 && |
287 | 0 | token_a.type != JSON_TOKEN_END && |
288 | 0 | token_b.type != JSON_TOKEN_END) { |
289 | 0 | int rc = json_token_cmp_in_tree(&token_a, &token_b); |
290 | 0 | if (rc != 0) |
291 | 0 | return rc; |
292 | 0 | } |
293 | | /* Paths a and b must be valid. */ |
294 | 0 | assert(rc_a == 0 && rc_b == 0); |
295 | | /* |
296 | | * The parser stopped because the end of one of the paths |
297 | | * was reached. As JSON_TOKEN_END > JSON_TOKEN_{NUM, STR}, |
298 | | * the longer path has the lower token type and compares greater. |
299 | | */ |
300 | 0 | return token_b.type - token_a.type; |
301 | 0 | } |
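 | |
 | | /* |
 | | * Example (illustrative sketch, assuming json_token_cmp() from |
 | | * json.h compares tokens by type and content): paths producing |
 | | * identical token streams are equal regardless of spelling, and a |
 | | * path that is a proper prefix of another compares less. |
 | | * |
 | | *     assert(json_path_cmp("foo[0]", 6, "[\"foo\"][0]", 10, 0) == 0); |
 | | *     assert(json_path_cmp("foo", 3, "foo[0]", 6, 0) < 0); |
 | | */ |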
302 | | |
303 | | int |
304 | | json_path_validate(const char *path, int path_len, int index_base) |
305 | 0 | { |
306 | 0 | struct json_lexer lexer; |
307 | 0 | json_lexer_create(&lexer, path, path_len, index_base); |
308 | 0 | struct json_token token; |
309 | 0 | int rc; |
310 | 0 | while ((rc = json_lexer_next_token(&lexer, &token)) == 0 && |
311 | 0 | token.type != JSON_TOKEN_END) {} |
312 | 0 | return rc; |
313 | 0 | } |
314 | | |
315 | | int |
316 | | json_path_multikey_offset(const char *path, int path_len, int index_base) |
317 | 0 | { |
318 | 0 | struct json_lexer lexer; |
319 | 0 | json_lexer_create(&lexer, path, path_len, index_base); |
320 | 0 | struct json_token token; |
321 | 0 | int rc, last_lexer_offset = 0; |
322 | 0 | while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { |
323 | 0 | if (token.type == JSON_TOKEN_ANY) |
324 | 0 | return last_lexer_offset; |
325 | 0 | else if (token.type == JSON_TOKEN_END) |
326 | 0 | break; |
327 | 0 | last_lexer_offset = lexer.offset; |
328 | 0 | } |
329 | 0 | assert(rc == 0); |
330 | 0 | return path_len; |
331 | 0 | } |
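 | |
 | | /* |
 | | * Example (illustrative sketch): the returned offset points at the |
 | | * "[*]" part of a multikey path and equals path_len when the path |
 | | * contains no [*] token at all. |
 | | * |
 | | *     assert(json_path_multikey_offset("foo.bar[*].baz", 14, 0) == 7); |
 | | *     assert(json_path_multikey_offset("foo.bar", 7, 0) == 7); |
 | | */ |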
332 | | |
333 | | /** |
334 | | * A snprintf-style helper to print an individual token key. |
335 | | */ |
336 | | static int |
337 | | json_token_snprint(char *buf, int size, const struct json_token *token, |
338 | | int index_base) |
339 | 0 | { |
340 | 0 | int len = 0; |
341 | 0 | switch (token->type) { |
342 | 0 | case JSON_TOKEN_NUM: |
343 | 0 | len = snprintf(buf, size, "[%d]", token->num + index_base); |
344 | 0 | break; |
345 | 0 | case JSON_TOKEN_STR: |
346 | 0 | len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str); |
347 | 0 | break; |
348 | 0 | case JSON_TOKEN_ANY: |
349 | 0 | len = snprintf(buf, size, "[*]"); |
350 | 0 | break; |
351 | 0 | default: |
352 | 0 | unreachable(); |
353 | 0 | } |
354 | 0 | return len; |
355 | 0 | } |
356 | | |
357 | | int |
358 | | json_tree_snprint_path(char *buf, int size, const struct json_token *token, |
359 | | int index_base) |
360 | 0 | { |
361 | | /* The token must be linked in a tree. */ |
362 | 0 | assert(token->parent != NULL); |
363 | | |
364 | | /* |
365 | | * Calculate the length of the final path string by passing |
366 | | * a 0-length buffer to the json_token_snprint() routine. |
367 | | */ |
368 | 0 | int len = 0; |
369 | 0 | for (const struct json_token *iter = token; |
370 | 0 | iter->type != JSON_TOKEN_END; iter = iter->parent) |
371 | 0 | len += json_token_snprint(NULL, 0, iter, index_base); |
372 | | |
373 | | /* |
374 | | * Nothing else to do if the caller is only interested in |
375 | | * the string length. |
376 | | */ |
377 | 0 | if (size == 0) |
378 | 0 | return len; |
379 | | |
380 | | /* |
381 | | * Write the path to the supplied buffer. |
382 | | */ |
383 | 0 | int pos = len; |
384 | 0 | char last = '\0'; |
385 | 0 | for (const struct json_token *iter = token; |
386 | 0 | iter->type != JSON_TOKEN_END; iter = iter->parent) { |
387 | 0 | pos -= json_token_snprint(NULL, 0, iter, index_base); |
388 | 0 | assert(pos >= 0); |
389 | 0 | if (pos >= size) { |
390 | | /* The token doesn't fit in the buffer. */ |
391 | 0 | continue; |
392 | 0 | } |
393 | 0 | int rc = json_token_snprint(buf + pos, size - pos, |
394 | 0 | iter, index_base); |
395 | | /* |
396 | | * Restore the character overwritten with |
397 | | * a terminating nul by json_token_snprint(). |
398 | | */ |
399 | 0 | if (last != '\0') { |
400 | 0 | assert(pos + rc < size); |
401 | 0 | buf[pos + rc] = last; |
402 | 0 | } |
403 | 0 | last = buf[pos]; |
404 | 0 | } |
405 | 0 | return len; |
406 | 0 | } |
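 | |
 | | /* |
 | | * Example (illustrative sketch, index_base 0 for brevity): like |
 | | * snprintf(), the full length is returned regardless of the buffer |
 | | * size, so the usual two-pass pattern works. A chain of tokens |
 | | * root -> "foo" -> 0 is rendered as ["foo"][0]; string parts always |
 | | * use the bracket form even if the path was written with a dot. |
 | | * |
 | | *     int len = json_tree_snprint_path(NULL, 0, token, 0); |
 | | *     char *path = malloc(len + 1); |
 | | *     if (path != NULL) |
 | | *             json_tree_snprint_path(path, len + 1, token, 0); |
 | | */ |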
407 | | |
408 | | #define MH_SOURCE 1 |
409 | | #define mh_name _json |
410 | | #define mh_key_t struct json_token * |
411 | 0 | #define mh_node_t struct json_token * |
412 | | #define mh_arg_t void * |
413 | 0 | #define mh_hash(a, arg) ((*(a))->hash) |
414 | 0 | #define mh_hash_key(a, arg) ((a)->hash) |
415 | 0 | #define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b))) |
416 | 0 | #define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b))) |
417 | | #include "salad/mhash.h" |
418 | | |
419 | | static const uint32_t hash_seed = 13U; |
420 | | |
421 | | /** |
422 | | * Compute the hash value of a JSON token. |
423 | | * |
424 | | * See the comment to json_tree::hash for implementation details. |
425 | | */ |
426 | | static uint32_t |
427 | | json_token_hash(struct json_token *token) |
428 | 0 | { |
429 | 0 | uint32_t h = token->parent->hash; |
430 | 0 | uint32_t carry = 0; |
431 | 0 | const void *data; |
432 | 0 | int data_size; |
433 | 0 | if (token->type == JSON_TOKEN_STR) { |
434 | 0 | data = token->str; |
435 | 0 | data_size = token->len; |
436 | 0 | } else if (token->type == JSON_TOKEN_NUM) { |
437 | 0 | data = &token->num; |
438 | 0 | data_size = sizeof(token->num); |
439 | 0 | } else if (token->type == JSON_TOKEN_ANY) { |
440 | 0 | data = "*"; |
441 | 0 | data_size = 1; |
442 | 0 | } else { |
443 | 0 | unreachable(); |
444 | 0 | } |
445 | 0 | PMurHash32_Process(&h, &carry, data, data_size); |
446 | 0 | return PMurHash32_Result(h, carry, data_size); |
447 | 0 | } |
448 | | |
449 | | void |
450 | | json_tree_create(struct json_tree *tree) |
451 | 2 | { |
452 | 2 | memset(tree, 0, sizeof(struct json_tree)); |
453 | 2 | tree->root.hash = hash_seed; |
454 | 2 | tree->root.type = JSON_TOKEN_END; |
455 | 2 | tree->root.max_child_idx = -1; |
456 | 2 | tree->root.sibling_idx = -1; |
457 | 2 | tree->hash = mh_json_new(); |
458 | 2 | } |
459 | | |
460 | | static void |
461 | | json_token_destroy(struct json_token *token) |
462 | 0 | { |
463 | 0 | assert(token->max_child_idx == -1); |
464 | 0 | assert(token->sibling_idx == -1); |
465 | 0 | free(token->children); |
466 | 0 | token->children = NULL; |
467 | 0 | } |
468 | | |
469 | | void |
470 | | json_tree_destroy(struct json_tree *tree) |
471 | 0 | { |
472 | 0 | json_token_destroy(&tree->root); |
473 | 0 | mh_json_delete(tree->hash); |
474 | 0 | } |
475 | | |
476 | | struct json_token * |
477 | | json_tree_lookup_slowpath(struct json_tree *tree, struct json_token *parent, |
478 | | const struct json_token *token) |
479 | 0 | { |
480 | 0 | assert(token->type == JSON_TOKEN_STR); |
481 | 0 | struct json_token key; |
482 | 0 | key.type = token->type; |
483 | 0 | key.str = token->str; |
484 | 0 | key.len = token->len; |
485 | 0 | key.parent = parent; |
486 | 0 | key.hash = json_token_hash(&key); |
487 | 0 | mh_int_t id = mh_json_find(tree->hash, &key, NULL); |
488 | 0 | if (id == mh_end(tree->hash)) |
489 | 0 | return NULL; |
490 | 0 | struct json_token **entry = mh_json_node(tree->hash, id); |
491 | 0 | assert(entry != NULL && *entry != NULL); |
492 | 0 | return *entry; |
493 | 0 | } |
494 | | |
495 | | int |
496 | | json_tree_add(struct json_tree *tree, struct json_token *parent, |
497 | | struct json_token *token) |
498 | 0 | { |
499 | 0 | assert(json_tree_lookup(tree, parent, token) == NULL); |
500 | 0 | token->parent = parent; |
501 | 0 | token->children = NULL; |
502 | 0 | token->children_capacity = 0; |
503 | 0 | token->max_child_idx = -1; |
504 | 0 | token->hash = json_token_hash(token); |
505 | 0 | int insert_idx = token->type == JSON_TOKEN_NUM ? |
506 | 0 | (int)token->num : parent->max_child_idx + 1; |
507 | | /* |
508 | | * Dynamically grow the children array if necessary. |
509 | | */ |
510 | 0 | if (insert_idx >= parent->children_capacity) { |
511 | 0 | int new_size = parent->children_capacity == 0 ? |
512 | 0 | 8 : 2 * parent->children_capacity; |
513 | 0 | while (insert_idx >= new_size) |
514 | 0 | new_size *= 2; |
515 | 0 | struct json_token **children = realloc(parent->children, |
516 | 0 | new_size * sizeof(void *)); |
517 | 0 | if (children == NULL) |
518 | 0 | return -1; /* out of memory */ |
519 | 0 | memset(children + parent->children_capacity, 0, |
520 | 0 | (new_size - parent->children_capacity) * sizeof(void *)); |
521 | 0 | parent->children = children; |
522 | 0 | parent->children_capacity = new_size; |
523 | 0 | } |
524 | | /* |
525 | | * Insert the token into the hash (only for tokens representing |
526 | | * JSON map entries, see the comment to json_tree::hash). |
527 | | */ |
528 | 0 | if (token->type == JSON_TOKEN_STR) { |
529 | 0 | mh_json_put(tree->hash, (const struct json_token **)&token, |
530 | 0 | NULL, NULL); |
531 | 0 | } |
532 | | /* |
533 | | * Success, now we can insert the new token into its parent's |
534 | | * children array. |
535 | | */ |
536 | 0 | assert(parent->children[insert_idx] == NULL); |
537 | 0 | parent->children[insert_idx] = token; |
538 | 0 | parent->max_child_idx = MAX(parent->max_child_idx, insert_idx); |
539 | 0 | token->sibling_idx = insert_idx; |
540 | 0 | assert(json_tree_lookup(tree, parent, token) == token); |
541 | 0 | return 0; |
542 | 0 | } |
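 | |
 | | /* |
 | | * Example (illustrative sketch): the caller fills in only the key |
 | | * part of a token (type plus str/len or num); json_tree_add() sets |
 | | * up the parent link, the hash and the children slot. The token and |
 | | * the memory behind token->str must stay alive for as long as the |
 | | * token is linked in the tree. |
 | | * |
 | | *     struct json_tree tree; |
 | | *     json_tree_create(&tree); |
 | | *     static struct json_token foo, first; |
 | | *     foo.type = JSON_TOKEN_STR; |
 | | *     foo.str = "foo"; |
 | | *     foo.len = 3; |
 | | *     if (json_tree_add(&tree, &tree.root, &foo) != 0) |
 | | *             return -1; |
 | | *     first.type = JSON_TOKEN_NUM; |
 | | *     first.num = 0; |
 | | *     if (json_tree_add(&tree, &foo, &first) != 0) |
 | | *             return -1; |
 | | */ |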
543 | | |
544 | | void |
545 | | json_tree_del(struct json_tree *tree, struct json_token *token) |
546 | 0 | { |
547 | 0 | struct json_token *parent = token->parent; |
548 | 0 | assert(token->sibling_idx >= 0); |
549 | 0 | assert(parent->children[token->sibling_idx] == token); |
550 | 0 | assert(json_tree_lookup(tree, parent, token) == token); |
551 | | /* |
552 | | * Clear the entry corresponding to this token in parent's |
553 | | * children array and update max_child_idx if necessary. |
554 | | */ |
555 | 0 | parent->children[token->sibling_idx] = NULL; |
556 | 0 | token->sibling_idx = -1; |
557 | 0 | while (parent->max_child_idx >= 0 && |
558 | 0 | parent->children[parent->max_child_idx] == NULL) |
559 | 0 | parent->max_child_idx--; |
560 | | /* |
561 | | * Remove the token from the hash (only for tokens representing |
562 | | * JSON map entries, see the comment to json_tree::hash). |
563 | | */ |
564 | 0 | if (token->type == JSON_TOKEN_STR) { |
565 | 0 | mh_int_t id = mh_json_find(tree->hash, token, NULL); |
566 | 0 | assert(id != mh_end(tree->hash)); |
567 | 0 | mh_json_del(tree->hash, id, NULL); |
568 | 0 | } |
569 | 0 | json_token_destroy(token); |
570 | 0 | assert(json_tree_lookup(tree, parent, token) == NULL); |
571 | 0 | } |
572 | | |
573 | | struct json_token * |
574 | | json_tree_lookup_path(struct json_tree *tree, struct json_token *root, |
575 | | const char *path, int path_len, int index_base) |
576 | 0 | { |
577 | 0 | int rc; |
578 | 0 | struct json_lexer lexer; |
579 | 0 | struct json_token token; |
580 | 0 | struct json_token *ret = root; |
581 | 0 | json_lexer_create(&lexer, path, path_len, index_base); |
582 | 0 | while ((rc = json_lexer_next_token(&lexer, &token)) == 0 && |
583 | 0 | token.type != JSON_TOKEN_END && ret != NULL) { |
584 | | /* |
585 | | * We could skip intermediate lookups by computing |
586 | | * a rolling hash of all tokens produced by the |
587 | | * lexer. But then we would still have to traverse |
588 | | * the path back to ensure it is equal to |
589 | | * the found one. For that we would have |
590 | | * to keep the stack of lexer tokens somewhere. |
591 | | * Given the complications of the alternative, |
592 | | * intermediate lookups don't seem to be so big of |
593 | | * a problem. |
594 | | */ |
595 | 0 | ret = json_tree_lookup(tree, ret, &token); |
596 | 0 | } |
597 | 0 | if (rc != 0 || token.type != JSON_TOKEN_END) |
598 | 0 | return NULL; |
599 | 0 | return ret; |
600 | 0 | } |
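 | |
 | | /* |
 | | * Example (illustrative sketch, continuing the json_tree_add() |
 | | * sketch above): the same nodes can be found again by path string, |
 | | * provided the lookup uses the same index_base. |
 | | * |
 | | *     struct json_token *node = |
 | | *             json_tree_lookup_path(&tree, &tree.root, "foo[0]", 6, 0); |
 | | *     assert(node == &first); |
 | | *     assert(json_tree_lookup_path(&tree, &tree.root, "x", 1, 0) == NULL); |
 | | */ |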
601 | | |
602 | | /** |
603 | | * Return the child of @parent following @pos or NULL if @pos |
604 | | * points to the last child in the children array. If @pos is |
605 | | * NULL, this function returns the first child. |
606 | | */ |
607 | | static struct json_token * |
608 | | json_tree_child_next(struct json_token *parent, struct json_token *pos) |
609 | 2 | { |
610 | 2 | assert(pos == NULL || pos->parent == parent); |
611 | 2 | struct json_token **arr = parent->children; |
612 | 2 | if (arr == NULL) |
613 | 2 | return NULL; |
614 | 0 | int idx = pos != NULL ? pos->sibling_idx + 1 : 0; |
615 | 0 | while (idx <= parent->max_child_idx && arr[idx] == NULL) |
616 | 0 | idx++; |
617 | 0 | return idx <= parent->max_child_idx ? arr[idx] : NULL; |
618 | 2 | } |
619 | | |
620 | | /** |
621 | | * Return the leftmost descendant of the tree rooted at @pos |
622 | | * or @pos itself if it has no children. |
623 | | */ |
624 | | static struct json_token * |
625 | | json_tree_leftmost(struct json_token *pos) |
626 | 0 | { |
627 | 0 | struct json_token *last; |
628 | 0 | do { |
629 | 0 | last = pos; |
630 | 0 | pos = json_tree_child_next(pos, NULL); |
631 | 0 | } while (pos != NULL); |
632 | 0 | return last; |
633 | 0 | } |
634 | | |
635 | | struct json_token * |
636 | | json_tree_preorder_next(struct json_token *root, struct json_token *pos) |
637 | 2 | { |
638 | 2 | struct json_token *next = json_tree_child_next(pos, NULL); |
639 | 2 | if (next != NULL) |
640 | 0 | return next; |
641 | 2 | while (pos != root) { |
642 | 0 | next = json_tree_child_next(pos->parent, pos); |
643 | 0 | if (next != NULL) |
644 | 0 | return next; |
645 | 0 | pos = pos->parent; |
646 | 0 | } |
647 | 2 | return NULL; |
648 | 2 | } |
649 | | |
650 | | struct json_token * |
651 | | json_tree_postorder_next(struct json_token *root, struct json_token *pos) |
652 | 0 | { |
653 | 0 | struct json_token *next; |
654 | 0 | if (pos == NULL) |
655 | 0 | return json_tree_leftmost(root); |
656 | 0 | if (pos == root) |
657 | 0 | return NULL; |
658 | 0 | next = json_tree_child_next(pos->parent, pos); |
659 | 0 | if (next != NULL) |
660 | 0 | return json_tree_leftmost(next); |
661 | 0 | return pos->parent; |
662 | 0 | } |
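 | |
 | | /* |
 | | * Example (illustrative sketch): walking a tree with the raw |
 | | * iterators. The pre-order walk starts from the root token itself |
 | | * and does not report it, visiting parents before their children; |
 | | * the post-order walk starts from a NULL position, visits children |
 | | * before their parents and reports the root last. |
 | | * |
 | | *     struct json_token *pos = &tree.root; |
 | | *     while ((pos = json_tree_preorder_next(&tree.root, pos)) != NULL) |
 | | *             ; |
 | | *     pos = NULL; |
 | | *     while ((pos = json_tree_postorder_next(&tree.root, pos)) != NULL) |
 | | *             ; |
 | | */ |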