/src/libucl/src/ucl_parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2013, Vsevolod Stakhov |
2 | | * All rights reserved. |
3 | | * |
4 | | * Redistribution and use in source and binary forms, with or without |
5 | | * modification, are permitted provided that the following conditions are met: |
6 | | * * Redistributions of source code must retain the above copyright |
7 | | * notice, this list of conditions and the following disclaimer. |
8 | | * * Redistributions in binary form must reproduce the above copyright |
9 | | * notice, this list of conditions and the following disclaimer in the |
10 | | * documentation and/or other materials provided with the distribution. |
11 | | * |
12 | | * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY |
13 | | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
14 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
15 | | * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY |
16 | | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
17 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
18 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
19 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
20 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
21 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
22 | | */ |
23 | | |
24 | | #include <math.h> |
25 | | #include "ucl.h" |
26 | | #include "ucl_internal.h" |
27 | | #include "ucl_chartable.h" |
28 | | |
29 | | /** |
30 | | * @file ucl_parser.c |
31 | | * The implementation of ucl parser |
32 | | */ |
33 | | |
/*
 * Plain record of a parsing position.
 * NOTE(review): the fields mirror the line/column/remain/pos members that
 * ucl_chunk_skipc updates on a chunk, so this presumably snapshots a chunk
 * cursor - confirm against the users of this struct.
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* line counter */
	unsigned int column;		/* column counter */
	size_t remain;			/* number of bytes left */
	const unsigned char *pos;	/* read position */
};
40 | | |
/**
 * Advance a chunk cursor by exactly one character.
 *
 * Nothing happens when `p` already equals the end of the chunk. Otherwise
 * a '\n' under the cursor increments the line counter and resets the column
 * to zero, any other character increments the column; then `p` and the
 * chunk position are incremented and the remaining length is decremented.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 *
 * @param chunk pointer to an object with end, line, column, pos and remain members
 * @param p modifiable pointer lvalue tracking the current character
 */
#define ucl_chunk_skipc(chunk, p)			\
do {							\
	if ((p) == (chunk)->end) {			\
		break;					\
	}						\
	if (*(p) == '\n') {				\
		(chunk)->line ++;			\
		(chunk)->column = 0;			\
	}						\
	else (chunk)->column ++;			\
	(p) ++;						\
	(chunk)->pos ++;				\
	(chunk)->remain --;				\
} while (0)
62 | | |
63 | | static inline void |
64 | | ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) |
65 | 17.3k | { |
66 | 17.3k | const char *fmt_string, *filename; |
67 | 17.3k | struct ucl_chunk *chunk = parser->chunks; |
68 | | |
69 | 17.3k | if (parser->cur_file) { |
70 | 0 | filename = parser->cur_file; |
71 | 0 | } |
72 | 17.3k | else { |
73 | 17.3k | filename = "<unknown>"; |
74 | 17.3k | } |
75 | | |
76 | 17.3k | if (chunk->pos < chunk->end) { |
77 | 15.0k | if (isgraph (*chunk->pos)) { |
78 | 13.0k | fmt_string = "error while parsing %s: " |
79 | 13.0k | "line: %d, column: %d - '%s', character: '%c'"; |
80 | 13.0k | } |
81 | 2.06k | else { |
82 | 2.06k | fmt_string = "error while parsing %s: " |
83 | 2.06k | "line: %d, column: %d - '%s', character: '0x%02x'"; |
84 | 2.06k | } |
85 | 15.0k | ucl_create_err (err, fmt_string, |
86 | 15.0k | filename, chunk->line, chunk->column, |
87 | 15.0k | str, *chunk->pos); |
88 | 15.0k | } |
89 | 2.30k | else { |
90 | 2.30k | ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", |
91 | 2.30k | filename, str); |
92 | 2.30k | } |
93 | | |
94 | 17.3k | parser->err_code = code; |
95 | 17.3k | parser->state = UCL_STATE_ERROR; |
96 | 17.3k | } |
97 | | |
98 | | static void |
99 | | ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len) |
100 | 0 | { |
101 | 0 | ucl_object_t *nobj; |
102 | |
|
103 | 0 | if (len > 0 && begin != NULL) { |
104 | 0 | nobj = ucl_object_fromstring_common (begin, len, 0); |
105 | |
|
106 | 0 | if (parser->last_comment) { |
107 | | /* We need to append data to an existing object */ |
108 | 0 | DL_APPEND (parser->last_comment, nobj); |
109 | 0 | } |
110 | 0 | else { |
111 | 0 | parser->last_comment = nobj; |
112 | 0 | } |
113 | 0 | } |
114 | 0 | } |
115 | | |
116 | | static void |
117 | | ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before) |
118 | 1.99M | { |
119 | 1.99M | if (parser->last_comment) { |
120 | 0 | ucl_object_insert_key (parser->comments, parser->last_comment, |
121 | 0 | (const char *)&obj, sizeof (void *), true); |
122 | |
|
123 | 0 | if (before) { |
124 | 0 | parser->last_comment->flags |= UCL_OBJECT_INHERITED; |
125 | 0 | } |
126 | |
|
127 | 0 | parser->last_comment = NULL; |
128 | 0 | } |
129 | 1.99M | } |
130 | | |
131 | | /** |
132 | | * Skip all comments from the current pos resolving nested and multiline comments |
133 | | * @param parser |
134 | | * @return |
135 | | */ |
136 | | static bool |
137 | | ucl_skip_comments (struct ucl_parser *parser) |
138 | 45.3k | { |
139 | 45.3k | struct ucl_chunk *chunk = parser->chunks; |
140 | 45.3k | const unsigned char *p, *beg = NULL; |
141 | 45.3k | int comments_nested = 0; |
142 | 45.3k | bool quoted = false; |
143 | | |
144 | 45.3k | p = chunk->pos; |
145 | | |
146 | 100k | start: |
147 | 100k | if (chunk->remain > 0 && *p == '#') { |
148 | 55.8k | if (parser->state != UCL_STATE_SCOMMENT && |
149 | 55.8k | parser->state != UCL_STATE_MCOMMENT) { |
150 | 55.8k | beg = p; |
151 | | |
152 | 3.38M | while (p < chunk->end) { |
153 | 3.38M | if (*p == '\n') { |
154 | 55.4k | if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { |
155 | 0 | ucl_save_comment (parser, beg, p - beg); |
156 | 0 | beg = NULL; |
157 | 0 | } |
158 | | |
159 | 55.4k | ucl_chunk_skipc (chunk, p); |
160 | | |
161 | 0 | goto start; |
162 | 55.4k | } |
163 | 3.32M | ucl_chunk_skipc (chunk, p); |
164 | 3.32M | } |
165 | 55.8k | } |
166 | 55.8k | } |
167 | 44.9k | else if (chunk->remain >= 2 && *p == '/') { |
168 | 1.21k | if (p[1] == '*') { |
169 | 958 | beg = p; |
170 | 958 | ucl_chunk_skipc (chunk, p); |
171 | 0 | comments_nested ++; |
172 | 958 | ucl_chunk_skipc (chunk, p); |
173 | | |
174 | 5.20M | while (p < chunk->end) { |
175 | 5.20M | if (*p == '"' && *(p - 1) != '\\') { |
176 | 5.50k | quoted = !quoted; |
177 | 5.50k | } |
178 | | |
179 | 5.20M | if (!quoted) { |
180 | 3.92M | if (*p == '*') { |
181 | 135k | ucl_chunk_skipc (chunk, p); |
182 | 135k | if (chunk->remain > 0 && *p == '/') { |
183 | 123k | comments_nested --; |
184 | 123k | if (comments_nested == 0) { |
185 | 28 | if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { |
186 | 0 | ucl_save_comment (parser, beg, p - beg + 1); |
187 | 0 | beg = NULL; |
188 | 0 | } |
189 | | |
190 | 28 | ucl_chunk_skipc (chunk, p); |
191 | 0 | goto start; |
192 | 28 | } |
193 | 123k | } |
194 | 135k | ucl_chunk_skipc (chunk, p); |
195 | 135k | } |
196 | 3.78M | else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { |
197 | 2.17M | comments_nested ++; |
198 | 2.17M | ucl_chunk_skipc (chunk, p); |
199 | 2.17M | ucl_chunk_skipc (chunk, p); |
200 | 0 | continue; |
201 | 2.17M | } |
202 | 3.92M | } |
203 | | |
204 | 3.02M | ucl_chunk_skipc (chunk, p); |
205 | 3.02M | } |
206 | 930 | if (comments_nested != 0) { |
207 | 930 | ucl_set_err (parser, UCL_ENESTED, |
208 | 930 | "unfinished multiline comment", &parser->err); |
209 | 930 | return false; |
210 | 930 | } |
211 | 930 | } |
212 | 1.21k | } |
213 | | |
214 | 44.4k | if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) { |
215 | 0 | ucl_save_comment (parser, beg, p - beg); |
216 | 0 | } |
217 | | |
218 | 44.4k | return true; |
219 | 100k | } |
220 | | |
/**
 * Map a magnitude suffix ('k', 'm' or 'g', any case) to its multiplier.
 * @param c suffix character
 * @param is_bytes when true powers of 1024 are used, otherwise powers of 1000
 * @return the multiplier, or 1 for an unrecognised character
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	static const struct {
		char suffix;
		long decimal;
		long binary;
	} table[] = {
			{'m', 1000 * 1000, 1024 * 1024},
			{'k', 1000, 1024},
			{'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
	};
	const int lowered = tolower (c);
	size_t idx;

	for (idx = 0; idx < sizeof (table) / sizeof (table[0]); idx ++) {
		if (table[idx].suffix != lowered) {
			continue;
		}

		return is_bytes ? table[idx].binary : table[idx].decimal;
	}

	return 1;
}
251 | | |
252 | | |
/**
 * Map a time suffix to the number of seconds it stands for.
 * Recognised (case-insensitively): 'm' minutes, 'h' hours, 'd' days,
 * 'w' weeks and 'y' years of 365 days.
 * @param c suffix character
 * @return seconds per unit, or 1 for an unrecognised character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char suffix;
		double seconds;
	} table[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			{'y', 60 * 60 * 24 * 365}
	};
	const int lowered = tolower (c);
	size_t idx;

	for (idx = 0; idx < sizeof (table) / sizeof (table[0]); idx ++) {
		if (table[idx].suffix == lowered) {
			return table[idx].seconds;
		}
	}

	return 1;
}
280 | | |
281 | | /** |
282 | | * Return true if a character is a end of an atom |
283 | | * @param c |
284 | | * @return |
285 | | */ |
286 | | static inline bool |
287 | | ucl_lex_is_atom_end (const unsigned char c) |
288 | 28.1M | { |
289 | 28.1M | return ucl_test_character (c, UCL_CHARACTER_VALUE_END); |
290 | 28.1M | } |
291 | | |
/**
 * Tell whether two consecutive characters open a comment: either a '#'
 * in the first position (the second one is then irrelevant) or the
 * slash-star pair.
 * @param c1 first character
 * @param c2 character following c1
 * @return true if a comment starts at c1
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
305 | | |
306 | | /** |
307 | | * Check variable found |
308 | | * @param parser |
309 | | * @param ptr |
310 | | * @param remain |
311 | | * @param out_len |
312 | | * @param strict |
313 | | * @param found |
314 | | * @return |
315 | | */ |
316 | | static inline const char * |
317 | | ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, |
318 | | size_t *out_len, bool strict, bool *found) |
319 | 4.64M | { |
320 | 4.64M | struct ucl_variable *var; |
321 | 4.64M | unsigned char *dst; |
322 | 4.64M | size_t dstlen; |
323 | 4.64M | bool need_free = false; |
324 | | |
325 | 9.28M | LL_FOREACH (parser->variables, var) { |
326 | 9.28M | if (strict) { |
327 | 2.84M | if (remain == var->var_len) { |
328 | 21.9k | if (memcmp (ptr, var->var, var->var_len) == 0) { |
329 | 18.4k | *out_len += var->value_len; |
330 | 18.4k | *found = true; |
331 | 18.4k | return (ptr + var->var_len); |
332 | 18.4k | } |
333 | 21.9k | } |
334 | 2.84M | } |
335 | 6.43M | else { |
336 | 6.43M | if (remain >= var->var_len) { |
337 | 6.43M | if (memcmp (ptr, var->var, var->var_len) == 0) { |
338 | 2.82M | *out_len += var->value_len; |
339 | 2.82M | *found = true; |
340 | 2.82M | return (ptr + var->var_len); |
341 | 2.82M | } |
342 | 6.43M | } |
343 | 6.43M | } |
344 | 9.28M | } |
345 | | |
346 | | /* XXX: can only handle ${VAR} */ |
347 | 1.79M | if (!(*found) && parser->var_handler != NULL && strict) { |
348 | | /* Call generic handler */ |
349 | 0 | if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, |
350 | 0 | parser->var_data)) { |
351 | 0 | *found = true; |
352 | 0 | if (need_free) { |
353 | 0 | free (dst); |
354 | 0 | } |
355 | 0 | return (ptr + remain); |
356 | 0 | } |
357 | 0 | } |
358 | | |
359 | 1.79M | return ptr; |
360 | 1.79M | } |
361 | | |
/**
 * Measure one '$' construct during the sizing pass of variable expansion.
 *
 * `ptr` points at the character right after a '$'. Three forms are told apart:
 *  - another '$': an escaped dollar sign, counted as one output byte;
 *  - '{': a braced name, looked up strictly once the closing brace is found;
 *  - anything else: a bare name matched by prefix.
 *
 * @param parser parser object
 * @param ptr character following the '$'
 * @param remain bytes available starting at ptr
 * @param out_len running output length, updated in place
 * @param vars_found set to true once any variable is matched
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *next = ptr;
	bool matched = false;

	if (*ptr == '$') {
		/* Escaped dollar sign: one byte of output, skip the second '$' */
		(*out_len) ++;
		return ptr + 1;
	}

	if (*ptr != '{') {
		/* Bare form */
		next = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &matched);

		if (matched) {
			*vars_found = true;
		}
		else {
			/* Only the '$' itself is accounted for here */
			(*out_len) ++;
		}

		return next;
	}

	/* Braced form: look for the closing brace */
	const char *limit = ptr + remain;
	const char *cur;

	for (cur = ptr + 1; cur < limit; cur ++) {
		if (*cur != '}') {
			continue;
		}

		next = ucl_check_variable_safe (parser, ptr + 1, cur - ptr - 1,
				out_len, true, &matched);

		if (matched) {
			/* {} must be excluded actually */
			next ++;
			*vars_found = true;
		}
		else {
			/* Unknown name: the two bytes '$' and '{' are kept verbatim */
			*out_len += 2;
		}

		break;
	}

	if (cur == limit) {
		/* No closing brace at all: just the '$' is counted */
		(*out_len) ++;
	}

	return next;
}
421 | | |
422 | | /** |
423 | | * Expand a single variable |
424 | | * @param parser |
425 | | * @param ptr |
426 | | * @param in_len |
427 | | * @param dest |
428 | | * @param out_len |
429 | | * @return |
430 | | */ |
431 | | static const char * |
432 | | ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, |
433 | | size_t in_len, unsigned char **dest, size_t out_len) |
434 | 4.69M | { |
435 | 4.69M | unsigned char *d = *dest, *dst; |
436 | 4.69M | const char *p = ptr + 1, *ret; |
437 | 4.69M | struct ucl_variable *var; |
438 | 4.69M | size_t dstlen; |
439 | 4.69M | bool need_free = false; |
440 | 4.69M | bool found = false; |
441 | 4.69M | bool strict = false; |
442 | | |
443 | 4.69M | ret = ptr + 1; |
444 | | /* For the $ sign */ |
445 | 4.69M | in_len --; |
446 | | |
447 | 4.69M | if (*p == '$') { |
448 | 69.3k | *d++ = *p++; |
449 | 69.3k | *dest = d; |
450 | 69.3k | return p; |
451 | 69.3k | } |
452 | 4.62M | else if (*p == '{') { |
453 | 1.42M | p ++; |
454 | 1.42M | in_len --; |
455 | 1.42M | strict = true; |
456 | 1.42M | ret += 2; |
457 | 1.42M | } |
458 | | |
459 | 9.25M | LL_FOREACH (parser->variables, var) { |
460 | 9.25M | if (out_len >= var->value_len && in_len >= (var->var_len + (strict ? 1 : 0))) { |
461 | 9.25M | if (memcmp (p, var->var, var->var_len) == 0) { |
462 | 2.87M | if (!strict || p[var->var_len] == '}') { |
463 | 2.84M | memcpy (d, var->value, var->value_len); |
464 | 2.84M | ret += var->var_len; |
465 | 2.84M | d += var->value_len; |
466 | 2.84M | found = true; |
467 | 2.84M | break; |
468 | 2.84M | } |
469 | 2.87M | } |
470 | 9.25M | } |
471 | 9.25M | } |
472 | | |
473 | 4.62M | if (!found) { |
474 | 1.78M | if (strict && parser->var_handler != NULL) { |
475 | 0 | dstlen = out_len; |
476 | |
|
477 | 0 | if (parser->var_handler (p, in_len, &dst, &dstlen, &need_free, |
478 | 0 | parser->var_data)) { |
479 | 0 | if (dstlen > out_len) { |
480 | | /* We do not have enough space! */ |
481 | 0 | if (need_free) { |
482 | 0 | free (dst); |
483 | 0 | } |
484 | 0 | } |
485 | 0 | else { |
486 | 0 | memcpy(d, dst, dstlen); |
487 | 0 | ret += in_len; |
488 | 0 | d += dstlen; |
489 | 0 | found = true; |
490 | |
|
491 | 0 | if (need_free) { |
492 | 0 | free(dst); |
493 | 0 | } |
494 | 0 | } |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | | /* Leave variable as is, in this case we use dest */ |
499 | 1.78M | if (!found) { |
500 | 1.78M | if (strict && out_len >= 2) { |
501 | | /* Copy '${' */ |
502 | 1.40M | memcpy (d, ptr, 2); |
503 | 1.40M | d += 2; |
504 | 1.40M | ret --; |
505 | 1.40M | } |
506 | 378k | else { |
507 | 378k | memcpy (d, ptr, 1); |
508 | 378k | d ++; |
509 | 378k | } |
510 | 1.78M | } |
511 | 1.78M | } |
512 | | |
513 | 4.62M | *dest = d; |
514 | 4.62M | return ret; |
515 | 4.69M | } |
516 | | |
517 | | /** |
518 | | * Expand variables in string |
519 | | * @param parser |
520 | | * @param dst |
521 | | * @param src |
522 | | * @param in_len |
523 | | * @return |
524 | | */ |
525 | | static ssize_t |
526 | | ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, |
527 | | const char *src, size_t in_len) |
528 | 16.4k | { |
529 | 16.4k | const char *p, *end = src + in_len; |
530 | 16.4k | unsigned char *d, *d_end; |
531 | 16.4k | size_t out_len = 0; |
532 | 16.4k | bool vars_found = false; |
533 | | |
534 | 16.4k | if (parser->flags & UCL_PARSER_DISABLE_MACRO) { |
535 | 0 | *dst = NULL; |
536 | 0 | return in_len; |
537 | 0 | } |
538 | | |
539 | 16.4k | p = src; |
540 | 11.4M | while (p != end) { |
541 | 11.4M | if (*p == '$' && p + 1 != end) { |
542 | 4.71M | p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); |
543 | 4.71M | } |
544 | 6.75M | else { |
545 | 6.75M | p ++; |
546 | 6.75M | out_len ++; |
547 | 6.75M | } |
548 | 11.4M | } |
549 | | |
550 | 16.4k | if (!vars_found) { |
551 | | /* Trivial case */ |
552 | 16.0k | *dst = NULL; |
553 | 16.0k | return in_len; |
554 | 16.0k | } |
555 | | |
556 | 398 | *dst = UCL_ALLOC (out_len + 1); |
557 | 398 | if (*dst == NULL) { |
558 | 0 | return in_len; |
559 | 0 | } |
560 | | |
561 | 398 | d = *dst; |
562 | 398 | d_end = d + out_len; |
563 | 398 | p = src; |
564 | 11.1M | while (p != end && d != d_end) { |
565 | 11.1M | if (*p == '$' && p + 1 != end) { |
566 | 4.69M | p = ucl_expand_single_variable (parser, p, end - p, &d, d_end - d); |
567 | 4.69M | } |
568 | 6.42M | else { |
569 | 6.42M | *d++ = *p++; |
570 | 6.42M | } |
571 | 11.1M | } |
572 | | |
573 | 398 | *d = '\0'; |
574 | | |
575 | 398 | return out_len; |
576 | 398 | } |
577 | | |
578 | | /** |
579 | | * Store or copy pointer to the trash stack |
580 | | * @param parser parser object |
581 | | * @param src src string |
582 | | * @param dst destination buffer (trash stack pointer) |
583 | | * @param dst_const const destination pointer (e.g. value of object) |
584 | | * @param in_len input length |
585 | | * @param need_unescape need to unescape source (and copy it) |
586 | | * @param need_lowercase need to lowercase value (and copy) |
587 | | * @param need_expand need to expand variables (and copy as well) |
588 | | * @param unescape_squote unescape single quoted string |
589 | | * @return output length (excluding \0 symbol) |
590 | | */ |
591 | | static inline ssize_t |
592 | | ucl_copy_or_store_ptr (struct ucl_parser *parser, |
593 | | const unsigned char *src, unsigned char **dst, |
594 | | const char **dst_const, size_t in_len, |
595 | | bool need_unescape, bool need_lowercase, bool need_expand, |
596 | | bool unescape_squote) |
597 | 825k | { |
598 | 825k | ssize_t ret = -1, tret; |
599 | 825k | unsigned char *tmp; |
600 | | |
601 | 825k | if (need_unescape || need_lowercase || |
602 | 825k | (need_expand && parser->variables != NULL) || |
603 | 825k | !(parser->flags & UCL_PARSER_ZEROCOPY)) { |
604 | | /* Copy string */ |
605 | 825k | *dst = UCL_ALLOC (in_len + 1); |
606 | 825k | if (*dst == NULL) { |
607 | 0 | ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", |
608 | 0 | &parser->err); |
609 | 0 | return false; |
610 | 0 | } |
611 | 825k | if (need_lowercase) { |
612 | 0 | ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); |
613 | 0 | } |
614 | 825k | else { |
615 | 825k | ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); |
616 | 825k | } |
617 | | |
618 | 825k | if (need_unescape) { |
619 | 1.71k | if (!unescape_squote) { |
620 | 1.69k | ret = ucl_unescape_json_string (*dst, ret); |
621 | 1.69k | } |
622 | 20 | else { |
623 | 20 | ret = ucl_unescape_squoted_string (*dst, ret); |
624 | 20 | } |
625 | 1.71k | } |
626 | | |
627 | 825k | if (need_expand) { |
628 | 693 | tmp = *dst; |
629 | 693 | tret = ret; |
630 | 693 | ret = ucl_expand_variable (parser, dst, tmp, ret); |
631 | 693 | if (*dst == NULL) { |
632 | | /* Nothing to expand */ |
633 | 578 | *dst = tmp; |
634 | 578 | ret = tret; |
635 | 578 | } |
636 | 115 | else { |
637 | | /* Free unexpanded value */ |
638 | 115 | UCL_FREE (in_len + 1, tmp); |
639 | 115 | } |
640 | 693 | } |
641 | 825k | *dst_const = *dst; |
642 | 825k | } |
643 | 0 | else { |
644 | 0 | *dst_const = src; |
645 | 0 | ret = in_len; |
646 | 0 | } |
647 | | |
648 | 825k | return ret; |
649 | 825k | } |
650 | | |
651 | | /** |
652 | | * Create and append an object at the specified level |
653 | | * @param parser |
654 | | * @param is_array |
655 | | * @param level |
656 | | * @return |
657 | | */ |
658 | | static inline ucl_object_t * |
659 | | ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, |
660 | | bool is_array, uint32_t level, bool has_obrace) |
661 | 126k | { |
662 | 126k | struct ucl_stack *st; |
663 | 126k | ucl_object_t *nobj; |
664 | | |
665 | 126k | if (obj == NULL) { |
666 | 16.2k | nobj = ucl_object_new_full (is_array ? UCL_ARRAY : UCL_OBJECT, parser->chunks->priority); |
667 | 16.2k | if (nobj == NULL) { |
668 | 0 | goto enomem0; |
669 | 0 | } |
670 | 110k | } else { |
671 | 110k | if (obj->type == (is_array ? UCL_OBJECT : UCL_ARRAY)) { |
672 | | /* Bad combination for merge: array and object */ |
673 | 0 | ucl_set_err (parser, UCL_EMERGE, |
674 | 0 | "cannot merge an object with an array", |
675 | 0 | &parser->err); |
676 | |
|
677 | 0 | return NULL; |
678 | 0 | } |
679 | 110k | nobj = obj; |
680 | 110k | nobj->type = is_array ? UCL_ARRAY : UCL_OBJECT; |
681 | 110k | } |
682 | | |
683 | 126k | if (!is_array) { |
684 | 125k | if (nobj->value.ov == NULL) { |
685 | 125k | nobj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); |
686 | 125k | if (nobj->value.ov == NULL) { |
687 | 0 | goto enomem1; |
688 | 0 | } |
689 | 125k | } |
690 | 125k | parser->state = UCL_STATE_KEY; |
691 | 125k | } else { |
692 | 684 | parser->state = UCL_STATE_VALUE; |
693 | 684 | } |
694 | | |
695 | 126k | st = UCL_ALLOC (sizeof (struct ucl_stack)); |
696 | | |
697 | 126k | if (st == NULL) { |
698 | 0 | goto enomem1; |
699 | 0 | } |
700 | | |
701 | 126k | st->obj = nobj; |
702 | | |
703 | 126k | if (level >= UINT16_MAX) { |
704 | 1 | ucl_set_err (parser, UCL_ENESTED, |
705 | 1 | "objects are nesting too deep (over 65535 limit)", |
706 | 1 | &parser->err); |
707 | 1 | if (nobj != obj) { |
708 | 0 | ucl_object_unref (obj); |
709 | 0 | } |
710 | | |
711 | 1 | UCL_FREE(sizeof (struct ucl_stack), st); |
712 | | |
713 | 1 | return NULL; |
714 | 1 | } |
715 | | |
716 | | |
717 | 126k | st->e.params.level = level; |
718 | 126k | st->e.params.line = parser->chunks->line; |
719 | 126k | st->chunk = parser->chunks; |
720 | | |
721 | 126k | if (has_obrace) { |
722 | 1.26k | st->e.params.flags = UCL_STACK_HAS_OBRACE; |
723 | 1.26k | } |
724 | 125k | else { |
725 | 125k | st->e.params.flags = 0; |
726 | 125k | } |
727 | | |
728 | 126k | LL_PREPEND (parser->stack, st); |
729 | 126k | parser->cur_obj = nobj; |
730 | | |
731 | 126k | return nobj; |
732 | 0 | enomem1: |
733 | 0 | if (nobj != obj) |
734 | 0 | ucl_object_unref (nobj); |
735 | 0 | enomem0: |
736 | 0 | ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", |
737 | 0 | &parser->err); |
738 | 0 | return NULL; |
739 | 0 | } |
740 | | |
741 | | int |
742 | | ucl_maybe_parse_number (ucl_object_t *obj, |
743 | | const char *start, const char *end, const char **pos, |
744 | | bool allow_double, bool number_bytes, bool allow_time) |
745 | 1.20M | { |
746 | 1.20M | const char *p = start, *c = start; |
747 | 1.20M | char *endptr; |
748 | 1.20M | bool got_dot = false, got_exp = false, need_double = false, |
749 | 1.20M | is_time = false, valid_start = false, is_hex = false; |
750 | 1.20M | int is_neg = 0; |
751 | 1.20M | double dv = 0; |
752 | 1.20M | int64_t lv = 0; |
753 | | |
754 | 1.20M | if (*p == '-') { |
755 | 1.11k | is_neg = 1; |
756 | 1.11k | c ++; |
757 | 1.11k | p ++; |
758 | 1.11k | } |
759 | 2.71M | while (p < end) { |
760 | 2.71M | if (is_hex && isxdigit (*p)) { |
761 | 8 | p ++; |
762 | 8 | } |
763 | 2.71M | else if (isdigit (*p)) { |
764 | 1.50M | valid_start = true; |
765 | 1.50M | p ++; |
766 | 1.50M | } |
767 | 1.21M | else if (!is_hex && (*p == 'x' || *p == 'X')) { |
768 | 297 | is_hex = true; |
769 | 297 | allow_double = false; |
770 | 297 | c = p + 1; |
771 | 297 | p ++; |
772 | 297 | } |
773 | 1.21M | else if (allow_double) { |
774 | 1.20M | if (p == c) { |
775 | | /* Empty digits sequence, not a number */ |
776 | 39 | *pos = start; |
777 | 39 | return EINVAL; |
778 | 39 | } |
779 | 1.20M | else if (*p == '.') { |
780 | 1.63k | if (got_dot) { |
781 | | /* Double dots, not a number */ |
782 | 0 | *pos = start; |
783 | 0 | return EINVAL; |
784 | 0 | } |
785 | 1.63k | else { |
786 | 1.63k | got_dot = true; |
787 | 1.63k | need_double = true; |
788 | 1.63k | p ++; |
789 | 1.63k | } |
790 | 1.63k | } |
791 | 1.20M | else if (*p == 'e' || *p == 'E') { |
792 | 342 | if (got_exp) { |
793 | | /* Double exp, not a number */ |
794 | 0 | *pos = start; |
795 | 0 | return EINVAL; |
796 | 0 | } |
797 | 342 | else { |
798 | 342 | got_exp = true; |
799 | 342 | need_double = true; |
800 | 342 | p ++; |
801 | 342 | if (p >= end) { |
802 | 264 | *pos = start; |
803 | 264 | return EINVAL; |
804 | 264 | } |
805 | 78 | if (!isdigit (*p) && *p != '+' && *p != '-') { |
806 | | /* Wrong exponent sign */ |
807 | 75 | *pos = start; |
808 | 75 | return EINVAL; |
809 | 75 | } |
810 | 3 | else { |
811 | 3 | p ++; |
812 | 3 | } |
813 | 78 | } |
814 | 342 | } |
815 | 1.20M | else { |
816 | | /* Got the end of the number, need to check */ |
817 | 1.20M | break; |
818 | 1.20M | } |
819 | 1.20M | } |
820 | 297 | else if (!allow_double && *p == '.') { |
821 | | /* Unexpected dot */ |
822 | 144 | *pos = start; |
823 | 144 | return EINVAL; |
824 | 144 | } |
825 | 153 | else { |
826 | 153 | break; |
827 | 153 | } |
828 | 2.71M | } |
829 | | |
830 | 1.20M | if (!valid_start || p == c) { |
831 | 145 | *pos = start; |
832 | 145 | return EINVAL; |
833 | 145 | } |
834 | | |
835 | 1.20M | char numbuf[128]; |
836 | | |
837 | 1.20M | if ((size_t)(p - c + 1) >= sizeof(numbuf)) { |
838 | 1 | *pos = start; |
839 | 1 | return EINVAL; |
840 | 1 | } |
841 | | |
842 | 1.20M | if (is_neg) { |
843 | 1.07k | numbuf[0] = '-'; |
844 | 1.07k | ucl_strlcpy (&numbuf[1], c, p - c + 1); |
845 | 1.07k | } |
846 | 1.20M | else { |
847 | 1.20M | ucl_strlcpy (numbuf, c, p - c + 1); |
848 | 1.20M | } |
849 | | |
850 | 1.20M | errno = 0; |
851 | 1.20M | if (need_double) { |
852 | 1.64k | dv = strtod (numbuf, &endptr); |
853 | 1.64k | } |
854 | 1.20M | else { |
855 | 1.20M | if (is_hex) { |
856 | 8 | lv = strtoimax (numbuf, &endptr, 16); |
857 | 8 | } |
858 | 1.20M | else { |
859 | 1.20M | lv = strtoimax (numbuf, &endptr, 10); |
860 | 1.20M | } |
861 | 1.20M | } |
862 | 1.20M | if (errno == ERANGE) { |
863 | 113 | *pos = start; |
864 | 113 | return ERANGE; |
865 | 113 | } |
866 | | |
867 | | /* Now check endptr and move it from numbuf to the real ending */ |
868 | 1.20M | if (endptr != NULL) { |
869 | 1.20M | long shift = endptr - numbuf - is_neg; |
870 | 1.20M | endptr = (char *)c + shift; |
871 | 1.20M | } |
872 | 1.20M | if (endptr >= end) { |
873 | 671 | p = end; |
874 | 671 | goto set_obj; |
875 | 671 | } |
876 | 1.20M | if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { |
877 | 1.20M | p = endptr; |
878 | 1.20M | goto set_obj; |
879 | 1.20M | } |
880 | | |
881 | 6.34k | if (endptr < end && endptr != start) { |
882 | 6.34k | switch (*p) { |
883 | 423 | case 'm': |
884 | 732 | case 'M': |
885 | 1.13k | case 'g': |
886 | 1.13k | case 'G': |
887 | 1.17k | case 'k': |
888 | 1.17k | case 'K': |
889 | 1.17k | if (end - p >= 2) { |
890 | 957 | if (p[1] == 's' || p[1] == 'S') { |
891 | | /* Milliseconds */ |
892 | 44 | if (!need_double) { |
893 | 44 | need_double = true; |
894 | 44 | dv = lv; |
895 | 44 | } |
896 | 44 | is_time = true; |
897 | 44 | if (p[0] == 'm' || p[0] == 'M') { |
898 | 44 | dv /= 1000.; |
899 | 44 | } |
900 | 0 | else { |
901 | 0 | dv *= ucl_lex_num_multiplier (*p, false); |
902 | 0 | } |
903 | 44 | p += 2; |
904 | 44 | if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { |
905 | 9 | *pos = start; |
906 | 9 | return EINVAL; |
907 | 9 | } |
908 | 35 | goto set_obj; |
909 | 44 | } |
910 | 913 | else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { |
911 | | /* Bytes */ |
912 | 86 | if (need_double) { |
913 | 8 | need_double = false; |
914 | 8 | lv = dv; |
915 | 8 | } |
916 | 86 | lv *= ucl_lex_num_multiplier (*p, true); |
917 | 86 | p += 2; |
918 | 86 | if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { |
919 | 22 | *pos = start; |
920 | 22 | return EINVAL; |
921 | 22 | } |
922 | 64 | goto set_obj; |
923 | 86 | } |
924 | 827 | else if (ucl_lex_is_atom_end (p[1])) { |
925 | 131 | if (need_double) { |
926 | 0 | dv *= ucl_lex_num_multiplier (*p, false); |
927 | 0 | } |
928 | 131 | else { |
929 | 131 | lv *= ucl_lex_num_multiplier (*p, number_bytes); |
930 | 131 | } |
931 | 131 | p ++; |
932 | 131 | goto set_obj; |
933 | 131 | } |
934 | 696 | else if (allow_time && end - p >= 3) { |
935 | 696 | if (tolower (p[0]) == 'm' && |
936 | 696 | tolower (p[1]) == 'i' && |
937 | 696 | tolower (p[2]) == 'n') { |
938 | | /* Minutes */ |
939 | 65 | if (!need_double) { |
940 | 3 | need_double = true; |
941 | 3 | dv = lv; |
942 | 3 | } |
943 | 65 | is_time = true; |
944 | 65 | dv *= 60.; |
945 | 65 | p += 3; |
946 | 65 | if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { |
947 | 26 | *pos = start; |
948 | 26 | return EINVAL; |
949 | 26 | } |
950 | 39 | goto set_obj; |
951 | 65 | } |
952 | 696 | } |
953 | 957 | } |
954 | 218 | else { |
955 | 218 | if (need_double) { |
956 | 1 | dv *= ucl_lex_num_multiplier (*p, false); |
957 | 1 | } |
958 | 217 | else { |
959 | 217 | lv *= ucl_lex_num_multiplier (*p, number_bytes); |
960 | 217 | } |
961 | 218 | p ++; |
962 | 218 | if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { |
963 | 0 | *pos = start; |
964 | 0 | return EINVAL; |
965 | 0 | } |
966 | 218 | goto set_obj; |
967 | 218 | } |
968 | 631 | break; |
969 | 631 | case 'S': |
970 | 810 | case 's': |
971 | 810 | if (allow_time && |
972 | 810 | (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { |
973 | 691 | if (!need_double) { |
974 | 0 | need_double = true; |
975 | 0 | dv = lv; |
976 | 0 | } |
977 | 691 | p ++; |
978 | 691 | is_time = true; |
979 | 691 | goto set_obj; |
980 | 691 | } |
981 | 119 | break; |
982 | 119 | case 'h': |
983 | 16 | case 'H': |
984 | 18 | case 'd': |
985 | 18 | case 'D': |
986 | 20 | case 'w': |
987 | 20 | case 'W': |
988 | 213 | case 'Y': |
989 | 314 | case 'y': |
990 | 314 | if (allow_time && |
991 | 314 | (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { |
992 | 270 | if (!need_double) { |
993 | 270 | need_double = true; |
994 | 270 | dv = lv; |
995 | 270 | } |
996 | 270 | is_time = true; |
997 | 270 | dv *= ucl_lex_time_multiplier (*p); |
998 | 270 | p ++; |
999 | 270 | goto set_obj; |
1000 | 270 | } |
1001 | 44 | break; |
1002 | 143 | case '\t': |
1003 | 149 | case ' ': |
1004 | 306 | while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { |
1005 | 157 | p++; |
1006 | 157 | } |
1007 | 149 | if (ucl_lex_is_atom_end(*p)) |
1008 | 8 | goto set_obj; |
1009 | 141 | break; |
1010 | 6.34k | } |
1011 | 6.34k | } |
1012 | 0 | else if (endptr == end) { |
1013 | | /* Just a number at the end of chunk */ |
1014 | 0 | p = end; |
1015 | 0 | goto set_obj; |
1016 | 0 | } |
1017 | | |
1018 | 4.83k | *pos = c; |
1019 | 4.83k | return EINVAL; |
1020 | | |
1021 | 1.20M | set_obj: |
1022 | 1.20M | if (obj != NULL) { |
1023 | 1.20M | if (allow_double && (need_double || is_time)) { |
1024 | 1.69k | if (!is_time) { |
1025 | 657 | obj->type = UCL_FLOAT; |
1026 | 657 | } |
1027 | 1.03k | else { |
1028 | 1.03k | obj->type = UCL_TIME; |
1029 | 1.03k | } |
1030 | 1.69k | obj->value.dv = dv; |
1031 | 1.69k | } |
1032 | 1.20M | else { |
1033 | 1.20M | obj->type = UCL_INT; |
1034 | 1.20M | obj->value.iv = lv; |
1035 | 1.20M | } |
1036 | 1.20M | } |
1037 | 1.20M | *pos = p; |
1038 | 1.20M | return 0; |
1039 | 6.34k | } |
1040 | | |
1041 | | /** |
1042 | | * Parse possible number |
1043 | | * @param parser |
1044 | | * @param chunk |
1045 | | * @param obj |
1046 | | * @return true if a number has been parsed |
1047 | | */ |
1048 | | static bool |
1049 | | ucl_lex_number (struct ucl_parser *parser, |
1050 | | struct ucl_chunk *chunk, ucl_object_t *obj) |
1051 | 1.20M | { |
1052 | 1.20M | const unsigned char *pos; |
1053 | 1.20M | int ret; |
1054 | | |
1055 | 1.20M | ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, |
1056 | 1.20M | true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); |
1057 | | |
1058 | 1.20M | if (ret == 0) { |
1059 | 1.20M | chunk->remain -= pos - chunk->pos; |
1060 | 1.20M | chunk->column += pos - chunk->pos; |
1061 | 1.20M | chunk->pos = pos; |
1062 | 1.20M | return true; |
1063 | 1.20M | } |
1064 | 5.67k | else if (ret == ERANGE) { |
1065 | 113 | ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", |
1066 | 113 | &parser->err); |
1067 | 113 | } |
1068 | | |
1069 | 5.67k | return false; |
1070 | 1.20M | } |
1071 | | |
1072 | | /** |
1073 | | * Parse quoted string with possible escapes |
1074 | | * @param parser |
1075 | | * @param chunk |
1076 | | * @param need_unescape |
1077 | | * @param ucl_escape |
1078 | | * @param var_expand |
1079 | | * @return true if a string has been parsed |
1080 | | */ |
1081 | | static bool |
1082 | | ucl_lex_json_string (struct ucl_parser *parser, |
1083 | | struct ucl_chunk *chunk, |
1084 | | bool *need_unescape, |
1085 | | bool *ucl_escape, |
1086 | | bool *var_expand) |
1087 | 5.17k | { |
1088 | 5.17k | const unsigned char *p = chunk->pos; |
1089 | 5.17k | unsigned char c; |
1090 | 5.17k | int i; |
1091 | | |
1092 | 301k | while (p < chunk->end) { |
1093 | 300k | c = *p; |
1094 | 300k | if (c < 0x1F) { |
1095 | | /* Unmasked control character */ |
1096 | 52 | if (c == '\n') { |
1097 | 6 | ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", |
1098 | 6 | &parser->err); |
1099 | 6 | } |
1100 | 46 | else { |
1101 | 46 | ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", |
1102 | 46 | &parser->err); |
1103 | 46 | } |
1104 | 52 | return false; |
1105 | 52 | } |
1106 | 300k | else if (c == '\\') { |
1107 | 7 | ucl_chunk_skipc (chunk, p); |
1108 | 7 | if (p >= chunk->end) { |
1109 | 1 | ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", |
1110 | 1 | &parser->err); |
1111 | 1 | return false; |
1112 | 1 | } |
1113 | 6 | c = *p; |
1114 | 6 | if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { |
1115 | 1 | if (c == 'u') { |
1116 | 1 | ucl_chunk_skipc (chunk, p); |
1117 | 1 | for (i = 0; i < 4 && p < chunk->end; i ++) { |
1118 | 1 | if (!isxdigit (*p)) { |
1119 | 1 | ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", |
1120 | 1 | &parser->err); |
1121 | 1 | return false; |
1122 | 1 | } |
1123 | 0 | ucl_chunk_skipc (chunk, p); |
1124 | 0 | } |
1125 | 0 | if (p >= chunk->end) { |
1126 | 0 | ucl_set_err (parser, UCL_ESYNTAX, |
1127 | 0 | "unfinished escape character", |
1128 | 0 | &parser->err); |
1129 | 0 | return false; |
1130 | 0 | } |
1131 | 0 | } |
1132 | 0 | else { |
1133 | 0 | ucl_chunk_skipc (chunk, p); |
1134 | 0 | } |
1135 | 1 | } |
1136 | 5 | *need_unescape = true; |
1137 | 5 | *ucl_escape = true; |
1138 | 5 | continue; |
1139 | 6 | } |
1140 | 300k | else if (c == '"') { |
1141 | 4.46k | ucl_chunk_skipc (chunk, p); |
1142 | 0 | return true; |
1143 | 4.46k | } |
1144 | 296k | else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { |
1145 | 29.0k | *ucl_escape = true; |
1146 | 29.0k | } |
1147 | 267k | else if (c == '$') { |
1148 | 29.1k | *var_expand = true; |
1149 | 29.1k | } |
1150 | 296k | ucl_chunk_skipc (chunk, p); |
1151 | 296k | } |
1152 | | |
1153 | 662 | ucl_set_err (parser, UCL_ESYNTAX, |
1154 | 662 | "no quote at the end of json string", |
1155 | 662 | &parser->err); |
1156 | 662 | return false; |
1157 | 5.17k | } |
1158 | | |
1159 | | /** |
1160 | | * Process single quoted string |
1161 | | * @param parser |
1162 | | * @param chunk |
1163 | | * @param need_unescape |
1164 | | * @return |
1165 | | */ |
1166 | | static bool |
1167 | | ucl_lex_squoted_string (struct ucl_parser *parser, |
1168 | | struct ucl_chunk *chunk, bool *need_unescape) |
1169 | 36 | { |
1170 | 36 | const unsigned char *p = chunk->pos; |
1171 | 36 | unsigned char c; |
1172 | | |
1173 | 3.21M | while (p < chunk->end) { |
1174 | 3.21M | c = *p; |
1175 | 3.21M | if (c == '\\') { |
1176 | 210k | ucl_chunk_skipc (chunk, p); |
1177 | | |
1178 | 210k | if (p >= chunk->end) { |
1179 | 3 | ucl_set_err (parser, UCL_ESYNTAX, |
1180 | 3 | "unfinished escape character", |
1181 | 3 | &parser->err); |
1182 | 3 | return false; |
1183 | 3 | } |
1184 | 210k | else { |
1185 | 210k | ucl_chunk_skipc (chunk, p); |
1186 | 210k | } |
1187 | | |
1188 | 210k | *need_unescape = true; |
1189 | 210k | continue; |
1190 | 210k | } |
1191 | 2.99M | else if (c == '\'') { |
1192 | 20 | ucl_chunk_skipc (chunk, p); |
1193 | 0 | return true; |
1194 | 20 | } |
1195 | | |
1196 | 2.99M | ucl_chunk_skipc (chunk, p); |
1197 | 2.99M | } |
1198 | | |
1199 | 13 | ucl_set_err (parser, UCL_ESYNTAX, |
1200 | 13 | "no quote at the end of single quoted string", |
1201 | 13 | &parser->err); |
1202 | 13 | return false; |
1203 | 36 | } |
1204 | | |
1205 | | static void |
1206 | | ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, |
1207 | | ucl_object_t *top, |
1208 | | ucl_object_t *elt) |
1209 | 7.24k | { |
1210 | 7.24k | ucl_object_t *nobj; |
1211 | | |
1212 | 7.24k | if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { |
1213 | | /* Implicit array */ |
1214 | 7.24k | top->flags |= UCL_OBJECT_MULTIVALUE; |
1215 | 7.24k | DL_APPEND (top, elt); |
1216 | 7.24k | parser->stack->obj->len ++; |
1217 | 7.24k | } |
1218 | 0 | else { |
1219 | 0 | if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { |
1220 | | /* Just add to the explicit array */ |
1221 | 0 | ucl_array_append (top, elt); |
1222 | 0 | } |
1223 | 0 | else { |
1224 | | /* Convert to an array */ |
1225 | 0 | nobj = ucl_object_typed_new (UCL_ARRAY); |
1226 | 0 | nobj->key = top->key; |
1227 | 0 | nobj->keylen = top->keylen; |
1228 | 0 | nobj->flags |= UCL_OBJECT_MULTIVALUE; |
1229 | 0 | ucl_array_append (nobj, top); |
1230 | 0 | ucl_array_append (nobj, elt); |
1231 | 0 | ucl_hash_replace (cont, top, nobj); |
1232 | 0 | } |
1233 | 0 | } |
1234 | 7.24k | } |
1235 | | |
1236 | | bool |
1237 | | ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) |
1238 | 158k | { |
1239 | 158k | ucl_hash_t *container; |
1240 | 158k | ucl_object_t *tobj = NULL, *cur; |
1241 | 158k | char errmsg[256]; |
1242 | | |
1243 | 158k | container = parser->stack->obj->value.ov; |
1244 | | |
1245 | 158k | DL_FOREACH (parser->stack->obj, cur) { |
1246 | 158k | tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (cur->value.ov, nobj)); |
1247 | | |
1248 | 158k | if (tobj != NULL) { |
1249 | 11.4k | break; |
1250 | 11.4k | } |
1251 | 158k | } |
1252 | | |
1253 | | |
1254 | 158k | if (tobj == NULL) { |
1255 | 147k | container = ucl_hash_insert_object (container, nobj, |
1256 | 147k | parser->flags & UCL_PARSER_KEY_LOWERCASE); |
1257 | 147k | if (container == NULL) { |
1258 | 0 | return false; |
1259 | 0 | } |
1260 | 147k | nobj->prev = nobj; |
1261 | 147k | nobj->next = NULL; |
1262 | 147k | parser->stack->obj->len ++; |
1263 | 147k | } |
1264 | 11.4k | else { |
1265 | 11.4k | unsigned priold = ucl_object_get_priority (tobj), |
1266 | 11.4k | prinew = ucl_object_get_priority (nobj); |
1267 | 11.4k | switch (parser->chunks->strategy) { |
1268 | | |
1269 | 11.4k | case UCL_DUPLICATE_APPEND: |
1270 | | /* |
1271 | | * The logic here is the following: |
1272 | | * |
1273 | | * - if we have two objects with the same priority, then we form an |
1274 | | * implicit or explicit array |
1275 | | * - if a new object has bigger priority, then we overwrite an old one |
1276 | | * - if a new object has lower priority, then we ignore it |
1277 | | */ |
1278 | | /* Special case for inherited objects */ |
1279 | 11.4k | if (tobj->flags & UCL_OBJECT_INHERITED) { |
1280 | 5 | prinew = priold + 1; |
1281 | 5 | } |
1282 | | |
1283 | 11.4k | if (priold == prinew) { |
1284 | 7.24k | ucl_parser_append_elt (parser, container, tobj, nobj); |
1285 | 7.24k | } |
1286 | 4.17k | else if (priold > prinew) { |
1287 | | /* |
1288 | | * We add this new object to a list of trash objects just to ensure |
1289 | | * that it won't come to any real object |
1290 | | * XXX: rather inefficient approach |
1291 | | */ |
1292 | 3.23k | DL_APPEND (parser->trash_objs, nobj); |
1293 | 3.23k | } |
1294 | 942 | else { |
1295 | 942 | ucl_hash_replace (container, tobj, nobj); |
1296 | 942 | ucl_object_unref (tobj); |
1297 | 942 | } |
1298 | | |
1299 | 11.4k | break; |
1300 | | |
1301 | 0 | case UCL_DUPLICATE_REWRITE: |
1302 | | /* We just rewrite old values regardless of priority */ |
1303 | 0 | ucl_hash_replace (container, tobj, nobj); |
1304 | 0 | ucl_object_unref (tobj); |
1305 | |
|
1306 | 0 | break; |
1307 | | |
1308 | 0 | case UCL_DUPLICATE_ERROR: |
1309 | 0 | snprintf(errmsg, sizeof(errmsg), |
1310 | 0 | "duplicate element for key '%s' found", |
1311 | 0 | nobj->key); |
1312 | 0 | ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err); |
1313 | 0 | return false; |
1314 | | |
1315 | 0 | case UCL_DUPLICATE_MERGE: |
1316 | | /* |
1317 | | * Here we do have some old object so we just push it on top of objects stack |
1318 | | * Check priority and then perform the merge on the remaining objects |
1319 | | */ |
1320 | 0 | if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) { |
1321 | 0 | ucl_object_unref (nobj); |
1322 | 0 | nobj = tobj; |
1323 | 0 | } |
1324 | 0 | else if (priold == prinew) { |
1325 | 0 | ucl_parser_append_elt (parser, container, tobj, nobj); |
1326 | 0 | } |
1327 | 0 | else if (priold > prinew) { |
1328 | | /* |
1329 | | * We add this new object to a list of trash objects just to ensure |
1330 | | * that it won't come to any real object |
1331 | | * XXX: rather inefficient approach |
1332 | | */ |
1333 | 0 | DL_APPEND (parser->trash_objs, nobj); |
1334 | 0 | } |
1335 | 0 | else { |
1336 | 0 | ucl_hash_replace (container, tobj, nobj); |
1337 | 0 | ucl_object_unref (tobj); |
1338 | 0 | } |
1339 | 0 | break; |
1340 | 11.4k | } |
1341 | 11.4k | } |
1342 | | |
1343 | 158k | parser->stack->obj->value.ov = container; |
1344 | 158k | parser->cur_obj = nobj; |
1345 | 158k | ucl_attach_comment (parser, nobj, false); |
1346 | | |
1347 | 158k | return true; |
1348 | 158k | } |
1349 | | |
1350 | | /** |
1351 | | * Parse a key in an object |
1352 | | * @param parser |
1353 | | * @param chunk |
1354 | | * @param next_key |
1355 | | * @param end_of_object |
1356 | | * @return true if a key has been parsed |
1357 | | */ |
1358 | | static bool |
1359 | | ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, |
1360 | | bool *next_key, bool *end_of_object) |
1361 | 183k | { |
1362 | 183k | const unsigned char *p, *c = NULL, *end, *t; |
1363 | 183k | const char *key = NULL; |
1364 | 183k | bool got_quote = false, got_eq = false, got_semicolon = false, |
1365 | 183k | need_unescape = false, ucl_escape = false, var_expand = false, |
1366 | 183k | got_content = false, got_sep = false; |
1367 | 183k | ucl_object_t *nobj; |
1368 | 183k | ssize_t keylen; |
1369 | | |
1370 | 183k | p = chunk->pos; |
1371 | | |
1372 | 183k | if (*p == '.') { |
1373 | | /* It is macro actually */ |
1374 | 18.5k | if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) { |
1375 | 18.5k | ucl_chunk_skipc (chunk, p); |
1376 | 18.5k | } |
1377 | | |
1378 | 0 | parser->prev_state = parser->state; |
1379 | 18.5k | parser->state = UCL_STATE_MACRO_NAME; |
1380 | 18.5k | *end_of_object = false; |
1381 | 18.5k | return true; |
1382 | 18.5k | } |
1383 | 1.20M | while (p < chunk->end) { |
1384 | | /* |
1385 | | * A key must start with alpha, number, '/' or '_' and end with space character |
1386 | | */ |
1387 | 1.20M | if (c == NULL) { |
1388 | 166k | if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { |
1389 | 2.32k | if (!ucl_skip_comments (parser)) { |
1390 | 0 | return false; |
1391 | 0 | } |
1392 | 2.32k | p = chunk->pos; |
1393 | 2.32k | } |
1394 | 164k | else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
1395 | 73 | ucl_chunk_skipc (chunk, p); |
1396 | 73 | } |
1397 | 164k | else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { |
1398 | | /* The first symbol */ |
1399 | 160k | c = p; |
1400 | 160k | ucl_chunk_skipc (chunk, p); |
1401 | 0 | got_content = true; |
1402 | 160k | } |
1403 | 3.93k | else if (*p == '"') { |
1404 | | /* JSON style key */ |
1405 | 1.04k | c = p + 1; |
1406 | 1.04k | got_quote = true; |
1407 | 1.04k | got_content = true; |
1408 | 1.04k | ucl_chunk_skipc (chunk, p); |
1409 | 1.04k | } |
1410 | 2.89k | else if (*p == '}') { |
1411 | | /* We have actually end of an object */ |
1412 | 0 | *end_of_object = true; |
1413 | 0 | return true; |
1414 | 0 | } |
1415 | 2.89k | else if (*p == '.') { |
1416 | 980 | ucl_chunk_skipc (chunk, p); |
1417 | 0 | parser->prev_state = parser->state; |
1418 | 980 | parser->state = UCL_STATE_MACRO_NAME; |
1419 | 980 | return true; |
1420 | 980 | } |
1421 | 1.91k | else { |
1422 | | /* Invalid identifier */ |
1423 | 1.91k | ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", |
1424 | 1.91k | &parser->err); |
1425 | 1.91k | return false; |
1426 | 1.91k | } |
1427 | 166k | } |
1428 | 1.03M | else { |
1429 | | /* Parse the body of a key */ |
1430 | 1.03M | if (!got_quote) { |
1431 | 1.03M | if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { |
1432 | 873k | got_content = true; |
1433 | 873k | ucl_chunk_skipc (chunk, p); |
1434 | 873k | } |
1435 | 160k | else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { |
1436 | 158k | end = p; |
1437 | 158k | break; |
1438 | 158k | } |
1439 | 1.37k | else { |
1440 | 1.37k | ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key", |
1441 | 1.37k | &parser->err); |
1442 | 1.37k | return false; |
1443 | 1.37k | } |
1444 | 1.03M | } |
1445 | 854 | else { |
1446 | | /* We need to parse json like quoted string */ |
1447 | 854 | if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { |
1448 | 709 | return false; |
1449 | 709 | } |
1450 | | /* Always escape keys obtained via json */ |
1451 | 145 | end = chunk->pos - 1; |
1452 | 145 | p = chunk->pos; |
1453 | 145 | break; |
1454 | 854 | } |
1455 | 1.03M | } |
1456 | 1.20M | } |
1457 | | |
1458 | 159k | if (p >= chunk->end && got_content) { |
1459 | 287 | ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); |
1460 | 287 | return false; |
1461 | 287 | } |
1462 | 159k | else if (!got_content) { |
1463 | 325 | return true; |
1464 | 325 | } |
1465 | 158k | *end_of_object = false; |
1466 | | /* We are now at the end of the key, need to parse the rest */ |
1467 | 1.59M | while (p < chunk->end) { |
1468 | 1.59M | if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { |
1469 | 1.39M | ucl_chunk_skipc (chunk, p); |
1470 | 1.39M | } |
1471 | 198k | else if (*p == '=') { |
1472 | 17.9k | if (!got_eq && !got_semicolon) { |
1473 | 17.9k | ucl_chunk_skipc (chunk, p); |
1474 | 0 | got_eq = true; |
1475 | 17.9k | } |
1476 | 0 | else { |
1477 | 0 | ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", |
1478 | 0 | &parser->err); |
1479 | 0 | return false; |
1480 | 0 | } |
1481 | 17.9k | } |
1482 | 180k | else if (*p == ':') { |
1483 | 15.1k | if (!got_eq && !got_semicolon) { |
1484 | 15.1k | ucl_chunk_skipc (chunk, p); |
1485 | 0 | got_semicolon = true; |
1486 | 15.1k | } |
1487 | 0 | else { |
1488 | 0 | ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", |
1489 | 0 | &parser->err); |
1490 | 0 | return false; |
1491 | 0 | } |
1492 | 15.1k | } |
1493 | 165k | else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { |
1494 | | /* Check for comment */ |
1495 | 6.99k | if (!ucl_skip_comments (parser)) { |
1496 | 0 | return false; |
1497 | 0 | } |
1498 | 6.99k | p = chunk->pos; |
1499 | 6.99k | } |
1500 | 158k | else { |
1501 | | /* Start value */ |
1502 | 158k | break; |
1503 | 158k | } |
1504 | 1.59M | } |
1505 | | |
1506 | 158k | if (p >= chunk->end && got_content) { |
1507 | 357 | ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); |
1508 | 357 | return false; |
1509 | 357 | } |
1510 | | |
1511 | 158k | got_sep = got_semicolon || got_eq; |
1512 | | |
1513 | 158k | if (!got_sep) { |
1514 | | /* |
1515 | | * Maybe we have more keys nested, so search for termination character. |
1516 | | * Possible choices: |
1517 | | * 1) key1 key2 ... keyN [:=] value <- we treat that as error |
1518 | | * 2) key1 ... keyN {} or [] <- we treat that as nested objects |
1519 | | * 3) key1 value[;,\n] <- we treat that as linear object |
1520 | | */ |
1521 | 125k | t = p; |
1522 | 125k | *next_key = false; |
1523 | 125k | while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { |
1524 | 0 | t ++; |
1525 | 0 | } |
1526 | | /* Check first non-space character after a key */ |
1527 | 125k | if (*t != '{' && *t != '[') { |
1528 | 28.4M | while (t < chunk->end) { |
1529 | 28.4M | if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { |
1530 | 11.8k | break; |
1531 | 11.8k | } |
1532 | 28.3M | else if (*t == '{' || *t == '[') { |
1533 | 109k | *next_key = true; |
1534 | 109k | break; |
1535 | 109k | } |
1536 | 28.2M | t ++; |
1537 | 28.2M | } |
1538 | 124k | } |
1539 | 125k | } |
1540 | | |
1541 | | /* Create a new object */ |
1542 | 158k | nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); |
1543 | 158k | if (nobj == NULL) { |
1544 | 0 | return false; |
1545 | 0 | } |
1546 | 158k | keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], |
1547 | 158k | &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, |
1548 | 158k | false, false); |
1549 | 158k | if (keylen == -1) { |
1550 | 0 | ucl_object_unref (nobj); |
1551 | 0 | return false; |
1552 | 0 | } |
1553 | 158k | else if (keylen == 0) { |
1554 | 0 | ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); |
1555 | 0 | ucl_object_unref (nobj); |
1556 | 0 | return false; |
1557 | 0 | } |
1558 | | |
1559 | 158k | nobj->key = key; |
1560 | 158k | nobj->keylen = keylen; |
1561 | | |
1562 | 158k | if (!ucl_parser_process_object_element (parser, nobj)) { |
1563 | 0 | return false; |
1564 | 0 | } |
1565 | | |
1566 | 158k | if (ucl_escape) { |
1567 | 15 | nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; |
1568 | 15 | } |
1569 | | |
1570 | | |
1571 | 158k | return true; |
1572 | 158k | } |
1573 | | |
1574 | | /** |
1575 | | * Parse a cl string |
1576 | | * @param parser |
1577 | | * @param chunk |
1578 | | * @param var_expand |
1579 | | * @param need_unescape |
1580 | | * @return true if a key has been parsed |
1581 | | */ |
1582 | | static bool |
1583 | | ucl_parse_string_value (struct ucl_parser *parser, |
1584 | | struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) |
1585 | 675k | { |
1586 | 675k | const unsigned char *p; |
1587 | 675k | enum { |
1588 | 675k | UCL_BRACE_ROUND = 0, |
1589 | 675k | UCL_BRACE_SQUARE, |
1590 | 675k | UCL_BRACE_FIGURE |
1591 | 675k | }; |
1592 | 675k | int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; |
1593 | | |
1594 | 675k | p = chunk->pos; |
1595 | | |
1596 | 22.7M | while (p < chunk->end) { |
1597 | | |
1598 | | /* Skip pairs of figure braces */ |
1599 | 22.7M | if (*p == '{') { |
1600 | 2.01M | braces[UCL_BRACE_FIGURE][0] ++; |
1601 | 2.01M | } |
1602 | 20.7M | else if (*p == '}') { |
1603 | 338k | braces[UCL_BRACE_FIGURE][1] ++; |
1604 | 338k | if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { |
1605 | | /* This is not a termination symbol, continue */ |
1606 | 338k | ucl_chunk_skipc (chunk, p); |
1607 | 0 | continue; |
1608 | 338k | } |
1609 | 338k | } |
1610 | | /* Skip pairs of square braces */ |
1611 | 20.3M | else if (*p == '[') { |
1612 | 3.12M | braces[UCL_BRACE_SQUARE][0] ++; |
1613 | 3.12M | } |
1614 | 17.2M | else if (*p == ']') { |
1615 | 1.00M | braces[UCL_BRACE_SQUARE][1] ++; |
1616 | 1.00M | if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { |
1617 | | /* This is not a termination symbol, continue */ |
1618 | 1.00M | ucl_chunk_skipc (chunk, p); |
1619 | 0 | continue; |
1620 | 1.00M | } |
1621 | 1.00M | } |
1622 | 16.2M | else if (*p == '$') { |
1623 | 3.29M | *var_expand = true; |
1624 | 3.29M | } |
1625 | 12.9M | else if (*p == '\\') { |
1626 | 357k | *need_unescape = true; |
1627 | 357k | ucl_chunk_skipc (chunk, p); |
1628 | 357k | if (p < chunk->end) { |
1629 | 357k | ucl_chunk_skipc (chunk, p); |
1630 | 357k | } |
1631 | 0 | continue; |
1632 | 357k | } |
1633 | | |
1634 | 21.0M | if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { |
1635 | 674k | break; |
1636 | 674k | } |
1637 | 20.3M | ucl_chunk_skipc (chunk, p); |
1638 | 20.3M | } |
1639 | | |
1640 | 675k | return true; |
1641 | 675k | } |
1642 | | |
1643 | | /** |
1644 | | * Parse multiline string ending with \n{term}\n |
1645 | | * @param parser |
1646 | | * @param chunk |
1647 | | * @param term |
1648 | | * @param term_len |
1649 | | * @param beg |
1650 | | * @param var_expand |
1651 | | * @return size of multiline string or 0 in case of error |
1652 | | */ |
1653 | | static int |
1654 | | ucl_parse_multiline_string (struct ucl_parser *parser, |
1655 | | struct ucl_chunk *chunk, const unsigned char *term, |
1656 | | int term_len, unsigned char const **beg, |
1657 | | bool *var_expand) |
1658 | 198 | { |
1659 | 198 | const unsigned char *p, *c, *tend; |
1660 | 198 | bool newline = false; |
1661 | 198 | int len = 0; |
1662 | | |
1663 | 198 | p = chunk->pos; |
1664 | | |
1665 | 198 | c = p; |
1666 | | |
1667 | 3.12M | while (p < chunk->end) { |
1668 | 3.12M | if (newline) { |
1669 | 2.15k | if (chunk->end - p < term_len) { |
1670 | 50 | return 0; |
1671 | 50 | } |
1672 | 2.10k | else if (memcmp (p, term, term_len) == 0) { |
1673 | 955 | tend = p + term_len; |
1674 | 955 | if (*tend != '\n' && *tend != ';' && *tend != ',') { |
1675 | | /* Incomplete terminator */ |
1676 | 862 | ucl_chunk_skipc (chunk, p); |
1677 | 0 | continue; |
1678 | 862 | } |
1679 | 93 | len = p - c; |
1680 | 93 | chunk->remain -= term_len; |
1681 | 93 | chunk->pos = p + term_len; |
1682 | 93 | chunk->column = term_len; |
1683 | 93 | *beg = c; |
1684 | 93 | break; |
1685 | 955 | } |
1686 | 2.15k | } |
1687 | 3.12M | if (*p == '\n') { |
1688 | 1.29k | newline = true; |
1689 | 1.29k | } |
1690 | 3.12M | else { |
1691 | 3.12M | if (*p == '$') { |
1692 | 411k | *var_expand = true; |
1693 | 411k | } |
1694 | 3.12M | newline = false; |
1695 | 3.12M | } |
1696 | 3.12M | ucl_chunk_skipc (chunk, p); |
1697 | 3.12M | } |
1698 | | |
1699 | 148 | return len; |
1700 | 198 | } |
1701 | | |
1702 | | static inline ucl_object_t* |
1703 | | ucl_parser_get_container (struct ucl_parser *parser) |
1704 | 1.88M | { |
1705 | 1.88M | ucl_object_t *t, *obj = NULL; |
1706 | | |
1707 | 1.88M | if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { |
1708 | 0 | return NULL; |
1709 | 0 | } |
1710 | | |
1711 | 1.88M | if (parser->stack->obj->type == UCL_ARRAY) { |
1712 | | /* Object must be allocated */ |
1713 | 1.83M | obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); |
1714 | 1.83M | t = parser->stack->obj; |
1715 | | |
1716 | 1.83M | if (!ucl_array_append (t, obj)) { |
1717 | 0 | ucl_object_unref (obj); |
1718 | 0 | return NULL; |
1719 | 0 | } |
1720 | | |
1721 | 1.83M | parser->cur_obj = obj; |
1722 | 1.83M | ucl_attach_comment (parser, obj, false); |
1723 | 1.83M | } |
1724 | 49.1k | else { |
1725 | | /* Object has been already allocated */ |
1726 | 49.1k | obj = parser->cur_obj; |
1727 | 49.1k | } |
1728 | | |
1729 | 1.88M | return obj; |
1730 | 1.88M | } |
1731 | | |
1732 | | /** |
1733 | | * Handle value data |
1734 | | * @param parser |
1735 | | * @param chunk |
1736 | | * @return |
1737 | | */ |
1738 | | static bool |
1739 | | ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) |
1740 | 1.88M | { |
1741 | 1.88M | const unsigned char *p, *c; |
1742 | 1.88M | ucl_object_t *obj = NULL; |
1743 | 1.88M | unsigned int stripped_spaces; |
1744 | 1.88M | ssize_t str_len; |
1745 | 1.88M | bool need_unescape = false, ucl_escape = false, var_expand = false; |
1746 | | |
1747 | 1.88M | p = chunk->pos; |
1748 | | |
1749 | | /* Skip any spaces and comments */ |
1750 | 1.88M | if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || |
1751 | 1.88M | (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { |
1752 | 7.67k | while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
1753 | 5.21k | ucl_chunk_skipc (chunk, p); |
1754 | 5.21k | } |
1755 | 2.45k | if (!ucl_skip_comments (parser)) { |
1756 | 0 | return false; |
1757 | 0 | } |
1758 | 2.45k | p = chunk->pos; |
1759 | 2.45k | } |
1760 | | |
1761 | 1.88M | while (p < chunk->end) { |
1762 | 1.88M | c = p; |
1763 | 1.88M | switch (*p) { |
1764 | 1.90k | case '"': |
1765 | 1.90k | ucl_chunk_skipc (chunk, p); |
1766 | | |
1767 | 1.90k | if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, |
1768 | 1.90k | &var_expand)) { |
1769 | 4 | return false; |
1770 | 4 | } |
1771 | | |
1772 | 1.89k | obj = ucl_parser_get_container (parser); |
1773 | 1.89k | if (!obj) { |
1774 | 0 | return false; |
1775 | 0 | } |
1776 | | |
1777 | 1.89k | str_len = chunk->pos - c - 2; |
1778 | 1.89k | obj->type = UCL_STRING; |
1779 | 1.89k | if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, |
1780 | 1.89k | &obj->trash_stack[UCL_TRASH_VALUE], |
1781 | 1.89k | &obj->value.sv, str_len, need_unescape, false, |
1782 | 1.89k | var_expand, false)) == -1) { |
1783 | 0 | return false; |
1784 | 0 | } |
1785 | | |
1786 | 1.89k | obj->len = str_len; |
1787 | 1.89k | parser->state = UCL_STATE_AFTER_VALUE; |
1788 | | |
1789 | 1.89k | return true; |
1790 | 0 | break; |
1791 | 36 | case '\'': |
1792 | 36 | ucl_chunk_skipc (chunk, p); |
1793 | | |
1794 | 36 | if (!ucl_lex_squoted_string (parser, chunk, &need_unescape)) { |
1795 | 16 | return false; |
1796 | 16 | } |
1797 | | |
1798 | 20 | obj = ucl_parser_get_container (parser); |
1799 | 20 | if (!obj) { |
1800 | 0 | return false; |
1801 | 0 | } |
1802 | | |
1803 | 20 | str_len = chunk->pos - c - 2; |
1804 | 20 | obj->type = UCL_STRING; |
1805 | 20 | obj->flags |= UCL_OBJECT_SQUOTED; |
1806 | | |
1807 | 20 | if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, |
1808 | 20 | &obj->trash_stack[UCL_TRASH_VALUE], |
1809 | 20 | &obj->value.sv, str_len, need_unescape, false, |
1810 | 20 | var_expand, true)) == -1) { |
1811 | 0 | return false; |
1812 | 0 | } |
1813 | | |
1814 | 20 | obj->len = str_len; |
1815 | | |
1816 | 20 | parser->state = UCL_STATE_AFTER_VALUE; |
1817 | | |
1818 | 20 | return true; |
1819 | 0 | break; |
1820 | 566 | case '{': |
1821 | 566 | obj = ucl_parser_get_container (parser); |
1822 | 566 | if (obj == NULL) { |
1823 | 0 | return false; |
1824 | 0 | } |
1825 | | /* We have a new object */ |
1826 | 566 | if (parser->stack) { |
1827 | 566 | obj = ucl_parser_add_container (obj, parser, false, |
1828 | 566 | parser->stack->e.params.level, true); |
1829 | 566 | } |
1830 | 0 | else { |
1831 | 0 | return false; |
1832 | 0 | } |
1833 | 566 | if (obj == NULL) { |
1834 | 0 | return false; |
1835 | 0 | } |
1836 | | |
1837 | 566 | ucl_chunk_skipc (chunk, p); |
1838 | | |
1839 | 0 | return true; |
1840 | 0 | break; |
1841 | 433 | case '[': |
1842 | 433 | obj = ucl_parser_get_container (parser); |
1843 | 433 | if (obj == NULL) { |
1844 | 0 | return false; |
1845 | 0 | } |
1846 | | /* We have a new array */ |
1847 | 433 | if (parser->stack) { |
1848 | 433 | obj = ucl_parser_add_container (obj, parser, true, |
1849 | 433 | parser->stack->e.params.level, true); |
1850 | 433 | } |
1851 | 0 | else { |
1852 | 0 | return false; |
1853 | 0 | } |
1854 | | |
1855 | 433 | if (obj == NULL) { |
1856 | 0 | return false; |
1857 | 0 | } |
1858 | | |
1859 | 433 | ucl_chunk_skipc (chunk, p); |
1860 | | |
1861 | 0 | return true; |
1862 | 0 | break; |
1863 | 144 | case ']': |
1864 | | /* We have the array ending */ |
1865 | 144 | if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { |
1866 | 144 | parser->state = UCL_STATE_AFTER_VALUE; |
1867 | 144 | return true; |
1868 | 144 | } |
1869 | 0 | else { |
1870 | 0 | goto parse_string; |
1871 | 0 | } |
1872 | 0 | break; |
1873 | 442 | case '<': |
1874 | 442 | obj = ucl_parser_get_container (parser); |
1875 | | /* We have something like multiline value, which must be <<[A-Z]+\n */ |
1876 | 442 | if (chunk->end - p > 3) { |
1877 | 442 | if (memcmp (p, "<<", 2) == 0) { |
1878 | 247 | p += 2; |
1879 | | /* We allow only uppercase characters in multiline definitions */ |
1880 | 484 | while (p < chunk->end && *p >= 'A' && *p <= 'Z') { |
1881 | 237 | p ++; |
1882 | 237 | } |
1883 | 247 | if(p == chunk->end) { |
1884 | 0 | ucl_set_err (parser, UCL_ESYNTAX, |
1885 | 0 | "unterminated multiline value", &parser->err); |
1886 | 0 | return false; |
1887 | 0 | } |
1888 | 247 | if (*p =='\n') { |
1889 | | /* Set chunk positions and start multiline parsing */ |
1890 | 198 | chunk->remain -= p - c + 1; |
1891 | 198 | c += 2; |
1892 | 198 | chunk->pos = p + 1; |
1893 | 198 | chunk->column = 0; |
1894 | 198 | chunk->line ++; |
1895 | 198 | if ((str_len = ucl_parse_multiline_string (parser, chunk, c, |
1896 | 198 | p - c, &c, &var_expand)) == 0) { |
1897 | 105 | ucl_set_err (parser, UCL_ESYNTAX, |
1898 | 105 | "unterminated multiline value", &parser->err); |
1899 | 105 | return false; |
1900 | 105 | } |
1901 | | |
1902 | 93 | obj->type = UCL_STRING; |
1903 | 93 | obj->flags |= UCL_OBJECT_MULTILINE; |
1904 | 93 | if ((str_len = ucl_copy_or_store_ptr (parser, c, |
1905 | 93 | &obj->trash_stack[UCL_TRASH_VALUE], |
1906 | 93 | &obj->value.sv, str_len - 1, false, |
1907 | 93 | false, var_expand, false)) == -1) { |
1908 | 0 | return false; |
1909 | 0 | } |
1910 | 93 | obj->len = str_len; |
1911 | | |
1912 | 93 | parser->state = UCL_STATE_AFTER_VALUE; |
1913 | | |
1914 | 93 | return true; |
1915 | 93 | } |
1916 | 247 | } |
1917 | 442 | } |
1918 | | /* Fallback to ordinary strings */ |
1919 | | /* FALLTHRU */ |
1920 | 1.87M | default: |
1921 | 1.87M | parse_string: |
1922 | 1.87M | if (obj == NULL) { |
1923 | 1.87M | obj = ucl_parser_get_container (parser); |
1924 | 1.87M | } |
1925 | | |
1926 | | /* Parse atom */ |
1927 | 1.87M | if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { |
1928 | 1.20M | if (!ucl_lex_number (parser, chunk, obj)) { |
1929 | 5.67k | if (parser->state == UCL_STATE_ERROR) { |
1930 | 113 | return false; |
1931 | 113 | } |
1932 | 5.67k | } |
1933 | 1.20M | else { |
1934 | 1.20M | parser->state = UCL_STATE_AFTER_VALUE; |
1935 | 1.20M | return true; |
1936 | 1.20M | } |
1937 | | /* Fallback to normal string */ |
1938 | 1.20M | } |
1939 | | |
1940 | 675k | if (!ucl_parse_string_value (parser, chunk, &var_expand, |
1941 | 675k | &need_unescape)) { |
1942 | 0 | return false; |
1943 | 0 | } |
1944 | | /* Cut trailing spaces */ |
1945 | 675k | stripped_spaces = 0; |
1946 | 1.92M | while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), |
1947 | 1.92M | UCL_CHARACTER_WHITESPACE)) { |
1948 | 1.24M | stripped_spaces ++; |
1949 | 1.24M | } |
1950 | 675k | str_len = chunk->pos - c - stripped_spaces; |
1951 | 675k | if (str_len <= 0) { |
1952 | 6 | ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", |
1953 | 6 | &parser->err); |
1954 | 6 | return false; |
1955 | 6 | } |
1956 | 675k | else if (str_len == 4 && memcmp (c, "null", 4) == 0) { |
1957 | 0 | obj->len = 0; |
1958 | 0 | obj->type = UCL_NULL; |
1959 | 0 | } |
1960 | 675k | else if (str_len == 3 && memcmp (c, "nan", 3) == 0) { |
1961 | 0 | obj->len = 0; |
1962 | 0 | obj->type = UCL_FLOAT; |
1963 | 0 | obj->value.dv = NAN; |
1964 | 0 | } |
1965 | 675k | else if (str_len == 3 && memcmp (c, "inf", 3) == 0) { |
1966 | 640 | obj->len = 0; |
1967 | 640 | obj->type = UCL_FLOAT; |
1968 | 640 | obj->value.dv = INFINITY; |
1969 | 640 | } |
1970 | 674k | else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { |
1971 | 665k | obj->type = UCL_STRING; |
1972 | 665k | if ((str_len = ucl_copy_or_store_ptr (parser, c, |
1973 | 665k | &obj->trash_stack[UCL_TRASH_VALUE], |
1974 | 665k | &obj->value.sv, str_len, need_unescape, |
1975 | 665k | false, var_expand, false)) == -1) { |
1976 | 0 | return false; |
1977 | 0 | } |
1978 | 665k | obj->len = str_len; |
1979 | 665k | } |
1980 | | |
1981 | 675k | parser->state = UCL_STATE_AFTER_VALUE; |
1982 | | |
1983 | 675k | return true; |
1984 | 0 | break; |
1985 | 1.88M | } |
1986 | 1.88M | } |
1987 | | |
1988 | 0 | return true; |
1989 | 1.88M | } |
1990 | | |
1991 | | /** |
1992 | | * Handle after value data |
1993 | | * @param parser |
1994 | | * @param chunk |
1995 | | * @return |
1996 | | */ |
1997 | | static bool |
1998 | | ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) |
1999 | 1.87M | { |
2000 | 1.87M | const unsigned char *p; |
2001 | 1.87M | bool got_sep = false; |
2002 | 1.87M | struct ucl_stack *st; |
2003 | | |
2004 | 1.87M | p = chunk->pos; |
2005 | | |
2006 | 3.88M | while (p < chunk->end) { |
2007 | 3.88M | if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { |
2008 | | /* Skip whitespaces */ |
2009 | 2.48k | ucl_chunk_skipc (chunk, p); |
2010 | 2.48k | } |
2011 | 3.87M | else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { |
2012 | | /* Skip comment */ |
2013 | 15.8k | if (!ucl_skip_comments (parser)) { |
2014 | 340 | return false; |
2015 | 340 | } |
2016 | | /* Treat comment as a separator */ |
2017 | 15.4k | got_sep = true; |
2018 | 15.4k | p = chunk->pos; |
2019 | 15.4k | } |
2020 | 3.86M | else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { |
2021 | 1.98M | if (*p == '}' || *p == ']') { |
2022 | 244 | if (parser->stack == NULL) { |
2023 | 0 | ucl_set_err (parser, UCL_ESYNTAX, |
2024 | 0 | "end of array or object detected without corresponding start", |
2025 | 0 | &parser->err); |
2026 | 0 | return false; |
2027 | 0 | } |
2028 | 244 | if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || |
2029 | 244 | (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { |
2030 | | |
2031 | | /* Pop all nested objects from a stack */ |
2032 | 226 | st = parser->stack; |
2033 | | |
2034 | 226 | if (!(st->e.params.flags & UCL_STACK_HAS_OBRACE)) { |
2035 | 12 | parser->err_code = UCL_EUNPAIRED; |
2036 | 12 | ucl_create_err (&parser->err, |
2037 | 12 | "%s:%d object closed with } is not opened with { at line %d", |
2038 | 12 | chunk->fname ? chunk->fname : "memory", |
2039 | 12 | parser->chunks->line, st->e.params.line); |
2040 | | |
2041 | 12 | return false; |
2042 | 12 | } |
2043 | | |
2044 | 214 | parser->stack = st->next; |
2045 | 214 | UCL_FREE (sizeof (struct ucl_stack), st); |
2046 | | |
2047 | 214 | if (parser->cur_obj) { |
2048 | 214 | ucl_attach_comment (parser, parser->cur_obj, true); |
2049 | 214 | } |
2050 | | |
2051 | 219 | while (parser->stack != NULL) { |
2052 | 213 | st = parser->stack; |
2053 | | |
2054 | 213 | if (st->next == NULL) { |
2055 | 62 | break; |
2056 | 62 | } |
2057 | 151 | else if (st->next->e.params.level == st->e.params.level) { |
2058 | 146 | break; |
2059 | 146 | } |
2060 | | |
2061 | | |
2062 | 5 | parser->stack = st->next; |
2063 | 5 | parser->cur_obj = st->obj; |
2064 | 5 | UCL_FREE (sizeof (struct ucl_stack), st); |
2065 | 5 | } |
2066 | 214 | } |
2067 | 18 | else { |
2068 | 18 | ucl_set_err (parser, UCL_ESYNTAX, |
2069 | 18 | "unexpected terminating symbol detected", |
2070 | 18 | &parser->err); |
2071 | 18 | return false; |
2072 | 18 | } |
2073 | | |
2074 | 214 | if (parser->stack == NULL) { |
2075 | | /* Ignore everything after a top object */ |
2076 | 6 | return true; |
2077 | 6 | } |
2078 | 208 | else { |
2079 | 208 | ucl_chunk_skipc (chunk, p); |
2080 | 208 | } |
2081 | 208 | got_sep = true; |
2082 | 208 | } |
2083 | 1.98M | else { |
2084 | | /* Got a separator */ |
2085 | 1.98M | got_sep = true; |
2086 | 1.98M | ucl_chunk_skipc (chunk, p); |
2087 | 1.98M | } |
2088 | 1.98M | } |
2089 | 1.87M | else { |
2090 | | /* Anything else */ |
2091 | 1.87M | if (!got_sep) { |
2092 | 7 | ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing", |
2093 | 7 | &parser->err); |
2094 | 7 | return false; |
2095 | 7 | } |
2096 | 1.87M | return true; |
2097 | 1.87M | } |
2098 | 3.88M | } |
2099 | | |
2100 | 1.05k | return true; |
2101 | 1.87M | } |
2102 | | |
2103 | | static bool |
2104 | | ucl_skip_macro_as_comment (struct ucl_parser *parser, |
2105 | | struct ucl_chunk *chunk) |
2106 | 0 | { |
2107 | 0 | const unsigned char *p, *c; |
2108 | 0 | enum { |
2109 | 0 | macro_skip_start = 0, |
2110 | 0 | macro_has_symbols, |
2111 | 0 | macro_has_obrace, |
2112 | 0 | macro_has_quote, |
2113 | 0 | macro_has_backslash, |
2114 | 0 | macro_has_sqbrace, |
2115 | 0 | macro_save |
2116 | 0 | } state = macro_skip_start, prev_state = macro_skip_start; |
2117 | |
|
2118 | 0 | p = chunk->pos; |
2119 | 0 | c = chunk->pos; |
2120 | |
|
2121 | 0 | while (p < chunk->end) { |
2122 | 0 | switch (state) { |
2123 | 0 | case macro_skip_start: |
2124 | 0 | if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { |
2125 | 0 | state = macro_has_symbols; |
2126 | 0 | } |
2127 | 0 | else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
2128 | 0 | state = macro_save; |
2129 | 0 | continue; |
2130 | 0 | } |
2131 | | |
2132 | 0 | ucl_chunk_skipc (chunk, p); |
2133 | 0 | break; |
2134 | | |
2135 | 0 | case macro_has_symbols: |
2136 | 0 | if (*p == '{') { |
2137 | 0 | state = macro_has_sqbrace; |
2138 | 0 | } |
2139 | 0 | else if (*p == '(') { |
2140 | 0 | state = macro_has_obrace; |
2141 | 0 | } |
2142 | 0 | else if (*p == '"') { |
2143 | 0 | state = macro_has_quote; |
2144 | 0 | } |
2145 | 0 | else if (*p == '\n') { |
2146 | 0 | state = macro_save; |
2147 | 0 | continue; |
2148 | 0 | } |
2149 | | |
2150 | 0 | ucl_chunk_skipc (chunk, p); |
2151 | 0 | break; |
2152 | | |
2153 | 0 | case macro_has_obrace: |
2154 | 0 | if (*p == '\\') { |
2155 | 0 | prev_state = state; |
2156 | 0 | state = macro_has_backslash; |
2157 | 0 | } |
2158 | 0 | else if (*p == ')') { |
2159 | 0 | state = macro_has_symbols; |
2160 | 0 | } |
2161 | |
|
2162 | 0 | ucl_chunk_skipc (chunk, p); |
2163 | 0 | break; |
2164 | | |
2165 | 0 | case macro_has_sqbrace: |
2166 | 0 | if (*p == '\\') { |
2167 | 0 | prev_state = state; |
2168 | 0 | state = macro_has_backslash; |
2169 | 0 | } |
2170 | 0 | else if (*p == '}') { |
2171 | 0 | state = macro_save; |
2172 | 0 | } |
2173 | |
|
2174 | 0 | ucl_chunk_skipc (chunk, p); |
2175 | 0 | break; |
2176 | | |
2177 | 0 | case macro_has_quote: |
2178 | 0 | if (*p == '\\') { |
2179 | 0 | prev_state = state; |
2180 | 0 | state = macro_has_backslash; |
2181 | 0 | } |
2182 | 0 | else if (*p == '"') { |
2183 | 0 | state = macro_save; |
2184 | 0 | } |
2185 | |
|
2186 | 0 | ucl_chunk_skipc (chunk, p); |
2187 | 0 | break; |
2188 | | |
2189 | 0 | case macro_has_backslash: |
2190 | 0 | state = prev_state; |
2191 | 0 | ucl_chunk_skipc (chunk, p); |
2192 | 0 | break; |
2193 | | |
2194 | 0 | case macro_save: |
2195 | 0 | if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { |
2196 | 0 | ucl_save_comment (parser, c, p - c); |
2197 | 0 | } |
2198 | |
|
2199 | 0 | return true; |
2200 | 0 | } |
2201 | 0 | } |
2202 | | |
2203 | 0 | return false; |
2204 | 0 | } |
2205 | | |
2206 | | /** |
2207 | | * Handle macro data |
2208 | | * @param parser |
2209 | | * @param chunk |
2210 | | * @param marco |
2211 | | * @param macro_start |
2212 | | * @param macro_len |
2213 | | * @return |
2214 | | */ |
2215 | | static bool |
2216 | | ucl_parse_macro_value (struct ucl_parser *parser, |
2217 | | struct ucl_chunk *chunk, struct ucl_macro *macro, |
2218 | | unsigned char const **macro_start, size_t *macro_len) |
2219 | 15.7k | { |
2220 | 15.7k | const unsigned char *p, *c; |
2221 | 15.7k | bool need_unescape = false, ucl_escape = false, var_expand = false; |
2222 | | |
2223 | 15.7k | p = chunk->pos; |
2224 | | |
2225 | 15.7k | switch (*p) { |
2226 | 2.42k | case '"': |
2227 | | /* We have macro value encoded in quotes */ |
2228 | 2.42k | c = p; |
2229 | 2.42k | ucl_chunk_skipc (chunk, p); |
2230 | 2.42k | if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { |
2231 | 3 | return false; |
2232 | 3 | } |
2233 | | |
2234 | 2.42k | *macro_start = c + 1; |
2235 | 2.42k | *macro_len = chunk->pos - c - 2; |
2236 | 2.42k | p = chunk->pos; |
2237 | 2.42k | break; |
2238 | 748 | case '{': |
2239 | | /* We got a multiline macro body */ |
2240 | 748 | ucl_chunk_skipc (chunk, p); |
2241 | | /* Skip spaces at the beginning */ |
2242 | 902 | while (p < chunk->end) { |
2243 | 428 | if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
2244 | 154 | ucl_chunk_skipc (chunk, p); |
2245 | 154 | } |
2246 | 274 | else { |
2247 | 274 | break; |
2248 | 274 | } |
2249 | 428 | } |
2250 | 748 | c = p; |
2251 | 3.24M | while (p < chunk->end) { |
2252 | 3.24M | if (*p == '}') { |
2253 | 9 | break; |
2254 | 9 | } |
2255 | 3.24M | ucl_chunk_skipc (chunk, p); |
2256 | 3.24M | } |
2257 | 748 | *macro_start = c; |
2258 | 748 | *macro_len = p - c; |
2259 | 748 | ucl_chunk_skipc (chunk, p); |
2260 | 0 | break; |
2261 | 12.5k | default: |
2262 | | /* Macro is not enclosed in quotes or braces */ |
2263 | 12.5k | c = p; |
2264 | 5.95M | while (p < chunk->end) { |
2265 | 5.95M | if (ucl_lex_is_atom_end (*p)) { |
2266 | 12.2k | break; |
2267 | 12.2k | } |
2268 | 5.94M | ucl_chunk_skipc (chunk, p); |
2269 | 5.94M | } |
2270 | 12.5k | *macro_start = c; |
2271 | 12.5k | *macro_len = p - c; |
2272 | 12.5k | break; |
2273 | 15.7k | } |
2274 | | |
2275 | | /* We are at the end of a macro */ |
2276 | | /* Skip ';' and space characters and return to previous state */ |
2277 | 24.8k | while (p < chunk->end) { |
2278 | 23.8k | if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { |
2279 | 14.6k | break; |
2280 | 14.6k | } |
2281 | 9.13k | ucl_chunk_skipc (chunk, p); |
2282 | 9.13k | } |
2283 | 15.7k | return true; |
2284 | 15.7k | } |
2285 | | |
2286 | | /** |
2287 | | * Parse macro arguments as UCL object |
2288 | | * @param parser parser structure |
2289 | | * @param chunk the current data chunk |
2290 | | * @return |
2291 | | */ |
2292 | | static ucl_object_t * |
2293 | | ucl_parse_macro_arguments (struct ucl_parser *parser, |
2294 | | struct ucl_chunk *chunk) |
2295 | 16.2k | { |
2296 | 16.2k | ucl_object_t *res = NULL; |
2297 | 16.2k | struct ucl_parser *params_parser; |
2298 | 16.2k | int obraces = 1, ebraces = 0, state = 0; |
2299 | 16.2k | const unsigned char *p, *c; |
2300 | 16.2k | size_t args_len = 0; |
2301 | 16.2k | struct ucl_parser_saved_state saved; |
2302 | | |
2303 | 16.2k | saved.column = chunk->column; |
2304 | 16.2k | saved.line = chunk->line; |
2305 | 16.2k | saved.pos = chunk->pos; |
2306 | 16.2k | saved.remain = chunk->remain; |
2307 | 16.2k | p = chunk->pos; |
2308 | | |
2309 | 16.2k | if (*p != '(' || chunk->remain < 2) { |
2310 | 0 | return NULL; |
2311 | 0 | } |
2312 | | |
2313 | | /* Set begin and start */ |
2314 | 16.2k | ucl_chunk_skipc (chunk, p); |
2315 | 0 | c = p; |
2316 | | |
2317 | 10.7M | while ((p) < (chunk)->end) { |
2318 | 10.7M | switch (state) { |
2319 | 9.15M | case 0: |
2320 | | /* Parse symbols and check for '(', ')' and '"' */ |
2321 | 9.15M | if (*p == '(') { |
2322 | 75.3k | obraces ++; |
2323 | 75.3k | } |
2324 | 9.07M | else if (*p == ')') { |
2325 | 27.7k | ebraces ++; |
2326 | 27.7k | } |
2327 | 9.04M | else if (*p == '"') { |
2328 | 9.76k | state = 1; |
2329 | 9.76k | } |
2330 | | /* Check pairing */ |
2331 | 9.15M | if (obraces == ebraces) { |
2332 | 16.0k | state = 99; |
2333 | 16.0k | } |
2334 | 9.13M | else { |
2335 | 9.13M | args_len ++; |
2336 | 9.13M | } |
2337 | | /* Check overflow */ |
2338 | 9.15M | if (chunk->remain == 0) { |
2339 | 0 | goto restore_chunk; |
2340 | 0 | } |
2341 | 9.15M | ucl_chunk_skipc (chunk, p); |
2342 | 0 | break; |
2343 | 1.61M | case 1: |
2344 | | /* We have quote character, so skip all but quotes */ |
2345 | 1.61M | if (*p == '"' && *(p - 1) != '\\') { |
2346 | 9.62k | state = 0; |
2347 | 9.62k | } |
2348 | 1.61M | if (chunk->remain == 0) { |
2349 | 0 | goto restore_chunk; |
2350 | 0 | } |
2351 | 1.61M | args_len ++; |
2352 | 1.61M | ucl_chunk_skipc (chunk, p); |
2353 | 0 | break; |
2354 | 16.0k | case 99: |
2355 | | /* |
2356 | | * We have read the full body of arguments, so we need to parse and set |
2357 | | * object from that |
2358 | | */ |
2359 | 16.0k | params_parser = ucl_parser_new (parser->flags); |
2360 | 16.0k | if (!ucl_parser_add_chunk (params_parser, c, args_len)) { |
2361 | 11.5k | ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", |
2362 | 11.5k | &parser->err); |
2363 | 11.5k | } |
2364 | 4.52k | else { |
2365 | 4.52k | res = ucl_parser_get_object (params_parser); |
2366 | 4.52k | } |
2367 | 16.0k | ucl_parser_free (params_parser); |
2368 | | |
2369 | 16.0k | return res; |
2370 | | |
2371 | 0 | break; |
2372 | 10.7M | } |
2373 | 10.7M | } |
2374 | | |
2375 | 168 | return res; |
2376 | | |
2377 | 0 | restore_chunk: |
2378 | 0 | chunk->column = saved.column; |
2379 | 0 | chunk->line = saved.line; |
2380 | 0 | chunk->pos = saved.pos; |
2381 | 0 | chunk->remain = saved.remain; |
2382 | |
|
2383 | 0 | return NULL; |
2384 | 16.2k | } |
2385 | | |
/*
 * Advance p (and the chunk) over unsafe whitespace; if the first other
 * character starts a comment, skip it with ucl_skip_comments and reload p
 * from the chunk.
 * NOTE: expands to a `return false` in the enclosing function when
 * ucl_skip_comments fails, so it is usable only in functions returning bool.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \
	while ((p) < (chunk)->end && \
			ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \
		ucl_chunk_skipc (chunk, p); \
	} \
	if ((p) < (chunk)->end && (chunk)->remain >= 2 && \
			ucl_lex_is_comment ((p)[0], (p)[1])) { \
		if (!ucl_skip_comments (parser)) { \
			return false; \
		} \
		p = (chunk)->pos; \
	} \
} while(0)
2400 | | |
2401 | | /** |
2402 | | * Handle the main states of rcl parser |
2403 | | * @param parser parser structure |
2404 | | * @return true if chunk has been parsed and false in case of error |
2405 | | */ |
2406 | | static bool |
2407 | | ucl_state_machine (struct ucl_parser *parser) |
2408 | 17.1k | { |
2409 | 17.1k | ucl_object_t *obj, *macro_args; |
2410 | 17.1k | struct ucl_chunk *chunk = parser->chunks; |
2411 | 17.1k | const unsigned char *p, *c = NULL, *macro_start = NULL; |
2412 | 17.1k | unsigned char *macro_escaped; |
2413 | 17.1k | size_t macro_len = 0; |
2414 | 17.1k | struct ucl_macro *macro = NULL; |
2415 | 17.1k | bool next_key = false, end_of_object = false, ret; |
2416 | | |
2417 | 17.1k | if (parser->top_obj == NULL) { |
2418 | 16.2k | parser->state = UCL_STATE_INIT; |
2419 | 16.2k | } |
2420 | | |
2421 | 17.1k | p = chunk->pos; |
2422 | 8.25M | while (chunk->pos < chunk->end) { |
2423 | 8.24M | switch (parser->state) { |
2424 | 17.1k | case UCL_STATE_INIT: |
2425 | | /* |
2426 | | * At the init state we can either go to the parse array or object |
2427 | | * if we got [ or { correspondingly or can just treat new data as |
2428 | | * a key of newly created object |
2429 | | */ |
2430 | 17.1k | if (!ucl_skip_comments (parser)) { |
2431 | 0 | parser->prev_state = parser->state; |
2432 | 0 | parser->state = UCL_STATE_ERROR; |
2433 | 0 | return false; |
2434 | 0 | } |
2435 | 17.1k | else { |
2436 | 17.1k | bool seen_obrace = false; |
2437 | | |
2438 | | /* Skip any spaces */ |
2439 | 18.2k | while (p < chunk->end && ucl_test_character (*p, |
2440 | 18.2k | UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
2441 | 1.09k | ucl_chunk_skipc (chunk, p); |
2442 | 1.09k | } |
2443 | | |
2444 | 17.1k | p = chunk->pos; |
2445 | | |
2446 | 17.1k | if (p < chunk->end) { |
2447 | 17.0k | if (*p == '[') { |
2448 | 251 | parser->state = UCL_STATE_VALUE; |
2449 | 251 | ucl_chunk_skipc (chunk, p); |
2450 | 0 | seen_obrace = true; |
2451 | 251 | } |
2452 | 16.7k | else { |
2453 | | |
2454 | 16.7k | if (*p == '{') { |
2455 | 13 | ucl_chunk_skipc (chunk, p); |
2456 | 0 | parser->state = UCL_STATE_KEY_OBRACE; |
2457 | 13 | seen_obrace = true; |
2458 | 13 | } |
2459 | 16.7k | else { |
2460 | 16.7k | parser->state = UCL_STATE_KEY; |
2461 | 16.7k | } |
2462 | 16.7k | } |
2463 | 17.0k | } |
2464 | | |
2465 | 17.1k | if (parser->top_obj == NULL) { |
2466 | 16.2k | if (parser->state == UCL_STATE_VALUE) { |
2467 | 251 | obj = ucl_parser_add_container (NULL, parser, true, 0, |
2468 | 251 | seen_obrace); |
2469 | 251 | } |
2470 | 15.9k | else { |
2471 | 15.9k | obj = ucl_parser_add_container (NULL, parser, false, 0, |
2472 | 15.9k | seen_obrace); |
2473 | 15.9k | } |
2474 | | |
2475 | 16.2k | if (obj == NULL) { |
2476 | 0 | return false; |
2477 | 0 | } |
2478 | | |
2479 | 16.2k | parser->top_obj = obj; |
2480 | 16.2k | parser->cur_obj = obj; |
2481 | 16.2k | } |
2482 | | |
2483 | 17.1k | } |
2484 | 17.1k | break; |
2485 | 183k | case UCL_STATE_KEY: |
2486 | 183k | case UCL_STATE_KEY_OBRACE: |
2487 | | /* Skip any spaces */ |
2488 | 184k | while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { |
2489 | 1.12k | ucl_chunk_skipc (chunk, p); |
2490 | 1.12k | } |
2491 | 183k | if (p == chunk->end || *p == '}') { |
2492 | | /* We have the end of an object */ |
2493 | 22 | parser->state = UCL_STATE_AFTER_VALUE; |
2494 | 22 | continue; |
2495 | 22 | } |
2496 | 183k | if (parser->stack == NULL) { |
2497 | | /* No objects are on stack, but we want to parse a key */ |
2498 | 0 | ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " |
2499 | 0 | "expects a key", &parser->err); |
2500 | 0 | parser->prev_state = parser->state; |
2501 | 0 | parser->state = UCL_STATE_ERROR; |
2502 | 0 | return false; |
2503 | 0 | } |
2504 | 183k | if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { |
2505 | 4.64k | parser->prev_state = parser->state; |
2506 | 4.64k | parser->state = UCL_STATE_ERROR; |
2507 | 4.64k | return false; |
2508 | 4.64k | } |
2509 | | |
2510 | 178k | if (end_of_object) { |
2511 | 0 | p = chunk->pos; |
2512 | 0 | parser->state = UCL_STATE_AFTER_VALUE; |
2513 | 0 | continue; |
2514 | 0 | } |
2515 | 178k | else if (parser->state != UCL_STATE_MACRO_NAME) { |
2516 | 158k | if (next_key && parser->stack->obj->type == UCL_OBJECT) { |
2517 | | /* Parse more keys and nest objects accordingly */ |
2518 | 109k | obj = ucl_parser_add_container (parser->cur_obj, |
2519 | 109k | parser, |
2520 | 109k | false, |
2521 | 109k | parser->stack->e.params.level + 1, |
2522 | 109k | parser->state == UCL_STATE_KEY_OBRACE); |
2523 | 109k | if (obj == NULL) { |
2524 | 1 | return false; |
2525 | 1 | } |
2526 | 109k | } |
2527 | 49.4k | else { |
2528 | 49.4k | parser->state = UCL_STATE_VALUE; |
2529 | 49.4k | } |
2530 | 158k | } |
2531 | 19.5k | else { |
2532 | 19.5k | c = chunk->pos; |
2533 | 19.5k | } |
2534 | 178k | p = chunk->pos; |
2535 | 178k | break; |
2536 | 1.88M | case UCL_STATE_VALUE: |
2537 | | /* We need to check what we do have */ |
2538 | 1.88M | if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) { |
2539 | 244 | parser->prev_state = parser->state; |
2540 | 244 | parser->state = UCL_STATE_ERROR; |
2541 | 244 | return false; |
2542 | 244 | } |
2543 | | /* State is set in ucl_parse_value call */ |
2544 | 1.88M | p = chunk->pos; |
2545 | 1.88M | break; |
2546 | 1.87M | case UCL_STATE_AFTER_VALUE: |
2547 | 1.87M | if (!ucl_parse_after_value (parser, chunk)) { |
2548 | 377 | parser->prev_state = parser->state; |
2549 | 377 | parser->state = UCL_STATE_ERROR; |
2550 | 377 | return false; |
2551 | 377 | } |
2552 | | |
2553 | 1.87M | if (parser->stack != NULL) { |
2554 | 1.87M | if (parser->stack->obj->type == UCL_OBJECT) { |
2555 | 46.0k | parser->state = UCL_STATE_KEY; |
2556 | 46.0k | } |
2557 | 1.83M | else { |
2558 | | /* Array */ |
2559 | 1.83M | parser->state = UCL_STATE_VALUE; |
2560 | 1.83M | } |
2561 | 1.87M | } |
2562 | 6 | else { |
2563 | | /* Skip everything at the end */ |
2564 | 6 | return true; |
2565 | 6 | } |
2566 | | |
2567 | 1.87M | p = chunk->pos; |
2568 | 1.87M | break; |
2569 | 4.27M | case UCL_STATE_MACRO_NAME: |
2570 | 4.27M | if (parser->flags & UCL_PARSER_DISABLE_MACRO) { |
2571 | 0 | if (!ucl_skip_macro_as_comment (parser, chunk)) { |
2572 | | /* We have invalid macro */ |
2573 | 0 | ucl_create_err (&parser->err, |
2574 | 0 | "error at %s:%d at column %d: invalid macro", |
2575 | 0 | chunk->fname ? chunk->fname : "memory", |
2576 | 0 | chunk->line, |
2577 | 0 | chunk->column); |
2578 | 0 | parser->state = UCL_STATE_ERROR; |
2579 | 0 | return false; |
2580 | 0 | } |
2581 | 0 | else { |
2582 | 0 | p = chunk->pos; |
2583 | 0 | parser->state = parser->prev_state; |
2584 | 0 | } |
2585 | 0 | } |
2586 | 4.27M | else { |
2587 | 4.27M | if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && |
2588 | 4.27M | *p != '(') { |
2589 | 4.25M | ucl_chunk_skipc (chunk, p); |
2590 | 4.25M | } |
2591 | 17.7k | else { |
2592 | 17.7k | if (c != NULL && p - c > 0) { |
2593 | | /* We got macro name */ |
2594 | 17.7k | macro_len = (size_t) (p - c); |
2595 | 17.7k | HASH_FIND (hh, parser->macroes, c, macro_len, macro); |
2596 | 17.7k | if (macro == NULL) { |
2597 | 1.37k | ucl_create_err (&parser->err, |
2598 | 1.37k | "error at %s:%d at column %d: " |
2599 | 1.37k | "unknown macro: '%.*s', character: '%c'", |
2600 | 1.37k | chunk->fname ? chunk->fname : "memory", |
2601 | 1.37k | chunk->line, |
2602 | 1.37k | chunk->column, |
2603 | 1.37k | (int) (p - c), |
2604 | 1.37k | c, |
2605 | 1.37k | *chunk->pos); |
2606 | 1.37k | parser->state = UCL_STATE_ERROR; |
2607 | 1.37k | return false; |
2608 | 1.37k | } |
2609 | | /* Now we need to skip all spaces */ |
2610 | 16.3k | SKIP_SPACES_COMMENTS(parser, chunk, p); |
2611 | 16.3k | parser->state = UCL_STATE_MACRO; |
2612 | 16.3k | } |
2613 | 56 | else { |
2614 | | /* We have invalid macro name */ |
2615 | 56 | ucl_create_err (&parser->err, |
2616 | 56 | "error at %s:%d at column %d: invalid macro name", |
2617 | 56 | chunk->fname ? chunk->fname : "memory", |
2618 | 56 | chunk->line, |
2619 | 56 | chunk->column); |
2620 | 56 | parser->state = UCL_STATE_ERROR; |
2621 | 56 | return false; |
2622 | 56 | } |
2623 | 17.7k | } |
2624 | 4.27M | } |
2625 | 4.27M | break; |
2626 | 4.27M | case UCL_STATE_MACRO: |
2627 | 16.3k | if (*chunk->pos == '(') { |
2628 | 16.2k | macro_args = ucl_parse_macro_arguments (parser, chunk); |
2629 | 16.2k | p = chunk->pos; |
2630 | 16.2k | if (macro_args) { |
2631 | 4.52k | SKIP_SPACES_COMMENTS(parser, chunk, p); |
2632 | 4.52k | } |
2633 | 16.2k | } |
2634 | 101 | else { |
2635 | 101 | macro_args = NULL; |
2636 | 101 | } |
2637 | 15.7k | if (!ucl_parse_macro_value (parser, chunk, macro, |
2638 | 15.7k | ¯o_start, ¯o_len)) { |
2639 | 3 | parser->prev_state = parser->state; |
2640 | 3 | parser->state = UCL_STATE_ERROR; |
2641 | 3 | return false; |
2642 | 3 | } |
2643 | 15.7k | macro_len = ucl_expand_variable (parser, ¯o_escaped, |
2644 | 15.7k | macro_start, macro_len); |
2645 | 15.7k | parser->state = parser->prev_state; |
2646 | | |
2647 | 15.7k | if (macro_escaped == NULL && macro != NULL) { |
2648 | 15.4k | if (macro->is_context) { |
2649 | 7 | ret = macro->h.context_handler (macro_start, macro_len, |
2650 | 7 | macro_args, |
2651 | 7 | parser->top_obj, |
2652 | 7 | macro->ud); |
2653 | 7 | } |
2654 | 15.4k | else { |
2655 | 15.4k | ret = macro->h.handler (macro_start, macro_len, macro_args, |
2656 | 15.4k | macro->ud); |
2657 | 15.4k | } |
2658 | 15.4k | } |
2659 | 283 | else if (macro != NULL) { |
2660 | 283 | if (macro->is_context) { |
2661 | 1 | ret = macro->h.context_handler (macro_escaped, macro_len, |
2662 | 1 | macro_args, |
2663 | 1 | parser->top_obj, |
2664 | 1 | macro->ud); |
2665 | 1 | } |
2666 | 282 | else { |
2667 | 282 | ret = macro->h.handler (macro_escaped, macro_len, macro_args, |
2668 | 282 | macro->ud); |
2669 | 282 | } |
2670 | | |
2671 | 283 | UCL_FREE (macro_len + 1, macro_escaped); |
2672 | 283 | } |
2673 | 0 | else { |
2674 | 0 | ret = false; |
2675 | 0 | ucl_set_err (parser, UCL_EINTERNAL, |
2676 | 0 | "internal error: parser has macro undefined", &parser->err); |
2677 | 0 | } |
2678 | | |
2679 | | /* |
2680 | | * Chunk can be modified within macro handler |
2681 | | */ |
2682 | 15.7k | chunk = parser->chunks; |
2683 | 15.7k | p = chunk->pos; |
2684 | | |
2685 | 15.7k | if (macro_args) { |
2686 | 3.93k | ucl_object_unref (macro_args); |
2687 | 3.93k | } |
2688 | | |
2689 | 15.7k | if (!ret) { |
2690 | 4.31k | return false; |
2691 | 4.31k | } |
2692 | 11.4k | break; |
2693 | 11.4k | default: |
2694 | 0 | ucl_set_err (parser, UCL_EINTERNAL, |
2695 | 0 | "internal error: parser is in an unknown state", &parser->err); |
2696 | 0 | parser->state = UCL_STATE_ERROR; |
2697 | 0 | return false; |
2698 | 8.24M | } |
2699 | 8.24M | } |
2700 | | |
2701 | 5.57k | if (parser->last_comment) { |
2702 | 0 | if (parser->cur_obj) { |
2703 | 0 | ucl_attach_comment (parser, parser->cur_obj, true); |
2704 | 0 | } |
2705 | 0 | else if (parser->stack && parser->stack->obj) { |
2706 | 0 | ucl_attach_comment (parser, parser->stack->obj, true); |
2707 | 0 | } |
2708 | 0 | else if (parser->top_obj) { |
2709 | 0 | ucl_attach_comment (parser, parser->top_obj, true); |
2710 | 0 | } |
2711 | 0 | else { |
2712 | 0 | ucl_object_unref (parser->last_comment); |
2713 | 0 | } |
2714 | 0 | } |
2715 | | |
2716 | 5.57k | if (parser->stack != NULL && parser->state != UCL_STATE_ERROR) { |
2717 | 5.57k | struct ucl_stack *st; |
2718 | 5.57k | bool has_error = false; |
2719 | | |
2720 | 49.3k | LL_FOREACH (parser->stack, st) { |
2721 | 49.3k | if (st->chunk != parser->chunks) { |
2722 | 965 | break; /* Not our chunk, give up */ |
2723 | 965 | } |
2724 | 48.4k | if (st->e.params.flags & UCL_STACK_HAS_OBRACE) { |
2725 | 132 | if (parser->err == NULL) { |
2726 | 88 | utstring_new (parser->err); |
2727 | 88 | } |
2728 | | |
2729 | 132 | utstring_printf (parser->err, "%s:%d unmatched open brace at %d; ", |
2730 | 132 | chunk->fname ? chunk->fname : "memory", |
2731 | 132 | parser->chunks->line, |
2732 | 132 | st->e.params.line); |
2733 | | |
2734 | 132 | has_error = true; |
2735 | 132 | } |
2736 | 48.4k | } |
2737 | | |
2738 | 5.57k | if (has_error) { |
2739 | 88 | parser->err_code = UCL_EUNPAIRED; |
2740 | | |
2741 | 88 | return false; |
2742 | 88 | } |
2743 | 5.57k | } |
2744 | | |
2745 | 5.48k | return true; |
2746 | 5.57k | } |
2747 | | |
/*
 * Call the registration function fn(a, b, c, a) and jump to label el if it
 * reports failure. Note that the first argument is also passed as the last
 * one.
 */
#define UPRM_SAFE(fn, a, b, c, el) do { \
	if (!fn(a, b, c, a)) { \
		goto el; \
	} \
} while (0)
2752 | | |
2753 | | struct ucl_parser* |
2754 | | ucl_parser_new (int flags) |
2755 | 16.2k | { |
2756 | 16.2k | struct ucl_parser *parser; |
2757 | | |
2758 | 16.2k | parser = UCL_ALLOC (sizeof (struct ucl_parser)); |
2759 | 16.2k | if (parser == NULL) { |
2760 | 0 | return NULL; |
2761 | 0 | } |
2762 | | |
2763 | 16.2k | memset (parser, 0, sizeof (struct ucl_parser)); |
2764 | | |
2765 | 16.2k | UPRM_SAFE(ucl_parser_register_macro, parser, "include", ucl_include_handler, e0); |
2766 | 16.2k | UPRM_SAFE(ucl_parser_register_macro, parser, "try_include", ucl_try_include_handler, e0); |
2767 | 16.2k | UPRM_SAFE(ucl_parser_register_macro, parser, "includes", ucl_includes_handler, e0); |
2768 | 16.2k | UPRM_SAFE(ucl_parser_register_macro, parser, "priority", ucl_priority_handler, e0); |
2769 | 16.2k | UPRM_SAFE(ucl_parser_register_macro, parser, "load", ucl_load_handler, e0); |
2770 | 16.2k | UPRM_SAFE(ucl_parser_register_context_macro, parser, "inherit", ucl_inherit_handler, e0); |
2771 | | |
2772 | 16.2k | parser->flags = flags; |
2773 | 16.2k | parser->includepaths = NULL; |
2774 | | |
2775 | 16.2k | if (flags & UCL_PARSER_SAVE_COMMENTS) { |
2776 | 0 | parser->comments = ucl_object_typed_new (UCL_OBJECT); |
2777 | 0 | } |
2778 | | |
2779 | 16.2k | if (!(flags & UCL_PARSER_NO_FILEVARS)) { |
2780 | | /* Initial assumption about filevars */ |
2781 | 16.2k | ucl_parser_set_filevars (parser, NULL, false); |
2782 | 16.2k | } |
2783 | | |
2784 | 16.2k | return parser; |
2785 | 0 | e0: |
2786 | 0 | ucl_parser_free(parser); |
2787 | 0 | return NULL; |
2788 | 16.2k | } |
2789 | | |
2790 | | bool |
2791 | | ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) |
2792 | 0 | { |
2793 | 0 | if (parser == NULL) { |
2794 | 0 | return false; |
2795 | 0 | } |
2796 | | |
2797 | 0 | parser->default_priority = prio; |
2798 | |
|
2799 | 0 | return true; |
2800 | 0 | } |
2801 | | |
2802 | | int |
2803 | | ucl_parser_get_default_priority (struct ucl_parser *parser) |
2804 | 0 | { |
2805 | 0 | if (parser == NULL) { |
2806 | 0 | return -1; |
2807 | 0 | } |
2808 | | |
2809 | 0 | return parser->default_priority; |
2810 | 0 | } |
2811 | | |
2812 | | bool |
2813 | | ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, |
2814 | | ucl_macro_handler handler, void* ud) |
2815 | 81.1k | { |
2816 | 81.1k | struct ucl_macro *new; |
2817 | | |
2818 | 81.1k | if (macro == NULL || handler == NULL) { |
2819 | 0 | return false; |
2820 | 0 | } |
2821 | | |
2822 | 81.1k | new = UCL_ALLOC (sizeof (struct ucl_macro)); |
2823 | 81.1k | if (new == NULL) { |
2824 | 0 | return false; |
2825 | 0 | } |
2826 | | |
2827 | 81.1k | memset (new, 0, sizeof (struct ucl_macro)); |
2828 | 81.1k | new->h.handler = handler; |
2829 | 81.1k | new->name = strdup (macro); |
2830 | 81.1k | if (new->name == NULL) { |
2831 | 0 | UCL_FREE (sizeof (struct ucl_macro), new); |
2832 | 0 | return false; |
2833 | 0 | } |
2834 | 81.1k | new->ud = ud; |
2835 | 81.1k | HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); |
2836 | 81.1k | return true; |
2837 | 81.1k | } |
2838 | | |
2839 | | bool |
2840 | | ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, |
2841 | | ucl_context_macro_handler handler, void* ud) |
2842 | 16.2k | { |
2843 | 16.2k | struct ucl_macro *new; |
2844 | | |
2845 | 16.2k | if (macro == NULL || handler == NULL) { |
2846 | 0 | return false; |
2847 | 0 | } |
2848 | | |
2849 | 16.2k | new = UCL_ALLOC (sizeof (struct ucl_macro)); |
2850 | 16.2k | if (new == NULL) { |
2851 | 0 | return false; |
2852 | 0 | } |
2853 | | |
2854 | 16.2k | memset (new, 0, sizeof (struct ucl_macro)); |
2855 | 16.2k | new->h.context_handler = handler; |
2856 | 16.2k | new->name = strdup (macro); |
2857 | 16.2k | if (new->name == NULL) { |
2858 | 0 | UCL_FREE (sizeof (struct ucl_macro), new); |
2859 | 0 | return false; |
2860 | 0 | } |
2861 | 16.2k | new->ud = ud; |
2862 | 16.2k | new->is_context = true; |
2863 | 16.2k | HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); |
2864 | 16.2k | return true; |
2865 | 16.2k | } |
2866 | | |
2867 | | void |
2868 | | ucl_parser_register_variable (struct ucl_parser *parser, const char *var, |
2869 | | const char *value) |
2870 | 34.3k | { |
2871 | 34.3k | struct ucl_variable *new = NULL, *cur; |
2872 | | |
2873 | 34.3k | if (var == NULL) { |
2874 | 0 | return; |
2875 | 0 | } |
2876 | | |
2877 | | /* Find whether a variable already exists */ |
2878 | 34.3k | LL_FOREACH (parser->variables, cur) { |
2879 | 17.1k | if (strcmp (cur->var, var) == 0) { |
2880 | 0 | new = cur; |
2881 | 0 | break; |
2882 | 0 | } |
2883 | 17.1k | } |
2884 | | |
2885 | 34.3k | if (value == NULL) { |
2886 | |
|
2887 | 0 | if (new != NULL) { |
2888 | | /* Remove variable */ |
2889 | 0 | DL_DELETE (parser->variables, new); |
2890 | 0 | free (new->var); |
2891 | 0 | free (new->value); |
2892 | 0 | UCL_FREE (sizeof (struct ucl_variable), new); |
2893 | 0 | } |
2894 | 0 | else { |
2895 | | /* Do nothing */ |
2896 | 0 | return; |
2897 | 0 | } |
2898 | 0 | } |
2899 | 34.3k | else { |
2900 | 34.3k | if (new == NULL) { |
2901 | 34.3k | new = UCL_ALLOC (sizeof (struct ucl_variable)); |
2902 | 34.3k | if (new == NULL) { |
2903 | 0 | return; |
2904 | 0 | } |
2905 | 34.3k | memset (new, 0, sizeof (struct ucl_variable)); |
2906 | 34.3k | new->var = strdup (var); |
2907 | 34.3k | new->var_len = strlen (var); |
2908 | 34.3k | new->value = strdup (value); |
2909 | 34.3k | new->value_len = strlen (value); |
2910 | | |
2911 | 34.3k | DL_APPEND (parser->variables, new); |
2912 | 34.3k | } |
2913 | 0 | else { |
2914 | 0 | free (new->value); |
2915 | 0 | new->value = strdup (value); |
2916 | 0 | new->value_len = strlen (value); |
2917 | 0 | } |
2918 | 34.3k | } |
2919 | 34.3k | } |
2920 | | |
2921 | | void |
2922 | | ucl_parser_set_variables_handler (struct ucl_parser *parser, |
2923 | | ucl_variable_handler handler, void *ud) |
2924 | 0 | { |
2925 | 0 | parser->var_handler = handler; |
2926 | 0 | parser->var_data = ud; |
2927 | 0 | } |
2928 | | |
2929 | | bool |
2930 | | ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, |
2931 | | size_t len, unsigned priority, enum ucl_duplicate_strategy strat, |
2932 | | enum ucl_parse_type parse_type) |
2933 | 17.1k | { |
2934 | 17.1k | struct ucl_chunk *chunk; |
2935 | 17.1k | struct ucl_parser_special_handler *special_handler; |
2936 | | |
2937 | 17.1k | if (parser == NULL) { |
2938 | 0 | return false; |
2939 | 0 | } |
2940 | | |
2941 | 17.1k | if (data == NULL && len != 0) { |
2942 | 0 | ucl_create_err (&parser->err, "invalid chunk added"); |
2943 | 0 | return false; |
2944 | 0 | } |
2945 | | |
2946 | 17.1k | if (parser->state != UCL_STATE_ERROR) { |
2947 | 17.1k | chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); |
2948 | 17.1k | if (chunk == NULL) { |
2949 | 0 | ucl_create_err (&parser->err, "cannot allocate chunk structure"); |
2950 | 0 | return false; |
2951 | 0 | } |
2952 | | |
2953 | 17.1k | memset (chunk, 0, sizeof (*chunk)); |
2954 | | |
2955 | | /* Apply all matching handlers from the first to the last */ |
2956 | 17.1k | LL_FOREACH (parser->special_handlers, special_handler) { |
2957 | 0 | if ((special_handler->flags & UCL_SPECIAL_HANDLER_PREPROCESS_ALL) || |
2958 | 0 | (len >= special_handler->magic_len && |
2959 | 0 | memcmp (data, special_handler->magic, special_handler->magic_len) == 0)) { |
2960 | 0 | unsigned char *ndata = NULL; |
2961 | 0 | size_t nlen = 0; |
2962 | |
|
2963 | 0 | if (!special_handler->handler (parser, data, len, &ndata, &nlen, |
2964 | 0 | special_handler->user_data)) { |
2965 | 0 | UCL_FREE(sizeof (struct ucl_chunk), chunk); |
2966 | 0 | ucl_create_err (&parser->err, "call for external handler failed"); |
2967 | |
|
2968 | 0 | return false; |
2969 | 0 | } |
2970 | | |
2971 | 0 | struct ucl_parser_special_handler_chain *nchain; |
2972 | 0 | nchain = UCL_ALLOC (sizeof (*nchain)); |
2973 | 0 | nchain->begin = ndata; |
2974 | 0 | nchain->len = nlen; |
2975 | 0 | nchain->special_handler = special_handler; |
2976 | | |
2977 | | /* Free order is reversed */ |
2978 | 0 | LL_PREPEND (chunk->special_handlers, nchain); |
2979 | |
|
2980 | 0 | data = ndata; |
2981 | 0 | len = nlen; |
2982 | 0 | } |
2983 | 0 | } |
2984 | | |
2985 | 17.1k | if (parse_type == UCL_PARSE_AUTO && len > 0) { |
2986 | | /* We need to detect parse type by the first symbol */ |
2987 | 0 | if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) { |
2988 | 0 | parse_type = UCL_PARSE_MSGPACK; |
2989 | 0 | } |
2990 | 0 | else if (*data == '(') { |
2991 | 0 | parse_type = UCL_PARSE_CSEXP; |
2992 | 0 | } |
2993 | 0 | else { |
2994 | 0 | parse_type = UCL_PARSE_UCL; |
2995 | 0 | } |
2996 | 0 | } |
2997 | | |
2998 | 17.1k | chunk->begin = data; |
2999 | 17.1k | chunk->remain = len; |
3000 | 17.1k | chunk->pos = chunk->begin; |
3001 | 17.1k | chunk->end = chunk->begin + len; |
3002 | 17.1k | chunk->line = 1; |
3003 | 17.1k | chunk->column = 0; |
3004 | 17.1k | chunk->priority = priority; |
3005 | 17.1k | chunk->strategy = strat; |
3006 | 17.1k | chunk->parse_type = parse_type; |
3007 | | |
3008 | 17.1k | if (parser->cur_file) { |
3009 | 967 | chunk->fname = strdup (parser->cur_file); |
3010 | 967 | } |
3011 | | |
3012 | 17.1k | LL_PREPEND (parser->chunks, chunk); |
3013 | 17.1k | parser->recursion ++; |
3014 | | |
3015 | 17.1k | if (parser->recursion > UCL_MAX_RECURSION) { |
3016 | 0 | ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", |
3017 | 0 | parser->recursion); |
3018 | 0 | return false; |
3019 | 0 | } |
3020 | | |
3021 | 17.1k | if (len > 0) { |
3022 | | /* Need to parse something */ |
3023 | 17.1k | switch (parse_type) { |
3024 | 0 | default: |
3025 | 17.1k | case UCL_PARSE_UCL: |
3026 | 17.1k | return ucl_state_machine (parser); |
3027 | 0 | case UCL_PARSE_MSGPACK: |
3028 | 0 | return ucl_parse_msgpack (parser); |
3029 | 0 | case UCL_PARSE_CSEXP: |
3030 | 0 | return ucl_parse_csexp (parser); |
3031 | 17.1k | } |
3032 | 17.1k | } |
3033 | 6 | else { |
3034 | | /* Just add empty chunk and go forward */ |
3035 | 6 | if (parser->top_obj == NULL) { |
3036 | | /* |
3037 | | * In case of empty object, create one to indicate that we've |
3038 | | * read something |
3039 | | */ |
3040 | 4 | parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); |
3041 | 4 | } |
3042 | | |
3043 | 6 | return true; |
3044 | 6 | } |
3045 | 17.1k | } |
3046 | | |
3047 | 0 | ucl_create_err (&parser->err, "a parser is in an invalid state"); |
3048 | |
|
3049 | 0 | return false; |
3050 | 17.1k | } |
3051 | | |
3052 | | bool |
3053 | | ucl_parser_add_chunk_priority (struct ucl_parser *parser, |
3054 | | const unsigned char *data, size_t len, unsigned priority) |
3055 | 165 | { |
3056 | | /* We dereference parser, so this check is essential */ |
3057 | 165 | if (parser == NULL) { |
3058 | 0 | return false; |
3059 | 0 | } |
3060 | | |
3061 | 165 | return ucl_parser_add_chunk_full (parser, data, len, |
3062 | 165 | priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); |
3063 | 165 | } |
3064 | | |
3065 | | bool |
3066 | | ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, |
3067 | | size_t len) |
3068 | 16.0k | { |
3069 | 16.0k | if (parser == NULL) { |
3070 | 0 | return false; |
3071 | 0 | } |
3072 | | |
3073 | 16.0k | return ucl_parser_add_chunk_full (parser, data, len, |
3074 | 16.0k | parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); |
3075 | 16.0k | } |
3076 | | |
3077 | | bool |
3078 | | ucl_parser_insert_chunk (struct ucl_parser *parser, const unsigned char *data, |
3079 | | size_t len) |
3080 | 0 | { |
3081 | 0 | if (parser == NULL || parser->top_obj == NULL) { |
3082 | 0 | return false; |
3083 | 0 | } |
3084 | | |
3085 | 0 | bool res; |
3086 | 0 | struct ucl_chunk *chunk; |
3087 | |
|
3088 | 0 | int state = parser->state; |
3089 | 0 | parser->state = UCL_STATE_INIT; |
3090 | | |
3091 | | /* Prevent inserted chunks from unintentionally closing the current object */ |
3092 | 0 | if (parser->stack != NULL && parser->stack->next != NULL) { |
3093 | 0 | parser->stack->e.params.level = parser->stack->next->e.params.level; |
3094 | 0 | } |
3095 | |
|
3096 | 0 | res = ucl_parser_add_chunk_full (parser, data, len, parser->chunks->priority, |
3097 | 0 | parser->chunks->strategy, parser->chunks->parse_type); |
3098 | | |
3099 | | /* Remove chunk from the stack */ |
3100 | 0 | chunk = parser->chunks; |
3101 | 0 | if (chunk != NULL) { |
3102 | 0 | parser->chunks = chunk->next; |
3103 | 0 | ucl_chunk_free (chunk); |
3104 | 0 | parser->recursion --; |
3105 | 0 | } |
3106 | |
|
3107 | 0 | parser->state = state; |
3108 | |
|
3109 | 0 | return res; |
3110 | 0 | } |
3111 | | |
3112 | | bool |
3113 | | ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, |
3114 | | size_t len, unsigned priority) |
3115 | 165 | { |
3116 | 165 | if (data == NULL) { |
3117 | 0 | ucl_create_err (&parser->err, "invalid string added"); |
3118 | 0 | return false; |
3119 | 0 | } |
3120 | 165 | if (len == 0) { |
3121 | 0 | len = strlen (data); |
3122 | 0 | } |
3123 | | |
3124 | 165 | return ucl_parser_add_chunk_priority (parser, |
3125 | 165 | (const unsigned char *)data, len, priority); |
3126 | 165 | } |
3127 | | |
3128 | | bool |
3129 | | ucl_parser_add_string (struct ucl_parser *parser, const char *data, |
3130 | | size_t len) |
3131 | 165 | { |
3132 | 165 | if (parser == NULL) { |
3133 | 0 | return false; |
3134 | 0 | } |
3135 | | |
3136 | 165 | return ucl_parser_add_string_priority (parser, |
3137 | 165 | (const unsigned char *)data, len, parser->default_priority); |
3138 | 165 | } |
3139 | | |
3140 | | bool |
3141 | | ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) |
3142 | 10 | { |
3143 | 10 | if (parser == NULL || paths == NULL) { |
3144 | 0 | return false; |
3145 | 0 | } |
3146 | | |
3147 | 10 | if (parser->includepaths == NULL) { |
3148 | 10 | parser->includepaths = ucl_object_copy (paths); |
3149 | 10 | } |
3150 | 0 | else { |
3151 | 0 | ucl_object_unref (parser->includepaths); |
3152 | 0 | parser->includepaths = ucl_object_copy (paths); |
3153 | 0 | } |
3154 | | |
3155 | 10 | if (parser->includepaths == NULL) { |
3156 | 0 | return false; |
3157 | 0 | } |
3158 | | |
3159 | 10 | return true; |
3160 | 10 | } |
3161 | | |
3162 | | unsigned char ucl_parser_chunk_peek (struct ucl_parser *parser) |
3163 | 0 | { |
3164 | 0 | if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || |
3165 | 0 | parser->chunks->pos == parser->chunks->end) { |
3166 | 0 | return 0; |
3167 | 0 | } |
3168 | | |
3169 | 0 | return( *parser->chunks->pos ); |
3170 | 0 | } |
3171 | | |
3172 | | bool ucl_parser_chunk_skip (struct ucl_parser *parser) |
3173 | 0 | { |
3174 | 0 | if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || |
3175 | 0 | parser->chunks->pos == parser->chunks->end) { |
3176 | 0 | return false; |
3177 | 0 | } |
3178 | | |
3179 | 0 | const unsigned char *p = parser->chunks->pos; |
3180 | 0 | ucl_chunk_skipc( parser->chunks, p ); |
3181 | 0 | if( parser->chunks->pos != NULL ) return true; |
3182 | 0 | return false; |
3183 | 0 | } |
3184 | | |
3185 | | ucl_object_t* |
3186 | | ucl_parser_get_current_stack_object (struct ucl_parser *parser, unsigned int depth) |
3187 | 0 | { |
3188 | 0 | ucl_object_t *obj; |
3189 | |
|
3190 | 0 | if (parser == NULL || parser->stack == NULL) { |
3191 | 0 | return NULL; |
3192 | 0 | } |
3193 | | |
3194 | 0 | struct ucl_stack *stack = parser->stack; |
3195 | 0 | if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) |
3196 | 0 | { |
3197 | 0 | return NULL; |
3198 | 0 | } |
3199 | | |
3200 | 0 | for( unsigned int i = 0; i < depth; ++i ) |
3201 | 0 | { |
3202 | 0 | stack = stack->next; |
3203 | 0 | if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) |
3204 | 0 | { |
3205 | 0 | return NULL; |
3206 | 0 | } |
3207 | 0 | } |
3208 | | |
3209 | 0 | obj = ucl_object_ref (stack->obj); |
3210 | 0 | return obj; |
3211 | 0 | } |
3212 | | |