/src/tinysparql/subprojects/json-glib-1.10.6/json-glib/json-scanner.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* json-scanner.c: Tokenizer for JSON |
2 | | * |
3 | | * SPDX-FileCopyrightText: 1997 Tim Janik |
4 | | * SPDX-FileCopyrightText: 1997-2000 The GLib Team and others |
5 | | * SPDX-FileCopyrightText: 2008 OpenedHand Ltd. |
6 | | * SPDX-FileCopyrightText: 2024 Emmanuele Bassi |
7 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
8 | | * |
9 | | * Based on JsonScanner: Flexible lexical scanner for general purpose. |
10 | | * |
11 | | * Modified by Emmanuele Bassi <ebassi@openedhand.com> |
12 | | * |
13 | | * This library is free software; you can redistribute it and/or |
14 | | * modify it under the terms of the GNU Lesser General Public |
15 | | * License as published by the Free Software Foundation; either |
16 | | * version 2 of the License, or (at your option) any later version. |
17 | | * |
18 | | * This library is distributed in the hope that it will be useful, |
19 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | * Lesser General Public License for more details. |
22 | | * |
23 | | * You should have received a copy of the GNU Lesser General Public |
24 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | #include "config.h" |
28 | | |
29 | | #include "json-scanner.h" |
30 | | |
31 | | #include <errno.h> |
32 | | #include <stdint.h> |
33 | | #include <stdbool.h> |
34 | | #include <stdlib.h> |
35 | | #include <stdarg.h> |
36 | | #include <string.h> |
37 | | #include <stdio.h> |
38 | | #ifdef HAVE_UNISTD_H |
39 | | #include <unistd.h> |
40 | | #endif |
41 | | |
42 | | #include <glib.h> |
43 | | #include <glib/gprintf.h> |
44 | | /* Scanner error codes: stored in JsonTokenValue.v_error when a token is JSON_TOKEN_ERROR and turned into a message by json_scanner_unexp_token(). */ |
45 | | typedef enum |
46 | | { |
47 | | JSON_ERROR_TYPE_UNKNOWN, |
48 | | JSON_ERROR_TYPE_UNEXP_EOF, |
49 | | JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING, |
50 | | JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT, |
51 | | JSON_ERROR_TYPE_NON_DIGIT_IN_CONST, |
52 | | JSON_ERROR_TYPE_DIGIT_RADIX, |
53 | | JSON_ERROR_TYPE_FLOAT_RADIX, |
54 | | JSON_ERROR_TYPE_FLOAT_MALFORMED, |
55 | | JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR, |
56 | | JSON_ERROR_TYPE_LEADING_ZERO, |
57 | | JSON_ERROR_TYPE_UNESCAPED_CTRL, |
58 | | JSON_ERROR_TYPE_UNKNOWN_ESCAPE, |
59 | | JSON_ERROR_TYPE_MALFORMED_UNICODE |
60 | | } JsonErrorType; |
61 | | /* Lexer settings, filled in once by json_scanner_new(): the characters skipped between tokens, the characters accepted in identifiers, the single-line comment delimiters, and the strict-mode flag. */ |
62 | | typedef struct |
63 | | { |
64 | | const char *cset_skip_characters; |
65 | | const char *cset_identifier_first; |
66 | | const char *cset_identifier_nth; |
67 | | const char *cpair_comment_single; |
68 | | bool strict; |
69 | | } JsonScannerConfig; |
70 | | /* Payload of a token; the token type decides which member is meaningful. json_scanner_free_value() releases v_string for string, identifier and comment tokens. */ |
71 | | typedef union |
72 | | { |
73 | | gpointer v_symbol; |
74 | | char *v_identifier; |
75 | | gint64 v_int64; |
76 | | double v_float; |
77 | | char *v_string; |
78 | | unsigned int v_error; |
79 | | } JsonTokenValue; |
80 | | |
81 | | /*< private > |
82 | | * JsonScanner: |
83 | | * |
84 | | * Tokenizer state for JSON input: the current token, one token of lookahead, and a cursor over the input text. See #GScanner |
85 | | * |
86 | | * Since: 0.6 |
87 | | */ |
88 | | struct _JsonScanner |
89 | | { |
90 | | /* name of input stream, featured by the default message handler */ |
91 | | const char *input_name; |
92 | | |
93 | | /* scanner configuration, stored by value and filled in by json_scanner_new() */ |
94 | | JsonScannerConfig config; |
95 | | |
96 | | /* fields filled in after json_scanner_get_next_token() */ |
97 | | unsigned int token; |
98 | | JsonTokenValue value; |
99 | | unsigned int line; |
100 | | unsigned int position; |
101 | | |
102 | | /* fields filled in after json_scanner_peek_next_token() */ |
103 | | unsigned int next_token; |
104 | | JsonTokenValue next_value; |
105 | | unsigned int next_line; |
106 | | unsigned int next_position; |
107 | | |
108 | | /* input cursor: text advances towards text_end as characters are consumed; to be considered private */ |
109 | | const char *text; |
110 | | const char *text_end; |
111 | | char *buffer; |
112 | | |
113 | | /* handler function invoked by json_scanner_error(), and the user data passed back to it */ |
114 | | JsonScannerMsgFunc msg_handler; |
115 | | gpointer user_data; |
116 | | }; |
117 | | /* Keyword names packed into a single NUL-separated string; json_symbols[] refers to each name by its byte offset. */ |
118 | | static const gchar json_symbol_names[] = |
119 | | "true\0" |
120 | | "false\0" |
121 | | "null\0" |
122 | | "var\0"; |
123 | | /* Keyword table: name_offset is the offset of the name inside json_symbol_names, token is the token value for that keyword. */ |
124 | | static const struct |
125 | | { |
126 | | unsigned int name_offset; |
127 | | unsigned int token; |
128 | | } json_symbols[] = { |
129 | | { 0, JSON_TOKEN_TRUE }, |
130 | | { 5, JSON_TOKEN_FALSE }, |
131 | | { 11, JSON_TOKEN_NULL }, |
132 | | { 16, JSON_TOKEN_VAR } |
133 | | }; |
134 | | /* Forward declarations of the static helpers defined further down in this file. */ |
135 | | static void json_scanner_get_token_ll (JsonScanner *scanner, |
136 | | unsigned int *token_p, |
137 | | JsonTokenValue *value_p, |
138 | | unsigned int *line_p, |
139 | | unsigned int *position_p); |
140 | | static void json_scanner_get_token_i (JsonScanner *scanner, |
141 | | unsigned int *token_p, |
142 | | JsonTokenValue *value_p, |
143 | | unsigned int *line_p, |
144 | | unsigned int *position_p); |
145 | | |
146 | | static unsigned char json_scanner_peek_next_char (JsonScanner *scanner); |
147 | | static unsigned char json_scanner_get_char (JsonScanner *scanner, |
148 | | unsigned int *line_p, |
149 | | unsigned int *position_p); |
150 | | static bool json_scanner_get_unichar (JsonScanner *scanner, |
151 | | gunichar *ucs, |
152 | | unsigned int *line_p, |
153 | | unsigned int *position_p); |
154 | | static void json_scanner_error (JsonScanner *scanner, |
155 | | const char *format, |
156 | | ...) G_GNUC_PRINTF (2,3); |
157 | | /* Allocates a zero-filled scanner, installs the JSON lexer configuration (with @strict stored in it) and resets the current and lookahead token state. Returns the new scanner, which json_scanner_destroy() frees. */ |
158 | | JsonScanner * |
159 | | json_scanner_new (bool strict) |
160 | 16.3k | { |
161 | 16.3k | JsonScanner *scanner; |
162 | | |
163 | 16.3k | scanner = g_new0 (JsonScanner, 1); |
164 | | |
165 | 16.3k | scanner->config = (JsonScannerConfig) { |
166 | | // Skip whitespace |
167 | 16.3k | .cset_skip_characters = ( " \t\r\n" ), |
168 | | |
169 | | // Identifiers can only be lower case |
170 | 16.3k | .cset_identifier_first = ( |
171 | 16.3k | G_CSET_a_2_z |
172 | 16.3k | ), |
173 | 16.3k | .cset_identifier_nth = ( |
174 | 16.3k | G_CSET_a_2_z |
175 | 16.3k | ), |
176 | | |
177 | | // Only used if strict = false |
178 | 16.3k | .cpair_comment_single = ( "//\n" ), |
179 | 16.3k | .strict = strict, |
180 | 16.3k | }; |
181 | | /* No current token yet; counting starts at line 1, position 0. */ |
182 | 16.3k | scanner->token = JSON_TOKEN_NONE; |
183 | 16.3k | scanner->value.v_int64 = 0; |
184 | 16.3k | scanner->line = 1; |
185 | 16.3k | scanner->position = 0; |
186 | | /* The lookahead slot starts out empty in the same way. */ |
187 | 16.3k | scanner->next_token = JSON_TOKEN_NONE; |
188 | 16.3k | scanner->next_value.v_int64 = 0; |
189 | 16.3k | scanner->next_line = 1; |
190 | 16.3k | scanner->next_position = 0; |
191 | | |
192 | 16.3k | return scanner; |
193 | 16.3k | } |
194 | | /* Frees the string held in *value_p when *token_p is a string, identifier or comment token, then resets *token_p to JSON_TOKEN_NONE. NOTE(review): token_p is declared JsonTokenType * but every caller in this file passes the address of an unsigned int; confirm the two types are compatible on all supported compilers. */ |
195 | | static inline void |
196 | | json_scanner_free_value (JsonTokenType *token_p, |
197 | | JsonTokenValue *value_p) |
198 | 17.4M | { |
199 | 17.4M | switch (*token_p) |
200 | 17.4M | { |
201 | 2.47M | case JSON_TOKEN_STRING: |
202 | 2.48M | case JSON_TOKEN_IDENTIFIER: |
203 | 2.48M | case JSON_TOKEN_COMMENT_SINGLE: |
204 | 2.48M | case JSON_TOKEN_COMMENT_MULTI: |
205 | 2.48M | g_free (value_p->v_string); |
206 | 2.48M | break; |
207 | | /* Every other token type is treated as owning no heap memory. */ |
208 | 14.9M | default: |
209 | 14.9M | break; |
210 | 17.4M | } |
211 | | |
212 | 17.4M | *token_p = JSON_TOKEN_NONE; |
213 | 17.4M | } |
214 | | /* Frees the scanner: any string owned by the current and lookahead tokens, the buffer, and the scanner structure itself. A NULL @scanner is rejected by g_return_if_fail(). */ |
215 | | void |
216 | | json_scanner_destroy (JsonScanner *scanner) |
217 | 16.3k | { |
218 | 16.3k | g_return_if_fail (scanner != NULL); |
219 | | |
220 | 16.3k | json_scanner_free_value (&scanner->token, &scanner->value); |
221 | 16.3k | json_scanner_free_value (&scanner->next_token, &scanner->next_value); |
222 | | |
223 | 16.3k | g_free (scanner->buffer); |
224 | 16.3k | g_free (scanner); |
225 | 16.3k | } |
226 | | /* Installs the callback that json_scanner_error() invokes with each formatted message; @user_data is stored and passed back to the callback unchanged. */ |
227 | | void |
228 | | json_scanner_set_msg_handler (JsonScanner *scanner, |
229 | | JsonScannerMsgFunc msg_handler, |
230 | | gpointer user_data) |
231 | 16.3k | { |
232 | 16.3k | g_return_if_fail (scanner != NULL); |
233 | | |
234 | 16.3k | scanner->msg_handler = msg_handler; |
235 | 16.3k | scanner->user_data = user_data; |
236 | 16.3k | } |
237 | | /* Formats a printf-style message and hands it to the installed message handler; does nothing when no handler is set. The formatted string is freed after the handler returns. */ |
238 | | static void |
239 | | json_scanner_error (JsonScanner *scanner, |
240 | | const char *format, |
241 | | ...) |
242 | 1.44k | { |
243 | 1.44k | g_return_if_fail (scanner != NULL); |
244 | 1.44k | g_return_if_fail (format != NULL); |
245 | | |
246 | 1.44k | if (scanner->msg_handler) |
247 | 1.44k | { |
248 | 1.44k | va_list args; |
249 | 1.44k | char *string; |
250 | | |
251 | 1.44k | va_start (args, format); |
252 | 1.44k | string = g_strdup_vprintf (format, args); |
253 | 1.44k | va_end (args); |
254 | | |
255 | 1.44k | scanner->msg_handler (scanner, string, scanner->user_data); |
256 | | |
257 | 1.44k | g_free (string); |
258 | 1.44k | } |
259 | 1.44k | } |
260 | | /* Returns the type of the next token without consuming it. The token is scanned on first use and cached in the next_* fields until json_scanner_get_next_token() takes it. Returns JSON_TOKEN_EOF if @scanner is NULL. */ |
261 | | unsigned int |
262 | | json_scanner_peek_next_token (JsonScanner *scanner) |
263 | 9.60M | { |
264 | 9.60M | g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF); |
265 | | /* Only scan when the lookahead slot is empty; the scan continues from the current line and position. */ |
266 | 9.60M | if (scanner->next_token == JSON_TOKEN_NONE) |
267 | 8.67M | { |
268 | 8.67M | scanner->next_line = scanner->line; |
269 | 8.67M | scanner->next_position = scanner->position; |
270 | 8.67M | json_scanner_get_token_i (scanner, |
271 | 8.67M | &scanner->next_token, |
272 | 8.67M | &scanner->next_value, |
273 | 8.67M | &scanner->next_line, |
274 | 8.67M | &scanner->next_position); |
275 | 8.67M | } |
276 | | |
277 | 9.60M | return scanner->next_token; |
278 | 9.60M | } |
279 | | /* Advances to the next token and returns its type. A token cached by json_scanner_peek_next_token() is moved into the current-token fields; otherwise a new token is scanned directly. Returns JSON_TOKEN_EOF if @scanner is NULL. */ |
280 | | unsigned int |
281 | | json_scanner_get_next_token (JsonScanner *scanner) |
282 | 8.67M | { |
283 | 8.67M | g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF); |
284 | | |
285 | 8.67M | if (scanner->next_token != JSON_TOKEN_NONE) |
286 | 8.65M | { |
287 | 8.65M | json_scanner_free_value (&scanner->token, &scanner->value); |
288 | | /* The value is moved, not copied: emptying the lookahead slot below leaves the current token as its only owner. */ |
289 | 8.65M | scanner->token = scanner->next_token; |
290 | 8.65M | scanner->value = scanner->next_value; |
291 | 8.65M | scanner->line = scanner->next_line; |
292 | 8.65M | scanner->position = scanner->next_position; |
293 | 8.65M | scanner->next_token = JSON_TOKEN_NONE; |
294 | 8.65M | } |
295 | 11.5k | else |
296 | 11.5k | json_scanner_get_token_i (scanner, |
297 | 11.5k | &scanner->token, |
298 | 11.5k | &scanner->value, |
299 | 11.5k | &scanner->line, |
300 | 11.5k | &scanner->position); |
301 | | |
302 | 8.67M | return scanner->token; |
303 | 8.67M | } |
304 | | /* Points the scanner at @text (@text_len bytes, not copied) and resets the token state; a zero length is treated as no input. NOTE(review): the token types are reset to JSON_TOKEN_NONE without calling json_scanner_free_value(), and next_line/next_position are not reset; verify this is safe if a scanner is ever given new input after it has scanned tokens. */ |
305 | | void |
306 | | json_scanner_input_text (JsonScanner *scanner, |
307 | | const char *text, |
308 | | guint text_len) |
309 | 16.3k | { |
310 | 16.3k | g_return_if_fail (scanner != NULL); |
311 | 16.3k | if (text_len) |
312 | 16.3k | g_return_if_fail (text != NULL); |
313 | 1 | else |
314 | 1 | text = NULL; |
315 | | |
316 | 16.3k | scanner->token = JSON_TOKEN_NONE; |
317 | 16.3k | scanner->value.v_int64 = 0; |
318 | 16.3k | scanner->line = 1; |
319 | 16.3k | scanner->position = 0; |
320 | 16.3k | scanner->next_token = JSON_TOKEN_NONE; |
321 | | /* With no input both cursor pointers are null, so the "text < text_end" checks in the character readers fail immediately. */ |
322 | 16.3k | scanner->text = text; |
323 | 16.3k | scanner->text_end = text != NULL ? text + text_len : 0; |
324 | | |
325 | 16.3k | g_clear_pointer (&scanner->buffer, g_free); |
326 | 16.3k | } |
327 | | /* Returns the next input byte without consuming it, or 0 at end of input. */ |
328 | | static guchar |
329 | | json_scanner_peek_next_char (JsonScanner *scanner) |
330 | 1.90M | { |
331 | 1.90M | if (scanner->text < scanner->text_end) |
332 | 1.90M | return *scanner->text; |
333 | 510 | else |
334 | 510 | return 0; |
335 | 1.90M | } |
336 | | /* Consumes and returns the next input byte, or 0 at end of input. A newline increments *line_p and resets *position_p to 0; any other non-zero byte increments *position_p. */ |
337 | | static guchar |
338 | | json_scanner_get_char (JsonScanner *scanner, |
339 | | guint *line_p, |
340 | | guint *position_p) |
341 | 253M | { |
342 | 253M | guchar fchar; |
343 | | |
344 | 253M | if (scanner->text < scanner->text_end) |
345 | 253M | fchar = *(scanner->text++); |
346 | 698 | else |
347 | 698 | fchar = 0; |
348 | | /* A zero byte, whether from end of input or embedded in the text, leaves line and position unchanged. */ |
349 | 253M | if (fchar == '\n') |
350 | 81.3k | { |
351 | 81.3k | (*position_p) = 0; |
352 | 81.3k | (*line_p)++; |
353 | 81.3k | } |
354 | 253M | else if (fchar) |
355 | 253M | { |
356 | 253M | (*position_p)++; |
357 | 253M | } |
358 | | |
359 | 253M | return fchar; |
360 | 253M | } |
361 | | /* Character-class helpers. Each macro evaluates its argument more than once, so pass expressions without side effects; to_hex_digit() is only meaningful for a character accepted by is_hex_digit(). */ |
362 | 79.7k | #define is_oct_digit(c) ((c) >= '0' && (c) <= '7') |
363 | 47.8k | #define is_hex_digit(c) (((c) >= '0' && (c) <= '9') || \ |
364 | 47.8k | ((c) >= 'a' && (c) <= 'f') || \ |
365 | 47.8k | ((c) >= 'A' && (c) <= 'F')) |
366 | 38.6k | #define to_hex_digit(c) (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9) |
367 | | /* Reads exactly four hexadecimal digits and stores their value in *ucs. Returns false at the first non-hex character, which has already been consumed by then; *ucs is left untouched in that case. */ |
368 | | static bool |
369 | | json_scanner_get_unichar (JsonScanner *scanner, |
370 | | gunichar *ucs, |
371 | | guint *line_p, |
372 | | guint *position_p) |
373 | 9.70k | { |
374 | 9.70k | gunichar uchar; |
375 | | |
376 | 9.70k | uchar = 0; |
377 | 48.3k | for (int i = 0; i < 4; i++) |
378 | 38.6k | { |
379 | 38.6k | char ch = json_scanner_get_char (scanner, line_p, position_p); |
380 | | /* Digits arrive most significant first: digit i is shifted into bits (3 - i) * 4 and up. */ |
381 | 38.6k | if (is_hex_digit (ch)) |
382 | 38.6k | uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4)); |
383 | 90 | else |
384 | 90 | return false; |
385 | 38.6k | } |
386 | | |
387 | 9.61k | *ucs = uchar; |
388 | | |
389 | 9.61k | return true; |
390 | 9.70k | } |
391 | | |
392 | | /* |
393 | | * decode_utf16_surrogate_pair: |
394 | | * @units: (array length=2): a UTF-16 surrogate pair: the high surrogate followed by the low surrogate |
395 | | * |
396 | | * Decodes a pair of UTF-16 surrogate code units into the equivalent |
397 | | * Unicode code point. Aborts via g_assert() if either unit is out of range. |
398 | | * |
399 | | * Returns: the Unicode code point equivalent to the surrogate pair |
400 | | */ |
401 | | static inline gunichar |
402 | | decode_utf16_surrogate_pair (const gunichar units[2]) |
403 | 477 | { |
404 | 477 | gunichar ucs; |
405 | | |
406 | | /* Already checked by caller */ |
407 | 477 | g_assert (0xd800 <= units[0] && units[0] <= 0xdbff); |
408 | 477 | g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff); |
409 | | /* Result = 0x10000 + (low 10 bits of units[0] << 10) + (low 10 bits of units[1]). */ |
410 | 477 | ucs = 0x10000; |
411 | 477 | ucs += (units[0] & 0x3ff) << 10; |
412 | 477 | ucs += (units[1] & 0x3ff); |
413 | | |
414 | 477 | return ucs; |
415 | 477 | } |
416 | | /* Reports, through json_scanner_error(), a message describing the current token and, unless that token is an error token, what was expected instead. @identifier_spec and @symbol_spec default to "identifier" and "symbol" when NULL; @symbol_name names the current symbol; a non-empty @message is appended after " - ". */ |
417 | | static void |
418 | | json_scanner_unexp_token (JsonScanner *scanner, |
419 | | unsigned int expected_token, |
420 | | const char *identifier_spec, |
421 | | const char *symbol_spec, |
422 | | const char *symbol_name, |
423 | | const char *message) |
424 | 1.44k | { |
425 | 1.44k | char *token_string; |
426 | 1.44k | gsize token_string_len; |
427 | 1.44k | char *expected_string; |
428 | 1.44k | gsize expected_string_len; |
429 | 1.44k | const char *message_prefix; |
430 | 1.44k | bool print_unexp; |
431 | | |
432 | 1.44k | g_return_if_fail (scanner != NULL); |
433 | | |
434 | 1.44k | if (identifier_spec == NULL) |
435 | 1.44k | identifier_spec = "identifier"; |
436 | 1.44k | if (symbol_spec == NULL) |
437 | 0 | symbol_spec = "symbol"; |
438 | | /* Fixed-size scratch buffers: g_snprintf() truncates any description that does not fit. */ |
439 | 1.44k | token_string_len = 56; |
440 | 1.44k | token_string = g_new (char, token_string_len + 1); |
441 | 1.44k | expected_string_len = 64; |
442 | 1.44k | expected_string = g_new (char, expected_string_len + 1); |
443 | 1.44k | print_unexp = true; |
444 | | /* Step 1: describe the token that was actually found. */ |
445 | 1.44k | switch (scanner->token) |
446 | 1.44k | { |
447 | 128 | case JSON_TOKEN_EOF: |
448 | 128 | g_snprintf (token_string, token_string_len, "end of file"); |
449 | 128 | break; |
450 | | |
451 | 343 | default: |
452 | 343 | if (scanner->token >= 1 && scanner->token <= 255) |
453 | 331 | { |
454 | 331 | if ((scanner->token >= ' ' && scanner->token <= '~') || |
455 | 331 | strchr (scanner->config.cset_identifier_first, scanner->token) || |
456 | 331 | strchr (scanner->config.cset_identifier_nth, scanner->token)) |
457 | 276 | g_snprintf (token_string, token_string_len, "character `%c'", scanner->token); |
458 | 55 | else |
459 | 55 | g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token); |
460 | 331 | break; |
461 | 331 | } |
462 | 12 | G_GNUC_FALLTHROUGH; |
463 | | |
464 | 12 | case JSON_TOKEN_SYMBOL: |
465 | 12 | if (expected_token == JSON_TOKEN_SYMBOL || expected_token > JSON_TOKEN_LAST) |
466 | 0 | print_unexp = false; |
467 | 12 | if (symbol_name) |
468 | 12 | g_snprintf (token_string, token_string_len, |
469 | 12 | "%s%s `%s'", |
470 | 12 | print_unexp ? "" : "invalid ", |
471 | 12 | symbol_spec, |
472 | 12 | symbol_name); |
473 | 0 | else |
474 | 0 | g_snprintf (token_string, token_string_len, |
475 | 0 | "%s%s", |
476 | 0 | print_unexp ? "" : "invalid ", |
477 | 0 | symbol_spec); |
478 | 12 | break; |
479 | | |
480 | 669 | case JSON_TOKEN_ERROR: |
481 | 669 | print_unexp = false; |
482 | 669 | expected_token = JSON_TOKEN_NONE; |
483 | 669 | switch (scanner->value.v_error) |
484 | 669 | { |
485 | 0 | case JSON_ERROR_TYPE_UNEXP_EOF: |
486 | 0 | g_snprintf (token_string, token_string_len, "scanner: unexpected end of file"); |
487 | 0 | break; |
488 | | |
489 | 411 | case JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING: |
490 | 411 | g_snprintf (token_string, token_string_len, "scanner: unterminated string constant"); |
491 | 411 | break; |
492 | | |
493 | 114 | case JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT: |
494 | 114 | g_snprintf (token_string, token_string_len, "scanner: unterminated comment"); |
495 | 114 | break; |
496 | | |
497 | 71 | case JSON_ERROR_TYPE_NON_DIGIT_IN_CONST: |
498 | 71 | g_snprintf (token_string, token_string_len, "scanner: non digit in constant"); |
499 | 71 | break; |
500 | | |
501 | 0 | case JSON_ERROR_TYPE_FLOAT_RADIX: |
502 | 0 | g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant"); |
503 | 0 | break; |
504 | | |
505 | 11 | case JSON_ERROR_TYPE_FLOAT_MALFORMED: |
506 | 11 | g_snprintf (token_string, token_string_len, "scanner: malformed floating constant"); |
507 | 11 | break; |
508 | | |
509 | 8 | case JSON_ERROR_TYPE_DIGIT_RADIX: |
510 | 8 | g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix"); |
511 | 8 | break; |
512 | | |
513 | 13 | case JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR: |
514 | 13 | g_snprintf (token_string, token_string_len, "scanner: malformed surrogate pair"); |
515 | 13 | break; |
516 | | |
517 | 10 | case JSON_ERROR_TYPE_LEADING_ZERO: |
518 | 10 | g_snprintf (token_string, token_string_len, "scanner: leading zero in number"); |
519 | 10 | break; |
520 | | |
521 | 0 | case JSON_ERROR_TYPE_UNESCAPED_CTRL: |
522 | 0 | g_snprintf (token_string, token_string_len, "scanner: unescaped control character"); |
523 | 0 | break; |
524 | | |
525 | 0 | case JSON_ERROR_TYPE_UNKNOWN_ESCAPE: |
526 | 0 | g_snprintf (token_string, token_string_len, "scanner: unknown backslash escape sequence"); |
527 | 0 | break; |
528 | | |
529 | 31 | case JSON_ERROR_TYPE_MALFORMED_UNICODE: |
530 | 31 | g_snprintf (token_string, token_string_len, "scanner: malformed Unicode escape"); |
531 | 31 | break; |
532 | | |
533 | 0 | case JSON_ERROR_TYPE_UNKNOWN: |
534 | 0 | default: |
535 | 0 | g_snprintf (token_string, token_string_len, "scanner: unknown error"); |
536 | 0 | break; |
537 | 669 | } |
538 | 669 | break; |
539 | | |
540 | 669 | case JSON_TOKEN_IDENTIFIER: |
541 | 89 | if (expected_token == JSON_TOKEN_IDENTIFIER) |
542 | 0 | print_unexp = false; |
543 | 89 | g_snprintf (token_string, token_string_len, |
544 | 89 | "%s%s `%s'", |
545 | 89 | print_unexp ? "" : "invalid ", |
546 | 89 | identifier_spec, |
547 | 89 | scanner->value.v_string); |
548 | 89 | break; |
549 | | |
550 | 108 | case JSON_TOKEN_INT: |
551 | 108 | g_snprintf (token_string, token_string_len, "number `%" G_GINT64_FORMAT "'", scanner->value.v_int64); |
552 | 108 | break; |
553 | | |
554 | 7 | case JSON_TOKEN_FLOAT: |
555 | 7 | g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float); |
556 | 7 | break; |
557 | | |
558 | 103 | case JSON_TOKEN_STRING: |
559 | 103 | if (expected_token == JSON_TOKEN_STRING) |
560 | 43 | print_unexp = false; |
561 | 103 | g_snprintf (token_string, token_string_len, |
562 | 103 | "%s%sstring constant \"%s\"", |
563 | 103 | print_unexp ? "" : "invalid ", |
564 | 103 | scanner->value.v_string[0] == 0 ? "empty " : "", |
565 | 103 | scanner->value.v_string); |
566 | 103 | token_string[token_string_len - 2] = '"'; |
567 | 103 | token_string[token_string_len - 1] = 0; |
568 | 103 | break; |
569 | | |
570 | 0 | case JSON_TOKEN_COMMENT_SINGLE: |
571 | 0 | case JSON_TOKEN_COMMENT_MULTI: |
572 | 0 | g_snprintf (token_string, token_string_len, "comment"); |
573 | 0 | break; |
574 | | |
575 | 0 | case JSON_TOKEN_NONE: |
576 | | /* somehow the user's parsing code is screwed, there isn't much |
577 | | * we can do about it. |
578 | | * Note, a common case to trigger this is |
579 | | * json_scanner_peek_next_token(); json_scanner_unexp_token(); |
580 | | * without an intermediate json_scanner_get_next_token(). |
581 | | */ |
582 | 0 | g_assert_not_reached (); |
583 | 0 | break; |
584 | 1.44k | } |
585 | | |
586 | | /* Step 2: describe the token that was expected; nothing is written for NONE or ERROR. */ |
587 | 1.44k | switch (expected_token) |
588 | 1.44k | { |
589 | 75 | case JSON_TOKEN_EOF: |
590 | 75 | g_snprintf (expected_string, expected_string_len, "end of file"); |
591 | 75 | break; |
592 | 306 | default: |
593 | 306 | if (expected_token >= 1 && expected_token <= 255) |
594 | 306 | { |
595 | 306 | if ((expected_token >= ' ' && expected_token <= '~') || |
596 | 306 | strchr (scanner->config.cset_identifier_first, expected_token) || |
597 | 306 | strchr (scanner->config.cset_identifier_nth, expected_token)) |
598 | 306 | g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token); |
599 | 0 | else |
600 | 0 | g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token); |
601 | 306 | break; |
602 | 306 | } |
603 | 0 | G_GNUC_FALLTHROUGH; |
604 |
|
605 | 146 | case JSON_TOKEN_SYMBOL: |
606 | 146 | { |
607 | 146 | bool need_valid = (scanner->token == JSON_TOKEN_SYMBOL || scanner->token > JSON_TOKEN_LAST); |
608 | 146 | g_snprintf (expected_string, expected_string_len, |
609 | 146 | "%s%s", |
610 | 146 | need_valid ? "valid " : "", |
611 | 146 | symbol_spec); |
612 | 146 | } |
613 | 146 | break; |
614 | 0 | case JSON_TOKEN_INT: |
615 | 0 | g_snprintf (expected_string, |
616 | 0 | expected_string_len, |
617 | 0 | "%snumber (integer)", |
618 | 0 | scanner->token == expected_token ? "valid " : ""); |
619 | 0 | break; |
620 | 0 | case JSON_TOKEN_FLOAT: |
621 | 0 | g_snprintf (expected_string, |
622 | 0 | expected_string_len, |
623 | 0 | "%snumber (float)", |
624 | 0 | scanner->token == expected_token ? "valid " : ""); |
625 | 0 | break; |
626 | 231 | case JSON_TOKEN_STRING: |
627 | 231 | g_snprintf (expected_string, |
628 | 231 | expected_string_len, |
629 | 231 | "%sstring constant", |
630 | 231 | scanner->token == JSON_TOKEN_STRING ? "valid " : ""); |
631 | 231 | break; |
632 | 20 | case JSON_TOKEN_IDENTIFIER: |
633 | 20 | g_snprintf (expected_string, |
634 | 20 | expected_string_len, |
635 | 20 | "%s%s", |
636 | 20 | scanner->token == JSON_TOKEN_IDENTIFIER ? "valid " : "", |
637 | 20 | identifier_spec); |
638 | 20 | break; |
639 | 0 | case JSON_TOKEN_COMMENT_SINGLE: |
640 | 0 | g_snprintf (expected_string, |
641 | 0 | expected_string_len, |
642 | 0 | "%scomment (single-line)", |
643 | 0 | scanner->token == expected_token ? "valid " : ""); |
644 | 0 | break; |
645 | 0 | case JSON_TOKEN_COMMENT_MULTI: |
646 | 0 | g_snprintf (expected_string, |
647 | 0 | expected_string_len, |
648 | 0 | "%scomment (multi-line)", |
649 | 0 | scanner->token == expected_token ? "valid " : ""); |
650 | 0 | break; |
651 | 669 | case JSON_TOKEN_NONE: |
652 | 669 | case JSON_TOKEN_ERROR: |
653 | | /* this is handled upon printout */ |
654 | 669 | break; |
655 | 1.44k | } |
656 | | /* Step 3: pick the message layout. expected_string is only printed in the final branch, where step 2 has filled it in. */ |
657 | 1.44k | if (message && message[0] != 0) |
658 | 0 | message_prefix = " - "; |
659 | 1.44k | else |
660 | 1.44k | { |
661 | 1.44k | message_prefix = ""; |
662 | 1.44k | message = ""; |
663 | 1.44k | } |
664 | 1.44k | if (expected_token == JSON_TOKEN_ERROR) |
665 | 0 | { |
666 | 0 | json_scanner_error (scanner, |
667 | 0 | "failure around %s%s%s", |
668 | 0 | token_string, |
669 | 0 | message_prefix, |
670 | 0 | message); |
671 | 0 | } |
672 | 1.44k | else if (expected_token == JSON_TOKEN_NONE) |
673 | 669 | { |
674 | 669 | if (print_unexp) |
675 | 0 | json_scanner_error (scanner, |
676 | 0 | "unexpected %s%s%s", |
677 | 0 | token_string, |
678 | 0 | message_prefix, |
679 | 0 | message); |
680 | 669 | else |
681 | 669 | json_scanner_error (scanner, |
682 | 669 | "%s%s%s", |
683 | 669 | token_string, |
684 | 669 | message_prefix, |
685 | 669 | message); |
686 | 669 | } |
687 | 778 | else |
688 | 778 | { |
689 | 778 | if (print_unexp) |
690 | 735 | json_scanner_error (scanner, |
691 | 735 | "unexpected %s, expected %s%s%s", |
692 | 735 | token_string, |
693 | 735 | expected_string, |
694 | 735 | message_prefix, |
695 | 735 | message); |
696 | 43 | else |
697 | 43 | json_scanner_error (scanner, |
698 | 43 | "%s, expected %s%s%s", |
699 | 43 | token_string, |
700 | 43 | expected_string, |
701 | 43 | message_prefix, |
702 | 43 | message); |
703 | 778 | } |
704 | | |
705 | 1.44k | g_free (token_string); |
706 | 1.44k | g_free (expected_string); |
707 | 1.44k | } |
708 | | /* Reports the current token as unexpected where @token was expected. Looks up the keyword name for @token (used as an "e.g." hint) and for the current token, then delegates to json_scanner_unexp_token() with "value" as the symbol description. */ |
709 | | void |
710 | | json_scanner_unknown_token (JsonScanner *scanner, |
711 | | unsigned int token) |
712 | 1.44k | { |
713 | 1.44k | const char *symbol_name; |
714 | 1.44k | char *msg; |
715 | 1.44k | unsigned int cur_token; |
716 | | |
717 | 1.44k | cur_token = json_scanner_get_current_token (scanner); |
718 | 1.44k | msg = NULL; |
719 | | /* First lookup: if the expected token is a keyword, build the "e.g. '<keyword>'" hint. */ |
720 | 1.44k | symbol_name = NULL; |
721 | 7.23k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
722 | 5.78k | if (json_symbols[i].token == token) |
723 | 0 | symbol_name = json_symbol_names + json_symbols[i].name_offset; |
724 | | |
725 | 1.44k | if (symbol_name != NULL) |
726 | 0 | msg = g_strconcat ("e.g. '", symbol_name, "'", NULL); |
727 | | /* Second lookup: name the current token if it is a keyword, otherwise use the "???" placeholder. */ |
728 | 1.44k | symbol_name = "???"; |
729 | 7.23k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
730 | 5.78k | if (json_symbols[i].token == cur_token) |
731 | 12 | symbol_name = json_symbol_names + json_symbols[i].name_offset; |
732 | | |
733 | 1.44k | json_scanner_unexp_token (scanner, token, |
734 | 1.44k | NULL, "value", |
735 | 1.44k | symbol_name, |
736 | 1.44k | msg); |
737 | | |
738 | 1.44k | g_free (msg); |
739 | 1.44k | } |
740 | | /* Scans the next significant token into the given token/value/line/position slots: repeatedly frees the previous value and calls json_scanner_get_token_ll() until the result is neither a skip character nor a comment. A symbol token is replaced by the token id stored in its value; errno is cleared before returning. */ |
741 | | static void |
742 | | json_scanner_get_token_i (JsonScanner *scanner, |
743 | | unsigned int *token_p, |
744 | | JsonTokenValue *value_p, |
745 | | guint *line_p, |
746 | | guint *position_p) |
747 | 8.68M | { |
748 | 8.68M | do |
749 | 8.76M | { |
750 | 8.76M | json_scanner_free_value (token_p, value_p); |
751 | 8.76M | json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p); |
752 | 8.76M | } |
753 | 8.76M | while (((*token_p > 0 && *token_p < 256) && |
754 | 8.76M | strchr (scanner->config.cset_skip_characters, *token_p)) || |
755 | 8.76M | *token_p == JSON_TOKEN_COMMENT_MULTI || |
756 | 8.76M | *token_p == JSON_TOKEN_COMMENT_SINGLE); |
757 | | /* Only symbol tokens are rewritten below; every other token type is returned as scanned. */ |
758 | 8.68M | switch (*token_p) |
759 | 8.68M | { |
760 | 6.24k | case JSON_TOKEN_IDENTIFIER: |
761 | 6.24k | break; |
762 | | |
763 | 6.85k | case JSON_TOKEN_SYMBOL: |
764 | 6.85k | *token_p = GPOINTER_TO_UINT (value_p->v_symbol); |
765 | 6.85k | break; |
766 | | |
767 | 8.67M | default: |
768 | 8.67M | break; |
769 | 8.68M | } |
770 | | |
771 | 8.68M | errno = 0; |
772 | 8.68M | } |
773 | | |
774 | | static void |
775 | | json_scanner_get_token_ll (JsonScanner *scanner, |
776 | | unsigned int *token_p, |
777 | | JsonTokenValue *value_p, |
778 | | guint *line_p, |
779 | | guint *position_p) |
780 | 8.76M | { |
781 | 8.76M | const JsonScannerConfig *config; |
782 | 8.76M | unsigned int token; |
783 | 8.76M | bool in_comment_multi = false; |
784 | 8.76M | bool in_comment_single = false; |
785 | 8.76M | bool in_string_sq = false; |
786 | 8.76M | bool in_string_dq = false; |
787 | 8.76M | GString *gstring = NULL; |
788 | 8.76M | JsonTokenValue value; |
789 | 8.76M | guchar ch; |
790 | | |
791 | 8.76M | config = &scanner->config; |
792 | 8.76M | (*value_p).v_int64 = 0; |
793 | | |
794 | 8.76M | if (scanner->text >= scanner->text_end || |
795 | 8.76M | scanner->token == JSON_TOKEN_EOF) |
796 | 15.1k | { |
797 | 15.1k | *token_p = JSON_TOKEN_EOF; |
798 | 15.1k | return; |
799 | 15.1k | } |
800 | | |
801 | 8.75M | gstring = NULL; |
802 | | |
803 | 8.75M | do /* while (ch != 0) */ |
804 | 8.75M | { |
805 | 8.75M | ch = json_scanner_get_char (scanner, line_p, position_p); |
806 | | |
807 | 8.75M | value.v_int64 = 0; |
808 | 8.75M | token = JSON_TOKEN_NONE; |
809 | | |
810 | | /* this is *evil*, but needed ;( |
811 | | * we first check for identifier first character, because it |
812 | | * might interfere with other key chars like slashes or numbers |
813 | | */ |
814 | 8.75M | if (ch != 0 && strchr (config->cset_identifier_first, ch)) |
815 | 13.1k | goto identifier_precedence; |
816 | | |
817 | 8.73M | switch (ch) |
818 | 8.73M | { |
819 | 0 | case 0: |
820 | 0 | token = JSON_TOKEN_EOF; |
821 | 0 | (*position_p)++; |
822 | | /* ch = 0; */ |
823 | 0 | break; |
824 | | |
825 | 1.18k | case '/': |
826 | 1.18k | if (config->strict || json_scanner_peek_next_char (scanner) != '*') |
827 | 801 | goto default_case; |
828 | 380 | json_scanner_get_char (scanner, line_p, position_p); |
829 | 380 | token = JSON_TOKEN_COMMENT_MULTI; |
830 | 380 | in_comment_multi = true; |
831 | 380 | gstring = g_string_new (NULL); |
832 | 872k | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
833 | 872k | { |
834 | 872k | if (ch == '*' && json_scanner_peek_next_char (scanner) == '/') |
835 | 320 | { |
836 | 320 | json_scanner_get_char (scanner, line_p, position_p); |
837 | 320 | in_comment_multi = false; |
838 | 320 | break; |
839 | 320 | } |
840 | 872k | else |
841 | 872k | gstring = g_string_append_c (gstring, ch); |
842 | 872k | } |
843 | 380 | ch = 0; |
844 | 380 | break; |
845 | | |
846 | 825 | case '\'': |
847 | 825 | if (config->strict) |
848 | 0 | goto default_case; |
849 | 825 | token = JSON_TOKEN_STRING; |
850 | 825 | in_string_sq = true; |
851 | 825 | gstring = g_string_new (NULL); |
852 | 977k | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
853 | 977k | { |
854 | 977k | if (ch == '\'' || token == JSON_TOKEN_ERROR) |
855 | 784 | { |
856 | 784 | in_string_sq = false; |
857 | 784 | break; |
858 | 784 | } |
859 | 976k | else |
860 | 976k | { |
861 | 976k | g_string_append_c (gstring, ch); |
862 | 976k | } |
863 | 977k | } |
864 | 825 | ch = 0; |
865 | 825 | break; |
866 | | |
867 | 2.47M | case '"': |
868 | 2.47M | token = JSON_TOKEN_STRING; |
869 | 2.47M | in_string_dq = true; |
870 | 2.47M | gstring = g_string_new (NULL); |
871 | 238M | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
872 | 238M | { |
873 | 238M | if (ch == '"' || token == JSON_TOKEN_ERROR) |
874 | 2.47M | { |
875 | 2.47M | in_string_dq = false; |
876 | 2.47M | break; |
877 | 2.47M | } |
878 | 236M | else |
879 | 236M | { |
880 | 236M | if (ch == '\\') |
881 | 2.21M | { |
882 | 2.21M | ch = json_scanner_get_char (scanner, line_p, position_p); |
883 | 2.21M | switch (ch) |
884 | 2.21M | { |
885 | 26 | case 0: |
886 | 26 | break; |
887 | | |
888 | 46.0k | case '"': |
889 | 46.0k | gstring = g_string_append_c (gstring, '"'); |
890 | 46.0k | break; |
891 | | |
892 | 1.10M | case '\\': |
893 | 1.10M | gstring = g_string_append_c (gstring, '\\'); |
894 | 1.10M | break; |
895 | | |
896 | 7.59k | case '/': |
897 | 7.59k | gstring = g_string_append_c (gstring, '/'); |
898 | 7.59k | break; |
899 | | |
900 | 103k | case 'n': |
901 | 103k | gstring = g_string_append_c (gstring, '\n'); |
902 | 103k | break; |
903 | | |
904 | 117k | case 't': |
905 | 117k | gstring = g_string_append_c (gstring, '\t'); |
906 | 117k | break; |
907 | | |
908 | 8.50k | case 'r': |
909 | 8.50k | gstring = g_string_append_c (gstring, '\r'); |
910 | 8.50k | break; |
911 | | |
912 | 41.2k | case 'b': |
913 | 41.2k | gstring = g_string_append_c (gstring, '\b'); |
914 | 41.2k | break; |
915 | | |
916 | 20.6k | case 'f': |
917 | 20.6k | gstring = g_string_append_c (gstring, '\f'); |
918 | 20.6k | break; |
919 | | |
920 | 9.19k | case 'u': |
921 | 9.19k | { |
922 | 9.19k | guint fchar = json_scanner_peek_next_char (scanner); |
923 | 9.19k | if (is_hex_digit (fchar)) |
924 | 9.18k | { |
925 | 9.18k | gunichar ucs; |
926 | | |
927 | 9.18k | if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p)) |
928 | 58 | { |
929 | 58 | token = JSON_TOKEN_ERROR; |
930 | 58 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
931 | 58 | g_string_free (gstring, TRUE); |
932 | 58 | gstring = NULL; |
933 | 58 | break; |
934 | 58 | } |
935 | | |
936 | | /* resolve UTF-16 surrogates for Unicode characters not in the BMP, |
937 | | * as per ECMA 404, § 9, "String" |
938 | | */ |
939 | 9.12k | if (g_unichar_type (ucs) == G_UNICODE_SURROGATE) |
940 | 551 | { |
941 | 551 | unsigned int next_ch; |
942 | | |
943 | 551 | next_ch = json_scanner_peek_next_char (scanner); |
944 | 551 | if (next_ch != '\\') |
945 | 22 | { |
946 | 22 | token = JSON_TOKEN_ERROR; |
947 | 22 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
948 | 22 | g_string_free (gstring, TRUE); |
949 | 22 | gstring = NULL; |
950 | 22 | break; |
951 | 22 | } |
952 | 529 | else |
953 | 529 | json_scanner_get_char (scanner, line_p, position_p); |
954 | | |
955 | 529 | next_ch = json_scanner_peek_next_char (scanner); |
956 | 529 | if (next_ch != 'u') |
957 | 6 | { |
958 | 6 | token = JSON_TOKEN_ERROR; |
959 | 6 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
960 | 6 | g_string_free (gstring, TRUE); |
961 | 6 | gstring = NULL; |
962 | 6 | break; |
963 | 6 | } |
964 | 523 | else |
965 | 523 | json_scanner_get_char (scanner, line_p, position_p); |
966 | | |
967 | | /* read next surrogate */ |
968 | 523 | gunichar units[2]; |
969 | | |
970 | 523 | units[0] = ucs; |
971 | | |
972 | 523 | if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p)) |
973 | 32 | { |
974 | 32 | token = JSON_TOKEN_ERROR; |
975 | 32 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
976 | 32 | g_string_free (gstring, TRUE); |
977 | 32 | gstring = NULL; |
978 | 32 | break; |
979 | 32 | } |
980 | | |
981 | 491 | units[1] = ucs; |
982 | | |
983 | 491 | if (0xdc00 <= units[1] && units[1] <= 0xdfff && |
984 | 491 | 0xd800 <= units[0] && units[0] <= 0xdbff) |
985 | 477 | { |
986 | 477 | ucs = decode_utf16_surrogate_pair (units); |
987 | 477 | if (!g_unichar_validate (ucs)) |
988 | 0 | { |
989 | 0 | token = JSON_TOKEN_ERROR; |
990 | 0 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
991 | 0 | g_string_free (gstring, TRUE); |
992 | 0 | gstring = NULL; |
993 | 0 | break; |
994 | 0 | } |
995 | 477 | } |
996 | 14 | else |
997 | 14 | { |
998 | 14 | token = JSON_TOKEN_ERROR; |
999 | 14 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
1000 | 14 | g_string_free (gstring, TRUE); |
1001 | 14 | gstring = NULL; |
1002 | 14 | break; |
1003 | 14 | } |
1004 | 491 | } |
1005 | 8.57k | else |
1006 | 8.57k | { |
1007 | 8.57k | if (!g_unichar_validate (ucs)) |
1008 | 0 | { |
1009 | 0 | token = JSON_TOKEN_ERROR; |
1010 | 0 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
1011 | 0 | g_string_free (gstring, TRUE); |
1012 | 0 | gstring = NULL; |
1013 | 0 | break; |
1014 | 0 | } |
1015 | 8.57k | } |
1016 | | |
1017 | 9.05k | gstring = g_string_append_unichar (gstring, ucs); |
1018 | 9.05k | } |
1019 | 13 | else |
1020 | 13 | { |
1021 | 13 | token = JSON_TOKEN_ERROR; |
1022 | 13 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
1023 | 13 | g_string_free (gstring, TRUE); |
1024 | 13 | gstring = NULL; |
1025 | 13 | } |
1026 | 9.06k | break; |
1027 | 9.19k | } |
1028 | | |
1029 | 9.06k | case '0': |
1030 | 9.67k | case '1': |
1031 | 11.8k | case '2': |
1032 | 17.1k | case '3': |
1033 | 30.8k | case '4': |
1034 | 38.5k | case '5': |
1035 | 43.5k | case '6': |
1036 | 50.2k | case '7': |
1037 | 50.2k | if (config->strict) |
1038 | 0 | { |
1039 | 0 | token = JSON_TOKEN_ERROR; |
1040 | 0 | value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE; |
1041 | 0 | g_string_free (gstring, TRUE); |
1042 | 0 | gstring = NULL; |
1043 | 0 | } |
1044 | 50.2k | else |
1045 | 50.2k | { |
1046 | 50.2k | gunichar ucs = (ch - '0'); |
1047 | 50.2k | guchar next_ch; |
1048 | 50.2k | unsigned i; |
1049 | | |
1050 | 102k | for (i = 0; i < 2; i++) |
1051 | 79.7k | { |
1052 | 79.7k | next_ch = json_scanner_peek_next_char (scanner); |
1053 | | |
1054 | 79.7k | if (is_oct_digit (next_ch)) |
1055 | 52.3k | { |
1056 | 52.3k | ucs = ucs * 8 + (next_ch - '0'); |
1057 | 52.3k | json_scanner_get_char (scanner, line_p, position_p); |
1058 | 52.3k | } |
1059 | 27.4k | else |
1060 | 27.4k | { |
1061 | 27.4k | break; |
1062 | 27.4k | } |
1063 | 79.7k | } |
1064 | | |
1065 | 50.2k | gstring = g_string_append_unichar (gstring, ucs); |
1066 | 50.2k | } |
1067 | 50.2k | break; |
1068 | | |
1069 | 713k | default: |
1070 | 713k | if (config->strict) |
1071 | 0 | { |
1072 | 0 | token = JSON_TOKEN_ERROR; |
1073 | 0 | value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE; |
1074 | 0 | g_string_free (gstring, TRUE); |
1075 | 0 | gstring = NULL; |
1076 | 0 | } |
1077 | 713k | else |
1078 | 713k | { |
1079 | 713k | gstring = g_string_append_c (gstring, ch); |
1080 | 713k | } |
1081 | 713k | break; |
1082 | 2.21M | } |
1083 | 2.21M | } |
1084 | 234M | else if (ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' || ch == '\b') |
1085 | 530 | { |
1086 | 530 | token = JSON_TOKEN_ERROR; |
1087 | 530 | value.v_error = JSON_ERROR_TYPE_UNESCAPED_CTRL; |
1088 | 530 | g_string_free (gstring, TRUE); |
1089 | 530 | gstring = NULL; |
1090 | 530 | break; |
1091 | 530 | } |
1092 | 234M | else |
1093 | 234M | gstring = g_string_append_c (gstring, ch); |
1094 | 236M | } |
1095 | 238M | } |
1096 | 2.47M | ch = 0; |
1097 | 2.47M | break; |
1098 | | |
1099 | | /* {{{ number parsing */ |
1100 | 854 | case '-': |
1101 | 854 | if (!g_ascii_isdigit (json_scanner_peek_next_char (scanner))) |
1102 | 4 | { |
1103 | 4 | token = JSON_TOKEN_ERROR; |
1104 | 4 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1105 | 4 | ch = 0; |
1106 | 4 | break; |
1107 | 4 | } |
1108 | 850 | G_GNUC_FALLTHROUGH; |
1109 | | |
1110 | 4.20k | case '0': |
1111 | 5.98k | case '1': |
1112 | 7.26k | case '2': |
1113 | 39.3k | case '3': |
1114 | 40.6k | case '4': |
1115 | 41.3k | case '5': |
1116 | 138k | case '6': |
1117 | 146k | case '7': |
1118 | 153k | case '8': |
1119 | 212k | case '9': |
1120 | 212k | { |
1121 | 212k | bool in_number = true; |
1122 | 212k | bool leading_sign = ch == '-'; |
1123 | 212k | bool leading_zero = ch == '0'; |
1124 | 212k | char *endptr; |
1125 | | |
1126 | 212k | if (token == JSON_TOKEN_NONE) |
1127 | 212k | token = JSON_TOKEN_INT; |
1128 | | |
1129 | 212k | gstring = g_string_new (""); |
1130 | 212k | gstring = g_string_append_c (gstring, ch); |
1131 | | |
1132 | 212k | if (leading_sign) |
1133 | 850 | { |
1134 | 850 | ch = json_scanner_get_char (scanner, line_p, position_p); |
1135 | 850 | leading_zero = ch == '0'; |
1136 | 850 | g_string_append_c (gstring, ch); |
1137 | 850 | } |
1138 | | |
1139 | 212k | do /* while (in_number) */ |
1140 | 1.61M | { |
1141 | 1.61M | bool is_E = token == JSON_TOKEN_FLOAT && (ch == 'e' || ch == 'E'); |
1142 | | |
1143 | 1.61M | ch = json_scanner_peek_next_char (scanner); |
1144 | | |
1145 | 1.61M | if ((ch >= '0' && ch <= '9') || |
1146 | 1.61M | (ch == 'e' || ch == 'E') || |
1147 | 1.61M | ch == '.' || |
1148 | 1.61M | (is_E && (ch == '+' || ch == '-'))) |
1149 | 1.40M | { |
1150 | 1.40M | ch = json_scanner_get_char (scanner, line_p, position_p); |
1151 | | |
1152 | 1.40M | switch (ch) |
1153 | 1.40M | { |
1154 | 1.05k | case '.': |
1155 | 1.05k | { |
1156 | 1.05k | unsigned int next_ch = json_scanner_peek_next_char (scanner); |
1157 | | |
1158 | 1.05k | if (!g_ascii_isdigit (next_ch)) |
1159 | 13 | { |
1160 | 13 | token = JSON_TOKEN_ERROR; |
1161 | 13 | value.v_error = JSON_ERROR_TYPE_FLOAT_MALFORMED; |
1162 | 13 | in_number = false; |
1163 | 13 | } |
1164 | 1.03k | else |
1165 | 1.03k | { |
1166 | 1.03k | token = JSON_TOKEN_FLOAT; |
1167 | 1.03k | gstring = g_string_append_c (gstring, ch); |
1168 | 1.03k | } |
1169 | 1.05k | } |
1170 | 1.05k | break; |
1171 | | |
1172 | 776k | case '0': |
1173 | 1.13M | case '1': |
1174 | 1.13M | case '2': |
1175 | 1.14M | case '3': |
1176 | 1.15M | case '4': |
1177 | 1.15M | case '5': |
1178 | 1.16M | case '6': |
1179 | 1.16M | case '7': |
1180 | 1.18M | case '8': |
1181 | 1.19M | case '9': |
1182 | 1.19M | if (leading_zero && token != JSON_TOKEN_FLOAT) |
1183 | 442 | { |
1184 | 442 | token = JSON_TOKEN_ERROR; |
1185 | 442 | value.v_error= JSON_ERROR_TYPE_LEADING_ZERO; |
1186 | 442 | in_number = false; |
1187 | 442 | } |
1188 | 1.19M | else |
1189 | 1.19M | gstring = g_string_append_c (gstring, ch); |
1190 | 1.19M | break; |
1191 | | |
1192 | 85.8k | case '-': |
1193 | 86.4k | case '+': |
1194 | 86.4k | if (token != JSON_TOKEN_FLOAT) |
1195 | 0 | { |
1196 | 0 | token = JSON_TOKEN_ERROR; |
1197 | 0 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1198 | 0 | in_number = false; |
1199 | 0 | } |
1200 | 86.4k | else |
1201 | 86.4k | gstring = g_string_append_c (gstring, ch); |
1202 | 86.4k | break; |
1203 | | |
1204 | 18.6k | case 'e': |
1205 | 126k | case 'E': |
1206 | 126k | token = JSON_TOKEN_FLOAT; |
1207 | 126k | gstring = g_string_append_c (gstring, ch); |
1208 | 126k | break; |
1209 | | |
1210 | 0 | default: |
1211 | 0 | token = JSON_TOKEN_ERROR; |
1212 | 0 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1213 | 0 | in_number = false; |
1214 | 0 | break; |
1215 | 1.40M | } |
1216 | 1.40M | } |
1217 | 211k | else |
1218 | 211k | in_number = false; |
1219 | 1.61M | } |
1220 | 1.61M | while (in_number); |
1221 | | |
1222 | 212k | if (token != JSON_TOKEN_ERROR) |
1223 | 211k | { |
1224 | 211k | endptr = NULL; |
1225 | 211k | if (token == JSON_TOKEN_FLOAT) |
1226 | 1.03k | value.v_float = g_ascii_strtod (gstring->str, &endptr); |
1227 | 210k | else if (token == JSON_TOKEN_INT) |
1228 | 210k | value.v_int64 = g_ascii_strtoll (gstring->str, &endptr, 10); |
1229 | | |
1230 | 211k | if (endptr && *endptr) |
1231 | 88 | { |
1232 | 88 | token = JSON_TOKEN_ERROR; |
1233 | 88 | if (*endptr == 'e' || *endptr == 'E') |
1234 | 79 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1235 | 9 | else |
1236 | 9 | value.v_error = JSON_ERROR_TYPE_DIGIT_RADIX; |
1237 | 88 | } |
1238 | 211k | } |
1239 | 212k | g_string_free (gstring, TRUE); |
1240 | 212k | gstring = NULL; |
1241 | 212k | ch = 0; |
1242 | 212k | } |
1243 | 0 | break; /* number parsing }}} */ |
1244 | | |
1245 | 6.04M | default: |
1246 | 6.04M | default_case: |
1247 | 6.04M | { |
1248 | 6.04M | if (!config->strict && |
1249 | 6.04M | config->cpair_comment_single && |
1250 | 6.04M | ch == config->cpair_comment_single[0]) |
1251 | 801 | { |
1252 | 801 | token = JSON_TOKEN_COMMENT_SINGLE; |
1253 | 801 | in_comment_single = true; |
1254 | 801 | gstring = g_string_new (NULL); |
1255 | 801 | ch = json_scanner_get_char (scanner, line_p, position_p); |
1256 | 299k | while (ch != 0) |
1257 | 299k | { |
1258 | 299k | if (ch == config->cpair_comment_single[1]) |
1259 | 738 | { |
1260 | 738 | in_comment_single = false; |
1261 | 738 | ch = 0; |
1262 | 738 | break; |
1263 | 738 | } |
1264 | | |
1265 | 299k | gstring = g_string_append_c (gstring, ch); |
1266 | 299k | ch = json_scanner_get_char (scanner, line_p, position_p); |
1267 | 299k | } |
1268 | | /* ignore a missing newline at EOF for single line comments */ |
1269 | 801 | if (in_comment_single && |
1270 | 801 | config->cpair_comment_single[1] == '\n') |
1271 | 0 | in_comment_single = false; |
1272 | 801 | } |
1273 | 6.04M | else if (ch && strchr (config->cset_identifier_first, ch)) |
1274 | 0 | { |
1275 | 13.1k | identifier_precedence: |
1276 | | |
1277 | 13.1k | if (config->cset_identifier_nth && ch && |
1278 | 13.1k | strchr (config->cset_identifier_nth, |
1279 | 13.1k | json_scanner_peek_next_char (scanner))) |
1280 | 13.1k | { |
1281 | 13.1k | token = JSON_TOKEN_IDENTIFIER; |
1282 | 13.1k | gstring = g_string_new (NULL); |
1283 | 13.1k | gstring = g_string_append_c (gstring, ch); |
1284 | 13.1k | do |
1285 | 178k | { |
1286 | 178k | ch = json_scanner_get_char (scanner, line_p, position_p); |
1287 | 178k | gstring = g_string_append_c (gstring, ch); |
1288 | 178k | ch = json_scanner_peek_next_char (scanner); |
1289 | 178k | } |
1290 | 178k | while (ch && strchr (config->cset_identifier_nth, ch)); |
1291 | 13.1k | ch = 0; |
1292 | 13.1k | } |
1293 | 13.1k | } |
1294 | 6.05M | if (ch) |
1295 | 6.04M | { |
1296 | 6.04M | token = ch; |
1297 | 6.04M | ch = 0; |
1298 | 6.04M | } |
1299 | 6.05M | } /* default_case:... */ |
1300 | 6.05M | break; |
1301 | 8.73M | } |
1302 | 8.75M | g_assert (ch == 0 && token != JSON_TOKEN_NONE); /* paranoid */ |
1303 | 8.75M | } |
1304 | 8.75M | while (ch != 0); |
1305 | | |
1306 | 8.75M | if (in_comment_multi || in_comment_single || |
1307 | 8.75M | in_string_sq || in_string_dq) |
1308 | 1.11k | { |
1309 | 1.11k | token = JSON_TOKEN_ERROR; |
1310 | 1.11k | if (gstring) |
1311 | 510 | { |
1312 | 510 | g_string_free (gstring, TRUE); |
1313 | 510 | gstring = NULL; |
1314 | 510 | } |
1315 | 1.11k | (*position_p)++; |
1316 | 1.11k | if (in_comment_multi || in_comment_single) |
1317 | 123 | value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT; |
1318 | 996 | else /* (in_string_sq || in_string_dq) */ |
1319 | 996 | value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING; |
1320 | 1.11k | } |
1321 | | |
1322 | 8.75M | if (gstring) |
1323 | 2.49M | { |
1324 | 2.49M | value.v_string = g_string_free (gstring, FALSE); |
1325 | 2.49M | gstring = NULL; |
1326 | 2.49M | } |
1327 | | |
1328 | 8.75M | if (token == JSON_TOKEN_IDENTIFIER) |
1329 | 13.1k | { |
1330 | 57.2k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
1331 | 50.9k | { |
1332 | 50.9k | const char *symbol = json_symbol_names + json_symbols[i].name_offset; |
1333 | 50.9k | if (strcmp (value.v_identifier, symbol) == 0) |
1334 | 6.85k | { |
1335 | 6.85k | g_free (value.v_identifier); |
1336 | 6.85k | token = JSON_TOKEN_SYMBOL; |
1337 | 6.85k | value.v_symbol = GUINT_TO_POINTER (json_symbols[i].token); |
1338 | 6.85k | break; |
1339 | 6.85k | } |
1340 | 50.9k | } |
1341 | 13.1k | } |
1342 | | |
1343 | 8.75M | *token_p = token; |
1344 | 8.75M | *value_p = value; |
1345 | 8.75M | } |
1346 | | |
1347 | | gint64 |
1348 | | json_scanner_get_int64_value (const JsonScanner *scanner) |
1349 | 210k | { |
1350 | 210k | return scanner->value.v_int64; |
1351 | 210k | } |
1352 | | |
1353 | | double |
1354 | | json_scanner_get_float_value (const JsonScanner *scanner) |
1355 | 941 | { |
1356 | 941 | return scanner->value.v_float; |
1357 | 941 | } |
1358 | | |
1359 | | const char * |
1360 | | json_scanner_get_string_value (const JsonScanner *scanner) |
1361 | 641k | { |
1362 | 641k | return scanner->value.v_string; |
1363 | 641k | } |
1364 | | |
1365 | | char * |
1366 | | json_scanner_dup_string_value (const JsonScanner *scanner) |
1367 | 1.83M | { |
1368 | 1.83M | return g_strdup (scanner->value.v_string); |
1369 | 1.83M | } |
1370 | | |
1371 | | const char * |
1372 | | json_scanner_get_identifier (const JsonScanner *scanner) |
1373 | 0 | { |
1374 | 0 | return scanner->value.v_identifier; |
1375 | 0 | } |
1376 | | |
1377 | | char * |
1378 | | json_scanner_dup_identifier (const JsonScanner *scanner) |
1379 | 5.51k | { |
1380 | 5.51k | return g_strdup (scanner->value.v_identifier); |
1381 | 5.51k | } |
1382 | | |
1383 | | unsigned int |
1384 | | json_scanner_get_current_line (const JsonScanner *scanner) |
1385 | 1.44k | { |
1386 | 1.44k | return scanner->line; |
1387 | 1.44k | } |
1388 | | |
1389 | | unsigned int |
1390 | | json_scanner_get_current_position (const JsonScanner *scanner) |
1391 | 1.44k | { |
1392 | 1.44k | return scanner->position; |
1393 | 1.44k | } |
1394 | | |
1395 | | unsigned int |
1396 | | json_scanner_get_current_token (const JsonScanner *scanner) |
1397 | 1.44k | { |
1398 | 1.44k | return scanner->token; |
1399 | 1.44k | } |