/src/tinysparql/subprojects/json-glib-1.10.6/json-glib/json-scanner.c
Line | Count | Source |
1 | | /* json-scanner.c: Tokenizer for JSON |
2 | | * |
3 | | * SPDX-FileCopyrightText: 1997 Tim Janik |
4 | | * SPDX-FileCopyrightText: 1997-2000 The GLib Team and others |
5 | | * SPDX-FileCopyrightText: 2008 OpenedHand Ltd. |
6 | | * SPDX-FileCopyrightText: 2024 Emmanuele Bassi |
7 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
8 | | * |
9 | | * Based on JsonScanner: Flexible lexical scanner for general purpose. |
10 | | * |
11 | | * Modified by Emmanuele Bassi <ebassi@openedhand.com> |
12 | | * |
13 | | * This library is free software; you can redistribute it and/or |
14 | | * modify it under the terms of the GNU Lesser General Public |
15 | | * License as published by the Free Software Foundation; either |
16 | | * version 2 of the License, or (at your option) any later version. |
17 | | * |
18 | | * This library is distributed in the hope that it will be useful, |
19 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | * Lesser General Public License for more details. |
22 | | * |
23 | | * You should have received a copy of the GNU Lesser General Public |
24 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | #include "config.h" |
28 | | |
29 | | #include "json-scanner.h" |
30 | | |
31 | | #include <errno.h> |
32 | | #include <stdint.h> |
33 | | #include <stdbool.h> |
34 | | #include <stdlib.h> |
35 | | #include <stdarg.h> |
36 | | #include <string.h> |
37 | | #include <stdio.h> |
38 | | #ifdef HAVE_UNISTD_H |
39 | | #include <unistd.h> |
40 | | #endif |
41 | | |
42 | | #include <glib.h> |
43 | | #include <glib/gprintf.h> |
44 | | |
/* Error codes carried in JsonTokenValue.v_error when the current token is
 * JSON_TOKEN_ERROR. json_scanner_unexp_token() turns each code into the
 * human readable message quoted next to it below.
 */
typedef enum
{
  JSON_ERROR_TYPE_UNKNOWN,                  /* "unknown error" */
  JSON_ERROR_TYPE_UNEXP_EOF,                /* "unexpected end of file" */
  JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING,      /* "unterminated string constant" */
  JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT,     /* "unterminated comment" */
  JSON_ERROR_TYPE_NON_DIGIT_IN_CONST,       /* "non digit in constant" */
  JSON_ERROR_TYPE_DIGIT_RADIX,              /* "digit is beyond radix" */
  JSON_ERROR_TYPE_FLOAT_RADIX,              /* "invalid radix for floating constant" */
  JSON_ERROR_TYPE_FLOAT_MALFORMED,          /* "malformed floating constant" */
  JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR, /* "malformed surrogate pair" */
  JSON_ERROR_TYPE_LEADING_ZERO,             /* "leading zero in number" */
  JSON_ERROR_TYPE_UNESCAPED_CTRL,           /* "unescaped control character" */
  JSON_ERROR_TYPE_UNKNOWN_ESCAPE,           /* "unknown backslash escape sequence" */
  JSON_ERROR_TYPE_MALFORMED_UNICODE         /* "malformed Unicode escape" */
} JsonErrorType;
61 | | |
/* Static tokenizer settings; filled in once by json_scanner_new(). */
typedef struct
{
  /* Single-character tokens found in this set are discarded by
   * json_scanner_get_token_i() instead of being returned.
   */
  const char *cset_skip_characters;

  /* Characters that may start an identifier. */
  const char *cset_identifier_first;

  /* Identifier character set for positions after the first
   * (NOTE(review): inferred from the name; the consumer is outside
   * this excerpt).
   */
  const char *cset_identifier_nth;

  /* Single-line comment delimiters; per json_scanner_new() this is only
   * used when strict is false.
   */
  const char *cpair_comment_single;

  /* When true, non-standard extensions such as multi-line comments and
   * single-quoted strings are not recognised.
   */
  bool strict;
} JsonScannerConfig;
70 | | |
71 | | typedef union |
72 | | { |
73 | | gpointer v_symbol; |
74 | | char *v_identifier; |
75 | | gint64 v_int64; |
76 | | double v_float; |
77 | | char *v_string; |
78 | | unsigned int v_error; |
79 | | } JsonTokenValue; |
80 | | |
81 | | /*< private > |
82 | | * JsonScanner: |
83 | | * |
84 | | * Tokenizer scanner for JSON. See #GScanner |
85 | | * |
86 | | * Since: 0.6 |
87 | | */ |
88 | | struct _JsonScanner |
89 | | { |
90 | | /* name of input stream, featured by the default message handler */ |
91 | | const char *input_name; |
92 | | |
93 | | /* link into the scanner configuration */ |
94 | | JsonScannerConfig config; |
95 | | |
96 | | /* fields filled in after json_scanner_get_next_token() */ |
97 | | unsigned int token; |
98 | | JsonTokenValue value; |
99 | | unsigned int line; |
100 | | unsigned int position; |
101 | | |
102 | | /* fields filled in after json_scanner_peek_next_token() */ |
103 | | unsigned int next_token; |
104 | | JsonTokenValue next_value; |
105 | | unsigned int next_line; |
106 | | unsigned int next_position; |
107 | | |
108 | | /* to be considered private */ |
109 | | const char *text; |
110 | | const char *text_end; |
111 | | char *buffer; |
112 | | |
113 | | /* handler function for _warn and _error */ |
114 | | JsonScannerMsgFunc msg_handler; |
115 | | gpointer user_data; |
116 | | }; |
117 | | |
118 | | static const gchar json_symbol_names[] = |
119 | | "true\0" |
120 | | "false\0" |
121 | | "null\0" |
122 | | "var\0"; |
123 | | |
124 | | static const struct |
125 | | { |
126 | | unsigned int name_offset; |
127 | | unsigned int token; |
128 | | } json_symbols[] = { |
129 | | { 0, JSON_TOKEN_TRUE }, |
130 | | { 5, JSON_TOKEN_FALSE }, |
131 | | { 11, JSON_TOKEN_NULL }, |
132 | | { 16, JSON_TOKEN_VAR } |
133 | | }; |
134 | | |
135 | | static void json_scanner_get_token_ll (JsonScanner *scanner, |
136 | | unsigned int *token_p, |
137 | | JsonTokenValue *value_p, |
138 | | unsigned int *line_p, |
139 | | unsigned int *position_p); |
140 | | static void json_scanner_get_token_i (JsonScanner *scanner, |
141 | | unsigned int *token_p, |
142 | | JsonTokenValue *value_p, |
143 | | unsigned int *line_p, |
144 | | unsigned int *position_p); |
145 | | |
146 | | static unsigned char json_scanner_peek_next_char (JsonScanner *scanner); |
147 | | static unsigned char json_scanner_get_char (JsonScanner *scanner, |
148 | | unsigned int *line_p, |
149 | | unsigned int *position_p); |
150 | | static bool json_scanner_get_unichar (JsonScanner *scanner, |
151 | | gunichar *ucs, |
152 | | unsigned int *line_p, |
153 | | unsigned int *position_p); |
154 | | static void json_scanner_error (JsonScanner *scanner, |
155 | | const char *format, |
156 | | ...) G_GNUC_PRINTF (2,3); |
157 | | |
158 | | JsonScanner * |
159 | | json_scanner_new (bool strict) |
160 | 9.11k | { |
161 | 9.11k | JsonScanner *scanner; |
162 | | |
163 | 9.11k | scanner = g_new0 (JsonScanner, 1); |
164 | | |
165 | 9.11k | scanner->config = (JsonScannerConfig) { |
166 | | // Skip whitespace |
167 | 9.11k | .cset_skip_characters = ( " \t\r\n" ), |
168 | | |
169 | | // Identifiers can only be lower case |
170 | 9.11k | .cset_identifier_first = ( |
171 | 9.11k | G_CSET_a_2_z |
172 | 9.11k | ), |
173 | 9.11k | .cset_identifier_nth = ( |
174 | 9.11k | G_CSET_a_2_z |
175 | 9.11k | ), |
176 | | |
177 | | // Only used if strict = false |
178 | 9.11k | .cpair_comment_single = ( "//\n" ), |
179 | 9.11k | .strict = strict, |
180 | 9.11k | }; |
181 | | |
182 | 9.11k | scanner->token = JSON_TOKEN_NONE; |
183 | 9.11k | scanner->value.v_int64 = 0; |
184 | 9.11k | scanner->line = 1; |
185 | 9.11k | scanner->position = 0; |
186 | | |
187 | 9.11k | scanner->next_token = JSON_TOKEN_NONE; |
188 | 9.11k | scanner->next_value.v_int64 = 0; |
189 | 9.11k | scanner->next_line = 1; |
190 | 9.11k | scanner->next_position = 0; |
191 | | |
192 | 9.11k | return scanner; |
193 | 9.11k | } |
194 | | |
195 | | static inline void |
196 | | json_scanner_free_value (JsonTokenType *token_p, |
197 | | JsonTokenValue *value_p) |
198 | 16.2M | { |
199 | 16.2M | switch (*token_p) |
200 | 16.2M | { |
201 | 2.19M | case JSON_TOKEN_STRING: |
202 | 2.20M | case JSON_TOKEN_IDENTIFIER: |
203 | 2.20M | case JSON_TOKEN_COMMENT_SINGLE: |
204 | 2.20M | case JSON_TOKEN_COMMENT_MULTI: |
205 | 2.20M | g_free (value_p->v_string); |
206 | 2.20M | break; |
207 | | |
208 | 14.0M | default: |
209 | 14.0M | break; |
210 | 16.2M | } |
211 | | |
212 | 16.2M | *token_p = JSON_TOKEN_NONE; |
213 | 16.2M | } |
214 | | |
215 | | void |
216 | | json_scanner_destroy (JsonScanner *scanner) |
217 | 9.11k | { |
218 | 9.11k | g_return_if_fail (scanner != NULL); |
219 | | |
220 | 9.11k | json_scanner_free_value (&scanner->token, &scanner->value); |
221 | 9.11k | json_scanner_free_value (&scanner->next_token, &scanner->next_value); |
222 | | |
223 | 9.11k | g_free (scanner->buffer); |
224 | 9.11k | g_free (scanner); |
225 | 9.11k | } |
226 | | |
227 | | void |
228 | | json_scanner_set_msg_handler (JsonScanner *scanner, |
229 | | JsonScannerMsgFunc msg_handler, |
230 | | gpointer user_data) |
231 | 9.11k | { |
232 | 9.11k | g_return_if_fail (scanner != NULL); |
233 | | |
234 | 9.11k | scanner->msg_handler = msg_handler; |
235 | 9.11k | scanner->user_data = user_data; |
236 | 9.11k | } |
237 | | |
238 | | static void |
239 | | json_scanner_error (JsonScanner *scanner, |
240 | | const char *format, |
241 | | ...) |
242 | 1.26k | { |
243 | 1.26k | g_return_if_fail (scanner != NULL); |
244 | 1.26k | g_return_if_fail (format != NULL); |
245 | | |
246 | 1.26k | if (scanner->msg_handler) |
247 | 1.26k | { |
248 | 1.26k | va_list args; |
249 | 1.26k | char *string; |
250 | | |
251 | 1.26k | va_start (args, format); |
252 | 1.26k | string = g_strdup_vprintf (format, args); |
253 | 1.26k | va_end (args); |
254 | | |
255 | 1.26k | scanner->msg_handler (scanner, string, scanner->user_data); |
256 | | |
257 | 1.26k | g_free (string); |
258 | 1.26k | } |
259 | 1.26k | } |
260 | | |
261 | | unsigned int |
262 | | json_scanner_peek_next_token (JsonScanner *scanner) |
263 | 9.21M | { |
264 | 9.21M | g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF); |
265 | | |
266 | 9.21M | if (scanner->next_token == JSON_TOKEN_NONE) |
267 | 8.06M | { |
268 | 8.06M | scanner->next_line = scanner->line; |
269 | 8.06M | scanner->next_position = scanner->position; |
270 | 8.06M | json_scanner_get_token_i (scanner, |
271 | 8.06M | &scanner->next_token, |
272 | 8.06M | &scanner->next_value, |
273 | 8.06M | &scanner->next_line, |
274 | 8.06M | &scanner->next_position); |
275 | 8.06M | } |
276 | | |
277 | 9.21M | return scanner->next_token; |
278 | 9.21M | } |
279 | | |
280 | | unsigned int |
281 | | json_scanner_get_next_token (JsonScanner *scanner) |
282 | 8.06M | { |
283 | 8.06M | g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF); |
284 | | |
285 | 8.06M | if (scanner->next_token != JSON_TOKEN_NONE) |
286 | 8.05M | { |
287 | 8.05M | json_scanner_free_value (&scanner->token, &scanner->value); |
288 | | |
289 | 8.05M | scanner->token = scanner->next_token; |
290 | 8.05M | scanner->value = scanner->next_value; |
291 | 8.05M | scanner->line = scanner->next_line; |
292 | 8.05M | scanner->position = scanner->next_position; |
293 | 8.05M | scanner->next_token = JSON_TOKEN_NONE; |
294 | 8.05M | } |
295 | 12.0k | else |
296 | 12.0k | json_scanner_get_token_i (scanner, |
297 | 12.0k | &scanner->token, |
298 | 12.0k | &scanner->value, |
299 | 12.0k | &scanner->line, |
300 | 12.0k | &scanner->position); |
301 | | |
302 | 8.06M | return scanner->token; |
303 | 8.06M | } |
304 | | |
305 | | void |
306 | | json_scanner_input_text (JsonScanner *scanner, |
307 | | const char *text, |
308 | | guint text_len) |
309 | 9.11k | { |
310 | 9.11k | g_return_if_fail (scanner != NULL); |
311 | 9.11k | if (text_len) |
312 | 9.11k | g_return_if_fail (text != NULL); |
313 | 1 | else |
314 | 1 | text = NULL; |
315 | | |
316 | 9.11k | scanner->token = JSON_TOKEN_NONE; |
317 | 9.11k | scanner->value.v_int64 = 0; |
318 | 9.11k | scanner->line = 1; |
319 | 9.11k | scanner->position = 0; |
320 | 9.11k | scanner->next_token = JSON_TOKEN_NONE; |
321 | | |
322 | 9.11k | scanner->text = text; |
323 | 9.11k | scanner->text_end = text != NULL ? text + text_len : 0; |
324 | | |
325 | 9.11k | g_clear_pointer (&scanner->buffer, g_free); |
326 | 9.11k | } |
327 | | |
328 | | static guchar |
329 | | json_scanner_peek_next_char (JsonScanner *scanner) |
330 | 1.88M | { |
331 | 1.88M | if (scanner->text < scanner->text_end) |
332 | 1.88M | return *scanner->text; |
333 | 451 | else |
334 | 451 | return 0; |
335 | 1.88M | } |
336 | | |
337 | | static guchar |
338 | | json_scanner_get_char (JsonScanner *scanner, |
339 | | guint *line_p, |
340 | | guint *position_p) |
341 | 118M | { |
342 | 118M | guchar fchar; |
343 | | |
344 | 118M | if (scanner->text < scanner->text_end) |
345 | 118M | fchar = *(scanner->text++); |
346 | 585 | else |
347 | 585 | fchar = 0; |
348 | | |
349 | 118M | if (fchar == '\n') |
350 | 83.8k | { |
351 | 83.8k | (*position_p) = 0; |
352 | 83.8k | (*line_p)++; |
353 | 83.8k | } |
354 | 118M | else if (fchar) |
355 | 118M | { |
356 | 118M | (*position_p)++; |
357 | 118M | } |
358 | | |
359 | 118M | return fchar; |
360 | 118M | } |
361 | | |
/* ASCII digit classification helpers.
 *
 * These are function-like macros that evaluate their argument more than
 * once: only pass side-effect free expressions.
 */
#define is_oct_digit(c)         ((c) >= '0' && (c) <= '7')
#define is_hex_digit(c)         (((c) >= '0' && (c) <= '9') || \
                                 ((c) >= 'a' && (c) <= 'f') || \
                                 ((c) >= 'A' && (c) <= 'F'))

/* Numeric value of a hexadecimal digit. Must only be used on characters
 * accepted by is_hex_digit(): letters are decoded from their low three
 * bits ('a'/'A' -> 1 ... 'f'/'F' -> 6) plus 9.
 */
#define to_hex_digit(c)         (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9)
367 | | |
368 | | static bool |
369 | | json_scanner_get_unichar (JsonScanner *scanner, |
370 | | gunichar *ucs, |
371 | | guint *line_p, |
372 | | guint *position_p) |
373 | 12.5k | { |
374 | 12.5k | gunichar uchar; |
375 | | |
376 | 12.5k | uchar = 0; |
377 | 61.4k | for (int i = 0; i < 4; i++) |
378 | 49.5k | { |
379 | 49.5k | char ch = json_scanner_get_char (scanner, line_p, position_p); |
380 | | |
381 | 49.5k | if (is_hex_digit (ch)) |
382 | 48.9k | uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4)); |
383 | 668 | else |
384 | 668 | return false; |
385 | 49.5k | } |
386 | | |
387 | 11.8k | *ucs = uchar; |
388 | | |
389 | 11.8k | return true; |
390 | 12.5k | } |
391 | | |
392 | | /* |
393 | | * decode_utf16_surrogate_pair: |
394 | | * @units: (array length=2): a pair of UTF-16 code points |
395 | | * |
396 | | * Decodes a surrogate pair of UTF-16 code points into the equivalent |
397 | | * Unicode code point. |
398 | | * |
399 | | * Returns: the Unicode code point equivalent to the surrogate pair |
400 | | */ |
401 | | static inline gunichar |
402 | | decode_utf16_surrogate_pair (const gunichar units[2]) |
403 | 479 | { |
404 | 479 | gunichar ucs; |
405 | | |
406 | | /* Already checked by caller */ |
407 | 479 | g_assert (0xd800 <= units[0] && units[0] <= 0xdbff); |
408 | 479 | g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff); |
409 | | |
410 | 479 | ucs = 0x10000; |
411 | 479 | ucs += (units[0] & 0x3ff) << 10; |
412 | 479 | ucs += (units[1] & 0x3ff); |
413 | | |
414 | 479 | return ucs; |
415 | 479 | } |
416 | | |
417 | | static void |
418 | | json_scanner_unexp_token (JsonScanner *scanner, |
419 | | unsigned int expected_token, |
420 | | const char *identifier_spec, |
421 | | const char *symbol_spec, |
422 | | const char *symbol_name, |
423 | | const char *message) |
424 | 1.26k | { |
425 | 1.26k | char *token_string; |
426 | 1.26k | gsize token_string_len; |
427 | 1.26k | char *expected_string; |
428 | 1.26k | gsize expected_string_len; |
429 | 1.26k | const char *message_prefix; |
430 | 1.26k | bool print_unexp; |
431 | | |
432 | 1.26k | g_return_if_fail (scanner != NULL); |
433 | | |
434 | 1.26k | if (identifier_spec == NULL) |
435 | 1.26k | identifier_spec = "identifier"; |
436 | 1.26k | if (symbol_spec == NULL) |
437 | 0 | symbol_spec = "symbol"; |
438 | | |
439 | 1.26k | token_string_len = 56; |
440 | 1.26k | token_string = g_new (char, token_string_len + 1); |
441 | 1.26k | expected_string_len = 64; |
442 | 1.26k | expected_string = g_new (char, expected_string_len + 1); |
443 | 1.26k | print_unexp = true; |
444 | | |
445 | 1.26k | switch (scanner->token) |
446 | 1.26k | { |
447 | 140 | case JSON_TOKEN_EOF: |
448 | 140 | g_snprintf (token_string, token_string_len, "end of file"); |
449 | 140 | break; |
450 | | |
451 | 258 | default: |
452 | 258 | if (scanner->token >= 1 && scanner->token <= 255) |
453 | 245 | { |
454 | 245 | if ((scanner->token >= ' ' && scanner->token <= '~') || |
455 | 44 | strchr (scanner->config.cset_identifier_first, scanner->token) || |
456 | 44 | strchr (scanner->config.cset_identifier_nth, scanner->token)) |
457 | 201 | g_snprintf (token_string, token_string_len, "character `%c'", scanner->token); |
458 | 44 | else |
459 | 44 | g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token); |
460 | 245 | break; |
461 | 245 | } |
462 | 13 | G_GNUC_FALLTHROUGH; |
463 | | |
464 | 13 | case JSON_TOKEN_SYMBOL: |
465 | 13 | if (expected_token == JSON_TOKEN_SYMBOL || expected_token > JSON_TOKEN_LAST) |
466 | 0 | print_unexp = false; |
467 | 13 | if (symbol_name) |
468 | 13 | g_snprintf (token_string, token_string_len, |
469 | 13 | "%s%s `%s'", |
470 | 13 | print_unexp ? "" : "invalid ", |
471 | 13 | symbol_spec, |
472 | 13 | symbol_name); |
473 | 0 | else |
474 | 0 | g_snprintf (token_string, token_string_len, |
475 | 0 | "%s%s", |
476 | 0 | print_unexp ? "" : "invalid ", |
477 | 0 | symbol_spec); |
478 | 13 | break; |
479 | | |
480 | 593 | case JSON_TOKEN_ERROR: |
481 | 593 | print_unexp = false; |
482 | 593 | expected_token = JSON_TOKEN_NONE; |
483 | 593 | switch (scanner->value.v_error) |
484 | 593 | { |
485 | 0 | case JSON_ERROR_TYPE_UNEXP_EOF: |
486 | 0 | g_snprintf (token_string, token_string_len, "scanner: unexpected end of file"); |
487 | 0 | break; |
488 | | |
489 | 360 | case JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING: |
490 | 360 | g_snprintf (token_string, token_string_len, "scanner: unterminated string constant"); |
491 | 360 | break; |
492 | | |
493 | 90 | case JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT: |
494 | 90 | g_snprintf (token_string, token_string_len, "scanner: unterminated comment"); |
495 | 90 | break; |
496 | | |
497 | 64 | case JSON_ERROR_TYPE_NON_DIGIT_IN_CONST: |
498 | 64 | g_snprintf (token_string, token_string_len, "scanner: non digit in constant"); |
499 | 64 | break; |
500 | | |
501 | 0 | case JSON_ERROR_TYPE_FLOAT_RADIX: |
502 | 0 | g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant"); |
503 | 0 | break; |
504 | | |
505 | 7 | case JSON_ERROR_TYPE_FLOAT_MALFORMED: |
506 | 7 | g_snprintf (token_string, token_string_len, "scanner: malformed floating constant"); |
507 | 7 | break; |
508 | | |
509 | 9 | case JSON_ERROR_TYPE_DIGIT_RADIX: |
510 | 9 | g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix"); |
511 | 9 | break; |
512 | | |
513 | 17 | case JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR: |
514 | 17 | g_snprintf (token_string, token_string_len, "scanner: malformed surrogate pair"); |
515 | 17 | break; |
516 | | |
517 | 10 | case JSON_ERROR_TYPE_LEADING_ZERO: |
518 | 10 | g_snprintf (token_string, token_string_len, "scanner: leading zero in number"); |
519 | 10 | break; |
520 | | |
521 | 0 | case JSON_ERROR_TYPE_UNESCAPED_CTRL: |
522 | 0 | g_snprintf (token_string, token_string_len, "scanner: unescaped control character"); |
523 | 0 | break; |
524 | | |
525 | 0 | case JSON_ERROR_TYPE_UNKNOWN_ESCAPE: |
526 | 0 | g_snprintf (token_string, token_string_len, "scanner: unknown backslash escape sequence"); |
527 | 0 | break; |
528 | | |
529 | 36 | case JSON_ERROR_TYPE_MALFORMED_UNICODE: |
530 | 36 | g_snprintf (token_string, token_string_len, "scanner: malformed Unicode escape"); |
531 | 36 | break; |
532 | | |
533 | 0 | case JSON_ERROR_TYPE_UNKNOWN: |
534 | 0 | default: |
535 | 0 | g_snprintf (token_string, token_string_len, "scanner: unknown error"); |
536 | 0 | break; |
537 | 593 | } |
538 | 593 | break; |
539 | | |
540 | 593 | case JSON_TOKEN_IDENTIFIER: |
541 | 68 | if (expected_token == JSON_TOKEN_IDENTIFIER) |
542 | 0 | print_unexp = false; |
543 | 68 | g_snprintf (token_string, token_string_len, |
544 | 68 | "%s%s `%s'", |
545 | 68 | print_unexp ? "" : "invalid ", |
546 | 68 | identifier_spec, |
547 | 68 | scanner->value.v_string); |
548 | 68 | break; |
549 | | |
550 | 117 | case JSON_TOKEN_INT: |
551 | 117 | g_snprintf (token_string, token_string_len, "number `%" G_GINT64_FORMAT "'", scanner->value.v_int64); |
552 | 117 | break; |
553 | | |
554 | 8 | case JSON_TOKEN_FLOAT: |
555 | 8 | g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float); |
556 | 8 | break; |
557 | | |
558 | 77 | case JSON_TOKEN_STRING: |
559 | 77 | if (expected_token == JSON_TOKEN_STRING) |
560 | 30 | print_unexp = false; |
561 | 77 | g_snprintf (token_string, token_string_len, |
562 | 77 | "%s%sstring constant \"%s\"", |
563 | 77 | print_unexp ? "" : "invalid ", |
564 | 77 | scanner->value.v_string[0] == 0 ? "empty " : "", |
565 | 77 | scanner->value.v_string); |
566 | 77 | token_string[token_string_len - 2] = '"'; |
567 | 77 | token_string[token_string_len - 1] = 0; |
568 | 77 | break; |
569 | | |
570 | 0 | case JSON_TOKEN_COMMENT_SINGLE: |
571 | 0 | case JSON_TOKEN_COMMENT_MULTI: |
572 | 0 | g_snprintf (token_string, token_string_len, "comment"); |
573 | 0 | break; |
574 | | |
575 | 0 | case JSON_TOKEN_NONE: |
576 | | /* somehow the user's parsing code is screwed, there isn't much |
577 | | * we can do about it. |
578 | | * Note, a common case to trigger this is |
579 | | * json_scanner_peek_next_token(); json_scanner_unexp_token(); |
580 | | * without an intermediate json_scanner_get_next_token(). |
581 | | */ |
582 | 0 | g_assert_not_reached (); |
583 | 0 | break; |
584 | 1.26k | } |
585 | | |
586 | | |
587 | 1.26k | switch (expected_token) |
588 | 1.26k | { |
589 | 73 | case JSON_TOKEN_EOF: |
590 | 73 | g_snprintf (expected_string, expected_string_len, "end of file"); |
591 | 73 | break; |
592 | 283 | default: |
593 | 283 | if (expected_token >= 1 && expected_token <= 255) |
594 | 283 | { |
595 | 283 | if ((expected_token >= ' ' && expected_token <= '~') || |
596 | 0 | strchr (scanner->config.cset_identifier_first, expected_token) || |
597 | 0 | strchr (scanner->config.cset_identifier_nth, expected_token)) |
598 | 283 | g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token); |
599 | 0 | else |
600 | 0 | g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token); |
601 | 283 | break; |
602 | 283 | } |
603 | 0 | G_GNUC_FALLTHROUGH; |
604 | |
|
605 | 117 | case JSON_TOKEN_SYMBOL: |
606 | 117 | { |
607 | 117 | bool need_valid = (scanner->token == JSON_TOKEN_SYMBOL || scanner->token > JSON_TOKEN_LAST); |
608 | 117 | g_snprintf (expected_string, expected_string_len, |
609 | 117 | "%s%s", |
610 | 117 | need_valid ? "valid " : "", |
611 | 117 | symbol_spec); |
612 | 117 | } |
613 | 117 | break; |
614 | 0 | case JSON_TOKEN_INT: |
615 | 0 | g_snprintf (expected_string, |
616 | 0 | expected_string_len, |
617 | 0 | "%snumber (integer)", |
618 | 0 | scanner->token == expected_token ? "valid " : ""); |
619 | 0 | break; |
620 | 0 | case JSON_TOKEN_FLOAT: |
621 | 0 | g_snprintf (expected_string, |
622 | 0 | expected_string_len, |
623 | 0 | "%snumber (float)", |
624 | 0 | scanner->token == expected_token ? "valid " : ""); |
625 | 0 | break; |
626 | 168 | case JSON_TOKEN_STRING: |
627 | 168 | g_snprintf (expected_string, |
628 | 168 | expected_string_len, |
629 | 168 | "%sstring constant", |
630 | 168 | scanner->token == JSON_TOKEN_STRING ? "valid " : ""); |
631 | 168 | break; |
632 | 27 | case JSON_TOKEN_IDENTIFIER: |
633 | 27 | g_snprintf (expected_string, |
634 | 27 | expected_string_len, |
635 | 27 | "%s%s", |
636 | 27 | scanner->token == JSON_TOKEN_IDENTIFIER ? "valid " : "", |
637 | 27 | identifier_spec); |
638 | 27 | break; |
639 | 0 | case JSON_TOKEN_COMMENT_SINGLE: |
640 | 0 | g_snprintf (expected_string, |
641 | 0 | expected_string_len, |
642 | 0 | "%scomment (single-line)", |
643 | 0 | scanner->token == expected_token ? "valid " : ""); |
644 | 0 | break; |
645 | 0 | case JSON_TOKEN_COMMENT_MULTI: |
646 | 0 | g_snprintf (expected_string, |
647 | 0 | expected_string_len, |
648 | 0 | "%scomment (multi-line)", |
649 | 0 | scanner->token == expected_token ? "valid " : ""); |
650 | 0 | break; |
651 | 593 | case JSON_TOKEN_NONE: |
652 | 593 | case JSON_TOKEN_ERROR: |
653 | | /* this is handled upon printout */ |
654 | 593 | break; |
655 | 1.26k | } |
656 | | |
657 | 1.26k | if (message && message[0] != 0) |
658 | 0 | message_prefix = " - "; |
659 | 1.26k | else |
660 | 1.26k | { |
661 | 1.26k | message_prefix = ""; |
662 | 1.26k | message = ""; |
663 | 1.26k | } |
664 | 1.26k | if (expected_token == JSON_TOKEN_ERROR) |
665 | 0 | { |
666 | 0 | json_scanner_error (scanner, |
667 | 0 | "failure around %s%s%s", |
668 | 0 | token_string, |
669 | 0 | message_prefix, |
670 | 0 | message); |
671 | 0 | } |
672 | 1.26k | else if (expected_token == JSON_TOKEN_NONE) |
673 | 593 | { |
674 | 593 | if (print_unexp) |
675 | 0 | json_scanner_error (scanner, |
676 | 0 | "unexpected %s%s%s", |
677 | 0 | token_string, |
678 | 0 | message_prefix, |
679 | 0 | message); |
680 | 593 | else |
681 | 593 | json_scanner_error (scanner, |
682 | 593 | "%s%s%s", |
683 | 593 | token_string, |
684 | 593 | message_prefix, |
685 | 593 | message); |
686 | 593 | } |
687 | 668 | else |
688 | 668 | { |
689 | 668 | if (print_unexp) |
690 | 638 | json_scanner_error (scanner, |
691 | 638 | "unexpected %s, expected %s%s%s", |
692 | 638 | token_string, |
693 | 638 | expected_string, |
694 | 638 | message_prefix, |
695 | 638 | message); |
696 | 30 | else |
697 | 30 | json_scanner_error (scanner, |
698 | 30 | "%s, expected %s%s%s", |
699 | 30 | token_string, |
700 | 30 | expected_string, |
701 | 30 | message_prefix, |
702 | 30 | message); |
703 | 668 | } |
704 | | |
705 | 1.26k | g_free (token_string); |
706 | 1.26k | g_free (expected_string); |
707 | 1.26k | } |
708 | | |
709 | | void |
710 | | json_scanner_unknown_token (JsonScanner *scanner, |
711 | | unsigned int token) |
712 | 1.26k | { |
713 | 1.26k | const char *symbol_name; |
714 | 1.26k | char *msg; |
715 | 1.26k | unsigned int cur_token; |
716 | | |
717 | 1.26k | cur_token = json_scanner_get_current_token (scanner); |
718 | 1.26k | msg = NULL; |
719 | | |
720 | 1.26k | symbol_name = NULL; |
721 | 6.30k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
722 | 5.04k | if (json_symbols[i].token == token) |
723 | 0 | symbol_name = json_symbol_names + json_symbols[i].name_offset; |
724 | | |
725 | 1.26k | if (symbol_name != NULL) |
726 | 0 | msg = g_strconcat ("e.g. '", symbol_name, "'", NULL); |
727 | | |
728 | 1.26k | symbol_name = "???"; |
729 | 6.30k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
730 | 5.04k | if (json_symbols[i].token == cur_token) |
731 | 13 | symbol_name = json_symbol_names + json_symbols[i].name_offset; |
732 | | |
733 | 1.26k | json_scanner_unexp_token (scanner, token, |
734 | 1.26k | NULL, "value", |
735 | 1.26k | symbol_name, |
736 | 1.26k | msg); |
737 | | |
738 | 1.26k | g_free (msg); |
739 | 1.26k | } |
740 | | |
741 | | static void |
742 | | json_scanner_get_token_i (JsonScanner *scanner, |
743 | | unsigned int *token_p, |
744 | | JsonTokenValue *value_p, |
745 | | guint *line_p, |
746 | | guint *position_p) |
747 | 8.07M | { |
748 | 8.07M | do |
749 | 8.16M | { |
750 | 8.16M | json_scanner_free_value (token_p, value_p); |
751 | 8.16M | json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p); |
752 | 8.16M | } |
753 | 8.16M | while (((*token_p > 0 && *token_p < 256) && |
754 | 5.67M | strchr (scanner->config.cset_skip_characters, *token_p)) || |
755 | 8.07M | *token_p == JSON_TOKEN_COMMENT_MULTI || |
756 | 8.07M | *token_p == JSON_TOKEN_COMMENT_SINGLE); |
757 | | |
758 | 8.07M | switch (*token_p) |
759 | 8.07M | { |
760 | 6.47k | case JSON_TOKEN_IDENTIFIER: |
761 | 6.47k | break; |
762 | | |
763 | 7.57k | case JSON_TOKEN_SYMBOL: |
764 | 7.57k | *token_p = GPOINTER_TO_UINT (value_p->v_symbol); |
765 | 7.57k | break; |
766 | | |
767 | 8.06M | default: |
768 | 8.06M | break; |
769 | 8.07M | } |
770 | | |
771 | 8.07M | errno = 0; |
772 | 8.07M | } |
773 | | |
774 | | static void |
775 | | json_scanner_get_token_ll (JsonScanner *scanner, |
776 | | unsigned int *token_p, |
777 | | JsonTokenValue *value_p, |
778 | | guint *line_p, |
779 | | guint *position_p) |
780 | 8.16M | { |
781 | 8.16M | const JsonScannerConfig *config; |
782 | 8.16M | unsigned int token; |
783 | 8.16M | bool in_comment_multi = false; |
784 | 8.16M | bool in_comment_single = false; |
785 | 8.16M | bool in_string_sq = false; |
786 | 8.16M | bool in_string_dq = false; |
787 | 8.16M | GString *gstring = NULL; |
788 | 8.16M | JsonTokenValue value; |
789 | 8.16M | guchar ch; |
790 | | |
791 | 8.16M | config = &scanner->config; |
792 | 8.16M | (*value_p).v_int64 = 0; |
793 | | |
794 | 8.16M | if (scanner->text >= scanner->text_end || |
795 | 8.15M | scanner->token == JSON_TOKEN_EOF) |
796 | 8.16k | { |
797 | 8.16k | *token_p = JSON_TOKEN_EOF; |
798 | 8.16k | return; |
799 | 8.16k | } |
800 | | |
801 | 8.15M | gstring = NULL; |
802 | | |
803 | 8.15M | do /* while (ch != 0) */ |
804 | 8.15M | { |
805 | 8.15M | ch = json_scanner_get_char (scanner, line_p, position_p); |
806 | | |
807 | 8.15M | value.v_int64 = 0; |
808 | 8.15M | token = JSON_TOKEN_NONE; |
809 | | |
810 | | /* this is *evil*, but needed ;( |
811 | | * we first check for identifier first character, because it |
812 | | * might interfere with other key chars like slashes or numbers |
813 | | */ |
814 | 8.15M | if (ch != 0 && strchr (config->cset_identifier_first, ch)) |
815 | 14.1k | goto identifier_precedence; |
816 | | |
817 | 8.14M | switch (ch) |
818 | 8.14M | { |
819 | 0 | case 0: |
820 | 0 | token = JSON_TOKEN_EOF; |
821 | 0 | (*position_p)++; |
822 | | /* ch = 0; */ |
823 | 0 | break; |
824 | | |
825 | 1.15k | case '/': |
826 | 1.15k | if (config->strict || json_scanner_peek_next_char (scanner) != '*') |
827 | 805 | goto default_case; |
828 | 350 | json_scanner_get_char (scanner, line_p, position_p); |
829 | 350 | token = JSON_TOKEN_COMMENT_MULTI; |
830 | 350 | in_comment_multi = true; |
831 | 350 | gstring = g_string_new (NULL); |
832 | 344k | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
833 | 344k | { |
834 | 344k | if (ch == '*' && json_scanner_peek_next_char (scanner) == '/') |
835 | 304 | { |
836 | 304 | json_scanner_get_char (scanner, line_p, position_p); |
837 | 304 | in_comment_multi = false; |
838 | 304 | break; |
839 | 304 | } |
840 | 343k | else |
841 | 343k | gstring = g_string_append_c (gstring, ch); |
842 | 344k | } |
843 | 350 | ch = 0; |
844 | 350 | break; |
845 | | |
846 | 549 | case '\'': |
847 | 549 | if (config->strict) |
848 | 0 | goto default_case; |
849 | 549 | token = JSON_TOKEN_STRING; |
850 | 549 | in_string_sq = true; |
851 | 549 | gstring = g_string_new (NULL); |
852 | 117k | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
853 | 117k | { |
854 | 117k | if (ch == '\'' || token == JSON_TOKEN_ERROR) |
855 | 509 | { |
856 | 509 | in_string_sq = false; |
857 | 509 | break; |
858 | 509 | } |
859 | 117k | else |
860 | 117k | { |
861 | 117k | g_string_append_c (gstring, ch); |
862 | 117k | } |
863 | 117k | } |
864 | 549 | ch = 0; |
865 | 549 | break; |
866 | | |
867 | 2.19M | case '"': |
868 | 2.19M | token = JSON_TOKEN_STRING; |
869 | 2.19M | in_string_dq = true; |
870 | 2.19M | gstring = g_string_new (NULL); |
871 | 107M | while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0) |
872 | 107M | { |
873 | 107M | if (ch == '"' || token == JSON_TOKEN_ERROR) |
874 | 2.19M | { |
875 | 2.19M | in_string_dq = false; |
876 | 2.19M | break; |
877 | 2.19M | } |
878 | 105M | else |
879 | 105M | { |
880 | 105M | if (ch == '\\') |
881 | 962k | { |
882 | 962k | ch = json_scanner_get_char (scanner, line_p, position_p); |
883 | 962k | switch (ch) |
884 | 962k | { |
885 | 27 | case 0: |
886 | 27 | break; |
887 | | |
888 | 39.3k | case '"': |
889 | 39.3k | gstring = g_string_append_c (gstring, '"'); |
890 | 39.3k | break; |
891 | | |
892 | 462k | case '\\': |
893 | 462k | gstring = g_string_append_c (gstring, '\\'); |
894 | 462k | break; |
895 | | |
896 | 14.1k | case '/': |
897 | 14.1k | gstring = g_string_append_c (gstring, '/'); |
898 | 14.1k | break; |
899 | | |
900 | 84.0k | case 'n': |
901 | 84.0k | gstring = g_string_append_c (gstring, '\n'); |
902 | 84.0k | break; |
903 | | |
904 | 37.0k | case 't': |
905 | 37.0k | gstring = g_string_append_c (gstring, '\t'); |
906 | 37.0k | break; |
907 | | |
908 | 1.04k | case 'r': |
909 | 1.04k | gstring = g_string_append_c (gstring, '\r'); |
910 | 1.04k | break; |
911 | | |
912 | 34.3k | case 'b': |
913 | 34.3k | gstring = g_string_append_c (gstring, '\b'); |
914 | 34.3k | break; |
915 | | |
916 | 12.8k | case 'f': |
917 | 12.8k | gstring = g_string_append_c (gstring, '\f'); |
918 | 12.8k | break; |
919 | | |
920 | 12.3k | case 'u': |
921 | 12.3k | { |
922 | 12.3k | guint fchar = json_scanner_peek_next_char (scanner); |
923 | 12.3k | if (is_hex_digit (fchar)) |
924 | 12.0k | { |
925 | 12.0k | gunichar ucs; |
926 | | |
927 | 12.0k | if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p)) |
928 | 652 | { |
929 | 652 | token = JSON_TOKEN_ERROR; |
930 | 652 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
931 | 652 | g_string_free (gstring, TRUE); |
932 | 652 | gstring = NULL; |
933 | 652 | break; |
934 | 652 | } |
935 | | |
936 | | /* resolve UTF-16 surrogates for Unicode characters not in the BMP, |
937 | | * as per ECMA 404, § 9, "String" |
938 | | */ |
939 | 11.3k | if (g_unichar_type (ucs) == G_UNICODE_SURROGATE) |
940 | 631 | { |
941 | 631 | unsigned int next_ch; |
942 | | |
943 | 631 | next_ch = json_scanner_peek_next_char (scanner); |
944 | 631 | if (next_ch != '\\') |
945 | 105 | { |
946 | 105 | token = JSON_TOKEN_ERROR; |
947 | 105 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
948 | 105 | g_string_free (gstring, TRUE); |
949 | 105 | gstring = NULL; |
950 | 105 | break; |
951 | 105 | } |
952 | 526 | else |
953 | 526 | json_scanner_get_char (scanner, line_p, position_p); |
954 | | |
955 | 526 | next_ch = json_scanner_peek_next_char (scanner); |
956 | 526 | if (next_ch != 'u') |
957 | 12 | { |
958 | 12 | token = JSON_TOKEN_ERROR; |
959 | 12 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
960 | 12 | g_string_free (gstring, TRUE); |
961 | 12 | gstring = NULL; |
962 | 12 | break; |
963 | 12 | } |
964 | 514 | else |
965 | 514 | json_scanner_get_char (scanner, line_p, position_p); |
966 | | |
967 | | /* read next surrogate */ |
968 | 514 | gunichar units[2]; |
969 | | |
970 | 514 | units[0] = ucs; |
971 | | |
972 | 514 | if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p)) |
973 | 16 | { |
974 | 16 | token = JSON_TOKEN_ERROR; |
975 | 16 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
976 | 16 | g_string_free (gstring, TRUE); |
977 | 16 | gstring = NULL; |
978 | 16 | break; |
979 | 16 | } |
980 | | |
981 | 498 | units[1] = ucs; |
982 | | |
983 | 498 | if (0xdc00 <= units[1] && units[1] <= 0xdfff && |
984 | 480 | 0xd800 <= units[0] && units[0] <= 0xdbff) |
985 | 479 | { |
986 | 479 | ucs = decode_utf16_surrogate_pair (units); |
987 | 479 | if (!g_unichar_validate (ucs)) |
988 | 0 | { |
989 | 0 | token = JSON_TOKEN_ERROR; |
990 | 0 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
991 | 0 | g_string_free (gstring, TRUE); |
992 | 0 | gstring = NULL; |
993 | 0 | break; |
994 | 0 | } |
995 | 479 | } |
996 | 19 | else |
997 | 19 | { |
998 | 19 | token = JSON_TOKEN_ERROR; |
999 | 19 | value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR; |
1000 | 19 | g_string_free (gstring, TRUE); |
1001 | 19 | gstring = NULL; |
1002 | 19 | break; |
1003 | 19 | } |
1004 | 498 | } |
1005 | 10.7k | else |
1006 | 10.7k | { |
1007 | 10.7k | if (!g_unichar_validate (ucs)) |
1008 | 0 | { |
1009 | 0 | token = JSON_TOKEN_ERROR; |
1010 | 0 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
1011 | 0 | g_string_free (gstring, TRUE); |
1012 | 0 | gstring = NULL; |
1013 | 0 | break; |
1014 | 0 | } |
1015 | 10.7k | } |
1016 | | |
1017 | 11.2k | gstring = g_string_append_unichar (gstring, ucs); |
1018 | 11.2k | } |
1019 | 345 | else |
1020 | 345 | { |
1021 | 345 | token = JSON_TOKEN_ERROR; |
1022 | 345 | value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE; |
1023 | 345 | g_string_free (gstring, TRUE); |
1024 | 345 | gstring = NULL; |
1025 | 345 | } |
1026 | 11.5k | break; |
1027 | 12.3k | } |
1028 | | |
1029 | 11.5k | case '0': |
1030 | 9.72k | case '1': |
1031 | 16.7k | case '2': |
1032 | 18.7k | case '3': |
1033 | 23.4k | case '4': |
1034 | 27.9k | case '5': |
1035 | 31.1k | case '6': |
1036 | 34.1k | case '7': |
1037 | 34.1k | if (config->strict) |
1038 | 0 | { |
1039 | 0 | token = JSON_TOKEN_ERROR; |
1040 | 0 | value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE; |
1041 | 0 | g_string_free (gstring, TRUE); |
1042 | 0 | gstring = NULL; |
1043 | 0 | } |
1044 | 34.1k | else |
1045 | 34.1k | { |
1046 | 34.1k | gunichar ucs = (ch - '0'); |
1047 | 34.1k | guchar next_ch; |
1048 | 34.1k | unsigned i; |
1049 | | |
1050 | 60.3k | for (i = 0; i < 2; i++) |
1051 | 51.7k | { |
1052 | 51.7k | next_ch = json_scanner_peek_next_char (scanner); |
1053 | | |
1054 | 51.7k | if (is_oct_digit (next_ch)) |
1055 | 26.1k | { |
1056 | 26.1k | ucs = ucs * 8 + (next_ch - '0'); |
1057 | 26.1k | json_scanner_get_char (scanner, line_p, position_p); |
1058 | 26.1k | } |
1059 | 25.6k | else |
1060 | 25.6k | { |
1061 | 25.6k | break; |
1062 | 25.6k | } |
1063 | 51.7k | } |
1064 | | |
1065 | 34.1k | gstring = g_string_append_unichar (gstring, ucs); |
1066 | 34.1k | } |
1067 | 34.1k | break; |
1068 | | |
1069 | 230k | default: |
1070 | 230k | if (config->strict) |
1071 | 0 | { |
1072 | 0 | token = JSON_TOKEN_ERROR; |
1073 | 0 | value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE; |
1074 | 0 | g_string_free (gstring, TRUE); |
1075 | 0 | gstring = NULL; |
1076 | 0 | } |
1077 | 230k | else |
1078 | 230k | { |
1079 | 230k | gstring = g_string_append_c (gstring, ch); |
1080 | 230k | } |
1081 | 230k | break; |
1082 | 962k | } |
1083 | 962k | } |
1084 | 104M | else if (ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' || ch == '\b') |
1085 | 672 | { |
1086 | 672 | token = JSON_TOKEN_ERROR; |
1087 | 672 | value.v_error = JSON_ERROR_TYPE_UNESCAPED_CTRL; |
1088 | 672 | g_string_free (gstring, TRUE); |
1089 | 672 | gstring = NULL; |
1090 | 672 | break; |
1091 | 672 | } |
1092 | 104M | else |
1093 | 104M | gstring = g_string_append_c (gstring, ch); |
1094 | 105M | } |
1095 | 107M | } |
1096 | 2.19M | ch = 0; |
1097 | 2.19M | break; |
1098 | | |
1099 | | /* {{{ number parsing */ |
1100 | 2.24k | case '-': |
1101 | 2.24k | if (!g_ascii_isdigit (json_scanner_peek_next_char (scanner))) |
1102 | 94 | { |
1103 | 94 | token = JSON_TOKEN_ERROR; |
1104 | 94 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1105 | 94 | ch = 0; |
1106 | 94 | break; |
1107 | 94 | } |
1108 | 2.15k | G_GNUC_FALLTHROUGH; |
1109 | | |
1110 | 243k | case '0': |
1111 | 244k | case '1': |
1112 | 246k | case '2': |
1113 | 249k | case '3': |
1114 | 250k | case '4': |
1115 | 254k | case '5': |
1116 | 257k | case '6': |
1117 | 257k | case '7': |
1118 | 258k | case '8': |
1119 | 260k | case '9': |
1120 | 260k | { |
1121 | 260k | bool in_number = true; |
1122 | 260k | bool leading_sign = ch == '-'; |
1123 | 260k | bool leading_zero = ch == '0'; |
1124 | 260k | char *endptr; |
1125 | | |
1126 | 260k | if (token == JSON_TOKEN_NONE) |
1127 | 260k | token = JSON_TOKEN_INT; |
1128 | | |
1129 | 260k | gstring = g_string_new (""); |
1130 | 260k | gstring = g_string_append_c (gstring, ch); |
1131 | | |
1132 | 260k | if (leading_sign) |
1133 | 2.15k | { |
1134 | 2.15k | ch = json_scanner_get_char (scanner, line_p, position_p); |
1135 | 2.15k | leading_zero = ch == '0'; |
1136 | 2.15k | g_string_append_c (gstring, ch); |
1137 | 2.15k | } |
1138 | | |
1139 | 260k | do /* while (in_number) */ |
1140 | 1.71M | { |
1141 | 1.71M | bool is_E = token == JSON_TOKEN_FLOAT && (ch == 'e' || ch == 'E'); |
1142 | | |
1143 | 1.71M | ch = json_scanner_peek_next_char (scanner); |
1144 | | |
1145 | 1.71M | if ((ch >= '0' && ch <= '9') || |
1146 | 422k | (ch == 'e' || ch == 'E') || |
1147 | 331k | ch == '.' || |
1148 | 330k | (is_E && (ch == '+' || ch == '-'))) |
1149 | 1.45M | { |
1150 | 1.45M | ch = json_scanner_get_char (scanner, line_p, position_p); |
1151 | | |
1152 | 1.45M | switch (ch) |
1153 | 1.45M | { |
1154 | 1.45k | case '.': |
1155 | 1.45k | { |
1156 | 1.45k | unsigned int next_ch = json_scanner_peek_next_char (scanner); |
1157 | | |
1158 | 1.45k | if (!g_ascii_isdigit (next_ch)) |
1159 | 41 | { |
1160 | 41 | token = JSON_TOKEN_ERROR; |
1161 | 41 | value.v_error = JSON_ERROR_TYPE_FLOAT_MALFORMED; |
1162 | 41 | in_number = false; |
1163 | 41 | } |
1164 | 1.41k | else |
1165 | 1.41k | { |
1166 | 1.41k | token = JSON_TOKEN_FLOAT; |
1167 | 1.41k | gstring = g_string_append_c (gstring, ch); |
1168 | 1.41k | } |
1169 | 1.45k | } |
1170 | 1.45k | break; |
1171 | | |
1172 | 1.18M | case '0': |
1173 | 1.20M | case '1': |
1174 | 1.21M | case '2': |
1175 | 1.21M | case '3': |
1176 | 1.22M | case '4': |
1177 | 1.23M | case '5': |
1178 | 1.24M | case '6': |
1179 | 1.25M | case '7': |
1180 | 1.28M | case '8': |
1181 | 1.29M | case '9': |
1182 | 1.29M | if (leading_zero && token != JSON_TOKEN_FLOAT) |
1183 | 135 | { |
1184 | 135 | token = JSON_TOKEN_ERROR; |
1185 | 135 | value.v_error= JSON_ERROR_TYPE_LEADING_ZERO; |
1186 | 135 | in_number = false; |
1187 | 135 | } |
1188 | 1.29M | else |
1189 | 1.29M | gstring = g_string_append_c (gstring, ch); |
1190 | 1.29M | break; |
1191 | | |
1192 | 69.9k | case '-': |
1193 | 70.3k | case '+': |
1194 | 70.3k | if (token != JSON_TOKEN_FLOAT) |
1195 | 0 | { |
1196 | 0 | token = JSON_TOKEN_ERROR; |
1197 | 0 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1198 | 0 | in_number = false; |
1199 | 0 | } |
1200 | 70.3k | else |
1201 | 70.3k | gstring = g_string_append_c (gstring, ch); |
1202 | 70.3k | break; |
1203 | | |
1204 | 2.28k | case 'e': |
1205 | 90.8k | case 'E': |
1206 | 90.8k | token = JSON_TOKEN_FLOAT; |
1207 | 90.8k | gstring = g_string_append_c (gstring, ch); |
1208 | 90.8k | break; |
1209 | | |
1210 | 0 | default: |
1211 | 0 | token = JSON_TOKEN_ERROR; |
1212 | 0 | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1213 | 0 | in_number = false; |
1214 | 0 | break; |
1215 | 1.45M | } |
1216 | 1.45M | } |
1217 | 260k | else |
1218 | 260k | in_number = false; |
1219 | 1.71M | } |
1220 | 1.71M | while (in_number); |
1221 | | |
1222 | 260k | if (token != JSON_TOKEN_ERROR) |
1223 | 260k | { |
1224 | 260k | endptr = NULL; |
1225 | 260k | if (token == JSON_TOKEN_FLOAT) |
1226 | 3.02k | value.v_float = g_ascii_strtod (gstring->str, &endptr); |
1227 | 257k | else if (token == JSON_TOKEN_INT) |
1228 | 257k | value.v_int64 = g_ascii_strtoll (gstring->str, &endptr, 10); |
1229 | | |
1230 | 260k | if (endptr && *endptr) |
1231 | 1.06k | { |
1232 | 1.06k | token = JSON_TOKEN_ERROR; |
1233 | 1.06k | if (*endptr == 'e' || *endptr == 'E') |
1234 | 1.00k | value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST; |
1235 | 57 | else |
1236 | 57 | value.v_error = JSON_ERROR_TYPE_DIGIT_RADIX; |
1237 | 1.06k | } |
1238 | 260k | } |
1239 | 260k | g_string_free (gstring, TRUE); |
1240 | 260k | gstring = NULL; |
1241 | 260k | ch = 0; |
1242 | 260k | } |
1243 | 0 | break; /* number parsing }}} */ |
1244 | | |
1245 | 5.67M | default: |
1246 | 5.68M | default_case: |
1247 | 5.68M | { |
1248 | 5.68M | if (!config->strict && |
1249 | 5.68M | config->cpair_comment_single && |
1250 | 5.68M | ch == config->cpair_comment_single[0]) |
1251 | 805 | { |
1252 | 805 | token = JSON_TOKEN_COMMENT_SINGLE; |
1253 | 805 | in_comment_single = true; |
1254 | 805 | gstring = g_string_new (NULL); |
1255 | 805 | ch = json_scanner_get_char (scanner, line_p, position_p); |
1256 | 65.9k | while (ch != 0) |
1257 | 65.9k | { |
1258 | 65.9k | if (ch == config->cpair_comment_single[1]) |
1259 | 758 | { |
1260 | 758 | in_comment_single = false; |
1261 | 758 | ch = 0; |
1262 | 758 | break; |
1263 | 758 | } |
1264 | | |
1265 | 65.1k | gstring = g_string_append_c (gstring, ch); |
1266 | 65.1k | ch = json_scanner_get_char (scanner, line_p, position_p); |
1267 | 65.1k | } |
1268 | | /* ignore a missing newline at EOF for single line comments */ |
1269 | 805 | if (in_comment_single && |
1270 | 47 | config->cpair_comment_single[1] == '\n') |
1271 | 0 | in_comment_single = false; |
1272 | 805 | } |
1273 | 5.67M | else if (ch && strchr (config->cset_identifier_first, ch)) |
1274 | 0 | { |
1275 | 14.1k | identifier_precedence: |
1276 | | |
1277 | 14.1k | if (config->cset_identifier_nth && ch && |
1278 | 14.1k | strchr (config->cset_identifier_nth, |
1279 | 14.1k | json_scanner_peek_next_char (scanner))) |
1280 | 14.0k | { |
1281 | 14.0k | token = JSON_TOKEN_IDENTIFIER; |
1282 | 14.0k | gstring = g_string_new (NULL); |
1283 | 14.0k | gstring = g_string_append_c (gstring, ch); |
1284 | 14.0k | do |
1285 | 83.0k | { |
1286 | 83.0k | ch = json_scanner_get_char (scanner, line_p, position_p); |
1287 | 83.0k | gstring = g_string_append_c (gstring, ch); |
1288 | 83.0k | ch = json_scanner_peek_next_char (scanner); |
1289 | 83.0k | } |
1290 | 83.0k | while (ch && strchr (config->cset_identifier_nth, ch)); |
1291 | 14.0k | ch = 0; |
1292 | 14.0k | } |
1293 | 14.1k | } |
1294 | 5.69M | if (ch) |
1295 | 5.67M | { |
1296 | 5.67M | token = ch; |
1297 | 5.67M | ch = 0; |
1298 | 5.67M | } |
1299 | 5.69M | } /* default_case:... */ |
1300 | 5.69M | break; |
1301 | 8.14M | } |
1302 | 8.15M | g_assert (ch == 0 && token != JSON_TOKEN_NONE); /* paranoid */ |
1303 | 8.15M | } |
1304 | 8.15M | while (ch != 0); |
1305 | | |
1306 | 8.15M | if (in_comment_multi || in_comment_single || |
1307 | 8.15M | in_string_sq || in_string_dq) |
1308 | 1.16k | { |
1309 | 1.16k | token = JSON_TOKEN_ERROR; |
1310 | 1.16k | if (gstring) |
1311 | 428 | { |
1312 | 428 | g_string_free (gstring, TRUE); |
1313 | 428 | gstring = NULL; |
1314 | 428 | } |
1315 | 1.16k | (*position_p)++; |
1316 | 1.16k | if (in_comment_multi || in_comment_single) |
1317 | 93 | value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT; |
1318 | 1.06k | else /* (in_string_sq || in_string_dq) */ |
1319 | 1.06k | value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING; |
1320 | 1.16k | } |
1321 | | |
1322 | 8.15M | if (gstring) |
1323 | 2.21M | { |
1324 | 2.21M | value.v_string = g_string_free (gstring, FALSE); |
1325 | 2.21M | gstring = NULL; |
1326 | 2.21M | } |
1327 | | |
1328 | 8.15M | if (token == JSON_TOKEN_IDENTIFIER) |
1329 | 14.0k | { |
1330 | 59.3k | for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++) |
1331 | 52.9k | { |
1332 | 52.9k | const char *symbol = json_symbol_names + json_symbols[i].name_offset; |
1333 | 52.9k | if (strcmp (value.v_identifier, symbol) == 0) |
1334 | 7.57k | { |
1335 | 7.57k | g_free (value.v_identifier); |
1336 | 7.57k | token = JSON_TOKEN_SYMBOL; |
1337 | 7.57k | value.v_symbol = GUINT_TO_POINTER (json_symbols[i].token); |
1338 | 7.57k | break; |
1339 | 7.57k | } |
1340 | 52.9k | } |
1341 | 14.0k | } |
1342 | | |
1343 | 8.15M | *token_p = token; |
1344 | 8.15M | *value_p = value; |
1345 | 8.15M | } |
1346 | | |
/* json_scanner_get_int64_value:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the v_int64 member of scanner->value exactly as stored.
 * The current token type is not inspected here.
 * NOTE(review): presumably only meaningful while the current token is
 * JSON_TOKEN_INT -- confirm against the callers.
 */
1347 | | gint64 |
1348 | | json_scanner_get_int64_value (const JsonScanner *scanner) |
1349 | 256k | { |
1350 | 256k | return scanner->value.v_int64; |
1351 | 256k | } |
1352 | | |
/* json_scanner_get_float_value:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the v_float member of scanner->value exactly as stored.
 * The current token type is not inspected here.
 * NOTE(review): presumably only meaningful while the current token is
 * JSON_TOKEN_FLOAT -- confirm against the callers.
 */
1353 | | double |
1354 | | json_scanner_get_float_value (const JsonScanner *scanner) |
1355 | 1.89k | { |
1356 | 1.89k | return scanner->value.v_float; |
1357 | 1.89k | } |
1358 | | |
/* json_scanner_get_string_value:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the v_string pointer held in scanner->value. No copy is made:
 * the caller receives the scanner's own pointer as a const string.
 * NOTE(review): how long that pointer stays valid depends on when the
 * scanner replaces or frees its value, which is not visible here.
 */
1359 | | const char * |
1360 | | json_scanner_get_string_value (const JsonScanner *scanner) |
1361 | 614k | { |
1362 | 614k | return scanner->value.v_string; |
1363 | 614k | } |
1364 | | |
/* json_scanner_dup_string_value:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the result of g_strdup() on scanner->value.v_string, i.e. a
 * separately allocated copy rather than the scanner's own pointer.
 * Per GLib's g_strdup() contract the result is NULL when the stored
 * pointer is NULL, and a non-NULL result should be released with g_free().
 */
1365 | | char * |
1366 | | json_scanner_dup_string_value (const JsonScanner *scanner) |
1367 | 1.58M | { |
1368 | 1.58M | return g_strdup (scanner->value.v_string); |
1369 | 1.58M | } |
1370 | | |
/* json_scanner_get_identifier:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the v_identifier pointer held in scanner->value. No copy is
 * made: the caller receives the scanner's own pointer as a const string.
 * NOTE(review): presumably only meaningful while the current token is
 * JSON_TOKEN_IDENTIFIER -- confirm against the callers.
 */
1371 | | const char * |
1372 | | json_scanner_get_identifier (const JsonScanner *scanner) |
1373 | 0 | { |
1374 | 0 | return scanner->value.v_identifier; |
1375 | 0 | } |
1376 | | |
/* json_scanner_dup_identifier:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the result of g_strdup() on scanner->value.v_identifier, i.e.
 * a separately allocated copy rather than the scanner's own pointer.
 * Per GLib's g_strdup() contract the result is NULL when the stored
 * pointer is NULL, and a non-NULL result should be released with g_free().
 */
1377 | | char * |
1378 | | json_scanner_dup_identifier (const JsonScanner *scanner) |
1379 | 5.91k | { |
1380 | 5.91k | return g_strdup (scanner->value.v_identifier); |
1381 | 5.91k | } |
1382 | | |
/* json_scanner_get_current_line:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the scanner's `line` field unchanged.
 * NOTE(review): whether the count is 0- or 1-based is set where the
 * field is initialised, which is outside this excerpt.
 */
1383 | | unsigned int |
1384 | | json_scanner_get_current_line (const JsonScanner *scanner) |
1385 | 1.26k | { |
1386 | 1.26k | return scanner->line; |
1387 | 1.26k | } |
1388 | | |
/* json_scanner_get_current_position:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the scanner's `position` field unchanged.
 * NOTE(review): the unit and origin of the position (e.g. offset within
 * the current line) are not established in this excerpt -- confirm
 * where the field is updated.
 */
1389 | | unsigned int |
1390 | | json_scanner_get_current_position (const JsonScanner *scanner) |
1391 | 1.26k | { |
1392 | 1.26k | return scanner->position; |
1393 | 1.26k | } |
1394 | | |
/* json_scanner_get_current_token:
 * @scanner: scanner to read from; dereferenced without a NULL check
 *
 * Returns the scanner's `token` field unchanged, as an unsigned int.
 * NOTE(review): the value is presumably either a JSON_TOKEN_* constant
 * or a raw character code for single-character tokens -- confirm
 * against the token enumeration in json-scanner.h.
 */
1395 | | unsigned int |
1396 | | json_scanner_get_current_token (const JsonScanner *scanner) |
1397 | 1.26k | { |
1398 | 1.26k | return scanner->token; |
1399 | 1.26k | } |