Coverage Report

Created: 2025-07-18 06:10

/src/tinysparql/subprojects/json-glib-1.10.6/json-glib/json-scanner.c
Line
Count
Source (jump to first uncovered line)
1
/* json-scanner.c: Tokenizer for JSON
2
 *
3
 * SPDX-FileCopyrightText: 1997 Tim Janik
4
 * SPDX-FileCopyrightText: 1997-2000 The GLib Team and others
5
 * SPDX-FileCopyrightText: 2008 OpenedHand Ltd.
6
 * SPDX-FileCopyrightText: 2024 Emmanuele Bassi
7
 * SPDX-License-Identifier: LGPL-2.1-or-later
8
 *
9
 * Based on JsonScanner: Flexible lexical scanner for general purpose.
10
 *
11
 * Modified by Emmanuele Bassi <ebassi@openedhand.com>
12
 *
13
 * This library is free software; you can redistribute it and/or
14
 * modify it under the terms of the GNU Lesser General Public
15
 * License as published by the Free Software Foundation; either
16
 * version 2 of the License, or (at your option) any later version.
17
 *
18
 * This library is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21
 * Lesser General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU Lesser General Public
24
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25
 */
26
27
#include "config.h"
28
29
#include "json-scanner.h"
30
31
#include <errno.h>
32
#include <stdint.h>
33
#include <stdbool.h>
34
#include <stdlib.h>
35
#include <stdarg.h>
36
#include <string.h>
37
#include <stdio.h>
38
#ifdef HAVE_UNISTD_H
39
#include <unistd.h>
40
#endif
41
42
#include <glib.h>
43
#include <glib/gprintf.h>
44
45
/* Scanner failure codes. One of these is carried in JsonTokenValue.v_error
 * when the current token is JSON_TOKEN_ERROR; json_scanner_unexp_token()
 * maps each code to a human readable message.
 */
typedef enum
{
  JSON_ERROR_TYPE_UNKNOWN = 0,              /* reported as "unknown error" */
  JSON_ERROR_TYPE_UNEXP_EOF,                /* unexpected end of file */
  JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING,      /* unterminated string constant */
  JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT,     /* unterminated comment */
  JSON_ERROR_TYPE_NON_DIGIT_IN_CONST,       /* non digit in constant */
  JSON_ERROR_TYPE_DIGIT_RADIX,              /* digit is beyond radix */
  JSON_ERROR_TYPE_FLOAT_RADIX,              /* invalid radix for floating constant */
  JSON_ERROR_TYPE_FLOAT_MALFORMED,          /* malformed floating constant */
  JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR, /* malformed surrogate pair */
  JSON_ERROR_TYPE_LEADING_ZERO,             /* leading zero in number */
  JSON_ERROR_TYPE_UNESCAPED_CTRL,           /* unescaped control character */
  JSON_ERROR_TYPE_UNKNOWN_ESCAPE,           /* unknown backslash escape sequence */
  JSON_ERROR_TYPE_MALFORMED_UNICODE         /* malformed Unicode escape */
} JsonErrorType;
61
62
/* Character classes and mode flags that drive the tokenizer. */
typedef struct
{
  /* Single-character tokens found in this set are discarded by
   * json_scanner_get_token_i() (whitespace).
   */
  const char *cset_skip_characters;

  /* A character from this set at the start of a token makes the
   * tokenizer take the identifier path before anything else.
   */
  const char *cset_identifier_first;

  /* NOTE(review): presumably the characters accepted after the first
   * identifier character; confirm against the tokenizer body.
   */
  const char *cset_identifier_nth;

  /* NOTE(review): presumably the start and end delimiters of a
   * single-line comment; json_scanner_new() notes it is only used when
   * strict is false.
   */
  const char *cpair_comment_single;

  /* When true, the tokenizer does not treat a slash followed by an
   * asterisk as the start of a multi-line comment.
   */
  bool strict;
} JsonScannerConfig;
70
71
typedef union
72
{
73
  gpointer v_symbol;
74
  char *v_identifier;
75
  gint64 v_int64;
76
  double v_float;
77
  char *v_string;
78
  unsigned int v_error;
79
} JsonTokenValue;
80
81
/*< private >
82
 * JsonScanner:
83
 *
84
 * Tokenizer scanner for JSON. See #GScanner
85
 *
86
 * Since: 0.6
87
 */
88
struct _JsonScanner
89
{
90
  /* name of input stream, featured by the default message handler */
91
  const char *input_name;
92
93
  /* link into the scanner configuration */
94
  JsonScannerConfig config;
95
96
  /* fields filled in after json_scanner_get_next_token() */
97
  unsigned int token;
98
  JsonTokenValue value;
99
  unsigned int line;
100
  unsigned int position;
101
102
  /* fields filled in after json_scanner_peek_next_token() */
103
  unsigned int next_token;
104
  JsonTokenValue next_value;
105
  unsigned int next_line;
106
  unsigned int next_position;
107
108
  /* to be considered private */
109
  const char *text;
110
  const char *text_end;
111
  char *buffer;
112
113
  /* handler function for _warn and _error */
114
  JsonScannerMsgFunc msg_handler;
115
  gpointer user_data;
116
};
117
118
static const gchar json_symbol_names[] =
119
  "true\0"
120
  "false\0"
121
  "null\0"
122
  "var\0";
123
124
static const struct
125
{
126
  unsigned int name_offset;
127
  unsigned int token;
128
} json_symbols[] = {
129
  {  0, JSON_TOKEN_TRUE  },
130
  {  5, JSON_TOKEN_FALSE },
131
  { 11, JSON_TOKEN_NULL  },
132
  { 16, JSON_TOKEN_VAR   }
133
};
134
135
static void          json_scanner_get_token_ll   (JsonScanner    *scanner,
136
                                                   unsigned int   *token_p,
137
                                                  JsonTokenValue *value_p,
138
                                                  unsigned int   *line_p,
139
                                                  unsigned int   *position_p);
140
static void      json_scanner_get_token_i    (JsonScanner    *scanner,
141
                                                  unsigned int   *token_p,
142
                                                  JsonTokenValue *value_p,
143
                                                  unsigned int   *line_p,
144
                                                  unsigned int   *position_p);
145
146
static unsigned char json_scanner_peek_next_char (JsonScanner    *scanner);
147
static unsigned char json_scanner_get_char       (JsonScanner    *scanner,
148
                                                  unsigned int   *line_p,
149
                                                  unsigned int   *position_p);
150
static bool          json_scanner_get_unichar    (JsonScanner    *scanner,
151
                                                  gunichar       *ucs,
152
                                                  unsigned int   *line_p,
153
                                                  unsigned int   *position_p);
154
static void          json_scanner_error          (JsonScanner    *scanner,
155
                                                  const char     *format,
156
                                                  ...) G_GNUC_PRINTF (2,3);
157
158
JsonScanner *
159
json_scanner_new (bool strict)
160
16.3k
{
161
16.3k
  JsonScanner *scanner;
162
  
163
16.3k
  scanner = g_new0 (JsonScanner, 1);
164
  
165
16.3k
  scanner->config = (JsonScannerConfig) {
166
    // Skip whitespace
167
16.3k
    .cset_skip_characters = ( " \t\r\n" ),
168
169
    // Identifiers can only be lower case
170
16.3k
    .cset_identifier_first = (
171
16.3k
      G_CSET_a_2_z
172
16.3k
    ),
173
16.3k
    .cset_identifier_nth = (
174
16.3k
      G_CSET_a_2_z
175
16.3k
    ),
176
177
    // Only used if strict = false
178
16.3k
    .cpair_comment_single = ( "//\n" ),
179
16.3k
    .strict = strict,
180
16.3k
  };
181
182
16.3k
  scanner->token = JSON_TOKEN_NONE;
183
16.3k
  scanner->value.v_int64 = 0;
184
16.3k
  scanner->line = 1;
185
16.3k
  scanner->position = 0;
186
187
16.3k
  scanner->next_token = JSON_TOKEN_NONE;
188
16.3k
  scanner->next_value.v_int64 = 0;
189
16.3k
  scanner->next_line = 1;
190
16.3k
  scanner->next_position = 0;
191
192
16.3k
  return scanner;
193
16.3k
}
194
195
static inline void
196
json_scanner_free_value (JsonTokenType  *token_p,
197
                         JsonTokenValue *value_p)
198
17.4M
{
199
17.4M
  switch (*token_p)
200
17.4M
    {
201
2.47M
    case JSON_TOKEN_STRING:
202
2.48M
    case JSON_TOKEN_IDENTIFIER:
203
2.48M
    case JSON_TOKEN_COMMENT_SINGLE:
204
2.48M
    case JSON_TOKEN_COMMENT_MULTI:
205
2.48M
      g_free (value_p->v_string);
206
2.48M
      break;
207
      
208
14.9M
    default:
209
14.9M
      break;
210
17.4M
    }
211
  
212
17.4M
  *token_p = JSON_TOKEN_NONE;
213
17.4M
}
214
215
void
216
json_scanner_destroy (JsonScanner *scanner)
217
16.3k
{
218
16.3k
  g_return_if_fail (scanner != NULL);
219
  
220
16.3k
  json_scanner_free_value (&scanner->token, &scanner->value);
221
16.3k
  json_scanner_free_value (&scanner->next_token, &scanner->next_value);
222
223
16.3k
  g_free (scanner->buffer);
224
16.3k
  g_free (scanner);
225
16.3k
}
226
227
void
228
json_scanner_set_msg_handler (JsonScanner        *scanner,
229
                              JsonScannerMsgFunc  msg_handler,
230
                              gpointer            user_data)
231
16.3k
{
232
16.3k
  g_return_if_fail (scanner != NULL);
233
234
16.3k
  scanner->msg_handler = msg_handler;
235
16.3k
  scanner->user_data = user_data;
236
16.3k
}
237
238
static void
239
json_scanner_error (JsonScanner *scanner,
240
                    const char  *format,
241
                    ...)
242
1.44k
{
243
1.44k
  g_return_if_fail (scanner != NULL);
244
1.44k
  g_return_if_fail (format != NULL);
245
246
1.44k
  if (scanner->msg_handler)
247
1.44k
    {
248
1.44k
      va_list args;
249
1.44k
      char *string;
250
      
251
1.44k
      va_start (args, format);
252
1.44k
      string = g_strdup_vprintf (format, args);
253
1.44k
      va_end (args);
254
      
255
1.44k
      scanner->msg_handler (scanner, string, scanner->user_data);
256
      
257
1.44k
      g_free (string);
258
1.44k
    }
259
1.44k
}
260
261
unsigned int
262
json_scanner_peek_next_token (JsonScanner *scanner)
263
9.60M
{
264
9.60M
  g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF);
265
266
9.60M
  if (scanner->next_token == JSON_TOKEN_NONE)
267
8.67M
    {
268
8.67M
      scanner->next_line = scanner->line;
269
8.67M
      scanner->next_position = scanner->position;
270
8.67M
      json_scanner_get_token_i (scanner,
271
8.67M
                                &scanner->next_token,
272
8.67M
                                &scanner->next_value,
273
8.67M
                                &scanner->next_line,
274
8.67M
                                &scanner->next_position);
275
8.67M
    }
276
277
9.60M
  return scanner->next_token;
278
9.60M
}
279
280
unsigned int
281
json_scanner_get_next_token (JsonScanner *scanner)
282
8.67M
{
283
8.67M
  g_return_val_if_fail (scanner != NULL, JSON_TOKEN_EOF);
284
285
8.67M
  if (scanner->next_token != JSON_TOKEN_NONE)
286
8.65M
    {
287
8.65M
      json_scanner_free_value (&scanner->token, &scanner->value);
288
289
8.65M
      scanner->token = scanner->next_token;
290
8.65M
      scanner->value = scanner->next_value;
291
8.65M
      scanner->line = scanner->next_line;
292
8.65M
      scanner->position = scanner->next_position;
293
8.65M
      scanner->next_token = JSON_TOKEN_NONE;
294
8.65M
    }
295
11.5k
  else
296
11.5k
    json_scanner_get_token_i (scanner,
297
11.5k
                              &scanner->token,
298
11.5k
                              &scanner->value,
299
11.5k
                              &scanner->line,
300
11.5k
                              &scanner->position);
301
302
8.67M
  return scanner->token;
303
8.67M
}
304
305
void
306
json_scanner_input_text (JsonScanner *scanner,
307
                         const char  *text,
308
                         guint        text_len)
309
16.3k
{
310
16.3k
  g_return_if_fail (scanner != NULL);
311
16.3k
  if (text_len)
312
16.3k
    g_return_if_fail (text != NULL);
313
1
  else
314
1
    text = NULL;
315
316
16.3k
  scanner->token = JSON_TOKEN_NONE;
317
16.3k
  scanner->value.v_int64 = 0;
318
16.3k
  scanner->line = 1;
319
16.3k
  scanner->position = 0;
320
16.3k
  scanner->next_token = JSON_TOKEN_NONE;
321
322
16.3k
  scanner->text = text;
323
16.3k
  scanner->text_end = text != NULL ? text + text_len : 0;
324
325
16.3k
  g_clear_pointer (&scanner->buffer, g_free);
326
16.3k
}
327
328
static guchar
329
json_scanner_peek_next_char (JsonScanner *scanner)
330
1.90M
{
331
1.90M
  if (scanner->text < scanner->text_end)
332
1.90M
    return *scanner->text;
333
510
  else
334
510
    return 0;
335
1.90M
}
336
337
static guchar
338
json_scanner_get_char (JsonScanner *scanner,
339
                       guint       *line_p,
340
                       guint       *position_p)
341
253M
{
342
253M
  guchar fchar;
343
344
253M
  if (scanner->text < scanner->text_end)
345
253M
    fchar = *(scanner->text++);
346
698
  else
347
698
    fchar = 0;
348
  
349
253M
  if (fchar == '\n')
350
81.3k
    {
351
81.3k
      (*position_p) = 0;
352
81.3k
      (*line_p)++;
353
81.3k
    }
354
253M
  else if (fchar)
355
253M
    {
356
253M
      (*position_p)++;
357
253M
    }
358
  
359
253M
  return fchar;
360
253M
}
361
362
79.7k
/* ASCII digit classification helpers. The arguments are evaluated more
 * than once, so only pass side-effect free expressions.
 */
#define is_oct_digit(c)         (('0' <= (c)) && ((c) <= '7'))
#define is_hex_digit(c)         ((('0' <= (c)) && ((c) <= '9')) || \
                                 (('a' <= (c)) && ((c) <= 'f')) || \
                                 (('A' <= (c)) && ((c) <= 'F')))
/* Value of a hexadecimal digit. For letters the low three bits of the
 * ASCII code are 1..6 for both 'a'..'f' and 'A'..'F', hence the "+ 9".
 * Only meaningful when is_hex_digit(c) holds.
 */
#define to_hex_digit(c)         (((c) > '9') ? (((c) & 7) + 9) : ((c) - '0'))
367
368
static bool
369
json_scanner_get_unichar (JsonScanner *scanner,
370
                          gunichar    *ucs,
371
                          guint       *line_p,
372
                          guint       *position_p)
373
9.70k
{
374
9.70k
  gunichar uchar;
375
376
9.70k
  uchar = 0;
377
48.3k
  for (int i = 0; i < 4; i++)
378
38.6k
    {
379
38.6k
      char ch = json_scanner_get_char (scanner, line_p, position_p);
380
381
38.6k
      if (is_hex_digit (ch))
382
38.6k
        uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
383
90
      else
384
90
        return false;
385
38.6k
    }
386
387
9.61k
  *ucs = uchar;
388
389
9.61k
  return true;
390
9.70k
}
391
392
/*
393
 * decode_utf16_surrogate_pair:
394
 * @units: (array length=2): a pair of UTF-16 code points
395
 *
396
 * Decodes a surrogate pair of UTF-16 code points into the equivalent
397
 * Unicode code point.
398
 *
399
 * Returns: the Unicode code point equivalent to the surrogate pair
400
 */
401
static inline gunichar
402
decode_utf16_surrogate_pair (const gunichar units[2])
403
477
{
404
477
  gunichar ucs;
405
406
  /* Already checked by caller */
407
477
  g_assert (0xd800 <= units[0] && units[0] <= 0xdbff);
408
477
  g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff);
409
410
477
  ucs = 0x10000;
411
477
  ucs += (units[0] & 0x3ff) << 10;
412
477
  ucs += (units[1] & 0x3ff);
413
414
477
  return ucs;
415
477
}
416
417
static void
418
json_scanner_unexp_token (JsonScanner  *scanner,
419
                          unsigned int  expected_token,
420
                          const char   *identifier_spec,
421
                          const char   *symbol_spec,
422
                          const char   *symbol_name,
423
                          const char   *message)
424
1.44k
{
425
1.44k
  char *token_string;
426
1.44k
  gsize token_string_len;
427
1.44k
  char *expected_string;
428
1.44k
  gsize expected_string_len;
429
1.44k
  const char *message_prefix;
430
1.44k
  bool print_unexp;
431
  
432
1.44k
  g_return_if_fail (scanner != NULL);
433
  
434
1.44k
  if (identifier_spec == NULL)
435
1.44k
    identifier_spec = "identifier";
436
1.44k
  if (symbol_spec == NULL)
437
0
    symbol_spec = "symbol";
438
  
439
1.44k
  token_string_len = 56;
440
1.44k
  token_string = g_new (char, token_string_len + 1);
441
1.44k
  expected_string_len = 64;
442
1.44k
  expected_string = g_new (char, expected_string_len + 1);
443
1.44k
  print_unexp = true;
444
  
445
1.44k
  switch (scanner->token)
446
1.44k
    {
447
128
    case JSON_TOKEN_EOF:
448
128
      g_snprintf (token_string, token_string_len, "end of file");
449
128
      break;
450
      
451
343
    default:
452
343
      if (scanner->token >= 1 && scanner->token <= 255)
453
331
  {
454
331
    if ((scanner->token >= ' ' && scanner->token <= '~') ||
455
331
        strchr (scanner->config.cset_identifier_first, scanner->token) ||
456
331
        strchr (scanner->config.cset_identifier_nth, scanner->token))
457
276
      g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
458
55
    else
459
55
      g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
460
331
    break;
461
331
  }
462
12
      G_GNUC_FALLTHROUGH;
463
464
12
    case JSON_TOKEN_SYMBOL:
465
12
      if (expected_token == JSON_TOKEN_SYMBOL || expected_token > JSON_TOKEN_LAST)
466
0
  print_unexp = false;
467
12
      if (symbol_name)
468
12
  g_snprintf (token_string, token_string_len,
469
12
                    "%s%s `%s'",
470
12
                    print_unexp ? "" : "invalid ",
471
12
                    symbol_spec,
472
12
                    symbol_name);
473
0
      else
474
0
  g_snprintf (token_string, token_string_len,
475
0
                    "%s%s",
476
0
                    print_unexp ? "" : "invalid ",
477
0
                    symbol_spec);
478
12
      break;
479
 
480
669
    case JSON_TOKEN_ERROR:
481
669
      print_unexp = false;
482
669
      expected_token = JSON_TOKEN_NONE;
483
669
      switch (scanner->value.v_error)
484
669
  {
485
0
  case JSON_ERROR_TYPE_UNEXP_EOF:
486
0
    g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
487
0
    break;
488
    
489
411
  case JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING:
490
411
    g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
491
411
    break;
492
    
493
114
  case JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT:
494
114
    g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
495
114
    break;
496
    
497
71
  case JSON_ERROR_TYPE_NON_DIGIT_IN_CONST:
498
71
    g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
499
71
    break;
500
    
501
0
  case JSON_ERROR_TYPE_FLOAT_RADIX:
502
0
    g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
503
0
    break;
504
    
505
11
  case JSON_ERROR_TYPE_FLOAT_MALFORMED:
506
11
    g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
507
11
    break;
508
    
509
8
  case JSON_ERROR_TYPE_DIGIT_RADIX:
510
8
    g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
511
8
    break;
512
513
13
  case JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR:
514
13
    g_snprintf (token_string, token_string_len, "scanner: malformed surrogate pair");
515
13
    break;
516
517
10
        case JSON_ERROR_TYPE_LEADING_ZERO:
518
10
          g_snprintf (token_string, token_string_len, "scanner: leading zero in number");
519
10
          break;
520
521
0
        case JSON_ERROR_TYPE_UNESCAPED_CTRL:
522
0
          g_snprintf (token_string, token_string_len, "scanner: unescaped control character");
523
0
          break;
524
525
0
        case JSON_ERROR_TYPE_UNKNOWN_ESCAPE:
526
0
          g_snprintf (token_string, token_string_len, "scanner: unknown backslash escape sequence");
527
0
          break;
528
529
31
        case JSON_ERROR_TYPE_MALFORMED_UNICODE:
530
31
          g_snprintf (token_string, token_string_len, "scanner: malformed Unicode escape");
531
31
          break;
532
533
0
  case JSON_ERROR_TYPE_UNKNOWN:
534
0
  default:
535
0
    g_snprintf (token_string, token_string_len, "scanner: unknown error");
536
0
    break;
537
669
  }
538
669
      break;
539
      
540
669
    case JSON_TOKEN_IDENTIFIER:
541
89
      if (expected_token == JSON_TOKEN_IDENTIFIER)
542
0
  print_unexp = false;
543
89
      g_snprintf (token_string, token_string_len,
544
89
                  "%s%s `%s'",
545
89
                  print_unexp ? "" : "invalid ",
546
89
                  identifier_spec,
547
89
                  scanner->value.v_string);
548
89
      break;
549
      
550
108
    case JSON_TOKEN_INT:
551
108
      g_snprintf (token_string, token_string_len, "number `%" G_GINT64_FORMAT "'", scanner->value.v_int64);
552
108
      break;
553
      
554
7
    case JSON_TOKEN_FLOAT:
555
7
      g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
556
7
      break;
557
      
558
103
    case JSON_TOKEN_STRING:
559
103
      if (expected_token == JSON_TOKEN_STRING)
560
43
  print_unexp = false;
561
103
      g_snprintf (token_string, token_string_len,
562
103
                  "%s%sstring constant \"%s\"",
563
103
                  print_unexp ? "" : "invalid ",
564
103
                  scanner->value.v_string[0] == 0 ? "empty " : "",
565
103
                  scanner->value.v_string);
566
103
      token_string[token_string_len - 2] = '"';
567
103
      token_string[token_string_len - 1] = 0;
568
103
      break;
569
      
570
0
    case JSON_TOKEN_COMMENT_SINGLE:
571
0
    case JSON_TOKEN_COMMENT_MULTI:
572
0
      g_snprintf (token_string, token_string_len, "comment");
573
0
      break;
574
      
575
0
    case JSON_TOKEN_NONE:
576
      /* somehow the user's parsing code is screwed, there isn't much
577
       * we can do about it.
578
       * Note, a common case to trigger this is
579
       * json_scanner_peek_next_token(); json_scanner_unexp_token();
580
       * without an intermediate json_scanner_get_next_token().
581
       */
582
0
      g_assert_not_reached ();
583
0
      break;
584
1.44k
    }
585
  
586
  
587
1.44k
  switch (expected_token)
588
1.44k
    {
589
75
    case JSON_TOKEN_EOF:
590
75
      g_snprintf (expected_string, expected_string_len, "end of file");
591
75
      break;
592
306
    default:
593
306
      if (expected_token >= 1 && expected_token <= 255)
594
306
  {
595
306
    if ((expected_token >= ' ' && expected_token <= '~') ||
596
306
        strchr (scanner->config.cset_identifier_first, expected_token) ||
597
306
        strchr (scanner->config.cset_identifier_nth, expected_token))
598
306
      g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
599
0
    else
600
0
      g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
601
306
    break;
602
306
  }
603
0
      G_GNUC_FALLTHROUGH;
604
605
146
    case JSON_TOKEN_SYMBOL:
606
146
      {
607
146
        bool need_valid = (scanner->token == JSON_TOKEN_SYMBOL || scanner->token > JSON_TOKEN_LAST);
608
146
        g_snprintf (expected_string, expected_string_len,
609
146
                    "%s%s",
610
146
                    need_valid ? "valid " : "",
611
146
                    symbol_spec);
612
146
      }
613
146
      break;
614
0
    case JSON_TOKEN_INT:
615
0
      g_snprintf (expected_string,
616
0
                  expected_string_len,
617
0
                  "%snumber (integer)",
618
0
      scanner->token == expected_token ? "valid " : "");
619
0
      break;
620
0
    case JSON_TOKEN_FLOAT:
621
0
      g_snprintf (expected_string,
622
0
                  expected_string_len,
623
0
                  "%snumber (float)",
624
0
      scanner->token == expected_token ? "valid " : "");
625
0
      break;
626
231
    case JSON_TOKEN_STRING:
627
231
      g_snprintf (expected_string,
628
231
      expected_string_len,
629
231
      "%sstring constant",
630
231
      scanner->token == JSON_TOKEN_STRING ? "valid " : "");
631
231
      break;
632
20
    case JSON_TOKEN_IDENTIFIER:
633
20
      g_snprintf (expected_string,
634
20
      expected_string_len,
635
20
      "%s%s",
636
20
      scanner->token == JSON_TOKEN_IDENTIFIER ? "valid " : "",
637
20
      identifier_spec);
638
20
      break;
639
0
    case JSON_TOKEN_COMMENT_SINGLE:
640
0
      g_snprintf (expected_string,
641
0
                  expected_string_len,
642
0
                  "%scomment (single-line)",
643
0
      scanner->token == expected_token ? "valid " : "");
644
0
      break;
645
0
    case JSON_TOKEN_COMMENT_MULTI:
646
0
      g_snprintf (expected_string,
647
0
                  expected_string_len,
648
0
                  "%scomment (multi-line)",
649
0
      scanner->token == expected_token ? "valid " : "");
650
0
      break;
651
669
    case JSON_TOKEN_NONE:
652
669
    case JSON_TOKEN_ERROR:
653
      /* this is handled upon printout */
654
669
      break;
655
1.44k
    }
656
  
657
1.44k
  if (message && message[0] != 0)
658
0
    message_prefix = " - ";
659
1.44k
  else
660
1.44k
    {
661
1.44k
      message_prefix = "";
662
1.44k
      message = "";
663
1.44k
    }
664
1.44k
  if (expected_token == JSON_TOKEN_ERROR)
665
0
    {
666
0
      json_scanner_error (scanner,
667
0
                          "failure around %s%s%s",
668
0
                          token_string,
669
0
                          message_prefix,
670
0
                          message);
671
0
    }
672
1.44k
  else if (expected_token == JSON_TOKEN_NONE)
673
669
    {
674
669
      if (print_unexp)
675
0
  json_scanner_error (scanner,
676
0
                            "unexpected %s%s%s",
677
0
                            token_string,
678
0
                            message_prefix,
679
0
                            message);
680
669
      else
681
669
  json_scanner_error (scanner,
682
669
                            "%s%s%s",
683
669
                            token_string,
684
669
                            message_prefix,
685
669
                            message);
686
669
    }
687
778
  else
688
778
    {
689
778
      if (print_unexp)
690
735
  json_scanner_error (scanner,
691
735
                            "unexpected %s, expected %s%s%s",
692
735
                            token_string,
693
735
                            expected_string,
694
735
                            message_prefix,
695
735
                            message);
696
43
      else
697
43
  json_scanner_error (scanner,
698
43
                            "%s, expected %s%s%s",
699
43
                            token_string,
700
43
                            expected_string,
701
43
                            message_prefix,
702
43
                            message);
703
778
    }
704
  
705
1.44k
  g_free (token_string);
706
1.44k
  g_free (expected_string);
707
1.44k
}
708
709
void
710
json_scanner_unknown_token (JsonScanner  *scanner,
711
                            unsigned int  token)
712
1.44k
{
713
1.44k
  const char *symbol_name;
714
1.44k
  char *msg;
715
1.44k
  unsigned int cur_token;
716
717
1.44k
  cur_token = json_scanner_get_current_token (scanner);
718
1.44k
  msg = NULL;
719
720
1.44k
  symbol_name = NULL;
721
7.23k
  for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++)
722
5.78k
    if (json_symbols[i].token == token)
723
0
      symbol_name = json_symbol_names + json_symbols[i].name_offset;
724
725
1.44k
  if (symbol_name != NULL)
726
0
    msg = g_strconcat ("e.g. '", symbol_name, "'", NULL);
727
728
1.44k
  symbol_name = "???";
729
7.23k
  for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++)
730
5.78k
    if (json_symbols[i].token == cur_token)
731
12
      symbol_name = json_symbol_names + json_symbols[i].name_offset;
732
733
1.44k
  json_scanner_unexp_token (scanner, token,
734
1.44k
                            NULL, "value",
735
1.44k
                            symbol_name,
736
1.44k
                            msg);
737
738
1.44k
  g_free (msg);
739
1.44k
}
740
741
static void
742
json_scanner_get_token_i (JsonScanner    *scanner,
743
              unsigned int   *token_p,
744
              JsonTokenValue *value_p,
745
              guint          *line_p,
746
              guint          *position_p)
747
8.68M
{
748
8.68M
  do
749
8.76M
    {
750
8.76M
      json_scanner_free_value (token_p, value_p);
751
8.76M
      json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
752
8.76M
    }
753
8.76M
  while (((*token_p > 0 && *token_p < 256) &&
754
8.76M
    strchr (scanner->config.cset_skip_characters, *token_p)) ||
755
8.76M
   *token_p == JSON_TOKEN_COMMENT_MULTI ||
756
8.76M
   *token_p == JSON_TOKEN_COMMENT_SINGLE);
757
758
8.68M
  switch (*token_p)
759
8.68M
    {
760
6.24k
    case JSON_TOKEN_IDENTIFIER:
761
6.24k
      break;
762
763
6.85k
    case JSON_TOKEN_SYMBOL:
764
6.85k
      *token_p = GPOINTER_TO_UINT (value_p->v_symbol);
765
6.85k
      break;
766
767
8.67M
    default:
768
8.67M
      break;
769
8.68M
    }
770
  
771
8.68M
  errno = 0;
772
8.68M
}
773
774
static void
775
json_scanner_get_token_ll (JsonScanner    *scanner,
776
                           unsigned int   *token_p,
777
                           JsonTokenValue *value_p,
778
                           guint          *line_p,
779
                           guint          *position_p)
780
8.76M
{
781
8.76M
  const JsonScannerConfig *config;
782
8.76M
  unsigned int token;
783
8.76M
  bool in_comment_multi = false;
784
8.76M
  bool in_comment_single = false;
785
8.76M
  bool in_string_sq = false;
786
8.76M
  bool in_string_dq = false;
787
8.76M
  GString *gstring = NULL;
788
8.76M
  JsonTokenValue value;
789
8.76M
  guchar ch;
790
  
791
8.76M
  config = &scanner->config;
792
8.76M
  (*value_p).v_int64 = 0;
793
  
794
8.76M
  if (scanner->text >= scanner->text_end ||
795
8.76M
      scanner->token == JSON_TOKEN_EOF)
796
15.1k
    {
797
15.1k
      *token_p = JSON_TOKEN_EOF;
798
15.1k
      return;
799
15.1k
    }
800
  
801
8.75M
  gstring = NULL;
802
  
803
8.75M
  do /* while (ch != 0) */
804
8.75M
    {
805
8.75M
      ch = json_scanner_get_char (scanner, line_p, position_p);
806
807
8.75M
      value.v_int64 = 0;
808
8.75M
      token = JSON_TOKEN_NONE;
809
810
      /* this is *evil*, but needed ;(
811
       * we first check for identifier first character, because  it
812
       * might interfere with other key chars like slashes or numbers
813
       */
814
8.75M
      if (ch != 0 && strchr (config->cset_identifier_first, ch))
815
13.1k
  goto identifier_precedence;
816
817
8.73M
      switch (ch)
818
8.73M
  {
819
0
  case 0:
820
0
    token = JSON_TOKEN_EOF;
821
0
    (*position_p)++;
822
    /* ch = 0; */
823
0
    break;
824
825
1.18k
  case '/':
826
1.18k
    if (config->strict || json_scanner_peek_next_char (scanner) != '*')
827
801
      goto default_case;
828
380
    json_scanner_get_char (scanner, line_p, position_p);
829
380
    token = JSON_TOKEN_COMMENT_MULTI;
830
380
    in_comment_multi = true;
831
380
    gstring = g_string_new (NULL);
832
872k
    while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
833
872k
      {
834
872k
        if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
835
320
    {
836
320
      json_scanner_get_char (scanner, line_p, position_p);
837
320
      in_comment_multi = false;
838
320
      break;
839
320
    }
840
872k
        else
841
872k
    gstring = g_string_append_c (gstring, ch);
842
872k
      }
843
380
    ch = 0;
844
380
    break;
845
846
825
        case '\'':
847
825
          if (config->strict)
848
0
            goto default_case;
849
825
          token = JSON_TOKEN_STRING;
850
825
          in_string_sq = true;
851
825
          gstring = g_string_new (NULL);
852
977k
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
853
977k
            {
854
977k
              if (ch == '\'' || token == JSON_TOKEN_ERROR)
855
784
                {
856
784
                  in_string_sq = false;
857
784
                  break;
858
784
                }
859
976k
              else
860
976k
                {
861
976k
                  g_string_append_c (gstring, ch);
862
976k
                }
863
977k
            }
864
825
          ch = 0;
865
825
          break;
866
867
2.47M
  case '"':
868
2.47M
    token = JSON_TOKEN_STRING;
869
2.47M
    in_string_dq = true;
870
2.47M
    gstring = g_string_new (NULL);
871
238M
    while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
872
238M
      {
873
238M
        if (ch == '"' || token == JSON_TOKEN_ERROR)
874
2.47M
    {
875
2.47M
      in_string_dq = false;
876
2.47M
      break;
877
2.47M
    }
878
236M
        else
879
236M
    {
880
236M
      if (ch == '\\')
881
2.21M
        {
882
2.21M
          ch = json_scanner_get_char (scanner, line_p, position_p);
883
2.21M
          switch (ch)
884
2.21M
      {
885
26
      case 0:
886
26
        break;
887
888
46.0k
                        case '"':
889
46.0k
                          gstring = g_string_append_c (gstring, '"');
890
46.0k
                          break;
891
        
892
1.10M
      case '\\':
893
1.10M
        gstring = g_string_append_c (gstring, '\\');
894
1.10M
        break;
895
896
7.59k
                        case '/':
897
7.59k
                          gstring = g_string_append_c (gstring, '/');
898
7.59k
                          break;
899
        
900
103k
      case 'n':
901
103k
        gstring = g_string_append_c (gstring, '\n');
902
103k
        break;
903
        
904
117k
      case 't':
905
117k
        gstring = g_string_append_c (gstring, '\t');
906
117k
        break;
907
        
908
8.50k
      case 'r':
909
8.50k
        gstring = g_string_append_c (gstring, '\r');
910
8.50k
        break;
911
        
912
41.2k
      case 'b':
913
41.2k
        gstring = g_string_append_c (gstring, '\b');
914
41.2k
        break;
915
        
916
20.6k
      case 'f':
917
20.6k
        gstring = g_string_append_c (gstring, '\f');
918
20.6k
        break;
919
920
9.19k
                        case 'u':
921
9.19k
                        {
922
9.19k
                          guint fchar = json_scanner_peek_next_char (scanner);
923
9.19k
                          if (is_hex_digit (fchar))
924
9.18k
                            {
925
9.18k
                              gunichar ucs;
926
927
9.18k
                              if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p))
928
58
                                {
929
58
                                  token = JSON_TOKEN_ERROR;
930
58
                                  value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE;
931
58
                                  g_string_free (gstring, TRUE);
932
58
                                  gstring = NULL;
933
58
                                  break;
934
58
                                }
935
936
                              /* resolve UTF-16 surrogates for Unicode characters not in the BMP,
937
                               * as per ECMA 404, § 9, "String"
938
                               */
939
9.12k
                              if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
940
551
                                {
941
551
                                  unsigned int next_ch;
942
943
551
                                  next_ch = json_scanner_peek_next_char (scanner);
944
551
                                  if (next_ch != '\\')
945
22
                                    {
946
22
                                      token = JSON_TOKEN_ERROR;
947
22
                                      value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR;
948
22
                                      g_string_free (gstring, TRUE);
949
22
                                      gstring = NULL;
950
22
                                      break;
951
22
                                    }
952
529
                                  else
953
529
                                    json_scanner_get_char (scanner, line_p, position_p);
954
955
529
                                  next_ch = json_scanner_peek_next_char (scanner);
956
529
                                  if (next_ch != 'u')
957
6
                                    {
958
6
                                      token = JSON_TOKEN_ERROR;
959
6
                                      value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR;
960
6
                                      g_string_free (gstring, TRUE);
961
6
                                      gstring = NULL;
962
6
                                      break;
963
6
                                    }
964
523
                                  else
965
523
                                    json_scanner_get_char (scanner, line_p, position_p);
966
967
                                  /* read next surrogate */
968
523
                                  gunichar units[2];
969
970
523
                                  units[0] = ucs;
971
972
523
                                  if (!json_scanner_get_unichar (scanner, &ucs, line_p, position_p))
973
32
                                    {
974
32
                                      token = JSON_TOKEN_ERROR;
975
32
                                      value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE;
976
32
                                      g_string_free (gstring, TRUE);
977
32
                                      gstring = NULL;
978
32
                                      break;
979
32
                                    }
980
981
491
                                  units[1] = ucs;
982
983
491
                                  if (0xdc00 <= units[1] && units[1] <= 0xdfff &&
984
491
                                      0xd800 <= units[0] && units[0] <= 0xdbff)
985
477
                                    {
986
477
                                      ucs = decode_utf16_surrogate_pair (units);
987
477
                                      if (!g_unichar_validate (ucs))
988
0
                                        {
989
0
                                          token = JSON_TOKEN_ERROR;
990
0
                                          value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE;
991
0
                                          g_string_free (gstring, TRUE);
992
0
                                          gstring = NULL;
993
0
                                          break;
994
0
                                        }
995
477
                                    }
996
14
                                  else
997
14
                                    {
998
14
                                      token = JSON_TOKEN_ERROR;
999
14
                                      value.v_error = JSON_ERROR_TYPE_MALFORMED_SURROGATE_PAIR;
1000
14
                                      g_string_free (gstring, TRUE);
1001
14
                                      gstring = NULL;
1002
14
                                      break;
1003
14
                                    }
1004
491
                                }
1005
8.57k
                              else
1006
8.57k
                                {
1007
8.57k
                                  if (!g_unichar_validate (ucs))
1008
0
                                    {
1009
0
                                      token = JSON_TOKEN_ERROR;
1010
0
                                      value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE;
1011
0
                                      g_string_free (gstring, TRUE);
1012
0
                                      gstring = NULL;
1013
0
                                      break;
1014
0
                                    }
1015
8.57k
                                }
1016
1017
9.05k
                              gstring = g_string_append_unichar (gstring, ucs);
1018
9.05k
                            }
1019
13
                          else
1020
13
                            {
1021
13
                              token = JSON_TOKEN_ERROR;
1022
13
                              value.v_error = JSON_ERROR_TYPE_MALFORMED_UNICODE;
1023
13
                              g_string_free (gstring, TRUE);
1024
13
                              gstring = NULL;
1025
13
                            }
1026
9.06k
                          break;
1027
9.19k
                        }
1028
1029
9.06k
                        case '0':
1030
9.67k
                        case '1':
1031
11.8k
                        case '2':
1032
17.1k
                        case '3':
1033
30.8k
                        case '4':
1034
38.5k
                        case '5':
1035
43.5k
                        case '6':
1036
50.2k
                        case '7':
1037
50.2k
                          if (config->strict)
1038
0
                            {
1039
0
                              token = JSON_TOKEN_ERROR;
1040
0
                              value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE;
1041
0
                              g_string_free (gstring, TRUE);
1042
0
                              gstring = NULL;
1043
0
                            }
1044
50.2k
                          else
1045
50.2k
                            {
1046
50.2k
                              gunichar ucs = (ch - '0');
1047
50.2k
                              guchar next_ch;
1048
50.2k
                              unsigned i;
1049
1050
102k
                              for (i = 0; i < 2; i++)
1051
79.7k
                                {
1052
79.7k
                                  next_ch = json_scanner_peek_next_char (scanner);
1053
1054
79.7k
                                  if (is_oct_digit (next_ch))
1055
52.3k
                                    {
1056
52.3k
                                      ucs = ucs * 8 + (next_ch - '0');
1057
52.3k
                                      json_scanner_get_char (scanner, line_p, position_p);
1058
52.3k
                                    }
1059
27.4k
                                  else
1060
27.4k
                                    {
1061
27.4k
                                      break;
1062
27.4k
                                    }
1063
79.7k
                                }
1064
1065
50.2k
                              gstring = g_string_append_unichar (gstring, ucs);
1066
50.2k
                            }
1067
50.2k
                          break;
1068
1069
713k
      default:
1070
713k
                          if (config->strict)
1071
0
                            {
1072
0
                              token = JSON_TOKEN_ERROR;
1073
0
                              value.v_error = JSON_ERROR_TYPE_UNKNOWN_ESCAPE;
1074
0
                              g_string_free (gstring, TRUE);
1075
0
                              gstring = NULL;
1076
0
                            }
1077
713k
                          else
1078
713k
                            {
1079
713k
                              gstring = g_string_append_c (gstring, ch);
1080
713k
                            }
1081
713k
        break;
1082
2.21M
      }
1083
2.21M
        }
1084
234M
                  else if (ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' || ch == '\b')
1085
530
                    {
1086
530
                      token = JSON_TOKEN_ERROR;
1087
530
                      value.v_error = JSON_ERROR_TYPE_UNESCAPED_CTRL;
1088
530
                      g_string_free (gstring, TRUE);
1089
530
                      gstring = NULL;
1090
530
                      break;
1091
530
                    }
1092
234M
      else
1093
234M
        gstring = g_string_append_c (gstring, ch);
1094
236M
    }
1095
238M
      }
1096
2.47M
    ch = 0;
1097
2.47M
    break;
1098
1099
        /* {{{ number parsing */
1100
854
        case '-':
1101
854
          if (!g_ascii_isdigit (json_scanner_peek_next_char (scanner)))
1102
4
            {
1103
4
              token = JSON_TOKEN_ERROR;
1104
4
              value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST;
1105
4
              ch = 0;
1106
4
              break;
1107
4
            }
1108
850
          G_GNUC_FALLTHROUGH;
1109
1110
4.20k
  case '0':
1111
5.98k
  case '1':
1112
7.26k
  case '2':
1113
39.3k
  case '3':
1114
40.6k
  case '4':
1115
41.3k
  case '5':
1116
138k
  case '6':
1117
146k
  case '7':
1118
153k
  case '8':
1119
212k
  case '9':
1120
212k
  {
1121
212k
          bool in_number = true;
1122
212k
          bool leading_sign = ch == '-';
1123
212k
          bool leading_zero = ch == '0';
1124
212k
    char *endptr;
1125
    
1126
212k
    if (token == JSON_TOKEN_NONE)
1127
212k
      token = JSON_TOKEN_INT;
1128
    
1129
212k
    gstring = g_string_new ("");
1130
212k
    gstring = g_string_append_c (gstring, ch);
1131
1132
212k
          if (leading_sign)
1133
850
            {
1134
850
              ch = json_scanner_get_char (scanner, line_p, position_p);
1135
850
              leading_zero = ch == '0';
1136
850
              g_string_append_c (gstring, ch);
1137
850
            }
1138
1139
212k
    do /* while (in_number) */
1140
1.61M
      {
1141
1.61M
        bool is_E = token == JSON_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1142
1143
1.61M
        ch = json_scanner_peek_next_char (scanner);
1144
1145
1.61M
        if ((ch >= '0' && ch <= '9') ||
1146
1.61M
                  (ch == 'e' || ch == 'E') ||
1147
1.61M
      ch == '.' ||
1148
1.61M
      (is_E && (ch == '+' || ch == '-')))
1149
1.40M
    {
1150
1.40M
      ch = json_scanner_get_char (scanner, line_p, position_p);
1151
1152
1.40M
      switch (ch)
1153
1.40M
        {
1154
1.05k
        case '.':
1155
1.05k
                      {
1156
1.05k
                        unsigned int next_ch = json_scanner_peek_next_char (scanner);
1157
1158
1.05k
                        if (!g_ascii_isdigit (next_ch))
1159
13
                          {
1160
13
                            token = JSON_TOKEN_ERROR;
1161
13
                            value.v_error = JSON_ERROR_TYPE_FLOAT_MALFORMED;
1162
13
                            in_number = false;
1163
13
                          }
1164
1.03k
                        else
1165
1.03k
        {
1166
1.03k
                            token = JSON_TOKEN_FLOAT;
1167
1.03k
                            gstring = g_string_append_c (gstring, ch);
1168
1.03k
                          }
1169
1.05k
                      }
1170
1.05k
                      break;
1171
1172
776k
        case '0':
1173
1.13M
        case '1':
1174
1.13M
        case '2':
1175
1.14M
        case '3':
1176
1.15M
        case '4':
1177
1.15M
        case '5':
1178
1.16M
        case '6':
1179
1.16M
        case '7':
1180
1.18M
        case '8':
1181
1.19M
        case '9':
1182
1.19M
                      if (leading_zero && token != JSON_TOKEN_FLOAT)
1183
442
                        {
1184
442
                          token = JSON_TOKEN_ERROR;
1185
442
                          value.v_error= JSON_ERROR_TYPE_LEADING_ZERO;
1186
442
                          in_number = false;
1187
442
                        }
1188
1.19M
                      else
1189
1.19M
                        gstring = g_string_append_c (gstring, ch);
1190
1.19M
          break;
1191
1192
85.8k
        case '-':
1193
86.4k
        case '+':
1194
86.4k
          if (token != JSON_TOKEN_FLOAT)
1195
0
      {
1196
0
        token = JSON_TOKEN_ERROR;
1197
0
        value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST;
1198
0
        in_number = false;
1199
0
      }
1200
86.4k
          else
1201
86.4k
      gstring = g_string_append_c (gstring, ch);
1202
86.4k
          break;
1203
1204
18.6k
        case 'e':
1205
126k
        case 'E':
1206
126k
                      token = JSON_TOKEN_FLOAT;
1207
126k
                      gstring = g_string_append_c (gstring, ch);
1208
126k
          break;
1209
1210
0
        default:
1211
0
                      token = JSON_TOKEN_ERROR;
1212
0
                      value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST;
1213
0
                      in_number = false;
1214
0
                      break;
1215
1.40M
        }
1216
1.40M
    }
1217
211k
        else
1218
211k
    in_number = false;
1219
1.61M
      }
1220
1.61M
    while (in_number);
1221
1222
212k
          if (token != JSON_TOKEN_ERROR)
1223
211k
            {
1224
211k
              endptr = NULL;
1225
211k
              if (token == JSON_TOKEN_FLOAT)
1226
1.03k
                value.v_float = g_ascii_strtod (gstring->str, &endptr);
1227
210k
              else if (token == JSON_TOKEN_INT)
1228
210k
                value.v_int64 = g_ascii_strtoll (gstring->str, &endptr, 10);
1229
1230
211k
              if (endptr && *endptr)
1231
88
                {
1232
88
                  token = JSON_TOKEN_ERROR;
1233
88
                  if (*endptr == 'e' || *endptr == 'E')
1234
79
                    value.v_error = JSON_ERROR_TYPE_NON_DIGIT_IN_CONST;
1235
9
                  else
1236
9
                    value.v_error = JSON_ERROR_TYPE_DIGIT_RADIX;
1237
88
                }
1238
211k
            }
1239
212k
    g_string_free (gstring, TRUE);
1240
212k
    gstring = NULL;
1241
212k
    ch = 0;
1242
212k
  }
1243
0
  break; /* number parsing }}} */
1244
1245
6.04M
  default:
1246
6.04M
  default_case:
1247
6.04M
  {
1248
6.04M
    if (!config->strict &&
1249
6.04M
              config->cpair_comment_single &&
1250
6.04M
        ch == config->cpair_comment_single[0])
1251
801
      {
1252
801
        token = JSON_TOKEN_COMMENT_SINGLE;
1253
801
        in_comment_single = true;
1254
801
        gstring = g_string_new (NULL);
1255
801
        ch = json_scanner_get_char (scanner, line_p, position_p);
1256
299k
        while (ch != 0)
1257
299k
    {
1258
299k
      if (ch == config->cpair_comment_single[1])
1259
738
        {
1260
738
          in_comment_single = false;
1261
738
          ch = 0;
1262
738
          break;
1263
738
        }
1264
      
1265
299k
      gstring = g_string_append_c (gstring, ch);
1266
299k
      ch = json_scanner_get_char (scanner, line_p, position_p);
1267
299k
    }
1268
        /* ignore a missing newline at EOF for single line comments */
1269
801
        if (in_comment_single &&
1270
801
      config->cpair_comment_single[1] == '\n')
1271
0
    in_comment_single = false;
1272
801
      }
1273
6.04M
    else if (ch && strchr (config->cset_identifier_first, ch))
1274
0
      {
1275
13.1k
      identifier_precedence:
1276
        
1277
13.1k
        if (config->cset_identifier_nth && ch &&
1278
13.1k
      strchr (config->cset_identifier_nth,
1279
13.1k
        json_scanner_peek_next_char (scanner)))
1280
13.1k
    {
1281
13.1k
      token = JSON_TOKEN_IDENTIFIER;
1282
13.1k
      gstring = g_string_new (NULL);
1283
13.1k
      gstring = g_string_append_c (gstring, ch);
1284
13.1k
      do
1285
178k
        {
1286
178k
          ch = json_scanner_get_char (scanner, line_p, position_p);
1287
178k
          gstring = g_string_append_c (gstring, ch);
1288
178k
          ch = json_scanner_peek_next_char (scanner);
1289
178k
        }
1290
178k
      while (ch && strchr (config->cset_identifier_nth, ch));
1291
13.1k
      ch = 0;
1292
13.1k
    }
1293
13.1k
      }
1294
6.05M
    if (ch)
1295
6.04M
      {
1296
6.04M
              token = ch;
1297
6.04M
        ch = 0;
1298
6.04M
      }
1299
6.05M
  } /* default_case:... */
1300
6.05M
  break;
1301
8.73M
  }
1302
8.75M
      g_assert (ch == 0 && token != JSON_TOKEN_NONE); /* paranoid */
1303
8.75M
    }
1304
8.75M
  while (ch != 0);
1305
  
1306
8.75M
  if (in_comment_multi || in_comment_single ||
1307
8.75M
      in_string_sq || in_string_dq)
1308
1.11k
    {
1309
1.11k
      token = JSON_TOKEN_ERROR;
1310
1.11k
      if (gstring)
1311
510
  {
1312
510
    g_string_free (gstring, TRUE);
1313
510
    gstring = NULL;
1314
510
  }
1315
1.11k
      (*position_p)++;
1316
1.11k
      if (in_comment_multi || in_comment_single)
1317
123
  value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_COMMENT;
1318
996
      else /* (in_string_sq || in_string_dq) */
1319
996
  value.v_error = JSON_ERROR_TYPE_UNEXP_EOF_IN_STRING;
1320
1.11k
    }
1321
  
1322
8.75M
  if (gstring)
1323
2.49M
    {
1324
2.49M
      value.v_string = g_string_free (gstring, FALSE);
1325
2.49M
      gstring = NULL;
1326
2.49M
    }
1327
  
1328
8.75M
  if (token == JSON_TOKEN_IDENTIFIER)
1329
13.1k
    {
1330
57.2k
      for (unsigned i = 0; i < G_N_ELEMENTS (json_symbols); i++)
1331
50.9k
        {
1332
50.9k
          const char *symbol = json_symbol_names + json_symbols[i].name_offset;
1333
50.9k
          if (strcmp (value.v_identifier, symbol) == 0)
1334
6.85k
            {
1335
6.85k
              g_free (value.v_identifier);
1336
6.85k
              token = JSON_TOKEN_SYMBOL;
1337
6.85k
              value.v_symbol = GUINT_TO_POINTER (json_symbols[i].token);
1338
6.85k
              break;
1339
6.85k
            }
1340
50.9k
        }
1341
13.1k
    }
1342
  
1343
8.75M
  *token_p = token;
1344
8.75M
  *value_p = value;
1345
8.75M
}
1346
1347
gint64
1348
json_scanner_get_int64_value (const JsonScanner *scanner)
1349
210k
{
1350
210k
  return scanner->value.v_int64;
1351
210k
}
1352
1353
double
1354
json_scanner_get_float_value (const JsonScanner *scanner)
1355
941
{
1356
941
  return scanner->value.v_float;
1357
941
}
1358
1359
const char *
1360
json_scanner_get_string_value (const JsonScanner *scanner)
1361
641k
{
1362
641k
  return scanner->value.v_string;
1363
641k
}
1364
1365
char *
1366
json_scanner_dup_string_value (const JsonScanner *scanner)
1367
1.83M
{
1368
1.83M
  return g_strdup (scanner->value.v_string);
1369
1.83M
}
1370
1371
const char *
1372
json_scanner_get_identifier (const JsonScanner *scanner)
1373
0
{
1374
0
  return scanner->value.v_identifier;
1375
0
}
1376
1377
char *
1378
json_scanner_dup_identifier (const JsonScanner *scanner)
1379
5.51k
{
1380
5.51k
  return g_strdup (scanner->value.v_identifier);
1381
5.51k
}
1382
1383
unsigned int
1384
json_scanner_get_current_line (const JsonScanner *scanner)
1385
1.44k
{
1386
1.44k
  return scanner->line;
1387
1.44k
}
1388
1389
unsigned int
1390
json_scanner_get_current_position (const JsonScanner *scanner)
1391
1.44k
{
1392
1.44k
  return scanner->position;
1393
1.44k
}
1394
1395
unsigned int
1396
json_scanner_get_current_token (const JsonScanner *scanner)
1397
1.44k
{
1398
1.44k
  return scanner->token;
1399
1.44k
}