Coverage Report

Created: 2025-07-11 06:31

/src/json-glib/json-glib/json-scanner.c
Line
Count
Source (jump to first uncovered line)
1
/* json-scanner.c: Tokenizer for JSON
2
 * Copyright (C) 2008 OpenedHand
3
 *
4
 * Based on JsonScanner: Flexible lexical scanner for general purpose.
5
 * Copyright (C) 1997, 1998 Tim Janik
6
 *
7
 * Modified by Emmanuele Bassi <ebassi@openedhand.com>
8
 *
9
 * This library is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2 of the License, or (at your option) any later version.
13
 *
14
 * This library is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "config.h"
24
25
#include <errno.h>
26
#include <stdlib.h>
27
#include <stdarg.h>
28
#include <string.h>
29
#include <stdio.h>
30
#ifdef HAVE_UNISTD_H
31
#include <unistd.h>
32
#endif
33
34
#include <glib.h>
35
#include <glib/gprintf.h>
36
37
#include "json-scanner.h"
38
39
#ifdef G_OS_WIN32
40
#include <io.h> /* For _read() */
41
#endif
42
43
enum {
44
  JSON_ERR_MALFORMED_SURROGATE_PAIR = G_TOKEN_LAST + 1,
45
};
46
47
struct _JsonScannerConfig
48
{
49
  /* Character sets
50
   */
51
  gchar *cset_skip_characters; /* default: " \t\n" */
52
  gchar *cset_identifier_first;
53
  gchar *cset_identifier_nth;
54
  gchar *cpair_comment_single; /* default: "#\n" */
55
  
56
  /* Should symbol lookup work case sensitive? */
57
  guint case_sensitive : 1;
58
  
59
  /* Boolean values to be adjusted "on the fly"
60
   * to configure scanning behaviour.
61
   */
62
  guint skip_comment_multi : 1;  /* C like comment */
63
  guint skip_comment_single : 1; /* single line comment */
64
  guint scan_comment_multi : 1;  /* scan multi line comments? */
65
  guint scan_identifier : 1;
66
  guint scan_identifier_1char : 1;
67
  guint scan_identifier_NULL : 1;
68
  guint scan_symbols : 1;
69
  guint scan_binary : 1;
70
  guint scan_octal : 1;
71
  guint scan_float : 1;
72
  guint scan_hex : 1;            /* `0x0ff0' */
73
  guint scan_hex_dollar : 1;     /* `$0ff0' */
74
  guint scan_string_sq : 1;      /* string: 'anything' */
75
  guint scan_string_dq : 1;      /* string: "\\-escapes!\n" */
76
  guint numbers_2_int : 1;       /* bin, octal, hex => int */
77
  guint int_2_float : 1;         /* int => G_TOKEN_FLOAT? */
78
  guint identifier_2_string : 1;
79
  guint char_2_token : 1;        /* return G_TOKEN_CHAR? */
80
  guint symbol_2_token : 1;
81
  guint scope_0_fallback : 1;    /* try scope 0 on lookups? */
82
  guint store_int64 : 1;         /* use value.v_int64 rather than v_int */
83
  guint padding_dummy;
84
};
85
86
static JsonScannerConfig json_scanner_config_template =
87
{
88
  .cset_skip_characters = ( " \t\r\n" ),
89
  .cset_identifier_first = (
90
    "_"
91
    G_CSET_a_2_z
92
    G_CSET_A_2_Z
93
  ),
94
  .cset_identifier_nth = (
95
    G_CSET_DIGITS
96
    "-_"
97
    G_CSET_a_2_z
98
    G_CSET_A_2_Z
99
  ),
100
  .cpair_comment_single = ( "//\n" ),
101
  .case_sensitive = TRUE,
102
  .skip_comment_multi = TRUE,
103
  .skip_comment_single = TRUE,
104
  .scan_comment_multi = FALSE,
105
  .scan_identifier = TRUE,
106
  .scan_identifier_1char = TRUE,
107
  .scan_identifier_NULL = FALSE,
108
  .scan_symbols = TRUE,
109
  .scan_binary = TRUE,
110
  .scan_octal = TRUE,
111
  .scan_float = TRUE,
112
  .scan_hex = TRUE,
113
  .scan_hex_dollar = TRUE,
114
  .scan_string_sq = TRUE,
115
  .scan_string_dq = TRUE,
116
  .numbers_2_int = TRUE,
117
  .int_2_float = FALSE,
118
  .identifier_2_string = FALSE,
119
  .char_2_token = TRUE,
120
  .symbol_2_token = TRUE,
121
  .scope_0_fallback = FALSE,
122
  .store_int64 = TRUE,
123
  .padding_dummy = 0,
124
};
125
126
/* --- defines --- */
127
0
/* Lower-cases a single byte: 'A'..'Z' plus the byte ranges 192..214 and
 * 216..222 are mapped onto their lower-case counterparts; every other
 * value is returned unchanged.  The argument is evaluated several times,
 * so it must not have side effects.
 */
#define to_lower(c) \
  ((guchar) ( \
    (((guchar) (c)) >= 'A' && ((guchar) (c)) <= 'Z') \
      ? (((guchar) (c)) | ('a' - 'A')) \
      : (((guchar) (c)) >= 192 && ((guchar) (c)) <= 214) \
          ? (((guchar) (c)) | (224 - 192)) \
          : (((guchar) (c)) >= 216 && ((guchar) (c)) <= 222) \
              ? (((guchar) (c)) | (248 - 216)) \
              : ((guchar) (c)) \
  ))

#define READ_BUFFER_SIZE  (4000)
137
138
/* --- typedefs --- */
139
typedef struct  _JsonScannerKey JsonScannerKey;
140
141
struct  _JsonScannerKey
142
{
143
  guint scope_id;
144
  gchar *symbol;
145
  gpointer value;
146
};
147
148
/* --- prototypes --- */
149
static gboolean json_scanner_key_equal (gconstpointer v1,
150
                                        gconstpointer v2);
151
static guint    json_scanner_key_hash  (gconstpointer v);
152
153
static inline
154
JsonScannerKey *json_scanner_lookup_internal (JsonScanner *scanner,
155
                                              guint        scope_id,
156
                                              const gchar *symbol);
157
static void     json_scanner_get_token_ll    (JsonScanner *scanner,
158
                                              GTokenType  *token_p,
159
                                              GTokenValue *value_p,
160
                                              guint       *line_p,
161
                                              guint       *position_p);
162
static void json_scanner_get_token_i     (JsonScanner *scanner,
163
                                              GTokenType  *token_p,
164
                                              GTokenValue *value_p,
165
                                              guint       *line_p,
166
                                              guint       *position_p);
167
168
static guchar   json_scanner_peek_next_char  (JsonScanner *scanner);
169
static guchar   json_scanner_get_char        (JsonScanner *scanner,
170
                                              guint       *line_p,
171
                                              guint       *position_p);
172
static gunichar json_scanner_get_unichar     (JsonScanner *scanner,
173
                                              guint       *line_p,
174
                                              guint       *position_p);
175
176
/* --- functions --- */
177
static inline gint
178
json_scanner_char_2_num (guchar c,
179
                         guchar base)
180
0
{
181
0
  if (c >= '0' && c <= '9')
182
0
    c -= '0';
183
0
  else if (c >= 'A' && c <= 'Z')
184
0
    c -= 'A' - 10;
185
0
  else if (c >= 'a' && c <= 'z')
186
0
    c -= 'a' - 10;
187
0
  else
188
0
    return -1;
189
  
190
0
  if (c < base)
191
0
    return c;
192
  
193
0
  return -1;
194
0
}
195
196
JsonScanner *
197
json_scanner_new (void)
198
0
{
199
0
  JsonScanner *scanner;
200
0
  JsonScannerConfig *config_templ;
201
  
202
0
  config_templ = &json_scanner_config_template;
203
  
204
0
  scanner = g_new0 (JsonScanner, 1);
205
  
206
0
  scanner->user_data = NULL;
207
0
  scanner->max_parse_errors = 1;
208
0
  scanner->parse_errors = 0;
209
0
  scanner->input_name = NULL;
210
0
  g_datalist_init (&scanner->qdata);
211
  
212
0
  scanner->config = g_new0 (JsonScannerConfig, 1);
213
  
214
0
  scanner->config->case_sensitive  = config_templ->case_sensitive;
215
0
  scanner->config->cset_skip_characters  = config_templ->cset_skip_characters;
216
0
  if (!scanner->config->cset_skip_characters)
217
0
    scanner->config->cset_skip_characters = "";
218
0
  scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
219
0
  scanner->config->cset_identifier_nth   = config_templ->cset_identifier_nth;
220
0
  scanner->config->cpair_comment_single  = config_templ->cpair_comment_single;
221
0
  scanner->config->skip_comment_multi  = config_templ->skip_comment_multi;
222
0
  scanner->config->skip_comment_single   = config_templ->skip_comment_single;
223
0
  scanner->config->scan_comment_multi  = config_templ->scan_comment_multi;
224
0
  scanner->config->scan_identifier   = config_templ->scan_identifier;
225
0
  scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
226
0
  scanner->config->scan_identifier_NULL  = config_templ->scan_identifier_NULL;
227
0
  scanner->config->scan_symbols    = config_templ->scan_symbols;
228
0
  scanner->config->scan_binary     = config_templ->scan_binary;
229
0
  scanner->config->scan_octal    = config_templ->scan_octal;
230
0
  scanner->config->scan_float    = config_templ->scan_float;
231
0
  scanner->config->scan_hex    = config_templ->scan_hex;
232
0
  scanner->config->scan_hex_dollar   = config_templ->scan_hex_dollar;
233
0
  scanner->config->scan_string_sq  = config_templ->scan_string_sq;
234
0
  scanner->config->scan_string_dq  = config_templ->scan_string_dq;
235
0
  scanner->config->numbers_2_int   = config_templ->numbers_2_int;
236
0
  scanner->config->int_2_float     = config_templ->int_2_float;
237
0
  scanner->config->identifier_2_string   = config_templ->identifier_2_string;
238
0
  scanner->config->char_2_token    = config_templ->char_2_token;
239
0
  scanner->config->symbol_2_token  = config_templ->symbol_2_token;
240
0
  scanner->config->scope_0_fallback  = config_templ->scope_0_fallback;
241
0
  scanner->config->store_int64     = config_templ->store_int64;
242
  
243
0
  scanner->token = G_TOKEN_NONE;
244
0
  scanner->value.v_int64 = 0;
245
0
  scanner->line = 1;
246
0
  scanner->position = 0;
247
  
248
0
  scanner->next_token = G_TOKEN_NONE;
249
0
  scanner->next_value.v_int64 = 0;
250
0
  scanner->next_line = 1;
251
0
  scanner->next_position = 0;
252
  
253
0
  scanner->symbol_table = g_hash_table_new (json_scanner_key_hash,
254
0
                                            json_scanner_key_equal);
255
0
  scanner->text = NULL;
256
0
  scanner->text_end = NULL;
257
0
  scanner->buffer = NULL;
258
0
  scanner->scope_id = 0;
259
  
260
0
  return scanner;
261
0
}
262
263
static inline void
264
json_scanner_free_value (GTokenType  *token_p,
265
                         GTokenValue *value_p)
266
0
{
267
0
  switch (*token_p)
268
0
    {
269
0
    case G_TOKEN_STRING:
270
0
    case G_TOKEN_IDENTIFIER:
271
0
    case G_TOKEN_IDENTIFIER_NULL:
272
0
    case G_TOKEN_COMMENT_SINGLE:
273
0
    case G_TOKEN_COMMENT_MULTI:
274
0
      g_free (value_p->v_string);
275
0
      break;
276
      
277
0
    default:
278
0
      break;
279
0
    }
280
  
281
0
  *token_p = G_TOKEN_NONE;
282
0
}
283
284
static void
285
json_scanner_destroy_symbol_table_entry (gpointer _key,
286
                                         gpointer _value G_GNUC_UNUSED,
287
                                         gpointer _data G_GNUC_UNUSED)
288
0
{
289
0
  JsonScannerKey *key = _key;
290
  
291
0
  g_free (key->symbol);
292
0
  g_slice_free (JsonScannerKey, key);
293
0
}
294
295
void
296
json_scanner_destroy (JsonScanner *scanner)
297
0
{
298
0
  g_return_if_fail (scanner != NULL);
299
  
300
0
  g_datalist_clear (&scanner->qdata);
301
0
  g_hash_table_foreach (scanner->symbol_table, 
302
0
      json_scanner_destroy_symbol_table_entry,
303
0
                        NULL);
304
0
  g_hash_table_destroy (scanner->symbol_table);
305
0
  json_scanner_free_value (&scanner->token, &scanner->value);
306
0
  json_scanner_free_value (&scanner->next_token, &scanner->next_value);
307
0
  g_free (scanner->config);
308
0
  g_free (scanner->buffer);
309
0
  g_free (scanner);
310
0
}
311
312
void
313
json_scanner_error (JsonScanner *scanner,
314
                    const gchar *format,
315
                    ...)
316
0
{
317
0
  g_return_if_fail (scanner != NULL);
318
0
  g_return_if_fail (format != NULL);
319
  
320
0
  scanner->parse_errors++;
321
  
322
0
  if (scanner->msg_handler)
323
0
    {
324
0
      va_list args;
325
0
      gchar *string;
326
      
327
0
      va_start (args, format);
328
0
      string = g_strdup_vprintf (format, args);
329
0
      va_end (args);
330
      
331
0
      scanner->msg_handler (scanner, string);
332
      
333
0
      g_free (string);
334
0
    }
335
0
}
336
337
static gboolean
338
json_scanner_key_equal (gconstpointer v1,
339
                        gconstpointer v2)
340
0
{
341
0
  const JsonScannerKey *key1 = v1;
342
0
  const JsonScannerKey *key2 = v2;
343
  
344
0
  return (key1->scope_id == key2->scope_id) &&
345
0
         (strcmp (key1->symbol, key2->symbol) == 0);
346
0
}
347
348
static guint
349
json_scanner_key_hash (gconstpointer v)
350
0
{
351
0
  const JsonScannerKey *key = v;
352
0
  gchar *c;
353
0
  guint h;
354
  
355
0
  h = key->scope_id;
356
0
  for (c = key->symbol; *c; c++)
357
0
    h = (h << 5) - h + *c;
358
  
359
0
  return h;
360
0
}
361
362
static inline JsonScannerKey *
363
json_scanner_lookup_internal (JsonScanner *scanner,
364
                              guint        scope_id,
365
                              const gchar *symbol)
366
0
{
367
0
  JsonScannerKey *key_p;
368
0
  JsonScannerKey key;
369
  
370
0
  key.scope_id = scope_id;
371
  
372
0
  if (!scanner->config->case_sensitive)
373
0
    {
374
0
      gchar *d;
375
0
      const gchar *c;
376
      
377
0
      key.symbol = g_new (gchar, strlen (symbol) + 1);
378
0
      for (d = key.symbol, c = symbol; *c; c++, d++)
379
0
  *d = to_lower (*c);
380
0
      *d = 0;
381
0
      key_p = g_hash_table_lookup (scanner->symbol_table, &key);
382
0
      g_free (key.symbol);
383
0
    }
384
0
  else
385
0
    {
386
0
      key.symbol = (gchar*) symbol;
387
0
      key_p = g_hash_table_lookup (scanner->symbol_table, &key);
388
0
    }
389
  
390
0
  return key_p;
391
0
}
392
393
void
394
json_scanner_scope_add_symbol (JsonScanner *scanner,
395
                               guint        scope_id,
396
                               const gchar *symbol,
397
                               gpointer     value)
398
0
{
399
0
  JsonScannerKey *key;
400
401
0
  g_return_if_fail (scanner != NULL);
402
0
  g_return_if_fail (symbol != NULL);
403
404
0
  key = json_scanner_lookup_internal (scanner, scope_id, symbol);
405
0
  if (!key)
406
0
    {
407
0
      key = g_slice_new (JsonScannerKey);
408
0
      key->scope_id = scope_id;
409
0
      key->symbol = g_strdup (symbol);
410
0
      key->value = value;
411
0
      if (!scanner->config->case_sensitive)
412
0
  {
413
0
    gchar *c;
414
415
0
    c = key->symbol;
416
0
    while (*c != 0)
417
0
      {
418
0
        *c = to_lower (*c);
419
0
        c++;
420
0
      }
421
0
  }
422
423
0
      g_hash_table_insert (scanner->symbol_table, key, key);
424
0
    }
425
0
  else
426
0
    key->value = value;
427
0
}
428
429
GTokenType
430
json_scanner_peek_next_token (JsonScanner *scanner)
431
0
{
432
0
  g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
433
434
0
  if (scanner->next_token == G_TOKEN_NONE)
435
0
    {
436
0
      scanner->next_line = scanner->line;
437
0
      scanner->next_position = scanner->position;
438
0
      json_scanner_get_token_i (scanner,
439
0
                                &scanner->next_token,
440
0
                                &scanner->next_value,
441
0
                                &scanner->next_line,
442
0
                                &scanner->next_position);
443
0
    }
444
445
0
  return scanner->next_token;
446
0
}
447
448
GTokenType
449
json_scanner_get_next_token (JsonScanner *scanner)
450
0
{
451
0
  g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
452
453
0
  if (scanner->next_token != G_TOKEN_NONE)
454
0
    {
455
0
      json_scanner_free_value (&scanner->token, &scanner->value);
456
457
0
      scanner->token = scanner->next_token;
458
0
      scanner->value = scanner->next_value;
459
0
      scanner->line = scanner->next_line;
460
0
      scanner->position = scanner->next_position;
461
0
      scanner->next_token = G_TOKEN_NONE;
462
0
    }
463
0
  else
464
0
    json_scanner_get_token_i (scanner,
465
0
                              &scanner->token,
466
0
                              &scanner->value,
467
0
                              &scanner->line,
468
0
                              &scanner->position);
469
470
0
  return scanner->token;
471
0
}
472
473
void
474
json_scanner_input_text (JsonScanner *scanner,
475
                         const gchar *text,
476
                         guint        text_len)
477
0
{
478
0
  g_return_if_fail (scanner != NULL);
479
0
  if (text_len)
480
0
    g_return_if_fail (text != NULL);
481
0
  else
482
0
    text = NULL;
483
484
0
  scanner->token = G_TOKEN_NONE;
485
0
  scanner->value.v_int64 = 0;
486
0
  scanner->line = 1;
487
0
  scanner->position = 0;
488
0
  scanner->next_token = G_TOKEN_NONE;
489
490
0
  scanner->text = text;
491
0
  scanner->text_end = text + text_len;
492
493
0
  if (scanner->buffer)
494
0
    {
495
0
      g_free (scanner->buffer);
496
0
      scanner->buffer = NULL;
497
0
    }
498
0
}
499
500
static guchar
501
json_scanner_peek_next_char (JsonScanner *scanner)
502
0
{
503
0
  if (scanner->text < scanner->text_end)
504
0
    return *scanner->text;
505
0
  else
506
0
    return 0;
507
0
}
508
509
static guchar
510
json_scanner_get_char (JsonScanner *scanner,
511
                       guint       *line_p,
512
                       guint       *position_p)
513
0
{
514
0
  guchar fchar;
515
516
0
  if (scanner->text < scanner->text_end)
517
0
    fchar = *(scanner->text++);
518
0
  else
519
0
    fchar = 0;
520
  
521
0
  if (fchar == '\n')
522
0
    {
523
0
      (*position_p) = 0;
524
0
      (*line_p)++;
525
0
    }
526
0
  else if (fchar)
527
0
    {
528
0
      (*position_p)++;
529
0
    }
530
  
531
0
  return fchar;
532
0
}
533
534
0
/* TRUE when c is one of 0-9, a-f or A-F.  The argument is evaluated
 * more than once.
 */
#define is_hex_digit(c) \
  (((c) >= '0' && (c) <= '9') || \
   ((c) >= 'a' && (c) <= 'f') || \
   ((c) >= 'A' && (c) <= 'F'))

/* Numeric value of a hexadecimal digit; only meaningful when
 * is_hex_digit (c) holds.  For letters the low three bits give the
 * offset from 'A'/'a' plus one, hence the "+ 9".
 */
#define to_hex_digit(c) \
  (((c) > '9') ? (((c) & 7) + 9) : ((c) - '0'))
538
539
static gunichar
540
json_scanner_get_unichar (JsonScanner *scanner,
541
                          guint       *line_p,
542
                          guint       *position_p)
543
0
{
544
0
  gunichar uchar;
545
0
  gchar ch;
546
0
  gint i;
547
548
0
  uchar = 0;
549
0
  for (i = 0; i < 4; i++)
550
0
    {
551
0
      ch = json_scanner_get_char (scanner, line_p, position_p);
552
553
0
      if (is_hex_digit (ch))
554
0
        uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
555
0
      else
556
0
        break;
557
0
    }
558
559
0
  g_assert (g_unichar_validate (uchar) || g_unichar_type (uchar) == G_UNICODE_SURROGATE);
560
561
0
  return uchar;
562
0
}
563
564
/*
565
 * decode_utf16_surrogate_pair:
566
 * @units: (array length=2): a pair of UTF-16 code points
567
 *
568
 * Decodes a surrogate pair of UTF-16 code points into the equivalent
569
 * Unicode code point.
570
 *
571
 * Returns: the Unicode code point equivalent to the surrogate pair
572
 */
573
static inline gunichar
574
decode_utf16_surrogate_pair (const gunichar units[2])
575
0
{
576
0
  gunichar ucs;
577
578
0
  g_assert (0xd800 <= units[0] && units[0] <= 0xdbff);
579
0
  g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff);
580
581
0
  ucs = 0x10000;
582
0
  ucs += (units[0] & 0x3ff) << 10;
583
0
  ucs += (units[1] & 0x3ff);
584
585
0
  return ucs;
586
0
}
587
588
void
589
json_scanner_unexp_token (JsonScanner *scanner,
590
                          GTokenType   expected_token,
591
                          const gchar *identifier_spec,
592
                          const gchar *symbol_spec,
593
                          const gchar *symbol_name,
594
                          const gchar *message)
595
0
{
596
0
  gchar *token_string;
597
0
  guint token_string_len;
598
0
  gchar *expected_string;
599
0
  guint expected_string_len;
600
0
  gchar *message_prefix;
601
0
  gboolean print_unexp;
602
  
603
0
  g_return_if_fail (scanner != NULL);
604
  
605
0
  if (!identifier_spec)
606
0
    identifier_spec = "identifier";
607
0
  if (!symbol_spec)
608
0
    symbol_spec = "symbol";
609
  
610
0
  token_string_len = 56;
611
0
  token_string = g_new (gchar, token_string_len + 1);
612
0
  expected_string_len = 64;
613
0
  expected_string = g_new (gchar, expected_string_len + 1);
614
0
  print_unexp = TRUE;
615
  
616
0
  switch (scanner->token)
617
0
    {
618
0
    case G_TOKEN_EOF:
619
0
      g_snprintf (token_string, token_string_len, "end of file");
620
0
      break;
621
      
622
0
    default:
623
0
      if (scanner->token >= 1 && scanner->token <= 255)
624
0
  {
625
0
    if ((scanner->token >= ' ' && scanner->token <= '~') ||
626
0
        strchr (scanner->config->cset_identifier_first, scanner->token) ||
627
0
        strchr (scanner->config->cset_identifier_nth, scanner->token))
628
0
      g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
629
0
    else
630
0
      g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
631
0
    break;
632
0
  }
633
0
      else if (!scanner->config->symbol_2_token)
634
0
  {
635
0
    g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
636
0
    break;
637
0
  }
638
      /* fall through */
639
0
    case G_TOKEN_SYMBOL:
640
0
      if (expected_token == G_TOKEN_SYMBOL ||
641
0
    (scanner->config->symbol_2_token &&
642
0
     expected_token > G_TOKEN_LAST))
643
0
  print_unexp = FALSE;
644
0
      if (symbol_name)
645
0
  g_snprintf (token_string, token_string_len,
646
0
                    "%s%s `%s'",
647
0
                    print_unexp ? "" : "invalid ",
648
0
                    symbol_spec,
649
0
                    symbol_name);
650
0
      else
651
0
  g_snprintf (token_string, token_string_len,
652
0
                    "%s%s",
653
0
                    print_unexp ? "" : "invalid ",
654
0
                    symbol_spec);
655
0
      break;
656
 
657
0
    case G_TOKEN_ERROR:
658
0
      print_unexp = FALSE;
659
0
      expected_token = G_TOKEN_NONE;
660
0
      switch (scanner->value.v_error)
661
0
  {
662
0
  case G_ERR_UNEXP_EOF:
663
0
    g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
664
0
    break;
665
    
666
0
  case G_ERR_UNEXP_EOF_IN_STRING:
667
0
    g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
668
0
    break;
669
    
670
0
  case G_ERR_UNEXP_EOF_IN_COMMENT:
671
0
    g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
672
0
    break;
673
    
674
0
  case G_ERR_NON_DIGIT_IN_CONST:
675
0
    g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
676
0
    break;
677
    
678
0
  case G_ERR_FLOAT_RADIX:
679
0
    g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
680
0
    break;
681
    
682
0
  case G_ERR_FLOAT_MALFORMED:
683
0
    g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
684
0
    break;
685
    
686
0
  case G_ERR_DIGIT_RADIX:
687
0
    g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
688
0
    break;
689
690
0
  case JSON_ERR_MALFORMED_SURROGATE_PAIR:
691
0
    g_snprintf (token_string, token_string_len, "scanner: malformed surrogate pair");
692
0
    break;
693
694
0
  case G_ERR_UNKNOWN:
695
0
  default:
696
0
    g_snprintf (token_string, token_string_len, "scanner: unknown error");
697
0
    break;
698
0
  }
699
0
      break;
700
      
701
0
    case G_TOKEN_CHAR:
702
0
      g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
703
0
      break;
704
      
705
0
    case G_TOKEN_IDENTIFIER:
706
0
    case G_TOKEN_IDENTIFIER_NULL:
707
0
      if (expected_token == G_TOKEN_IDENTIFIER ||
708
0
    expected_token == G_TOKEN_IDENTIFIER_NULL)
709
0
  print_unexp = FALSE;
710
0
      g_snprintf (token_string, token_string_len,
711
0
                  "%s%s `%s'",
712
0
                  print_unexp ? "" : "invalid ",
713
0
                  identifier_spec,
714
0
                  scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
715
0
      break;
716
      
717
0
    case G_TOKEN_BINARY:
718
0
    case G_TOKEN_OCTAL:
719
0
    case G_TOKEN_INT:
720
0
    case G_TOKEN_HEX:
721
0
      if (scanner->config->store_int64)
722
0
  g_snprintf (token_string, token_string_len, "number `%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
723
0
      else
724
0
  g_snprintf (token_string, token_string_len, "number `%lu'", scanner->value.v_int);
725
0
      break;
726
      
727
0
    case G_TOKEN_FLOAT:
728
0
      g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
729
0
      break;
730
      
731
0
    case G_TOKEN_STRING:
732
0
      if (expected_token == G_TOKEN_STRING)
733
0
  print_unexp = FALSE;
734
0
      g_snprintf (token_string, token_string_len,
735
0
                  "%s%sstring constant \"%s\"",
736
0
                  print_unexp ? "" : "invalid ",
737
0
                  scanner->value.v_string[0] == 0 ? "empty " : "",
738
0
                  scanner->value.v_string);
739
0
      token_string[token_string_len - 2] = '"';
740
0
      token_string[token_string_len - 1] = 0;
741
0
      break;
742
      
743
0
    case G_TOKEN_COMMENT_SINGLE:
744
0
    case G_TOKEN_COMMENT_MULTI:
745
0
      g_snprintf (token_string, token_string_len, "comment");
746
0
      break;
747
      
748
0
    case G_TOKEN_NONE:
749
      /* somehow the user's parsing code is screwed, there isn't much
750
       * we can do about it.
751
       * Note, a common case to trigger this is
752
       * json_scanner_peek_next_token(); json_scanner_unexp_token();
753
       * without an intermediate json_scanner_get_next_token().
754
       */
755
0
      g_assert_not_reached ();
756
0
      break;
757
0
    }
758
  
759
  
760
0
  switch (expected_token)
761
0
    {
762
0
      gboolean need_valid;
763
0
      gchar *tstring;
764
0
    case G_TOKEN_EOF:
765
0
      g_snprintf (expected_string, expected_string_len, "end of file");
766
0
      break;
767
0
    default:
768
0
      if (expected_token >= 1 && expected_token <= 255)
769
0
  {
770
0
    if ((expected_token >= ' ' && expected_token <= '~') ||
771
0
        strchr (scanner->config->cset_identifier_first, expected_token) ||
772
0
        strchr (scanner->config->cset_identifier_nth, expected_token))
773
0
      g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
774
0
    else
775
0
      g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
776
0
    break;
777
0
  }
778
0
      else if (!scanner->config->symbol_2_token)
779
0
  {
780
0
    g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
781
0
    break;
782
0
  }
783
      /* fall through */
784
0
    case G_TOKEN_SYMBOL:
785
0
      need_valid = (scanner->token == G_TOKEN_SYMBOL ||
786
0
        (scanner->config->symbol_2_token &&
787
0
         scanner->token > G_TOKEN_LAST));
788
0
      g_snprintf (expected_string, expected_string_len,
789
0
                  "%s%s",
790
0
                  need_valid ? "valid " : "",
791
0
                  symbol_spec);
792
      /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
793
0
      break;
794
0
    case G_TOKEN_CHAR:
795
0
      g_snprintf (expected_string, expected_string_len, "%scharacter",
796
0
      scanner->token == G_TOKEN_CHAR ? "valid " : "");
797
0
      break;
798
0
    case G_TOKEN_BINARY:
799
0
      tstring = "binary";
800
0
      g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
801
0
      scanner->token == expected_token ? "valid " : "", tstring);
802
0
      break;
803
0
    case G_TOKEN_OCTAL:
804
0
      tstring = "octal";
805
0
      g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
806
0
      scanner->token == expected_token ? "valid " : "", tstring);
807
0
      break;
808
0
    case G_TOKEN_INT:
809
0
      tstring = "integer";
810
0
      g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
811
0
      scanner->token == expected_token ? "valid " : "", tstring);
812
0
      break;
813
0
    case G_TOKEN_HEX:
814
0
      tstring = "hexadecimal";
815
0
      g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
816
0
      scanner->token == expected_token ? "valid " : "", tstring);
817
0
      break;
818
0
    case G_TOKEN_FLOAT:
819
0
      tstring = "float";
820
0
      g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
821
0
      scanner->token == expected_token ? "valid " : "", tstring);
822
0
      break;
823
0
    case G_TOKEN_STRING:
824
0
      g_snprintf (expected_string,
825
0
      expected_string_len,
826
0
      "%sstring constant",
827
0
      scanner->token == G_TOKEN_STRING ? "valid " : "");
828
0
      break;
829
0
    case G_TOKEN_IDENTIFIER:
830
0
    case G_TOKEN_IDENTIFIER_NULL:
831
0
      need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
832
0
        scanner->token == G_TOKEN_IDENTIFIER);
833
0
      g_snprintf (expected_string,
834
0
      expected_string_len,
835
0
      "%s%s",
836
0
      need_valid ? "valid " : "",
837
0
      identifier_spec);
838
0
      break;
839
0
    case G_TOKEN_COMMENT_SINGLE:
840
0
      tstring = "single-line";
841
0
      g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
842
0
      scanner->token == expected_token ? "valid " : "", tstring);
843
0
      break;
844
0
    case G_TOKEN_COMMENT_MULTI:
845
0
      tstring = "multi-line";
846
0
      g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
847
0
      scanner->token == expected_token ? "valid " : "", tstring);
848
0
      break;
849
0
    case G_TOKEN_NONE:
850
0
    case G_TOKEN_ERROR:
851
      /* this is handled upon printout */
852
0
      break;
853
0
    }
854
  
855
0
  if (message && message[0] != 0)
856
0
    message_prefix = " - ";
857
0
  else
858
0
    {
859
0
      message_prefix = "";
860
0
      message = "";
861
0
    }
862
0
  if (expected_token == G_TOKEN_ERROR)
863
0
    {
864
0
      json_scanner_error (scanner,
865
0
                          "failure around %s%s%s",
866
0
                          token_string,
867
0
                          message_prefix,
868
0
                          message);
869
0
    }
870
0
  else if (expected_token == G_TOKEN_NONE)
871
0
    {
872
0
      if (print_unexp)
873
0
  json_scanner_error (scanner,
874
0
                            "unexpected %s%s%s",
875
0
                            token_string,
876
0
                            message_prefix,
877
0
                            message);
878
0
      else
879
0
  json_scanner_error (scanner,
880
0
                            "%s%s%s",
881
0
                            token_string,
882
0
                            message_prefix,
883
0
                            message);
884
0
    }
885
0
  else
886
0
    {
887
0
      if (print_unexp)
888
0
  json_scanner_error (scanner,
889
0
                            "unexpected %s, expected %s%s%s",
890
0
                            token_string,
891
0
                            expected_string,
892
0
                            message_prefix,
893
0
                            message);
894
0
      else
895
0
  json_scanner_error (scanner,
896
0
                            "%s, expected %s%s%s",
897
0
                            token_string,
898
0
                            expected_string,
899
0
                            message_prefix,
900
0
                            message);
901
0
    }
902
  
903
0
  g_free (token_string);
904
0
  g_free (expected_string);
905
0
}
906
907
static void
908
json_scanner_get_token_i (JsonScanner *scanner,
909
              GTokenType  *token_p,
910
              GTokenValue *value_p,
911
              guint   *line_p,
912
              guint   *position_p)
913
0
{
914
0
  do
915
0
    {
916
0
      json_scanner_free_value (token_p, value_p);
917
0
      json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
918
0
    }
919
0
  while (((*token_p > 0 && *token_p < 256) &&
920
0
    strchr (scanner->config->cset_skip_characters, *token_p)) ||
921
0
   (*token_p == G_TOKEN_CHAR &&
922
0
    strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
923
0
   (*token_p == G_TOKEN_COMMENT_MULTI &&
924
0
    scanner->config->skip_comment_multi) ||
925
0
   (*token_p == G_TOKEN_COMMENT_SINGLE &&
926
0
    scanner->config->skip_comment_single));
927
  
928
0
  switch (*token_p)
929
0
    {
930
0
    case G_TOKEN_IDENTIFIER:
931
0
      if (scanner->config->identifier_2_string)
932
0
  *token_p = G_TOKEN_STRING;
933
0
      break;
934
      
935
0
    case G_TOKEN_SYMBOL:
936
0
      if (scanner->config->symbol_2_token)
937
0
  *token_p = GPOINTER_TO_INT (value_p->v_symbol);
938
0
      break;
939
      
940
0
    case G_TOKEN_BINARY:
941
0
    case G_TOKEN_OCTAL:
942
0
    case G_TOKEN_HEX:
943
0
      if (scanner->config->numbers_2_int)
944
0
  *token_p = G_TOKEN_INT;
945
0
      break;
946
      
947
0
    default:
948
0
      break;
949
0
    }
950
  
951
0
  if (*token_p == G_TOKEN_INT &&
952
0
      scanner->config->int_2_float)
953
0
    {
954
0
      *token_p = G_TOKEN_FLOAT;
955
0
      if (scanner->config->store_int64)
956
0
        {
957
#ifdef _MSC_VER
958
          /* work around error C2520, see gvaluetransform.c */
959
          value_p->v_float = (__int64)value_p->v_int64;
960
#else
961
0
          value_p->v_float = value_p->v_int64;
962
0
#endif
963
0
        }
964
0
      else
965
0
  value_p->v_float = value_p->v_int;
966
0
    }
967
  
968
0
  errno = 0;
969
0
}
970
971
static void
972
json_scanner_get_token_ll (JsonScanner *scanner,
973
                           GTokenType  *token_p,
974
                           GTokenValue *value_p,
975
                           guint       *line_p,
976
                           guint       *position_p)
977
0
{
978
0
  JsonScannerConfig *config;
979
0
  GTokenType     token;
980
0
  gboolean     in_comment_multi;
981
0
  gboolean     in_comment_single;
982
0
  gboolean     in_string_sq;
983
0
  gboolean     in_string_dq;
984
0
  GString   *gstring;
985
0
  GTokenValue    value;
986
0
  guchar     ch;
987
  
988
0
  config = scanner->config;
989
0
  (*value_p).v_int64 = 0;
990
  
991
0
  if (scanner->text >= scanner->text_end ||
992
0
      scanner->token == G_TOKEN_EOF)
993
0
    {
994
0
      *token_p = G_TOKEN_EOF;
995
0
      return;
996
0
    }
997
  
998
0
  in_comment_multi = FALSE;
999
0
  in_comment_single = FALSE;
1000
0
  in_string_sq = FALSE;
1001
0
  in_string_dq = FALSE;
1002
0
  gstring = NULL;
1003
  
1004
0
  do /* while (ch != 0) */
1005
0
    {
1006
0
      gboolean dotted_float = FALSE;
1007
      
1008
0
      ch = json_scanner_get_char (scanner, line_p, position_p);
1009
      
1010
0
      value.v_int64 = 0;
1011
0
      token = G_TOKEN_NONE;
1012
      
1013
      /* this is *evil*, but needed ;(
1014
       * we first check for identifier first character, because  it
1015
       * might interfere with other key chars like slashes or numbers
1016
       */
1017
0
      if (config->scan_identifier &&
1018
0
    ch && strchr (config->cset_identifier_first, ch))
1019
0
  goto identifier_precedence;
1020
      
1021
0
      switch (ch)
1022
0
  {
1023
0
  case 0:
1024
0
    token = G_TOKEN_EOF;
1025
0
    (*position_p)++;
1026
    /* ch = 0; */
1027
0
    break;
1028
    
1029
0
  case '/':
1030
0
    if (!config->scan_comment_multi ||
1031
0
        json_scanner_peek_next_char (scanner) != '*')
1032
0
      goto default_case;
1033
0
    json_scanner_get_char (scanner, line_p, position_p);
1034
0
    token = G_TOKEN_COMMENT_MULTI;
1035
0
    in_comment_multi = TRUE;
1036
0
    gstring = g_string_new (NULL);
1037
0
    while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1038
0
      {
1039
0
        if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
1040
0
    {
1041
0
      json_scanner_get_char (scanner, line_p, position_p);
1042
0
      in_comment_multi = FALSE;
1043
0
      break;
1044
0
    }
1045
0
        else
1046
0
    gstring = g_string_append_c (gstring, ch);
1047
0
      }
1048
0
    ch = 0;
1049
0
    break;
1050
    
1051
0
  case '\'':
1052
0
    if (!config->scan_string_sq)
1053
0
      goto default_case;
1054
0
    token = G_TOKEN_STRING;
1055
0
    in_string_sq = TRUE;
1056
0
    gstring = g_string_new (NULL);
1057
0
    while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1058
0
      {
1059
0
        if (ch == '\'')
1060
0
    {
1061
0
      in_string_sq = FALSE;
1062
0
      break;
1063
0
    }
1064
0
        else
1065
0
    gstring = g_string_append_c (gstring, ch);
1066
0
      }
1067
0
    ch = 0;
1068
0
    break;
1069
    
1070
0
  case '"':
1071
0
    if (!config->scan_string_dq)
1072
0
      goto default_case;
1073
0
    token = G_TOKEN_STRING;
1074
0
    in_string_dq = TRUE;
1075
0
    gstring = g_string_new (NULL);
1076
0
    while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1077
0
      {
1078
0
        if (ch == '"' || token == G_TOKEN_ERROR)
1079
0
    {
1080
0
      in_string_dq = FALSE;
1081
0
      break;
1082
0
    }
1083
0
        else
1084
0
    {
1085
0
      if (ch == '\\')
1086
0
        {
1087
0
          ch = json_scanner_get_char (scanner, line_p, position_p);
1088
0
          switch (ch)
1089
0
      {
1090
0
        guint i;
1091
0
        guint fchar;
1092
        
1093
0
      case 0:
1094
0
        break;
1095
        
1096
0
      case '\\':
1097
0
        gstring = g_string_append_c (gstring, '\\');
1098
0
        break;
1099
        
1100
0
      case 'n':
1101
0
        gstring = g_string_append_c (gstring, '\n');
1102
0
        break;
1103
        
1104
0
      case 't':
1105
0
        gstring = g_string_append_c (gstring, '\t');
1106
0
        break;
1107
        
1108
0
      case 'r':
1109
0
        gstring = g_string_append_c (gstring, '\r');
1110
0
        break;
1111
        
1112
0
      case 'b':
1113
0
        gstring = g_string_append_c (gstring, '\b');
1114
0
        break;
1115
        
1116
0
      case 'f':
1117
0
        gstring = g_string_append_c (gstring, '\f');
1118
0
        break;
1119
1120
0
                        case 'u':
1121
0
                          fchar = json_scanner_peek_next_char (scanner);
1122
0
                          if (is_hex_digit (fchar))
1123
0
                            {
1124
0
                              gunichar ucs;
1125
1126
0
                              ucs = json_scanner_get_unichar (scanner, line_p, position_p);
1127
1128
                              /* resolve UTF-16 surrogates for Unicode characters not in the BMP,
1129
                                * as per ECMA 404, § 9, "String"
1130
                                */
1131
0
                              if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
1132
0
                                {
1133
                                  /* read next surrogate */
1134
0
                                  if ('\\' == json_scanner_get_char (scanner, line_p, position_p) &&
1135
0
                                      'u' == json_scanner_get_char (scanner, line_p, position_p))
1136
0
                                    {
1137
0
                                      gunichar units[2];
1138
1139
0
                                      units[0] = ucs;
1140
0
                                      units[1] = json_scanner_get_unichar (scanner, line_p, position_p);
1141
1142
0
                                      if (0xdc00 <= units[1] && units[1] <= 0xdfff &&
1143
0
                                          0xd800 <= units[0] && units[0] <= 0xdbff)
1144
0
                                        {
1145
0
                                          ucs = decode_utf16_surrogate_pair (units);
1146
0
                                          g_assert (g_unichar_validate (ucs));
1147
0
                                        }
1148
0
                                      else
1149
0
                                        {
1150
0
                                          token = G_TOKEN_ERROR;
1151
0
                                          value.v_error = JSON_ERR_MALFORMED_SURROGATE_PAIR;
1152
0
                                          gstring = NULL;
1153
0
                                          break;
1154
0
                                        }
1155
1156
0
                                    }
1157
0
                                }
1158
1159
0
                              gstring = g_string_append_unichar (gstring, ucs);
1160
0
                            }
1161
0
                          break;
1162
        
1163
0
      case '0':
1164
0
      case '1':
1165
0
      case '2':
1166
0
      case '3':
1167
0
      case '4':
1168
0
      case '5':
1169
0
      case '6':
1170
0
      case '7':
1171
0
        i = ch - '0';
1172
0
        fchar = json_scanner_peek_next_char (scanner);
1173
0
        if (fchar >= '0' && fchar <= '7')
1174
0
          {
1175
0
            ch = json_scanner_get_char (scanner, line_p, position_p);
1176
0
            i = i * 8 + ch - '0';
1177
0
            fchar = json_scanner_peek_next_char (scanner);
1178
0
            if (fchar >= '0' && fchar <= '7')
1179
0
        {
1180
0
          ch = json_scanner_get_char (scanner, line_p, position_p);
1181
0
          i = i * 8 + ch - '0';
1182
0
        }
1183
0
          }
1184
0
        gstring = g_string_append_c (gstring, i);
1185
0
        break;
1186
        
1187
0
      default:
1188
0
        gstring = g_string_append_c (gstring, ch);
1189
0
        break;
1190
0
      }
1191
0
        }
1192
0
      else
1193
0
        gstring = g_string_append_c (gstring, ch);
1194
0
    }
1195
0
      }
1196
0
    ch = 0;
1197
0
    break;
1198
    
1199
0
  case '.':
1200
0
    if (!config->scan_float)
1201
0
      goto default_case;
1202
0
    token = G_TOKEN_FLOAT;
1203
0
    dotted_float = TRUE;
1204
0
    ch = json_scanner_get_char (scanner, line_p, position_p);
1205
0
    goto number_parsing;
1206
    
1207
0
  case '$':
1208
0
    if (!config->scan_hex_dollar)
1209
0
      goto default_case;
1210
0
    token = G_TOKEN_HEX;
1211
0
    ch = json_scanner_get_char (scanner, line_p, position_p);
1212
0
    goto number_parsing;
1213
    
1214
0
  case '0':
1215
0
    if (config->scan_octal)
1216
0
      token = G_TOKEN_OCTAL;
1217
0
    else
1218
0
      token = G_TOKEN_INT;
1219
0
    ch = json_scanner_peek_next_char (scanner);
1220
0
    if (config->scan_hex && (ch == 'x' || ch == 'X'))
1221
0
      {
1222
0
        token = G_TOKEN_HEX;
1223
0
        json_scanner_get_char (scanner, line_p, position_p);
1224
0
        ch = json_scanner_get_char (scanner, line_p, position_p);
1225
0
        if (ch == 0)
1226
0
    {
1227
0
      token = G_TOKEN_ERROR;
1228
0
      value.v_error = G_ERR_UNEXP_EOF;
1229
0
      (*position_p)++;
1230
0
      break;
1231
0
    }
1232
0
        if (json_scanner_char_2_num (ch, 16) < 0)
1233
0
    {
1234
0
      token = G_TOKEN_ERROR;
1235
0
      value.v_error = G_ERR_DIGIT_RADIX;
1236
0
      ch = 0;
1237
0
      break;
1238
0
    }
1239
0
      }
1240
0
    else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1241
0
      {
1242
0
        token = G_TOKEN_BINARY;
1243
0
        json_scanner_get_char (scanner, line_p, position_p);
1244
0
        ch = json_scanner_get_char (scanner, line_p, position_p);
1245
0
        if (ch == 0)
1246
0
    {
1247
0
      token = G_TOKEN_ERROR;
1248
0
      value.v_error = G_ERR_UNEXP_EOF;
1249
0
      (*position_p)++;
1250
0
      break;
1251
0
    }
1252
0
        if (json_scanner_char_2_num (ch, 10) < 0)
1253
0
    {
1254
0
      token = G_TOKEN_ERROR;
1255
0
      value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1256
0
      ch = 0;
1257
0
      break;
1258
0
    }
1259
0
      }
1260
0
    else
1261
0
      ch = '0';
1262
    /* fall through */
1263
0
  case '1':
1264
0
  case '2':
1265
0
  case '3':
1266
0
  case '4':
1267
0
  case '5':
1268
0
  case '6':
1269
0
  case '7':
1270
0
  case '8':
1271
0
  case '9':
1272
0
  number_parsing:
1273
0
  {
1274
0
          gboolean in_number = TRUE;
1275
0
    gchar *endptr;
1276
    
1277
0
    if (token == G_TOKEN_NONE)
1278
0
      token = G_TOKEN_INT;
1279
    
1280
0
    gstring = g_string_new (dotted_float ? "0." : "");
1281
0
    gstring = g_string_append_c (gstring, ch);
1282
    
1283
0
    do /* while (in_number) */
1284
0
      {
1285
0
        gboolean is_E;
1286
        
1287
0
        is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1288
        
1289
0
        ch = json_scanner_peek_next_char (scanner);
1290
        
1291
0
        if (json_scanner_char_2_num (ch, 36) >= 0 ||
1292
0
      (config->scan_float && ch == '.') ||
1293
0
      (is_E && (ch == '+' || ch == '-')))
1294
0
    {
1295
0
      ch = json_scanner_get_char (scanner, line_p, position_p);
1296
      
1297
0
      switch (ch)
1298
0
        {
1299
0
        case '.':
1300
0
          if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1301
0
      {
1302
0
        value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1303
0
        token = G_TOKEN_ERROR;
1304
0
        in_number = FALSE;
1305
0
      }
1306
0
          else
1307
0
      {
1308
0
        token = G_TOKEN_FLOAT;
1309
0
        gstring = g_string_append_c (gstring, ch);
1310
0
      }
1311
0
          break;
1312
          
1313
0
        case '0':
1314
0
        case '1':
1315
0
        case '2':
1316
0
        case '3':
1317
0
        case '4':
1318
0
        case '5':
1319
0
        case '6':
1320
0
        case '7':
1321
0
        case '8':
1322
0
        case '9':
1323
0
          gstring = g_string_append_c (gstring, ch);
1324
0
          break;
1325
          
1326
0
        case '-':
1327
0
        case '+':
1328
0
          if (token != G_TOKEN_FLOAT)
1329
0
      {
1330
0
        token = G_TOKEN_ERROR;
1331
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1332
0
        in_number = FALSE;
1333
0
      }
1334
0
          else
1335
0
      gstring = g_string_append_c (gstring, ch);
1336
0
          break;
1337
          
1338
0
        case 'e':
1339
0
        case 'E':
1340
0
          if ((token != G_TOKEN_HEX && !config->scan_float) ||
1341
0
        (token != G_TOKEN_HEX &&
1342
0
         token != G_TOKEN_OCTAL &&
1343
0
         token != G_TOKEN_FLOAT &&
1344
0
         token != G_TOKEN_INT))
1345
0
      {
1346
0
        token = G_TOKEN_ERROR;
1347
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1348
0
        in_number = FALSE;
1349
0
      }
1350
0
          else
1351
0
      {
1352
0
        if (token != G_TOKEN_HEX)
1353
0
          token = G_TOKEN_FLOAT;
1354
0
        gstring = g_string_append_c (gstring, ch);
1355
0
      }
1356
0
          break;
1357
          
1358
0
        default:
1359
0
          if (token != G_TOKEN_HEX)
1360
0
      {
1361
0
        token = G_TOKEN_ERROR;
1362
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1363
0
        in_number = FALSE;
1364
0
      }
1365
0
          else
1366
0
      gstring = g_string_append_c (gstring, ch);
1367
0
          break;
1368
0
        }
1369
0
    }
1370
0
        else
1371
0
    in_number = FALSE;
1372
0
      }
1373
0
    while (in_number);
1374
    
1375
0
    endptr = NULL;
1376
0
    if (token == G_TOKEN_FLOAT)
1377
0
      value.v_float = g_strtod (gstring->str, &endptr);
1378
0
    else
1379
0
      {
1380
0
        guint64 ui64 = 0;
1381
0
        switch (token)
1382
0
    {
1383
0
    case G_TOKEN_BINARY:
1384
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
1385
0
      break;
1386
0
    case G_TOKEN_OCTAL:
1387
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
1388
0
      break;
1389
0
    case G_TOKEN_INT:
1390
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
1391
0
      break;
1392
0
    case G_TOKEN_HEX:
1393
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
1394
0
      break;
1395
0
    default: ;
1396
0
    }
1397
0
        if (scanner->config->store_int64)
1398
0
    value.v_int64 = ui64;
1399
0
        else
1400
0
    value.v_int = ui64;
1401
0
      }
1402
0
    if (endptr && *endptr)
1403
0
      {
1404
0
        token = G_TOKEN_ERROR;
1405
0
        if (*endptr == 'e' || *endptr == 'E')
1406
0
    value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1407
0
        else
1408
0
    value.v_error = G_ERR_DIGIT_RADIX;
1409
0
      }
1410
0
    g_string_free (gstring, TRUE);
1411
0
    gstring = NULL;
1412
0
    ch = 0;
1413
0
  } /* number_parsing:... */
1414
0
  break;
1415
  
1416
0
  default:
1417
0
  default_case:
1418
0
  {
1419
0
    if (config->cpair_comment_single &&
1420
0
        ch == config->cpair_comment_single[0])
1421
0
      {
1422
0
        token = G_TOKEN_COMMENT_SINGLE;
1423
0
        in_comment_single = TRUE;
1424
0
        gstring = g_string_new (NULL);
1425
0
        ch = json_scanner_get_char (scanner, line_p, position_p);
1426
0
        while (ch != 0)
1427
0
    {
1428
0
      if (ch == config->cpair_comment_single[1])
1429
0
        {
1430
0
          in_comment_single = FALSE;
1431
0
          ch = 0;
1432
0
          break;
1433
0
        }
1434
      
1435
0
      gstring = g_string_append_c (gstring, ch);
1436
0
      ch = json_scanner_get_char (scanner, line_p, position_p);
1437
0
    }
1438
        /* ignore a missing newline at EOF for single line comments */
1439
0
        if (in_comment_single &&
1440
0
      config->cpair_comment_single[1] == '\n')
1441
0
    in_comment_single = FALSE;
1442
0
      }
1443
0
    else if (config->scan_identifier && ch &&
1444
0
       strchr (config->cset_identifier_first, ch))
1445
0
      {
1446
0
      identifier_precedence:
1447
        
1448
0
        if (config->cset_identifier_nth && ch &&
1449
0
      strchr (config->cset_identifier_nth,
1450
0
        json_scanner_peek_next_char (scanner)))
1451
0
    {
1452
0
      token = G_TOKEN_IDENTIFIER;
1453
0
      gstring = g_string_new (NULL);
1454
0
      gstring = g_string_append_c (gstring, ch);
1455
0
      do
1456
0
        {
1457
0
          ch = json_scanner_get_char (scanner, line_p, position_p);
1458
0
          gstring = g_string_append_c (gstring, ch);
1459
0
          ch = json_scanner_peek_next_char (scanner);
1460
0
        }
1461
0
      while (ch && strchr (config->cset_identifier_nth, ch));
1462
0
      ch = 0;
1463
0
    }
1464
0
        else if (config->scan_identifier_1char)
1465
0
    {
1466
0
      token = G_TOKEN_IDENTIFIER;
1467
0
      value.v_identifier = g_new0 (gchar, 2);
1468
0
      value.v_identifier[0] = ch;
1469
0
      ch = 0;
1470
0
    }
1471
0
      }
1472
0
    if (ch)
1473
0
      {
1474
0
        if (config->char_2_token)
1475
0
    token = ch;
1476
0
        else
1477
0
    {
1478
0
      token = G_TOKEN_CHAR;
1479
0
      value.v_char = ch;
1480
0
    }
1481
0
        ch = 0;
1482
0
      }
1483
0
  } /* default_case:... */
1484
0
  break;
1485
0
  }
1486
0
      g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1487
0
    }
1488
0
  while (ch != 0);
1489
  
1490
0
  if (in_comment_multi || in_comment_single ||
1491
0
      in_string_sq || in_string_dq)
1492
0
    {
1493
0
      token = G_TOKEN_ERROR;
1494
0
      if (gstring)
1495
0
  {
1496
0
    g_string_free (gstring, TRUE);
1497
0
    gstring = NULL;
1498
0
  }
1499
0
      (*position_p)++;
1500
0
      if (in_comment_multi || in_comment_single)
1501
0
  value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1502
0
      else /* (in_string_sq || in_string_dq) */
1503
0
  value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1504
0
    }
1505
  
1506
0
  if (gstring)
1507
0
    {
1508
0
      value.v_string = g_string_free (gstring, FALSE);
1509
0
      gstring = NULL;
1510
0
    }
1511
  
1512
0
  if (token == G_TOKEN_IDENTIFIER)
1513
0
    {
1514
0
      if (config->scan_symbols)
1515
0
  {
1516
0
    JsonScannerKey *key;
1517
0
    guint scope_id;
1518
    
1519
0
    scope_id = scanner->scope_id;
1520
0
    key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1521
0
    if (!key && scope_id && scanner->config->scope_0_fallback)
1522
0
      key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);
1523
    
1524
0
    if (key)
1525
0
      {
1526
0
        g_free (value.v_identifier);
1527
0
        token = G_TOKEN_SYMBOL;
1528
0
        value.v_symbol = key->value;
1529
0
      }
1530
0
  }
1531
      
1532
0
      if (token == G_TOKEN_IDENTIFIER &&
1533
0
    config->scan_identifier_NULL &&
1534
0
    strlen (value.v_identifier) == 4)
1535
0
  {
1536
0
    gchar *null_upper = "NULL";
1537
0
    gchar *null_lower = "null";
1538
    
1539
0
    if (scanner->config->case_sensitive)
1540
0
      {
1541
0
        if (value.v_identifier[0] == null_upper[0] &&
1542
0
      value.v_identifier[1] == null_upper[1] &&
1543
0
      value.v_identifier[2] == null_upper[2] &&
1544
0
      value.v_identifier[3] == null_upper[3])
1545
0
    token = G_TOKEN_IDENTIFIER_NULL;
1546
0
      }
1547
0
    else
1548
0
      {
1549
0
        if ((value.v_identifier[0] == null_upper[0] ||
1550
0
       value.v_identifier[0] == null_lower[0]) &&
1551
0
      (value.v_identifier[1] == null_upper[1] ||
1552
0
       value.v_identifier[1] == null_lower[1]) &&
1553
0
      (value.v_identifier[2] == null_upper[2] ||
1554
0
       value.v_identifier[2] == null_lower[2]) &&
1555
0
      (value.v_identifier[3] == null_upper[3] ||
1556
0
       value.v_identifier[3] == null_lower[3]))
1557
0
    token = G_TOKEN_IDENTIFIER_NULL;
1558
0
      }
1559
0
  }
1560
0
    }
1561
  
1562
0
  *token_p = token;
1563
0
  *value_p = value;
1564
0
}