Coverage Report

Created: 2025-07-18 06:08

/src/tinysparql/subprojects/glib-2.80.3/glib/gscanner.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3
 *
4
 * GScanner: Flexible lexical scanner for general purpose.
5
 * Copyright (C) 1997, 1998 Tim Janik
6
 *
7
 * SPDX-License-Identifier: LGPL-2.1-or-later
8
 *
9
 * This library is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * This library is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
/*
24
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
25
 * file for a list of people on the GLib Team.  See the ChangeLog
26
 * files for a list of changes.  These files are distributed with
27
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
28
 */
29
30
/*
31
 * MT safe
32
 */
33
34
#include "config.h"
35
36
#include <errno.h>
37
#include <stdlib.h>
38
#include <stdarg.h>
39
#include <string.h>
40
#include <stdio.h>
41
42
#include "gscanner.h"
43
44
#include "gprintfint.h"
45
#include "gstrfuncs.h"
46
#include "gstring.h"
47
#include "gtestutils.h"
48
49
#ifdef G_OS_UNIX
50
#include <unistd.h>
51
#endif
52
#ifdef G_OS_WIN32
53
#include <io.h>
54
#endif
55
56
57
/**
58
 * GScannerMsgFunc:
59
 * @scanner: a #GScanner
60
 * @message: the message
61
 * @error: %TRUE if the message signals an error,
62
 *     %FALSE if it signals a warning.
63
 *
64
 * Specifies the type of the message handler function.
65
 */
66
67
/**
68
 * G_CSET_a_2_z:
69
 *
70
 * The set of lowercase ASCII alphabet characters.
71
 * Used for specifying valid identifier characters
72
 * in #GScannerConfig.
73
 */
74
75
/**
76
 * G_CSET_A_2_Z:
77
 *
78
 * The set of uppercase ASCII alphabet characters.
79
 * Used for specifying valid identifier characters
80
 * in #GScannerConfig.
81
 */
82
83
/**
84
 * G_CSET_DIGITS:
85
 *
86
 * The set of ASCII digits.
87
 * Used for specifying valid identifier characters
88
 * in #GScannerConfig.
89
 */
90
91
/**
92
 * G_CSET_LATINC:
93
 *
94
 * The set of uppercase ISO 8859-1 alphabet characters
95
 * which are not ASCII characters.
96
 * Used for specifying valid identifier characters
97
 * in #GScannerConfig.
98
 */
99
100
/**
101
 * G_CSET_LATINS:
102
 *
103
 * The set of lowercase ISO 8859-1 alphabet characters
104
 * which are not ASCII characters.
105
 * Used for specifying valid identifier characters
106
 * in #GScannerConfig.
107
 */
108
109
/**
110
 * GTokenType:
111
 * @G_TOKEN_EOF: the end of the file
112
 * @G_TOKEN_LEFT_PAREN: a '(' character
113
 * @G_TOKEN_LEFT_CURLY: a '{' character
114
 * @G_TOKEN_LEFT_BRACE: a '[' character
115
 * @G_TOKEN_RIGHT_CURLY: a '}' character
116
 * @G_TOKEN_RIGHT_PAREN: a ')' character
117
 * @G_TOKEN_RIGHT_BRACE: a ']' character
118
 * @G_TOKEN_EQUAL_SIGN: a '=' character
119
 * @G_TOKEN_COMMA: a ',' character
120
 * @G_TOKEN_NONE: not a token
121
 * @G_TOKEN_ERROR: an error occurred
122
 * @G_TOKEN_CHAR: a character
123
 * @G_TOKEN_BINARY: a binary integer
124
 * @G_TOKEN_OCTAL: an octal integer
125
 * @G_TOKEN_INT: an integer
126
 * @G_TOKEN_HEX: a hex integer
127
 * @G_TOKEN_FLOAT: a floating point number
128
 * @G_TOKEN_STRING: a string
129
 * @G_TOKEN_SYMBOL: a symbol
130
 * @G_TOKEN_IDENTIFIER: an identifier
131
 * @G_TOKEN_IDENTIFIER_NULL: a null identifier
132
 * @G_TOKEN_COMMENT_SINGLE: one line comment
133
 * @G_TOKEN_COMMENT_MULTI: multi line comment
134
 *
135
 * The possible types of token returned from each
136
 * g_scanner_get_next_token() call.
137
 */
138
139
/**
140
 * GTokenValue:
141
 * @v_symbol: token symbol value
142
 * @v_identifier: token identifier value
143
 * @v_binary: token binary integer value
144
 * @v_octal: octal integer value
145
 * @v_int: integer value
146
 * @v_int64: 64-bit integer value
147
 * @v_float: floating point value
148
 * @v_hex: hex integer value
149
 * @v_string: string value
150
 * @v_comment: comment value
151
 * @v_char: character value
152
 * @v_error: error value
153
 *
154
 * A union holding the value of the token.
155
 */
156
157
/**
158
 * GErrorType:
159
 * @G_ERR_UNKNOWN: unknown error
160
 * @G_ERR_UNEXP_EOF: unexpected end of file
161
 * @G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant
162
 * @G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment
163
 * @G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number
164
 * @G_ERR_DIGIT_RADIX: digit beyond radix in a number
165
 * @G_ERR_FLOAT_RADIX: non-decimal floating point number
166
 * @G_ERR_FLOAT_MALFORMED: malformed floating point number
167
 *
168
 * The possible errors, used in the @v_error field
169
 * of #GTokenValue, when the token is a %G_TOKEN_ERROR.
170
 */
171
172
/**
173
 * GScanner:
174
 * @user_data: unused
175
 * @max_parse_errors: unused
176
 * @parse_errors: g_scanner_error() increments this field
177
 * @input_name: name of input stream, featured by the default message handler
178
 * @qdata: quarked data
179
 * @config: link into the scanner configuration
180
 * @token: token parsed by the last g_scanner_get_next_token()
181
 * @value: value of the last token from g_scanner_get_next_token()
182
 * @line: line number of the last token from g_scanner_get_next_token()
183
 * @position: char number of the last token from g_scanner_get_next_token()
184
 * @next_token: token parsed by the last g_scanner_peek_next_token()
185
 * @next_value: value of the last token from g_scanner_peek_next_token()
186
 * @next_line: line number of the last token from g_scanner_peek_next_token()
187
 * @next_position: char number of the last token from g_scanner_peek_next_token()
188
 * @msg_handler: handler function for _warn and _error
189
 *
190
 * `GScanner` provides a general-purpose lexical scanner.
191
 *
192
 * You should set @input_name after creating the scanner, since
193
 * it is used by the default message handler when displaying
194
 * warnings and errors. If you are scanning a file, the filename
195
 * would be a good choice.
196
 *
197
 * The @user_data and @max_parse_errors fields are not used.
198
 * If you need to associate extra data with the scanner you
199
 * can place them here.
200
 *
201
 * If you want to use your own message handler you can set the
202
 * @msg_handler field. The type of the message handler function
203
 * is declared by #GScannerMsgFunc.
204
 */
205
206
/**
207
 * GScannerConfig:
208
 * @cset_skip_characters: specifies which characters should be skipped
209
 *     by the scanner (the default is the whitespace characters: space,
210
 *     tab, carriage-return and line-feed).
211
 * @cset_identifier_first: specifies the characters which can start
212
 *     identifiers (the default is %G_CSET_a_2_z, "_", and %G_CSET_A_2_Z).
213
 * @cset_identifier_nth: specifies the characters which can be used
214
 *     in identifiers, after the first character (the default is
215
 *     %G_CSET_a_2_z, "_0123456789", %G_CSET_A_2_Z, %G_CSET_LATINS,
216
 *     %G_CSET_LATINC).
217
 * @cpair_comment_single: specifies the characters at the start and
218
 *     end of single-line comments. The default is "#\n" which means
219
 *     that single-line comments start with a '#' and continue until
220
 *     a '\n' (end of line).
221
 * @case_sensitive: specifies if symbols are case sensitive (the
222
 *     default is %FALSE).
223
 * @skip_comment_multi: specifies if multi-line comments are skipped
224
 *     and not returned as tokens (the default is %TRUE).
225
 * @skip_comment_single: specifies if single-line comments are skipped
226
 *     and not returned as tokens (the default is %TRUE).
227
 * @scan_comment_multi: specifies if multi-line comments are recognized
228
 *     (the default is %TRUE).
229
 * @scan_identifier: specifies if identifiers are recognized (the
230
 *     default is %TRUE).
231
 * @scan_identifier_1char: specifies if single-character
232
 *     identifiers are recognized (the default is %FALSE).
233
 * @scan_identifier_NULL: specifies if %NULL is reported as
234
 *     %G_TOKEN_IDENTIFIER_NULL (the default is %FALSE).
235
 * @scan_symbols: specifies if symbols are recognized (the default
236
 *     is %TRUE).
237
 * @scan_binary: specifies if binary numbers are recognized (the
238
 *     default is %FALSE).
239
 * @scan_octal: specifies if octal numbers are recognized (the
240
 *     default is %TRUE).
241
 * @scan_float: specifies if floating point numbers are recognized
242
 *     (the default is %TRUE).
243
 * @scan_hex: specifies if hexadecimal numbers are recognized (the
244
 *     default is %TRUE).
245
 * @scan_hex_dollar: specifies if '$' is recognized as a prefix for
246
 *     hexadecimal numbers (the default is %FALSE).
247
 * @scan_string_sq: specifies if strings can be enclosed in single
248
 *     quotes (the default is %TRUE).
249
 * @scan_string_dq: specifies if strings can be enclosed in double
250
 *     quotes (the default is %TRUE).
251
 * @numbers_2_int: specifies if binary, octal and hexadecimal numbers
252
 *     are reported as %G_TOKEN_INT (the default is %TRUE).
253
 * @int_2_float: specifies if all numbers are reported as %G_TOKEN_FLOAT
254
 *     (the default is %FALSE).
255
 * @identifier_2_string: specifies if identifiers are reported as strings
256
 *     (the default is %FALSE).
257
 * @char_2_token: specifies if characters are reported by setting
258
 *     `token = ch` or as %G_TOKEN_CHAR (the default is %TRUE).
259
 * @symbol_2_token: specifies if symbols are reported by setting
260
 *     `token = v_symbol` or as %G_TOKEN_SYMBOL (the default is %FALSE).
261
 * @scope_0_fallback: specifies if a symbol is searched for in the
262
 *     default scope in addition to the current scope (the default is %FALSE).
263
 * @store_int64: use value.v_int64 rather than v_int
264
 *
265
 * Specifies the #GScanner parser configuration. Most settings can
266
 * be changed during the parsing phase and will affect the lexical
267
 * parsing of the next unpeeked token.
268
 */
269
270
/* --- defines --- */
271
0
/* Lower-case a single byte: ASCII 'A'..'Z' and the ISO 8859-1 upper-case
 * ranges 192..214 and 216..222 are mapped onto their lower-case
 * counterparts, every other value is passed through unchanged.
 * In all three ranges the lower-case form differs only by bit 0x20,
 * which is clear in the upper-case form, so OR-ing it in is sufficient.
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define to_lower(c) \
  ((guchar) (((guchar) (c)) | \
             (((((guchar) (c)) >= 'A' && ((guchar) (c)) <= 'Z') || \
               (((guchar) (c)) >= 192 && ((guchar) (c)) <= 214) || \
               (((guchar) (c)) >= 216 && ((guchar) (c)) <= 222)) ? 0x20 : 0)))

#define READ_BUFFER_SIZE  (4000)
280
281
282
/* --- typedefs --- */
283
typedef struct  _GScannerKey  GScannerKey;
284
285
struct  _GScannerKey
286
{
287
  guint    scope_id;
288
  gchar   *symbol;
289
  gpointer   value;
290
};
291
292
293
/* --- variables --- */
294
static const GScannerConfig g_scanner_config_template =
295
{
296
  (
297
   " \t\r\n"
298
   )      /* cset_skip_characters */,
299
  (
300
   G_CSET_a_2_z
301
   "_"
302
   G_CSET_A_2_Z
303
   )      /* cset_identifier_first */,
304
  (
305
   G_CSET_a_2_z
306
   "_"
307
   G_CSET_A_2_Z
308
   G_CSET_DIGITS
309
   G_CSET_LATINS
310
   G_CSET_LATINC
311
   )      /* cset_identifier_nth */,
312
  ( "#\n" )   /* cpair_comment_single */,
313
  
314
  FALSE     /* case_sensitive */,
315
  
316
  TRUE      /* skip_comment_multi */,
317
  TRUE      /* skip_comment_single */,
318
  TRUE      /* scan_comment_multi */,
319
  TRUE      /* scan_identifier */,
320
  FALSE     /* scan_identifier_1char */,
321
  FALSE     /* scan_identifier_NULL */,
322
  TRUE      /* scan_symbols */,
323
  FALSE     /* scan_binary */,
324
  TRUE      /* scan_octal */,
325
  TRUE      /* scan_float */,
326
  TRUE      /* scan_hex */,
327
  FALSE     /* scan_hex_dollar */,
328
  TRUE      /* scan_string_sq */,
329
  TRUE      /* scan_string_dq */,
330
  TRUE      /* numbers_2_int */,
331
  FALSE     /* int_2_float */,
332
  FALSE     /* identifier_2_string */,
333
  TRUE      /* char_2_token */,
334
  FALSE     /* symbol_2_token */,
335
  FALSE     /* scope_0_fallback */,
336
  FALSE     /* store_int64 */,
337
  0         /* padding_dummy */
338
};
339
340
341
/* --- prototypes --- */
342
static inline
343
GScannerKey*  g_scanner_lookup_internal (GScanner *scanner,
344
             guint   scope_id,
345
             const gchar  *symbol);
346
static gboolean g_scanner_key_equal   (gconstpointer v1,
347
             gconstpointer v2);
348
static guint  g_scanner_key_hash    (gconstpointer v);
349
static void g_scanner_get_token_ll    (GScanner *scanner,
350
             GTokenType *token_p,
351
             GTokenValue  *value_p,
352
             guint  *line_p,
353
             guint  *position_p);
354
static void g_scanner_get_token_i   (GScanner *scanner,
355
             GTokenType *token_p,
356
             GTokenValue  *value_p,
357
             guint  *line_p,
358
             guint  *position_p);
359
360
static guchar g_scanner_peek_next_char  (GScanner *scanner);
361
static guchar g_scanner_get_char    (GScanner *scanner,
362
             guint  *line_p,
363
             guint  *position_p);
364
static void g_scanner_msg_handler   (GScanner *scanner,
365
             gchar  *message,
366
             gboolean  is_error);
367
368
369
/* --- functions --- */
370
static inline gint
371
g_scanner_char_2_num (guchar  c,
372
          guchar  base)
373
0
{
374
0
  if (c >= '0' && c <= '9')
375
0
    c -= '0';
376
0
  else if (c >= 'A' && c <= 'Z')
377
0
    c -= 'A' - 10;
378
0
  else if (c >= 'a' && c <= 'z')
379
0
    c -= 'a' - 10;
380
0
  else
381
0
    return -1;
382
  
383
0
  if (c < base)
384
0
    return c;
385
  
386
0
  return -1;
387
0
}
388
389
/**
390
 * g_scanner_new:
391
 * @config_templ: the initial scanner settings
392
 *
393
 * Creates a new #GScanner.
394
 *
395
 * The @config_templ structure specifies the initial settings
396
 * of the scanner, which are copied into the #GScanner
397
 * @config field. If you pass %NULL then the default settings
398
 * are used.
399
 *
400
 * Returns: the new #GScanner
401
 */
402
GScanner *
403
g_scanner_new (const GScannerConfig *config_templ)
404
0
{
405
0
  GScanner *scanner;
406
  
407
0
  if (!config_templ)
408
0
    config_templ = &g_scanner_config_template;
409
  
410
0
  scanner = g_new0 (GScanner, 1);
411
  
412
0
  scanner->user_data = NULL;
413
0
  scanner->max_parse_errors = 1;
414
0
  scanner->parse_errors = 0;
415
0
  scanner->input_name = NULL;
416
0
  g_datalist_init (&scanner->qdata);
417
  
418
0
  scanner->config = g_new0 (GScannerConfig, 1);
419
  
420
0
  scanner->config->case_sensitive  = config_templ->case_sensitive;
421
0
  scanner->config->cset_skip_characters  = config_templ->cset_skip_characters;
422
0
  if (!scanner->config->cset_skip_characters)
423
0
    scanner->config->cset_skip_characters = "";
424
0
  scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
425
0
  scanner->config->cset_identifier_nth   = config_templ->cset_identifier_nth;
426
0
  scanner->config->cpair_comment_single  = config_templ->cpair_comment_single;
427
0
  scanner->config->skip_comment_multi  = config_templ->skip_comment_multi;
428
0
  scanner->config->skip_comment_single   = config_templ->skip_comment_single;
429
0
  scanner->config->scan_comment_multi  = config_templ->scan_comment_multi;
430
0
  scanner->config->scan_identifier   = config_templ->scan_identifier;
431
0
  scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
432
0
  scanner->config->scan_identifier_NULL  = config_templ->scan_identifier_NULL;
433
0
  scanner->config->scan_symbols    = config_templ->scan_symbols;
434
0
  scanner->config->scan_binary     = config_templ->scan_binary;
435
0
  scanner->config->scan_octal    = config_templ->scan_octal;
436
0
  scanner->config->scan_float    = config_templ->scan_float;
437
0
  scanner->config->scan_hex    = config_templ->scan_hex;
438
0
  scanner->config->scan_hex_dollar   = config_templ->scan_hex_dollar;
439
0
  scanner->config->scan_string_sq  = config_templ->scan_string_sq;
440
0
  scanner->config->scan_string_dq  = config_templ->scan_string_dq;
441
0
  scanner->config->numbers_2_int   = config_templ->numbers_2_int;
442
0
  scanner->config->int_2_float     = config_templ->int_2_float;
443
0
  scanner->config->identifier_2_string   = config_templ->identifier_2_string;
444
0
  scanner->config->char_2_token    = config_templ->char_2_token;
445
0
  scanner->config->symbol_2_token  = config_templ->symbol_2_token;
446
0
  scanner->config->scope_0_fallback  = config_templ->scope_0_fallback;
447
0
  scanner->config->store_int64     = config_templ->store_int64;
448
  
449
0
  scanner->token = G_TOKEN_NONE;
450
0
  scanner->value.v_int64 = 0;
451
0
  scanner->line = 1;
452
0
  scanner->position = 0;
453
  
454
0
  scanner->next_token = G_TOKEN_NONE;
455
0
  scanner->next_value.v_int64 = 0;
456
0
  scanner->next_line = 1;
457
0
  scanner->next_position = 0;
458
  
459
0
  scanner->symbol_table = g_hash_table_new (g_scanner_key_hash, g_scanner_key_equal);
460
0
  scanner->input_fd = -1;
461
0
  scanner->text = NULL;
462
0
  scanner->text_end = NULL;
463
0
  scanner->buffer = NULL;
464
0
  scanner->scope_id = 0;
465
  
466
0
  scanner->msg_handler = g_scanner_msg_handler;
467
  
468
0
  return scanner;
469
0
}
470
471
static inline void
472
g_scanner_free_value (GTokenType     *token_p,
473
          GTokenValue     *value_p)
474
0
{
475
0
  switch (*token_p)
476
0
    {
477
0
    case G_TOKEN_STRING:
478
0
    case G_TOKEN_IDENTIFIER:
479
0
    case G_TOKEN_IDENTIFIER_NULL:
480
0
    case G_TOKEN_COMMENT_SINGLE:
481
0
    case G_TOKEN_COMMENT_MULTI:
482
0
      g_free (value_p->v_string);
483
0
      break;
484
      
485
0
    default:
486
0
      break;
487
0
    }
488
  
489
0
  *token_p = G_TOKEN_NONE;
490
0
}
491
492
static void
493
g_scanner_destroy_symbol_table_entry (gpointer _key,
494
              gpointer _value,
495
              gpointer _data)
496
0
{
497
0
  GScannerKey *key = _key;
498
  
499
0
  g_free (key->symbol);
500
0
  g_free (key);
501
0
}
502
503
/**
504
 * g_scanner_destroy:
505
 * @scanner: a #GScanner
506
 *
507
 * Frees all memory used by the #GScanner.
508
 */
509
void
510
g_scanner_destroy (GScanner *scanner)
511
0
{
512
0
  g_return_if_fail (scanner != NULL);
513
  
514
0
  g_datalist_clear (&scanner->qdata);
515
0
  g_hash_table_foreach (scanner->symbol_table, 
516
0
      g_scanner_destroy_symbol_table_entry, NULL);
517
0
  g_hash_table_destroy (scanner->symbol_table);
518
0
  g_scanner_free_value (&scanner->token, &scanner->value);
519
0
  g_scanner_free_value (&scanner->next_token, &scanner->next_value);
520
0
  g_free (scanner->config);
521
0
  g_free (scanner->buffer);
522
0
  g_free (scanner);
523
0
}
524
525
static void
526
g_scanner_msg_handler (GScanner   *scanner,
527
           gchar    *message,
528
           gboolean   is_error)
529
0
{
530
0
  g_return_if_fail (scanner != NULL);
531
  
532
0
  _g_fprintf (stderr, "%s:%d: ",
533
0
        scanner->input_name ? scanner->input_name : "<memory>",
534
0
        scanner->line);
535
0
  if (is_error)
536
0
    _g_fprintf (stderr, "error: ");
537
0
  _g_fprintf (stderr, "%s\n", message);
538
0
}
539
540
/**
541
 * g_scanner_error:
542
 * @scanner: a #GScanner
543
 * @format: the message format. See the printf() documentation
544
 * @...: the parameters to insert into the format string
545
 *
546
 * Outputs an error message, via the #GScanner message handler.
547
 */
548
void
549
g_scanner_error (GScanner *scanner,
550
     const gchar  *format,
551
     ...)
552
0
{
553
0
  g_return_if_fail (scanner != NULL);
554
0
  g_return_if_fail (format != NULL);
555
  
556
0
  scanner->parse_errors++;
557
  
558
0
  if (scanner->msg_handler)
559
0
    {
560
0
      va_list args;
561
0
      gchar *string;
562
      
563
0
      va_start (args, format);
564
0
      string = g_strdup_vprintf (format, args);
565
0
      va_end (args);
566
      
567
0
      scanner->msg_handler (scanner, string, TRUE);
568
      
569
0
      g_free (string);
570
0
    }
571
0
}
572
573
/**
574
 * g_scanner_warn:
575
 * @scanner: a #GScanner
576
 * @format: the message format. See the printf() documentation
577
 * @...: the parameters to insert into the format string
578
 *
579
 * Outputs a warning message, via the #GScanner message handler.
580
 */
581
void
582
g_scanner_warn (GScanner       *scanner,
583
    const gchar    *format,
584
    ...)
585
0
{
586
0
  g_return_if_fail (scanner != NULL);
587
0
  g_return_if_fail (format != NULL);
588
  
589
0
  if (scanner->msg_handler)
590
0
    {
591
0
      va_list args;
592
0
      gchar *string;
593
      
594
0
      va_start (args, format);
595
0
      string = g_strdup_vprintf (format, args);
596
0
      va_end (args);
597
      
598
0
      scanner->msg_handler (scanner, string, FALSE);
599
      
600
0
      g_free (string);
601
0
    }
602
0
}
603
604
static gboolean
605
g_scanner_key_equal (gconstpointer v1,
606
         gconstpointer v2)
607
0
{
608
0
  const GScannerKey *key1 = v1;
609
0
  const GScannerKey *key2 = v2;
610
  
611
0
  return (key1->scope_id == key2->scope_id) && (strcmp (key1->symbol, key2->symbol) == 0);
612
0
}
613
614
static guint
615
g_scanner_key_hash (gconstpointer v)
616
0
{
617
0
  const GScannerKey *key = v;
618
0
  gchar *c;
619
0
  guint h;
620
  
621
0
  h = key->scope_id;
622
0
  for (c = key->symbol; *c; c++)
623
0
    h = (h << 5) - h + *c;
624
  
625
0
  return h;
626
0
}
627
628
static inline GScannerKey*
629
g_scanner_lookup_internal (GScanner *scanner,
630
         guint   scope_id,
631
         const gchar  *symbol)
632
0
{
633
0
  GScannerKey *key_p;
634
0
  GScannerKey key;
635
  
636
0
  key.scope_id = scope_id;
637
  
638
0
  if (!scanner->config->case_sensitive)
639
0
    {
640
0
      gchar *d;
641
0
      const gchar *c;
642
      
643
0
      key.symbol = g_new (gchar, strlen (symbol) + 1);
644
0
      for (d = key.symbol, c = symbol; *c; c++, d++)
645
0
  *d = to_lower (*c);
646
0
      *d = 0;
647
0
      key_p = g_hash_table_lookup (scanner->symbol_table, &key);
648
0
      g_free (key.symbol);
649
0
    }
650
0
  else
651
0
    {
652
0
      key.symbol = (gchar*) symbol;
653
0
      key_p = g_hash_table_lookup (scanner->symbol_table, &key);
654
0
    }
655
  
656
0
  return key_p;
657
0
}
658
659
/**
660
 * g_scanner_add_symbol:
661
 * @scanner: a #GScanner
662
 * @symbol: the symbol to add
663
 * @value: the value of the symbol
664
 *
665
 * Adds a symbol to the default scope.
666
 *
667
 * Deprecated: 2.2: Use g_scanner_scope_add_symbol() instead.
668
 */
669
670
/**
671
 * g_scanner_scope_add_symbol:
672
 * @scanner: a #GScanner
673
 * @scope_id: the scope id
674
 * @symbol: the symbol to add
675
 * @value: the value of the symbol
676
 *
677
 * Adds a symbol to the given scope.
678
 */
679
void
680
g_scanner_scope_add_symbol (GScanner  *scanner,
681
          guint  scope_id,
682
          const gchar *symbol,
683
          gpointer   value)
684
0
{
685
0
  GScannerKey *key;
686
  
687
0
  g_return_if_fail (scanner != NULL);
688
0
  g_return_if_fail (symbol != NULL);
689
  
690
0
  key = g_scanner_lookup_internal (scanner, scope_id, symbol);
691
  
692
0
  if (!key)
693
0
    {
694
0
      key = g_new (GScannerKey, 1);
695
0
      key->scope_id = scope_id;
696
0
      key->symbol = g_strdup (symbol);
697
0
      key->value = value;
698
0
      if (!scanner->config->case_sensitive)
699
0
  {
700
0
    gchar *c;
701
    
702
0
    c = key->symbol;
703
0
    while (*c != 0)
704
0
      {
705
0
        *c = to_lower (*c);
706
0
        c++;
707
0
      }
708
0
  }
709
0
      g_hash_table_add (scanner->symbol_table, key);
710
0
    }
711
0
  else
712
0
    key->value = value;
713
0
}
714
715
/**
716
 * g_scanner_remove_symbol:
717
 * @scanner: a #GScanner
718
 * @symbol: the symbol to remove
719
 *
720
 * Removes a symbol from the default scope.
721
 *
722
 * Deprecated: 2.2: Use g_scanner_scope_remove_symbol() instead.
723
 */
724
725
/**
726
 * g_scanner_scope_remove_symbol:
727
 * @scanner: a #GScanner
728
 * @scope_id: the scope id
729
 * @symbol: the symbol to remove
730
 *
731
 * Removes a symbol from a scope.
732
 */
733
void
734
g_scanner_scope_remove_symbol (GScanner    *scanner,
735
             guint      scope_id,
736
             const gchar *symbol)
737
0
{
738
0
  GScannerKey *key;
739
  
740
0
  g_return_if_fail (scanner != NULL);
741
0
  g_return_if_fail (symbol != NULL);
742
  
743
0
  key = g_scanner_lookup_internal (scanner, scope_id, symbol);
744
  
745
0
  if (key)
746
0
    {
747
0
      g_hash_table_remove (scanner->symbol_table, key);
748
0
      g_free (key->symbol);
749
0
      g_free (key);
750
0
    }
751
0
}
752
753
/**
754
 * g_scanner_freeze_symbol_table:
755
 * @scanner: a #GScanner
756
 *
757
 * There is no reason to use this macro, since it does nothing.
758
 *
759
 * Deprecated: 2.2: This macro does nothing.
760
 */
761
762
/**
763
 * g_scanner_thaw_symbol_table:
764
 * @scanner: a #GScanner
765
 *
766
 * There is no reason to use this macro, since it does nothing.
767
 *
768
 * Deprecated: 2.2: This macro does nothing.
769
 */
770
771
/**
772
 * g_scanner_lookup_symbol:
773
 * @scanner: a #GScanner
774
 * @symbol: the symbol to look up
775
 *
776
 * Looks up a symbol in the current scope and returns its value.
777
 * If the symbol is not bound in the current scope, %NULL is
778
 * returned.
779
 *
780
 * Returns: the value of @symbol in the current scope, or %NULL
781
 *     if @symbol is not bound in the current scope
782
 */
783
gpointer
784
g_scanner_lookup_symbol (GScanner *scanner,
785
       const gchar  *symbol)
786
0
{
787
0
  GScannerKey *key;
788
0
  guint scope_id;
789
  
790
0
  g_return_val_if_fail (scanner != NULL, NULL);
791
  
792
0
  if (!symbol)
793
0
    return NULL;
794
  
795
0
  scope_id = scanner->scope_id;
796
0
  key = g_scanner_lookup_internal (scanner, scope_id, symbol);
797
0
  if (!key && scope_id && scanner->config->scope_0_fallback)
798
0
    key = g_scanner_lookup_internal (scanner, 0, symbol);
799
  
800
0
  if (key)
801
0
    return key->value;
802
0
  else
803
0
    return NULL;
804
0
}
805
806
/**
807
 * g_scanner_scope_lookup_symbol:
808
 * @scanner: a #GScanner
809
 * @scope_id: the scope id
810
 * @symbol: the symbol to look up
811
 *
812
 * Looks up a symbol in a scope and returns its value. If the
813
 * symbol is not bound in the scope, %NULL is returned.
814
 *
815
 * Returns: the value of @symbol in the given scope, or %NULL
816
 *     if @symbol is not bound in the given scope.
817
 *
818
 */
819
gpointer
820
g_scanner_scope_lookup_symbol (GScanner       *scanner,
821
             guint         scope_id,
822
             const gchar    *symbol)
823
0
{
824
0
  GScannerKey *key;
825
  
826
0
  g_return_val_if_fail (scanner != NULL, NULL);
827
  
828
0
  if (!symbol)
829
0
    return NULL;
830
  
831
0
  key = g_scanner_lookup_internal (scanner, scope_id, symbol);
832
  
833
0
  if (key)
834
0
    return key->value;
835
0
  else
836
0
    return NULL;
837
0
}
838
839
/**
840
 * g_scanner_set_scope:
841
 * @scanner: a #GScanner
842
 * @scope_id: the new scope id
843
 *
844
 * Sets the current scope.
845
 *
846
 * Returns: the old scope id
847
 */
848
guint
849
g_scanner_set_scope (GScanner     *scanner,
850
         guint       scope_id)
851
0
{
852
0
  guint old_scope_id;
853
  
854
0
  g_return_val_if_fail (scanner != NULL, 0);
855
  
856
0
  old_scope_id = scanner->scope_id;
857
0
  scanner->scope_id = scope_id;
858
  
859
0
  return old_scope_id;
860
0
}
861
862
static void
863
g_scanner_foreach_internal (gpointer  _key,
864
          gpointer  _value,
865
          gpointer  _user_data)
866
0
{
867
0
  GScannerKey *key;
868
0
  gpointer *d;
869
0
  GHFunc func;
870
0
  gpointer user_data;
871
0
  guint *scope_id;
872
  
873
0
  d = _user_data;
874
0
  func = (GHFunc) d[0];
875
0
  user_data = d[1];
876
0
  scope_id = d[2];
877
0
  key = _value;
878
  
879
0
  if (key->scope_id == *scope_id)
880
0
    func (key->symbol, key->value, user_data);
881
0
}
882
883
/**
884
 * g_scanner_foreach_symbol:
885
 * @scanner: a #GScanner
886
 * @func: the function to call with each symbol
887
 * @data: data to pass to the function
888
 *
889
 * Calls a function for each symbol in the default scope.
890
 *
891
 * Deprecated: 2.2: Use g_scanner_scope_foreach_symbol() instead.
892
 */
893
894
/**
895
 * g_scanner_scope_foreach_symbol:
896
 * @scanner: a #GScanner
897
 * @scope_id: the scope id
898
 * @func: (scope call): the function to call for each symbol/value pair
899
 * @user_data: user data to pass to the function
900
 *
901
 * Calls the given function for each of the symbol/value pairs
902
 * in the given scope of the #GScanner. The function is passed
903
 * the symbol and value of each pair, and the given @user_data
904
 * parameter.
905
 */
906
void
907
g_scanner_scope_foreach_symbol (GScanner       *scanner,
908
        guint   scope_id,
909
        GHFunc    func,
910
        gpointer  user_data)
911
0
{
912
0
  gpointer d[3];
913
  
914
0
  g_return_if_fail (scanner != NULL);
915
  
916
0
  d[0] = (gpointer) func;
917
0
  d[1] = user_data;
918
0
  d[2] = &scope_id;
919
  
920
0
  g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
921
0
}
922
923
/**
924
 * g_scanner_peek_next_token:
925
 * @scanner: a #GScanner
926
 *
927
 * Parses the next token, without removing it from the input stream.
928
 * The token data is placed in the @next_token, @next_value, @next_line,
929
 * and @next_position fields of the #GScanner structure.
930
 *
931
 * Note that, while the token is not removed from the input stream
932
 * (i.e. the next call to g_scanner_get_next_token() will return the
933
 * same token), it will not be reevaluated. This can lead to surprising
934
 * results when changing scope or the scanner configuration after peeking
935
 * the next token. Getting the next token after switching the scope or
936
 * configuration will return whatever was peeked before, regardless of
937
 * any symbols that may have been added or removed in the new scope.
938
 *
939
 * Returns: the type of the token
940
 */
941
GTokenType
942
g_scanner_peek_next_token (GScanner *scanner)
943
0
{
944
0
  g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
945
  
946
0
  if (scanner->next_token == G_TOKEN_NONE)
947
0
    {
948
0
      scanner->next_line = scanner->line;
949
0
      scanner->next_position = scanner->position;
950
0
      g_scanner_get_token_i (scanner,
951
0
           &scanner->next_token,
952
0
           &scanner->next_value,
953
0
           &scanner->next_line,
954
0
           &scanner->next_position);
955
0
    }
956
  
957
0
  return scanner->next_token;
958
0
}
959
960
/**
961
 * g_scanner_get_next_token:
962
 * @scanner: a #GScanner
963
 *
964
 * Parses the next token just like g_scanner_peek_next_token()
965
 * and also removes it from the input stream. The token data is
966
 * placed in the @token, @value, @line, and @position fields of
967
 * the #GScanner structure.
968
 *
969
 * Returns: the type of the token
970
 */
971
GTokenType
972
g_scanner_get_next_token (GScanner  *scanner)
973
0
{
974
0
  g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
975
  
976
0
  if (scanner->next_token != G_TOKEN_NONE)
977
0
    {
978
0
      g_scanner_free_value (&scanner->token, &scanner->value);
979
      
980
0
      scanner->token = scanner->next_token;
981
0
      scanner->value = scanner->next_value;
982
0
      scanner->line = scanner->next_line;
983
0
      scanner->position = scanner->next_position;
984
0
      scanner->next_token = G_TOKEN_NONE;
985
0
    }
986
0
  else
987
0
    g_scanner_get_token_i (scanner,
988
0
         &scanner->token,
989
0
         &scanner->value,
990
0
         &scanner->line,
991
0
         &scanner->position);
992
  
993
0
  return scanner->token;
994
0
}
995
996
/**
997
 * g_scanner_cur_token:
998
 * @scanner: a #GScanner
999
 *
1000
 * Gets the current token type. This is simply the @token
1001
 * field in the #GScanner structure.
1002
 *
1003
 * Returns: the current token type
1004
 */
1005
GTokenType
1006
g_scanner_cur_token (GScanner *scanner)
1007
0
{
1008
0
  g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
1009
  
1010
0
  return scanner->token;
1011
0
}
1012
1013
/**
1014
 * g_scanner_cur_value:
1015
 * @scanner: a #GScanner
1016
 *
1017
 * Gets the current token value. This is simply the @value
1018
 * field in the #GScanner structure.
1019
 *
1020
 * Returns: the current token value
1021
 */
1022
GTokenValue
1023
g_scanner_cur_value (GScanner *scanner)
1024
0
{
1025
0
  GTokenValue v;
1026
  
1027
0
  v.v_int64 = 0;
1028
  
1029
0
  g_return_val_if_fail (scanner != NULL, v);
1030
1031
  /* MSC isn't capable of handling return scanner->value; ? */
1032
1033
0
  v = scanner->value;
1034
1035
0
  return v;
1036
0
}
1037
1038
/**
1039
 * g_scanner_cur_line:
1040
 * @scanner: a #GScanner
1041
 *
1042
 * Returns the current line in the input stream (counting
1043
 * from 1). This is the line of the last token parsed via
1044
 * g_scanner_get_next_token().
1045
 *
1046
 * Returns: the current line
1047
 */
1048
guint
1049
g_scanner_cur_line (GScanner *scanner)
1050
0
{
1051
0
  g_return_val_if_fail (scanner != NULL, 0);
1052
  
1053
0
  return scanner->line;
1054
0
}
1055
1056
/**
1057
 * g_scanner_cur_position:
1058
 * @scanner: a #GScanner
1059
 *
1060
 * Returns the current position in the current line (counting
1061
 * from 0). This is the position of the last token parsed via
1062
 * g_scanner_get_next_token().
1063
 *
1064
 * Returns: the current position on the line
1065
 */
1066
guint
1067
g_scanner_cur_position (GScanner *scanner)
1068
0
{
1069
0
  g_return_val_if_fail (scanner != NULL, 0);
1070
  
1071
0
  return scanner->position;
1072
0
}
1073
1074
/**
1075
 * g_scanner_eof:
1076
 * @scanner: a #GScanner
1077
 *
1078
 * Returns %TRUE if the scanner has reached the end of
1079
 * the file or text buffer.
1080
 *
1081
 * Returns: %TRUE if the scanner has reached the end of
1082
 *     the file or text buffer
1083
 */
1084
gboolean
1085
g_scanner_eof (GScanner *scanner)
1086
0
{
1087
0
  g_return_val_if_fail (scanner != NULL, TRUE);
1088
  
1089
0
  return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
1090
0
}
1091
1092
/**
1093
 * g_scanner_input_file:
1094
 * @scanner: a #GScanner
1095
 * @input_fd: a file descriptor
1096
 *
1097
 * Prepares to scan a file.
1098
 */
1099
void
1100
g_scanner_input_file (GScanner *scanner,
1101
          gint  input_fd)
1102
0
{
1103
0
  g_return_if_fail (scanner != NULL);
1104
0
  g_return_if_fail (input_fd >= 0);
1105
1106
0
  if (scanner->input_fd >= 0)
1107
0
    g_scanner_sync_file_offset (scanner);
1108
1109
0
  scanner->token = G_TOKEN_NONE;
1110
0
  scanner->value.v_int64 = 0;
1111
0
  scanner->line = 1;
1112
0
  scanner->position = 0;
1113
0
  scanner->next_token = G_TOKEN_NONE;
1114
1115
0
  scanner->input_fd = input_fd;
1116
0
  scanner->text = NULL;
1117
0
  scanner->text_end = NULL;
1118
1119
0
  if (!scanner->buffer)
1120
0
    scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
1121
0
}
1122
1123
/**
1124
 * g_scanner_input_text:
1125
 * @scanner: a #GScanner
1126
 * @text: the text buffer to scan
1127
 * @text_len: the length of the text buffer
1128
 *
1129
 * Prepares to scan a text buffer.
1130
 */
1131
void
1132
g_scanner_input_text (GScanner    *scanner,
1133
          const gchar *text,
1134
          guint    text_len)
1135
0
{
1136
0
  g_return_if_fail (scanner != NULL);
1137
0
  if (text_len)
1138
0
    g_return_if_fail (text != NULL);
1139
0
  else
1140
0
    text = NULL;
1141
1142
0
  if (scanner->input_fd >= 0)
1143
0
    g_scanner_sync_file_offset (scanner);
1144
1145
0
  scanner->token = G_TOKEN_NONE;
1146
0
  scanner->value.v_int64 = 0;
1147
0
  scanner->line = 1;
1148
0
  scanner->position = 0;
1149
0
  scanner->next_token = G_TOKEN_NONE;
1150
1151
0
  scanner->input_fd = -1;
1152
0
  scanner->text = text;
1153
0
  scanner->text_end = text + text_len;
1154
1155
0
  if (scanner->buffer)
1156
0
    {
1157
0
      g_free (scanner->buffer);
1158
0
      scanner->buffer = NULL;
1159
0
    }
1160
0
}
1161
1162
static guchar
1163
g_scanner_peek_next_char (GScanner *scanner)
1164
0
{
1165
0
  if (scanner->text < scanner->text_end)
1166
0
    {
1167
0
      return *scanner->text;
1168
0
    }
1169
0
  else if (scanner->input_fd >= 0)
1170
0
    {
1171
0
      gint count;
1172
0
      gchar *buffer;
1173
1174
0
      buffer = scanner->buffer;
1175
0
      do
1176
0
  {
1177
0
    count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1178
0
  }
1179
0
      while (count == -1 && (errno == EINTR || errno == EAGAIN));
1180
1181
0
      if (count < 1)
1182
0
  {
1183
0
    scanner->input_fd = -1;
1184
1185
0
    return 0;
1186
0
  }
1187
0
      else
1188
0
  {
1189
0
    scanner->text = buffer;
1190
0
    scanner->text_end = buffer + count;
1191
1192
0
    return *buffer;
1193
0
  }
1194
0
    }
1195
0
  else
1196
0
    return 0;
1197
0
}
1198
1199
/**
1200
 * g_scanner_sync_file_offset:
1201
 * @scanner: a #GScanner
1202
 *
1203
 * Rewinds the file descriptor to the current buffer position
1204
 * and blows the file read ahead buffer. This is useful for
1205
 * third party uses of the scanner's file descriptor, which hooks
1206
 * onto the current scanning position.
1207
 */
1208
void
1209
g_scanner_sync_file_offset (GScanner *scanner)
1210
0
{
1211
0
  g_return_if_fail (scanner != NULL);
1212
1213
  /* for file input, rewind the filedescriptor to the current
1214
   * buffer position and blow the file read ahead buffer. useful
1215
   * for third party uses of our file descriptor, which hooks 
1216
   * onto the current scanning position.
1217
   */
1218
1219
0
  if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
1220
0
    {
1221
0
      gint buffered;
1222
1223
0
      buffered = scanner->text_end - scanner->text;
1224
0
      if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
1225
0
  {
1226
    /* we succeeded, blow our buffer's contents now */
1227
0
    scanner->text = NULL;
1228
0
    scanner->text_end = NULL;
1229
0
  }
1230
0
      else
1231
0
  errno = 0;
1232
0
    }
1233
0
}
1234
1235
static guchar
1236
g_scanner_get_char (GScanner  *scanner,
1237
        guint *line_p,
1238
        guint *position_p)
1239
0
{
1240
0
  guchar fchar;
1241
1242
0
  if (scanner->text < scanner->text_end)
1243
0
    fchar = *(scanner->text++);
1244
0
  else if (scanner->input_fd >= 0)
1245
0
    {
1246
0
      gint count;
1247
0
      gchar *buffer;
1248
1249
0
      buffer = scanner->buffer;
1250
0
      do
1251
0
  {
1252
0
    count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1253
0
  }
1254
0
      while (count == -1 && (errno == EINTR || errno == EAGAIN));
1255
1256
0
      if (count < 1)
1257
0
  {
1258
0
    scanner->input_fd = -1;
1259
0
    fchar = 0;
1260
0
  }
1261
0
      else
1262
0
  {
1263
0
    scanner->text = buffer + 1;
1264
0
    scanner->text_end = buffer + count;
1265
0
    fchar = *buffer;
1266
0
    if (!fchar)
1267
0
      {
1268
0
        g_scanner_sync_file_offset (scanner);
1269
0
        scanner->text_end = scanner->text;
1270
0
        scanner->input_fd = -1;
1271
0
      }
1272
0
  }
1273
0
    }
1274
0
  else
1275
0
    fchar = 0;
1276
  
1277
0
  if (fchar == '\n')
1278
0
    {
1279
0
      (*position_p) = 0;
1280
0
      (*line_p)++;
1281
0
    }
1282
0
  else if (fchar)
1283
0
    {
1284
0
      (*position_p)++;
1285
0
    }
1286
  
1287
0
  return fchar;
1288
0
}
1289
1290
/**
1291
 * g_scanner_unexp_token:
1292
 * @scanner: a #GScanner
1293
 * @expected_token: the expected token
1294
 * @identifier_spec: a string describing how the scanner's user
1295
 *     refers to identifiers (%NULL defaults to "identifier").
1296
 *     This is used if @expected_token is %G_TOKEN_IDENTIFIER or
1297
 *     %G_TOKEN_IDENTIFIER_NULL.
1298
 * @symbol_spec: a string describing how the scanner's user refers
1299
 *     to symbols (%NULL defaults to "symbol"). This is used if
1300
 *     @expected_token is %G_TOKEN_SYMBOL or any token value greater
1301
 *     than %G_TOKEN_LAST.
1302
 * @symbol_name: the name of the symbol, if the scanner's current
1303
 *     token is a symbol.
1304
 * @message: a message string to output at the end of the
1305
 *     warning/error, or %NULL.
1306
 * @is_error: if %TRUE it is output as an error. If %FALSE it is
1307
 *     output as a warning.
1308
 *
1309
 * Outputs a message through the scanner's msg_handler,
1310
 * resulting from an unexpected token in the input stream.
1311
 * Note that you should not call g_scanner_peek_next_token()
1312
 * followed by g_scanner_unexp_token() without an intermediate
1313
 * call to g_scanner_get_next_token(), as g_scanner_unexp_token()
1314
 * evaluates the scanner's current token (not the peeked token)
1315
 * to construct part of the message.
1316
 */
1317
void
1318
g_scanner_unexp_token (GScanner   *scanner,
1319
           GTokenType  expected_token,
1320
           const gchar  *identifier_spec,
1321
           const gchar  *symbol_spec,
1322
           const gchar  *symbol_name,
1323
           const gchar  *message,
1324
           gint    is_error)
1325
0
{
1326
0
  gchar *token_string;
1327
0
  guint token_string_len;
1328
0
  gchar *expected_string;
1329
0
  guint expected_string_len;
1330
0
  gchar *message_prefix;
1331
0
  gboolean print_unexp;
1332
0
  void (*msg_handler) (GScanner*, const gchar*, ...);
1333
  
1334
0
  g_return_if_fail (scanner != NULL);
1335
  
1336
0
  if (is_error)
1337
0
    msg_handler = g_scanner_error;
1338
0
  else
1339
0
    msg_handler = g_scanner_warn;
1340
  
1341
0
  if (!identifier_spec)
1342
0
    identifier_spec = "identifier";
1343
0
  if (!symbol_spec)
1344
0
    symbol_spec = "symbol";
1345
  
1346
0
  token_string_len = 56;
1347
0
  token_string = g_new (gchar, token_string_len + 1);
1348
0
  expected_string_len = 64;
1349
0
  expected_string = g_new (gchar, expected_string_len + 1);
1350
0
  print_unexp = TRUE;
1351
  
1352
0
  switch (scanner->token)
1353
0
    {
1354
0
    case G_TOKEN_EOF:
1355
0
      _g_snprintf (token_string, token_string_len, "end of file");
1356
0
      break;
1357
      
1358
0
    default:
1359
0
      if (scanner->token >= 1 && scanner->token <= 255)
1360
0
  {
1361
0
    if ((scanner->token >= ' ' && scanner->token <= '~') ||
1362
0
        strchr (scanner->config->cset_identifier_first, scanner->token) ||
1363
0
        strchr (scanner->config->cset_identifier_nth, scanner->token))
1364
0
      _g_snprintf (token_string, token_string_len, "character '%c'", scanner->token);
1365
0
    else
1366
0
      _g_snprintf (token_string, token_string_len, "character '\\%o'", scanner->token);
1367
0
    break;
1368
0
  }
1369
0
      else if (!scanner->config->symbol_2_token)
1370
0
  {
1371
0
    _g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
1372
0
    break;
1373
0
  }
1374
0
      G_GNUC_FALLTHROUGH;
1375
0
    case G_TOKEN_SYMBOL:
1376
0
      if (expected_token == G_TOKEN_SYMBOL ||
1377
0
    (scanner->config->symbol_2_token &&
1378
0
     expected_token > G_TOKEN_LAST))
1379
0
  print_unexp = FALSE;
1380
0
      if (symbol_name)
1381
0
  _g_snprintf (token_string,
1382
0
         token_string_len,
1383
0
         "%s%s '%s'",
1384
0
         print_unexp ? "" : "invalid ",
1385
0
         symbol_spec,
1386
0
         symbol_name);
1387
0
      else
1388
0
  _g_snprintf (token_string,
1389
0
         token_string_len,
1390
0
         "%s%s",
1391
0
         print_unexp ? "" : "invalid ",
1392
0
         symbol_spec);
1393
0
      break;
1394
      
1395
0
    case G_TOKEN_ERROR:
1396
0
      print_unexp = FALSE;
1397
0
      expected_token = G_TOKEN_NONE;
1398
0
      switch (scanner->value.v_error)
1399
0
  {
1400
0
  case G_ERR_UNEXP_EOF:
1401
0
    _g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
1402
0
    break;
1403
    
1404
0
  case G_ERR_UNEXP_EOF_IN_STRING:
1405
0
    _g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
1406
0
    break;
1407
    
1408
0
  case G_ERR_UNEXP_EOF_IN_COMMENT:
1409
0
    _g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
1410
0
    break;
1411
    
1412
0
  case G_ERR_NON_DIGIT_IN_CONST:
1413
0
    _g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
1414
0
    break;
1415
    
1416
0
  case G_ERR_FLOAT_RADIX:
1417
0
    _g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
1418
0
    break;
1419
    
1420
0
  case G_ERR_FLOAT_MALFORMED:
1421
0
    _g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
1422
0
    break;
1423
    
1424
0
  case G_ERR_DIGIT_RADIX:
1425
0
    _g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
1426
0
    break;
1427
    
1428
0
  case G_ERR_UNKNOWN:
1429
0
  default:
1430
0
    _g_snprintf (token_string, token_string_len, "scanner: unknown error");
1431
0
    break;
1432
0
  }
1433
0
      break;
1434
      
1435
0
    case G_TOKEN_CHAR:
1436
0
      _g_snprintf (token_string, token_string_len, "character '%c'", scanner->value.v_char);
1437
0
      break;
1438
      
1439
0
    case G_TOKEN_IDENTIFIER:
1440
0
    case G_TOKEN_IDENTIFIER_NULL:
1441
0
      if (expected_token == G_TOKEN_IDENTIFIER ||
1442
0
    expected_token == G_TOKEN_IDENTIFIER_NULL)
1443
0
  print_unexp = FALSE;
1444
0
      _g_snprintf (token_string,
1445
0
      token_string_len,
1446
0
      "%s%s '%s'",
1447
0
      print_unexp ? "" : "invalid ",
1448
0
      identifier_spec,
1449
0
      scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
1450
0
      break;
1451
      
1452
0
    case G_TOKEN_BINARY:
1453
0
    case G_TOKEN_OCTAL:
1454
0
    case G_TOKEN_INT:
1455
0
    case G_TOKEN_HEX:
1456
0
      if (scanner->config->store_int64)
1457
0
  _g_snprintf (token_string, token_string_len, "number '%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
1458
0
      else
1459
0
  _g_snprintf (token_string, token_string_len, "number '%lu'", scanner->value.v_int);
1460
0
      break;
1461
      
1462
0
    case G_TOKEN_FLOAT:
1463
0
      _g_snprintf (token_string, token_string_len, "number '%.3f'", scanner->value.v_float);
1464
0
      break;
1465
      
1466
0
    case G_TOKEN_STRING:
1467
0
      if (expected_token == G_TOKEN_STRING)
1468
0
  print_unexp = FALSE;
1469
0
      _g_snprintf (token_string,
1470
0
       token_string_len,
1471
0
       "%s%sstring constant \"%s\"",
1472
0
       print_unexp ? "" : "invalid ",
1473
0
       scanner->value.v_string[0] == 0 ? "empty " : "",
1474
0
       scanner->value.v_string);
1475
0
      token_string[token_string_len - 2] = '"';
1476
0
      token_string[token_string_len - 1] = 0;
1477
0
      break;
1478
      
1479
0
    case G_TOKEN_COMMENT_SINGLE:
1480
0
    case G_TOKEN_COMMENT_MULTI:
1481
0
      _g_snprintf (token_string, token_string_len, "comment");
1482
0
      break;
1483
      
1484
0
    case G_TOKEN_NONE:
1485
      /* somehow the user's parsing code is screwed, there isn't much
1486
       * we can do about it.
1487
       * Note, a common case to trigger this is
1488
       * g_scanner_peek_next_token(); g_scanner_unexp_token();
1489
       * without an intermediate g_scanner_get_next_token().
1490
       */
1491
0
      g_assert_not_reached ();
1492
0
      break;
1493
0
    }
1494
  
1495
  
1496
0
  switch (expected_token)
1497
0
    {
1498
0
      gboolean need_valid;
1499
0
      gchar *tstring;
1500
0
    case G_TOKEN_EOF:
1501
0
      _g_snprintf (expected_string, expected_string_len, "end of file");
1502
0
      break;
1503
0
    default:
1504
0
      if (expected_token >= 1 && expected_token <= 255)
1505
0
  {
1506
0
    if ((expected_token >= ' ' && expected_token <= '~') ||
1507
0
        strchr (scanner->config->cset_identifier_first, expected_token) ||
1508
0
        strchr (scanner->config->cset_identifier_nth, expected_token))
1509
0
      _g_snprintf (expected_string, expected_string_len, "character '%c'", expected_token);
1510
0
    else
1511
0
      _g_snprintf (expected_string, expected_string_len, "character '\\%o'", expected_token);
1512
0
    break;
1513
0
  }
1514
0
      else if (!scanner->config->symbol_2_token)
1515
0
  {
1516
0
    _g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1517
0
    break;
1518
0
  }
1519
0
      G_GNUC_FALLTHROUGH;
1520
0
    case G_TOKEN_SYMBOL:
1521
0
      need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1522
0
        (scanner->config->symbol_2_token &&
1523
0
         scanner->token > G_TOKEN_LAST));
1524
0
      _g_snprintf (expected_string,
1525
0
       expected_string_len,
1526
0
       "%s%s",
1527
0
       need_valid ? "valid " : "",
1528
0
       symbol_spec);
1529
      /* FIXME: should we attempt to look up the symbol_name for symbol_2_token? */
1530
0
      break;
1531
0
    case G_TOKEN_CHAR:
1532
0
      _g_snprintf (expected_string, expected_string_len, "%scharacter",
1533
0
       scanner->token == G_TOKEN_CHAR ? "valid " : "");
1534
0
      break;
1535
0
    case G_TOKEN_BINARY:
1536
0
      tstring = "binary";
1537
0
      _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1538
0
       scanner->token == expected_token ? "valid " : "", tstring);
1539
0
      break;
1540
0
    case G_TOKEN_OCTAL:
1541
0
      tstring = "octal";
1542
0
      _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1543
0
       scanner->token == expected_token ? "valid " : "", tstring);
1544
0
      break;
1545
0
    case G_TOKEN_INT:
1546
0
      tstring = "integer";
1547
0
      _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1548
0
       scanner->token == expected_token ? "valid " : "", tstring);
1549
0
      break;
1550
0
    case G_TOKEN_HEX:
1551
0
      tstring = "hexadecimal";
1552
0
      _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1553
0
       scanner->token == expected_token ? "valid " : "", tstring);
1554
0
      break;
1555
0
    case G_TOKEN_FLOAT:
1556
0
      tstring = "float";
1557
0
      _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1558
0
       scanner->token == expected_token ? "valid " : "", tstring);
1559
0
      break;
1560
0
    case G_TOKEN_STRING:
1561
0
      _g_snprintf (expected_string,
1562
0
       expected_string_len,
1563
0
       "%sstring constant",
1564
0
       scanner->token == G_TOKEN_STRING ? "valid " : "");
1565
0
      break;
1566
0
    case G_TOKEN_IDENTIFIER:
1567
0
    case G_TOKEN_IDENTIFIER_NULL:
1568
0
      need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1569
0
        scanner->token == G_TOKEN_IDENTIFIER);
1570
0
      _g_snprintf (expected_string,
1571
0
       expected_string_len,
1572
0
       "%s%s",
1573
0
       need_valid ? "valid " : "",
1574
0
       identifier_spec);
1575
0
      break;
1576
0
    case G_TOKEN_COMMENT_SINGLE:
1577
0
      tstring = "single-line";
1578
0
      _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1579
0
       scanner->token == expected_token ? "valid " : "", tstring);
1580
0
      break;
1581
0
    case G_TOKEN_COMMENT_MULTI:
1582
0
      tstring = "multi-line";
1583
0
      _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1584
0
       scanner->token == expected_token ? "valid " : "", tstring);
1585
0
      break;
1586
0
    case G_TOKEN_NONE:
1587
0
    case G_TOKEN_ERROR:
1588
      /* this is handled upon printout */
1589
0
      break;
1590
0
    }
1591
  
1592
0
  if (message && message[0] != 0)
1593
0
    message_prefix = " - ";
1594
0
  else
1595
0
    {
1596
0
      message_prefix = "";
1597
0
      message = "";
1598
0
    }
1599
0
  if (expected_token == G_TOKEN_ERROR)
1600
0
    {
1601
0
      msg_handler (scanner,
1602
0
       "failure around %s%s%s",
1603
0
       token_string,
1604
0
       message_prefix,
1605
0
       message);
1606
0
    }
1607
0
  else if (expected_token == G_TOKEN_NONE)
1608
0
    {
1609
0
      if (print_unexp)
1610
0
  msg_handler (scanner,
1611
0
         "unexpected %s%s%s",
1612
0
         token_string,
1613
0
         message_prefix,
1614
0
         message);
1615
0
      else
1616
0
  msg_handler (scanner,
1617
0
         "%s%s%s",
1618
0
         token_string,
1619
0
         message_prefix,
1620
0
         message);
1621
0
    }
1622
0
  else
1623
0
    {
1624
0
      if (print_unexp)
1625
0
  msg_handler (scanner,
1626
0
         "unexpected %s, expected %s%s%s",
1627
0
         token_string,
1628
0
         expected_string,
1629
0
         message_prefix,
1630
0
         message);
1631
0
      else
1632
0
  msg_handler (scanner,
1633
0
         "%s, expected %s%s%s",
1634
0
         token_string,
1635
0
         expected_string,
1636
0
         message_prefix,
1637
0
         message);
1638
0
    }
1639
  
1640
0
  g_free (token_string);
1641
0
  g_free (expected_string);
1642
0
}
1643
1644
static void
1645
g_scanner_get_token_i (GScanner *scanner,
1646
           GTokenType *token_p,
1647
           GTokenValue  *value_p,
1648
           guint    *line_p,
1649
           guint    *position_p)
1650
0
{
1651
0
  do
1652
0
    {
1653
0
      g_scanner_free_value (token_p, value_p);
1654
0
      g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1655
0
    }
1656
0
  while (((*token_p > 0 && *token_p < 256) &&
1657
0
    strchr (scanner->config->cset_skip_characters, *token_p)) ||
1658
0
   (*token_p == G_TOKEN_CHAR &&
1659
0
    strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1660
0
   (*token_p == G_TOKEN_COMMENT_MULTI &&
1661
0
    scanner->config->skip_comment_multi) ||
1662
0
   (*token_p == G_TOKEN_COMMENT_SINGLE &&
1663
0
    scanner->config->skip_comment_single));
1664
  
1665
0
  switch (*token_p)
1666
0
    {
1667
0
    case G_TOKEN_IDENTIFIER:
1668
0
      if (scanner->config->identifier_2_string)
1669
0
  *token_p = G_TOKEN_STRING;
1670
0
      break;
1671
      
1672
0
    case G_TOKEN_SYMBOL:
1673
0
      if (scanner->config->symbol_2_token)
1674
0
        *token_p = (GTokenType) ((size_t) value_p->v_symbol);
1675
0
      break;
1676
      
1677
0
    case G_TOKEN_BINARY:
1678
0
    case G_TOKEN_OCTAL:
1679
0
    case G_TOKEN_HEX:
1680
0
      if (scanner->config->numbers_2_int)
1681
0
  *token_p = G_TOKEN_INT;
1682
0
      break;
1683
      
1684
0
    default:
1685
0
      break;
1686
0
    }
1687
  
1688
0
  if (*token_p == G_TOKEN_INT &&
1689
0
      scanner->config->int_2_float)
1690
0
    {
1691
0
      *token_p = G_TOKEN_FLOAT;
1692
1693
      /* Have to assign through a temporary variable to avoid undefined behaviour
1694
       * by copying between potentially-overlapping union members. */
1695
0
      if (scanner->config->store_int64)
1696
0
        {
1697
0
          gint64 temp = value_p->v_int64;
1698
0
          value_p->v_float = temp;
1699
0
        }
1700
0
      else
1701
0
        {
1702
0
          gint temp = value_p->v_int;
1703
0
          value_p->v_float = temp;
1704
0
        }
1705
0
    }
1706
  
1707
0
  errno = 0;
1708
0
}
1709
1710
static void
1711
g_scanner_get_token_ll  (GScanner *scanner,
1712
       GTokenType *token_p,
1713
       GTokenValue  *value_p,
1714
       guint    *line_p,
1715
       guint    *position_p)
1716
0
{
1717
0
  GScannerConfig *config;
1718
0
  GTokenType     token;
1719
0
  gboolean     in_comment_multi;
1720
0
  gboolean     in_comment_single;
1721
0
  gboolean     in_string_sq;
1722
0
  gboolean     in_string_dq;
1723
0
  GString   *gstring;
1724
0
  GTokenValue    value;
1725
0
  guchar     ch;
1726
  
1727
0
  config = scanner->config;
1728
0
  (*value_p).v_int64 = 0;
1729
  
1730
0
  if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1731
0
      scanner->token == G_TOKEN_EOF)
1732
0
    {
1733
0
      *token_p = G_TOKEN_EOF;
1734
0
      return;
1735
0
    }
1736
  
1737
0
  in_comment_multi = FALSE;
1738
0
  in_comment_single = FALSE;
1739
0
  in_string_sq = FALSE;
1740
0
  in_string_dq = FALSE;
1741
0
  gstring = NULL;
1742
  
1743
0
  do /* while (ch != 0) */
1744
0
    {
1745
0
      gboolean dotted_float = FALSE;
1746
      
1747
0
      ch = g_scanner_get_char (scanner, line_p, position_p);
1748
      
1749
0
      value.v_int64 = 0;
1750
0
      token = G_TOKEN_NONE;
1751
      
1752
      /* this is *evil*, but needed ;(
1753
       * we first check for identifier first character, because  it
1754
       * might interfere with other key chars like slashes or numbers
1755
       */
1756
0
      if (config->scan_identifier &&
1757
0
    ch && strchr (config->cset_identifier_first, ch))
1758
0
  goto identifier_precedence;
1759
      
1760
0
      switch (ch)
1761
0
  {
1762
0
  case 0:
1763
0
    token = G_TOKEN_EOF;
1764
0
    (*position_p)++;
1765
    /* ch = 0; */
1766
0
    break;
1767
    
1768
0
  case '/':
1769
0
    if (!config->scan_comment_multi ||
1770
0
        g_scanner_peek_next_char (scanner) != '*')
1771
0
      goto default_case;
1772
0
    g_scanner_get_char (scanner, line_p, position_p);
1773
0
    token = G_TOKEN_COMMENT_MULTI;
1774
0
    in_comment_multi = TRUE;
1775
0
    gstring = g_string_new (NULL);
1776
0
    while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1777
0
      {
1778
0
        if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
1779
0
    {
1780
0
      g_scanner_get_char (scanner, line_p, position_p);
1781
0
      in_comment_multi = FALSE;
1782
0
      break;
1783
0
    }
1784
0
        else
1785
0
    gstring = g_string_append_c (gstring, ch);
1786
0
      }
1787
0
    ch = 0;
1788
0
    break;
1789
    
1790
0
  case '\'':
1791
0
    if (!config->scan_string_sq)
1792
0
      goto default_case;
1793
0
    token = G_TOKEN_STRING;
1794
0
    in_string_sq = TRUE;
1795
0
    gstring = g_string_new (NULL);
1796
0
    while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1797
0
      {
1798
0
        if (ch == '\'')
1799
0
    {
1800
0
      in_string_sq = FALSE;
1801
0
      break;
1802
0
    }
1803
0
        else
1804
0
    gstring = g_string_append_c (gstring, ch);
1805
0
      }
1806
0
    ch = 0;
1807
0
    break;
1808
    
1809
0
  case '"':
1810
0
    if (!config->scan_string_dq)
1811
0
      goto default_case;
1812
0
    token = G_TOKEN_STRING;
1813
0
    in_string_dq = TRUE;
1814
0
    gstring = g_string_new (NULL);
1815
0
    while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1816
0
      {
1817
0
        if (ch == '"')
1818
0
    {
1819
0
      in_string_dq = FALSE;
1820
0
      break;
1821
0
    }
1822
0
        else
1823
0
    {
1824
0
      if (ch == '\\')
1825
0
        {
1826
0
          ch = g_scanner_get_char (scanner, line_p, position_p);
1827
0
          switch (ch)
1828
0
      {
1829
0
        guint i;
1830
0
        guint fchar;
1831
        
1832
0
      case 0:
1833
0
        break;
1834
        
1835
0
      case '\\':
1836
0
        gstring = g_string_append_c (gstring, '\\');
1837
0
        break;
1838
        
1839
0
      case 'n':
1840
0
        gstring = g_string_append_c (gstring, '\n');
1841
0
        break;
1842
        
1843
0
      case 't':
1844
0
        gstring = g_string_append_c (gstring, '\t');
1845
0
        break;
1846
        
1847
0
      case 'r':
1848
0
        gstring = g_string_append_c (gstring, '\r');
1849
0
        break;
1850
        
1851
0
      case 'b':
1852
0
        gstring = g_string_append_c (gstring, '\b');
1853
0
        break;
1854
        
1855
0
      case 'f':
1856
0
        gstring = g_string_append_c (gstring, '\f');
1857
0
        break;
1858
        
1859
0
      case '0':
1860
0
      case '1':
1861
0
      case '2':
1862
0
      case '3':
1863
0
      case '4':
1864
0
      case '5':
1865
0
      case '6':
1866
0
      case '7':
1867
0
        i = ch - '0';
1868
0
        fchar = g_scanner_peek_next_char (scanner);
1869
0
        if (fchar >= '0' && fchar <= '7')
1870
0
          {
1871
0
            ch = g_scanner_get_char (scanner, line_p, position_p);
1872
0
            i = i * 8 + ch - '0';
1873
0
            fchar = g_scanner_peek_next_char (scanner);
1874
0
            if (fchar >= '0' && fchar <= '7')
1875
0
        {
1876
0
          ch = g_scanner_get_char (scanner, line_p, position_p);
1877
0
          i = i * 8 + ch - '0';
1878
0
        }
1879
0
          }
1880
0
        gstring = g_string_append_c (gstring, i);
1881
0
        break;
1882
        
1883
0
      default:
1884
0
        gstring = g_string_append_c (gstring, ch);
1885
0
        break;
1886
0
      }
1887
0
        }
1888
0
      else
1889
0
        gstring = g_string_append_c (gstring, ch);
1890
0
    }
1891
0
      }
1892
0
    ch = 0;
1893
0
    break;
1894
    
1895
0
  case '.':
1896
0
    if (!config->scan_float)
1897
0
      goto default_case;
1898
0
    token = G_TOKEN_FLOAT;
1899
0
    dotted_float = TRUE;
1900
0
    ch = g_scanner_get_char (scanner, line_p, position_p);
1901
0
    goto number_parsing;
1902
    
1903
0
  case '$':
1904
0
    if (!config->scan_hex_dollar)
1905
0
      goto default_case;
1906
0
    token = G_TOKEN_HEX;
1907
0
    ch = g_scanner_get_char (scanner, line_p, position_p);
1908
0
    goto number_parsing;
1909
    
1910
0
  case '0':
1911
0
    if (config->scan_octal)
1912
0
      token = G_TOKEN_OCTAL;
1913
0
    else
1914
0
      token = G_TOKEN_INT;
1915
0
    ch = g_scanner_peek_next_char (scanner);
1916
0
    if (config->scan_hex && (ch == 'x' || ch == 'X'))
1917
0
      {
1918
0
        token = G_TOKEN_HEX;
1919
0
        g_scanner_get_char (scanner, line_p, position_p);
1920
0
        ch = g_scanner_get_char (scanner, line_p, position_p);
1921
0
        if (ch == 0)
1922
0
    {
1923
0
      token = G_TOKEN_ERROR;
1924
0
      value.v_error = G_ERR_UNEXP_EOF;
1925
0
      (*position_p)++;
1926
0
      break;
1927
0
    }
1928
0
        if (g_scanner_char_2_num (ch, 16) < 0)
1929
0
    {
1930
0
      token = G_TOKEN_ERROR;
1931
0
      value.v_error = G_ERR_DIGIT_RADIX;
1932
0
      ch = 0;
1933
0
      break;
1934
0
    }
1935
0
      }
1936
0
    else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1937
0
      {
1938
0
        token = G_TOKEN_BINARY;
1939
0
        g_scanner_get_char (scanner, line_p, position_p);
1940
0
        ch = g_scanner_get_char (scanner, line_p, position_p);
1941
0
        if (ch == 0)
1942
0
    {
1943
0
      token = G_TOKEN_ERROR;
1944
0
      value.v_error = G_ERR_UNEXP_EOF;
1945
0
      (*position_p)++;
1946
0
      break;
1947
0
    }
1948
0
        if (g_scanner_char_2_num (ch, 10) < 0)
1949
0
    {
1950
0
      token = G_TOKEN_ERROR;
1951
0
      value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1952
0
      ch = 0;
1953
0
      break;
1954
0
    }
1955
0
      }
1956
0
    else
1957
0
      ch = '0';
1958
0
          G_GNUC_FALLTHROUGH;
1959
0
  case '1':
1960
0
  case '2':
1961
0
  case '3':
1962
0
  case '4':
1963
0
  case '5':
1964
0
  case '6':
1965
0
  case '7':
1966
0
  case '8':
1967
0
  case '9':
1968
0
  number_parsing:
1969
0
  {
1970
0
          gboolean in_number = TRUE;
1971
0
    gchar *endptr;
1972
    
1973
0
    if (token == G_TOKEN_NONE)
1974
0
      token = G_TOKEN_INT;
1975
    
1976
0
    gstring = g_string_new (dotted_float ? "0." : "");
1977
0
    gstring = g_string_append_c (gstring, ch);
1978
    
1979
0
    do /* while (in_number) */
1980
0
      {
1981
0
        gboolean is_E;
1982
        
1983
0
        is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1984
        
1985
0
        ch = g_scanner_peek_next_char (scanner);
1986
        
1987
0
        if (g_scanner_char_2_num (ch, 36) >= 0 ||
1988
0
      (config->scan_float && ch == '.') ||
1989
0
      (is_E && (ch == '+' || ch == '-')))
1990
0
    {
1991
0
      ch = g_scanner_get_char (scanner, line_p, position_p);
1992
      
1993
0
      switch (ch)
1994
0
        {
1995
0
        case '.':
1996
0
          if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1997
0
      {
1998
0
        value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1999
0
        token = G_TOKEN_ERROR;
2000
0
        in_number = FALSE;
2001
0
      }
2002
0
          else
2003
0
      {
2004
0
        token = G_TOKEN_FLOAT;
2005
0
        gstring = g_string_append_c (gstring, ch);
2006
0
      }
2007
0
          break;
2008
          
2009
0
        case '0':
2010
0
        case '1':
2011
0
        case '2':
2012
0
        case '3':
2013
0
        case '4':
2014
0
        case '5':
2015
0
        case '6':
2016
0
        case '7':
2017
0
        case '8':
2018
0
        case '9':
2019
0
          gstring = g_string_append_c (gstring, ch);
2020
0
          break;
2021
          
2022
0
        case '-':
2023
0
        case '+':
2024
0
          if (token != G_TOKEN_FLOAT)
2025
0
      {
2026
0
        token = G_TOKEN_ERROR;
2027
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2028
0
        in_number = FALSE;
2029
0
      }
2030
0
          else
2031
0
      gstring = g_string_append_c (gstring, ch);
2032
0
          break;
2033
          
2034
0
        case 'e':
2035
0
        case 'E':
2036
0
          if ((token != G_TOKEN_HEX && !config->scan_float) ||
2037
0
        (token != G_TOKEN_HEX &&
2038
0
         token != G_TOKEN_OCTAL &&
2039
0
         token != G_TOKEN_FLOAT &&
2040
0
         token != G_TOKEN_INT))
2041
0
      {
2042
0
        token = G_TOKEN_ERROR;
2043
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2044
0
        in_number = FALSE;
2045
0
      }
2046
0
          else
2047
0
      {
2048
0
        if (token != G_TOKEN_HEX)
2049
0
          token = G_TOKEN_FLOAT;
2050
0
        gstring = g_string_append_c (gstring, ch);
2051
0
      }
2052
0
          break;
2053
          
2054
0
        default:
2055
0
          if (token != G_TOKEN_HEX)
2056
0
      {
2057
0
        token = G_TOKEN_ERROR;
2058
0
        value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2059
0
        in_number = FALSE;
2060
0
      }
2061
0
          else
2062
0
      gstring = g_string_append_c (gstring, ch);
2063
0
          break;
2064
0
        }
2065
0
    }
2066
0
        else
2067
0
    in_number = FALSE;
2068
0
      }
2069
0
    while (in_number);
2070
    
2071
0
    endptr = NULL;
2072
0
    if (token == G_TOKEN_FLOAT)
2073
0
      value.v_float = g_strtod (gstring->str, &endptr);
2074
0
    else
2075
0
      {
2076
0
        guint64 ui64 = 0;
2077
0
        switch (token)
2078
0
    {
2079
0
    case G_TOKEN_BINARY:
2080
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
2081
0
      break;
2082
0
    case G_TOKEN_OCTAL:
2083
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
2084
0
      break;
2085
0
    case G_TOKEN_INT:
2086
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
2087
0
      break;
2088
0
    case G_TOKEN_HEX:
2089
0
      ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
2090
0
      break;
2091
0
    default: ;
2092
0
    }
2093
0
        if (scanner->config->store_int64)
2094
0
    value.v_int64 = ui64;
2095
0
        else
2096
0
    value.v_int = ui64;
2097
0
      }
2098
0
    if (endptr && *endptr)
2099
0
      {
2100
0
        token = G_TOKEN_ERROR;
2101
0
        if (*endptr == 'e' || *endptr == 'E')
2102
0
    value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2103
0
        else
2104
0
    value.v_error = G_ERR_DIGIT_RADIX;
2105
0
      }
2106
0
    g_string_free (gstring, TRUE);
2107
0
    gstring = NULL;
2108
0
    ch = 0;
2109
0
  } /* number_parsing:... */
2110
0
  break;
2111
  
2112
0
  default:
2113
0
  default_case:
2114
0
  {
2115
0
    if (config->cpair_comment_single &&
2116
0
        ch == config->cpair_comment_single[0])
2117
0
      {
2118
0
        token = G_TOKEN_COMMENT_SINGLE;
2119
0
        in_comment_single = TRUE;
2120
0
        gstring = g_string_new (NULL);
2121
0
        ch = g_scanner_get_char (scanner, line_p, position_p);
2122
0
        while (ch != 0)
2123
0
    {
2124
0
      if (ch == config->cpair_comment_single[1])
2125
0
        {
2126
0
          in_comment_single = FALSE;
2127
0
          ch = 0;
2128
0
          break;
2129
0
        }
2130
      
2131
0
      gstring = g_string_append_c (gstring, ch);
2132
0
      ch = g_scanner_get_char (scanner, line_p, position_p);
2133
0
    }
2134
        /* ignore a missing newline at EOF for single line comments */
2135
0
        if (in_comment_single &&
2136
0
      config->cpair_comment_single[1] == '\n')
2137
0
    in_comment_single = FALSE;
2138
0
      }
2139
0
    else if (config->scan_identifier && ch &&
2140
0
       strchr (config->cset_identifier_first, ch))
2141
0
      {
2142
0
      identifier_precedence:
2143
        
2144
0
        if (config->cset_identifier_nth && ch &&
2145
0
      strchr (config->cset_identifier_nth,
2146
0
        g_scanner_peek_next_char (scanner)))
2147
0
    {
2148
0
      token = G_TOKEN_IDENTIFIER;
2149
0
      gstring = g_string_new (NULL);
2150
0
      gstring = g_string_append_c (gstring, ch);
2151
0
      do
2152
0
        {
2153
0
          ch = g_scanner_get_char (scanner, line_p, position_p);
2154
0
          gstring = g_string_append_c (gstring, ch);
2155
0
          ch = g_scanner_peek_next_char (scanner);
2156
0
        }
2157
0
      while (ch && strchr (config->cset_identifier_nth, ch));
2158
0
      ch = 0;
2159
0
    }
2160
0
        else if (config->scan_identifier_1char)
2161
0
    {
2162
0
      token = G_TOKEN_IDENTIFIER;
2163
0
      value.v_identifier = g_new0 (gchar, 2);
2164
0
      value.v_identifier[0] = ch;
2165
0
      ch = 0;
2166
0
    }
2167
0
      }
2168
0
    if (ch)
2169
0
      {
2170
0
        if (config->char_2_token)
2171
0
    token = ch;
2172
0
        else
2173
0
    {
2174
0
      token = G_TOKEN_CHAR;
2175
0
      value.v_char = ch;
2176
0
    }
2177
0
        ch = 0;
2178
0
      }
2179
0
  } /* default_case:... */
2180
0
  break;
2181
0
  }
2182
0
      g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
2183
0
    }
2184
0
  while (ch != 0);
2185
  
2186
0
  if (in_comment_multi || in_comment_single ||
2187
0
      in_string_sq || in_string_dq)
2188
0
    {
2189
0
      token = G_TOKEN_ERROR;
2190
0
      if (gstring)
2191
0
  {
2192
0
    g_string_free (gstring, TRUE);
2193
0
    gstring = NULL;
2194
0
  }
2195
0
      (*position_p)++;
2196
0
      if (in_comment_multi || in_comment_single)
2197
0
  value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
2198
0
      else /* (in_string_sq || in_string_dq) */
2199
0
  value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
2200
0
    }
2201
  
2202
0
  if (gstring)
2203
0
    {
2204
0
      value.v_string = g_string_free (gstring, FALSE);
2205
0
      gstring = NULL;
2206
0
    }
2207
  
2208
0
  if (token == G_TOKEN_IDENTIFIER)
2209
0
    {
2210
0
      if (config->scan_symbols)
2211
0
  {
2212
0
    GScannerKey *key;
2213
0
    guint scope_id;
2214
    
2215
0
    scope_id = scanner->scope_id;
2216
0
    key = g_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
2217
0
    if (!key && scope_id && scanner->config->scope_0_fallback)
2218
0
      key = g_scanner_lookup_internal (scanner, 0, value.v_identifier);
2219
    
2220
0
    if (key)
2221
0
      {
2222
0
        g_free (value.v_identifier);
2223
0
        token = G_TOKEN_SYMBOL;
2224
0
        value.v_symbol = key->value;
2225
0
      }
2226
0
  }
2227
      
2228
0
      if (token == G_TOKEN_IDENTIFIER &&
2229
0
    config->scan_identifier_NULL &&
2230
0
    strlen (value.v_identifier) == 4)
2231
0
  {
2232
0
    gchar *null_upper = "NULL";
2233
0
    gchar *null_lower = "null";
2234
    
2235
0
    if (scanner->config->case_sensitive)
2236
0
      {
2237
0
        if (value.v_identifier[0] == null_upper[0] &&
2238
0
      value.v_identifier[1] == null_upper[1] &&
2239
0
      value.v_identifier[2] == null_upper[2] &&
2240
0
      value.v_identifier[3] == null_upper[3])
2241
0
    token = G_TOKEN_IDENTIFIER_NULL;
2242
0
      }
2243
0
    else
2244
0
      {
2245
0
        if ((value.v_identifier[0] == null_upper[0] ||
2246
0
       value.v_identifier[0] == null_lower[0]) &&
2247
0
      (value.v_identifier[1] == null_upper[1] ||
2248
0
       value.v_identifier[1] == null_lower[1]) &&
2249
0
      (value.v_identifier[2] == null_upper[2] ||
2250
0
       value.v_identifier[2] == null_lower[2]) &&
2251
0
      (value.v_identifier[3] == null_upper[3] ||
2252
0
       value.v_identifier[3] == null_lower[3]))
2253
0
    token = G_TOKEN_IDENTIFIER_NULL;
2254
0
      }
2255
0
  }
2256
0
    }
2257
  
2258
0
  *token_p = token;
2259
0
  *value_p = value;
2260
0
}