Coverage Report

Created: 2026-06-30 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libyaml/src/scanner.c
Line
Count
Source
1
2
/*
3
 * Introduction
4
 * ************
5
 *
6
 * The following notes assume that you are familiar with the YAML specification
7
 * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
8
 * some cases we are less restrictive than it requires.
9
 *
10
 * The process of transforming a YAML stream into a sequence of events is
11
 * divided into two steps: Scanning and Parsing.
12
 *
13
 * The Scanner transforms the input stream into a sequence of tokens, while the
14
 * parser transforms the sequence of tokens produced by the Scanner into a
15
 * sequence of parsing events.
16
 *
17
 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18
 * is a straightforward implementation of a recursive-descent parser (or,
19
 * LL(1) parser, as it is usually called).
20
 *
21
 * Actually there are two issues of Scanning that might be called "clever", the
22
 * rest is quite straightforward.  The issues are "block collection start" and
23
 * "simple keys".  Both issues are explained below in details.
24
 *
25
 * Here the Scanning step is explained and implemented.  We start with the list
26
 * of all the tokens produced by the Scanner together with short descriptions.
27
 *
28
 * Now, tokens:
29
 *
30
 *      STREAM-START(encoding)          # The stream start.
31
 *      STREAM-END                      # The stream end.
32
 *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
33
 *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
34
 *      DOCUMENT-START                  # '---'
35
 *      DOCUMENT-END                    # '...'
36
 *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
37
 *      BLOCK-MAPPING-START             # sequence or a block mapping.
38
 *      BLOCK-END                       # Indentation decrease.
39
 *      FLOW-SEQUENCE-START             # '['
40
 *      FLOW-SEQUENCE-END               # ']'
41
 *      FLOW-MAPPING-START              # '{'
42
 *      FLOW-MAPPING-END                # '}'
43
 *      BLOCK-ENTRY                     # '-'
44
 *      FLOW-ENTRY                      # ','
45
 *      KEY                             # '?' or nothing (simple keys).
46
 *      VALUE                           # ':'
47
 *      ALIAS(anchor)                   # '*anchor'
48
 *      ANCHOR(anchor)                  # '&anchor'
49
 *      TAG(handle,suffix)              # '!handle!suffix'
50
 *      SCALAR(value,style)             # A scalar.
51
 *
52
 * The following two tokens are "virtual" tokens denoting the beginning and the
53
 * end of the stream:
54
 *
55
 *      STREAM-START(encoding)
56
 *      STREAM-END
57
 *
58
 * We pass the information about the input stream encoding with the
59
 * STREAM-START token.
60
 *
61
 * The next two tokens are responsible for directives:
62
 *
63
 *      VERSION-DIRECTIVE(major,minor)
64
 *      TAG-DIRECTIVE(handle,prefix)
65
 *
66
 * Example:
67
 *
68
 *      %YAML   1.1
69
 *      %TAG    !   !foo
70
 *      %TAG    !yaml!  tag:yaml.org,2002:
71
 *      ---
72
 *
73
 * The corresponding sequence of tokens:
74
 *
75
 *      STREAM-START(utf-8)
76
 *      VERSION-DIRECTIVE(1,1)
77
 *      TAG-DIRECTIVE("!","!foo")
78
 *      TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79
 *      DOCUMENT-START
80
 *      STREAM-END
81
 *
82
 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83
 * line.
84
 *
85
 * The document start and end indicators are represented by:
86
 *
87
 *      DOCUMENT-START
88
 *      DOCUMENT-END
89
 *
90
 * Note that if a YAML stream contains an implicit document (without '---'
91
 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92
 * produced.
93
 *
94
 * In the following examples, we present whole documents together with the
95
 * produced tokens.
96
 *
97
 *      1. An implicit document:
98
 *
99
 *          'a scalar'
100
 *
101
 *      Tokens:
102
 *
103
 *          STREAM-START(utf-8)
104
 *          SCALAR("a scalar",single-quoted)
105
 *          STREAM-END
106
 *
107
 *      2. An explicit document:
108
 *
109
 *          ---
110
 *          'a scalar'
111
 *          ...
112
 *
113
 *      Tokens:
114
 *
115
 *          STREAM-START(utf-8)
116
 *          DOCUMENT-START
117
 *          SCALAR("a scalar",single-quoted)
118
 *          DOCUMENT-END
119
 *          STREAM-END
120
 *
121
 *      3. Several documents in a stream:
122
 *
123
 *          'a scalar'
124
 *          ---
125
 *          'another scalar'
126
 *          ---
127
 *          'yet another scalar'
128
 *
129
 *      Tokens:
130
 *
131
 *          STREAM-START(utf-8)
132
 *          SCALAR("a scalar",single-quoted)
133
 *          DOCUMENT-START
134
 *          SCALAR("another scalar",single-quoted)
135
 *          DOCUMENT-START
136
 *          SCALAR("yet another scalar",single-quoted)
137
 *          STREAM-END
138
 *
139
 * We have already introduced the SCALAR token above.  The following tokens are
140
 * used to describe aliases, anchors, tags, and scalars:
141
 *
142
 *      ALIAS(anchor)
143
 *      ANCHOR(anchor)
144
 *      TAG(handle,suffix)
145
 *      SCALAR(value,style)
146
 *
147
 * The following series of examples illustrates the usage of these tokens:
148
 *
149
 *      1. A recursive sequence:
150
 *
151
 *          &A [ *A ]
152
 *
153
 *      Tokens:
154
 *
155
 *          STREAM-START(utf-8)
156
 *          ANCHOR("A")
157
 *          FLOW-SEQUENCE-START
158
 *          ALIAS("A")
159
 *          FLOW-SEQUENCE-END
160
 *          STREAM-END
161
 *
162
 *      2. A tagged scalar:
163
 *
164
 *          !!float "3.14"  # A good approximation.
165
 *
166
 *      Tokens:
167
 *
168
 *          STREAM-START(utf-8)
169
 *          TAG("!!","float")
170
 *          SCALAR("3.14",double-quoted)
171
 *          STREAM-END
172
 *
173
 *      3. Various scalar styles:
174
 *
175
 *          --- # Implicit empty plain scalars do not produce tokens.
176
 *          --- a plain scalar
177
 *          --- 'a single-quoted scalar'
178
 *          --- "a double-quoted scalar"
179
 *          --- |-
180
 *            a literal scalar
181
 *          --- >-
182
 *            a folded
183
 *            scalar
184
 *
185
 *      Tokens:
186
 *
187
 *          STREAM-START(utf-8)
188
 *          DOCUMENT-START
189
 *          DOCUMENT-START
190
 *          SCALAR("a plain scalar",plain)
191
 *          DOCUMENT-START
192
 *          SCALAR("a single-quoted scalar",single-quoted)
193
 *          DOCUMENT-START
194
 *          SCALAR("a double-quoted scalar",double-quoted)
195
 *          DOCUMENT-START
196
 *          SCALAR("a literal scalar",literal)
197
 *          DOCUMENT-START
198
 *          SCALAR("a folded scalar",folded)
199
 *          STREAM-END
200
 *
201
 * Now it's time to review collection-related tokens. We will start with
202
 * flow collections:
203
 *
204
 *      FLOW-SEQUENCE-START
205
 *      FLOW-SEQUENCE-END
206
 *      FLOW-MAPPING-START
207
 *      FLOW-MAPPING-END
208
 *      FLOW-ENTRY
209
 *      KEY
210
 *      VALUE
211
 *
212
 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213
 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214
 * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
215
 * indicators '?' and ':', which are used for denoting mapping keys and values,
216
 * are represented by the KEY and VALUE tokens.
217
 *
218
 * The following examples show flow collections:
219
 *
220
 *      1. A flow sequence:
221
 *
222
 *          [item 1, item 2, item 3]
223
 *
224
 *      Tokens:
225
 *
226
 *          STREAM-START(utf-8)
227
 *          FLOW-SEQUENCE-START
228
 *          SCALAR("item 1",plain)
229
 *          FLOW-ENTRY
230
 *          SCALAR("item 2",plain)
231
 *          FLOW-ENTRY
232
 *          SCALAR("item 3",plain)
233
 *          FLOW-SEQUENCE-END
234
 *          STREAM-END
235
 *
236
 *      2. A flow mapping:
237
 *
238
 *          {
239
 *              a simple key: a value,  # Note that the KEY token is produced.
240
 *              ? a complex key: another value,
241
 *          }
242
 *
243
 *      Tokens:
244
 *
245
 *          STREAM-START(utf-8)
246
 *          FLOW-MAPPING-START
247
 *          KEY
248
 *          SCALAR("a simple key",plain)
249
 *          VALUE
250
 *          SCALAR("a value",plain)
251
 *          FLOW-ENTRY
252
 *          KEY
253
 *          SCALAR("a complex key",plain)
254
 *          VALUE
255
 *          SCALAR("another value",plain)
256
 *          FLOW-ENTRY
257
 *          FLOW-MAPPING-END
258
 *          STREAM-END
259
 *
260
 * A simple key is a key which is not denoted by the '?' indicator.  Note that
261
 * the Scanner still produces the KEY token whenever it encounters a simple key.
262
 *
263
 * For scanning block collections, the following tokens are used (note that we
264
 * repeat KEY and VALUE here):
265
 *
266
 *      BLOCK-SEQUENCE-START
267
 *      BLOCK-MAPPING-START
268
 *      BLOCK-END
269
 *      BLOCK-ENTRY
270
 *      KEY
271
 *      VALUE
272
 *
273
 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274
 * increase that precedes a block collection (cf. the INDENT token in Python).
275
 * The token BLOCK-END denotes indentation decrease that ends a block collection
276
 * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
277
 * that make detection of these tokens more complex.
278
 *
279
 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280
 * '-', '?', and ':' correspondingly.
281
 *
282
 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283
 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284
 *
285
 *      1. Block sequences:
286
 *
287
 *          - item 1
288
 *          - item 2
289
 *          -
290
 *            - item 3.1
291
 *            - item 3.2
292
 *          -
293
 *            key 1: value 1
294
 *            key 2: value 2
295
 *
296
 *      Tokens:
297
 *
298
 *          STREAM-START(utf-8)
299
 *          BLOCK-SEQUENCE-START
300
 *          BLOCK-ENTRY
301
 *          SCALAR("item 1",plain)
302
 *          BLOCK-ENTRY
303
 *          SCALAR("item 2",plain)
304
 *          BLOCK-ENTRY
305
 *          BLOCK-SEQUENCE-START
306
 *          BLOCK-ENTRY
307
 *          SCALAR("item 3.1",plain)
308
 *          BLOCK-ENTRY
309
 *          SCALAR("item 3.2",plain)
310
 *          BLOCK-END
311
 *          BLOCK-ENTRY
312
 *          BLOCK-MAPPING-START
313
 *          KEY
314
 *          SCALAR("key 1",plain)
315
 *          VALUE
316
 *          SCALAR("value 1",plain)
317
 *          KEY
318
 *          SCALAR("key 2",plain)
319
 *          VALUE
320
 *          SCALAR("value 2",plain)
321
 *          BLOCK-END
322
 *          BLOCK-END
323
 *          STREAM-END
324
 *
325
 *      2. Block mappings:
326
 *
327
 *          a simple key: a value   # The KEY token is produced here.
328
 *          ? a complex key
329
 *          : another value
330
 *          a mapping:
331
 *            key 1: value 1
332
 *            key 2: value 2
333
 *          a sequence:
334
 *            - item 1
335
 *            - item 2
336
 *
337
 *      Tokens:
338
 *
339
 *          STREAM-START(utf-8)
340
 *          BLOCK-MAPPING-START
341
 *          KEY
342
 *          SCALAR("a simple key",plain)
343
 *          VALUE
344
 *          SCALAR("a value",plain)
345
 *          KEY
346
 *          SCALAR("a complex key",plain)
347
 *          VALUE
348
 *          SCALAR("another value",plain)
349
 *          KEY
350
 *          SCALAR("a mapping",plain)
351
 *          VALUE
352
 *          BLOCK-MAPPING-START
353
 *          KEY
354
 *          SCALAR("key 1",plain)
355
 *          VALUE
356
 *          SCALAR("value 1",plain)
357
 *          KEY
358
 *          SCALAR("key 2",plain)
359
 *          VALUE
360
 *          SCALAR("value 2",plain)
361
 *          BLOCK-END
362
 *          KEY
363
 *          SCALAR("a sequence",plain)
364
 *          VALUE
365
 *          BLOCK-SEQUENCE-START
366
 *          BLOCK-ENTRY
367
 *          SCALAR("item 1",plain)
368
 *          BLOCK-ENTRY
369
 *          SCALAR("item 2",plain)
370
 *          BLOCK-END
371
 *          BLOCK-END
372
 *          STREAM-END
373
 *
374
 * YAML does not always require a new block collection to start on a new
375
 * line.  If the current line contains only '-', '?', and ':' indicators, a new
376
 * block collection may start at the current line.  The following examples
377
 * illustrate this case:
378
 *
379
 *      1. Collections in a sequence:
380
 *
381
 *          - - item 1
382
 *            - item 2
383
 *          - key 1: value 1
384
 *            key 2: value 2
385
 *          - ? complex key
386
 *            : complex value
387
 *
388
 *      Tokens:
389
 *
390
 *          STREAM-START(utf-8)
391
 *          BLOCK-SEQUENCE-START
392
 *          BLOCK-ENTRY
393
 *          BLOCK-SEQUENCE-START
394
 *          BLOCK-ENTRY
395
 *          SCALAR("item 1",plain)
396
 *          BLOCK-ENTRY
397
 *          SCALAR("item 2",plain)
398
 *          BLOCK-END
399
 *          BLOCK-ENTRY
400
 *          BLOCK-MAPPING-START
401
 *          KEY
402
 *          SCALAR("key 1",plain)
403
 *          VALUE
404
 *          SCALAR("value 1",plain)
405
 *          KEY
406
 *          SCALAR("key 2",plain)
407
 *          VALUE
408
 *          SCALAR("value 2",plain)
409
 *          BLOCK-END
410
 *          BLOCK-ENTRY
411
 *          BLOCK-MAPPING-START
412
 *          KEY
413
 *          SCALAR("complex key")
414
 *          VALUE
415
 *          SCALAR("complex value")
416
 *          BLOCK-END
417
 *          BLOCK-END
418
 *          STREAM-END
419
 *
420
 *      2. Collections in a mapping:
421
 *
422
 *          ? a sequence
423
 *          : - item 1
424
 *            - item 2
425
 *          ? a mapping
426
 *          : key 1: value 1
427
 *            key 2: value 2
428
 *
429
 *      Tokens:
430
 *
431
 *          STREAM-START(utf-8)
432
 *          BLOCK-MAPPING-START
433
 *          KEY
434
 *          SCALAR("a sequence",plain)
435
 *          VALUE
436
 *          BLOCK-SEQUENCE-START
437
 *          BLOCK-ENTRY
438
 *          SCALAR("item 1",plain)
439
 *          BLOCK-ENTRY
440
 *          SCALAR("item 2",plain)
441
 *          BLOCK-END
442
 *          KEY
443
 *          SCALAR("a mapping",plain)
444
 *          VALUE
445
 *          BLOCK-MAPPING-START
446
 *          KEY
447
 *          SCALAR("key 1",plain)
448
 *          VALUE
449
 *          SCALAR("value 1",plain)
450
 *          KEY
451
 *          SCALAR("key 2",plain)
452
 *          VALUE
453
 *          SCALAR("value 2",plain)
454
 *          BLOCK-END
455
 *          BLOCK-END
456
 *          STREAM-END
457
 *
458
 * YAML also permits non-indented sequences if they are included in a block
459
 * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
460
 *
461
 *      key:
462
 *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
463
 *      - item 2
464
 *
465
 * Tokens:
466
 *
467
 *      STREAM-START(utf-8)
468
 *      BLOCK-MAPPING-START
469
 *      KEY
470
 *      SCALAR("key",plain)
471
 *      VALUE
472
 *      BLOCK-ENTRY
473
 *      SCALAR("item 1",plain)
474
 *      BLOCK-ENTRY
475
 *      SCALAR("item 2",plain)
476
 *      BLOCK-END
477
 */
478
479
#include "yaml_private.h"
480
481
/*
482
 * Maximum nesting level (defined in parser.c).
 *
 * This is only a declaration; the storage and its initial value live in
 * parser.c.  The places where the scanner consults the limit are outside
 * this excerpt (presumably the flow-level / indentation helpers -- TODO
 * confirm against their definitions).
483
 */
484
485
extern int MAX_NESTING_LEVEL;
486
487
/*
488
 * Ensure that the buffer contains the required number of characters.
489
 * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * When parser->unread is already >= length the expression is just 1 and
 * nothing is called; otherwise its value is whatever
 * yaml_parser_update_buffer(parser, length) returns.
 *
 * Macro hygiene notes: `length` is evaluated once for the comparison and a
 * second time when the update call is made, and `parser` is expanded
 * without parentheses, so pass a plain pointer variable and a
 * side-effect-free length.
490
 */
491
492
#define CACHE(parser,length)                                                    \
493
328M
    (parser->unread >= (length)                                                 \
494
328M
        ? 1                                                                     \
495
328M
        : yaml_parser_update_buffer(parser, (length)))
496
497
/*
498
 * Advance the buffer pointer.
 *
 * Consumes one character that is not a line break: mark.index and
 * mark.column are incremented, unread is decremented, and buffer.pointer
 * moves forward by WIDTH(parser->buffer).  mark.line is left alone, so line
 * breaks must go through SKIP_LINE instead.
 *
 * WIDTH is defined outside this file excerpt (presumably the byte length of
 * the character at the buffer pointer -- TODO confirm).  The macro does not
 * check that unread is non-zero; callers are expected to have made the
 * character available beforehand (e.g. via CACHE).
499
 */
500
501
#define SKIP(parser)                                                            \
502
132M
     (parser->mark.index ++,                                                    \
503
132M
      parser->mark.column ++,                                                   \
504
132M
      parser->unread --,                                                        \
505
132M
      parser->buffer.pointer += WIDTH(parser->buffer))
506
507
/*
 * Advance the buffer pointer over one line break.
 *
 *  - If IS_CRLF(parser->buffer) holds, the two-byte break is consumed:
 *    mark.index and buffer.pointer advance by 2 and unread drops by 2.
 *  - Otherwise, if IS_BREAK(parser->buffer) holds, a single break is
 *    consumed: mark.index advances by 1, unread drops by 1 and
 *    buffer.pointer advances by WIDTH(parser->buffer).
 *  - Otherwise the expression is 0 and the parser state is not modified.
 *
 * In both consuming cases mark.column is reset to 0 and mark.line is
 * incremented.  IS_CRLF, IS_BREAK and WIDTH are defined outside this
 * excerpt.
 */
#define SKIP_LINE(parser)                                                       \
508
8.41M
     (IS_CRLF(parser->buffer) ?                                                 \
509
8.41M
      (parser->mark.index += 2,                                                 \
510
1.71M
       parser->mark.column = 0,                                                 \
511
1.71M
       parser->mark.line ++,                                                    \
512
1.71M
       parser->unread -= 2,                                                     \
513
1.71M
       parser->buffer.pointer += 2) :                                           \
514
8.41M
      IS_BREAK(parser->buffer) ?                                                \
515
6.70M
      (parser->mark.index ++,                                                   \
516
6.70M
       parser->mark.column = 0,                                                 \
517
6.70M
       parser->mark.line ++,                                                    \
518
6.70M
       parser->unread --,                                                       \
519
6.70M
       parser->buffer.pointer += WIDTH(parser->buffer)) : 0)
520
521
/*
522
 * Copy a character to a string buffer and advance pointers.
 *
 * The expression is 0 when STRING_EXTEND(parser,string) yields false, and
 * nothing is copied in that case.  Otherwise COPY(string,parser->buffer) is
 * performed, mark.index and mark.column are incremented, unread is
 * decremented, and the expression is 1.
 *
 * Neither buffer.pointer nor string.pointer is touched directly here;
 * advancing them is left to COPY, which is defined outside this excerpt
 * (presumably it moves both by the character's byte width -- TODO confirm).
 * mark.line is not changed, so line breaks must be read with READ_LINE.
523
 */
524
525
#define READ(parser,string)                                                     \
526
104M
     (STRING_EXTEND(parser,string) ?                                            \
527
104M
         (COPY(string,parser->buffer),                                          \
528
104M
          parser->mark.index ++,                                                \
529
104M
          parser->mark.column ++,                                               \
530
104M
          parser->unread --,                                                    \
531
104M
          1) : 0)
532
533
/*
534
 * Copy a line break character to a string buffer and advance pointers.
 *
 * The expression is 0 when STRING_EXTEND(parser,string) yields false.
 * Otherwise the bytes at the buffer pointer are matched against the
 * following cases, in this order, and the expression is 1:
 *
 *   CR LF           -> '\n' is appended; 2 bytes consumed, mark.index += 2,
 *                      unread -= 2.
 *   CR or LF        -> '\n' is appended; 1 byte consumed, mark.index and
 *                      unread change by 1.
 *   NEL (C2 85)     -> '\n' is appended; 2 bytes consumed, mark.index and
 *                      unread change by 1.
 *   LS/PS (E2 80 A8 -> the 3 bytes are copied unchanged; mark.index and
 *    or E2 80 A9)      unread change by 1.
 *
 * Every matching case also resets mark.column to 0 and increments
 * mark.line.  If none of the cases matches, nothing is consumed or
 * appended, yet the expression is still 1: the result reports only whether
 * STRING_EXTEND succeeded, not whether a break was actually read.
535
 */
536
537
#define READ_LINE(parser,string)                                                \
538
12.1M
    (STRING_EXTEND(parser,string) ?                                             \
539
12.1M
    (((CHECK_AT(parser->buffer,'\r',0)                                          \
540
12.1M
       && CHECK_AT(parser->buffer,'\n',1)) ?        /* CR LF -> LF */           \
541
12.1M
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
542
707k
      parser->buffer.pointer += 2,                                              \
543
707k
      parser->mark.index += 2,                                                  \
544
707k
      parser->mark.column = 0,                                                  \
545
707k
      parser->mark.line ++,                                                     \
546
707k
      parser->unread -= 2) :                                                    \
547
12.1M
     (CHECK_AT(parser->buffer,'\r',0)                                           \
548
11.4M
      || CHECK_AT(parser->buffer,'\n',0)) ?         /* CR|LF -> LF */           \
549
11.4M
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
550
9.90M
      parser->buffer.pointer ++,                                                \
551
9.90M
      parser->mark.index ++,                                                    \
552
9.90M
      parser->mark.column = 0,                                                  \
553
9.90M
      parser->mark.line ++,                                                     \
554
9.90M
      parser->unread --) :                                                      \
555
11.4M
     (CHECK_AT(parser->buffer,'\xC2',0)                                         \
556
1.54M
      && CHECK_AT(parser->buffer,'\x85',1)) ?       /* NEL -> LF */             \
557
1.54M
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
558
28.3k
      parser->buffer.pointer += 2,                                              \
559
28.3k
      parser->mark.index ++,                                                    \
560
28.3k
      parser->mark.column = 0,                                                  \
561
28.3k
      parser->mark.line ++,                                                     \
562
28.3k
      parser->unread --) :                                                      \
563
1.54M
     (CHECK_AT(parser->buffer,'\xE2',0) &&                                      \
564
1.51M
      CHECK_AT(parser->buffer,'\x80',1) &&                                      \
565
1.51M
      (CHECK_AT(parser->buffer,'\xA8',2) ||                                     \
566
1.51M
       CHECK_AT(parser->buffer,'\xA9',2))) ?        /* LS|PS -> LS|PS */        \
567
1.51M
     (*((string).pointer++) = *(parser->buffer.pointer++),                      \
568
1.51M
      *((string).pointer++) = *(parser->buffer.pointer++),                      \
569
1.51M
      *((string).pointer++) = *(parser->buffer.pointer++),                      \
570
1.51M
      parser->mark.index ++,                                                    \
571
1.51M
      parser->mark.column = 0,                                                  \
572
1.51M
      parser->mark.line ++,                                                     \
573
1.51M
      parser->unread --) : 0),                                                  \
574
12.1M
    1) : 0)
575
576
/*
577
 * Public API declarations.
578
 */
579
580
YAML_DECLARE(int)
581
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
582
583
/*
584
 * Error handling.
585
 */
586
587
static int
588
yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
589
        yaml_mark_t context_mark, const char *problem);
590
591
/*
592
 * High-level token API.
593
 */
594
595
YAML_DECLARE(int)
596
yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
597
598
static int
599
yaml_parser_fetch_next_token(yaml_parser_t *parser);
600
601
/*
602
 * Potential simple keys.
603
 */
604
605
static int
606
yaml_parser_stale_simple_keys(yaml_parser_t *parser);
607
608
static int
609
yaml_parser_save_simple_key(yaml_parser_t *parser);
610
611
static int
612
yaml_parser_remove_simple_key(yaml_parser_t *parser);
613
614
static int
615
yaml_parser_increase_flow_level(yaml_parser_t *parser);
616
617
static int
618
yaml_parser_decrease_flow_level(yaml_parser_t *parser);
619
620
/*
621
 * Indentation treatment.
622
 */
623
624
static int
625
yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
626
        ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
627
628
static int
629
yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
630
631
/*
632
 * Token fetchers.
633
 */
634
635
static int
636
yaml_parser_fetch_stream_start(yaml_parser_t *parser);
637
638
static int
639
yaml_parser_fetch_stream_end(yaml_parser_t *parser);
640
641
static int
642
yaml_parser_fetch_directive(yaml_parser_t *parser);
643
644
static int
645
yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
646
        yaml_token_type_t type);
647
648
static int
649
yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
650
        yaml_token_type_t type);
651
652
static int
653
yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
654
        yaml_token_type_t type);
655
656
static int
657
yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
658
659
static int
660
yaml_parser_fetch_block_entry(yaml_parser_t *parser);
661
662
static int
663
yaml_parser_fetch_key(yaml_parser_t *parser);
664
665
static int
666
yaml_parser_fetch_value(yaml_parser_t *parser);
667
668
static int
669
yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
670
671
static int
672
yaml_parser_fetch_tag(yaml_parser_t *parser);
673
674
static int
675
yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
676
677
static int
678
yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
679
680
static int
681
yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
682
683
/*
684
 * Token scanners.
685
 */
686
687
static int
688
yaml_parser_scan_to_next_token(yaml_parser_t *parser);
689
690
static int
691
yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
692
693
static int
694
yaml_parser_scan_directive_name(yaml_parser_t *parser,
695
        yaml_mark_t start_mark, yaml_char_t **name);
696
697
static int
698
yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
699
        yaml_mark_t start_mark, int *major, int *minor);
700
701
static int
702
yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
703
        yaml_mark_t start_mark, int *number);
704
705
static int
706
yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
707
        yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
708
709
static int
710
yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
711
        yaml_token_type_t type);
712
713
static int
714
yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
715
716
static int
717
yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
718
        yaml_mark_t start_mark, yaml_char_t **handle);
719
720
static int
721
yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
722
        yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
723
724
static int
725
yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
726
        yaml_mark_t start_mark, yaml_string_t *string);
727
728
static int
729
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
730
        int literal);
731
732
static int
733
yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
734
        int *indent, yaml_string_t *breaks,
735
        yaml_mark_t start_mark, yaml_mark_t *end_mark);
736
737
static int
738
yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
739
        int single);
740
741
static int
742
yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
743
744
/*
745
 * Get the next token.
 *
 * parser - the parser object; must not be NULL (asserted).
 * token  - receives the token; must not be NULL (asserted).  It is zeroed
 *          before anything else happens.
 *
 * Returns 1 on success and 0 when yaml_parser_fetch_more_tokens() fails.
 *
 * If STREAM-END has already been handed out, or parser->error is already
 * set, the function returns 1 immediately and leaves *token zeroed; no new
 * token is produced in that case.
 *
 * On the normal path the head of parser->tokens is dequeued into *token by
 * plain struct assignment, token_available is cleared, tokens_parsed is
 * incremented, and stream_end_produced is set once the dequeued token is
 * YAML_STREAM_END_TOKEN.
746
 */
747
748
YAML_DECLARE(int)
749
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
750
0
{
751
0
    assert(parser); /* Non-NULL parser object is expected. */
752
0
    assert(token);  /* Non-NULL token object is expected. */
753
754
    /* Erase the token object. */
755
756
0
    memset(token, 0, sizeof(yaml_token_t));
757
758
    /* No tokens after STREAM-END or error. */
    /* Note: this still reports success (1); the caller sees the zeroed token. */
759
760
0
    if (parser->stream_end_produced || parser->error) {
761
0
        return 1;
762
0
    }
763
764
    /* Ensure that the tokens queue contains enough tokens. */
    /* Skipped when an earlier fetch already set token_available. */
765
766
0
    if (!parser->token_available) {
767
0
        if (!yaml_parser_fetch_more_tokens(parser))
768
0
            return 0;
769
0
    }
770
771
    /* Fetch the next token from the queue. */
772
773
0
    *token = DEQUEUE(parser, parser->tokens);
774
0
    parser->token_available = 0;
775
0
    parser->tokens_parsed ++;
776
    /* Remember that the stream is finished so later calls return early. */
777
0
    if (token->type == YAML_STREAM_END_TOKEN) {
778
0
        parser->stream_end_produced = 1;
779
0
    }
780
781
0
    return 1;
782
0
}
783
784
/*
785
 * Set the scanner error and return 0.
 *
 * parser       - the parser whose error fields are filled in.
 * context      - description of what was being scanned; the pointer is
 *                stored as is, the string is not copied.
 * context_mark - position associated with `context`.
 * problem      - description of what went wrong; also stored by pointer.
 *
 * parser->error becomes YAML_SCANNER_ERROR and parser->problem_mark is
 * taken from the current parser->mark.  The return value is always 0, so
 * callers can write `return yaml_parser_set_scanner_error(...)` to report
 * failure in one statement.
786
 */
787
788
static int
789
yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
790
        yaml_mark_t context_mark, const char *problem)
791
4.64k
{
792
4.64k
    parser->error = YAML_SCANNER_ERROR;
793
4.64k
    parser->context = context;
794
4.64k
    parser->context_mark = context_mark;
795
4.64k
    parser->problem = problem;
796
4.64k
    parser->problem_mark = parser->mark;
797
798
4.64k
    return 0;
799
4.64k
}
800
801
/*
802
 * Ensure that the tokens queue contains at least one token which can be
803
 * returned to the Parser.
 *
 * Tokens are fetched one at a time with yaml_parser_fetch_next_token() for
 * as long as either
 *   - the queue is empty (tokens.head == tokens.tail), or
 *   - some entry of parser->simple_keys is still marked `possible` and its
 *     token_number equals parser->tokens_parsed, i.e. a potential simple
 *     key may still occupy the head position of the queue.
 *
 * Returns 1 after setting parser->token_available to 1.  Returns 0 as soon
 * as yaml_parser_stale_simple_keys() or yaml_parser_fetch_next_token()
 * fails; token_available is not modified on that path.
804
 */
805
806
YAML_DECLARE(int)
807
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
808
19.2M
{
809
19.2M
    int need_more_tokens;
810
811
    /* While we need more tokens to fetch, do it. */
812
813
29.7M
    while (1)
814
29.7M
    {
815
        /*
816
         * Check if we really need to fetch more tokens.
817
         */
818
819
29.7M
        need_more_tokens = 0;
820
821
29.7M
        if (parser->tokens.head == parser->tokens.tail)
822
8.57M
        {
823
            /* Queue is empty. */
824
825
8.57M
            need_more_tokens = 1;
826
8.57M
        }
827
21.2M
        else
828
21.2M
        {
829
21.2M
            yaml_simple_key_t *simple_key;
830
831
            /* Check if any potential simple key may occupy the head position. */
            /* Stale keys are handled first, before the stack is examined. */
832
833
21.2M
            if (!yaml_parser_stale_simple_keys(parser))
834
50
                return 0;
835
            /* Walk the whole simple_keys stack from start up to (not including) top. */
836
21.2M
            for (simple_key = parser->simple_keys.start;
837
51.0M
                    simple_key != parser->simple_keys.top; simple_key++) {
838
31.7M
                if (simple_key->possible
839
2.74M
                        && simple_key->token_number == parser->tokens_parsed) {
840
1.99M
                    need_more_tokens = 1;
841
1.99M
                    break;
842
1.99M
                }
843
31.7M
            }
844
21.2M
        }
845
846
        /* We are finished. */
847
848
29.7M
        if (!need_more_tokens)
849
19.2M
            break;
850
851
        /* Fetch the next token. */
852
853
10.5M
        if (!yaml_parser_fetch_next_token(parser))
854
5.21k
            return 0;
855
10.5M
    }
856
    /* Tell yaml_parser_scan() that the head token can be dequeued as is. */
857
19.2M
    parser->token_available = 1;
858
859
19.2M
    return 1;
860
19.2M
}
861
862
/*
863
 * The dispatcher for token fetchers.
864
 */
865
866
static int
867
yaml_parser_fetch_next_token(yaml_parser_t *parser)
868
10.5M
{
869
    /* Ensure that the buffer is initialized. */
870
871
10.5M
    if (!CACHE(parser, 1))
872
335
        return 0;
873
874
    /* Check if we just started scanning.  Fetch STREAM-START then. */
875
876
10.5M
    if (!parser->stream_start_produced)
877
17.2k
        return yaml_parser_fetch_stream_start(parser);
878
879
    /* Eat whitespaces and comments until we reach the next token. */
880
881
10.5M
    if (!yaml_parser_scan_to_next_token(parser))
882
28
        return 0;
883
884
    /* Remove obsolete potential simple keys. */
885
886
10.5M
    if (!yaml_parser_stale_simple_keys(parser))
887
8
        return 0;
888
889
    /* Check the indentation level against the current column. */
890
891
10.5M
    if (!yaml_parser_unroll_indent(parser, parser->mark.column))
892
0
        return 0;
893
894
    /*
895
     * Ensure that the buffer contains at least 4 characters.  4 is the length
896
     * of the longest indicators ('--- ' and '... ').
897
     */
898
899
10.5M
    if (!CACHE(parser, 4))
900
24
        return 0;
901
902
    /* Is it the end of the stream? */
903
904
10.5M
    if (IS_Z(parser->buffer))
905
8.42k
        return yaml_parser_fetch_stream_end(parser);
906
907
    /* Is it a directive? */
908
909
10.5M
    if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
910
588k
        return yaml_parser_fetch_directive(parser);
911
912
    /* Is it the document start indicator? */
913
914
9.94M
    if (parser->mark.column == 0
915
2.91M
            && CHECK_AT(parser->buffer, '-', 0)
916
2.01M
            && CHECK_AT(parser->buffer, '-', 1)
917
543k
            && CHECK_AT(parser->buffer, '-', 2)
918
539k
            && IS_BLANKZ_AT(parser->buffer, 3))
919
537k
        return yaml_parser_fetch_document_indicator(parser,
920
537k
                YAML_DOCUMENT_START_TOKEN);
921
922
    /* Is it the document end indicator? */
923
924
9.41M
    if (parser->mark.column == 0
925
2.38M
            && CHECK_AT(parser->buffer, '.', 0)
926
25.4k
            && CHECK_AT(parser->buffer, '.', 1)
927
23.4k
            && CHECK_AT(parser->buffer, '.', 2)
928
22.4k
            && IS_BLANKZ_AT(parser->buffer, 3))
929
17.0k
        return yaml_parser_fetch_document_indicator(parser,
930
17.0k
                YAML_DOCUMENT_END_TOKEN);
931
932
    /* Is it the flow sequence start indicator? */
933
934
9.39M
    if (CHECK(parser->buffer, '['))
935
41.0k
        return yaml_parser_fetch_flow_collection_start(parser,
936
41.0k
                YAML_FLOW_SEQUENCE_START_TOKEN);
937
938
    /* Is it the flow mapping start indicator? */
939
940
9.35M
    if (CHECK(parser->buffer, '{'))
941
37.7k
        return yaml_parser_fetch_flow_collection_start(parser,
942
37.7k
                YAML_FLOW_MAPPING_START_TOKEN);
943
944
    /* Is it the flow sequence end indicator? */
945
946
9.31M
    if (CHECK(parser->buffer, ']'))
947
7.46k
        return yaml_parser_fetch_flow_collection_end(parser,
948
7.46k
                YAML_FLOW_SEQUENCE_END_TOKEN);
949
950
    /* Is it the flow mapping end indicator? */
951
952
9.30M
    if (CHECK(parser->buffer, '}'))
953
6.35k
        return yaml_parser_fetch_flow_collection_end(parser,
954
6.35k
                YAML_FLOW_MAPPING_END_TOKEN);
955
956
    /* Is it the flow entry indicator? */
957
958
9.30M
    if (CHECK(parser->buffer, ','))
959
816k
        return yaml_parser_fetch_flow_entry(parser);
960
961
    /* Is it the block entry indicator? */
962
963
8.48M
    if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
964
3.63M
        return yaml_parser_fetch_block_entry(parser);
965
966
    /* Is it the key indicator? */
967
968
4.85M
    if (CHECK(parser->buffer, '?')
969
917k
            && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
970
910k
        return yaml_parser_fetch_key(parser);
971
972
    /* Is it the value indicator? */
973
974
3.94M
    if (CHECK(parser->buffer, ':')
975
1.65M
            && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
976
1.54M
        return yaml_parser_fetch_value(parser);
977
978
    /* Is it an alias? */
979
980
2.39M
    if (CHECK(parser->buffer, '*'))
981
4.87k
        return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
982
983
    /* Is it an anchor? */
984
985
2.38M
    if (CHECK(parser->buffer, '&'))
986
14.7k
        return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
987
988
    /* Is it a tag? */
989
990
2.37M
    if (CHECK(parser->buffer, '!'))
991
246k
        return yaml_parser_fetch_tag(parser);
992
993
    /* Is it a literal scalar? */
994
995
2.12M
    if (CHECK(parser->buffer, '|') && !parser->flow_level)
996
1.85k
        return yaml_parser_fetch_block_scalar(parser, 1);
997
998
    /* Is it a folded scalar? */
999
1000
2.12M
    if (CHECK(parser->buffer, '>') && !parser->flow_level)
1001
246k
        return yaml_parser_fetch_block_scalar(parser, 0);
1002
1003
    /* Is it a single-quoted scalar? */
1004
1005
1.87M
    if (CHECK(parser->buffer, '\''))
1006
1.85k
        return yaml_parser_fetch_flow_scalar(parser, 1);
1007
1008
    /* Is it a double-quoted scalar? */
1009
1010
1.87M
    if (CHECK(parser->buffer, '"'))
1011
12.2k
        return yaml_parser_fetch_flow_scalar(parser, 0);
1012
1013
    /*
1014
     * Is it a plain scalar?
1015
     *
1016
     * A plain scalar may start with any non-blank characters except
1017
     *
1018
     *      '-', '?', ':', ',', '[', ']', '{', '}',
1019
     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1020
     *      '%', '@', '`'.
1021
     *
1022
     * In the block context (and, for the '-' indicator, in the flow context
1023
     * too), it may also start with the characters
1024
     *
1025
     *      '-', '?', ':'
1026
     *
1027
     * if it is followed by a non-space character.
1028
     *
1029
     * The last rule is more restrictive than the specification requires.
1030
     */
1031
1032
1.86M
    if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1033
1.70M
                || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1034
1.58M
                || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1035
1.58M
                || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1036
1.58M
                || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1037
1.58M
                || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1038
1.58M
                || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1039
1.58M
                || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1040
1.58M
                || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1041
1.58M
                || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1042
284k
            (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1043
119k
            (!parser->flow_level &&
1044
119k
             (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1045
119k
             && !IS_BLANKZ_AT(parser->buffer, 1)))
1046
1.86M
        return yaml_parser_fetch_plain_scalar(parser);
1047
1048
    /*
1049
     * If we don't determine the token type so far, it is an error.
1050
     */
1051
1052
90
    return yaml_parser_set_scanner_error(parser,
1053
90
            "while scanning for the next token", parser->mark,
1054
90
            "found character that cannot start any token");
1055
1.86M
}
1056
1057
/*
1058
 * Check the list of potential simple keys and remove the positions that
1059
 * cannot contain simple keys anymore.
1060
 */
1061
1062
static int
1063
yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1064
31.7M
{
1065
31.7M
    yaml_simple_key_t *simple_key;
1066
1067
    /* Check for a potential simple key for each flow level. */
1068
1069
31.7M
    for (simple_key = parser->simple_keys.start;
1070
103M
            simple_key != parser->simple_keys.top; simple_key ++)
1071
71.9M
    {
1072
        /*
1073
         * The specification requires that a simple key
1074
         *
1075
         *  - is limited to a single line,
1076
         *  - is shorter than 1024 characters.
1077
         */
1078
1079
71.9M
        if (simple_key->possible
1080
28.0M
                && (simple_key->mark.line < parser->mark.line
1081
27.8M
                    || simple_key->mark.index+1024 < parser->mark.index)) {
1082
1083
            /* Check if the potential simple key to be removed is required. */
1084
1085
192k
            if (simple_key->required) {
1086
58
                return yaml_parser_set_scanner_error(parser,
1087
58
                        "while scanning a simple key", simple_key->mark,
1088
58
                        "could not find expected ':'");
1089
58
            }
1090
1091
192k
            simple_key->possible = 0;
1092
192k
        }
1093
71.9M
    }
1094
1095
31.7M
    return 1;
1096
31.7M
}
1097
1098
/*
1099
 * Check if a simple key may start at the current position and add it if
1100
 * needed.
1101
 */
1102
1103
static int
1104
yaml_parser_save_simple_key(yaml_parser_t *parser)
1105
2.22M
{
1106
    /*
1107
     * A simple key is required at the current position if the scanner is in
1108
     * the block context and the current column coincides with the indentation
1109
     * level.
1110
     */
1111
1112
2.22M
    int required = (!parser->flow_level
1113
1.58M
            && parser->indent == (ptrdiff_t)parser->mark.column);
1114
1115
    /*
1116
     * If the current position may start a simple key, save it.
1117
     */
1118
1119
2.22M
    if (parser->simple_key_allowed)
1120
2.01M
    {
1121
2.01M
        yaml_simple_key_t simple_key;
1122
2.01M
        simple_key.possible = 1;
1123
2.01M
        simple_key.required = required;
1124
2.01M
        simple_key.token_number =
1125
2.01M
            parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1126
2.01M
        simple_key.mark = parser->mark;
1127
1128
2.01M
        if (!yaml_parser_remove_simple_key(parser)) return 0;
1129
1130
2.01M
        *(parser->simple_keys.top-1) = simple_key;
1131
2.01M
    }
1132
1133
2.22M
    return 1;
1134
2.22M
}
1135
1136
/*
1137
 * Remove a potential simple key at the current flow level.
1138
 */
1139
1140
static int
1141
yaml_parser_remove_simple_key(yaml_parser_t *parser)
1142
8.78M
{
1143
8.78M
    yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1144
1145
8.78M
    if (simple_key->possible)
1146
382k
    {
1147
        /* If the key is required, it is an error. */
1148
1149
382k
        if (simple_key->required) {
1150
230
            return yaml_parser_set_scanner_error(parser,
1151
230
                    "while scanning a simple key", simple_key->mark,
1152
230
                    "could not find expected ':'");
1153
230
        }
1154
382k
    }
1155
1156
    /* Remove the key from the stack. */
1157
1158
8.78M
    simple_key->possible = 0;
1159
1160
8.78M
    return 1;
1161
8.78M
}
1162
1163
/*
1164
 * Increase the flow level and resize the simple key list if needed.
1165
 */
1166
1167
static int
1168
yaml_parser_increase_flow_level(yaml_parser_t *parser)
1169
78.8k
{
1170
78.8k
    yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1171
1172
    /* Reset the simple key on the next level. */
1173
1174
78.8k
    if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1175
0
        return 0;
1176
1177
    /* Increase the flow level. */
1178
1179
78.8k
    if (parser->flow_level == INT_MAX) {
1180
0
        parser->error = YAML_MEMORY_ERROR;
1181
0
        return 0;
1182
0
    }
1183
1184
78.8k
    if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) {
1185
2
        return yaml_parser_set_scanner_error(parser,
1186
2
                "while increasing flow level", parser->mark,
1187
2
                "exceeded maximum nesting depth");
1188
2
    }
1189
1190
78.8k
    parser->flow_level++;
1191
1192
78.8k
    return 1;
1193
78.8k
}
1194
1195
/*
1196
 * Decrease the flow level.
1197
 */
1198
1199
static int
1200
yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1201
13.8k
{
1202
13.8k
    if (parser->flow_level) {
1203
13.7k
        parser->flow_level --;
1204
13.7k
        (void)POP(parser, parser->simple_keys);
1205
13.7k
    }
1206
1207
13.8k
    return 1;
1208
13.8k
}
1209
1210
/*
1211
 * Push the current indentation level to the stack and set the new level
1212
 * the current column is greater than the indentation level.  In this case,
1213
 * append or insert the specified token into the token queue.
1214
 *
1215
 */
1216
1217
static int
1218
yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1219
        ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1220
5.64M
{
1221
5.64M
    yaml_token_t token;
1222
1223
    /* In the flow context, do nothing. */
1224
1225
5.64M
    if (parser->flow_level)
1226
8.15k
        return 1;
1227
1228
5.63M
    if (parser->indent < column)
1229
3.67M
    {
1230
        /*
1231
         * Push the current indentation level to the stack and set the new
1232
         * indentation level.
1233
         */
1234
1235
3.67M
        if (!PUSH(parser, parser->indents, parser->indent))
1236
0
            return 0;
1237
1238
3.67M
        if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) {
1239
0
            return yaml_parser_set_scanner_error(parser,
1240
0
                    "while increasing block level", parser->mark,
1241
0
                    "exceeded maximum nesting depth");
1242
0
        }
1243
1244
3.67M
        if (column > INT_MAX) {
1245
0
            parser->error = YAML_MEMORY_ERROR;
1246
0
            return 0;
1247
0
        }
1248
1249
3.67M
        parser->indent = column;
1250
1251
        /* Create a token and insert it into the queue. */
1252
1253
3.67M
        TOKEN_INIT(token, type, mark, mark);
1254
1255
3.67M
        if (number == -1) {
1256
2.82M
            if (!ENQUEUE(parser, parser->tokens, token))
1257
0
                return 0;
1258
2.82M
        }
1259
849k
        else {
1260
849k
            if (!QUEUE_INSERT(parser,
1261
849k
                        parser->tokens, number - parser->tokens_parsed, token))
1262
0
                return 0;
1263
849k
        }
1264
3.67M
    }
1265
1266
5.63M
    return 1;
1267
5.63M
}
1268
1269
/*
1270
 * Pop indentation levels from the indents stack until the current level
1271
 * becomes less or equal to the column.  For each indentation level, append
1272
 * the BLOCK-END token.
1273
 */
1274
1275
1276
static int
1277
yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1278
11.6M
{
1279
11.6M
    yaml_token_t token;
1280
1281
    /* In the flow context, do nothing. */
1282
1283
11.6M
    if (parser->flow_level)
1284
1.93M
        return 1;
1285
1286
    /* Loop through the indentation levels in the stack. */
1287
1288
13.4M
    while (parser->indent > column)
1289
3.67M
    {
1290
        /* Create a token and append it to the queue. */
1291
1292
3.67M
        TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1293
1294
3.67M
        if (!ENQUEUE(parser, parser->tokens, token))
1295
0
            return 0;
1296
1297
        /* Pop the indentation level. */
1298
1299
3.67M
        parser->indent = POP(parser, parser->indents);
1300
3.67M
    }
1301
1302
9.76M
    return 1;
1303
9.76M
}
1304
1305
/*
1306
 * Initialize the scanner and produce the STREAM-START token.
1307
 */
1308
1309
static int
1310
yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1311
17.2k
{
1312
17.2k
    yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1313
17.2k
    yaml_token_t token;
1314
1315
    /* Set the initial indentation. */
1316
1317
17.2k
    parser->indent = -1;
1318
1319
    /* Initialize the simple key stack. */
1320
1321
17.2k
    if (!PUSH(parser, parser->simple_keys, simple_key))
1322
0
        return 0;
1323
1324
    /* A simple key is allowed at the beginning of the stream. */
1325
1326
17.2k
    parser->simple_key_allowed = 1;
1327
1328
    /* We have started. */
1329
1330
17.2k
    parser->stream_start_produced = 1;
1331
1332
    /* Create the STREAM-START token and append it to the queue. */
1333
1334
17.2k
    STREAM_START_TOKEN_INIT(token, parser->encoding,
1335
17.2k
            parser->mark, parser->mark);
1336
1337
17.2k
    if (!ENQUEUE(parser, parser->tokens, token))
1338
0
        return 0;
1339
1340
17.2k
    return 1;
1341
17.2k
}
1342
1343
/*
1344
 * Produce the STREAM-END token and shut down the scanner.
1345
 */
1346
1347
static int
1348
yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1349
8.42k
{
1350
8.42k
    yaml_token_t token;
1351
1352
    /* Force new line. */
1353
1354
8.42k
    if (parser->mark.column != 0) {
1355
7.51k
        parser->mark.column = 0;
1356
7.51k
        parser->mark.line ++;
1357
7.51k
    }
1358
1359
    /* Reset the indentation level. */
1360
1361
8.42k
    if (!yaml_parser_unroll_indent(parser, -1))
1362
0
        return 0;
1363
1364
    /* Reset simple keys. */
1365
1366
8.42k
    if (!yaml_parser_remove_simple_key(parser))
1367
222
        return 0;
1368
1369
8.20k
    parser->simple_key_allowed = 0;
1370
1371
    /* Create the STREAM-END token and append it to the queue. */
1372
1373
8.20k
    STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1374
1375
8.20k
    if (!ENQUEUE(parser, parser->tokens, token))
1376
0
        return 0;
1377
1378
8.20k
    return 1;
1379
8.20k
}
1380
1381
/*
1382
 * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1383
 */
1384
1385
static int
1386
yaml_parser_fetch_directive(yaml_parser_t *parser)
1387
588k
{
1388
588k
    yaml_token_t token;
1389
1390
    /* Reset the indentation level. */
1391
1392
588k
    if (!yaml_parser_unroll_indent(parser, -1))
1393
0
        return 0;
1394
1395
    /* Reset simple keys. */
1396
1397
588k
    if (!yaml_parser_remove_simple_key(parser))
1398
0
        return 0;
1399
1400
588k
    parser->simple_key_allowed = 0;
1401
1402
    /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1403
1404
588k
    if (!yaml_parser_scan_directive(parser, &token))
1405
1.01k
        return 0;
1406
1407
    /* Append the token to the queue. */
1408
1409
587k
    if (!ENQUEUE(parser, parser->tokens, token)) {
1410
0
        yaml_token_delete(&token);
1411
0
        return 0;
1412
0
    }
1413
1414
587k
    return 1;
1415
587k
}
1416
1417
/*
1418
 * Produce the DOCUMENT-START or DOCUMENT-END token.
1419
 */
1420
1421
static int
1422
yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1423
        yaml_token_type_t type)
1424
554k
{
1425
554k
    yaml_mark_t start_mark, end_mark;
1426
554k
    yaml_token_t token;
1427
1428
    /* Reset the indentation level. */
1429
1430
554k
    if (!yaml_parser_unroll_indent(parser, -1))
1431
0
        return 0;
1432
1433
    /* Reset simple keys. */
1434
1435
554k
    if (!yaml_parser_remove_simple_key(parser))
1436
0
        return 0;
1437
1438
554k
    parser->simple_key_allowed = 0;
1439
1440
    /* Consume the token. */
1441
1442
554k
    start_mark = parser->mark;
1443
1444
554k
    SKIP(parser);
1445
554k
    SKIP(parser);
1446
554k
    SKIP(parser);
1447
1448
554k
    end_mark = parser->mark;
1449
1450
    /* Create the DOCUMENT-START or DOCUMENT-END token. */
1451
1452
554k
    TOKEN_INIT(token, type, start_mark, end_mark);
1453
1454
    /* Append the token to the queue. */
1455
1456
554k
    if (!ENQUEUE(parser, parser->tokens, token))
1457
0
        return 0;
1458
1459
554k
    return 1;
1460
554k
}
1461
1462
/*
1463
 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1464
 */
1465
1466
static int
1467
yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1468
        yaml_token_type_t type)
1469
78.8k
{
1470
78.8k
    yaml_mark_t start_mark, end_mark;
1471
78.8k
    yaml_token_t token;
1472
1473
    /* The indicators '[' and '{' may start a simple key. */
1474
1475
78.8k
    if (!yaml_parser_save_simple_key(parser))
1476
0
        return 0;
1477
1478
    /* Increase the flow level. */
1479
1480
78.8k
    if (!yaml_parser_increase_flow_level(parser))
1481
2
        return 0;
1482
1483
    /* A simple key may follow the indicators '[' and '{'. */
1484
1485
78.8k
    parser->simple_key_allowed = 1;
1486
1487
    /* Consume the token. */
1488
1489
78.8k
    start_mark = parser->mark;
1490
78.8k
    SKIP(parser);
1491
78.8k
    end_mark = parser->mark;
1492
1493
    /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1494
1495
78.8k
    TOKEN_INIT(token, type, start_mark, end_mark);
1496
1497
    /* Append the token to the queue. */
1498
1499
78.8k
    if (!ENQUEUE(parser, parser->tokens, token))
1500
0
        return 0;
1501
1502
78.8k
    return 1;
1503
78.8k
}
1504
1505
/*
1506
 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1507
 */
1508
1509
static int
1510
yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1511
        yaml_token_type_t type)
1512
13.8k
{
1513
13.8k
    yaml_mark_t start_mark, end_mark;
1514
13.8k
    yaml_token_t token;
1515
1516
    /* Reset any potential simple key on the current flow level. */
1517
1518
13.8k
    if (!yaml_parser_remove_simple_key(parser))
1519
3
        return 0;
1520
1521
    /* Decrease the flow level. */
1522
1523
13.8k
    if (!yaml_parser_decrease_flow_level(parser))
1524
0
        return 0;
1525
1526
    /* No simple keys after the indicators ']' and '}'. */
1527
1528
13.8k
    parser->simple_key_allowed = 0;
1529
1530
    /* Consume the token. */
1531
1532
13.8k
    start_mark = parser->mark;
1533
13.8k
    SKIP(parser);
1534
13.8k
    end_mark = parser->mark;
1535
1536
    /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1537
1538
13.8k
    TOKEN_INIT(token, type, start_mark, end_mark);
1539
1540
    /* Append the token to the queue. */
1541
1542
13.8k
    if (!ENQUEUE(parser, parser->tokens, token))
1543
0
        return 0;
1544
1545
13.8k
    return 1;
1546
13.8k
}
1547
1548
/*
1549
 * Produce the FLOW-ENTRY token.
1550
 */
1551
1552
static int
1553
yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1554
816k
{
1555
816k
    yaml_mark_t start_mark, end_mark;
1556
816k
    yaml_token_t token;
1557
1558
    /* Reset any potential simple keys on the current flow level. */
1559
1560
816k
    if (!yaml_parser_remove_simple_key(parser))
1561
3
        return 0;
1562
1563
    /* Simple keys are allowed after ','. */
1564
1565
816k
    parser->simple_key_allowed = 1;
1566
1567
    /* Consume the token. */
1568
1569
816k
    start_mark = parser->mark;
1570
816k
    SKIP(parser);
1571
816k
    end_mark = parser->mark;
1572
1573
    /* Create the FLOW-ENTRY token and append it to the queue. */
1574
1575
816k
    TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1576
1577
816k
    if (!ENQUEUE(parser, parser->tokens, token))
1578
0
        return 0;
1579
1580
816k
    return 1;
1581
816k
}
1582
1583
/*
1584
 * Produce the BLOCK-ENTRY token.
1585
 */
1586
1587
static int
1588
yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1589
3.63M
{
1590
3.63M
    yaml_mark_t start_mark, end_mark;
1591
3.63M
    yaml_token_t token;
1592
1593
    /* Check if the scanner is in the block context. */
1594
1595
3.63M
    if (!parser->flow_level)
1596
3.63M
    {
1597
        /* Check if we are allowed to start a new entry. */
1598
1599
3.63M
        if (!parser->simple_key_allowed) {
1600
19
            return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1601
19
                    "block sequence entries are not allowed in this context");
1602
19
        }
1603
1604
        /* Add the BLOCK-SEQUENCE-START token if needed. */
1605
1606
3.63M
        if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1607
3.63M
                    YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1608
0
            return 0;
1609
3.63M
    }
1610
1.99k
    else
1611
1.99k
    {
1612
        /*
1613
         * It is an error for the '-' indicator to occur in the flow context,
1614
         * but we let the Parser detect and report about it because the Parser
1615
         * is able to point to the context.
1616
         */
1617
1.99k
    }
1618
1619
    /* Reset any potential simple keys on the current flow level. */
1620
1621
3.63M
    if (!yaml_parser_remove_simple_key(parser))
1622
0
        return 0;
1623
1624
    /* Simple keys are allowed after '-'. */
1625
1626
3.63M
    parser->simple_key_allowed = 1;
1627
1628
    /* Consume the token. */
1629
1630
3.63M
    start_mark = parser->mark;
1631
3.63M
    SKIP(parser);
1632
3.63M
    end_mark = parser->mark;
1633
1634
    /* Create the BLOCK-ENTRY token and append it to the queue. */
1635
1636
3.63M
    TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1637
1638
3.63M
    if (!ENQUEUE(parser, parser->tokens, token))
1639
0
        return 0;
1640
1641
3.63M
    return 1;
1642
3.63M
}
1643
1644
/*
1645
 * Produce the KEY token.
1646
 */
1647
1648
static int
1649
yaml_parser_fetch_key(yaml_parser_t *parser)
1650
910k
{
1651
910k
    yaml_mark_t start_mark, end_mark;
1652
910k
    yaml_token_t token;
1653
1654
    /* In the block context, additional checks are required. */
1655
1656
910k
    if (!parser->flow_level)
1657
586k
    {
1658
        /* Check if we are allowed to start a new key (not necessary simple). */
1659
1660
586k
        if (!parser->simple_key_allowed) {
1661
8
            return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1662
8
                    "mapping keys are not allowed in this context");
1663
8
        }
1664
1665
        /* Add the BLOCK-MAPPING-START token if needed. */
1666
1667
586k
        if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1668
586k
                    YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1669
0
            return 0;
1670
586k
    }
1671
1672
    /* Reset any potential simple keys on the current flow level. */
1673
1674
910k
    if (!yaml_parser_remove_simple_key(parser))
1675
0
        return 0;
1676
1677
    /* Simple keys are allowed after '?' in the block context. */
1678
1679
910k
    parser->simple_key_allowed = (!parser->flow_level);
1680
1681
    /* Consume the token. */
1682
1683
910k
    start_mark = parser->mark;
1684
910k
    SKIP(parser);
1685
910k
    end_mark = parser->mark;
1686
1687
    /* Create the KEY token and append it to the queue. */
1688
1689
910k
    TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1690
1691
910k
    if (!ENQUEUE(parser, parser->tokens, token))
1692
0
        return 0;
1693
1694
910k
    return 1;
1695
910k
}
1696
1697
/*
1698
 * Produce the VALUE token.
1699
 */
1700
1701
static int
1702
yaml_parser_fetch_value(yaml_parser_t *parser)
1703
1.54M
{
1704
1.54M
    yaml_mark_t start_mark, end_mark;
1705
1.54M
    yaml_token_t token;
1706
1.54M
    yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1707
1708
    /* Have we found a simple key? */
1709
1710
1.54M
    if (simple_key->possible)
1711
1.42M
    {
1712
1713
        /* Create the KEY token and insert it into the queue. */
1714
1715
1.42M
        TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1716
1717
1.42M
        if (!QUEUE_INSERT(parser, parser->tokens,
1718
1.42M
                    simple_key->token_number - parser->tokens_parsed, token))
1719
0
            return 0;
1720
1721
        /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1722
1723
1.42M
        if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1724
1.42M
                    simple_key->token_number,
1725
1.42M
                    YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1726
0
            return 0;
1727
1728
        /* Remove the simple key. */
1729
1730
1.42M
        simple_key->possible = 0;
1731
1732
        /* A simple key cannot follow another simple key. */
1733
1734
1.42M
        parser->simple_key_allowed = 0;
1735
1.42M
    }
1736
123k
    else
1737
123k
    {
1738
        /* The ':' indicator follows a complex key. */
1739
1740
        /* In the block context, extra checks are required. */
1741
1742
123k
        if (!parser->flow_level)
1743
1.61k
        {
1744
            /* Check if we are allowed to start a complex value. */
1745
1746
1.61k
            if (!parser->simple_key_allowed) {
1747
59
                return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1748
59
                        "mapping values are not allowed in this context");
1749
59
            }
1750
1751
            /* Add the BLOCK-MAPPING-START token if needed. */
1752
1753
1.56k
            if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1754
1.56k
                        YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1755
0
                return 0;
1756
1.56k
        }
1757
1758
        /* Simple keys after ':' are allowed in the block context. */
1759
1760
123k
        parser->simple_key_allowed = (!parser->flow_level);
1761
123k
    }
1762
1763
    /* Consume the token. */
1764
1765
1.54M
    start_mark = parser->mark;
1766
1.54M
    SKIP(parser);
1767
1.54M
    end_mark = parser->mark;
1768
1769
    /* Create the VALUE token and append it to the queue. */
1770
1771
1.54M
    TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1772
1773
1.54M
    if (!ENQUEUE(parser, parser->tokens, token))
1774
0
        return 0;
1775
1776
1.54M
    return 1;
1777
1.54M
}
1778
1779
/*
1780
 * Produce the ALIAS or ANCHOR token.
1781
 */
1782
1783
static int
1784
yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1785
19.6k
{
1786
19.6k
    yaml_token_t token;
1787
1788
    /* An anchor or an alias could be a simple key. */
1789
1790
19.6k
    if (!yaml_parser_save_simple_key(parser))
1791
0
        return 0;
1792
1793
    /* A simple key cannot follow an anchor or an alias. */
1794
1795
19.6k
    parser->simple_key_allowed = 0;
1796
1797
    /* Create the ALIAS or ANCHOR token and append it to the queue. */
1798
1799
19.6k
    if (!yaml_parser_scan_anchor(parser, &token, type))
1800
238
        return 0;
1801
1802
19.3k
    if (!ENQUEUE(parser, parser->tokens, token)) {
1803
0
        yaml_token_delete(&token);
1804
0
        return 0;
1805
0
    }
1806
19.3k
    return 1;
1807
19.3k
}
1808
1809
/*
1810
 * Produce the TAG token.
1811
 */
1812
1813
static int
1814
yaml_parser_fetch_tag(yaml_parser_t *parser)
1815
246k
{
1816
246k
    yaml_token_t token;
1817
1818
    /* A tag could be a simple key. */
1819
1820
246k
    if (!yaml_parser_save_simple_key(parser))
1821
0
        return 0;
1822
1823
    /* A simple key cannot follow a tag. */
1824
1825
246k
    parser->simple_key_allowed = 0;
1826
1827
    /* Create the TAG token and append it to the queue. */
1828
1829
246k
    if (!yaml_parser_scan_tag(parser, &token))
1830
547
        return 0;
1831
1832
245k
    if (!ENQUEUE(parser, parser->tokens, token)) {
1833
0
        yaml_token_delete(&token);
1834
0
        return 0;
1835
0
    }
1836
1837
245k
    return 1;
1838
245k
}
1839
1840
/*
1841
 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1842
 */
1843
1844
static int
1845
yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1846
247k
{
1847
247k
    yaml_token_t token;
1848
1849
    /* Remove any potential simple keys. */
1850
1851
247k
    if (!yaml_parser_remove_simple_key(parser))
1852
2
        return 0;
1853
1854
    /* A simple key may follow a block scalar. */
1855
1856
247k
    parser->simple_key_allowed = 1;
1857
1858
    /* Create the SCALAR token and append it to the queue. */
1859
1860
247k
    if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1861
219
        return 0;
1862
1863
247k
    if (!ENQUEUE(parser, parser->tokens, token)) {
1864
0
        yaml_token_delete(&token);
1865
0
        return 0;
1866
0
    }
1867
1868
247k
    return 1;
1869
247k
}
1870
1871
/*
1872
 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1873
 */
1874
1875
static int
1876
yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1877
14.0k
{
1878
14.0k
    yaml_token_t token;
1879
1880
    /* A plain scalar could be a simple key. */
1881
1882
14.0k
    if (!yaml_parser_save_simple_key(parser))
1883
0
        return 0;
1884
1885
    /* A simple key cannot follow a flow scalar. */
1886
1887
14.0k
    parser->simple_key_allowed = 0;
1888
1889
    /* Create the SCALAR token and append it to the queue. */
1890
1891
14.0k
    if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1892
2.27k
        return 0;
1893
1894
11.7k
    if (!ENQUEUE(parser, parser->tokens, token)) {
1895
0
        yaml_token_delete(&token);
1896
0
        return 0;
1897
0
    }
1898
1899
11.7k
    return 1;
1900
11.7k
}
1901
1902
/*
1903
 * Produce the SCALAR(...,plain) token.
1904
 */
1905
1906
static int
1907
yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1908
1.86M
{
1909
1.86M
    yaml_token_t token;
1910
1911
    /* A plain scalar could be a simple key. */
1912
1913
1.86M
    if (!yaml_parser_save_simple_key(parser))
1914
0
        return 0;
1915
1916
    /* A simple key cannot follow a flow scalar. */
1917
1918
1.86M
    parser->simple_key_allowed = 0;
1919
1920
    /* Create the SCALAR token and append it to the queue. */
1921
1922
1.86M
    if (!yaml_parser_scan_plain_scalar(parser, &token))
1923
119
        return 0;
1924
1925
1.86M
    if (!ENQUEUE(parser, parser->tokens, token)) {
1926
0
        yaml_token_delete(&token);
1927
0
        return 0;
1928
0
    }
1929
1930
1.86M
    return 1;
1931
1.86M
}
1932
1933
/*
1934
 * Eat whitespaces and comments until the next token is found.
1935
 */
1936
1937
static int
1938
yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1939
10.5M
{
1940
    /* Until the next token is not found. */
1941
1942
17.3M
    while (1)
1943
17.3M
    {
1944
        /* Allow the BOM mark to start a line. */
1945
1946
17.3M
        if (!CACHE(parser, 1)) return 0;
1947
1948
17.3M
        if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1949
484k
            SKIP(parser);
1950
1951
        /*
1952
         * Eat whitespaces.
1953
         *
1954
         * Tabs are allowed:
1955
         *
1956
         *  - in the flow context;
1957
         *  - in the block context, but not at the beginning of the line or
1958
         *  after '-', '?', or ':' (complex value).
1959
         */
1960
1961
17.3M
        if (!CACHE(parser, 1)) return 0;
1962
1963
88.8M
        while (CHECK(parser->buffer,' ') ||
1964
17.4M
                ((parser->flow_level || !parser->simple_key_allowed) &&
1965
71.5M
                 CHECK(parser->buffer, '\t'))) {
1966
71.5M
            SKIP(parser);
1967
71.5M
            if (!CACHE(parser, 1)) return 0;
1968
71.5M
        }
1969
1970
        /* Eat a comment until a line break. */
1971
1972
17.3M
        if (CHECK(parser->buffer, '#')) {
1973
4.20M
            while (!IS_BREAKZ(parser->buffer)) {
1974
4.14M
                SKIP(parser);
1975
4.14M
                if (!CACHE(parser, 1)) return 0;
1976
4.14M
            }
1977
67.9k
        }
1978
1979
        /* If it is a line break, eat it. */
1980
1981
17.3M
        if (IS_BREAK(parser->buffer))
1982
6.77M
        {
1983
6.77M
            if (!CACHE(parser, 2)) return 0;
1984
6.77M
            SKIP_LINE(parser);
1985
1986
            /* In the block context, a new line may start a simple key. */
1987
1988
6.77M
            if (!parser->flow_level) {
1989
5.70M
                parser->simple_key_allowed = 1;
1990
5.70M
            }
1991
6.77M
        }
1992
10.5M
        else
1993
10.5M
        {
1994
            /* We have found a token. */
1995
1996
10.5M
            break;
1997
10.5M
        }
1998
17.3M
    }
1999
2000
10.5M
    return 1;
2001
10.5M
}
2002
2003
/*
2004
 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
2005
 *
2006
 * Scope:
2007
 *      %YAML    1.1    # a comment \n
2008
 *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2009
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2010
 *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2011
 */
2012
2013
int
2014
yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
2015
588k
{
2016
588k
    yaml_mark_t start_mark, end_mark;
2017
588k
    yaml_char_t *name = NULL;
2018
588k
    int major, minor;
2019
588k
    yaml_char_t *handle = NULL, *prefix = NULL;
2020
2021
    /* Eat '%'. */
2022
2023
588k
    start_mark = parser->mark;
2024
2025
588k
    SKIP(parser);
2026
2027
    /* Scan the directive name. */
2028
2029
588k
    if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2030
197
        goto error;
2031
2032
    /* Is it a YAML directive? */
2033
2034
587k
    if (strcmp((char *)name, "YAML") == 0)
2035
537k
    {
2036
        /* Scan the VERSION directive value. */
2037
2038
537k
        if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2039
537k
                    &major, &minor))
2040
183
            goto error;
2041
2042
537k
        end_mark = parser->mark;
2043
2044
        /* Create a VERSION-DIRECTIVE token. */
2045
2046
537k
        VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2047
537k
                start_mark, end_mark);
2048
537k
    }
2049
2050
    /* Is it a TAG directive? */
2051
2052
50.1k
    else if (strcmp((char *)name, "TAG") == 0)
2053
49.8k
    {
2054
        /* Scan the TAG directive value. */
2055
2056
49.8k
        if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2057
49.8k
                    &handle, &prefix))
2058
242
            goto error;
2059
2060
49.6k
        end_mark = parser->mark;
2061
2062
        /* Create a TAG-DIRECTIVE token. */
2063
2064
49.6k
        TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2065
49.6k
                start_mark, end_mark);
2066
49.6k
    }
2067
2068
    /* Unknown directive. */
2069
2070
292
    else
2071
292
    {
2072
292
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2073
292
                start_mark, "found unknown directive name");
2074
292
        goto error;
2075
292
    }
2076
2077
    /* Eat the rest of the line including any comments. */
2078
2079
587k
    if (!CACHE(parser, 1)) goto error;
2080
2081
2.67M
    while (IS_BLANK(parser->buffer)) {
2082
2.67M
        SKIP(parser);
2083
2.67M
        if (!CACHE(parser, 1)) goto error;
2084
2.67M
    }
2085
2086
587k
    if (CHECK(parser->buffer, '#')) {
2087
3.71M
        while (!IS_BREAKZ(parser->buffer)) {
2088
3.59M
            SKIP(parser);
2089
3.59M
            if (!CACHE(parser, 1)) goto error;
2090
3.59M
        }
2091
120k
    }
2092
2093
    /* Check if we are at the end of the line. */
2094
2095
587k
    if (!IS_BREAKZ(parser->buffer)) {
2096
81
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2097
81
                start_mark, "did not find expected comment or line break");
2098
81
        goto error;
2099
81
    }
2100
2101
    /* Eat a line break. */
2102
2103
587k
    if (IS_BREAK(parser->buffer)) {
2104
586k
        if (!CACHE(parser, 2)) goto error;
2105
586k
        SKIP_LINE(parser);
2106
586k
    }
2107
2108
587k
    yaml_free(name);
2109
2110
587k
    return 1;
2111
2112
1.01k
error:
2113
1.01k
    yaml_free(prefix);
2114
1.01k
    yaml_free(handle);
2115
1.01k
    yaml_free(name);
2116
1.01k
    return 0;
2117
587k
}
2118
2119
/*
2120
 * Scan the directive name.
2121
 *
2122
 * Scope:
2123
 *      %YAML   1.1     # a comment \n
2124
 *       ^^^^
2125
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2126
 *       ^^^
2127
 */
2128
2129
static int
2130
yaml_parser_scan_directive_name(yaml_parser_t *parser,
2131
        yaml_mark_t start_mark, yaml_char_t **name)
2132
588k
{
2133
588k
    yaml_string_t string = NULL_STRING;
2134
2135
588k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2136
2137
    /* Consume the directive name. */
2138
2139
588k
    if (!CACHE(parser, 1)) goto error;
2140
2141
588k
    while (IS_ALPHA(parser->buffer))
2142
6.21M
    {
2143
6.21M
        if (!READ(parser, string)) goto error;
2144
6.21M
        if (!CACHE(parser, 1)) goto error;
2145
6.21M
    }
2146
2147
    /* Check if the name is empty. */
2148
2149
588k
    if (string.start == string.pointer) {
2150
108
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2151
108
                start_mark, "could not find expected directive name");
2152
108
        goto error;
2153
108
    }
2154
2155
    /* Check for an blank character after the name. */
2156
2157
588k
    if (!IS_BLANKZ(parser->buffer)) {
2158
87
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2159
87
                start_mark, "found unexpected non-alphabetical character");
2160
87
        goto error;
2161
87
    }
2162
2163
587k
    *name = string.start;
2164
2165
587k
    return 1;
2166
2167
197
error:
2168
197
    STRING_DEL(parser, string);
2169
197
    return 0;
2170
588k
}
2171
2172
/*
2173
 * Scan the value of VERSION-DIRECTIVE.
2174
 *
2175
 * Scope:
2176
 *      %YAML   1.1     # a comment \n
2177
 *           ^^^^^^
2178
 */
2179
2180
static int
2181
yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2182
        yaml_mark_t start_mark, int *major, int *minor)
2183
537k
{
2184
    /* Eat whitespaces. */
2185
2186
537k
    if (!CACHE(parser, 1)) return 0;
2187
2188
2.60M
    while (IS_BLANK(parser->buffer)) {
2189
2.60M
        SKIP(parser);
2190
2.60M
        if (!CACHE(parser, 1)) return 0;
2191
2.60M
    }
2192
2193
    /* Consume the major version number. */
2194
2195
537k
    if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2196
94
        return 0;
2197
2198
    /* Eat '.'. */
2199
2200
537k
    if (!CHECK(parser->buffer, '.')) {
2201
49
        return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2202
49
                start_mark, "did not find expected digit or '.' character");
2203
49
    }
2204
2205
537k
    SKIP(parser);
2206
2207
    /* Consume the minor version number. */
2208
2209
537k
    if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2210
38
        return 0;
2211
2212
537k
    return 1;
2213
537k
}
2214
2215
1.07M
#define MAX_NUMBER_LENGTH   9
2216
2217
/*
2218
 * Scan the version number of VERSION-DIRECTIVE.
2219
 *
2220
 * Scope:
2221
 *      %YAML   1.1     # a comment \n
2222
 *              ^
2223
 *      %YAML   1.1     # a comment \n
2224
 *                ^
2225
 */
2226
2227
static int
2228
yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2229
        yaml_mark_t start_mark, int *number)
2230
1.07M
{
2231
1.07M
    int value = 0;
2232
1.07M
    size_t length = 0;
2233
2234
    /* Repeat while the next character is digit. */
2235
2236
1.07M
    if (!CACHE(parser, 1)) return 0;
2237
2238
1.07M
    while (IS_DIGIT(parser->buffer))
2239
1.07M
    {
2240
        /* Check if the number is too long. */
2241
2242
1.07M
        if (++length > MAX_NUMBER_LENGTH) {
2243
8
            return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2244
8
                    start_mark, "found extremely long version number");
2245
8
        }
2246
2247
1.07M
        value = value*10 + AS_DIGIT(parser->buffer);
2248
2249
1.07M
        SKIP(parser);
2250
2251
1.07M
        if (!CACHE(parser, 1)) return 0;
2252
1.07M
    }
2253
2254
    /* Check if the number was present. */
2255
2256
1.07M
    if (!length) {
2257
119
        return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2258
119
                start_mark, "did not find expected version number");
2259
119
    }
2260
2261
1.07M
    *number = value;
2262
2263
1.07M
    return 1;
2264
1.07M
}
2265
2266
/*
2267
 * Scan the value of a TAG-DIRECTIVE token.
2268
 *
2269
 * Scope:
2270
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2271
 *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2272
 */
2273
2274
static int
2275
yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2276
        yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2277
49.8k
{
2278
49.8k
    yaml_char_t *handle_value = NULL;
2279
49.8k
    yaml_char_t *prefix_value = NULL;
2280
2281
    /* Eat whitespaces. */
2282
2283
49.8k
    if (!CACHE(parser, 1)) goto error;
2284
2285
2.23M
    while (IS_BLANK(parser->buffer)) {
2286
2.23M
        SKIP(parser);
2287
2.23M
        if (!CACHE(parser, 1)) goto error;
2288
2.23M
    }
2289
2290
    /* Scan a handle. */
2291
2292
49.8k
    if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2293
96
        goto error;
2294
2295
    /* Expect a whitespace. */
2296
2297
49.7k
    if (!CACHE(parser, 1)) goto error;
2298
2299
49.7k
    if (!IS_BLANK(parser->buffer)) {
2300
28
        yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2301
28
                start_mark, "did not find expected whitespace");
2302
28
        goto error;
2303
28
    }
2304
2305
    /* Eat whitespaces. */
2306
2307
3.29M
    while (IS_BLANK(parser->buffer)) {
2308
3.29M
        SKIP(parser);
2309
3.29M
        if (!CACHE(parser, 1)) goto error;
2310
3.29M
    }
2311
2312
    /* Scan a prefix. */
2313
2314
49.7k
    if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2315
71
        goto error;
2316
2317
    /* Expect a whitespace or line break. */
2318
2319
49.6k
    if (!CACHE(parser, 1)) goto error;
2320
2321
49.6k
    if (!IS_BLANKZ(parser->buffer)) {
2322
41
        yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2323
41
                start_mark, "did not find expected whitespace or line break");
2324
41
        goto error;
2325
41
    }
2326
2327
49.6k
    *handle = handle_value;
2328
49.6k
    *prefix = prefix_value;
2329
2330
49.6k
    return 1;
2331
2332
242
error:
2333
242
    yaml_free(handle_value);
2334
242
    yaml_free(prefix_value);
2335
242
    return 0;
2336
49.6k
}
2337
2338
static int
2339
yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2340
        yaml_token_type_t type)
2341
19.6k
{
2342
19.6k
    int length = 0;
2343
19.6k
    yaml_mark_t start_mark, end_mark;
2344
19.6k
    yaml_string_t string = NULL_STRING;
2345
2346
19.6k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2347
2348
    /* Eat the indicator character. */
2349
2350
19.6k
    start_mark = parser->mark;
2351
2352
19.6k
    SKIP(parser);
2353
2354
    /* Consume the value. */
2355
2356
19.6k
    if (!CACHE(parser, 1)) goto error;
2357
2358
9.81M
    while (IS_ALPHA(parser->buffer)) {
2359
9.81M
        if (!READ(parser, string)) goto error;
2360
9.81M
        if (!CACHE(parser, 1)) goto error;
2361
9.81M
        length ++;
2362
9.81M
    }
2363
2364
19.6k
    end_mark = parser->mark;
2365
2366
    /*
2367
     * Check if length of the anchor is greater than 0 and it is followed by
2368
     * a whitespace character or one of the indicators:
2369
     *
2370
     *      '?', ':', ',', ']', '}', '%', '@', '`'.
2371
     */
2372
2373
19.6k
    if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2374
9.37k
                || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2375
1.39k
                || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2376
83
                || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2377
236
                || CHECK(parser->buffer, '`'))) {
2378
236
        yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2379
121
                "while scanning an anchor" : "while scanning an alias", start_mark,
2380
236
                "did not find expected alphabetic or numeric character");
2381
236
        goto error;
2382
236
    }
2383
2384
    /* Create a token. */
2385
2386
19.3k
    if (type == YAML_ANCHOR_TOKEN) {
2387
14.6k
        ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2388
14.6k
    }
2389
4.75k
    else {
2390
4.75k
        ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2391
4.75k
    }
2392
2393
19.3k
    return 1;
2394
2395
238
error:
2396
238
    STRING_DEL(parser, string);
2397
238
    return 0;
2398
19.6k
}
2399
2400
/*
2401
 * Scan a TAG token.
2402
 */
2403
2404
static int
2405
yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2406
246k
{
2407
246k
    yaml_char_t *handle = NULL;
2408
246k
    yaml_char_t *suffix = NULL;
2409
246k
    yaml_mark_t start_mark, end_mark;
2410
2411
246k
    start_mark = parser->mark;
2412
2413
    /* Check if the tag is in the canonical form. */
2414
2415
246k
    if (!CACHE(parser, 2)) goto error;
2416
2417
246k
    if (CHECK_AT(parser->buffer, '<', 1))
2418
173k
    {
2419
        /* Set the handle to '' */
2420
2421
173k
        handle = YAML_MALLOC(1);
2422
173k
        if (!handle) goto error;
2423
173k
        handle[0] = '\0';
2424
2425
        /* Eat '!<' */
2426
2427
173k
        SKIP(parser);
2428
173k
        SKIP(parser);
2429
2430
        /* Consume the tag value. */
2431
2432
173k
        if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2433
22
            goto error;
2434
2435
        /* Check for '>' and eat it. */
2436
2437
173k
        if (!CHECK(parser->buffer, '>')) {
2438
58
            yaml_parser_set_scanner_error(parser, "while scanning a tag",
2439
58
                    start_mark, "did not find the expected '>'");
2440
58
            goto error;
2441
58
        }
2442
2443
173k
        SKIP(parser);
2444
173k
    }
2445
73.0k
    else
2446
73.0k
    {
2447
        /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2448
2449
        /* First, try to scan a handle. */
2450
2451
73.0k
        if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2452
2
            goto error;
2453
2454
        /* Check if it is, indeed, handle. */
2455
2456
73.0k
        if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2457
2.48k
        {
2458
            /* Scan the suffix now. */
2459
2460
2.48k
            if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2461
38
                goto error;
2462
2.48k
        }
2463
70.5k
        else
2464
70.5k
        {
2465
            /* It wasn't a handle after all.  Scan the rest of the tag. */
2466
2467
70.5k
            if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2468
296
                goto error;
2469
2470
            /* Set the handle to '!'. */
2471
2472
70.2k
            yaml_free(handle);
2473
70.2k
            handle = YAML_MALLOC(2);
2474
70.2k
            if (!handle) goto error;
2475
70.2k
            handle[0] = '!';
2476
70.2k
            handle[1] = '\0';
2477
2478
            /*
2479
             * A special case: the '!' tag.  Set the handle to '' and the
2480
             * suffix to '!'.
2481
             */
2482
2483
70.2k
            if (suffix[0] == '\0') {
2484
46.7k
                yaml_char_t *tmp = handle;
2485
46.7k
                handle = suffix;
2486
46.7k
                suffix = tmp;
2487
46.7k
            }
2488
70.2k
        }
2489
73.0k
    }
2490
2491
    /* Check the character which ends the tag. */
2492
2493
246k
    if (!CACHE(parser, 1)) goto error;
2494
2495
246k
    if (!IS_BLANKZ(parser->buffer)) {
2496
218k
        if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2497
129
            yaml_parser_set_scanner_error(parser, "while scanning a tag",
2498
129
                    start_mark, "did not find expected whitespace or line break");
2499
129
            goto error;
2500
129
        }
2501
218k
    }
2502
2503
245k
    end_mark = parser->mark;
2504
2505
    /* Create a token. */
2506
2507
245k
    TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2508
2509
245k
    return 1;
2510
2511
547
error:
2512
547
    yaml_free(handle);
2513
547
    yaml_free(suffix);
2514
547
    return 0;
2515
246k
}
2516
2517
/*
2518
 * Scan a tag handle.
2519
 */
2520
2521
static int
2522
yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2523
        yaml_mark_t start_mark, yaml_char_t **handle)
2524
122k
{
2525
122k
    yaml_string_t string = NULL_STRING;
2526
2527
122k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2528
2529
    /* Check the initial '!' character. */
2530
2531
122k
    if (!CACHE(parser, 1)) goto error;
2532
2533
122k
    if (!CHECK(parser->buffer, '!')) {
2534
88
        yaml_parser_set_scanner_error(parser, directive ?
2535
88
                "while scanning a tag directive" : "while scanning a tag",
2536
88
                start_mark, "did not find expected '!'");
2537
88
        goto error;
2538
88
    }
2539
2540
    /* Copy the '!' character. */
2541
2542
122k
    if (!READ(parser, string)) goto error;
2543
2544
    /* Copy all subsequent alphabetical and numerical characters. */
2545
2546
122k
    if (!CACHE(parser, 1)) goto error;
2547
2548
122k
    while (IS_ALPHA(parser->buffer))
2549
16.4M
    {
2550
16.4M
        if (!READ(parser, string)) goto error;
2551
16.4M
        if (!CACHE(parser, 1)) goto error;
2552
16.4M
    }
2553
2554
    /* Check if the trailing character is '!' and copy it. */
2555
2556
122k
    if (CHECK(parser->buffer, '!'))
2557
46.1k
    {
2558
46.1k
        if (!READ(parser, string)) goto error;
2559
46.1k
    }
2560
76.7k
    else
2561
76.7k
    {
2562
        /*
2563
         * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2564
         * directive, it's an error.  If it's a tag token, it must be a part of
2565
         * URI.
2566
         */
2567
2568
76.7k
        if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2569
6
            yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2570
6
                    start_mark, "did not find expected '!'");
2571
6
            goto error;
2572
6
        }
2573
76.7k
    }
2574
2575
122k
    *handle = string.start;
2576
2577
122k
    return 1;
2578
2579
98
error:
2580
98
    STRING_DEL(parser, string);
2581
98
    return 0;
2582
122k
}
2583
2584
/*
2585
 * Scan a tag.
2586
 */
2587
2588
static int
2589
yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2590
        yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2591
296k
{
2592
296k
    size_t length = head ? strlen((char *)head) : 0;
2593
296k
    yaml_string_t string = NULL_STRING;
2594
2595
296k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2596
2597
    /* Resize the string to include the head. */
2598
2599
298k
    while ((size_t)(string.end - string.start) <= length) {
2600
2.19k
        if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2601
0
            parser->error = YAML_MEMORY_ERROR;
2602
0
            goto error;
2603
0
        }
2604
2.19k
    }
2605
2606
    /*
2607
     * Copy the head if needed.
2608
     *
2609
     * Note that we don't copy the leading '!' character.
2610
     */
2611
2612
296k
    if (length > 1) {
2613
13.7k
        memcpy(string.start, head+1, length-1);
2614
13.7k
        string.pointer += length-1;
2615
13.7k
    }
2616
2617
    /* Scan the tag. */
2618
2619
296k
    if (!CACHE(parser, 1)) goto error;
2620
2621
    /*
2622
     * The set of characters that may appear in URI is as follows:
2623
     *
2624
     *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2625
     *      '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2626
     *
2627
     * If we are inside a verbatim tag <...> (parameter uri_char is true)
2628
     * then also the following flow indicators are allowed:
2629
     *      ',', '[', ']'
2630
     */
2631
2632
11.5M
    while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2633
4.47M
            || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2634
4.46M
            || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2635
4.45M
            || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2636
4.44M
            || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2637
4.26M
            || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2638
3.82M
            || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2639
2.94M
            || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2640
1.14M
            || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2641
300k
            || (uri_char && (
2642
228k
                CHECK(parser->buffer, ',')
2643
224k
                || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2644
228k
                )
2645
300k
            ))
2646
11.2M
    {
2647
        /* Check if it is a URI-escape sequence. */
2648
2649
11.2M
        if (CHECK(parser->buffer, '%')) {
2650
348k
            if (!STRING_EXTEND(parser, string))
2651
0
                goto error;
2652
2653
348k
            if (!yaml_parser_scan_uri_escapes(parser,
2654
348k
                        directive, start_mark, &string)) goto error;
2655
348k
        }
2656
10.9M
        else {
2657
10.9M
            if (!READ(parser, string)) goto error;
2658
10.9M
        }
2659
2660
11.2M
        length ++;
2661
11.2M
        if (!CACHE(parser, 1)) goto error;
2662
11.2M
    }
2663
2664
    /* Check if the tag is non-empty. */
2665
2666
295k
    if (!length) {
2667
120
        if (!STRING_EXTEND(parser, string))
2668
0
            goto error;
2669
2670
120
        yaml_parser_set_scanner_error(parser, directive ?
2671
66
                "while parsing a %TAG directive" : "while parsing a tag",
2672
120
                start_mark, "did not find expected tag URI");
2673
120
        goto error;
2674
120
    }
2675
2676
295k
    *uri = string.start;
2677
2678
295k
    return 1;
2679
2680
427
error:
2681
427
    STRING_DEL(parser, string);
2682
427
    return 0;
2683
295k
}
2684
2685
/*
2686
 * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2687
 */
2688
2689
static int
2690
yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2691
        yaml_mark_t start_mark, yaml_string_t *string)
2692
348k
{
2693
348k
    int width = 0;
2694
2695
    /* Decode the required number of characters. */
2696
2697
532k
    do {
2698
2699
532k
        unsigned char octet = 0;
2700
2701
        /* Check for a URI-escaped octet. */
2702
2703
532k
        if (!CACHE(parser, 3)) return 0;
2704
2705
532k
        if (!(CHECK(parser->buffer, '%')
2706
532k
                    && IS_HEX_AT(parser->buffer, 1)
2707
532k
                    && IS_HEX_AT(parser->buffer, 2))) {
2708
262
            return yaml_parser_set_scanner_error(parser, directive ?
2709
259
                    "while parsing a %TAG directive" : "while parsing a tag",
2710
262
                    start_mark, "did not find URI escaped octet");
2711
262
        }
2712
2713
        /* Get the octet. */
2714
2715
532k
        octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2716
2717
        /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2718
2719
532k
        if (!width)
2720
348k
        {
2721
348k
            width = (octet & 0x80) == 0x00 ? 1 :
2722
348k
                    (octet & 0xE0) == 0xC0 ? 2 :
2723
102k
                    (octet & 0xF0) == 0xE0 ? 3 :
2724
81.1k
                    (octet & 0xF8) == 0xF0 ? 4 : 0;
2725
348k
            if (!width) {
2726
19
                return yaml_parser_set_scanner_error(parser, directive ?
2727
17
                        "while parsing a %TAG directive" : "while parsing a tag",
2728
19
                        start_mark, "found an incorrect leading UTF-8 octet");
2729
19
            }
2730
348k
        }
2731
184k
        else
2732
184k
        {
2733
            /* Check if the trailing octet is correct. */
2734
2735
184k
            if ((octet & 0xC0) != 0x80) {
2736
20
                return yaml_parser_set_scanner_error(parser, directive ?
2737
20
                        "while parsing a %TAG directive" : "while parsing a tag",
2738
20
                        start_mark, "found an incorrect trailing UTF-8 octet");
2739
20
            }
2740
184k
        }
2741
2742
        /* Copy the octet and move the pointers. */
2743
2744
532k
        *(string->pointer++) = octet;
2745
532k
        SKIP(parser);
2746
532k
        SKIP(parser);
2747
532k
        SKIP(parser);
2748
2749
532k
    } while (--width);
2750
2751
348k
    return 1;
2752
348k
}
2753
2754
/*
2755
 * Scan a block scalar.
2756
 */
2757
2758
static int
2759
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
2760
        int literal)
2761
247k
{
2762
247k
    yaml_mark_t start_mark;
2763
247k
    yaml_mark_t end_mark;
2764
247k
    yaml_string_t string = NULL_STRING;
2765
247k
    yaml_string_t leading_break = NULL_STRING;
2766
247k
    yaml_string_t trailing_breaks = NULL_STRING;
2767
247k
    int chomping = 0;
2768
247k
    int increment = 0;
2769
247k
    int indent = 0;
2770
247k
    int leading_blank = 0;
2771
247k
    int trailing_blank = 0;
2772
2773
247k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2774
247k
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2775
247k
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2776
2777
    /* Eat the indicator '|' or '>'. */
2778
2779
247k
    start_mark = parser->mark;
2780
2781
247k
    SKIP(parser);
2782
2783
    /* Scan the additional block scalar indicators. */
2784
2785
247k
    if (!CACHE(parser, 1)) goto error;
2786
2787
    /* Check for a chomping indicator. */
2788
2789
247k
    if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2790
57.3k
    {
2791
        /* Set the chomping method and eat the indicator. */
2792
2793
57.3k
        chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2794
2795
57.3k
        SKIP(parser);
2796
2797
        /* Check for an indentation indicator. */
2798
2799
57.3k
        if (!CACHE(parser, 1)) goto error;
2800
2801
57.3k
        if (IS_DIGIT(parser->buffer))
2802
55.7k
        {
2803
            /* Check that the indentation is greater than 0. */
2804
2805
55.7k
            if (CHECK(parser->buffer, '0')) {
2806
2
                yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2807
2
                        start_mark, "found an indentation indicator equal to 0");
2808
2
                goto error;
2809
2
            }
2810
2811
            /* Get the indentation level and eat the indicator. */
2812
2813
55.7k
            increment = AS_DIGIT(parser->buffer);
2814
2815
55.7k
            SKIP(parser);
2816
55.7k
        }
2817
57.3k
    }
2818
2819
    /* Do the same as above, but in the opposite order. */
2820
2821
190k
    else if (IS_DIGIT(parser->buffer))
2822
42.2k
    {
2823
42.2k
        if (CHECK(parser->buffer, '0')) {
2824
4
            yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2825
4
                    start_mark, "found an indentation indicator equal to 0");
2826
4
            goto error;
2827
4
        }
2828
2829
42.2k
        increment = AS_DIGIT(parser->buffer);
2830
2831
42.2k
        SKIP(parser);
2832
2833
42.2k
        if (!CACHE(parser, 1)) goto error;
2834
2835
42.2k
        if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2836
40.9k
            chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2837
2838
40.9k
            SKIP(parser);
2839
40.9k
        }
2840
42.2k
    }
2841
2842
    /* Eat whitespaces and comments to the end of the line. */
2843
2844
247k
    if (!CACHE(parser, 1)) goto error;
2845
2846
2.13M
    while (IS_BLANK(parser->buffer)) {
2847
2.13M
        SKIP(parser);
2848
2.13M
        if (!CACHE(parser, 1)) goto error;
2849
2.13M
    }
2850
2851
247k
    if (CHECK(parser->buffer, '#')) {
2852
12.7M
        while (!IS_BREAKZ(parser->buffer)) {
2853
12.6M
            SKIP(parser);
2854
12.6M
            if (!CACHE(parser, 1)) goto error;
2855
12.6M
        }
2856
93.1k
    }
2857
2858
    /* Check if we are at the end of the line. */
2859
2860
247k
    if (!IS_BREAKZ(parser->buffer)) {
2861
150
        yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2862
150
                start_mark, "did not find expected comment or line break");
2863
150
        goto error;
2864
150
    }
2865
2866
    /* Eat a line break. */
2867
2868
247k
    if (IS_BREAK(parser->buffer)) {
2869
247k
        if (!CACHE(parser, 2)) goto error;
2870
247k
        SKIP_LINE(parser);
2871
247k
    }
2872
2873
247k
    end_mark = parser->mark;
2874
2875
    /* Set the indentation level if it was specified. */
2876
2877
247k
    if (increment) {
2878
98.0k
        indent = parser->indent >= 0 ? parser->indent+increment : increment;
2879
98.0k
    }
2880
2881
    /* Scan the leading line breaks and determine the indentation level if needed. */
2882
2883
247k
    if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2884
247k
                start_mark, &end_mark)) goto error;
2885
2886
    /* Scan the block scalar content. */
2887
2888
247k
    if (!CACHE(parser, 1)) goto error;
2889
2890
808k
    while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
2891
560k
    {
2892
        /*
2893
         * We are at the beginning of a non-empty line.
2894
         */
2895
2896
        /* Is it a trailing whitespace? */
2897
2898
560k
        trailing_blank = IS_BLANK(parser->buffer);
2899
2900
        /* Check if we need to fold the leading line break. */
2901
2902
560k
        if (!literal && (*leading_break.start == '\n')
2903
491k
                && !leading_blank && !trailing_blank)
2904
488k
        {
2905
            /* Do we need to join the lines by space? */
2906
2907
488k
            if (*trailing_breaks.start == '\0') {
2908
278k
                if (!STRING_EXTEND(parser, string)) goto error;
2909
278k
                *(string.pointer ++) = ' ';
2910
278k
            }
2911
2912
488k
            CLEAR(parser, leading_break);
2913
488k
        }
2914
72.8k
        else {
2915
72.8k
            if (!JOIN(parser, string, leading_break)) goto error;
2916
72.8k
            CLEAR(parser, leading_break);
2917
72.8k
        }
2918
2919
        /* Append the remaining line breaks. */
2920
2921
560k
        if (!JOIN(parser, string, trailing_breaks)) goto error;
2922
560k
        CLEAR(parser, trailing_breaks);
2923
2924
        /* Is it a leading whitespace? */
2925
2926
560k
        leading_blank = IS_BLANK(parser->buffer);
2927
2928
        /* Consume the current line. */
2929
2930
11.1M
        while (!IS_BREAKZ(parser->buffer)) {
2931
10.5M
            if (!READ(parser, string)) goto error;
2932
10.5M
            if (!CACHE(parser, 1)) goto error;
2933
10.5M
        }
2934
2935
        /* Consume the line break. */
2936
2937
560k
        if (!CACHE(parser, 2)) goto error;
2938
2939
560k
        if (!READ_LINE(parser, leading_break)) goto error;
2940
2941
        /* Eat the following indentation spaces and line breaks. */
2942
2943
560k
        if (!yaml_parser_scan_block_scalar_breaks(parser,
2944
560k
                    &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2945
560k
    }
2946
2947
    /* Chomp the tail. */
2948
2949
247k
    if (chomping != -1) {
2950
158k
        if (!JOIN(parser, string, leading_break)) goto error;
2951
158k
    }
2952
247k
    if (chomping == 1) {
2953
8.93k
        if (!JOIN(parser, string, trailing_breaks)) goto error;
2954
8.93k
    }
2955
2956
    /* Create a token. */
2957
2958
247k
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2959
247k
            literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
2960
247k
            start_mark, end_mark);
2961
2962
247k
    STRING_DEL(parser, leading_break);
2963
247k
    STRING_DEL(parser, trailing_breaks);
2964
2965
247k
    return 1;
2966
2967
219
error:
2968
219
    STRING_DEL(parser, string);
2969
219
    STRING_DEL(parser, leading_break);
2970
219
    STRING_DEL(parser, trailing_breaks);
2971
2972
219
    return 0;
2973
247k
}
2974
2975
/*
2976
 * Scan indentation spaces and line breaks for a block scalar.  Determine the
2977
 * indentation level if needed.
2978
 */
2979
2980
static int
2981
yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2982
        int *indent, yaml_string_t *breaks,
2983
        yaml_mark_t start_mark, yaml_mark_t *end_mark)
2984
808k
{
2985
808k
    int max_indent = 0;
2986
2987
808k
    *end_mark = parser->mark;
2988
2989
    /* Eat the indentation spaces and line breaks. */
2990
2991
4.75M
    while (1)
2992
4.75M
    {
2993
        /* Eat the indentation spaces. */
2994
2995
4.75M
        if (!CACHE(parser, 1)) return 0;
2996
2997
11.3M
        while ((!*indent || (int)parser->mark.column < *indent)
2998
10.7M
                && IS_SPACE(parser->buffer)) {
2999
6.57M
            SKIP(parser);
3000
6.57M
            if (!CACHE(parser, 1)) return 0;
3001
6.57M
        }
3002
3003
4.75M
        if ((int)parser->mark.column > max_indent)
3004
565k
            max_indent = (int)parser->mark.column;
3005
3006
        /* Check for a tab character messing the indentation. */
3007
3008
4.75M
        if ((!*indent || (int)parser->mark.column < *indent)
3009
4.18M
                && IS_TAB(parser->buffer)) {
3010
12
            return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3011
12
                    start_mark, "found a tab character where an indentation space is expected");
3012
12
        }
3013
3014
        /* Have we found a non-empty line? */
3015
3016
4.75M
        if (!IS_BREAK(parser->buffer)) break;
3017
3018
        /* Consume the line break. */
3019
3020
3.94M
        if (!CACHE(parser, 2)) return 0;
3021
3.94M
        if (!READ_LINE(parser, *breaks)) return 0;
3022
3.94M
        *end_mark = parser->mark;
3023
3.94M
    }
3024
3025
    /* Determine the indentation level if needed. */
3026
3027
808k
    if (!*indent) {
3028
149k
        *indent = max_indent;
3029
149k
        if (*indent < parser->indent + 1)
3030
145k
            *indent = parser->indent + 1;
3031
149k
        if (*indent < 1)
3032
766
            *indent = 1;
3033
149k
    }
3034
3035
808k
   return 1;
3036
808k
}
3037
3038
/*
3039
 * Scan a quoted scalar.
3040
 */
3041
3042
static int
3043
yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3044
        int single)
3045
14.0k
{
3046
14.0k
    yaml_mark_t start_mark;
3047
14.0k
    yaml_mark_t end_mark;
3048
14.0k
    yaml_string_t string = NULL_STRING;
3049
14.0k
    yaml_string_t leading_break = NULL_STRING;
3050
14.0k
    yaml_string_t trailing_breaks = NULL_STRING;
3051
14.0k
    yaml_string_t whitespaces = NULL_STRING;
3052
14.0k
    int leading_blanks;
3053
3054
14.0k
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3055
14.0k
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3056
14.0k
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3057
14.0k
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3058
3059
    /* Eat the left quote. */
3060
3061
14.0k
    start_mark = parser->mark;
3062
3063
14.0k
    SKIP(parser);
3064
3065
    /* Consume the content of the quoted scalar. */
3066
3067
1.77M
    while (1)
3068
1.77M
    {
3069
        /* Check that there are no document indicators at the beginning of the line. */
3070
3071
1.77M
        if (!CACHE(parser, 4)) goto error;
3072
3073
1.77M
        if (parser->mark.column == 0 &&
3074
907k
            ((CHECK_AT(parser->buffer, '-', 0) &&
3075
14.8k
              CHECK_AT(parser->buffer, '-', 1) &&
3076
2.46k
              CHECK_AT(parser->buffer, '-', 2)) ||
3077
905k
             (CHECK_AT(parser->buffer, '.', 0) &&
3078
7.20k
              CHECK_AT(parser->buffer, '.', 1) &&
3079
1.90k
              CHECK_AT(parser->buffer, '.', 2))) &&
3080
2.78k
            IS_BLANKZ_AT(parser->buffer, 3))
3081
56
        {
3082
56
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3083
56
                    start_mark, "found unexpected document indicator");
3084
56
            goto error;
3085
56
        }
3086
3087
        /* Check for EOF. */
3088
3089
1.77M
        if (IS_Z(parser->buffer)) {
3090
1.84k
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3091
1.84k
                    start_mark, "found unexpected end of stream");
3092
1.84k
            goto error;
3093
1.84k
        }
3094
3095
        /* Consume non-blank characters. */
3096
3097
1.77M
        if (!CACHE(parser, 2)) goto error;
3098
3099
1.77M
        leading_blanks = 0;
3100
3101
10.8M
        while (!IS_BLANKZ(parser->buffer))
3102
9.88M
        {
3103
            /* Check for an escaped single quote. */
3104
3105
9.88M
            if (single && CHECK_AT(parser->buffer, '\'', 0)
3106
9.26k
                    && CHECK_AT(parser->buffer, '\'', 1))
3107
7.89k
            {
3108
7.89k
                if (!STRING_EXTEND(parser, string)) goto error;
3109
7.89k
                *(string.pointer++) = '\'';
3110
7.89k
                SKIP(parser);
3111
7.89k
                SKIP(parser);
3112
7.89k
            }
3113
3114
            /* Check for the right quote. */
3115
3116
9.87M
            else if (CHECK(parser->buffer, single ? '\'' : '"'))
3117
11.7k
            {
3118
11.7k
                break;
3119
11.7k
            }
3120
3121
            /* Check for an escaped line break. */
3122
3123
9.86M
            else if (!single && CHECK(parser->buffer, '\\')
3124
1.89M
                    && IS_BREAK_AT(parser->buffer, 1))
3125
811k
            {
3126
811k
                if (!CACHE(parser, 3)) goto error;
3127
811k
                SKIP(parser);
3128
811k
                SKIP_LINE(parser);
3129
811k
                leading_blanks = 1;
3130
811k
                break;
3131
811k
            }
3132
3133
            /* Check for an escape sequence. */
3134
3135
9.05M
            else if (!single && CHECK(parser->buffer, '\\'))
3136
1.08M
            {
3137
1.08M
                size_t code_length = 0;
3138
3139
1.08M
                if (!STRING_EXTEND(parser, string)) goto error;
3140
3141
                /* Check the escape character. */
3142
3143
1.08M
                switch (parser->buffer.pointer[1])
3144
1.08M
                {
3145
932
                    case '0':
3146
932
                        *(string.pointer++) = '\0';
3147
932
                        break;
3148
3149
396
                    case 'a':
3150
396
                        *(string.pointer++) = '\x07';
3151
396
                        break;
3152
3153
27.7k
                    case 'b':
3154
27.7k
                        *(string.pointer++) = '\x08';
3155
27.7k
                        break;
3156
3157
123k
                    case 't':
3158
124k
                    case '\t':
3159
124k
                        *(string.pointer++) = '\x09';
3160
124k
                        break;
3161
3162
1.19k
                    case 'n':
3163
1.19k
                        *(string.pointer++) = '\x0A';
3164
1.19k
                        break;
3165
3166
453
                    case 'v':
3167
453
                        *(string.pointer++) = '\x0B';
3168
453
                        break;
3169
3170
3.88k
                    case 'f':
3171
3.88k
                        *(string.pointer++) = '\x0C';
3172
3.88k
                        break;
3173
3174
53.5k
                    case 'r':
3175
53.5k
                        *(string.pointer++) = '\x0D';
3176
53.5k
                        break;
3177
3178
459
                    case 'e':
3179
459
                        *(string.pointer++) = '\x1B';
3180
459
                        break;
3181
3182
912
                    case ' ':
3183
912
                        *(string.pointer++) = '\x20';
3184
912
                        break;
3185
3186
1.14k
                    case '"':
3187
1.14k
                        *(string.pointer++) = '"';
3188
1.14k
                        break;
3189
3190
646
                    case '/':
3191
646
                        *(string.pointer++) = '/';
3192
646
                        break;
3193
3194
393k
                    case '\\':
3195
393k
                        *(string.pointer++) = '\\';
3196
393k
                        break;
3197
3198
487
                    case 'N':   /* NEL (#x85) */
3199
487
                        *(string.pointer++) = '\xC2';
3200
487
                        *(string.pointer++) = '\x85';
3201
487
                        break;
3202
3203
425
                    case '_':   /* #xA0 */
3204
425
                        *(string.pointer++) = '\xC2';
3205
425
                        *(string.pointer++) = '\xA0';
3206
425
                        break;
3207
3208
589
                    case 'L':   /* LS (#x2028) */
3209
589
                        *(string.pointer++) = '\xE2';
3210
589
                        *(string.pointer++) = '\x80';
3211
589
                        *(string.pointer++) = '\xA8';
3212
589
                        break;
3213
3214
1.27k
                    case 'P':   /* PS (#x2029) */
3215
1.27k
                        *(string.pointer++) = '\xE2';
3216
1.27k
                        *(string.pointer++) = '\x80';
3217
1.27k
                        *(string.pointer++) = '\xA9';
3218
1.27k
                        break;
3219
3220
461k
                    case 'x':
3221
461k
                        code_length = 2;
3222
461k
                        break;
3223
3224
6.96k
                    case 'u':
3225
6.96k
                        code_length = 4;
3226
6.96k
                        break;
3227
3228
997
                    case 'U':
3229
997
                        code_length = 8;
3230
997
                        break;
3231
3232
83
                    default:
3233
83
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3234
83
                                start_mark, "found unknown escape character");
3235
83
                        goto error;
3236
1.08M
                }
3237
3238
1.08M
                SKIP(parser);
3239
1.08M
                SKIP(parser);
3240
3241
                /* Consume an arbitrary escape code. */
3242
3243
1.08M
                if (code_length)
3244
469k
                {
3245
469k
                    unsigned int value = 0;
3246
469k
                    size_t k;
3247
3248
                    /* Scan the character value. */
3249
3250
469k
                    if (!CACHE(parser, code_length)) goto error;
3251
3252
1.42M
                    for (k = 0; k < code_length; k ++) {
3253
957k
                        if (!IS_HEX_AT(parser->buffer, k)) {
3254
161
                            yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3255
161
                                    start_mark, "did not find expected hexdecimal number");
3256
161
                            goto error;
3257
161
                        }
3258
957k
                        value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3259
957k
                    }
3260
3261
                    /* Check the value and write the character. */
3262
3263
468k
                    if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3264
92
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3265
92
                                start_mark, "found invalid Unicode character escape code");
3266
92
                        goto error;
3267
92
                    }
3268
3269
468k
                    if (value <= 0x7F) {
3270
316k
                        *(string.pointer++) = value;
3271
316k
                    }
3272
152k
                    else if (value <= 0x7FF) {
3273
150k
                        *(string.pointer++) = 0xC0 + (value >> 6);
3274
150k
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3275
150k
                    }
3276
2.43k
                    else if (value <= 0xFFFF) {
3277
1.61k
                        *(string.pointer++) = 0xE0 + (value >> 12);
3278
1.61k
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3279
1.61k
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3280
1.61k
                    }
3281
813
                    else {
3282
813
                        *(string.pointer++) = 0xF0 + (value >> 18);
3283
813
                        *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3284
813
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3285
813
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3286
813
                    }
3287
3288
                    /* Advance the pointer. */
3289
3290
1.42M
                    for (k = 0; k < code_length; k ++) {
3291
956k
                        SKIP(parser);
3292
956k
                    }
3293
468k
                }
3294
1.08M
            }
3295
3296
7.97M
            else
3297
7.97M
            {
3298
                /* It is a non-escaped non-blank character. */
3299
3300
7.97M
                if (!READ(parser, string)) goto error;
3301
7.97M
            }
3302
3303
9.06M
            if (!CACHE(parser, 2)) goto error;
3304
9.06M
        }
3305
3306
        /* Check if we are at the end of the scalar. */
3307
3308
        /* Fix for crash uninitialized value crash
3309
         * Credit for the bug and input is to OSS Fuzz
3310
         * Credit for the fix to Alex Gaynor
3311
         */
3312
1.77M
        if (!CACHE(parser, 1)) goto error;
3313
1.77M
        if (CHECK(parser->buffer, single ? '\'' : '"'))
3314
11.7k
            break;
3315
3316
        /* Consume blank characters. */
3317
3318
1.76M
        if (!CACHE(parser, 1)) goto error;
3319
3320
12.3M
        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3321
10.6M
        {
3322
10.6M
            if (IS_BLANK(parser->buffer))
3323
8.42M
            {
3324
                /* Consume a space or a tab character. */
3325
3326
8.42M
                if (!leading_blanks) {
3327
8.14M
                    if (!READ(parser, whitespaces)) goto error;
3328
8.14M
                }
3329
275k
                else {
3330
275k
                    SKIP(parser);
3331
275k
                }
3332
8.42M
            }
3333
2.18M
            else
3334
2.18M
            {
3335
2.18M
                if (!CACHE(parser, 2)) goto error;
3336
3337
                /* Check if it is a first line break. */
3338
3339
2.18M
                if (!leading_blanks)
3340
98.9k
                {
3341
98.9k
                    CLEAR(parser, whitespaces);
3342
98.9k
                    if (!READ_LINE(parser, leading_break)) goto error;
3343
98.9k
                    leading_blanks = 1;
3344
98.9k
                }
3345
2.08M
                else
3346
2.08M
                {
3347
2.08M
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
3348
2.08M
                }
3349
2.18M
            }
3350
10.6M
            if (!CACHE(parser, 1)) goto error;
3351
10.6M
        }
3352
3353
        /* Join the whitespaces or fold line breaks. */
3354
3355
1.76M
        if (leading_blanks)
3356
910k
        {
3357
            /* Do we need to fold line breaks? */
3358
3359
910k
            if (leading_break.start[0] == '\n') {
3360
57.5k
                if (trailing_breaks.start[0] == '\0') {
3361
49.9k
                    if (!STRING_EXTEND(parser, string)) goto error;
3362
49.9k
                    *(string.pointer++) = ' ';
3363
49.9k
                }
3364
7.59k
                else {
3365
7.59k
                    if (!JOIN(parser, string, trailing_breaks)) goto error;
3366
7.59k
                    CLEAR(parser, trailing_breaks);
3367
7.59k
                }
3368
57.5k
                CLEAR(parser, leading_break);
3369
57.5k
            }
3370
853k
            else {
3371
853k
                if (!JOIN(parser, string, leading_break)) goto error;
3372
853k
                if (!JOIN(parser, string, trailing_breaks)) goto error;
3373
853k
                CLEAR(parser, leading_break);
3374
853k
                CLEAR(parser, trailing_breaks);
3375
853k
            }
3376
910k
        }
3377
854k
        else
3378
854k
        {
3379
854k
            if (!JOIN(parser, string, whitespaces)) goto error;
3380
854k
            CLEAR(parser, whitespaces);
3381
854k
        }
3382
1.76M
    }
3383
3384
    /* Eat the right quote. */
3385
3386
11.7k
    SKIP(parser);
3387
3388
11.7k
    end_mark = parser->mark;
3389
3390
    /* Create a token. */
3391
3392
11.7k
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3393
11.7k
            single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3394
11.7k
            start_mark, end_mark);
3395
3396
11.7k
    STRING_DEL(parser, leading_break);
3397
11.7k
    STRING_DEL(parser, trailing_breaks);
3398
11.7k
    STRING_DEL(parser, whitespaces);
3399
3400
11.7k
    return 1;
3401
3402
2.27k
error:
3403
2.27k
    STRING_DEL(parser, string);
3404
2.27k
    STRING_DEL(parser, leading_break);
3405
2.27k
    STRING_DEL(parser, trailing_breaks);
3406
2.27k
    STRING_DEL(parser, whitespaces);
3407
3408
2.27k
    return 0;
3409
14.0k
}
3410
3411
/*
3412
 * Scan a plain scalar.
 *
 * Starting at the current parser mark, the scalar text is accumulated in a
 * freshly initialised string and stored in *token as a scalar with
 * YAML_PLAIN_SCALAR_STYLE.  Scanning stops at:
 *   - a '---' or '...' document indicator in column 0 for which
 *     IS_BLANKZ_AT(parser->buffer, 3) holds;
 *   - a '#' found at the very start or right after blanks/line breaks;
 *   - a ':' for which IS_BLANKZ_AT(parser->buffer, 1) holds;
 *   - one of ',', '[', ']', '{', '}' while parser->flow_level is non-zero;
 *   - any character that is neither a blank nor a line break once the
 *     non-blank run has ended;
 *   - outside the flow context, a continuation line whose column is not
 *     greater than parser->indent.
 *
 * A scanner error is reported for ':' directly followed by one of ",?[]{}"
 * in the flow context, and for a tab found below the required indentation
 * after a line break.
 *
 * parser->simple_key_allowed is set to 1 if at least one line break was
 * consumed while scanning.
 *
 * Returns 1 on success.  Returns 0 if a scanner error was reported or any of
 * the STRING_INIT/CACHE/READ/READ_LINE/JOIN/STRING_EXTEND steps failed; in
 * that case all four working strings are released.
3413
 */
3414
3415
static int
3416
yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3417
1.86M
{
3418
1.86M
    yaml_mark_t start_mark;
3419
1.86M
    yaml_mark_t end_mark;
3420
1.86M
    yaml_string_t string = NULL_STRING;
3421
1.86M
    yaml_string_t leading_break = NULL_STRING;
3422
1.86M
    yaml_string_t trailing_breaks = NULL_STRING;
3423
1.86M
    yaml_string_t whitespaces = NULL_STRING;
3424
1.86M
    int leading_blanks = 0;
3425
1.86M
    /* Outside the flow context the scalar ends as soon as a line starts at a
     * column below this value (see the indentation check at the loop end). */
    int indent = parser->indent+1;
3426
3427
1.86M
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3428
1.86M
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3429
1.86M
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3430
1.86M
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3431
3432
1.86M
    start_mark = end_mark = parser->mark;
3433
3434
    /* Consume the content of the plain scalar. */
3435
3436
3.41M
    while (1)
3437
3.41M
    {
3438
        /* Check for a document indicator. */
3439
3440
3.41M
        if (!CACHE(parser, 4)) goto error;
3441
3442
3.41M
        if (parser->mark.column == 0 &&
3443
1.00M
            ((CHECK_AT(parser->buffer, '-', 0) &&
3444
37.6k
              CHECK_AT(parser->buffer, '-', 1) &&
3445
20.6k
              CHECK_AT(parser->buffer, '-', 2)) ||
3446
1.00M
             (CHECK_AT(parser->buffer, '.', 0) &&
3447
15.5k
              CHECK_AT(parser->buffer, '.', 1) &&
3448
10.7k
              CHECK_AT(parser->buffer, '.', 2))) &&
3449
14.7k
            IS_BLANKZ_AT(parser->buffer, 3)) break;
3450
3451
        /* Check for a comment. */
3452
3453
3.41M
        if (CHECK(parser->buffer, '#'))
3454
1.12k
            break;
3455
3456
        /* Consume non-blank characters. */
3457
3458
28.3M
        while (!IS_BLANKZ(parser->buffer))
3459
26.6M
        {
3460
            /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3461
             * This is not completely according to the spec
3462
             * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3463
             */
3464
3465
26.6M
            if (parser->flow_level
3466
2.12M
                    && CHECK(parser->buffer, ':')
3467
6.75k
                    && (
3468
6.75k
                        CHECK_AT(parser->buffer, ',', 1)
3469
6.74k
                        || CHECK_AT(parser->buffer, '?', 1)
3470
6.74k
                        || CHECK_AT(parser->buffer, '[', 1)
3471
6.74k
                        || CHECK_AT(parser->buffer, ']', 1)
3472
6.73k
                        || CHECK_AT(parser->buffer, '{', 1)
3473
6.73k
                        || CHECK_AT(parser->buffer, '}', 1)
3474
6.75k
                    )
3475
26.6M
                    ) {
3476
16
                yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3477
16
                        start_mark, "found unexpected ':'");
3478
16
                goto error;
3479
16
            }
3480
3481
            /* Check for indicators that may end a plain scalar. */
3482
3483
26.6M
            if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3484
25.2M
                    || (parser->flow_level &&
3485
2.11M
                        (CHECK(parser->buffer, ',')
3486
1.81M
                         || CHECK(parser->buffer, '[')
3487
1.80M
                         || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3488
1.80M
                         || CHECK(parser->buffer, '}'))))
3489
1.73M
                break;
3490
3491
            /* Check if we need to join whitespaces and breaks. */
3492
3493
24.9M
            /* Blanks and breaks collected by the previous pass of the outer
             * loop are only appended now, i.e. once more content follows. */
            if (leading_blanks || whitespaces.start != whitespaces.pointer)
3494
1.42M
            {
3495
1.42M
                if (leading_blanks)
3496
440k
                {
3497
                    /* Do we need to fold line breaks? */
3498
3499
440k
                    if (leading_break.start[0] == '\n') {
3500
244k
                        /* No further breaks recorded: the single '\n' becomes
                         * one space.  NOTE(review): this test presumably
                         * relies on CLEAR leaving a NUL at .start - confirm
                         * against the macro definition. */
                        if (trailing_breaks.start[0] == '\0') {
3501
214k
                            if (!STRING_EXTEND(parser, string)) goto error;
3502
214k
                            *(string.pointer++) = ' ';
3503
214k
                        }
3504
30.0k
                        else {
3505
30.0k
                            /* The leading '\n' is dropped; only the breaks
                             * that followed it are appended. */
                            if (!JOIN(parser, string, trailing_breaks)) goto error;
3506
30.0k
                            CLEAR(parser, trailing_breaks);
3507
30.0k
                        }
3508
244k
                        CLEAR(parser, leading_break);
3509
244k
                    }
3510
196k
                    else {
3511
196k
                        /* Leading break does not start with '\n': append it
                         * and all following breaks unchanged. */
                        if (!JOIN(parser, string, leading_break)) goto error;
3512
196k
                        if (!JOIN(parser, string, trailing_breaks)) goto error;
3513
196k
                        CLEAR(parser, leading_break);
3514
196k
                        CLEAR(parser, trailing_breaks);
3515
196k
                    }
3516
3517
440k
                    leading_blanks = 0;
3518
440k
                }
3519
989k
                else
3520
989k
                {
3521
989k
                    if (!JOIN(parser, string, whitespaces)) goto error;
3522
989k
                    CLEAR(parser, whitespaces);
3523
989k
                }
3524
1.42M
            }
3525
3526
            /* Copy the character. */
3527
3528
24.9M
            if (!READ(parser, string)) goto error;
3529
3530
24.9M
            /* end_mark is only updated here, after a content character, so
             * blanks and breaks consumed afterwards are not part of the
             * token's span. */
            end_mark = parser->mark;
3531
3532
24.9M
            if (!CACHE(parser, 2)) goto error;
3533
24.9M
        }
3534
3535
        /* Is it the end? */
3536
3537
3.40M
        if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3538
1.73M
            break;
3539
3540
        /* Consume blank characters. */
3541
3542
1.67M
        if (!CACHE(parser, 1)) goto error;
3543
3544
19.4M
        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3545
17.7M
        {
3546
17.7M
            if (IS_BLANK(parser->buffer))
3547
12.3M
            {
3548
                /* Check for tab characters that abuse indentation. */
3549
3550
12.3M
                if (leading_blanks && (int)parser->mark.column < indent
3551
51.4k
                        && IS_TAB(parser->buffer)) {
3552
3
                    yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3553
3
                            start_mark, "found a tab character that violates indentation");
3554
3
                    goto error;
3555
3
                }
3556
3557
                /* Consume a space or a tab character. */
3558
3559
12.3M
                /* Blanks seen before any line break are kept in 'whitespaces'
                 * for later joining; blanks after a line break are skipped. */
                if (!leading_blanks) {
3560
9.53M
                    if (!READ(parser, whitespaces)) goto error;
3561
9.53M
                }
3562
2.76M
                else {
3563
2.76M
                    SKIP(parser);
3564
2.76M
                }
3565
12.3M
            }
3566
5.46M
            else
3567
5.46M
            {
3568
5.46M
                if (!CACHE(parser, 2)) goto error;
3569
3570
                /* Check if it is a first line break. */
3571
3572
5.46M
                if (!leading_blanks)
3573
670k
                {
3574
670k
                    /* Blanks collected before the first break are discarded. */
                    CLEAR(parser, whitespaces);
3575
670k
                    if (!READ_LINE(parser, leading_break)) goto error;
3576
670k
                    leading_blanks = 1;
3577
670k
                }
3578
4.79M
                else
3579
4.79M
                {
3580
4.79M
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
3581
4.79M
                }
3582
5.46M
            }
3583
17.7M
            if (!CACHE(parser, 1)) goto error;
3584
17.7M
        }
3585
3586
        /* Check indentation level. */
3587
3588
1.67M
        if (!parser->flow_level && (int)parser->mark.column < indent)
3589
123k
            break;
3590
1.67M
    }
3591
3592
    /* Create a token. */
3593
3594
1.86M
    /* string.start is passed to the token and is not released on this path;
     * only the three helper strings are deleted below. */
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3595
1.86M
            YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3596
3597
    /* Note that we change the 'simple_key_allowed' flag. */
3598
3599
1.86M
    if (leading_blanks) {
3600
229k
        parser->simple_key_allowed = 1;
3601
229k
    }
3602
3603
1.86M
    STRING_DEL(parser, leading_break);
3604
1.86M
    STRING_DEL(parser, trailing_breaks);
3605
1.86M
    STRING_DEL(parser, whitespaces);
3606
3607
1.86M
    return 1;
3608
3609
119
/* Failure path: release every working string, including the partially built
 * scalar text, and report failure to the caller. */
error:
3610
119
    STRING_DEL(parser, string);
3611
119
    STRING_DEL(parser, leading_break);
3612
119
    STRING_DEL(parser, trailing_breaks);
3613
119
    STRING_DEL(parser, whitespaces);
3614
3615
119
    return 0;
3616
1.86M
}