Coverage Report

Created: 2025-06-24 07:03

/src/moddable/xs/tools/yaml/scanner.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2006-2016 Kirill Simonov
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
5
 * this software and associated documentation files (the "Software"), to deal in
6
 * the Software without restriction, including without limitation the rights to
7
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8
 * of the Software, and to permit persons to whom the Software is furnished to do
9
 * so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in all
12
 * copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
 * SOFTWARE.
21
 *
22
 */
23
24
/*
25
 * Introduction
26
 * ************
27
 *
28
 * The following notes assume that you are familiar with the YAML specification
29
 * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
30
 * some cases we are less restrictive than it requires.
31
 *
32
 * The process of transforming a YAML stream into a sequence of events is
33
 * divided into two steps: Scanning and Parsing.
34
 *
35
 * The Scanner transforms the input stream into a sequence of tokens, while the
36
 * parser transforms the sequence of tokens produced by the Scanner into a
37
 * sequence of parsing events.
38
 *
39
 * The Scanner is rather clever and complicated. The Parser, on the contrary,
40
 * is a straightforward implementation of a recursive-descent parser (or,
41
 * LL(1) parser, as it is usually called).
42
 *
43
 * Actually there are two issues of Scanning that might be called "clever", the
44
 * rest is quite straightforward.  The issues are "block collection start" and
45
 * "simple keys".  Both issues are explained below in detail.
46
 *
47
 * Here the Scanning step is explained and implemented.  We start with the list
48
 * of all the tokens produced by the Scanner together with short descriptions.
49
 *
50
 * Now, tokens:
51
 *
52
 *      STREAM-START(encoding)          # The stream start.
53
 *      STREAM-END                      # The stream end.
54
 *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
55
 *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
56
 *      DOCUMENT-START                  # '---'
57
 *      DOCUMENT-END                    # '...'
58
 *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
59
 *      BLOCK-MAPPING-START             # sequence or a block mapping.
60
 *      BLOCK-END                       # Indentation decrease.
61
 *      FLOW-SEQUENCE-START             # '['
62
 *      FLOW-SEQUENCE-END               # ']'
63
 *      FLOW-MAPPING-START              # '{'
64
 *      FLOW-MAPPING-END                # '}'
65
 *      BLOCK-ENTRY                     # '-'
66
 *      FLOW-ENTRY                      # ','
67
 *      KEY                             # '?' or nothing (simple keys).
68
 *      VALUE                           # ':'
69
 *      ALIAS(anchor)                   # '*anchor'
70
 *      ANCHOR(anchor)                  # '&anchor'
71
 *      TAG(handle,suffix)              # '!handle!suffix'
72
 *      SCALAR(value,style)             # A scalar.
73
 *
74
 * The following two tokens are "virtual" tokens denoting the beginning and the
75
 * end of the stream:
76
 *
77
 *      STREAM-START(encoding)
78
 *      STREAM-END
79
 *
80
 * We pass the information about the input stream encoding with the
81
 * STREAM-START token.
82
 *
83
 * The next two tokens are responsible for directives:
84
 *
85
 *      VERSION-DIRECTIVE(major,minor)
86
 *      TAG-DIRECTIVE(handle,prefix)
87
 *
88
 * Example:
89
 *
90
 *      %YAML   1.1
91
 *      %TAG    !   !foo
92
 *      %TAG    !yaml!  tag:yaml.org,2002:
93
 *      ---
94
 *
95
 * The corresponding sequence of tokens:
96
 *
97
 *      STREAM-START(utf-8)
98
 *      VERSION-DIRECTIVE(1,1)
99
 *      TAG-DIRECTIVE("!","!foo")
100
 *      TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
101
 *      DOCUMENT-START
102
 *      STREAM-END
103
 *
104
 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
105
 * line.
106
 *
107
 * The document start and end indicators are represented by:
108
 *
109
 *      DOCUMENT-START
110
 *      DOCUMENT-END
111
 *
112
 * Note that if a YAML stream contains an implicit document (without '---'
113
 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
114
 * produced.
115
 *
116
 * In the following examples, we present whole documents together with the
117
 * produced tokens.
118
 *
119
 *      1. An implicit document:
120
 *
121
 *          'a scalar'
122
 *
123
 *      Tokens:
124
 *
125
 *          STREAM-START(utf-8)
126
 *          SCALAR("a scalar",single-quoted)
127
 *          STREAM-END
128
 *
129
 *      2. An explicit document:
130
 *
131
 *          ---
132
 *          'a scalar'
133
 *          ...
134
 *
135
 *      Tokens:
136
 *
137
 *          STREAM-START(utf-8)
138
 *          DOCUMENT-START
139
 *          SCALAR("a scalar",single-quoted)
140
 *          DOCUMENT-END
141
 *          STREAM-END
142
 *
143
 *      3. Several documents in a stream:
144
 *
145
 *          'a scalar'
146
 *          ---
147
 *          'another scalar'
148
 *          ---
149
 *          'yet another scalar'
150
 *
151
 *      Tokens:
152
 *
153
 *          STREAM-START(utf-8)
154
 *          SCALAR("a scalar",single-quoted)
155
 *          DOCUMENT-START
156
 *          SCALAR("another scalar",single-quoted)
157
 *          DOCUMENT-START
158
 *          SCALAR("yet another scalar",single-quoted)
159
 *          STREAM-END
160
 *
161
 * We have already introduced the SCALAR token above.  The following tokens are
162
 * used to describe aliases, anchors, tags, and scalars:
163
 *
164
 *      ALIAS(anchor)
165
 *      ANCHOR(anchor)
166
 *      TAG(handle,suffix)
167
 *      SCALAR(value,style)
168
 *
169
 * The following series of examples illustrate the usage of these tokens:
170
 *
171
 *      1. A recursive sequence:
172
 *
173
 *          &A [ *A ]
174
 *
175
 *      Tokens:
176
 *
177
 *          STREAM-START(utf-8)
178
 *          ANCHOR("A")
179
 *          FLOW-SEQUENCE-START
180
 *          ALIAS("A")
181
 *          FLOW-SEQUENCE-END
182
 *          STREAM-END
183
 *
184
 *      2. A tagged scalar:
185
 *
186
 *          !!float "3.14"  # A good approximation.
187
 *
188
 *      Tokens:
189
 *
190
 *          STREAM-START(utf-8)
191
 *          TAG("!!","float")
192
 *          SCALAR("3.14",double-quoted)
193
 *          STREAM-END
194
 *
195
 *      3. Various scalar styles:
196
 *
197
 *          --- # Implicit empty plain scalars do not produce tokens.
198
 *          --- a plain scalar
199
 *          --- 'a single-quoted scalar'
200
 *          --- "a double-quoted scalar"
201
 *          --- |-
202
 *            a literal scalar
203
 *          --- >-
204
 *            a folded
205
 *            scalar
206
 *
207
 *      Tokens:
208
 *
209
 *          STREAM-START(utf-8)
210
 *          DOCUMENT-START
211
 *          DOCUMENT-START
212
 *          SCALAR("a plain scalar",plain)
213
 *          DOCUMENT-START
214
 *          SCALAR("a single-quoted scalar",single-quoted)
215
 *          DOCUMENT-START
216
 *          SCALAR("a double-quoted scalar",double-quoted)
217
 *          DOCUMENT-START
218
 *          SCALAR("a literal scalar",literal)
219
 *          DOCUMENT-START
220
 *          SCALAR("a folded scalar",folded)
221
 *          STREAM-END
222
 *
223
 * Now it's time to review collection-related tokens. We will start with
224
 * flow collections:
225
 *
226
 *      FLOW-SEQUENCE-START
227
 *      FLOW-SEQUENCE-END
228
 *      FLOW-MAPPING-START
229
 *      FLOW-MAPPING-END
230
 *      FLOW-ENTRY
231
 *      KEY
232
 *      VALUE
233
 *
234
 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
235
 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
236
 * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
237
 * indicators '?' and ':', which are used for denoting mapping keys and values,
238
 * are represented by the KEY and VALUE tokens.
239
 *
240
 * The following examples show flow collections:
241
 *
242
 *      1. A flow sequence:
243
 *
244
 *          [item 1, item 2, item 3]
245
 *
246
 *      Tokens:
247
 *
248
 *          STREAM-START(utf-8)
249
 *          FLOW-SEQUENCE-START
250
 *          SCALAR("item 1",plain)
251
 *          FLOW-ENTRY
252
 *          SCALAR("item 2",plain)
253
 *          FLOW-ENTRY
254
 *          SCALAR("item 3",plain)
255
 *          FLOW-SEQUENCE-END
256
 *          STREAM-END
257
 *
258
 *      2. A flow mapping:
259
 *
260
 *          {
261
 *              a simple key: a value,  # Note that the KEY token is produced.
262
 *              ? a complex key: another value,
263
 *          }
264
 *
265
 *      Tokens:
266
 *
267
 *          STREAM-START(utf-8)
268
 *          FLOW-MAPPING-START
269
 *          KEY
270
 *          SCALAR("a simple key",plain)
271
 *          VALUE
272
 *          SCALAR("a value",plain)
273
 *          FLOW-ENTRY
274
 *          KEY
275
 *          SCALAR("a complex key",plain)
276
 *          VALUE
277
 *          SCALAR("another value",plain)
278
 *          FLOW-ENTRY
279
 *          FLOW-MAPPING-END
280
 *          STREAM-END
281
 *
282
 * A simple key is a key which is not denoted by the '?' indicator.  Note that
283
 * the Scanner still produces the KEY token whenever it encounters a simple key.
284
 *
285
 * For scanning block collections, the following tokens are used (note that we
286
 * repeat KEY and VALUE here):
287
 *
288
 *      BLOCK-SEQUENCE-START
289
 *      BLOCK-MAPPING-START
290
 *      BLOCK-END
291
 *      BLOCK-ENTRY
292
 *      KEY
293
 *      VALUE
294
 *
295
 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
296
 * increase that precedes a block collection (cf. the INDENT token in Python).
297
 * The token BLOCK-END denotes indentation decrease that ends a block collection
298
 * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
299
 * that make detection of these tokens more complex.
300
 *
301
 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
302
 * '-', '?', and ':' correspondingly.
303
 *
304
 * The following examples show how the tokens BLOCK-SEQUENCE-START,
305
 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
306
 *
307
 *      1. Block sequences:
308
 *
309
 *          - item 1
310
 *          - item 2
311
 *          -
312
 *            - item 3.1
313
 *            - item 3.2
314
 *          -
315
 *            key 1: value 1
316
 *            key 2: value 2
317
 *
318
 *      Tokens:
319
 *
320
 *          STREAM-START(utf-8)
321
 *          BLOCK-SEQUENCE-START
322
 *          BLOCK-ENTRY
323
 *          SCALAR("item 1",plain)
324
 *          BLOCK-ENTRY
325
 *          SCALAR("item 2",plain)
326
 *          BLOCK-ENTRY
327
 *          BLOCK-SEQUENCE-START
328
 *          BLOCK-ENTRY
329
 *          SCALAR("item 3.1",plain)
330
 *          BLOCK-ENTRY
331
 *          SCALAR("item 3.2",plain)
332
 *          BLOCK-END
333
 *          BLOCK-ENTRY
334
 *          BLOCK-MAPPING-START
335
 *          KEY
336
 *          SCALAR("key 1",plain)
337
 *          VALUE
338
 *          SCALAR("value 1",plain)
339
 *          KEY
340
 *          SCALAR("key 2",plain)
341
 *          VALUE
342
 *          SCALAR("value 2",plain)
343
 *          BLOCK-END
344
 *          BLOCK-END
345
 *          STREAM-END
346
 *
347
 *      2. Block mappings:
348
 *
349
 *          a simple key: a value   # The KEY token is produced here.
350
 *          ? a complex key
351
 *          : another value
352
 *          a mapping:
353
 *            key 1: value 1
354
 *            key 2: value 2
355
 *          a sequence:
356
 *            - item 1
357
 *            - item 2
358
 *
359
 *      Tokens:
360
 *
361
 *          STREAM-START(utf-8)
362
 *          BLOCK-MAPPING-START
363
 *          KEY
364
 *          SCALAR("a simple key",plain)
365
 *          VALUE
366
 *          SCALAR("a value",plain)
367
 *          KEY
368
 *          SCALAR("a complex key",plain)
369
 *          VALUE
370
 *          SCALAR("another value",plain)
371
 *          KEY
372
 *          SCALAR("a mapping",plain)
373
 *          BLOCK-MAPPING-START
374
 *          KEY
375
 *          SCALAR("key 1",plain)
376
 *          VALUE
377
 *          SCALAR("value 1",plain)
378
 *          KEY
379
 *          SCALAR("key 2",plain)
380
 *          VALUE
381
 *          SCALAR("value 2",plain)
382
 *          BLOCK-END
383
 *          KEY
384
 *          SCALAR("a sequence",plain)
385
 *          VALUE
386
 *          BLOCK-SEQUENCE-START
387
 *          BLOCK-ENTRY
388
 *          SCALAR("item 1",plain)
389
 *          BLOCK-ENTRY
390
 *          SCALAR("item 2",plain)
391
 *          BLOCK-END
392
 *          BLOCK-END
393
 *          STREAM-END
394
 *
395
 * YAML does not always require a new block collection to start on a new
396
 * line.  If the current line contains only '-', '?', and ':' indicators, a new
397
 * block collection may start at the current line.  The following examples
398
 * illustrate this case:
399
 *
400
 *      1. Collections in a sequence:
401
 *
402
 *          - - item 1
403
 *            - item 2
404
 *          - key 1: value 1
405
 *            key 2: value 2
406
 *          - ? complex key
407
 *            : complex value
408
 *
409
 *      Tokens:
410
 *
411
 *          STREAM-START(utf-8)
412
 *          BLOCK-SEQUENCE-START
413
 *          BLOCK-ENTRY
414
 *          BLOCK-SEQUENCE-START
415
 *          BLOCK-ENTRY
416
 *          SCALAR("item 1",plain)
417
 *          BLOCK-ENTRY
418
 *          SCALAR("item 2",plain)
419
 *          BLOCK-END
420
 *          BLOCK-ENTRY
421
 *          BLOCK-MAPPING-START
422
 *          KEY
423
 *          SCALAR("key 1",plain)
424
 *          VALUE
425
 *          SCALAR("value 1",plain)
426
 *          KEY
427
 *          SCALAR("key 2",plain)
428
 *          VALUE
429
 *          SCALAR("value 2",plain)
430
 *          BLOCK-END
431
 *          BLOCK-ENTRY
432
 *          BLOCK-MAPPING-START
433
 *          KEY
434
 *          SCALAR("complex key",plain)
435
 *          VALUE
436
 *          SCALAR("complex value",plain)
437
 *          BLOCK-END
438
 *          BLOCK-END
439
 *          STREAM-END
440
 *
441
 *      2. Collections in a mapping:
442
 *
443
 *          ? a sequence
444
 *          : - item 1
445
 *            - item 2
446
 *          ? a mapping
447
 *          : key 1: value 1
448
 *            key 2: value 2
449
 *
450
 *      Tokens:
451
 *
452
 *          STREAM-START(utf-8)
453
 *          BLOCK-MAPPING-START
454
 *          KEY
455
 *          SCALAR("a sequence",plain)
456
 *          VALUE
457
 *          BLOCK-SEQUENCE-START
458
 *          BLOCK-ENTRY
459
 *          SCALAR("item 1",plain)
460
 *          BLOCK-ENTRY
461
 *          SCALAR("item 2",plain)
462
 *          BLOCK-END
463
 *          KEY
464
 *          SCALAR("a mapping",plain)
465
 *          VALUE
466
 *          BLOCK-MAPPING-START
467
 *          KEY
468
 *          SCALAR("key 1",plain)
469
 *          VALUE
470
 *          SCALAR("value 1",plain)
471
 *          KEY
472
 *          SCALAR("key 2",plain)
473
 *          VALUE
474
 *          SCALAR("value 2",plain)
475
 *          BLOCK-END
476
 *          BLOCK-END
477
 *          STREAM-END
478
 *
479
 * YAML also permits non-indented sequences if they are included into a block
480
 * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
481
 *
482
 *      key:
483
 *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
484
 *      - item 2
485
 *
486
 * Tokens:
487
 *
488
 *      STREAM-START(utf-8)
489
 *      BLOCK-MAPPING-START
490
 *      KEY
491
 *      SCALAR("key",plain)
492
 *      VALUE
493
 *      BLOCK-ENTRY
494
 *      SCALAR("item 1",plain)
495
 *      BLOCK-ENTRY
496
 *      SCALAR("item 2",plain)
497
 *      BLOCK-END
498
 */
499
500
#include "yaml_private.h"
501
502
/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * When `parser->unread` already covers `length` the macro evaluates to 1
 * without touching the reader; otherwise it evaluates to the result of
 * yaml_parser_update_buffer().  Both arguments may be evaluated more than
 * once, so they must be free of side effects.  The arguments are
 * parenthesized so that any pointer expression can be passed as `parser`.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
511
512
/*
 * Advance the buffer pointer.
 *
 * Consumes one character: the mark's index and column are incremented, the
 * unread counter is decremented, and the buffer pointer moves forward by
 * WIDTH((parser)->buffer).  `parser` is evaluated several times, so it must
 * be free of side effects; it is parenthesized so that any pointer
 * expression can be passed.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))
521
522
/*
 * Advance the buffer pointer over a line break.
 *
 * A CR LF pair is consumed as two characters (index, unread and the buffer
 * pointer all move by 2).  Any other break recognised by IS_BREAK() is
 * consumed as one character of WIDTH((parser)->buffer) bytes.  In both
 * cases the mark moves to column 0 of the next line.  If the buffer is not
 * positioned at a line break nothing is changed and the macro evaluates
 * to 0.  `parser` is evaluated several times and must be free of side
 * effects.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
535
536
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is invoked first; if it fails the macro evaluates to 0
 * and nothing is copied.  Otherwise COPY() transfers the current character
 * from the parser buffer into `string`, the mark's index and column are
 * incremented, the unread counter is decremented, and the macro evaluates
 * to 1.  `parser` is evaluated several times and must be free of side
 * effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND(parser,string) ?                                            \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)
547
548
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is invoked first; if it fails the macro evaluates to 0.
 * Otherwise the break at the current buffer position is normalised while
 * being copied:
 *
 *      CR LF           -> LF   (two characters consumed)
 *      CR or LF        -> LF
 *      NEL (C2 85)     -> LF
 *      LS/PS (E2 80 A8 / E2 80 A9) are copied unchanged (three bytes)
 *
 * After a break is consumed the mark moves to column 0 of the next line.
 * If the buffer is not positioned at a break nothing is copied, but the
 * macro still evaluates to 1.  `parser` and `string` are evaluated several
 * times and must be free of side effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND(parser,string) ?                                             \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)
590
591
/*
592
 * Public API declarations.
593
 */
594
595
YAML_DECLARE(int)
596
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
597
598
/*
599
 * Error handling.
600
 */
601
602
static int
603
yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
604
        yaml_mark_t context_mark, const char *problem);
605
606
/*
607
 * High-level token API.
608
 */
609
610
YAML_DECLARE(int)
611
yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
612
613
static int
614
yaml_parser_fetch_next_token(yaml_parser_t *parser);
615
616
/*
617
 * Potential simple keys.
618
 */
619
620
static int
621
yaml_parser_stale_simple_keys(yaml_parser_t *parser);
622
623
static int
624
yaml_parser_save_simple_key(yaml_parser_t *parser);
625
626
static int
627
yaml_parser_remove_simple_key(yaml_parser_t *parser);
628
629
static int
630
yaml_parser_increase_flow_level(yaml_parser_t *parser);
631
632
static int
633
yaml_parser_decrease_flow_level(yaml_parser_t *parser);
634
635
/*
636
 * Indentation treatment.
637
 */
638
639
static int
640
yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
641
        ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
642
643
static int
644
yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
645
646
/*
647
 * Token fetchers.
648
 */
649
650
static int
651
yaml_parser_fetch_stream_start(yaml_parser_t *parser);
652
653
static int
654
yaml_parser_fetch_stream_end(yaml_parser_t *parser);
655
656
static int
657
yaml_parser_fetch_directive(yaml_parser_t *parser);
658
659
static int
660
yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
661
        yaml_token_type_t type);
662
663
static int
664
yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
665
        yaml_token_type_t type);
666
667
static int
668
yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
669
        yaml_token_type_t type);
670
671
static int
672
yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
673
674
static int
675
yaml_parser_fetch_block_entry(yaml_parser_t *parser);
676
677
static int
678
yaml_parser_fetch_key(yaml_parser_t *parser);
679
680
static int
681
yaml_parser_fetch_value(yaml_parser_t *parser);
682
683
static int
684
yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
685
686
static int
687
yaml_parser_fetch_tag(yaml_parser_t *parser);
688
689
static int
690
yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
691
692
static int
693
yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
694
695
static int
696
yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
697
698
/*
699
 * Token scanners.
700
 */
701
702
static int
703
yaml_parser_scan_to_next_token(yaml_parser_t *parser);
704
705
static int
706
yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
707
708
static int
709
yaml_parser_scan_directive_name(yaml_parser_t *parser,
710
        yaml_mark_t start_mark, yaml_char_t **name);
711
712
static int
713
yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
714
        yaml_mark_t start_mark, int *major, int *minor);
715
716
static int
717
yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
718
        yaml_mark_t start_mark, int *number);
719
720
static int
721
yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
722
        yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
723
724
static int
725
yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
726
        yaml_token_type_t type);
727
728
static int
729
yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
730
731
static int
732
yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
733
        yaml_mark_t start_mark, yaml_char_t **handle);
734
735
static int
736
yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
737
        yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
738
739
static int
740
yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
741
        yaml_mark_t start_mark, yaml_string_t *string);
742
743
static int
744
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
745
        int literal);
746
747
static int
748
yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
749
        int *indent, yaml_string_t *breaks,
750
        yaml_mark_t start_mark, yaml_mark_t *end_mark);
751
752
static int
753
yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
754
        int single);
755
756
static int
757
yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
758
759
/*
760
 * Get the next token.
761
 */
762
763
YAML_DECLARE(int)
764
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
765
0
{
766
0
    assert(parser); /* Non-NULL parser object is expected. */
767
0
    assert(token);  /* Non-NULL token object is expected. */
768
769
    /* Erase the token object. */
770
771
0
    memset(token, 0, sizeof(yaml_token_t));
772
773
    /* No tokens after STREAM-END or error. */
774
775
0
    if (parser->stream_end_produced || parser->error) {
776
0
        return 1;
777
0
    }
778
779
    /* Ensure that the tokens queue contains enough tokens. */
780
781
0
    if (!parser->token_available) {
782
0
        if (!yaml_parser_fetch_more_tokens(parser))
783
0
            return 0;
784
0
    }
785
786
    /* Fetch the next token from the queue. */
787
    
788
0
    *token = DEQUEUE(parser, parser->tokens);
789
0
    parser->token_available = 0;
790
0
    parser->tokens_parsed ++;
791
792
0
    if (token->type == YAML_STREAM_END_TOKEN) {
793
0
        parser->stream_end_produced = 1;
794
0
    }
795
796
0
    return 1;
797
0
}
798
799
/*
800
 * Set the scanner error and return 0.
801
 */
802
803
static int
804
yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
805
        yaml_mark_t context_mark, const char *problem)
806
0
{
807
0
    parser->error = YAML_SCANNER_ERROR;
808
0
    parser->context = context;
809
0
    parser->context_mark = context_mark;
810
0
    parser->problem = problem;
811
0
    parser->problem_mark = parser->mark;
812
813
0
    return 0;
814
0
}
815
816
/*
817
 * Ensure that the tokens queue contains at least one token which can be
818
 * returned to the Parser.
819
 */
820
821
YAML_DECLARE(int)
822
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
823
0
{
824
0
    int need_more_tokens;
825
826
    /* While we need more tokens to fetch, do it. */
827
828
0
    while (1)
829
0
    {
830
        /*
831
         * Check if we really need to fetch more tokens.
832
         */
833
834
0
        need_more_tokens = 0;
835
836
0
        if (parser->tokens.head == parser->tokens.tail)
837
0
        {
838
            /* Queue is empty. */
839
840
0
            need_more_tokens = 1;
841
0
        }
842
0
        else
843
0
        {
844
0
            yaml_simple_key_t *simple_key;
845
846
            /* Check if any potential simple key may occupy the head position. */
847
848
0
            if (!yaml_parser_stale_simple_keys(parser))
849
0
                return 0;
850
851
0
            for (simple_key = parser->simple_keys.start;
852
0
                    simple_key != parser->simple_keys.top; simple_key++) {
853
0
                if (simple_key->possible
854
0
                        && simple_key->token_number == parser->tokens_parsed) {
855
0
                    need_more_tokens = 1;
856
0
                    break;
857
0
                }
858
0
            }
859
0
        }
860
861
        /* We are finished. */
862
863
0
        if (!need_more_tokens)
864
0
            break;
865
866
        /* Fetch the next token. */
867
868
0
        if (!yaml_parser_fetch_next_token(parser))
869
0
            return 0;
870
0
    }
871
872
0
    parser->token_available = 1;
873
874
0
    return 1;
875
0
}
876
877
/*
878
 * The dispatcher for token fetchers.
879
 */
880
881
static int
882
yaml_parser_fetch_next_token(yaml_parser_t *parser)
883
0
{
884
    /* Ensure that the buffer is initialized. */
885
886
0
    if (!CACHE(parser, 1))
887
0
        return 0;
888
889
    /* Check if we just started scanning.  Fetch STREAM-START then. */
890
891
0
    if (!parser->stream_start_produced)
892
0
        return yaml_parser_fetch_stream_start(parser);
893
894
    /* Eat whitespaces and comments until we reach the next token. */
895
896
0
    if (!yaml_parser_scan_to_next_token(parser))
897
0
        return 0;
898
899
    /* Remove obsolete potential simple keys. */
900
901
0
    if (!yaml_parser_stale_simple_keys(parser))
902
0
        return 0;
903
904
    /* Check the indentation level against the current column. */
905
906
0
    if (!yaml_parser_unroll_indent(parser, parser->mark.column))
907
0
        return 0;
908
909
    /*
910
     * Ensure that the buffer contains at least 4 characters.  4 is the length
911
     * of the longest indicators ('--- ' and '... ').
912
     */
913
914
0
    if (!CACHE(parser, 4))
915
0
        return 0;
916
917
    /* Is it the end of the stream? */
918
919
0
    if (IS_Z(parser->buffer))
920
0
        return yaml_parser_fetch_stream_end(parser);
921
922
    /* Is it a directive? */
923
924
0
    if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
925
0
        return yaml_parser_fetch_directive(parser);
926
927
    /* Is it the document start indicator? */
928
929
0
    if (parser->mark.column == 0
930
0
            && CHECK_AT(parser->buffer, '-', 0)
931
0
            && CHECK_AT(parser->buffer, '-', 1)
932
0
            && CHECK_AT(parser->buffer, '-', 2)
933
0
            && IS_BLANKZ_AT(parser->buffer, 3))
934
0
        return yaml_parser_fetch_document_indicator(parser,
935
0
                YAML_DOCUMENT_START_TOKEN);
936
937
    /* Is it the document end indicator? */
938
939
0
    if (parser->mark.column == 0
940
0
            && CHECK_AT(parser->buffer, '.', 0)
941
0
            && CHECK_AT(parser->buffer, '.', 1)
942
0
            && CHECK_AT(parser->buffer, '.', 2)
943
0
            && IS_BLANKZ_AT(parser->buffer, 3))
944
0
        return yaml_parser_fetch_document_indicator(parser,
945
0
                YAML_DOCUMENT_END_TOKEN);
946
947
    /* Is it the flow sequence start indicator? */
948
949
0
    if (CHECK(parser->buffer, '['))
950
0
        return yaml_parser_fetch_flow_collection_start(parser,
951
0
                YAML_FLOW_SEQUENCE_START_TOKEN);
952
953
    /* Is it the flow mapping start indicator? */
954
955
0
    if (CHECK(parser->buffer, '{'))
956
0
        return yaml_parser_fetch_flow_collection_start(parser,
957
0
                YAML_FLOW_MAPPING_START_TOKEN);
958
959
    /* Is it the flow sequence end indicator? */
960
961
0
    if (CHECK(parser->buffer, ']'))
962
0
        return yaml_parser_fetch_flow_collection_end(parser,
963
0
                YAML_FLOW_SEQUENCE_END_TOKEN);
964
965
    /* Is it the flow mapping end indicator? */
966
967
0
    if (CHECK(parser->buffer, '}'))
968
0
        return yaml_parser_fetch_flow_collection_end(parser,
969
0
                YAML_FLOW_MAPPING_END_TOKEN);
970
971
    /* Is it the flow entry indicator? */
972
973
0
    if (CHECK(parser->buffer, ','))
974
0
        return yaml_parser_fetch_flow_entry(parser);
975
976
    /* Is it the block entry indicator? */
977
978
0
    if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
979
0
        return yaml_parser_fetch_block_entry(parser);
980
981
    /* Is it the key indicator? */
982
983
0
    if (CHECK(parser->buffer, '?')
984
0
            && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
985
0
        return yaml_parser_fetch_key(parser);
986
987
    /* Is it the value indicator? */
988
989
0
    if (CHECK(parser->buffer, ':')
990
0
            && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
991
0
        return yaml_parser_fetch_value(parser);
992
993
    /* Is it an alias? */
994
995
0
    if (CHECK(parser->buffer, '*'))
996
0
        return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
997
998
    /* Is it an anchor? */
999
1000
0
    if (CHECK(parser->buffer, '&'))
1001
0
        return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
1002
1003
    /* Is it a tag? */
1004
1005
0
    if (CHECK(parser->buffer, '!'))
1006
0
        return yaml_parser_fetch_tag(parser);
1007
1008
    /* Is it a literal scalar? */
1009
1010
0
    if (CHECK(parser->buffer, '|') && !parser->flow_level)
1011
0
        return yaml_parser_fetch_block_scalar(parser, 1);
1012
1013
    /* Is it a folded scalar? */
1014
1015
0
    if (CHECK(parser->buffer, '>') && !parser->flow_level)
1016
0
        return yaml_parser_fetch_block_scalar(parser, 0);
1017
1018
    /* Is it a single-quoted scalar? */
1019
1020
0
    if (CHECK(parser->buffer, '\''))
1021
0
        return yaml_parser_fetch_flow_scalar(parser, 1);
1022
1023
    /* Is it a double-quoted scalar? */
1024
1025
0
    if (CHECK(parser->buffer, '"'))
1026
0
        return yaml_parser_fetch_flow_scalar(parser, 0);
1027
1028
    /*
1029
     * Is it a plain scalar?
1030
     *
1031
     * A plain scalar may start with any non-blank characters except
1032
     *
1033
     *      '-', '?', ':', ',', '[', ']', '{', '}',
1034
     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1035
     *      '%', '@', '`'.
1036
     *
1037
     * In the block context (and, for the '-' indicator, in the flow context
1038
     * too), it may also start with the characters
1039
     *
1040
     *      '-', '?', ':'
1041
     *
1042
     * if it is followed by a non-space character.
1043
     *
1044
     * The last rule is more restrictive than the specification requires.
1045
     */
1046
1047
0
    if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1048
0
                || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1049
0
                || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1050
0
                || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1051
0
                || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1052
0
                || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1053
0
                || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1054
0
                || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1055
0
                || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1056
0
                || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1057
0
            (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1058
0
            (!parser->flow_level &&
1059
0
             (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1060
0
             && !IS_BLANKZ_AT(parser->buffer, 1)))
1061
0
        return yaml_parser_fetch_plain_scalar(parser);
1062
1063
    /*
1064
     * If we don't determine the token type so far, it is an error.
1065
     */
1066
1067
0
    return yaml_parser_set_scanner_error(parser,
1068
0
            "while scanning for the next token", parser->mark,
1069
0
            "found character that cannot start any token");
1070
0
}
1071
1072
/*
1073
 * Check the list of potential simple keys and remove the positions that
1074
 * cannot contain simple keys anymore.
1075
 */
1076
1077
static int
1078
yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1079
0
{
1080
0
    yaml_simple_key_t *simple_key;
1081
1082
    /* Check for a potential simple key for each flow level. */
1083
1084
0
    for (simple_key = parser->simple_keys.start;
1085
0
            simple_key != parser->simple_keys.top; simple_key ++)
1086
0
    {
1087
        /*
1088
         * The specification requires that a simple key
1089
         *
1090
         *  - is limited to a single line,
1091
         *  - is shorter than 1024 characters.
1092
         */
1093
1094
0
        if (simple_key->possible
1095
0
                && (simple_key->mark.line < parser->mark.line
1096
0
                    || simple_key->mark.index+1024 < parser->mark.index)) {
1097
1098
            /* Check if the potential simple key to be removed is required. */
1099
1100
0
            if (simple_key->required) {
1101
0
                return yaml_parser_set_scanner_error(parser,
1102
0
                        "while scanning a simple key", simple_key->mark,
1103
0
                        "could not find expected ':'");
1104
0
            }
1105
1106
0
            simple_key->possible = 0;
1107
0
        }
1108
0
    }
1109
1110
0
    return 1;
1111
0
}
1112
1113
/*
1114
 * Check if a simple key may start at the current position and add it if
1115
 * needed.
1116
 */
1117
1118
static int
1119
yaml_parser_save_simple_key(yaml_parser_t *parser)
1120
0
{
1121
    /*
1122
     * A simple key is required at the current position if the scanner is in
1123
     * the block context and the current column coincides with the indentation
1124
     * level.
1125
     */
1126
1127
0
    int required = (!parser->flow_level
1128
0
            && parser->indent == (ptrdiff_t)parser->mark.column);
1129
1130
    /*
1131
     * If the current position may start a simple key, save it.
1132
     */
1133
1134
0
    if (parser->simple_key_allowed)
1135
0
    {
1136
0
        yaml_simple_key_t simple_key;
1137
0
        simple_key.possible = 1;
1138
0
        simple_key.required = required;
1139
0
        simple_key.token_number = 
1140
0
            parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1141
0
        simple_key.mark = parser->mark;
1142
1143
0
        if (!yaml_parser_remove_simple_key(parser)) return 0;
1144
1145
0
        *(parser->simple_keys.top-1) = simple_key;
1146
0
    }
1147
1148
0
    return 1;
1149
0
}
1150
1151
/*
1152
 * Remove a potential simple key at the current flow level.
1153
 */
1154
1155
static int
1156
yaml_parser_remove_simple_key(yaml_parser_t *parser)
1157
0
{
1158
0
    yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1159
1160
0
    if (simple_key->possible)
1161
0
    {
1162
        /* If the key is required, it is an error. */
1163
1164
0
        if (simple_key->required) {
1165
0
            return yaml_parser_set_scanner_error(parser,
1166
0
                    "while scanning a simple key", simple_key->mark,
1167
0
                    "could not find expected ':'");
1168
0
        }
1169
0
    }
1170
1171
    /* Remove the key from the stack. */
1172
1173
0
    simple_key->possible = 0;
1174
1175
0
    return 1;
1176
0
}
1177
1178
/*
1179
 * Increase the flow level and resize the simple key list if needed.
1180
 */
1181
1182
static int
1183
yaml_parser_increase_flow_level(yaml_parser_t *parser)
1184
0
{
1185
0
    yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1186
1187
    /* Reset the simple key on the next level. */
1188
1189
0
    if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1190
0
        return 0;
1191
1192
    /* Increase the flow level. */
1193
1194
0
    if (parser->flow_level == INT_MAX) {
1195
0
        parser->error = YAML_MEMORY_ERROR;
1196
0
        return 0;
1197
0
    }
1198
1199
0
    parser->flow_level++;
1200
1201
0
    return 1;
1202
0
}
1203
1204
/*
1205
 * Decrease the flow level.
1206
 */
1207
1208
static int
1209
yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1210
0
{
1211
//     yaml_simple_key_t dummy_key;    /* Used to eliminate a compiler warning. */
1212
1213
0
    if (parser->flow_level) {
1214
0
        parser->flow_level --;
1215
0
        /*dummy_key =*/ POP(parser, parser->simple_keys);
1216
0
    }
1217
1218
0
    return 1;
1219
0
}
1220
1221
/*
1222
 * Push the current indentation level to the stack and set the new level
1223
 * the current column is greater than the indentation level.  In this case,
1224
 * append or insert the specified token into the token queue.
1225
 * 
1226
 */
1227
1228
static int
1229
yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1230
        ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1231
0
{
1232
0
    yaml_token_t token;
1233
1234
    /* In the flow context, do nothing. */
1235
1236
0
    if (parser->flow_level)
1237
0
        return 1;
1238
1239
0
    if (parser->indent < column)
1240
0
    {
1241
        /*
1242
         * Push the current indentation level to the stack and set the new
1243
         * indentation level.
1244
         */
1245
1246
0
        if (!PUSH(parser, parser->indents, parser->indent))
1247
0
            return 0;
1248
1249
0
        if (column > INT_MAX) {
1250
0
            parser->error = YAML_MEMORY_ERROR;
1251
0
            return 0;
1252
0
        }
1253
1254
0
        parser->indent = (int)column;
1255
1256
        /* Create a token and insert it into the queue. */
1257
1258
0
        TOKEN_INIT(token, type, mark, mark);
1259
1260
0
        if (number == -1) {
1261
0
            if (!ENQUEUE(parser, parser->tokens, token))
1262
0
                return 0;
1263
0
        }
1264
0
        else {
1265
0
            if (!QUEUE_INSERT(parser,
1266
0
                        parser->tokens, number - parser->tokens_parsed, token))
1267
0
                return 0;
1268
0
        }
1269
0
    }
1270
1271
0
    return 1;
1272
0
}
1273
1274
/*
1275
 * Pop indentation levels from the indents stack until the current level
1276
 * becomes less or equal to the column.  For each intendation level, append
1277
 * the BLOCK-END token.
1278
 */
1279
1280
1281
static int
1282
yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1283
0
{
1284
0
    yaml_token_t token;
1285
1286
    /* In the flow context, do nothing. */
1287
1288
0
    if (parser->flow_level)
1289
0
        return 1;
1290
1291
    /* Loop through the intendation levels in the stack. */
1292
1293
0
    while (parser->indent > column)
1294
0
    {
1295
        /* Create a token and append it to the queue. */
1296
1297
0
        TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1298
1299
0
        if (!ENQUEUE(parser, parser->tokens, token))
1300
0
            return 0;
1301
1302
        /* Pop the indentation level. */
1303
1304
0
        parser->indent = POP(parser, parser->indents);
1305
0
    }
1306
1307
0
    return 1;
1308
0
}
1309
1310
/*
1311
 * Initialize the scanner and produce the STREAM-START token.
1312
 */
1313
1314
static int
1315
yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1316
0
{
1317
0
    yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1318
0
    yaml_token_t token;
1319
1320
    /* Set the initial indentation. */
1321
1322
0
    parser->indent = -1;
1323
1324
    /* Initialize the simple key stack. */
1325
1326
0
    if (!PUSH(parser, parser->simple_keys, simple_key))
1327
0
        return 0;
1328
1329
    /* A simple key is allowed at the beginning of the stream. */
1330
1331
0
    parser->simple_key_allowed = 1;
1332
1333
    /* We have started. */
1334
1335
0
    parser->stream_start_produced = 1;
1336
1337
    /* Create the STREAM-START token and append it to the queue. */
1338
1339
0
    STREAM_START_TOKEN_INIT(token, parser->encoding,
1340
0
            parser->mark, parser->mark);
1341
1342
0
    if (!ENQUEUE(parser, parser->tokens, token))
1343
0
        return 0;
1344
1345
0
    return 1;
1346
0
}
1347
1348
/*
1349
 * Produce the STREAM-END token and shut down the scanner.
1350
 */
1351
1352
static int
1353
yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1354
0
{
1355
0
    yaml_token_t token;
1356
1357
    /* Force new line. */
1358
1359
0
    if (parser->mark.column != 0) {
1360
0
        parser->mark.column = 0;
1361
0
        parser->mark.line ++;
1362
0
    }
1363
1364
    /* Reset the indentation level. */
1365
1366
0
    if (!yaml_parser_unroll_indent(parser, -1))
1367
0
        return 0;
1368
1369
    /* Reset simple keys. */
1370
1371
0
    if (!yaml_parser_remove_simple_key(parser))
1372
0
        return 0;
1373
1374
0
    parser->simple_key_allowed = 0;
1375
1376
    /* Create the STREAM-END token and append it to the queue. */
1377
1378
0
    STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1379
1380
0
    if (!ENQUEUE(parser, parser->tokens, token))
1381
0
        return 0;
1382
1383
0
    return 1;
1384
0
}
1385
1386
/*
1387
 * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1388
 */
1389
1390
static int
1391
yaml_parser_fetch_directive(yaml_parser_t *parser)
1392
0
{
1393
0
    yaml_token_t token;
1394
1395
    /* Reset the indentation level. */
1396
1397
0
    if (!yaml_parser_unroll_indent(parser, -1))
1398
0
        return 0;
1399
1400
    /* Reset simple keys. */
1401
1402
0
    if (!yaml_parser_remove_simple_key(parser))
1403
0
        return 0;
1404
1405
0
    parser->simple_key_allowed = 0;
1406
1407
    /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1408
1409
0
    if (!yaml_parser_scan_directive(parser, &token))
1410
0
        return 0;
1411
1412
    /* Append the token to the queue. */
1413
1414
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1415
0
        yaml_token_delete(&token);
1416
0
        return 0;
1417
0
    }
1418
1419
0
    return 1;
1420
0
}
1421
1422
/*
1423
 * Produce the DOCUMENT-START or DOCUMENT-END token.
1424
 */
1425
1426
static int
1427
yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1428
        yaml_token_type_t type)
1429
0
{
1430
0
    yaml_mark_t start_mark, end_mark;
1431
0
    yaml_token_t token;
1432
1433
    /* Reset the indentation level. */
1434
1435
0
    if (!yaml_parser_unroll_indent(parser, -1))
1436
0
        return 0;
1437
1438
    /* Reset simple keys. */
1439
1440
0
    if (!yaml_parser_remove_simple_key(parser))
1441
0
        return 0;
1442
1443
0
    parser->simple_key_allowed = 0;
1444
1445
    /* Consume the token. */
1446
1447
0
    start_mark = parser->mark;
1448
1449
0
    SKIP(parser);
1450
0
    SKIP(parser);
1451
0
    SKIP(parser);
1452
1453
0
    end_mark = parser->mark;
1454
1455
    /* Create the DOCUMENT-START or DOCUMENT-END token. */
1456
1457
0
    TOKEN_INIT(token, type, start_mark, end_mark);
1458
1459
    /* Append the token to the queue. */
1460
1461
0
    if (!ENQUEUE(parser, parser->tokens, token))
1462
0
        return 0;
1463
1464
0
    return 1;
1465
0
}
1466
1467
/*
1468
 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1469
 */
1470
1471
static int
1472
yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1473
        yaml_token_type_t type)
1474
0
{
1475
0
    yaml_mark_t start_mark, end_mark;
1476
0
    yaml_token_t token;
1477
1478
    /* The indicators '[' and '{' may start a simple key. */
1479
1480
0
    if (!yaml_parser_save_simple_key(parser))
1481
0
        return 0;
1482
1483
    /* Increase the flow level. */
1484
1485
0
    if (!yaml_parser_increase_flow_level(parser))
1486
0
        return 0;
1487
1488
    /* A simple key may follow the indicators '[' and '{'. */
1489
1490
0
    parser->simple_key_allowed = 1;
1491
1492
    /* Consume the token. */
1493
1494
0
    start_mark = parser->mark;
1495
0
    SKIP(parser);
1496
0
    end_mark = parser->mark;
1497
1498
    /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1499
1500
0
    TOKEN_INIT(token, type, start_mark, end_mark);
1501
1502
    /* Append the token to the queue. */
1503
1504
0
    if (!ENQUEUE(parser, parser->tokens, token))
1505
0
        return 0;
1506
1507
0
    return 1;
1508
0
}
1509
1510
/*
1511
 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1512
 */
1513
1514
static int
1515
yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1516
        yaml_token_type_t type)
1517
0
{
1518
0
    yaml_mark_t start_mark, end_mark;
1519
0
    yaml_token_t token;
1520
1521
    /* Reset any potential simple key on the current flow level. */
1522
1523
0
    if (!yaml_parser_remove_simple_key(parser))
1524
0
        return 0;
1525
1526
    /* Decrease the flow level. */
1527
1528
0
    if (!yaml_parser_decrease_flow_level(parser))
1529
0
        return 0;
1530
1531
    /* No simple keys after the indicators ']' and '}'. */
1532
1533
0
    parser->simple_key_allowed = 0;
1534
1535
    /* Consume the token. */
1536
1537
0
    start_mark = parser->mark;
1538
0
    SKIP(parser);
1539
0
    end_mark = parser->mark;
1540
1541
    /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1542
1543
0
    TOKEN_INIT(token, type, start_mark, end_mark);
1544
1545
    /* Append the token to the queue. */
1546
1547
0
    if (!ENQUEUE(parser, parser->tokens, token))
1548
0
        return 0;
1549
1550
0
    return 1;
1551
0
}
1552
1553
/*
1554
 * Produce the FLOW-ENTRY token.
1555
 */
1556
1557
static int
1558
yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1559
0
{
1560
0
    yaml_mark_t start_mark, end_mark;
1561
0
    yaml_token_t token;
1562
1563
    /* Reset any potential simple keys on the current flow level. */
1564
1565
0
    if (!yaml_parser_remove_simple_key(parser))
1566
0
        return 0;
1567
1568
    /* Simple keys are allowed after ','. */
1569
1570
0
    parser->simple_key_allowed = 1;
1571
1572
    /* Consume the token. */
1573
1574
0
    start_mark = parser->mark;
1575
0
    SKIP(parser);
1576
0
    end_mark = parser->mark;
1577
1578
    /* Create the FLOW-ENTRY token and append it to the queue. */
1579
1580
0
    TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1581
1582
0
    if (!ENQUEUE(parser, parser->tokens, token))
1583
0
        return 0;
1584
1585
0
    return 1;
1586
0
}
1587
1588
/*
1589
 * Produce the BLOCK-ENTRY token.
1590
 */
1591
1592
static int
1593
yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1594
0
{
1595
0
    yaml_mark_t start_mark, end_mark;
1596
0
    yaml_token_t token;
1597
1598
    /* Check if the scanner is in the block context. */
1599
1600
0
    if (!parser->flow_level)
1601
0
    {
1602
        /* Check if we are allowed to start a new entry. */
1603
1604
0
        if (!parser->simple_key_allowed) {
1605
0
            return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1606
0
                    "block sequence entries are not allowed in this context");
1607
0
        }
1608
1609
        /* Add the BLOCK-SEQUENCE-START token if needed. */
1610
1611
0
        if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1612
0
                    YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1613
0
            return 0;
1614
0
    }
1615
0
    else
1616
0
    {
1617
        /*
1618
         * It is an error for the '-' indicator to occur in the flow context,
1619
         * but we let the Parser detect and report about it because the Parser
1620
         * is able to point to the context.
1621
         */
1622
0
    }
1623
1624
    /* Reset any potential simple keys on the current flow level. */
1625
1626
0
    if (!yaml_parser_remove_simple_key(parser))
1627
0
        return 0;
1628
1629
    /* Simple keys are allowed after '-'. */
1630
1631
0
    parser->simple_key_allowed = 1;
1632
1633
    /* Consume the token. */
1634
1635
0
    start_mark = parser->mark;
1636
0
    SKIP(parser);
1637
0
    end_mark = parser->mark;
1638
1639
    /* Create the BLOCK-ENTRY token and append it to the queue. */
1640
1641
0
    TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1642
1643
0
    if (!ENQUEUE(parser, parser->tokens, token))
1644
0
        return 0;
1645
1646
0
    return 1;
1647
0
}
1648
1649
/*
1650
 * Produce the KEY token.
1651
 */
1652
1653
static int
1654
yaml_parser_fetch_key(yaml_parser_t *parser)
1655
0
{
1656
0
    yaml_mark_t start_mark, end_mark;
1657
0
    yaml_token_t token;
1658
1659
    /* In the block context, additional checks are required. */
1660
1661
0
    if (!parser->flow_level)
1662
0
    {
1663
        /* Check if we are allowed to start a new key (not nessesary simple). */
1664
1665
0
        if (!parser->simple_key_allowed) {
1666
0
            return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1667
0
                    "mapping keys are not allowed in this context");
1668
0
        }
1669
1670
        /* Add the BLOCK-MAPPING-START token if needed. */
1671
1672
0
        if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1673
0
                    YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1674
0
            return 0;
1675
0
    }
1676
1677
    /* Reset any potential simple keys on the current flow level. */
1678
1679
0
    if (!yaml_parser_remove_simple_key(parser))
1680
0
        return 0;
1681
1682
    /* Simple keys are allowed after '?' in the block context. */
1683
1684
0
    parser->simple_key_allowed = (!parser->flow_level);
1685
1686
    /* Consume the token. */
1687
1688
0
    start_mark = parser->mark;
1689
0
    SKIP(parser);
1690
0
    end_mark = parser->mark;
1691
1692
    /* Create the KEY token and append it to the queue. */
1693
1694
0
    TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1695
1696
0
    if (!ENQUEUE(parser, parser->tokens, token))
1697
0
        return 0;
1698
1699
0
    return 1;
1700
0
}
1701
1702
/*
1703
 * Produce the VALUE token.
1704
 */
1705
1706
static int
1707
yaml_parser_fetch_value(yaml_parser_t *parser)
1708
0
{
1709
0
    yaml_mark_t start_mark, end_mark;
1710
0
    yaml_token_t token;
1711
0
    yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1712
1713
    /* Have we found a simple key? */
1714
1715
0
    if (simple_key->possible)
1716
0
    {
1717
1718
        /* Create the KEY token and insert it into the queue. */
1719
1720
0
        TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1721
1722
0
        if (!QUEUE_INSERT(parser, parser->tokens,
1723
0
                    simple_key->token_number - parser->tokens_parsed, token))
1724
0
            return 0;
1725
1726
        /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1727
1728
0
        if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1729
0
                    simple_key->token_number,
1730
0
                    YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1731
0
            return 0;
1732
1733
        /* Remove the simple key. */
1734
1735
0
        simple_key->possible = 0;
1736
1737
        /* A simple key cannot follow another simple key. */
1738
1739
0
        parser->simple_key_allowed = 0;
1740
0
    }
1741
0
    else
1742
0
    {
1743
        /* The ':' indicator follows a complex key. */
1744
1745
        /* In the block context, extra checks are required. */
1746
1747
0
        if (!parser->flow_level)
1748
0
        {
1749
            /* Check if we are allowed to start a complex value. */
1750
1751
0
            if (!parser->simple_key_allowed) {
1752
0
                return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1753
0
                        "mapping values are not allowed in this context");
1754
0
            }
1755
1756
            /* Add the BLOCK-MAPPING-START token if needed. */
1757
1758
0
            if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1759
0
                        YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1760
0
                return 0;
1761
0
        }
1762
1763
        /* Simple keys after ':' are allowed in the block context. */
1764
1765
0
        parser->simple_key_allowed = (!parser->flow_level);
1766
0
    }
1767
1768
    /* Consume the token. */
1769
1770
0
    start_mark = parser->mark;
1771
0
    SKIP(parser);
1772
0
    end_mark = parser->mark;
1773
1774
    /* Create the VALUE token and append it to the queue. */
1775
1776
0
    TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1777
1778
0
    if (!ENQUEUE(parser, parser->tokens, token))
1779
0
        return 0;
1780
1781
0
    return 1;
1782
0
}
1783
1784
/*
1785
 * Produce the ALIAS or ANCHOR token.
1786
 */
1787
1788
static int
1789
yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1790
0
{
1791
0
    yaml_token_t token;
1792
1793
    /* An anchor or an alias could be a simple key. */
1794
1795
0
    if (!yaml_parser_save_simple_key(parser))
1796
0
        return 0;
1797
1798
    /* A simple key cannot follow an anchor or an alias. */
1799
1800
0
    parser->simple_key_allowed = 0;
1801
1802
    /* Create the ALIAS or ANCHOR token and append it to the queue. */
1803
1804
0
    if (!yaml_parser_scan_anchor(parser, &token, type))
1805
0
        return 0;
1806
1807
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1808
0
        yaml_token_delete(&token);
1809
0
        return 0;
1810
0
    }
1811
0
    return 1;
1812
0
}
1813
1814
/*
1815
 * Produce the TAG token.
1816
 */
1817
1818
static int
1819
yaml_parser_fetch_tag(yaml_parser_t *parser)
1820
0
{
1821
0
    yaml_token_t token;
1822
1823
    /* A tag could be a simple key. */
1824
1825
0
    if (!yaml_parser_save_simple_key(parser))
1826
0
        return 0;
1827
1828
    /* A simple key cannot follow a tag. */
1829
1830
0
    parser->simple_key_allowed = 0;
1831
1832
    /* Create the TAG token and append it to the queue. */
1833
1834
0
    if (!yaml_parser_scan_tag(parser, &token))
1835
0
        return 0;
1836
1837
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1838
0
        yaml_token_delete(&token);
1839
0
        return 0;
1840
0
    }
1841
1842
0
    return 1;
1843
0
}
1844
1845
/*
1846
 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1847
 */
1848
1849
static int
1850
yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1851
0
{
1852
0
    yaml_token_t token;
1853
1854
    /* Remove any potential simple keys. */
1855
1856
0
    if (!yaml_parser_remove_simple_key(parser))
1857
0
        return 0;
1858
1859
    /* A simple key may follow a block scalar. */
1860
1861
0
    parser->simple_key_allowed = 1;
1862
1863
    /* Create the SCALAR token and append it to the queue. */
1864
1865
0
    if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1866
0
        return 0;
1867
1868
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1869
0
        yaml_token_delete(&token);
1870
0
        return 0;
1871
0
    }
1872
1873
0
    return 1;
1874
0
}
1875
1876
/*
1877
 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1878
 */
1879
1880
static int
1881
yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1882
0
{
1883
0
    yaml_token_t token;
1884
1885
    /* A plain scalar could be a simple key. */
1886
1887
0
    if (!yaml_parser_save_simple_key(parser))
1888
0
        return 0;
1889
1890
    /* A simple key cannot follow a flow scalar. */
1891
1892
0
    parser->simple_key_allowed = 0;
1893
1894
    /* Create the SCALAR token and append it to the queue. */
1895
1896
0
    if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1897
0
        return 0;
1898
1899
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1900
0
        yaml_token_delete(&token);
1901
0
        return 0;
1902
0
    }
1903
1904
0
    return 1;
1905
0
}
1906
1907
/*
1908
 * Produce the SCALAR(...,plain) token.
1909
 */
1910
1911
static int
1912
yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1913
0
{
1914
0
    yaml_token_t token;
1915
1916
    /* A plain scalar could be a simple key. */
1917
1918
0
    if (!yaml_parser_save_simple_key(parser))
1919
0
        return 0;
1920
1921
    /* A simple key cannot follow a flow scalar. */
1922
1923
0
    parser->simple_key_allowed = 0;
1924
1925
    /* Create the SCALAR token and append it to the queue. */
1926
1927
0
    if (!yaml_parser_scan_plain_scalar(parser, &token))
1928
0
        return 0;
1929
1930
0
    if (!ENQUEUE(parser, parser->tokens, token)) {
1931
0
        yaml_token_delete(&token);
1932
0
        return 0;
1933
0
    }
1934
1935
0
    return 1;
1936
0
}
1937
1938
/*
1939
 * Eat whitespaces and comments until the next token is found.
1940
 */
1941
1942
static int
1943
yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1944
0
{
1945
    /* Until the next token is not found. */
1946
1947
0
    while (1)
1948
0
    {
1949
        /* Allow the BOM mark to start a line. */
1950
1951
0
        if (!CACHE(parser, 1)) return 0;
1952
1953
0
        if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1954
0
            SKIP(parser);
1955
1956
        /*
1957
         * Eat whitespaces.
1958
         *
1959
         * Tabs are allowed:
1960
         *
1961
         *  - in the flow context;
1962
         *  - in the block context, but not at the beginning of the line or
1963
         *  after '-', '?', or ':' (complex value).  
1964
         */
1965
1966
0
        if (!CACHE(parser, 1)) return 0;
1967
1968
0
        while (CHECK(parser->buffer,' ') ||
1969
0
                ((parser->flow_level || !parser->simple_key_allowed) &&
1970
0
                 CHECK(parser->buffer, '\t'))) {
1971
0
            SKIP(parser);
1972
0
            if (!CACHE(parser, 1)) return 0;
1973
0
        }
1974
1975
        /* Eat a comment until a line break. */
1976
1977
0
        if (CHECK(parser->buffer, '#')) {
1978
0
            while (!IS_BREAKZ(parser->buffer)) {
1979
0
                SKIP(parser);
1980
0
                if (!CACHE(parser, 1)) return 0;
1981
0
            }
1982
0
        }
1983
1984
        /* If it is a line break, eat it. */
1985
1986
0
        if (IS_BREAK(parser->buffer))
1987
0
        {
1988
0
            if (!CACHE(parser, 2)) return 0;
1989
0
            SKIP_LINE(parser);
1990
1991
            /* In the block context, a new line may start a simple key. */
1992
1993
0
            if (!parser->flow_level) {
1994
0
                parser->simple_key_allowed = 1;
1995
0
            }
1996
0
        }
1997
0
        else
1998
0
        {
1999
            /* We have found a token. */
2000
2001
0
            break;
2002
0
        }
2003
0
    }
2004
2005
0
    return 1;
2006
0
}
2007
2008
/*
2009
 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
2010
 *
2011
 * Scope:
2012
 *      %YAML    1.1    # a comment \n
2013
 *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2014
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2015
 *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2016
 */
2017
2018
int
2019
yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
2020
0
{
2021
0
    yaml_mark_t start_mark, end_mark;
2022
0
    yaml_char_t *name = NULL;
2023
0
    int major, minor;
2024
0
    yaml_char_t *handle = NULL, *prefix = NULL;
2025
2026
    /* Eat '%'. */
2027
2028
0
    start_mark = parser->mark;
2029
2030
0
    SKIP(parser);
2031
2032
    /* Scan the directive name. */
2033
2034
0
    if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2035
0
        goto error;
2036
2037
    /* Is it a YAML directive? */
2038
2039
0
    if (strcmp((char *)name, "YAML") == 0)
2040
0
    {
2041
        /* Scan the VERSION directive value. */
2042
2043
0
        if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2044
0
                    &major, &minor))
2045
0
            goto error;
2046
2047
0
        end_mark = parser->mark;
2048
2049
        /* Create a VERSION-DIRECTIVE token. */
2050
2051
0
        VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2052
0
                start_mark, end_mark);
2053
0
    }
2054
2055
    /* Is it a TAG directive? */
2056
2057
0
    else if (strcmp((char *)name, "TAG") == 0)
2058
0
    {
2059
        /* Scan the TAG directive value. */
2060
2061
0
        if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2062
0
                    &handle, &prefix))
2063
0
            goto error;
2064
2065
0
        end_mark = parser->mark;
2066
2067
        /* Create a TAG-DIRECTIVE token. */
2068
2069
0
        TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2070
0
                start_mark, end_mark);
2071
0
    }
2072
2073
    /* Unknown directive. */
2074
2075
0
    else
2076
0
    {
2077
0
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2078
0
                start_mark, "found uknown directive name");
2079
0
        goto error;
2080
0
    }
2081
2082
    /* Eat the rest of the line including any comments. */
2083
2084
0
    if (!CACHE(parser, 1)) goto error;
2085
2086
0
    while (IS_BLANK(parser->buffer)) {
2087
0
        SKIP(parser);
2088
0
        if (!CACHE(parser, 1)) goto error;
2089
0
    }
2090
2091
0
    if (CHECK(parser->buffer, '#')) {
2092
0
        while (!IS_BREAKZ(parser->buffer)) {
2093
0
            SKIP(parser);
2094
0
            if (!CACHE(parser, 1)) goto error;
2095
0
        }
2096
0
    }
2097
2098
    /* Check if we are at the end of the line. */
2099
2100
0
    if (!IS_BREAKZ(parser->buffer)) {
2101
0
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2102
0
                start_mark, "did not find expected comment or line break");
2103
0
        goto error;
2104
0
    }
2105
2106
    /* Eat a line break. */
2107
2108
0
    if (IS_BREAK(parser->buffer)) {
2109
0
        if (!CACHE(parser, 2)) goto error;
2110
0
        SKIP_LINE(parser);
2111
0
    }
2112
2113
0
    yaml_free(name);
2114
2115
0
    return 1;
2116
2117
0
error:
2118
0
    yaml_free(prefix);
2119
0
    yaml_free(handle);
2120
0
    yaml_free(name);
2121
0
    return 0;
2122
0
}
2123
2124
/*
2125
 * Scan the directive name.
2126
 *
2127
 * Scope:
2128
 *      %YAML   1.1     # a comment \n
2129
 *       ^^^^
2130
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2131
 *       ^^^
2132
 */
2133
2134
static int
2135
yaml_parser_scan_directive_name(yaml_parser_t *parser,
2136
        yaml_mark_t start_mark, yaml_char_t **name)
2137
0
{
2138
0
    yaml_string_t string = NULL_STRING;
2139
2140
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2141
2142
    /* Consume the directive name. */
2143
2144
0
    if (!CACHE(parser, 1)) goto error;
2145
2146
0
    while (IS_ALPHA(parser->buffer))
2147
0
    {
2148
0
        if (!READ(parser, string)) goto error;
2149
0
        if (!CACHE(parser, 1)) goto error;
2150
0
    }
2151
2152
    /* Check if the name is empty. */
2153
2154
0
    if (string.start == string.pointer) {
2155
0
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2156
0
                start_mark, "could not find expected directive name");
2157
0
        goto error;
2158
0
    }
2159
2160
    /* Check for an blank character after the name. */
2161
2162
0
    if (!IS_BLANKZ(parser->buffer)) {
2163
0
        yaml_parser_set_scanner_error(parser, "while scanning a directive",
2164
0
                start_mark, "found unexpected non-alphabetical character");
2165
0
        goto error;
2166
0
    }
2167
2168
0
    *name = string.start;
2169
2170
0
    return 1;
2171
2172
0
error:
2173
0
    STRING_DEL(parser, string);
2174
0
    return 0;
2175
0
}
2176
2177
/*
2178
 * Scan the value of VERSION-DIRECTIVE.
2179
 *
2180
 * Scope:
2181
 *      %YAML   1.1     # a comment \n
2182
 *           ^^^^^^
2183
 */
2184
2185
static int
2186
yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2187
        yaml_mark_t start_mark, int *major, int *minor)
2188
0
{
2189
    /* Eat whitespaces. */
2190
2191
0
    if (!CACHE(parser, 1)) return 0;
2192
2193
0
    while (IS_BLANK(parser->buffer)) {
2194
0
        SKIP(parser);
2195
0
        if (!CACHE(parser, 1)) return 0;
2196
0
    }
2197
2198
    /* Consume the major version number. */
2199
2200
0
    if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2201
0
        return 0;
2202
2203
    /* Eat '.'. */
2204
2205
0
    if (!CHECK(parser->buffer, '.')) {
2206
0
        return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2207
0
                start_mark, "did not find expected digit or '.' character");
2208
0
    }
2209
2210
0
    SKIP(parser);
2211
2212
    /* Consume the minor version number. */
2213
2214
0
    if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2215
0
        return 0;
2216
2217
0
    return 1;
2218
0
}
2219
2220
0
#define MAX_NUMBER_LENGTH   9
2221
2222
/*
2223
 * Scan the version number of VERSION-DIRECTIVE.
2224
 *
2225
 * Scope:
2226
 *      %YAML   1.1     # a comment \n
2227
 *              ^
2228
 *      %YAML   1.1     # a comment \n
2229
 *                ^
2230
 */
2231
2232
static int
2233
yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2234
        yaml_mark_t start_mark, int *number)
2235
0
{
2236
0
    int value = 0;
2237
0
    size_t length = 0;
2238
2239
    /* Repeat while the next character is digit. */
2240
2241
0
    if (!CACHE(parser, 1)) return 0;
2242
2243
0
    while (IS_DIGIT(parser->buffer))
2244
0
    {
2245
        /* Check if the number is too long. */
2246
2247
0
        if (++length > MAX_NUMBER_LENGTH) {
2248
0
            return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2249
0
                    start_mark, "found extremely long version number");
2250
0
        }
2251
2252
0
        value = value*10 + AS_DIGIT(parser->buffer);
2253
2254
0
        SKIP(parser);
2255
2256
0
        if (!CACHE(parser, 1)) return 0;
2257
0
    }
2258
2259
    /* Check if the number was present. */
2260
2261
0
    if (!length) {
2262
0
        return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2263
0
                start_mark, "did not find expected version number");
2264
0
    }
2265
2266
0
    *number = value;
2267
2268
0
    return 1;
2269
0
}
2270
2271
/*
2272
 * Scan the value of a TAG-DIRECTIVE token.
2273
 *
2274
 * Scope:
2275
 *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2276
 *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2277
 */
2278
2279
static int
2280
yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2281
        yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2282
0
{
2283
0
    yaml_char_t *handle_value = NULL;
2284
0
    yaml_char_t *prefix_value = NULL;
2285
2286
    /* Eat whitespaces. */
2287
2288
0
    if (!CACHE(parser, 1)) goto error;
2289
2290
0
    while (IS_BLANK(parser->buffer)) {
2291
0
        SKIP(parser);
2292
0
        if (!CACHE(parser, 1)) goto error;
2293
0
    }
2294
2295
    /* Scan a handle. */
2296
2297
0
    if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2298
0
        goto error;
2299
2300
    /* Expect a whitespace. */
2301
2302
0
    if (!CACHE(parser, 1)) goto error;
2303
2304
0
    if (!IS_BLANK(parser->buffer)) {
2305
0
        yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2306
0
                start_mark, "did not find expected whitespace");
2307
0
        goto error;
2308
0
    }
2309
2310
    /* Eat whitespaces. */
2311
2312
0
    while (IS_BLANK(parser->buffer)) {
2313
0
        SKIP(parser);
2314
0
        if (!CACHE(parser, 1)) goto error;
2315
0
    }
2316
2317
    /* Scan a prefix. */
2318
2319
0
    if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2320
0
        goto error;
2321
2322
    /* Expect a whitespace or line break. */
2323
2324
0
    if (!CACHE(parser, 1)) goto error;
2325
2326
0
    if (!IS_BLANKZ(parser->buffer)) {
2327
0
        yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2328
0
                start_mark, "did not find expected whitespace or line break");
2329
0
        goto error;
2330
0
    }
2331
2332
0
    *handle = handle_value;
2333
0
    *prefix = prefix_value;
2334
2335
0
    return 1;
2336
2337
0
error:
2338
0
    yaml_free(handle_value);
2339
0
    yaml_free(prefix_value);
2340
0
    return 0;
2341
0
}
2342
2343
static int
2344
yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2345
        yaml_token_type_t type)
2346
0
{
2347
0
    int length = 0;
2348
0
    yaml_mark_t start_mark, end_mark;
2349
0
    yaml_string_t string = NULL_STRING;
2350
2351
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2352
2353
    /* Eat the indicator character. */
2354
2355
0
    start_mark = parser->mark;
2356
2357
0
    SKIP(parser);
2358
2359
    /* Consume the value. */
2360
2361
0
    if (!CACHE(parser, 1)) goto error;
2362
2363
0
    while (IS_ALPHA(parser->buffer)) {
2364
0
        if (!READ(parser, string)) goto error;
2365
0
        if (!CACHE(parser, 1)) goto error;
2366
0
        length ++;
2367
0
    }
2368
2369
0
    end_mark = parser->mark;
2370
2371
    /*
2372
     * Check if length of the anchor is greater than 0 and it is followed by
2373
     * a whitespace character or one of the indicators:
2374
     *
2375
     *      '?', ':', ',', ']', '}', '%', '@', '`'.
2376
     */
2377
2378
0
    if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2379
0
                || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2380
0
                || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2381
0
                || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2382
0
                || CHECK(parser->buffer, '`'))) {
2383
0
        yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2384
0
                "while scanning an anchor" : "while scanning an alias", start_mark,
2385
0
                "did not find expected alphabetic or numeric character");
2386
0
        goto error;
2387
0
    }
2388
2389
    /* Create a token. */
2390
2391
0
    if (type == YAML_ANCHOR_TOKEN) {
2392
0
        ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2393
0
    }
2394
0
    else {
2395
0
        ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2396
0
    }
2397
2398
0
    return 1;
2399
2400
0
error:
2401
0
    STRING_DEL(parser, string);
2402
0
    return 0;
2403
0
}
2404
2405
/*
2406
 * Scan a TAG token.
2407
 */
2408
2409
static int
2410
yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2411
0
{
2412
0
    yaml_char_t *handle = NULL;
2413
0
    yaml_char_t *suffix = NULL;
2414
0
    yaml_mark_t start_mark, end_mark;
2415
2416
0
    start_mark = parser->mark;
2417
2418
    /* Check if the tag is in the canonical form. */
2419
2420
0
    if (!CACHE(parser, 2)) goto error;
2421
2422
0
    if (CHECK_AT(parser->buffer, '<', 1))
2423
0
    {
2424
        /* Set the handle to '' */
2425
2426
0
        handle = yaml_malloc(1);
2427
0
        if (!handle) goto error;
2428
0
        handle[0] = '\0';
2429
2430
        /* Eat '!<' */
2431
2432
0
        SKIP(parser);
2433
0
        SKIP(parser);
2434
2435
        /* Consume the tag value. */
2436
2437
0
        if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2438
0
            goto error;
2439
2440
        /* Check for '>' and eat it. */
2441
2442
0
        if (!CHECK(parser->buffer, '>')) {
2443
0
            yaml_parser_set_scanner_error(parser, "while scanning a tag",
2444
0
                    start_mark, "did not find the expected '>'");
2445
0
            goto error;
2446
0
        }
2447
2448
0
        SKIP(parser);
2449
0
    }
2450
0
    else
2451
0
    {
2452
        /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2453
2454
        /* First, try to scan a handle. */
2455
2456
0
        if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2457
0
            goto error;
2458
2459
        /* Check if it is, indeed, handle. */
2460
2461
0
        if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2462
0
        {
2463
            /* Scan the suffix now. */
2464
2465
0
            if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2466
0
                goto error;
2467
0
        }
2468
0
        else
2469
0
        {
2470
            /* It wasn't a handle after all.  Scan the rest of the tag. */
2471
2472
0
            if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
2473
0
                goto error;
2474
2475
            /* Set the handle to '!'. */
2476
2477
0
            yaml_free(handle);
2478
0
            handle = yaml_malloc(2);
2479
0
            if (!handle) goto error;
2480
0
            handle[0] = '!';
2481
0
            handle[1] = '\0';
2482
2483
            /*
2484
             * A special case: the '!' tag.  Set the handle to '' and the
2485
             * suffix to '!'.
2486
             */
2487
2488
0
            if (suffix[0] == '\0') {
2489
0
                yaml_char_t *tmp = handle;
2490
0
                handle = suffix;
2491
0
                suffix = tmp;
2492
0
            }
2493
0
        }
2494
0
    }
2495
2496
    /* Check the character which ends the tag. */
2497
2498
0
    if (!CACHE(parser, 1)) goto error;
2499
2500
0
    if (!IS_BLANKZ(parser->buffer)) {
2501
0
        yaml_parser_set_scanner_error(parser, "while scanning a tag",
2502
0
                start_mark, "did not find expected whitespace or line break");
2503
0
        goto error;
2504
0
    }
2505
2506
0
    end_mark = parser->mark;
2507
2508
    /* Create a token. */
2509
2510
0
    TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2511
2512
0
    return 1;
2513
2514
0
error:
2515
0
    yaml_free(handle);
2516
0
    yaml_free(suffix);
2517
0
    return 0;
2518
0
}
2519
2520
/*
2521
 * Scan a tag handle.
2522
 */
2523
2524
static int
2525
yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2526
        yaml_mark_t start_mark, yaml_char_t **handle)
2527
0
{
2528
0
    yaml_string_t string = NULL_STRING;
2529
2530
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2531
2532
    /* Check the initial '!' character. */
2533
2534
0
    if (!CACHE(parser, 1)) goto error;
2535
2536
0
    if (!CHECK(parser->buffer, '!')) {
2537
0
        yaml_parser_set_scanner_error(parser, directive ?
2538
0
                "while scanning a tag directive" : "while scanning a tag",
2539
0
                start_mark, "did not find expected '!'");
2540
0
        goto error;
2541
0
    }
2542
2543
    /* Copy the '!' character. */
2544
2545
0
    if (!READ(parser, string)) goto error;
2546
2547
    /* Copy all subsequent alphabetical and numerical characters. */
2548
2549
0
    if (!CACHE(parser, 1)) goto error;
2550
2551
0
    while (IS_ALPHA(parser->buffer))
2552
0
    {
2553
0
        if (!READ(parser, string)) goto error;
2554
0
        if (!CACHE(parser, 1)) goto error;
2555
0
    }
2556
2557
    /* Check if the trailing character is '!' and copy it. */
2558
2559
0
    if (CHECK(parser->buffer, '!'))
2560
0
    {
2561
0
        if (!READ(parser, string)) goto error;
2562
0
    }
2563
0
    else
2564
0
    {
2565
        /*
2566
         * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2567
         * directive, it's an error.  If it's a tag token, it must be a part of
2568
         * URI.
2569
         */
2570
2571
0
        if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2572
0
            yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2573
0
                    start_mark, "did not find expected '!'");
2574
0
            goto error;
2575
0
        }
2576
0
    }
2577
2578
0
    *handle = string.start;
2579
2580
0
    return 1;
2581
2582
0
error:
2583
0
    STRING_DEL(parser, string);
2584
0
    return 0;
2585
0
}
2586
2587
/*
2588
 * Scan a tag.
2589
 */
2590
2591
static int
2592
yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
2593
        yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2594
0
{
2595
0
    size_t length = head ? strlen((char *)head) : 0;
2596
0
    yaml_string_t string = NULL_STRING;
2597
2598
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2599
2600
    /* Resize the string to include the head. */
2601
2602
0
    while ((size_t)(string.end - string.start) <= length) {
2603
0
        if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2604
0
            parser->error = YAML_MEMORY_ERROR;
2605
0
            goto error;
2606
0
        }
2607
0
    }
2608
2609
    /*
2610
     * Copy the head if needed.
2611
     *
2612
     * Note that we don't copy the leading '!' character.
2613
     */
2614
2615
0
    if (length > 1) {
2616
0
        memcpy(string.start, head+1, length-1);
2617
0
        string.pointer += length-1;
2618
0
    }
2619
2620
    /* Scan the tag. */
2621
2622
0
    if (!CACHE(parser, 1)) goto error;
2623
2624
    /*
2625
     * The set of characters that may appear in URI is as follows:
2626
     *
2627
     *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2628
     *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
2629
     *      '%'.
2630
     */
2631
2632
0
    while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2633
0
            || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2634
0
            || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2635
0
            || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2636
0
            || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2637
0
            || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
2638
0
            || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2639
0
            || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2640
0
            || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2641
0
            || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2642
0
            || CHECK(parser->buffer, '%'))
2643
0
    {
2644
        /* Check if it is a URI-escape sequence. */
2645
2646
0
        if (CHECK(parser->buffer, '%')) {
2647
0
            if (!STRING_EXTEND(parser, string))
2648
0
                goto error;
2649
2650
0
            if (!yaml_parser_scan_uri_escapes(parser,
2651
0
                        directive, start_mark, &string)) goto error;
2652
0
        }
2653
0
        else {
2654
0
            if (!READ(parser, string)) goto error;
2655
0
        }
2656
2657
0
        length ++;
2658
0
        if (!CACHE(parser, 1)) goto error;
2659
0
    }
2660
2661
    /* Check if the tag is non-empty. */
2662
2663
0
    if (!length) {
2664
0
        if (!STRING_EXTEND(parser, string))
2665
0
            goto error;
2666
2667
0
        yaml_parser_set_scanner_error(parser, directive ?
2668
0
                "while parsing a %TAG directive" : "while parsing a tag",
2669
0
                start_mark, "did not find expected tag URI");
2670
0
        goto error;
2671
0
    }
2672
2673
0
    *uri = string.start;
2674
2675
0
    return 1;
2676
2677
0
error:
2678
0
    STRING_DEL(parser, string);
2679
0
    return 0;
2680
0
}
2681
2682
/*
2683
 * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2684
 */
2685
2686
static int
2687
yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2688
        yaml_mark_t start_mark, yaml_string_t *string)
2689
0
{
2690
0
    int width = 0;
2691
2692
    /* Decode the required number of characters. */
2693
2694
0
    do {
2695
2696
0
        unsigned char octet = 0;
2697
2698
        /* Check for a URI-escaped octet. */
2699
2700
0
        if (!CACHE(parser, 3)) return 0;
2701
2702
0
        if (!(CHECK(parser->buffer, '%')
2703
0
                    && IS_HEX_AT(parser->buffer, 1)
2704
0
                    && IS_HEX_AT(parser->buffer, 2))) {
2705
0
            return yaml_parser_set_scanner_error(parser, directive ?
2706
0
                    "while parsing a %TAG directive" : "while parsing a tag",
2707
0
                    start_mark, "did not find URI escaped octet");
2708
0
        }
2709
2710
        /* Get the octet. */
2711
2712
0
        octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2713
2714
        /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2715
2716
0
        if (!width)
2717
0
        {
2718
0
            width = (octet & 0x80) == 0x00 ? 1 :
2719
0
                    (octet & 0xE0) == 0xC0 ? 2 :
2720
0
                    (octet & 0xF0) == 0xE0 ? 3 :
2721
0
                    (octet & 0xF8) == 0xF0 ? 4 : 0;
2722
0
            if (!width) {
2723
0
                return yaml_parser_set_scanner_error(parser, directive ?
2724
0
                        "while parsing a %TAG directive" : "while parsing a tag",
2725
0
                        start_mark, "found an incorrect leading UTF-8 octet");
2726
0
            }
2727
0
        }
2728
0
        else
2729
0
        {
2730
            /* Check if the trailing octet is correct. */
2731
2732
0
            if ((octet & 0xC0) != 0x80) {
2733
0
                return yaml_parser_set_scanner_error(parser, directive ?
2734
0
                        "while parsing a %TAG directive" : "while parsing a tag",
2735
0
                        start_mark, "found an incorrect trailing UTF-8 octet");
2736
0
            }
2737
0
        }
2738
2739
        /* Copy the octet and move the pointers. */
2740
2741
0
        *(string->pointer++) = octet;
2742
0
        SKIP(parser);
2743
0
        SKIP(parser);
2744
0
        SKIP(parser);
2745
2746
0
    } while (--width);
2747
2748
0
    return 1;
2749
0
}
2750
2751
/*
2752
 * Scan a block scalar.
2753
 */
2754
2755
static int
2756
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
2757
        int literal)
2758
0
{
2759
0
    yaml_mark_t start_mark;
2760
0
    yaml_mark_t end_mark;
2761
0
    yaml_string_t string = NULL_STRING;
2762
0
    yaml_string_t leading_break = NULL_STRING;
2763
0
    yaml_string_t trailing_breaks = NULL_STRING;
2764
0
    int chomping = 0;
2765
0
    int increment = 0;
2766
0
    int indent = 0;
2767
0
    int leading_blank = 0;
2768
0
    int trailing_blank = 0;
2769
2770
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2771
0
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2772
0
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2773
2774
    /* Eat the indicator '|' or '>'. */
2775
2776
0
    start_mark = parser->mark;
2777
2778
0
    SKIP(parser);
2779
2780
    /* Scan the additional block scalar indicators. */
2781
2782
0
    if (!CACHE(parser, 1)) goto error;
2783
2784
    /* Check for a chomping indicator. */
2785
2786
0
    if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2787
0
    {
2788
        /* Set the chomping method and eat the indicator. */
2789
2790
0
        chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2791
2792
0
        SKIP(parser);
2793
2794
        /* Check for an indentation indicator. */
2795
2796
0
        if (!CACHE(parser, 1)) goto error;
2797
2798
0
        if (IS_DIGIT(parser->buffer))
2799
0
        {
2800
            /* Check that the intendation is greater than 0. */
2801
2802
0
            if (CHECK(parser->buffer, '0')) {
2803
0
                yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2804
0
                        start_mark, "found an intendation indicator equal to 0");
2805
0
                goto error;
2806
0
            }
2807
2808
            /* Get the intendation level and eat the indicator. */
2809
2810
0
            increment = AS_DIGIT(parser->buffer);
2811
2812
0
            SKIP(parser);
2813
0
        }
2814
0
    }
2815
2816
    /* Do the same as above, but in the opposite order. */
2817
2818
0
    else if (IS_DIGIT(parser->buffer))
2819
0
    {
2820
0
        if (CHECK(parser->buffer, '0')) {
2821
0
            yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2822
0
                    start_mark, "found an intendation indicator equal to 0");
2823
0
            goto error;
2824
0
        }
2825
2826
0
        increment = AS_DIGIT(parser->buffer);
2827
2828
0
        SKIP(parser);
2829
2830
0
        if (!CACHE(parser, 1)) goto error;
2831
2832
0
        if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2833
0
            chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2834
2835
0
            SKIP(parser);
2836
0
        }
2837
0
    }
2838
2839
    /* Eat whitespaces and comments to the end of the line. */
2840
2841
0
    if (!CACHE(parser, 1)) goto error;
2842
2843
0
    while (IS_BLANK(parser->buffer)) {
2844
0
        SKIP(parser);
2845
0
        if (!CACHE(parser, 1)) goto error;
2846
0
    }
2847
2848
0
    if (CHECK(parser->buffer, '#')) {
2849
0
        while (!IS_BREAKZ(parser->buffer)) {
2850
0
            SKIP(parser);
2851
0
            if (!CACHE(parser, 1)) goto error;
2852
0
        }
2853
0
    }
2854
2855
    /* Check if we are at the end of the line. */
2856
2857
0
    if (!IS_BREAKZ(parser->buffer)) {
2858
0
        yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2859
0
                start_mark, "did not find expected comment or line break");
2860
0
        goto error;
2861
0
    }
2862
2863
    /* Eat a line break. */
2864
2865
0
    if (IS_BREAK(parser->buffer)) {
2866
0
        if (!CACHE(parser, 2)) goto error;
2867
0
        SKIP_LINE(parser);
2868
0
    }
2869
2870
0
    end_mark = parser->mark;
2871
2872
    /* Set the intendation level if it was specified. */
2873
2874
0
    if (increment) {
2875
0
        indent = parser->indent >= 0 ? parser->indent+increment : increment;
2876
0
    }
2877
2878
    /* Scan the leading line breaks and determine the indentation level if needed. */
2879
2880
0
    if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2881
0
                start_mark, &end_mark)) goto error;
2882
2883
    /* Scan the block scalar content. */
2884
2885
0
    if (!CACHE(parser, 1)) goto error;
2886
2887
0
    while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
2888
0
    {
2889
        /*
2890
         * We are at the beginning of a non-empty line.
2891
         */
2892
2893
        /* Is it a trailing whitespace? */
2894
2895
0
        trailing_blank = IS_BLANK(parser->buffer);
2896
2897
        /* Check if we need to fold the leading line break. */
2898
2899
0
        if (!literal && (*leading_break.start == '\n')
2900
0
                && !leading_blank && !trailing_blank)
2901
0
        {
2902
            /* Do we need to join the lines by space? */
2903
2904
0
            if (*trailing_breaks.start == '\0') {
2905
0
                if (!STRING_EXTEND(parser, string)) goto error;
2906
0
                *(string.pointer ++) = ' ';
2907
0
            }
2908
2909
0
            CLEAR(parser, leading_break);
2910
0
        }
2911
0
        else {
2912
0
            if (!JOIN(parser, string, leading_break)) goto error;
2913
0
            CLEAR(parser, leading_break);
2914
0
        }
2915
2916
        /* Append the remaining line breaks. */
2917
2918
0
        if (!JOIN(parser, string, trailing_breaks)) goto error;
2919
0
        CLEAR(parser, trailing_breaks);
2920
2921
        /* Is it a leading whitespace? */
2922
2923
0
        leading_blank = IS_BLANK(parser->buffer);
2924
2925
        /* Consume the current line. */
2926
2927
0
        while (!IS_BREAKZ(parser->buffer)) {
2928
0
            if (!READ(parser, string)) goto error;
2929
0
            if (!CACHE(parser, 1)) goto error;
2930
0
        }
2931
2932
        /* Consume the line break. */
2933
2934
0
        if (!CACHE(parser, 2)) goto error;
2935
2936
0
        if (!READ_LINE(parser, leading_break)) goto error;
2937
2938
        /* Eat the following intendation spaces and line breaks. */
2939
2940
0
        if (!yaml_parser_scan_block_scalar_breaks(parser,
2941
0
                    &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2942
0
    }
2943
2944
    /* Chomp the tail. */
2945
2946
0
    if (chomping != -1) {
2947
0
        if (!JOIN(parser, string, leading_break)) goto error;
2948
0
    }
2949
0
    if (chomping == 1) {
2950
0
        if (!JOIN(parser, string, trailing_breaks)) goto error;
2951
0
    }
2952
2953
    /* Create a token. */
2954
2955
0
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2956
0
            literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
2957
0
            start_mark, end_mark);
2958
2959
0
    STRING_DEL(parser, leading_break);
2960
0
    STRING_DEL(parser, trailing_breaks);
2961
2962
0
    return 1;
2963
2964
0
error:
2965
0
    STRING_DEL(parser, string);
2966
0
    STRING_DEL(parser, leading_break);
2967
0
    STRING_DEL(parser, trailing_breaks);
2968
2969
0
    return 0;
2970
0
}
2971
2972
/*
2973
 * Scan intendation spaces and line breaks for a block scalar.  Determine the
2974
 * intendation level if needed.
2975
 */
2976
2977
static int
2978
yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2979
        int *indent, yaml_string_t *breaks,
2980
        yaml_mark_t start_mark, yaml_mark_t *end_mark)
2981
0
{
2982
0
    int max_indent = 0;
2983
2984
0
    *end_mark = parser->mark;
2985
2986
    /* Eat the intendation spaces and line breaks. */
2987
2988
0
    while (1)
2989
0
    {
2990
        /* Eat the intendation spaces. */
2991
2992
0
        if (!CACHE(parser, 1)) return 0;
2993
2994
0
        while ((!*indent || (int)parser->mark.column < *indent)
2995
0
                && IS_SPACE(parser->buffer)) {
2996
0
            SKIP(parser);
2997
0
            if (!CACHE(parser, 1)) return 0;
2998
0
        }
2999
3000
0
        if ((int)parser->mark.column > max_indent)
3001
0
            max_indent = (int)parser->mark.column;
3002
3003
        /* Check for a tab character messing the intendation. */
3004
3005
0
        if ((!*indent || (int)parser->mark.column < *indent)
3006
0
                && IS_TAB(parser->buffer)) {
3007
0
            return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
3008
0
                    start_mark, "found a tab character where an intendation space is expected");
3009
0
        }
3010
3011
        /* Have we found a non-empty line? */
3012
3013
0
        if (!IS_BREAK(parser->buffer)) break;
3014
3015
        /* Consume the line break. */
3016
3017
0
        if (!CACHE(parser, 2)) return 0;
3018
0
        if (!READ_LINE(parser, *breaks)) return 0;
3019
0
        *end_mark = parser->mark;
3020
0
    }
3021
3022
    /* Determine the indentation level if needed. */
3023
3024
0
    if (!*indent) {
3025
0
        *indent = max_indent;
3026
0
        if (*indent < parser->indent + 1)
3027
0
            *indent = parser->indent + 1;
3028
0
        if (*indent < 1)
3029
0
            *indent = 1;
3030
0
    }
3031
3032
0
   return 1; 
3033
0
}
3034
3035
/*
3036
 * Scan a quoted scalar.
3037
 */
3038
3039
static int
3040
yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3041
        int single)
3042
0
{
3043
0
    yaml_mark_t start_mark;
3044
0
    yaml_mark_t end_mark;
3045
0
    yaml_string_t string = NULL_STRING;
3046
0
    yaml_string_t leading_break = NULL_STRING;
3047
0
    yaml_string_t trailing_breaks = NULL_STRING;
3048
0
    yaml_string_t whitespaces = NULL_STRING;
3049
0
    int leading_blanks;
3050
3051
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3052
0
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3053
0
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3054
0
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3055
3056
    /* Eat the left quote. */
3057
3058
0
    start_mark = parser->mark;
3059
3060
0
    SKIP(parser);
3061
3062
    /* Consume the content of the quoted scalar. */
3063
3064
0
    while (1)
3065
0
    {
3066
        /* Check that there are no document indicators at the beginning of the line. */
3067
3068
0
        if (!CACHE(parser, 4)) goto error;
3069
3070
0
        if (parser->mark.column == 0 &&
3071
0
            ((CHECK_AT(parser->buffer, '-', 0) &&
3072
0
              CHECK_AT(parser->buffer, '-', 1) &&
3073
0
              CHECK_AT(parser->buffer, '-', 2)) ||
3074
0
             (CHECK_AT(parser->buffer, '.', 0) &&
3075
0
              CHECK_AT(parser->buffer, '.', 1) &&
3076
0
              CHECK_AT(parser->buffer, '.', 2))) &&
3077
0
            IS_BLANKZ_AT(parser->buffer, 3))
3078
0
        {
3079
0
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3080
0
                    start_mark, "found unexpected document indicator");
3081
0
            goto error;
3082
0
        }
3083
3084
        /* Check for EOF. */
3085
3086
0
        if (IS_Z(parser->buffer)) {
3087
0
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3088
0
                    start_mark, "found unexpected end of stream");
3089
0
            goto error;
3090
0
        }
3091
3092
        /* Consume non-blank characters. */
3093
3094
0
        if (!CACHE(parser, 2)) goto error;
3095
3096
0
        leading_blanks = 0;
3097
3098
0
        while (!IS_BLANKZ(parser->buffer))
3099
0
        {
3100
            /* Check for an escaped single quote. */
3101
3102
0
            if (single && CHECK_AT(parser->buffer, '\'', 0)
3103
0
                    && CHECK_AT(parser->buffer, '\'', 1))
3104
0
            {
3105
0
                if (!STRING_EXTEND(parser, string)) goto error;
3106
0
                *(string.pointer++) = '\'';
3107
0
                SKIP(parser);
3108
0
                SKIP(parser);
3109
0
            }
3110
3111
            /* Check for the right quote. */
3112
3113
0
            else if (CHECK(parser->buffer, single ? '\'' : '"'))
3114
0
            {
3115
0
                break;
3116
0
            }
3117
3118
            /* Check for an escaped line break. */
3119
3120
0
            else if (!single && CHECK(parser->buffer, '\\')
3121
0
                    && IS_BREAK_AT(parser->buffer, 1))
3122
0
            {
3123
0
                if (!CACHE(parser, 3)) goto error;
3124
0
                SKIP(parser);
3125
0
                SKIP_LINE(parser);
3126
0
                leading_blanks = 1;
3127
0
                break;
3128
0
            }
3129
3130
            /* Check for an escape sequence. */
3131
3132
0
            else if (!single && CHECK(parser->buffer, '\\'))
3133
0
            {
3134
0
                size_t code_length = 0;
3135
3136
0
                if (!STRING_EXTEND(parser, string)) goto error;
3137
3138
                /* Check the escape character. */
3139
3140
0
                switch (parser->buffer.pointer[1])
3141
0
                {
3142
0
                    case '0':
3143
0
                        *(string.pointer++) = '\0';
3144
0
                        break;
3145
3146
0
                    case 'a':
3147
0
                        *(string.pointer++) = '\x07';
3148
0
                        break;
3149
3150
0
                    case 'b':
3151
0
                        *(string.pointer++) = '\x08';
3152
0
                        break;
3153
3154
0
                    case 't':
3155
0
                    case '\t':
3156
0
                        *(string.pointer++) = '\x09';
3157
0
                        break;
3158
3159
0
                    case 'n':
3160
0
                        *(string.pointer++) = '\x0A';
3161
0
                        break;
3162
3163
0
                    case 'v':
3164
0
                        *(string.pointer++) = '\x0B';
3165
0
                        break;
3166
3167
0
                    case 'f':
3168
0
                        *(string.pointer++) = '\x0C';
3169
0
                        break;
3170
3171
0
                    case 'r':
3172
0
                        *(string.pointer++) = '\x0D';
3173
0
                        break;
3174
3175
0
                    case 'e':
3176
0
                        *(string.pointer++) = '\x1B';
3177
0
                        break;
3178
3179
0
                    case ' ':
3180
0
                        *(string.pointer++) = '\x20';
3181
0
                        break;
3182
3183
0
                    case '"':
3184
0
                        *(string.pointer++) = '"';
3185
0
                        break;
3186
3187
0
                    case '\'':
3188
0
                        *(string.pointer++) = '\'';
3189
0
                        break;
3190
3191
0
                    case '\\':
3192
0
                        *(string.pointer++) = '\\';
3193
0
                        break;
3194
3195
0
                    case 'N':   /* NEL (#x85) */
3196
0
                        *(string.pointer++) = '\xC2';
3197
0
                        *(string.pointer++) = '\x85';
3198
0
                        break;
3199
3200
0
                    case '_':   /* #xA0 */
3201
0
                        *(string.pointer++) = '\xC2';
3202
0
                        *(string.pointer++) = '\xA0';
3203
0
                        break;
3204
3205
0
                    case 'L':   /* LS (#x2028) */
3206
0
                        *(string.pointer++) = '\xE2';
3207
0
                        *(string.pointer++) = '\x80';
3208
0
                        *(string.pointer++) = '\xA8';
3209
0
                        break;
3210
3211
0
                    case 'P':   /* PS (#x2029) */
3212
0
                        *(string.pointer++) = '\xE2';
3213
0
                        *(string.pointer++) = '\x80';
3214
0
                        *(string.pointer++) = '\xA9';
3215
0
                        break;
3216
3217
0
                    case 'x':
3218
0
                        code_length = 2;
3219
0
                        break;
3220
3221
0
                    case 'u':
3222
0
                        code_length = 4;
3223
0
                        break;
3224
3225
0
                    case 'U':
3226
0
                        code_length = 8;
3227
0
                        break;
3228
3229
0
                    default:
3230
0
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3231
0
                                start_mark, "found unknown escape character");
3232
0
                        goto error;
3233
0
                }
3234
3235
0
                SKIP(parser);
3236
0
                SKIP(parser);
3237
3238
                /* Consume an arbitrary escape code. */
3239
3240
0
                if (code_length)
3241
0
                {
3242
0
                    unsigned int value = 0;
3243
0
                    size_t k;
3244
3245
                    /* Scan the character value. */
3246
3247
0
                    if (!CACHE(parser, code_length)) goto error;
3248
3249
0
                    for (k = 0; k < code_length; k ++) {
3250
0
                        if (!IS_HEX_AT(parser->buffer, k)) {
3251
0
                            yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3252
0
                                    start_mark, "did not find expected hexdecimal number");
3253
0
                            goto error;
3254
0
                        }
3255
0
                        value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3256
0
                    }
3257
3258
                    /* Check the value and write the character. */
3259
3260
0
                    if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3261
0
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3262
0
                                start_mark, "found invalid Unicode character escape code");
3263
0
                        goto error;
3264
0
                    }
3265
3266
0
                    if (value <= 0x7F) {
3267
0
                        *(string.pointer++) = value;
3268
0
                    }
3269
0
                    else if (value <= 0x7FF) {
3270
0
                        *(string.pointer++) = 0xC0 + (value >> 6);
3271
0
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3272
0
                    }
3273
0
                    else if (value <= 0xFFFF) {
3274
0
                        *(string.pointer++) = 0xE0 + (value >> 12);
3275
0
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3276
0
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3277
0
                    }
3278
0
                    else {
3279
0
                        *(string.pointer++) = 0xF0 + (value >> 18);
3280
0
                        *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3281
0
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3282
0
                        *(string.pointer++) = 0x80 + (value & 0x3F);
3283
0
                    }
3284
3285
                    /* Advance the pointer. */
3286
3287
0
                    for (k = 0; k < code_length; k ++) {
3288
0
                        SKIP(parser);
3289
0
                    }
3290
0
                }
3291
0
            }
3292
3293
0
            else
3294
0
            {
3295
                /* It is a non-escaped non-blank character. */
3296
3297
0
                if (!READ(parser, string)) goto error;
3298
0
            }
3299
3300
0
            if (!CACHE(parser, 2)) goto error;
3301
0
        }
3302
3303
        /* Check if we are at the end of the scalar. */
3304
3305
0
        if (CHECK(parser->buffer, single ? '\'' : '"'))
3306
0
            break;
3307
3308
        /* Consume blank characters. */
3309
3310
0
        if (!CACHE(parser, 1)) goto error;
3311
3312
0
        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3313
0
        {
3314
0
            if (IS_BLANK(parser->buffer))
3315
0
            {
3316
                /* Consume a space or a tab character. */
3317
3318
0
                if (!leading_blanks) {
3319
0
                    if (!READ(parser, whitespaces)) goto error;
3320
0
                }
3321
0
                else {
3322
0
                    SKIP(parser);
3323
0
                }
3324
0
            }
3325
0
            else
3326
0
            {
3327
0
                if (!CACHE(parser, 2)) goto error;
3328
3329
                /* Check if it is a first line break. */
3330
3331
0
                if (!leading_blanks)
3332
0
                {
3333
0
                    CLEAR(parser, whitespaces);
3334
0
                    if (!READ_LINE(parser, leading_break)) goto error;
3335
0
                    leading_blanks = 1;
3336
0
                }
3337
0
                else
3338
0
                {
3339
0
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
3340
0
                }
3341
0
            }
3342
0
            if (!CACHE(parser, 1)) goto error;
3343
0
        }
3344
3345
        /* Join the whitespaces or fold line breaks. */
3346
3347
0
        if (leading_blanks)
3348
0
        {
3349
            /* Do we need to fold line breaks? */
3350
3351
0
            if (leading_break.start[0] == '\n') {
3352
0
                if (trailing_breaks.start[0] == '\0') {
3353
0
                    if (!STRING_EXTEND(parser, string)) goto error;
3354
0
                    *(string.pointer++) = ' ';
3355
0
                }
3356
0
                else {
3357
0
                    if (!JOIN(parser, string, trailing_breaks)) goto error;
3358
0
                    CLEAR(parser, trailing_breaks);
3359
0
                }
3360
0
                CLEAR(parser, leading_break);
3361
0
            }
3362
0
            else {
3363
0
                if (!JOIN(parser, string, leading_break)) goto error;
3364
0
                if (!JOIN(parser, string, trailing_breaks)) goto error;
3365
0
                CLEAR(parser, leading_break);
3366
0
                CLEAR(parser, trailing_breaks);
3367
0
            }
3368
0
        }
3369
0
        else
3370
0
        {
3371
0
            if (!JOIN(parser, string, whitespaces)) goto error;
3372
0
            CLEAR(parser, whitespaces);
3373
0
        }
3374
0
    }
3375
3376
    /* Eat the right quote. */
3377
3378
0
    SKIP(parser);
3379
3380
0
    end_mark = parser->mark;
3381
3382
    /* Create a token. */
3383
3384
0
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3385
0
            single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3386
0
            start_mark, end_mark);
3387
3388
0
    STRING_DEL(parser, leading_break);
3389
0
    STRING_DEL(parser, trailing_breaks);
3390
0
    STRING_DEL(parser, whitespaces);
3391
3392
0
    return 1;
3393
3394
0
error:
3395
0
    STRING_DEL(parser, string);
3396
0
    STRING_DEL(parser, leading_break);
3397
0
    STRING_DEL(parser, trailing_breaks);
3398
0
    STRING_DEL(parser, whitespaces);
3399
3400
0
    return 0;
3401
0
}
3402
3403
/*
3404
 * Scan a plain scalar.
3405
 */
3406
3407
static int
3408
yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3409
0
{
3410
0
    yaml_mark_t start_mark;
3411
0
    yaml_mark_t end_mark;
3412
0
    yaml_string_t string = NULL_STRING;
3413
0
    yaml_string_t leading_break = NULL_STRING;
3414
0
    yaml_string_t trailing_breaks = NULL_STRING;
3415
0
    yaml_string_t whitespaces = NULL_STRING;
3416
0
    int leading_blanks = 0;
3417
0
    int indent = parser->indent+1;
3418
3419
0
    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3420
0
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3421
0
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3422
0
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3423
3424
0
    start_mark = end_mark = parser->mark;
3425
3426
    /* Consume the content of the plain scalar. */
3427
3428
0
    while (1)
3429
0
    {
3430
        /* Check for a document indicator. */
3431
3432
0
        if (!CACHE(parser, 4)) goto error;
3433
3434
0
        if (parser->mark.column == 0 &&
3435
0
            ((CHECK_AT(parser->buffer, '-', 0) &&
3436
0
              CHECK_AT(parser->buffer, '-', 1) &&
3437
0
              CHECK_AT(parser->buffer, '-', 2)) ||
3438
0
             (CHECK_AT(parser->buffer, '.', 0) &&
3439
0
              CHECK_AT(parser->buffer, '.', 1) &&
3440
0
              CHECK_AT(parser->buffer, '.', 2))) &&
3441
0
            IS_BLANKZ_AT(parser->buffer, 3)) break;
3442
3443
        /* Check for a comment. */
3444
3445
0
        if (CHECK(parser->buffer, '#'))
3446
0
            break;
3447
3448
        /* Consume non-blank characters. */
3449
3450
0
        while (!IS_BLANKZ(parser->buffer))
3451
0
        {
3452
            /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
3453
3454
0
            if (parser->flow_level
3455
0
                    && CHECK(parser->buffer, ':')
3456
0
                    && !IS_BLANKZ_AT(parser->buffer, 1)) {
3457
0
                yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3458
0
                        start_mark, "found unexpected ':'");
3459
0
                goto error;
3460
0
            }
3461
3462
            /* Check for indicators that may end a plain scalar. */
3463
3464
0
            if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3465
0
                    || (parser->flow_level &&
3466
0
                        (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
3467
0
                         || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
3468
0
                         || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3469
0
                         || CHECK(parser->buffer, '}'))))
3470
0
                break;
3471
3472
            /* Check if we need to join whitespaces and breaks. */
3473
3474
0
            if (leading_blanks || whitespaces.start != whitespaces.pointer)
3475
0
            {
3476
0
                if (leading_blanks)
3477
0
                {
3478
                    /* Do we need to fold line breaks? */
3479
3480
0
                    if (leading_break.start[0] == '\n') {
3481
0
                        if (trailing_breaks.start[0] == '\0') {
3482
0
                            if (!STRING_EXTEND(parser, string)) goto error;
3483
0
                            *(string.pointer++) = ' ';
3484
0
                        }
3485
0
                        else {
3486
0
                            if (!JOIN(parser, string, trailing_breaks)) goto error;
3487
0
                            CLEAR(parser, trailing_breaks);
3488
0
                        }
3489
0
                        CLEAR(parser, leading_break);
3490
0
                    }
3491
0
                    else {
3492
0
                        if (!JOIN(parser, string, leading_break)) goto error;
3493
0
                        if (!JOIN(parser, string, trailing_breaks)) goto error;
3494
0
                        CLEAR(parser, leading_break);
3495
0
                        CLEAR(parser, trailing_breaks);
3496
0
                    }
3497
3498
0
                    leading_blanks = 0;
3499
0
                }
3500
0
                else
3501
0
                {
3502
0
                    if (!JOIN(parser, string, whitespaces)) goto error;
3503
0
                    CLEAR(parser, whitespaces);
3504
0
                }
3505
0
            }
3506
3507
            /* Copy the character. */
3508
3509
0
            if (!READ(parser, string)) goto error;
3510
3511
0
            end_mark = parser->mark;
3512
3513
0
            if (!CACHE(parser, 2)) goto error;
3514
0
        }
3515
3516
        /* Is it the end? */
3517
3518
0
        if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3519
0
            break;
3520
3521
        /* Consume blank characters. */
3522
3523
0
        if (!CACHE(parser, 1)) goto error;
3524
3525
0
        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3526
0
        {
3527
0
            if (IS_BLANK(parser->buffer))
3528
0
            {
3529
                /* Check for tab character that abuse intendation. */
3530
3531
0
                if (leading_blanks && (int)parser->mark.column < indent
3532
0
                        && IS_TAB(parser->buffer)) {
3533
0
                    yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3534
0
                            start_mark, "found a tab character that violate intendation");
3535
0
                    goto error;
3536
0
                }
3537
3538
                /* Consume a space or a tab character. */
3539
3540
0
                if (!leading_blanks) {
3541
0
                    if (!READ(parser, whitespaces)) goto error;
3542
0
                }
3543
0
                else {
3544
0
                    SKIP(parser);
3545
0
                }
3546
0
            }
3547
0
            else
3548
0
            {
3549
0
                if (!CACHE(parser, 2)) goto error;
3550
3551
                /* Check if it is a first line break. */
3552
3553
0
                if (!leading_blanks)
3554
0
                {
3555
0
                    CLEAR(parser, whitespaces);
3556
0
                    if (!READ_LINE(parser, leading_break)) goto error;
3557
0
                    leading_blanks = 1;
3558
0
                }
3559
0
                else
3560
0
                {
3561
0
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
3562
0
                }
3563
0
            }
3564
0
            if (!CACHE(parser, 1)) goto error;
3565
0
        }
3566
3567
        /* Check intendation level. */
3568
3569
0
        if (!parser->flow_level && (int)parser->mark.column < indent)
3570
0
            break;
3571
0
    }
3572
3573
    /* Create a token. */
3574
3575
0
    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3576
0
            YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3577
3578
    /* Note that we change the 'simple_key_allowed' flag. */
3579
3580
0
    if (leading_blanks) {
3581
0
        parser->simple_key_allowed = 1;
3582
0
    }
3583
3584
0
    STRING_DEL(parser, leading_break);
3585
0
    STRING_DEL(parser, trailing_breaks);
3586
0
    STRING_DEL(parser, whitespaces);
3587
3588
0
    return 1;
3589
3590
0
error:
3591
0
    STRING_DEL(parser, string);
3592
0
    STRING_DEL(parser, leading_break);
3593
0
    STRING_DEL(parser, trailing_breaks);
3594
0
    STRING_DEL(parser, whitespaces);
3595
3596
0
    return 0;
3597
0
}
3598