Coverage Report

Created: 2025-12-12 07:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hermes/external/llvh/include/llvh/Support/YAMLParser.h
Line
Count
Source
1
//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
//  This is a YAML 1.2 parser.
11
//
12
//  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
13
//
14
//  This currently does not implement the following:
15
//    * Multi-line literal folding.
16
//    * Tag resolution.
17
//    * UTF-16.
18
//    * BOMs anywhere other than the first Unicode scalar value in the file.
19
//
20
//  The most important class here is Stream. This represents a YAML stream with
21
//  0, 1, or many documents.
22
//
23
//  SourceMgr sm;
24
//  StringRef input = getInput();
25
//  yaml::Stream stream(input, sm);
26
//
27
//  for (yaml::document_iterator di = stream.begin(), de = stream.end();
28
//       di != de; ++di) {
29
//    yaml::Node *n = di->getRoot();
30
//    if (n) {
31
//      // Do something with n...
32
//    } else
33
//      break;
34
//  }
35
//
36
//===----------------------------------------------------------------------===//
37
38
#ifndef LLVM_SUPPORT_YAMLPARSER_H
39
#define LLVM_SUPPORT_YAMLPARSER_H
40
41
#include "llvh/ADT/StringRef.h"
42
#include "llvh/Support/Allocator.h"
43
#include "llvh/Support/SMLoc.h"
44
#include <cassert>
45
#include <cstddef>
46
#include <iterator>
47
#include <map>
48
#include <memory>
49
#include <string>
50
#include <system_error>
51
52
namespace llvh {
53
54
class MemoryBufferRef;
55
class SourceMgr;
56
class raw_ostream;
57
class Twine;
58
59
namespace yaml {
60
61
class Document;
62
class document_iterator;
63
class Node;
64
class Scanner;
65
struct Token;
66
67
/// Dump all the tokens in this stream to OS.
68
/// \returns true if there was an error, false otherwise.
69
bool dumpTokens(StringRef Input, raw_ostream &);
70
71
/// Scans all tokens in input without outputting anything. This is used
72
///        for benchmarking the tokenizer.
73
/// \returns true if there was an error, false otherwise.
74
bool scanTokens(StringRef Input);
75
76
/// Escape \a Input for a double quoted scalar; if \p EscapePrintable
77
/// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
78
/// false, those UTF8 sequences encoding printable unicode scalars will not be
79
/// escaped, but emitted verbatim.
80
std::string escape(StringRef Input, bool EscapePrintable = true);
81
82
/// This class represents a YAML stream potentially containing multiple
83
///        documents.
84
class Stream {
85
public:
86
  /// This keeps a reference to the string referenced by \p Input.
87
  Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
88
         std::error_code *EC = nullptr);
89
90
  Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
91
         std::error_code *EC = nullptr);
92
  ~Stream();
93
94
  document_iterator begin();
95
  document_iterator end();
96
  void skip();
97
  bool failed();
98
99
0
  bool validate() {
100
0
    skip();
101
0
    return !failed();
102
0
  }
103
104
  void printError(Node *N, const Twine &Msg);
105
106
private:
107
  friend class Document;
108
109
  std::unique_ptr<Scanner> scanner;
110
  std::unique_ptr<Document> CurrentDoc;
111
};
112
113
/// Abstract base class for all Nodes.
114
class Node {
115
  virtual void anchor();
116
117
public:
118
  enum NodeKind {
119
    NK_Null,
120
    NK_Scalar,
121
    NK_BlockScalar,
122
    NK_KeyValue,
123
    NK_Mapping,
124
    NK_Sequence,
125
    NK_Alias
126
  };
127
128
  Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
129
       StringRef Tag);
130
131
  // It's not safe to copy YAML nodes; the document is streamed and the position
132
  // is part of the state.
133
  Node(const Node &) = delete;
134
  void operator=(const Node &) = delete;
135
136
  void *operator new(size_t Size, BumpPtrAllocator &Alloc,
137
0
                     size_t Alignment = 16) noexcept {
138
0
    return Alloc.Allocate(Size, Alignment);
139
0
  }
140
141
  void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
142
0
                       size_t Size) noexcept {
143
0
    Alloc.Deallocate(Ptr, Size);
144
0
  }
145
146
  void operator delete(void *) noexcept = delete;
147
148
  /// Get the value of the anchor attached to this node. If it does not
149
  ///        have one, getAnchor().size() will be 0.
150
0
  StringRef getAnchor() const { return Anchor; }
151
152
  /// Get the tag as it was written in the document. This does not
153
  ///   perform tag resolution.
154
0
  StringRef getRawTag() const { return Tag; }
155
156
  /// Get the verbatim tag for a given Node. This performs tag resolution
157
  ///   and substitution.
158
  std::string getVerbatimTag() const;
159
160
0
  SMRange getSourceRange() const { return SourceRange; }
161
0
  void setSourceRange(SMRange SR) { SourceRange = SR; }
162
163
  // These functions forward to Document and Scanner.
164
  Token &peekNext();
165
  Token getNext();
166
  Node *parseBlockNode();
167
  BumpPtrAllocator &getAllocator();
168
  void setError(const Twine &Message, Token &Location) const;
169
  bool failed() const;
170
171
0
  virtual void skip() {}
172
173
0
  unsigned int getType() const { return TypeID; }
174
175
protected:
176
  std::unique_ptr<Document> &Doc;
177
  SMRange SourceRange;
178
179
  ~Node() = default;
180
181
private:
182
  unsigned int TypeID;
183
  StringRef Anchor;
184
  /// The tag as typed in the document.
185
  StringRef Tag;
186
};
187
188
/// A null value.
189
///
190
/// Example:
191
///   !!null null
192
class NullNode final : public Node {
193
  void anchor() override;
194
195
public:
196
  NullNode(std::unique_ptr<Document> &D)
197
0
      : Node(NK_Null, D, StringRef(), StringRef()) {}
198
199
0
  static bool classof(const Node *N) { return N->getType() == NK_Null; }
200
};
201
202
/// A scalar node is an opaque datum that can be presented as a
203
///        series of zero or more Unicode scalar values.
204
///
205
/// Example:
206
///   Adena
207
class ScalarNode final : public Node {
208
  void anchor() override;
209
210
public:
211
  ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
212
             StringRef Val)
213
0
      : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
214
0
    SMLoc Start = SMLoc::getFromPointer(Val.begin());
215
0
    SMLoc End = SMLoc::getFromPointer(Val.end());
216
0
    SourceRange = SMRange(Start, End);
217
0
  }
218
219
  // Return Value without any escaping or folding or other fun YAML stuff. These
220
  // are the exact bytes that are contained in the file (after conversion to
221
  // UTF-8).
222
0
  StringRef getRawValue() const { return Value; }
223
224
  /// Gets the value of this node as a StringRef.
225
  ///
226
  /// \param Storage is used to store the content of the returned StringRef iff
227
  ///        it requires any modification from how it appeared in the source.
228
  ///        This happens with escaped characters and multi-line literals.
229
  StringRef getValue(SmallVectorImpl<char> &Storage) const;
230
231
0
  static bool classof(const Node *N) {
232
0
    return N->getType() == NK_Scalar;
233
0
  }
234
235
private:
236
  StringRef Value;
237
238
  StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
239
                                 StringRef::size_type Start,
240
                                 SmallVectorImpl<char> &Storage) const;
241
};
242
243
/// A block scalar node is an opaque datum that can be presented as a
244
///        series of zero or more Unicode scalar values.
245
///
246
/// Example:
247
///   |
248
///     Hello
249
///     World
250
class BlockScalarNode final : public Node {
251
  void anchor() override;
252
253
public:
254
  BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
255
                  StringRef Value, StringRef RawVal)
256
0
      : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
257
0
    SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
258
0
    SMLoc End = SMLoc::getFromPointer(RawVal.end());
259
0
    SourceRange = SMRange(Start, End);
260
0
  }
261
262
  /// Gets the value of this node as a StringRef.
263
0
  StringRef getValue() const { return Value; }
264
265
0
  static bool classof(const Node *N) {
266
0
    return N->getType() == NK_BlockScalar;
267
0
  }
268
269
private:
270
  StringRef Value;
271
};
272
273
/// A key and value pair. While not technically a Node under the YAML
274
///        representation graph, it is easier to treat them this way.
275
///
276
/// TODO: Consider making this not a child of Node.
277
///
278
/// Example:
279
///   Section: .text
280
class KeyValueNode final : public Node {
281
  void anchor() override;
282
283
public:
284
  KeyValueNode(std::unique_ptr<Document> &D)
285
0
      : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
286
287
  /// Parse and return the key.
288
  ///
289
  /// This may be called multiple times.
290
  ///
291
  /// \returns The key, or nullptr if failed() == true.
292
  Node *getKey();
293
294
  /// Parse and return the value.
295
  ///
296
  /// This may be called multiple times.
297
  ///
298
  /// \returns The value, or nullptr if failed() == true.
299
  Node *getValue();
300
301
0
  void skip() override {
302
0
    if (Node *Key = getKey()) {
303
0
      Key->skip();
304
0
      if (Node *Val = getValue())
305
0
        Val->skip();
306
0
    }
307
0
  }
308
309
0
  static bool classof(const Node *N) {
310
0
    return N->getType() == NK_KeyValue;
311
0
  }
312
313
private:
314
  Node *Key = nullptr;
315
  Node *Value = nullptr;
316
};
317
318
/// This is an iterator abstraction over YAML collections shared by both
319
///        sequences and maps.
320
///
321
/// BaseT must have a ValueT* member named CurrentEntry and a member function
322
/// increment() which must set CurrentEntry to 0 to create an end iterator.
323
template <class BaseT, class ValueT>
324
class basic_collection_iterator {
325
public:
326
  using iterator_category = std::input_iterator_tag;
327
  using value_type = ValueT;
328
  using difference_type = std::ptrdiff_t;
329
  using pointer = value_type *;
330
  using reference = value_type &;
331
332
  basic_collection_iterator() = default;
333
  basic_collection_iterator(BaseT *B) : Base(B) {}
334
335
0
  ValueT *operator->() const {
336
0
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
337
0
    return Base->CurrentEntry;
338
0
  }
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::MappingNode, llvh::yaml::KeyValueNode>::operator->() const
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::SequenceNode, llvh::yaml::Node>::operator->() const
339
340
  ValueT &operator*() const {
341
    assert(Base && Base->CurrentEntry &&
342
           "Attempted to dereference end iterator!");
343
    return *Base->CurrentEntry;
344
  }
345
346
  operator ValueT *() const {
347
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
348
    return Base->CurrentEntry;
349
  }
350
351
  /// Note on EqualityComparable:
352
  ///
353
  /// The iterator is not re-entrant;
354
  /// it is meant to be used for parsing YAML on demand.
355
  /// Once iteration has started, it can point to only one entry at a time,
356
  /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
357
  /// iff Base and Other.Base are equal.
358
  bool operator==(const basic_collection_iterator &Other) const {
359
    if (Base && (Base == Other.Base)) {
360
      assert((Base->CurrentEntry == Other.Base->CurrentEntry)
361
             && "Equal Bases expected to point to equal Entries");
362
    }
363
364
    return Base == Other.Base;
365
  }
366
367
0
  bool operator!=(const basic_collection_iterator &Other) const {
368
0
    return !(Base == Other.Base);
369
0
  }
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::MappingNode, llvh::yaml::KeyValueNode>::operator!=(llvh::yaml::basic_collection_iterator<llvh::yaml::MappingNode, llvh::yaml::KeyValueNode> const&) const
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::SequenceNode, llvh::yaml::Node>::operator!=(llvh::yaml::basic_collection_iterator<llvh::yaml::SequenceNode, llvh::yaml::Node> const&) const
370
371
0
  basic_collection_iterator &operator++() {
372
0
    assert(Base && "Attempted to advance iterator past end!");
373
0
    Base->increment();
374
0
    // Create an end iterator.
375
0
    if (!Base->CurrentEntry)
376
0
      Base = nullptr;
377
0
    return *this;
378
0
  }
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::MappingNode, llvh::yaml::KeyValueNode>::operator++()
Unexecuted instantiation: llvh::yaml::basic_collection_iterator<llvh::yaml::SequenceNode, llvh::yaml::Node>::operator++()
379
380
private:
381
  BaseT *Base = nullptr;
382
};
383
384
// The following two templates are used for both MappingNode and SequenceNode.
385
template <class CollectionType>
386
0
typename CollectionType::iterator begin(CollectionType &C) {
387
0
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
388
0
  C.IsAtBeginning = false;
389
0
  typename CollectionType::iterator ret(&C);
390
0
  ++ret;
391
0
  return ret;
392
0
}
Unexecuted instantiation: llvh::yaml::MappingNode::iterator llvh::yaml::begin<llvh::yaml::MappingNode>(llvh::yaml::MappingNode&)
Unexecuted instantiation: llvh::yaml::SequenceNode::iterator llvh::yaml::begin<llvh::yaml::SequenceNode>(llvh::yaml::SequenceNode&)
393
394
0
template <class CollectionType> void skip(CollectionType &C) {
395
0
  // TODO: support skipping from the middle of a parsed collection ;/
396
0
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
397
0
  if (C.IsAtBeginning)
398
0
    for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
399
0
         ++i)
400
0
      i->skip();
401
0
}
Unexecuted instantiation: void llvh::yaml::skip<llvh::yaml::MappingNode>(llvh::yaml::MappingNode&)
Unexecuted instantiation: void llvh::yaml::skip<llvh::yaml::SequenceNode>(llvh::yaml::SequenceNode&)
402
403
/// Represents a YAML map created from either a block map or a flow map.
404
///
405
/// This parses the YAML stream as increment() is called.
406
///
407
/// Example:
408
///   Name: _main
409
///   Scope: Global
410
class MappingNode final : public Node {
411
  void anchor() override;
412
413
public:
414
  enum MappingType {
415
    MT_Block,
416
    MT_Flow,
417
    MT_Inline ///< An inline mapping node is used for "[key: value]".
418
  };
419
420
  MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
421
              MappingType MT)
422
0
      : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
423
424
  friend class basic_collection_iterator<MappingNode, KeyValueNode>;
425
426
  using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
427
428
  template <class T> friend typename T::iterator yaml::begin(T &);
429
  template <class T> friend void yaml::skip(T &);
430
431
0
  iterator begin() { return yaml::begin(*this); }
432
433
0
  iterator end() { return iterator(); }
434
435
0
  void skip() override { yaml::skip(*this); }
436
437
0
  static bool classof(const Node *N) {
438
0
    return N->getType() == NK_Mapping;
439
0
  }
440
441
private:
442
  MappingType Type;
443
  bool IsAtBeginning = true;
444
  bool IsAtEnd = false;
445
  KeyValueNode *CurrentEntry = nullptr;
446
447
  void increment();
448
};
449
450
/// Represents a YAML sequence created from either a block sequence or a
451
///        flow sequence.
452
///
453
/// This parses the YAML stream as increment() is called.
454
///
455
/// Example:
456
///   - Hello
457
///   - World
458
class SequenceNode final : public Node {
459
  void anchor() override;
460
461
public:
462
  enum SequenceType {
463
    ST_Block,
464
    ST_Flow,
465
    // Use for:
466
    //
467
    // key:
468
    // - val1
469
    // - val2
470
    //
471
    // As a BlockMappingEntry and BlockEnd are not created in this case.
472
    ST_Indentless
473
  };
474
475
  SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
476
               SequenceType ST)
477
0
      : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
478
479
  friend class basic_collection_iterator<SequenceNode, Node>;
480
481
  using iterator = basic_collection_iterator<SequenceNode, Node>;
482
483
  template <class T> friend typename T::iterator yaml::begin(T &);
484
  template <class T> friend void yaml::skip(T &);
485
486
  void increment();
487
488
0
  iterator begin() { return yaml::begin(*this); }
489
490
0
  iterator end() { return iterator(); }
491
492
0
  void skip() override { yaml::skip(*this); }
493
494
0
  static bool classof(const Node *N) {
495
0
    return N->getType() == NK_Sequence;
496
0
  }
497
498
private:
499
  SequenceType SeqType;
500
  bool IsAtBeginning = true;
501
  bool IsAtEnd = false;
502
  bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
503
  Node *CurrentEntry = nullptr;
504
};
505
506
/// Represents an alias to a Node with an anchor.
507
///
508
/// Example:
509
///   *AnchorName
510
class AliasNode final : public Node {
511
  void anchor() override;
512
513
public:
514
  AliasNode(std::unique_ptr<Document> &D, StringRef Val)
515
0
      : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
516
517
0
  StringRef getName() const { return Name; }
518
  Node *getTarget();
519
520
0
  static bool classof(const Node *N) { return N->getType() == NK_Alias; }
521
522
private:
523
  StringRef Name;
524
};
525
526
/// A YAML Stream is a sequence of Documents. A document contains a root
527
///        node.
528
class Document {
529
public:
530
  Document(Stream &ParentStream);
531
532
  /// Root for parsing a node. Returns a single node.
533
  Node *parseBlockNode();
534
535
  /// Finish parsing the current document and return true if there are
536
  ///        more. Return false otherwise.
537
  bool skip();
538
539
  /// Parse and return the root level node.
540
0
  Node *getRoot() {
541
0
    if (Root)
542
0
      return Root;
543
0
    return Root = parseBlockNode();
544
0
  }
545
546
0
  const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
547
548
private:
549
  friend class Node;
550
  friend class document_iterator;
551
552
  /// Stream to read tokens from.
553
  Stream &stream;
554
555
  /// Used to allocate nodes to. All are destroyed without calling their
556
  ///        destructor when the document is destroyed.
557
  BumpPtrAllocator NodeAllocator;
558
559
  /// The root node. Used to support skipping a partially parsed
560
  ///        document.
561
  Node *Root;
562
563
  /// Maps tag prefixes to their expansion.
564
  std::map<StringRef, StringRef> TagMap;
565
566
  Token &peekNext();
567
  Token getNext();
568
  void setError(const Twine &Message, Token &Location) const;
569
  bool failed() const;
570
571
  /// Parse %BLAH directives and return true if any were encountered.
572
  bool parseDirectives();
573
574
  /// Parse %YAML
575
  void parseYAMLDirective();
576
577
  /// Parse %TAG
578
  void parseTAGDirective();
579
580
  /// Consume the next token and error if it is not \a TK.
581
  bool expectToken(int TK);
582
};
583
584
/// Iterator abstraction for Documents over a Stream.
585
class document_iterator {
586
public:
587
  document_iterator() = default;
588
0
  document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
589
590
0
  bool operator==(const document_iterator &Other) const {
591
0
    if (isAtEnd() || Other.isAtEnd())
592
0
      return isAtEnd() && Other.isAtEnd();
593
0
594
0
    return Doc == Other.Doc;
595
0
  }
596
0
  bool operator!=(const document_iterator &Other) const {
597
0
    return !(*this == Other);
598
0
  }
599
600
0
  document_iterator operator++() {
601
0
    assert(Doc && "incrementing iterator past the end.");
602
0
    if (!(*Doc)->skip()) {
603
0
      Doc->reset(nullptr);
604
0
    } else {
605
0
      Stream &S = (*Doc)->stream;
606
0
      Doc->reset(new Document(S));
607
0
    }
608
0
    return *this;
609
0
  }
610
611
0
  Document &operator*() { return *Doc->get(); }
612
613
0
  std::unique_ptr<Document> &operator->() { return *Doc; }
614
615
private:
616
0
  bool isAtEnd() const { return !Doc || !*Doc; }
617
618
  std::unique_ptr<Document> *Doc = nullptr;
619
};
620
621
} // end namespace yaml
622
623
} // end namespace llvh
624
625
#endif // LLVM_SUPPORT_YAMLPARSER_H