Coverage Report

Created: 2023-11-27 06:52

/src/jsonnet/third_party/rapidyaml/rapidyaml/src/c4/yml/parse.hpp
Line
Count
Source (jump to first uncovered line)
1
#ifndef _C4_YML_PARSE_HPP_
2
#define _C4_YML_PARSE_HPP_
3
4
#ifndef _C4_YML_TREE_HPP_
5
#include "c4/yml/tree.hpp"
6
#endif
7
8
#ifndef _C4_YML_NODE_HPP_
9
#include "c4/yml/node.hpp"
10
#endif
11
12
#ifndef _C4_YML_DETAIL_STACK_HPP_
13
#include "c4/yml/detail/stack.hpp"
14
#endif
15
16
#include <stdarg.h>
17
18
#if defined(_MSC_VER)
19
#   pragma warning(push)
20
#   pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/)
21
#endif
22
23
namespace c4 {
24
namespace yml {
25
26
27
//-----------------------------------------------------------------------------
28
//-----------------------------------------------------------------------------
29
//-----------------------------------------------------------------------------
30
class RYML_EXPORT Parser
31
{
32
public:
33
34
    Parser(Allocator const& a={});
35
36
public:
37
38
    //! create a new YAML tree and parse into its root
39
    //! @note aliases and anchors are not resolved. You
40
    //! can resolve by calling Tree::resolve() after parsing.
41
0
    Tree parse(csubstr filename, csubstr src)
    {
        Tree tree;
        tree.reserve(_estimate_capacity(src));
        // src is read-only: hand the parser the tree's arena copy of it
        parse(filename, tree.copy_to_arena(src), &tree, tree.root_id());
        return tree;
    }
42
    //! create a new YAML tree and parse into its root
43
    //! @note aliases and anchors are not resolved. You
44
    //! can resolve by calling Tree::resolve() after parsing.
45
0
    Tree parse(csubstr filename,  substr src)
    {
        Tree tree;
        tree.reserve(_estimate_capacity(src));
        // src is mutable, so it is passed on as-is
        parse(filename, src, &tree, tree.root_id());
        return tree;
    }
46
47
48
    //! parse with reuse of a YAML tree
49
    //! @note aliases and anchors are not resolved. You
50
    //! can resolve by calling Tree::resolve() after parsing.
51
0
    void parse(csubstr filename,  substr src, Tree *t)
    {
        // no node given: parse into the root of the provided tree
        parse(filename, src, t, t->root_id());
    }
52
    //! parse with reuse of a YAML tree
53
    //! @note aliases and anchors are not resolved. You
54
    //! can resolve by calling Tree::resolve() after parsing.
55
0
    void parse(csubstr filename, csubstr src, Tree *t)
    {
        // read-only source: parse the tree's arena copy of it into the root
        parse(filename, t->copy_to_arena(src), t, t->root_id());
    }
56
57
58
    //! parse directly into a node
59
    //! @note aliases and anchors are not resolved. You
60
    //! can resolve by calling Tree::resolve() after parsing.
61
    void parse(csubstr filename,  substr src, Tree *t, size_t node_id); // this is the workhorse overload; everything else is syntactic candy
62
    //! parse directly into a node
63
    //! @note aliases and anchors are not resolved. You
64
    //! can resolve by calling Tree::resolve() after parsing.
65
0
    void parse(csubstr filename, csubstr src, Tree *t, size_t node_id)
    {
        // read-only source: copy it to the tree's arena, then parse that copy
        auto arena_src = t->copy_to_arena(src);
        parse(filename, arena_src, t, node_id);
    }
66
67
68
    //! parse directly into a node ref
69
    //! @note aliases and anchors are not resolved. You
70
    //! can resolve by calling Tree::resolve() after parsing.
71
0
    void parse(csubstr filename,  substr src, NodeRef node)
    {
        // unpack the node ref into its tree and node id
        parse(filename, src, node.tree(), node.id());
    }
72
    //! parse directly into a node ref
73
    //! @note aliases and anchors are not resolved. You
74
    //! can resolve by calling Tree::resolve() after parsing.
75
0
    void parse(csubstr filename, csubstr src, NodeRef node)
    {
        // read-only source: parse the arena copy held by the node's tree
        parse(filename, node.tree()->copy_to_arena(src), node.tree(), node.id());
    }
76
77
78
private:
79
80
    //! How newlines inside a block scalar are treated; the character in
    //! parentheses is the indicator associated with each style.
    typedef enum {
81
        BLOCK_LITERAL, //!< keep newlines (|)
82
        BLOCK_FOLD     //!< replace newline with single space (>)
83
    } BlockStyle_e;
84
85
    //! How newlines at the end of a block scalar are treated; the character
    //! in parentheses is the indicator associated with each mode.
    typedef enum {
86
        CHOMP_CLIP,    //!< single newline at end (default)
87
        CHOMP_STRIP,   //!< no newline at end     (-)
88
        CHOMP_KEEP     //!< all newlines from end (+)
89
    } BlockChomp_e;
90
91
private:
92
93
0
    static size_t _estimate_capacity(csubstr src)
    {
        size_t const nlines = _count_nlines(src);
        // use the line count as the estimate, but never go below 16
        return nlines >= 16 ? nlines : 16;
    }
94
95
    void  _reset();
96
97
    bool  _finished_file() const;
98
    bool  _finished_line() const;
99
100
    csubstr _peek_next_line(size_t pos=npos) const;
101
    bool    _advance_to_peeked();
102
    void    _scan_line();
103
104
    csubstr _slurp_doc_scalar();
105
    bool    _scan_scalar(csubstr *scalar);
106
    csubstr _scan_comment();
107
    csubstr _scan_quoted_scalar(const char q);
108
    csubstr _scan_block();
109
    csubstr _scan_ref();
110
    substr  _scan_plain_scalar_impl(csubstr currscalar, csubstr peeked_line, size_t indentation);
111
    substr  _scan_plain_scalar_expl(csubstr currscalar, csubstr peeked_line);
112
    substr  _scan_complex_key(csubstr currscalar, csubstr peeked_line);
113
    csubstr _scan_to_next_nonempty_line(size_t indentation);
114
    csubstr _extend_scanned_scalar(csubstr currscalar);
115
116
    csubstr _filter_squot_scalar(substr s);
117
    csubstr _filter_dquot_scalar(substr s);
118
    csubstr _filter_plain_scalar(substr s, size_t indentation);
119
    csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation);
120
    substr  _filter_whitespace(substr s, size_t indentation=0, bool leading_whitespace=true);
121
122
    void  _handle_finished_file();
123
    void  _handle_line();
124
125
    bool  _handle_indentation();
126
127
    bool  _handle_unk();
128
    bool  _handle_map_expl();
129
    bool  _handle_map_impl();
130
    bool  _handle_seq_expl();
131
    bool  _handle_seq_impl();
132
    bool  _handle_top();
133
    bool  _handle_key_anchors_and_refs();
134
    bool  _handle_val_anchors_and_refs();
135
    bool  _handle_types();
136
137
138
    void  _push_level(bool explicit_flow_chars = false);
139
    void  _pop_level();
140
141
    void  _start_unk(bool as_child=true);
142
143
    void  _start_map(bool as_child=true);
144
    void  _stop_map();
145
146
    void  _start_seq(bool as_child=true);
147
    void  _stop_seq();
148
149
    void  _start_seqimap();
150
    void  _stop_seqimap();
151
152
    void  _start_doc(bool as_child=true);
153
    void  _stop_doc();
154
    void  _start_new_doc(csubstr rem);
155
    void  _end_stream();
156
157
    NodeData* _append_val(csubstr val);
158
    NodeData* _append_key_val(csubstr val);
159
0
    inline NodeData* _append_val_null()
    {
        // a null val is appended as an empty scalar rather than as "~"
        return _append_val({});
    }
160
0
    inline NodeData* _append_key_val_null()
    {
        // a null val is appended as an empty scalar rather than as "~"
        return _append_key_val({});
    }
161
    bool  _rval_dash_start_or_continue_seq();
162
163
    void  _store_scalar(csubstr const& s);
164
0
    void  _store_scalar_null()
    {
        // a null scalar is stored as an empty scalar rather than as "~"
        _store_scalar({});
    }
165
    csubstr _consume_scalar();
166
    void  _move_scalar_from_top();
167
168
    void  _set_indentation(size_t behind);
169
    void  _save_indentation(size_t behind=0);
170
171
    void  _write_key_anchor(size_t node_id);
172
    void  _write_val_anchor(size_t node_id);
173
174
175
private:
176
177
    static bool   _read_decimal(csubstr const& str, size_t *decimal);
178
    static size_t _count_nlines(csubstr src);
179
180
private:
181
182
    //! Parser state flags. Each enumerator is a distinct single bit, so
    //! values can be OR-ed together (State::reset() stores RUNK|RTOP) and
    //! queried with the has_all()/has_any()/has_none() helpers.
    typedef enum {
183
        RTOP = 0x01 <<  0,   ///< reading at top level
184
        RUNK = 0x01 <<  1,   ///< reading an unknown: must determine whether scalar, map or seq
185
        RMAP = 0x01 <<  2,   ///< reading a map
186
        RSEQ = 0x01 <<  3,   ///< reading a seq
187
        EXPL = 0x01 <<  4,   ///< reading is inside explicit flow chars: [] or {}
188
        CPLX = 0x01 <<  5,   ///< reading a complex key
189
        RKEY = 0x01 <<  6,   ///< reading a scalar as key
190
        RVAL = 0x01 <<  7,   ///< reading a scalar as val
191
        RNXT = 0x01 <<  8,   ///< read next val or keyval
192
        SSCL = 0x01 <<  9,   ///< there's a scalar stored
193
        RSET = 0x01 << 10,   ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
194
        NDOC = 0x01 << 11,   ///< no document mode. a document has ended and another has not started yet.
195
        //! reading an implicit map nested in an explicit seq.
196
        //! eg, {key: [key2: value2, key3: value3]}
197
        //! is parsed as {key: [{key2: value2}, {key3: value3}]}
198
        RSEQIMAP = 0x01 << 12,
199
    } State_e;
200
201
    //! Views over the line currently being processed, plus its indentation.
    struct LineContents
202
    {
203
        csubstr  full;        ///< the full line, including newlines on the right
204
        csubstr  stripped;    ///< the stripped line, excluding newlines on the right
205
        csubstr  rem;         ///< the stripped line remainder; initially starts at the first non-space character
206
        size_t   indentation; ///< the number of spaces on the beginning of the line
207
208
0
        LineContents() : full(), stripped(), rem(), indentation() {} // value-initializes every member
209
210
        void reset(csubstr full_, csubstr stripped_) // start over with a new line
211
0
        {
212
0
            full = full_;
213
0
            stripped = stripped_;
214
0
            rem = stripped_; // NOTE(review): rem restarts equal to stripped_; no leading spaces are skipped here
215
            // find the first column where the character is not a space
216
0
            indentation = full.first_not_of(' '); // NOTE(review): result for a line with no non-space character depends on first_not_of() - confirm
217
0
        }
218
219
        size_t current_col() const // column of the current remainder
220
0
        {
221
0
            return current_col(rem);
222
0
        }
223
224
        size_t current_col(csubstr s) const // zero-based column of s within the full line
225
0
        {
226
0
            RYML_ASSERT(s.str >= full.str);
227
0
            RYML_ASSERT(full.is_super(s));
228
0
            size_t col = static_cast<size_t>(s.str - full.str); // pointer distance from the start of the full line
229
0
            return col;
230
0
        }
231
    };
232
233
    //! One entry of the parser's state stack (m_stack holds these).
    struct State
234
    {
235
        size_t       flags; // reset() stores a combination of State_e bits here
236
        size_t       level;
237
        size_t       node_id; // don't hold a pointer to the node as it will be relocated during tree resizes
238
        csubstr      scalar;
239
        size_t       scalar_col; // the column where the scalar (or its quotes) begin
240
241
        Location     pos;
242
        LineContents line_contents;
243
        size_t       indref;
244
245
0
        State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} // value-initializes every member
246
        
247
        void reset(const char *file, size_t node_id_) // restart this state for a new parse targeting node_id_
248
0
        {
249
0
            flags = RUNK|RTOP; // top level, container type not yet determined
250
0
            level = 0;
251
0
            pos.name = to_csubstr(file);
252
0
            pos.offset = 0;
253
0
            pos.line = 1; // line and column restart at 1, the offset at 0
254
0
            pos.col = 1;
255
0
            node_id = node_id_;
256
0
            scalar_col = 0;
257
0
            scalar.clear();
258
0
            indref = 0;
259
0
        } // note: line_contents is not modified by reset()
260
    };
261
262
    void _line_progressed(size_t ahead);
263
    void _line_ended();
264
265
    //! Copy pos, line_contents and scalar from m_stack.top() into
    //! m_stack.top(1); no other State members are copied.
    //! Asserts that the stack holds more than one state.
    void _prepare_pop()
266
0
    {
267
0
        RYML_ASSERT(m_stack.size() > 1);
268
0
        State const& curr = m_stack.top();
269
0
        State      & next = m_stack.top(1); // presumably the entry directly below the top - confirm against detail::stack
270
0
        next.pos = curr.pos;
271
0
        next.line_contents = curr.line_contents;
272
0
        next.scalar = curr.scalar;
273
0
    }
274
275
    //! true when the remainder of the current line starts where the full line starts
    inline bool _at_line_begin() const
276
0
    {
277
0
        return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin();
278
0
    }
279
    //! true when the remainder of the current line is empty or passes the
    //! begins_with(' ', len) check over its whole length.
    inline bool _at_line_end() const
280
0
    {
281
0
        csubstr r = m_state->line_contents.rem;
282
0
        return r.empty() || r.begins_with(' ', r.len); // presumably "the first r.len characters are all spaces" - confirm against csubstr
283
0
    }
284
285
0
    inline NodeData * node(State const* s) const
    {
        // look the node up by id on every call
        return m_tree->get(s->node_id);
    }
286
0
    inline NodeData * node(State const& s) const
    {
        // look the node up by id on every call
        return m_tree->get(s.node_id);
    }
287
0
    inline NodeData * node(size_t node_id) const
    {
        // plain forward to the tree's lookup
        return m_tree->get(node_id);
    }
288
289
0
    inline bool has_all(size_t f) const
    {
        // every bit of f must be set in the current state's flags
        size_t const matched = m_state->flags & f;
        return matched == f;
    }
290
0
    inline bool has_any(size_t f) const
    {
        // at least one bit of f must be set in the current state's flags
        size_t const matched = m_state->flags & f;
        return matched != 0;
    }
291
0
    inline bool has_none(size_t f) const
    {
        // no bit of f may be set in the current state's flags
        size_t const matched = m_state->flags & f;
        return matched == 0;
    }
292
293
0
    static inline bool has_all(size_t f, State const* s)
    {
        // every bit of f must be set in the given state's flags
        size_t const matched = s->flags & f;
        return matched == f;
    }
294
0
    static inline bool has_any(size_t f, State const* s)
    {
        // at least one bit of f must be set in the given state's flags
        size_t const matched = s->flags & f;
        return matched != 0;
    }
295
0
    static inline bool has_none(size_t f, State const* s)
    {
        // no bit of f may be set in the given state's flags
        size_t const matched = s->flags & f;
        return matched == 0;
    }
296
297
0
    inline void set_flags(size_t f)
    {
        // apply to the current state
        set_flags(f, m_state);
    }
298
0
    inline void add_flags(size_t on)
    {
        // apply to the current state
        add_flags(on, m_state);
    }
299
0
    inline void addrem_flags(size_t on, size_t off)
    {
        // apply to the current state
        addrem_flags(on, off, m_state);
    }
300
0
    inline void rem_flags(size_t off)
    {
        // apply to the current state
        rem_flags(off, m_state);
    }
301
302
    void set_flags(size_t f, State * s);
303
    void add_flags(size_t on, State * s);
304
    void addrem_flags(size_t on, size_t off, State * s);
305
    void rem_flags(size_t off, State * s);
306
307
private:
308
309
#ifdef RYML_DBG
310
    void _dbg(const char *msg, ...) const;
311
#endif
312
    void _err(const char *msg, ...) const;
313
    int  _fmt_msg(char *buf, int buflen, const char *msg, va_list args) const;
314
    static int  _prfl(char *buf, int buflen, size_t v);
315
316
private:
317
318
    csubstr m_file;
319
     substr m_buf;
320
321
    size_t  m_root_id;
322
    Tree *  m_tree;
323
324
    detail::stack<State> m_stack;
325
    State * m_state;
326
327
    csubstr m_key_tag;
328
    csubstr m_val_tag;
329
330
    csubstr m_key_anchor;
331
    csubstr m_val_anchor;
332
333
};
334
335
336
//-----------------------------------------------------------------------------
337
//-----------------------------------------------------------------------------
338
//-----------------------------------------------------------------------------
339
340
0
//! parse in-situ a modifiable YAML source buffer.
inline Tree parse(substr buf)
{
    Parser parser;
    return parser.parse({}, buf);
}
341
0
//! parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
inline Tree parse(csubstr filename, substr buf)
{
    Parser parser;
    return parser.parse(filename, buf);
}
342
0
//! parse a read-only YAML source buffer, copying it first to the tree's source arena.
inline Tree parse(csubstr buf)
{
    Parser parser;
    return parser.parse({}, buf);
}
343
0
//! parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
inline Tree parse(csubstr filename, csubstr buf)
{
    Parser parser;
    return parser.parse(filename, buf);
}
344
345
0
//! reusing the YAML tree, parse in-situ a modifiable YAML source buffer
inline void parse(substr buf, Tree *t)
{
    Parser parser;
    parser.parse({}, buf, t);
}
346
0
//! reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
inline void parse(csubstr filename, substr buf, Tree *t)
{
    Parser parser;
    parser.parse(filename, buf, t);
}
347
0
//! reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
inline void parse(csubstr buf, Tree *t)
{
    Parser parser;
    parser.parse({}, buf, t);
}
348
0
//! reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
inline void parse(csubstr filename, csubstr buf, Tree *t)
{
    Parser parser;
    parser.parse(filename, buf, t);
}
349
350
0
//! reusing the YAML tree, parse in-situ a modifiable YAML source buffer
inline void parse(substr buf, Tree *t, size_t node_id)
{
    Parser parser;
    parser.parse({}, buf, t, node_id);
}
351
0
inline void parse(csubstr filename,  substr buf, Tree *t, size_t node_id) { Parser np; np.parse(filename, buf, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
352
0
//! reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
inline void parse(csubstr buf, Tree *t, size_t node_id)
{
    Parser parser;
    parser.parse({}, buf, t, node_id);
}
353
0
inline void parse(csubstr filename, csubstr buf, Tree *t, size_t node_id) { Parser np; np.parse(filename, buf, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
354
355
0
//! reusing the YAML tree, parse in-situ a modifiable YAML source buffer
inline void parse(substr buf, NodeRef node)
{
    Parser parser;
    parser.parse({}, buf, node);
}
356
0
//! reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
inline void parse(csubstr filename, substr buf, NodeRef node)
{
    Parser parser;
    parser.parse(filename, buf, node);
}
357
0
//! reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
inline void parse(csubstr buf, NodeRef node)
{
    Parser parser;
    parser.parse({}, buf, node);
}
358
0
//! reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
inline void parse(csubstr filename, csubstr buf, NodeRef node)
{
    Parser parser;
    parser.parse(filename, buf, node);
}
359
360
} // namespace yml
361
} // namespace c4
362
363
#if defined(_MSC_VER)
364
#   pragma warning(pop)
365
#endif
366
367
#endif /* _C4_YML_PARSE_HPP_ */