/src/jsonnet/third_party/rapidyaml/rapidyaml/src/c4/yml/parse.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef _C4_YML_PARSE_HPP_ |
2 | | #define _C4_YML_PARSE_HPP_ |
3 | | |
4 | | #ifndef _C4_YML_TREE_HPP_ |
5 | | #include "c4/yml/tree.hpp" |
6 | | #endif |
7 | | |
8 | | #ifndef _C4_YML_NODE_HPP_ |
9 | | #include "c4/yml/node.hpp" |
10 | | #endif |
11 | | |
12 | | #ifndef _C4_YML_DETAIL_STACK_HPP_ |
13 | | #include "c4/yml/detail/stack.hpp" |
14 | | #endif |
15 | | |
16 | | #include <stdarg.h> |
17 | | |
18 | | #if defined(_MSC_VER) |
19 | | # pragma warning(push) |
20 | | # pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) |
21 | | #endif |
22 | | |
23 | | namespace c4 { |
24 | | namespace yml { |
25 | | |
26 | | |
27 | | //----------------------------------------------------------------------------- |
28 | | //----------------------------------------------------------------------------- |
29 | | //----------------------------------------------------------------------------- |
30 | | class RYML_EXPORT Parser /* parses YAML source text into a Tree; the public entry points are the parse() overloads below */ |
31 | | { |
32 | | public: |
33 | | |
34 | | Parser(Allocator const& a={}); /* the allocator is optional: it defaults to a value-initialized Allocator */ |
35 | | |
36 | | public: |
37 | | |
38 | | //! create a new YAML tree and parse into its root. The read-only source is first copied into the new tree's arena, and the copy is what gets parsed. |
39 | | //! @note aliases and anchors are not resolved. You |
40 | | //! can resolve by calling Tree::resolve() after parsing. |
41 | 0 | Tree parse(csubstr filename, csubstr src) { Tree t; t.reserve(_estimate_capacity(src)); parse(filename, t.copy_to_arena(src), &t, t.root_id()); return t; } |
42 | | //! create a new YAML tree and parse into its root. The modifiable source is handed to the workhorse overload as is (no copy is made). |
43 | | //! @note aliases and anchors are not resolved. You |
44 | | //! can resolve by calling Tree::resolve() after parsing. |
45 | 0 | Tree parse(csubstr filename, substr src) { Tree t; t.reserve(_estimate_capacity(src)); parse(filename, src, &t, t.root_id()); return t; } |
46 | | |
47 | | |
48 | | //! parse into the root of an existing YAML tree, using the modifiable source as is. @p t is dereferenced, so it must not be null. |
49 | | //! @note aliases and anchors are not resolved. You |
50 | | //! can resolve by calling Tree::resolve() after parsing. |
51 | 0 | void parse(csubstr filename, substr src, Tree *t) { parse(filename, src, t, t->root_id()); } |
52 | | //! parse into the root of an existing YAML tree. The read-only source is first copied into the arena of @p t, which must not be null. |
53 | | //! @note aliases and anchors are not resolved. You |
54 | | //! can resolve by calling Tree::resolve() after parsing. |
55 | 0 | void parse(csubstr filename, csubstr src, Tree *t) { parse(filename, t->copy_to_arena(src), t, t->root_id()); } |
56 | | |
57 | | |
58 | | //! parse directly into the node @p node_id of tree @p t, using the modifiable source as is |
59 | | //! @note aliases and anchors are not resolved. You |
60 | | //! can resolve by calling Tree::resolve() after parsing. |
61 | | void parse(csubstr filename, substr src, Tree *t, size_t node_id); // this is the workhorse overload (only declared here); every other parse() overload ends up forwarding to it |
62 | | //! parse directly into the node @p node_id of tree @p t. The read-only source is first copied into the arena of @p t. |
63 | | //! @note aliases and anchors are not resolved. You |
64 | | //! can resolve by calling Tree::resolve() after parsing. |
65 | 0 | void parse(csubstr filename, csubstr src, Tree *t, size_t node_id) { parse(filename, t->copy_to_arena(src), t, node_id); } |
66 | | |
67 | | |
68 | | //! parse directly into a node ref: the target tree and node id are taken from @p node. The modifiable source is used as is. |
69 | | //! @note aliases and anchors are not resolved. You |
70 | | //! can resolve by calling Tree::resolve() after parsing. |
71 | 0 | void parse(csubstr filename, substr src, NodeRef node) { parse(filename, src, node.tree(), node.id()); } |
72 | | //! parse directly into a node ref. The read-only source is first copied into the arena of the tree returned by node.tree(). |
73 | | //! @note aliases and anchors are not resolved. You |
74 | | //! can resolve by calling Tree::resolve() after parsing. |
75 | 0 | void parse(csubstr filename, csubstr src, NodeRef node) { parse(filename, node.tree()->copy_to_arena(src), node.tree(), node.id()); } |
76 | | |
77 | | |
78 | | private: |
79 | | |
80 | | typedef enum { |
81 | | BLOCK_LITERAL, //!< literal style (|): newlines are kept |
82 | | BLOCK_FOLD //!< folded style (>): a newline is replaced with a single space |
83 | | } BlockStyle_e; |
84 | | |
85 | | typedef enum { |
86 | | CHOMP_CLIP, //!< clip (the default): a single newline is left at the end |
87 | | CHOMP_STRIP, //!< strip (-): no newline is left at the end |
88 | | CHOMP_KEEP //!< keep (+): all the newlines at the end are left |
89 | | } BlockChomp_e; |
90 | | |
91 | | private: |
92 | | |
93 | 0 | static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } /* capacity to reserve before parsing: whatever _count_nlines(src) returns, with a floor of 16 */ |
94 | | /* NOTE: the members declared without a body in this class are not defined in this header */ |
95 | | void _reset(); |
96 | | |
97 | | bool _finished_file() const; |
98 | | bool _finished_line() const; |
99 | | |
100 | | csubstr _peek_next_line(size_t pos=npos) const; |
101 | | bool _advance_to_peeked(); |
102 | | void _scan_line(); |
103 | | |
104 | | csubstr _slurp_doc_scalar(); |
105 | | bool _scan_scalar(csubstr *scalar); |
106 | | csubstr _scan_comment(); |
107 | | csubstr _scan_quoted_scalar(const char q); |
108 | | csubstr _scan_block(); |
109 | | csubstr _scan_ref(); |
110 | | substr _scan_plain_scalar_impl(csubstr currscalar, csubstr peeked_line, size_t indentation); |
111 | | substr _scan_plain_scalar_expl(csubstr currscalar, csubstr peeked_line); |
112 | | substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); |
113 | | csubstr _scan_to_next_nonempty_line(size_t indentation); |
114 | | csubstr _extend_scanned_scalar(csubstr currscalar); |
115 | | |
116 | | csubstr _filter_squot_scalar(substr s); |
117 | | csubstr _filter_dquot_scalar(substr s); |
118 | | csubstr _filter_plain_scalar(substr s, size_t indentation); |
119 | | csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); |
120 | | substr _filter_whitespace(substr s, size_t indentation=0, bool leading_whitespace=true); |
121 | | |
122 | | void _handle_finished_file(); |
123 | | void _handle_line(); |
124 | | |
125 | | bool _handle_indentation(); |
126 | | |
127 | | bool _handle_unk(); |
128 | | bool _handle_map_expl(); |
129 | | bool _handle_map_impl(); |
130 | | bool _handle_seq_expl(); |
131 | | bool _handle_seq_impl(); |
132 | | bool _handle_top(); |
133 | | bool _handle_key_anchors_and_refs(); |
134 | | bool _handle_val_anchors_and_refs(); |
135 | | bool _handle_types(); |
136 | | |
137 | | |
138 | | void _push_level(bool explicit_flow_chars = false); |
139 | | void _pop_level(); |
140 | | |
141 | | void _start_unk(bool as_child=true); |
142 | | |
143 | | void _start_map(bool as_child=true); |
144 | | void _stop_map(); |
145 | | |
146 | | void _start_seq(bool as_child=true); |
147 | | void _stop_seq(); |
148 | | |
149 | | void _start_seqimap(); |
150 | | void _stop_seqimap(); |
151 | | |
152 | | void _start_doc(bool as_child=true); |
153 | | void _stop_doc(); |
154 | | void _start_new_doc(csubstr rem); |
155 | | void _end_stream(); |
156 | | |
157 | | NodeData* _append_val(csubstr val); |
158 | | NodeData* _append_key_val(csubstr val); |
159 | 0 | inline NodeData* _append_val_null() { return _append_val({}/*"~"*/); } /* forwards an empty csubstr to _append_val(); the "~" spelling is left commented out */ |
160 | 0 | inline NodeData* _append_key_val_null() { return _append_key_val({}/*"~"*/); } /* forwards an empty csubstr to _append_key_val(); the "~" spelling is left commented out */ |
161 | | bool _rval_dash_start_or_continue_seq(); |
162 | | |
163 | | void _store_scalar(csubstr const& s); |
164 | 0 | void _store_scalar_null() { _store_scalar({}/*"~"*/); } /* forwards an empty csubstr to _store_scalar(); the "~" spelling is left commented out */ |
165 | | csubstr _consume_scalar(); |
166 | | void _move_scalar_from_top(); |
167 | | |
168 | | void _set_indentation(size_t behind); |
169 | | void _save_indentation(size_t behind=0); |
170 | | |
171 | | void _write_key_anchor(size_t node_id); |
172 | | void _write_val_anchor(size_t node_id); |
173 | | |
174 | | |
175 | | private: |
176 | | |
177 | | static bool _read_decimal(csubstr const& str, size_t *decimal); |
178 | | static size_t _count_nlines(csubstr src); |
179 | | |
180 | | private: |
181 | | /* parser state flags: every enumerator is its own bit, so values can be OR-ed into State::flags and tested with has_all()/has_any()/has_none() */ |
182 | | typedef enum { |
183 | | RTOP = 0x01 << 0, ///< reading at top level |
184 | | RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq |
185 | | RMAP = 0x01 << 2, ///< reading a map |
186 | | RSEQ = 0x01 << 3, ///< reading a seq |
187 | | EXPL = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} |
188 | | CPLX = 0x01 << 5, ///< reading a complex key |
189 | | RKEY = 0x01 << 6, ///< reading a scalar as key |
190 | | RVAL = 0x01 << 7, ///< reading a scalar as val |
191 | | RNXT = 0x01 << 8, ///< read next val or keyval |
192 | | SSCL = 0x01 << 9, ///< there's a scalar stored |
193 | | RSET = 0x01 << 10, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html |
194 | | NDOC = 0x01 << 11, ///< no document mode. a document has ended and another has not started yet. |
195 | | //! reading an implicit map nested in an explicit seq. |
196 | | //! eg, {key: [key2: value2, key3: value3]} |
197 | | //! is parsed as {key: [{key2: value2}, {key3: value3}]} |
198 | | RSEQIMAP = 0x01 << 12, |
199 | | } State_e; |
200 | | |
201 | | struct LineContents /* one line of the source seen three ways (full, stripped, remainder), plus its leading-space count */ |
202 | | { |
203 | | csubstr full; ///< the full line, including newlines on the right |
204 | | csubstr stripped; ///< the stripped line, excluding newlines on the right |
205 | | csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character |
206 | | size_t indentation; ///< the number of spaces on the beginning of the line |
207 | | |
208 | 0 | LineContents() : full(), stripped(), rem(), indentation() {} |
209 | | |
210 | | void reset(csubstr full_, csubstr stripped_) /* switch to another line: rem restarts as the whole stripped line and indentation is recomputed from the full line */ |
211 | 0 | { |
212 | 0 | full = full_; |
213 | 0 | stripped = stripped_; |
214 | 0 | rem = stripped_; |
215 | | // find the first column where the character is not a space. NOTE(review): presumably npos when the line holds nothing but spaces -- confirm against csubstr::first_not_of |
216 | 0 | indentation = full.first_not_of(' '); |
217 | 0 | } |
218 | | |
219 | | size_t current_col() const /* column of the remainder within the full line */ |
220 | 0 | { |
221 | 0 | return current_col(rem); |
222 | 0 | } |
223 | | |
224 | | size_t current_col(csubstr s) const /* distance from the start of the full line to the start of s; s must lie within full (asserted) */ |
225 | 0 | { |
226 | 0 | RYML_ASSERT(s.str >= full.str); |
227 | 0 | RYML_ASSERT(full.is_super(s)); |
228 | 0 | size_t col = static_cast<size_t>(s.str - full.str); |
229 | 0 | return col; |
230 | 0 | } |
231 | | }; |
232 | | |
233 | | struct State /* the element type of the parser's state stack (m_stack) */ |
234 | | { |
235 | | size_t flags; /* OR of State_e bits */ |
236 | | size_t level; |
237 | | size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes |
238 | | csubstr scalar; |
239 | | size_t scalar_col; // the column where the scalar (or its quotes) begin |
240 | | |
241 | | Location pos; |
242 | | LineContents line_contents; |
243 | | size_t indref; |
244 | | |
245 | 0 | State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} |
246 | | |
247 | | void reset(const char *file, size_t node_id_) /* reinitialize for a new parse into node_id_: flags become RUNK|RTOP, level and indref 0, the position offset 0 / line 1 / col 1, and the stored scalar is cleared; line_contents is left as it was */ |
248 | 0 | { |
249 | 0 | flags = RUNK|RTOP; |
250 | 0 | level = 0; |
251 | 0 | pos.name = to_csubstr(file); /* NOTE(review): presumably measures file up to a terminating NUL -- confirm that callers pass a null-terminated string */ |
252 | 0 | pos.offset = 0; |
253 | 0 | pos.line = 1; |
254 | 0 | pos.col = 1; |
255 | 0 | node_id = node_id_; |
256 | 0 | scalar_col = 0; |
257 | 0 | scalar.clear(); |
258 | 0 | indref = 0; |
259 | 0 | } |
260 | | }; |
261 | | |
262 | | void _line_progressed(size_t ahead); |
263 | | void _line_ended(); |
264 | | |
265 | | void _prepare_pop() /* copies pos, line_contents and scalar from m_stack.top() into m_stack.top(1); asserts that the stack holds more than one state. NOTE(review): top(1) is presumably the state just below the top -- confirm in detail/stack.hpp */ |
266 | 0 | { |
267 | 0 | RYML_ASSERT(m_stack.size() > 1); |
268 | 0 | State const& curr = m_stack.top(); |
269 | 0 | State & next = m_stack.top(1); |
270 | 0 | next.pos = curr.pos; |
271 | 0 | next.line_contents = curr.line_contents; |
272 | 0 | next.scalar = curr.scalar; |
273 | 0 | } |
274 | | |
275 | | inline bool _at_line_begin() const /* true when the current line's remainder begins at the same address as the full line */ |
276 | 0 | { |
277 | 0 | return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); |
278 | 0 | } |
279 | | inline bool _at_line_end() const /* true when the current line's remainder is empty. NOTE(review): the second test presumably means "rem is made only of spaces" -- confirm against csubstr::begins_with(char, size_t) */ |
280 | 0 | { |
281 | 0 | csubstr r = m_state->line_contents.rem; |
282 | 0 | return r.empty() || r.begins_with(' ', r.len); |
283 | 0 | } |
284 | | /* node lookups in m_tree: by the node_id held in a state, or by an explicit node id */ |
285 | 0 | inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } |
286 | 0 | inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } |
287 | 0 | inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } |
288 | | /* flag tests against the current state (m_state): all of the bits in f set, at least one set, none set */ |
289 | 0 | inline bool has_all(size_t f) const { return (m_state->flags & f) == f; } |
290 | 0 | inline bool has_any(size_t f) const { return (m_state->flags & f) != 0; } |
291 | 0 | inline bool has_none(size_t f) const { return (m_state->flags & f) == 0; } |
292 | | /* the same flag tests, against an explicitly given state */ |
293 | 0 | static inline bool has_all(size_t f, State const* s) { return (s->flags & f) == f; } |
294 | 0 | static inline bool has_any(size_t f, State const* s) { return (s->flags & f) != 0; } |
295 | 0 | static inline bool has_none(size_t f, State const* s) { return (s->flags & f) == 0; } |
296 | | /* flag setters for the current state: each one forwards to its State* counterpart declared further down */ |
297 | 0 | inline void set_flags(size_t f) { set_flags(f, m_state); } |
298 | 0 | inline void add_flags(size_t on) { add_flags(on, m_state); } |
299 | 0 | inline void addrem_flags(size_t on, size_t off) { addrem_flags(on, off, m_state); } |
300 | 0 | inline void rem_flags(size_t off) { rem_flags(off, m_state); } |
301 | | |
302 | | void set_flags(size_t f, State * s); |
303 | | void add_flags(size_t on, State * s); |
304 | | void addrem_flags(size_t on, size_t off, State * s); |
305 | | void rem_flags(size_t off, State * s); |
306 | | |
307 | | private: |
308 | | |
309 | | #ifdef RYML_DBG |
310 | | void _dbg(const char *msg, ...) const; |
311 | | #endif |
312 | | void _err(const char *msg, ...) const; |
313 | | int _fmt_msg(char *buf, int buflen, const char *msg, va_list args) const; |
314 | | static int _prfl(char *buf, int buflen, size_t v); |
315 | | |
316 | | private: |
317 | | |
318 | | csubstr m_file; |
319 | | substr m_buf; |
320 | | |
321 | | size_t m_root_id; |
322 | | Tree * m_tree; /* the tree that node() looks nodes up in */ |
323 | | |
324 | | detail::stack<State> m_stack; |
325 | | State * m_state; /* the state read by has_all()/has_any()/has_none(). NOTE(review): presumably points at the top of m_stack -- confirm in parse.cpp */ |
326 | | |
327 | | csubstr m_key_tag; |
328 | | csubstr m_val_tag; |
329 | | |
330 | | csubstr m_key_anchor; |
331 | | csubstr m_val_anchor; |
332 | | |
333 | | }; |
334 | | |
335 | | |
336 | | //----------------------------------------------------------------------------- |
337 | | //----------------------------------------------------------------------------- |
338 | | //----------------------------------------------------------------------------- |
339 | | |
340 | 0 | inline Tree parse( substr buf) { Parser np; return np.parse({} , buf); } //!< parse in-situ a modifiable YAML source buffer into a new tree, using a temporary Parser and an empty filename. |
341 | 0 | inline Tree parse(csubstr filename, substr buf) { Parser np; return np.parse(filename, buf); } //!< parse in-situ a modifiable YAML source buffer into a new tree, using a temporary Parser and providing a filename for error messages. |
342 | 0 | inline Tree parse( csubstr buf) { Parser np; return np.parse({} , buf); } //!< parse a read-only YAML source buffer into a new tree, copying it first to the tree's source arena; uses a temporary Parser and an empty filename. |
343 | 0 | inline Tree parse(csubstr filename, csubstr buf) { Parser np; return np.parse(filename, buf); } //!< parse a read-only YAML source buffer into a new tree, copying it first to the tree's source arena; uses a temporary Parser and provides a filename for error messages. |
344 | | |
345 | 0 | inline void parse( substr buf, Tree *t) { Parser np; np.parse({} , buf, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer into the root of @p t, with a temporary Parser and an empty filename. |
346 | 0 | inline void parse(csubstr filename, substr buf, Tree *t) { Parser np; np.parse(filename, buf, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer into the root of @p t, providing a filename for error messages. |
347 | 0 | inline void parse( csubstr buf, Tree *t) { Parser np; np.parse({} , buf, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer into the root of @p t, copying it first to the tree's source arena. |
348 | 0 | inline void parse(csubstr filename, csubstr buf, Tree *t) { Parser np; np.parse(filename, buf, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer into the root of @p t, copying it first to the tree's source arena, providing a filename for error messages. |
349 | | |
350 | 0 | inline void parse( substr buf, Tree *t, size_t node_id) { Parser np; np.parse({} , buf, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer directly into the node @p node_id of @p t. |
351 | 0 | inline void parse(csubstr filename, substr buf, Tree *t, size_t node_id) { Parser np; np.parse(filename, buf, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer directly into the node @p node_id of @p t, providing a filename for error messages. |
352 | 0 | inline void parse( csubstr buf, Tree *t, size_t node_id) { Parser np; np.parse({} , buf, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer directly into the node @p node_id of @p t, copying it first to the tree's source arena. |
353 | 0 | inline void parse(csubstr filename, csubstr buf, Tree *t, size_t node_id) { Parser np; np.parse(filename, buf, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer directly into the node @p node_id of @p t, copying it first to the tree's source arena, providing a filename for error messages. |
354 | | |
355 | 0 | inline void parse( substr buf, NodeRef node) { Parser np; np.parse({} , buf, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer directly into the node referred to by @p node. |
356 | 0 | inline void parse(csubstr filename, substr buf, NodeRef node) { Parser np; np.parse(filename, buf, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer directly into the node referred to by @p node, providing a filename for error messages. |
357 | 0 | inline void parse( csubstr buf, NodeRef node) { Parser np; np.parse({} , buf, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer directly into the node referred to by @p node, copying it first to the tree's source arena. |
358 | 0 | inline void parse(csubstr filename, csubstr buf, NodeRef node) { Parser np; np.parse(filename, buf, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer directly into the node referred to by @p node, copying it first to the tree's source arena, providing a filename for error messages. |
359 | | |
360 | | } // namespace yml |
361 | | } // namespace c4 |
362 | | |
363 | | #if defined(_MSC_VER) |
364 | | # pragma warning(pop) |
365 | | #endif |
366 | | |
367 | | #endif /* _C4_YML_PARSE_HPP_ */ |