Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file xml.c |
3 | | * @author Radek Krejci <rkrejci@cesnet.cz> |
4 | | * @author Michal Vasko <mvasko@cesnet.cz> |
5 | | * @brief Generic XML parser implementation for libyang |
6 | | * |
7 | | * Copyright (c) 2015 - 2021 CESNET, z.s.p.o. |
8 | | * |
9 | | * This source code is licensed under BSD 3-Clause License (the "License"). |
10 | | * You may not use this file except in compliance with the License. |
11 | | * You may obtain a copy of the License at |
12 | | * |
13 | | * https://opensource.org/licenses/BSD-3-Clause |
14 | | */ |
15 | | |
16 | | #define _GNU_SOURCE |
17 | | |
18 | | #include "xml.h" |
19 | | |
20 | | #include <assert.h> |
21 | | #include <ctype.h> |
22 | | #include <stdint.h> |
23 | | #include <stdlib.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include "compat.h" |
27 | | #include "in_internal.h" |
28 | | #include "ly_common.h" |
29 | | #include "out_internal.h" |
30 | | #include "tree.h" |
31 | | #include "tree_schema_internal.h" |
32 | | |
33 | | /* Move the input of lyxml_ctx c by s characters; on EOF log a validation error and return LY_EVALID */ |
34 | | #define move_input(c, s) \ |
35 | 1.07M | ly_in_skip(c->in, s); \ |
36 | 1.07M | LY_CHECK_ERR_RET(!c->in->current[0], LOGVAL(c->ctx, LY_VCODE_EOF), LY_EVALID) |
37 | | |
38 | | /* Skip whitespaces in the input of lyxml_ctx c, noting every new line via LY_IN_NEW_LINE */ |
39 | | #define ign_xmlws(c) \ |
40 | 2.95M | while (is_xmlws(*(c)->in->current)) { \ |
41 | 381k | if (*(c)->in->current == '\n') { \ |
42 | 5.05k | LY_IN_NEW_LINE((c)->in); \ |
43 | 5.05k | } \ |
44 | 381k | ly_in_skip(c->in, 1); \ |
45 | 381k | } |
46 | | |
47 | | static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, |
48 | | ly_bool *dynamic); |
49 | | |
50 | | /** |
51 | | * @brief Ignore and skip any characters until the delim of the size delim_len is read, including the delim |
52 | | * |
53 | | * @param[in] xmlctx XML parser context to provide input handler and libyang context |
54 | | * The input handler (xmlctx->in) is updated only in case the section is correctly terminated. |
55 | | * @param[in] delim Delimiter to detect end of the section. |
56 | | * @param[in] delim_len Length of the delimiter string to use. |
57 | | * @param[in] sectname Section name to refer in error message. |
58 | | */ |
59 | | LY_ERR |
60 | | skip_section(struct lyxml_ctx *xmlctx, const char *delim, size_t delim_len, const char *sectname) |
61 | 488 | { |
62 | 488 | size_t i; |
63 | 488 | register const char *input, *a, *b; |
64 | 488 | uint64_t parsed = 0, newlines = 0; |
65 | | |
66 | 1.80k | for (input = xmlctx->in->current; *input; ++input, ++parsed) { |
67 | 1.71k | if (*input != *delim) { |
68 | 747 | if (*input == '\n') { |
69 | 260 | ++newlines; |
70 | 260 | } |
71 | 747 | continue; |
72 | 747 | } |
73 | 963 | a = input; |
74 | 963 | b = delim; |
75 | 2.78k | for (i = 0; i < delim_len; ++i) { |
76 | 2.39k | if (*a++ != *b++) { |
77 | 569 | break; |
78 | 569 | } |
79 | 2.39k | } |
80 | 963 | if (i == delim_len) { |
81 | | /* delim found */ |
82 | 394 | xmlctx->in->line += newlines; |
83 | 394 | ly_in_skip(xmlctx->in, parsed + delim_len); |
84 | 394 | return LY_SUCCESS; |
85 | 394 | } |
86 | 963 | } |
87 | | |
88 | | /* delim not found, |
89 | | * do not update input handler to refer to the beginning of the section in error message */ |
90 | 94 | LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, sectname); |
91 | 94 | return LY_EVALID; |
92 | 488 | } |
93 | | |
94 | | /** |
95 | | * @brief Check/Get an XML identifier from the input string. |
96 | | * |
97 | | * The identifier must have at least one valid character complying with the name start character constraints. |
98 | | * The identifier is terminated by the first character which does not comply with the name character constraints. |
99 | | * |
100 | | * See https://www.w3.org/TR/xml-names/#NT-NCName |
101 | | * |
102 | | * @param[in] xmlctx XML context. |
103 | | * @param[out] start Pointer to the start of the identifier. |
104 | | * @param[out] end Pointer to the end of the identifier. |
105 | | * @return LY_ERR value. |
106 | | */ |
107 | | static LY_ERR |
108 | | lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end) |
109 | 580k | { |
110 | 580k | const char *s, *in; |
111 | 580k | uint32_t c; |
112 | 580k | size_t parsed; |
113 | 580k | LY_ERR rc; |
114 | | |
115 | 580k | in = s = xmlctx->in->current; |
116 | | |
117 | | /* check NameStartChar (minus colon) */ |
118 | 580k | LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed), |
119 | 580k | LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), |
120 | 580k | LY_EVALID); |
121 | 579k | LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c), |
122 | 579k | LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Identifier \"%s\" starts with an invalid character.", in - parsed), |
123 | 579k | LY_EVALID); |
124 | | |
125 | | /* check rest of the identifier */ |
126 | 8.92M | do { |
127 | | /* move only successfully parsed bytes */ |
128 | 8.92M | ly_in_skip(xmlctx->in, parsed); |
129 | | |
130 | 8.92M | rc = ly_getutf8(&in, &c, &parsed); |
131 | 8.92M | LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), LY_EVALID); |
132 | 8.92M | } while (is_xmlqnamechar(c)); |
133 | | |
134 | 579k | *start = s; |
135 | 579k | *end = xmlctx->in->current; |
136 | 579k | return LY_SUCCESS; |
137 | 579k | } |
138 | | |
139 | | /** |
140 | | * @brief Add namespace definition into XML context. |
141 | | * |
142 | | * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other |
143 | | * element). This mimics namespace visibility, since the namespace defined in element E is not visible from its parents or |
144 | | * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected |
145 | | * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix. |
146 | | * |
147 | | * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call |
148 | | * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context. |
149 | | * |
150 | | * @param[in] xmlctx XML context to work with. |
151 | | * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace. |
152 | | * @param[in] prefix_len Length of the prefix. |
153 | | * @param[in] uri Namespace URI (value) to store directly. Value is always spent. |
154 | | * @return LY_ERR values. |
155 | | */ |
156 | | LY_ERR |
157 | | lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri) |
158 | 32.8k | { |
159 | 32.8k | LY_ERR rc = LY_SUCCESS; |
160 | 32.8k | struct lyxml_ns *ns; |
161 | 32.8k | uint32_t i; |
162 | | |
163 | | /* check for duplicates */ |
164 | 32.8k | if (xmlctx->ns.count) { |
165 | 3.76k | i = xmlctx->ns.count; |
166 | 9.03k | do { |
167 | 9.03k | --i; |
168 | 9.03k | ns = xmlctx->ns.objs[i]; |
169 | 9.03k | if (ns->depth < xmlctx->elements.count) { |
170 | | /* only namespaces of parents, no need to check further */ |
171 | 1.48k | break; |
172 | 7.54k | } else if (prefix && ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) { |
173 | 837 | if (!strcmp(ns->uri, uri)) { |
174 | | /* exact same prefix and namespace, ignore */ |
175 | 734 | goto cleanup; |
176 | 734 | } |
177 | | |
178 | 103 | LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate XML NS prefix \"%s\" used for namespaces \"%s\" and \"%s\".", |
179 | 103 | ns->prefix, ns->uri, uri); |
180 | 103 | rc = LY_EVALID; |
181 | 103 | goto cleanup; |
182 | 6.71k | } else if (!prefix && !ns->prefix) { |
183 | 901 | if (!strcmp(ns->uri, uri)) { |
184 | | /* exact same default namespace, ignore */ |
185 | 836 | goto cleanup; |
186 | 836 | } |
187 | | |
188 | 65 | LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate default XML namespaces \"%s\" and \"%s\".", ns->uri, uri); |
189 | 65 | rc = LY_EVALID; |
190 | 65 | goto cleanup; |
191 | 901 | } |
192 | 9.03k | } while (i); |
193 | 3.76k | } |
194 | | |
195 | 31.1k | ns = malloc(sizeof *ns); |
196 | 31.1k | LY_CHECK_ERR_GOTO(!ns, LOGMEM(xmlctx->ctx); rc = LY_EMEM, cleanup); |
197 | | |
198 | | /* we need to connect the depth of the element where the namespace is defined with the |
199 | | * namespace record to be able to maintain (remove) the record when the parser leaves |
200 | | * (to its sibling or back to the parent) the element where the namespace was defined */ |
201 | 31.1k | ns->depth = xmlctx->elements.count; |
202 | | |
203 | 31.1k | ns->uri = uri; |
204 | 31.1k | if (prefix) { |
205 | 785 | ns->prefix = strndup(prefix, prefix_len); |
206 | 785 | LY_CHECK_ERR_GOTO(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns); rc = LY_EMEM, cleanup); |
207 | 30.3k | } else { |
208 | 30.3k | ns->prefix = NULL; |
209 | 30.3k | } |
210 | | |
211 | 31.1k | rc = ly_set_add(&xmlctx->ns, ns, 1, NULL); |
212 | 31.1k | LY_CHECK_ERR_GOTO(rc, free(ns->prefix); free(ns), cleanup); |
213 | | |
214 | | /* successfully stored */ |
215 | 31.1k | uri = NULL; |
216 | | |
217 | 32.8k | cleanup: |
218 | 32.8k | free(uri); |
219 | 32.8k | return rc; |
220 | 31.1k | } |
221 | | |
222 | | void |
223 | | lyxml_ns_rm(struct lyxml_ctx *xmlctx) |
224 | 449k | { |
225 | 449k | struct lyxml_ns *ns; |
226 | 449k | uint32_t u; |
227 | | |
228 | 449k | if (!xmlctx->ns.count) { |
229 | 0 | return; |
230 | 0 | } |
231 | | |
232 | 449k | u = xmlctx->ns.count; |
233 | 449k | do { |
234 | 449k | --u; |
235 | 449k | ns = (struct lyxml_ns *)xmlctx->ns.objs[u]; |
236 | | |
237 | 449k | if (ns->depth != xmlctx->elements.count + 1) { |
238 | | /* we are done, the namespaces from a single element are supposed to be together */ |
239 | 425k | break; |
240 | 425k | } |
241 | | |
242 | | /* remove the ns structure */ |
243 | 24.0k | free(ns->prefix); |
244 | 24.0k | free(ns->uri); |
245 | 24.0k | free(ns); |
246 | 24.0k | --xmlctx->ns.count; |
247 | 24.0k | } while (u); |
248 | | |
249 | 449k | if (!xmlctx->ns.count) { |
250 | | /* cleanup the xmlctx's namespaces storage */ |
251 | 23.7k | ly_set_erase(&xmlctx->ns, NULL); |
252 | 23.7k | } |
253 | 449k | } |
254 | | |
255 | | const struct lyxml_ns * |
256 | | lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len) |
257 | 569k | { |
258 | 569k | struct lyxml_ns *ns; |
259 | 569k | uint32_t u; |
260 | | |
261 | 569k | if (!ns_set->count) { |
262 | 486 | return NULL; |
263 | 486 | } |
264 | | |
265 | 569k | u = ns_set->count; |
266 | 573k | do { |
267 | 573k | --u; |
268 | 573k | ns = (struct lyxml_ns *)ns_set->objs[u]; |
269 | | |
270 | 573k | if (prefix && prefix_len) { |
271 | 147k | if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) { |
272 | 2.29k | return ns; |
273 | 2.29k | } |
274 | 426k | } else if (!ns->prefix) { |
275 | | /* default namespace */ |
276 | 424k | return ns; |
277 | 424k | } |
278 | 573k | } while (u); |
279 | | |
280 | 142k | return NULL; |
281 | 569k | } |
282 | | |
283 | | /** |
284 | | * @brief Skip in the input until EOF or just after the opening tag. |
285 | | * Handles special XML constructs (comment, cdata, doctype). |
286 | | * |
287 | | * @param[in] xmlctx XML context to use. |
288 | | * @return LY_ERR value. |
289 | | */ |
290 | | static LY_ERR |
291 | | lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx) |
292 | 530k | { |
293 | 530k | const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */ |
294 | 530k | const char *endtag, *sectname; |
295 | 530k | size_t endtag_len; |
296 | | |
297 | 531k | while (1) { |
298 | 531k | ign_xmlws(xmlctx); |
299 | | |
300 | 531k | if (xmlctx->in->current[0] == '\0') { |
301 | | /* EOF */ |
302 | 487 | if (xmlctx->elements.count) { |
303 | 56 | LOGVAL(ctx, LY_VCODE_EOF); |
304 | 56 | return LY_EVALID; |
305 | 56 | } |
306 | 431 | return LY_SUCCESS; |
307 | 530k | } else if (xmlctx->in->current[0] != '<') { |
308 | 73 | LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), xmlctx->in->current, |
309 | 73 | "element tag start ('<')"); |
310 | 73 | return LY_EVALID; |
311 | 73 | } |
312 | 530k | move_input(xmlctx, 1); |
313 | | |
314 | 529k | if (xmlctx->in->current[0] == '!') { |
315 | 305 | move_input(xmlctx, 1); |
316 | | /* sections to ignore */ |
317 | 299 | if (!strncmp(xmlctx->in->current, "--", 2)) { |
318 | | /* comment */ |
319 | 227 | move_input(xmlctx, 2); |
320 | 226 | sectname = "Comment"; |
321 | 226 | endtag = "-->"; |
322 | 226 | endtag_len = ly_strlen_const("-->"); |
323 | 226 | } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) { |
324 | | /* Document type declaration - not supported */ |
325 | 1 | LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration"); |
326 | 1 | return LY_EVALID; |
327 | 71 | } else { |
328 | 71 | LOGVAL(ctx, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->in->current[-2]); |
329 | 71 | return LY_EVALID; |
330 | 71 | } |
331 | 226 | LY_CHECK_RET(skip_section(xmlctx, endtag, endtag_len, sectname)); |
332 | 529k | } else if (xmlctx->in->current[0] == '?') { |
333 | 262 | LY_CHECK_RET(skip_section(xmlctx, "?>", 2, "Declaration")); |
334 | 528k | } else { |
335 | | /* other non-WS character */ |
336 | 528k | break; |
337 | 528k | } |
338 | 529k | } |
339 | | |
340 | 528k | return LY_SUCCESS; |
341 | 530k | } |
342 | | |
343 | | /** |
344 | | * @brief Parse QName. |
345 | | * |
346 | | * @param[in] xmlctx XML context to use. |
347 | | * @param[out] prefix Parsed prefix, may be NULL. |
348 | | * @param[out] prefix_len Length of @p prefix. |
349 | | * @param[out] name Parsed name. |
350 | | * @param[out] name_len Length of @p name. |
351 | | * @return LY_ERR value. |
352 | | */ |
353 | | static LY_ERR |
354 | | lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) |
355 | 576k | { |
356 | 576k | const char *start, *end; |
357 | | |
358 | 576k | *prefix = NULL; |
359 | 576k | *prefix_len = 0; |
360 | | |
361 | 576k | LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end)); |
362 | 575k | if (end[0] == ':') { |
363 | | /* we have prefixed identifier */ |
364 | 3.91k | *prefix = start; |
365 | 3.91k | *prefix_len = end - start; |
366 | | |
367 | 3.91k | move_input(xmlctx, 1); |
368 | 3.91k | LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end)); |
369 | 3.88k | } |
370 | | |
371 | 575k | *name = start; |
372 | 575k | *name_len = end - start; |
373 | 575k | return LY_SUCCESS; |
374 | 575k | } |
375 | | |
376 | | /** |
377 | | * @brief Prepare buffer for new data. |
378 | | * |
379 | | * @param[in] ctx Context for logging. |
380 | | * @param[in,out] in XML input data. |
381 | | * @param[in,out] offset Current offset in @p in. |
382 | | * @param[in] need_space Needed additional free space that is allocated. |
383 | | * @param[in,out] buf Dynamic buffer. |
384 | | * @param[in,out] len Current @p buf length (used characters). |
385 | | * @param[in,out] size Current @p buf size (allocated characters). |
386 | | * @return LY_ERR value. |
387 | | */ |
388 | | static LY_ERR |
389 | | lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *offset, size_t need_space, char **buf, |
390 | | size_t *len, size_t *size) |
391 | 137k | { |
392 | 137k | #define BUFSIZE 24 |
393 | 297k | #define BUFSIZE_STEP 128 |
394 | | |
395 | 137k | if (!*buf) { |
396 | | /* prepare output buffer */ |
397 | 9.47k | *buf = malloc(BUFSIZE); |
398 | 9.47k | LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM); |
399 | 9.47k | *size = BUFSIZE; |
400 | 9.47k | } |
401 | | |
402 | | /* allocate needed space */ |
403 | 285k | while (*len + *offset + need_space >= *size) { |
404 | 148k | *buf = ly_realloc(*buf, *size + BUFSIZE_STEP); |
405 | 148k | LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM); |
406 | 148k | *size += BUFSIZE_STEP; |
407 | 148k | } |
408 | | |
409 | 137k | if (*offset) { |
410 | | /* store what we have so far */ |
411 | 98.4k | memcpy(&(*buf)[*len], *in, *offset); |
412 | 98.4k | *len += *offset; |
413 | 98.4k | *in += *offset; |
414 | 98.4k | *offset = 0; |
415 | 98.4k | } |
416 | | |
417 | 137k | return LY_SUCCESS; |
418 | | |
419 | 137k | #undef BUFSIZE |
420 | 137k | #undef BUFSIZE_STEP |
421 | 137k | } |
422 | | |
423 | | /** |
424 | | * @brief Parse XML text content (value). |
425 | | * |
426 | | * @param[in] xmlctx XML context to use. |
427 | | * @param[in] endchar Expected character to mark value end. |
428 | | * @param[out] value Parsed value. |
429 | | * @param[out] length Length of @p value. |
430 | | * @param[out] ws_only Whether the value is empty/white-spaces only. |
431 | | * @param[out] dynamic Whether the value was dynamically allocated. |
432 | | * @return LY_ERR value. |
433 | | */ |
434 | | static LY_ERR |
435 | | lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic) |
436 | 123k | { |
437 | 123k | const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */ |
438 | 123k | const char *in = xmlctx->in->current, *start, *in_aux, *p; |
439 | 123k | char *buf = NULL; |
440 | 123k | size_t offset; /* read offset in input buffer */ |
441 | 123k | size_t len; /* length of the output string (write offset in output buffer) */ |
442 | 123k | size_t size = 0; /* size of the output buffer */ |
443 | 123k | uint32_t n; |
444 | 123k | size_t u; |
445 | 123k | ly_bool ws = 1; |
446 | | |
447 | 123k | assert(xmlctx); |
448 | | |
449 | | /* init */ |
450 | 123k | start = in; |
451 | 123k | offset = len = 0; |
452 | | |
453 | | /* parse */ |
454 | 25.4M | while (in[offset]) { |
455 | 25.4M | if (in[offset] == '&') { |
456 | | /* non WS */ |
457 | 132k | ws = 0; |
458 | | |
459 | | /* use buffer and allocate enough for the offset and next character, |
460 | | * we will need 4 bytes at most since we support only the predefined |
461 | | * (one-char) entities and character references */ |
462 | 132k | LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, 4, &buf, &len, &size)); |
463 | | |
464 | 132k | ++offset; |
465 | 132k | if (in[offset] != '#') { |
466 | | /* entity reference - only predefined references are supported */ |
467 | 24.3k | if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) { |
468 | 4.93k | buf[len++] = '<'; |
469 | 4.93k | in += ly_strlen_const("<"); |
470 | 19.4k | } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) { |
471 | 661 | buf[len++] = '>'; |
472 | 661 | in += ly_strlen_const(">"); |
473 | 18.7k | } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) { |
474 | 195 | buf[len++] = '&'; |
475 | 195 | in += ly_strlen_const("&"); |
476 | 18.5k | } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) { |
477 | 18.2k | buf[len++] = '\''; |
478 | 18.2k | in += ly_strlen_const("'"); |
479 | 18.2k | } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) { |
480 | 199 | buf[len++] = '\"'; |
481 | 199 | in += ly_strlen_const("""); |
482 | 199 | } else { |
483 | 115 | LOGVAL(ctx, LYVE_SYNTAX, "Entity reference \"%.*s\" not supported, only predefined references allowed.", |
484 | 115 | 10, &in[offset - 1]); |
485 | 115 | goto error; |
486 | 115 | } |
487 | 24.2k | offset = 0; |
488 | 108k | } else { |
489 | 108k | p = &in[offset - 1]; |
490 | | /* character reference */ |
491 | 108k | ++offset; |
492 | 108k | if (isdigit(in[offset])) { |
493 | 5.29k | for (n = 0; isdigit(in[offset]); offset++) { |
494 | 5.29k | n = (LY_BASE_DEC * n) + (in[offset] - '0'); |
495 | 5.29k | } |
496 | 105k | } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) { |
497 | 309k | for (n = 0, ++offset; isxdigit(in[offset]); offset++) { |
498 | 309k | if (isdigit(in[offset])) { |
499 | 166k | u = (in[offset] - '0'); |
500 | 166k | } else if (in[offset] > 'F') { |
501 | 15.1k | u = LY_BASE_DEC + (in[offset] - 'a'); |
502 | 127k | } else { |
503 | 127k | u = LY_BASE_DEC + (in[offset] - 'A'); |
504 | 127k | } |
505 | 309k | n = (LY_BASE_HEX * n) + u; |
506 | 309k | } |
507 | 105k | } else { |
508 | 5 | LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\".", p); |
509 | 5 | goto error; |
510 | | |
511 | 5 | } |
512 | | |
513 | 108k | if (in[offset] != ';') { |
514 | 38 | LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"); |
515 | 38 | goto error; |
516 | 38 | } |
517 | 108k | ++offset; |
518 | 108k | if (ly_pututf8(&buf[len], n, &u)) { |
519 | 54 | LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\" (0x%08" PRIx32 ").", p, n); |
520 | 54 | goto error; |
521 | 54 | } |
522 | 108k | len += u; |
523 | 108k | in += offset; |
524 | 108k | offset = 0; |
525 | 108k | } |
526 | 25.3M | } else if (!strncmp(in + offset, "<![CDATA[", ly_strlen_const("<![CDATA["))) { |
527 | | /* CDATA, find the end */ |
528 | 4.66k | in_aux = strstr(in + offset + ly_strlen_const("<![CDATA["), "]]>"); |
529 | 4.66k | if (!in_aux) { |
530 | 16 | LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, "CDATA"); |
531 | 16 | goto error; |
532 | 16 | } |
533 | 4.65k | u = in_aux - (in + offset + ly_strlen_const("<![CDATA[")); |
534 | | |
535 | | /* use buffer, allocate enough for the whole CDATA */ |
536 | 4.65k | LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, u, &buf, &len, &size)); |
537 | | |
538 | | /* skip CDATA tag */ |
539 | 4.65k | in += ly_strlen_const("<![CDATA["); |
540 | 4.65k | assert(!offset); |
541 | | |
542 | | /* analyze CDATA for non WS and newline chars */ |
543 | 11.9M | for (n = 0; n < u; ++n) { |
544 | 11.9M | if (in[n] == '\n') { |
545 | 7.76k | LY_IN_NEW_LINE(xmlctx->in); |
546 | 11.9M | } else if (!is_xmlws(in[n])) { |
547 | 11.7M | ws = 0; |
548 | 11.7M | } |
549 | 11.9M | } |
550 | | |
551 | | /* copy CDATA */ |
552 | 4.65k | memcpy(buf + len, in, u); |
553 | 4.65k | len += u; |
554 | | |
555 | | /* move input skipping the end tag */ |
556 | 4.65k | in += u + ly_strlen_const("]]>"); |
557 | 25.3M | } else if (in[offset] == endchar) { |
558 | | /* end of string */ |
559 | 123k | if (buf) { |
560 | | /* realloc exact size string */ |
561 | 9.11k | buf = ly_realloc(buf, len + offset + 1); |
562 | 9.11k | LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM); |
563 | 9.11k | size = len + offset + 1; |
564 | 9.11k | if (offset) { |
565 | 5.07k | memcpy(&buf[len], in, offset); |
566 | 5.07k | } |
567 | | |
568 | | /* set terminating NULL byte */ |
569 | 9.11k | buf[len + offset] = '\0'; |
570 | 9.11k | } |
571 | 123k | len += offset; |
572 | 123k | in += offset; |
573 | 123k | goto success; |
574 | 25.2M | } else { |
575 | 25.2M | if (!is_xmlws(in[offset])) { |
576 | | /* non WS */ |
577 | 25.1M | ws = 0; |
578 | 25.1M | } |
579 | | |
580 | | /* log lines */ |
581 | 25.2M | if (in[offset] == '\n') { |
582 | 23.9k | LY_IN_NEW_LINE(xmlctx->in); |
583 | 23.9k | } |
584 | | |
585 | | /* continue */ |
586 | 25.2M | in_aux = &in[offset]; |
587 | 25.2M | LY_CHECK_ERR_GOTO(ly_getutf8(&in_aux, &n, &u), |
588 | 25.2M | LOGVAL(ctx, LY_VCODE_INCHAR, in[offset]), error); |
589 | 25.2M | offset += u; |
590 | 25.2M | } |
591 | 25.4M | } |
592 | | |
593 | | /* EOF reached before endchar */ |
594 | 235 | LOGVAL(ctx, LY_VCODE_EOF); |
595 | | |
596 | 510 | error: |
597 | 510 | free(buf); |
598 | 510 | return LY_EVALID; |
599 | | |
600 | 123k | success: |
601 | 123k | if (buf) { |
602 | 9.11k | *value = buf; |
603 | 9.11k | *dynamic = 1; |
604 | 114k | } else { |
605 | 114k | *value = (char *)start; |
606 | 114k | *dynamic = 0; |
607 | 114k | } |
608 | 123k | *length = len; |
609 | 123k | *ws_only = ws; |
610 | | |
611 | 123k | xmlctx->in->current = in; |
612 | 123k | return LY_SUCCESS; |
613 | 235 | } |
614 | | |
615 | | /** |
616 | | * @brief Parse XML closing element and match it to a stored starting element. |
617 | | * |
618 | | * @param[in] xmlctx XML context to use. |
619 | | * @param[in] prefix Expected closing element prefix. |
620 | | * @param[in] prefix_len Length of @p prefix. |
621 | | * @param[in] name Expected closing element name. |
622 | | * @param[in] name_len Length of @p name. |
623 | | * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value. |
624 | | * @return LY_ERR value. |
625 | | */ |
626 | | static LY_ERR |
627 | | lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len, |
628 | | ly_bool empty) |
629 | 449k | { |
630 | 449k | struct lyxml_elem *e; |
631 | | |
632 | | /* match opening and closing element tags */ |
633 | 449k | if (!xmlctx->elements.count) { |
634 | 4 | LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", |
635 | 4 | (int)name_len, name); |
636 | 4 | return LY_EVALID; |
637 | 4 | } |
638 | | |
639 | 449k | e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1]; |
640 | 449k | if ((e->prefix_len != prefix_len) || (e->name_len != name_len) || |
641 | 449k | (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) { |
642 | 203 | LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.", |
643 | 203 | (int)e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", (int)e->name_len, e->name, |
644 | 203 | (int)prefix_len, prefix ? prefix : "", prefix ? ":" : "", (int)name_len, name); |
645 | 203 | return LY_EVALID; |
646 | 203 | } |
647 | | |
648 | | /* opening and closing element tags match, remove record from the opening tags list */ |
649 | 449k | ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free); |
650 | | |
651 | | /* remove also the namespaces connected with the element */ |
652 | 449k | lyxml_ns_rm(xmlctx); |
653 | | |
654 | | /* skip WS */ |
655 | 449k | ign_xmlws(xmlctx); |
656 | | |
657 | | /* special "<elem/>" element */ |
658 | 449k | if (empty && (xmlctx->in->current[0] == '/')) { |
659 | 384k | move_input(xmlctx, 1); |
660 | 383k | } |
661 | | |
662 | | /* parse closing tag */ |
663 | 448k | if (xmlctx->in->current[0] != '>') { |
664 | 103 | LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), |
665 | 103 | xmlctx->in->current, "element tag termination ('>')"); |
666 | 103 | return LY_EVALID; |
667 | 103 | } |
668 | | |
669 | | /* move after closing tag without checking for EOF */ |
670 | 448k | ly_in_skip(xmlctx->in, 1); |
671 | | |
672 | 448k | return LY_SUCCESS; |
673 | 448k | } |
674 | | |
675 | | /** |
676 | | * @brief Store parsed opening element and parse any included namespaces. |
677 | | * |
678 | | * @param[in] xmlctx XML context to use. |
679 | | * @param[in] prefix Parsed starting element prefix. |
680 | | * @param[in] prefix_len Length of @p prefix. |
681 | | * @param[in] name Parsed starting element name. |
682 | | * @param[in] name_len Length of @p name. |
683 | | * @return LY_ERR value. |
684 | | */ |
685 | | static LY_ERR |
686 | | lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len) |
687 | 462k | { |
688 | 462k | LY_ERR ret = LY_SUCCESS; |
689 | 462k | struct lyxml_elem *e; |
690 | 462k | const char *prev_input; |
691 | 462k | uint64_t prev_line; |
692 | 462k | char *value; |
693 | 462k | size_t parsed, value_len; |
694 | 462k | ly_bool ws_only, dynamic, is_ns; |
695 | 462k | uint32_t c; |
696 | | |
697 | | /* store element opening tag information */ |
698 | 462k | e = malloc(sizeof *e); |
699 | 462k | LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM); |
700 | 462k | e->name = name; |
701 | 462k | e->prefix = prefix; |
702 | 462k | e->name_len = name_len; |
703 | 462k | e->prefix_len = prefix_len; |
704 | | |
705 | 462k | LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL)); |
706 | 462k | if (xmlctx->elements.count > LY_MAX_BLOCK_DEPTH) { |
707 | 2 | LOGERR(xmlctx->ctx, LY_EINVAL, "The maximum number of open elements has been exceeded."); |
708 | 2 | return LY_EINVAL; |
709 | 2 | } |
710 | | |
711 | | /* skip WS */ |
712 | 462k | ign_xmlws(xmlctx); |
713 | | |
714 | | /* parse and store all namespaces */ |
715 | 462k | prev_input = xmlctx->in->current; |
716 | 462k | prev_line = xmlctx->in->line; |
717 | 462k | is_ns = 1; |
718 | 502k | while ((xmlctx->in->current[0] != '\0') && !(ret = ly_getutf8(&xmlctx->in->current, &c, &parsed))) { |
719 | 501k | if (!is_xmlqnamestartchar(c)) { |
720 | 461k | break; |
721 | 461k | } |
722 | 40.0k | xmlctx->in->current -= parsed; |
723 | | |
724 | | /* parse attribute name */ |
725 | 40.0k | LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup); |
726 | | |
727 | | /* parse the value */ |
728 | 39.8k | LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup); |
729 | | |
730 | | /* store every namespace */ |
731 | 39.6k | if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) { |
732 | 32.8k | ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0, |
733 | 32.8k | dynamic ? value : strndup(value, value_len)); |
734 | 32.8k | dynamic = 0; |
735 | 32.8k | LY_CHECK_GOTO(ret, cleanup); |
736 | 32.7k | } else { |
737 | | /* not a namespace */ |
738 | 6.81k | is_ns = 0; |
739 | 6.81k | } |
740 | 39.5k | if (dynamic) { |
741 | 901 | free(value); |
742 | 901 | } |
743 | | |
744 | | /* skip WS */ |
745 | 39.5k | ign_xmlws(xmlctx); |
746 | | |
747 | 39.5k | if (is_ns) { |
748 | | /* we can actually skip all the namespaces as there is no reason to parse them again */ |
749 | 30.9k | prev_input = xmlctx->in->current; |
750 | 30.9k | prev_line = xmlctx->in->line; |
751 | 30.9k | } |
752 | 39.5k | } |
753 | | |
754 | 462k | cleanup: |
755 | 462k | if (!ret) { |
756 | 462k | xmlctx->in->current = prev_input; |
757 | 462k | xmlctx->in->line = prev_line; |
758 | 462k | } |
759 | 462k | return ret; |
760 | 462k | } |
761 | | |
762 | | /** |
763 | | * @brief Move parser to the attribute content and parse it. |
764 | | * |
765 | | * @param[in] xmlctx XML context to use. |
766 | | * @param[out] value Parsed attribute value. |
767 | | * @param[out] value_len Length of @p value. |
768 | | * @param[out] ws_only Whether the value is empty/white-spaces only. |
769 | | * @param[out] dynamic Whether the value was dynamically allocated. |
770 | | * @return LY_ERR value. |
771 | | */ |
772 | | static LY_ERR |
773 | | lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic) |
774 | 46.8k | { |
775 | 46.8k | char quot; |
776 | | |
777 | | /* skip WS */ |
778 | 46.8k | ign_xmlws(xmlctx); |
779 | | |
780 | | /* skip '=' */ |
781 | 46.8k | if (xmlctx->in->current[0] == '\0') { |
782 | 17 | LOGVAL(xmlctx->ctx, LY_VCODE_EOF); |
783 | 17 | return LY_EVALID; |
784 | 46.8k | } else if (xmlctx->in->current[0] != '=') { |
785 | 44 | LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), |
786 | 44 | xmlctx->in->current, "'='"); |
787 | 44 | return LY_EVALID; |
788 | 44 | } |
789 | 46.8k | move_input(xmlctx, 1); |
790 | | |
791 | | /* skip WS */ |
792 | 46.8k | ign_xmlws(xmlctx); |
793 | | |
794 | | /* find quotes */ |
795 | 46.8k | if (xmlctx->in->current[0] == '\0') { |
796 | 19 | LOGVAL(xmlctx->ctx, LY_VCODE_EOF); |
797 | 19 | return LY_EVALID; |
798 | 46.7k | } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) { |
799 | 18 | LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), |
800 | 18 | xmlctx->in->current, "either single or double quotation mark"); |
801 | 18 | return LY_EVALID; |
802 | 18 | } |
803 | | |
804 | | /* remember quote */ |
805 | 46.7k | quot = xmlctx->in->current[0]; |
806 | 46.7k | move_input(xmlctx, 1); |
807 | | |
808 | | /* parse attribute value */ |
809 | 46.7k | LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic)); |
810 | | |
811 | | /* move after ending quote (without checking for EOF) */ |
812 | 46.7k | ly_in_skip(xmlctx->in, 1); |
813 | | |
814 | 46.7k | return LY_SUCCESS; |
815 | 46.7k | } |
816 | | |
817 | | /** |
818 | | * @brief Move parser to the next attribute and parse it. |
819 | | * |
820 | | * @param[in] xmlctx XML context to use. |
821 | | * @param[out] prefix Parsed attribute prefix. |
822 | | * @param[out] prefix_len Length of @p prefix. |
823 | | * @param[out] name Parsed attribute name. |
824 | | * @param[out] name_len Length of @p name. |
825 | | * @return LY_ERR value. |
826 | | */ |
827 | | static LY_ERR |
828 | | lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) |
829 | 468k | { |
830 | 468k | const char *in; |
831 | 468k | char *value; |
832 | 468k | uint32_t c; |
833 | 468k | size_t parsed, value_len; |
834 | 468k | ly_bool ws_only, dynamic; |
835 | | |
836 | | /* skip WS */ |
837 | 468k | ign_xmlws(xmlctx); |
838 | | |
839 | | /* parse only possible attributes */ |
840 | 469k | while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) { |
841 | 8.04k | in = xmlctx->in->current; |
842 | 8.04k | if (in[0] == '\0') { |
843 | 352 | LOGVAL(xmlctx->ctx, LY_VCODE_EOF); |
844 | 352 | return LY_EVALID; |
845 | 7.69k | } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) { |
846 | 451 | LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed, |
847 | 451 | "element tag end ('>' or '/>') or an attribute"); |
848 | 451 | return LY_EVALID; |
849 | 451 | } |
850 | | |
851 | | /* parse attribute name */ |
852 | 7.24k | LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len)); |
853 | | |
854 | 7.24k | if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) { |
855 | | /* standard attribute */ |
856 | 5.92k | break; |
857 | 5.92k | } |
858 | | |
859 | | /* namespace, skip it */ |
860 | 1.31k | LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic)); |
861 | 1.31k | if (dynamic) { |
862 | 223 | free(value); |
863 | 223 | } |
864 | | |
865 | | /* skip WS */ |
866 | 1.31k | ign_xmlws(xmlctx); |
867 | 1.31k | } |
868 | | |
869 | 467k | return LY_SUCCESS; |
870 | 468k | } |
871 | | |
872 | | /** |
873 | | * @brief Move parser to the next element and parse it. |
874 | | * |
875 | | * @param[in] xmlctx XML context to use. |
876 | | * @param[out] prefix Parsed element prefix. |
877 | | * @param[out] prefix_len Length of @p prefix. |
878 | | * @param[out] name Parse element name. |
879 | | * @param[out] name_len Length of @p name. |
880 | | * @param[out] closing Flag if the element is closing (includes '/'). |
881 | | * @return LY_ERR value. |
882 | | */ |
883 | | static LY_ERR |
884 | | lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len, |
885 | | ly_bool *closing) |
886 | 530k | { |
887 | | /* skip WS until EOF or after opening tag '<' */ |
888 | 530k | LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx)); |
889 | 529k | if (xmlctx->in->current[0] == '\0') { |
890 | | /* set return values */ |
891 | 431 | *prefix = *name = NULL; |
892 | 431 | *prefix_len = *name_len = 0; |
893 | 431 | return LY_SUCCESS; |
894 | 431 | } |
895 | | |
896 | 528k | if (xmlctx->in->current[0] == '/') { |
897 | 65.1k | move_input(xmlctx, 1); |
898 | 65.1k | *closing = 1; |
899 | 463k | } else { |
900 | 463k | *closing = 0; |
901 | 463k | } |
902 | | |
903 | | /* skip WS */ |
904 | 528k | ign_xmlws(xmlctx); |
905 | | |
906 | | /* parse element name */ |
907 | 528k | LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len)); |
908 | | |
909 | 528k | return LY_SUCCESS; |
910 | 528k | } |
911 | | |
912 | | LY_ERR |
913 | | lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p) |
914 | 7.87k | { |
915 | 7.87k | LY_ERR ret = LY_SUCCESS; |
916 | 7.87k | struct lyxml_ctx *xmlctx; |
917 | 7.87k | ly_bool closing; |
918 | | |
919 | | /* new context */ |
920 | 7.87k | xmlctx = calloc(1, sizeof *xmlctx); |
921 | 7.87k | LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM); |
922 | 7.87k | xmlctx->ctx = ctx; |
923 | 7.87k | xmlctx->in = in; |
924 | | |
925 | 7.87k | ly_log_location(NULL, NULL, NULL, in); |
926 | | |
927 | | /* parse next element, if any */ |
928 | 7.87k | LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, |
929 | 7.87k | &xmlctx->name_len, &closing), cleanup); |
930 | | |
931 | 7.16k | if (xmlctx->in->current[0] == '\0') { |
932 | | /* update status */ |
933 | 45 | xmlctx->status = LYXML_END; |
934 | 7.11k | } else if (closing) { |
935 | 1 | LOGVAL(ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", (int)xmlctx->name_len, xmlctx->name); |
936 | 1 | ret = LY_EVALID; |
937 | 1 | goto cleanup; |
938 | 7.11k | } else { |
939 | | /* open an element, also parses all enclosed namespaces */ |
940 | 7.11k | LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup); |
941 | | |
942 | | /* update status */ |
943 | 6.67k | xmlctx->status = LYXML_ELEMENT; |
944 | 6.67k | } |
945 | | |
946 | 7.87k | cleanup: |
947 | 7.87k | if (ret) { |
948 | 1.15k | lyxml_ctx_free(xmlctx); |
949 | 6.71k | } else { |
950 | 6.71k | *xmlctx_p = xmlctx; |
951 | 6.71k | } |
952 | 7.87k | return ret; |
953 | 7.16k | } |
954 | | |
955 | | LY_ERR |
956 | | lyxml_ctx_next(struct lyxml_ctx *xmlctx) |
957 | 1.38M | { |
958 | 1.38M | LY_ERR ret = LY_SUCCESS; |
959 | 1.38M | ly_bool closing; |
960 | 1.38M | struct lyxml_elem *e; |
961 | | |
962 | | /* if the value was not used, free it */ |
963 | 1.38M | if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { |
964 | 992 | free((char *)xmlctx->value); |
965 | 992 | xmlctx->value = NULL; |
966 | 992 | xmlctx->dynamic = 0; |
967 | 992 | } |
968 | | |
969 | 1.38M | switch (xmlctx->status) { |
970 | 458k | case LYXML_ELEM_CONTENT: |
971 | | /* content |</elem> */ |
972 | | |
973 | | /* handle special case when empty content for "<elem/>" was returned */ |
974 | 458k | if (xmlctx->in->current[0] == '/') { |
975 | 384k | assert(xmlctx->elements.count); |
976 | 384k | e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1]; |
977 | | |
978 | | /* close the element (parses closing tag) */ |
979 | 384k | ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1); |
980 | 384k | LY_CHECK_GOTO(ret, cleanup); |
981 | | |
982 | | /* update status */ |
983 | 383k | xmlctx->status = LYXML_ELEM_CLOSE; |
984 | 383k | break; |
985 | 384k | } |
986 | | /* fall through */ |
987 | 523k | case LYXML_ELEM_CLOSE: |
988 | | /* </elem>| <elem2>* */ |
989 | | |
990 | | /* parse next element, if any */ |
991 | 523k | ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing); |
992 | 523k | LY_CHECK_GOTO(ret, cleanup); |
993 | | |
994 | 521k | if (xmlctx->in->current[0] == '\0') { |
995 | | /* update status */ |
996 | 386 | xmlctx->status = LYXML_END; |
997 | 520k | } else if (closing) { |
998 | | /* close an element (parses also closing tag) */ |
999 | 65.1k | ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0); |
1000 | 65.1k | LY_CHECK_GOTO(ret, cleanup); |
1001 | | |
1002 | | /* update status */ |
1003 | 64.8k | xmlctx->status = LYXML_ELEM_CLOSE; |
1004 | 455k | } else { |
1005 | | /* open an element, also parses all enclosed namespaces */ |
1006 | 455k | ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len); |
1007 | 455k | LY_CHECK_GOTO(ret, cleanup); |
1008 | | |
1009 | | /* update status */ |
1010 | 455k | xmlctx->status = LYXML_ELEMENT; |
1011 | 455k | } |
1012 | 520k | break; |
1013 | | |
1014 | 520k | case LYXML_ELEMENT: |
1015 | | /* <elem| attr='val'* > content */ |
1016 | 468k | case LYXML_ATTR_CONTENT: |
1017 | | /* attr='val'| attr='val'* > content */ |
1018 | | |
1019 | | /* parse attribute name, if any */ |
1020 | 468k | ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len); |
1021 | 468k | LY_CHECK_GOTO(ret, cleanup); |
1022 | | |
1023 | 467k | if (xmlctx->in->current[0] == '>') { |
1024 | | /* no attributes but a closing tag */ |
1025 | 76.9k | ly_in_skip(xmlctx->in, 1); |
1026 | 76.9k | if (!xmlctx->in->current[0]) { |
1027 | 45 | LOGVAL(xmlctx->ctx, LY_VCODE_EOF); |
1028 | 45 | ret = LY_EVALID; |
1029 | 45 | goto cleanup; |
1030 | 45 | } |
1031 | | |
1032 | | /* parse element content */ |
1033 | 76.8k | ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, |
1034 | 76.8k | &xmlctx->dynamic); |
1035 | 76.8k | LY_CHECK_GOTO(ret, cleanup); |
1036 | | |
1037 | 76.4k | if (!xmlctx->value_len) { |
1038 | | /* empty value should by alocated staticaly, but check for in any case */ |
1039 | 21.9k | if (xmlctx->dynamic) { |
1040 | 180 | free((char *) xmlctx->value); |
1041 | 180 | } |
1042 | | /* use empty value, easier to work with */ |
1043 | 21.9k | xmlctx->value = ""; |
1044 | 21.9k | xmlctx->dynamic = 0; |
1045 | 21.9k | } |
1046 | | |
1047 | | /* update status */ |
1048 | 76.4k | xmlctx->status = LYXML_ELEM_CONTENT; |
1049 | 390k | } else if (xmlctx->in->current[0] == '/') { |
1050 | | /* no content but we still return it */ |
1051 | 384k | xmlctx->value = ""; |
1052 | 384k | xmlctx->value_len = 0; |
1053 | 384k | xmlctx->ws_only = 1; |
1054 | 384k | xmlctx->dynamic = 0; |
1055 | | |
1056 | | /* update status */ |
1057 | 384k | xmlctx->status = LYXML_ELEM_CONTENT; |
1058 | 384k | } else { |
1059 | | /* update status */ |
1060 | 5.92k | xmlctx->status = LYXML_ATTRIBUTE; |
1061 | 5.92k | } |
1062 | 466k | break; |
1063 | | |
1064 | 466k | case LYXML_ATTRIBUTE: |
1065 | | /* attr|='val' */ |
1066 | | |
1067 | | /* skip formatting and parse value */ |
1068 | 5.69k | ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic); |
1069 | 5.69k | LY_CHECK_GOTO(ret, cleanup); |
1070 | | |
1071 | | /* update status */ |
1072 | 5.69k | xmlctx->status = LYXML_ATTR_CONTENT; |
1073 | 5.69k | break; |
1074 | | |
1075 | 0 | case LYXML_END: |
1076 | | /* </elem> |EOF */ |
1077 | | /* nothing to do */ |
1078 | 0 | break; |
1079 | 1.38M | } |
1080 | | |
1081 | 1.38M | cleanup: |
1082 | 1.38M | if (ret) { |
1083 | | /* invalidate context */ |
1084 | 3.72k | xmlctx->status = LYXML_END; |
1085 | 3.72k | } |
1086 | 1.38M | return ret; |
1087 | 1.38M | } |
1088 | | |
1089 | | LY_ERR |
1090 | | lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next) |
1091 | 0 | { |
1092 | 0 | LY_ERR ret = LY_SUCCESS; |
1093 | 0 | const char *prefix, *name, *prev_input; |
1094 | 0 | size_t prefix_len, name_len; |
1095 | 0 | ly_bool closing; |
1096 | |
|
1097 | 0 | prev_input = xmlctx->in->current; |
1098 | |
|
1099 | 0 | switch (xmlctx->status) { |
1100 | 0 | case LYXML_ELEM_CONTENT: |
1101 | 0 | if (xmlctx->in->current[0] == '/') { |
1102 | 0 | *next = LYXML_ELEM_CLOSE; |
1103 | 0 | break; |
1104 | 0 | } |
1105 | | /* fall through */ |
1106 | 0 | case LYXML_ELEM_CLOSE: |
1107 | | /* parse next element, if any */ |
1108 | 0 | ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing); |
1109 | 0 | LY_CHECK_GOTO(ret, cleanup); |
1110 | |
|
1111 | 0 | if (xmlctx->in->current[0] == '\0') { |
1112 | 0 | *next = LYXML_END; |
1113 | 0 | } else if (closing) { |
1114 | 0 | *next = LYXML_ELEM_CLOSE; |
1115 | 0 | } else { |
1116 | 0 | *next = LYXML_ELEMENT; |
1117 | 0 | } |
1118 | 0 | break; |
1119 | 0 | case LYXML_ELEMENT: |
1120 | 0 | case LYXML_ATTR_CONTENT: |
1121 | | /* parse attribute name, if any */ |
1122 | 0 | ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len); |
1123 | 0 | LY_CHECK_GOTO(ret, cleanup); |
1124 | |
|
1125 | 0 | if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) { |
1126 | 0 | *next = LYXML_ELEM_CONTENT; |
1127 | 0 | } else { |
1128 | 0 | *next = LYXML_ATTRIBUTE; |
1129 | 0 | } |
1130 | 0 | break; |
1131 | 0 | case LYXML_ATTRIBUTE: |
1132 | 0 | *next = LYXML_ATTR_CONTENT; |
1133 | 0 | break; |
1134 | 0 | case LYXML_END: |
1135 | 0 | *next = LYXML_END; |
1136 | 0 | break; |
1137 | 0 | } |
1138 | | |
1139 | 0 | cleanup: |
1140 | 0 | xmlctx->in->current = prev_input; |
1141 | 0 | return ret; |
1142 | 0 | } |
1143 | | |
1144 | | /** |
1145 | | * @brief Free all namespaces in XML context. |
1146 | | * |
1147 | | * @param[in] xmlctx XML context to use. |
1148 | | */ |
1149 | | static void |
1150 | | lyxml_ns_rm_all(struct lyxml_ctx *xmlctx) |
1151 | 7.87k | { |
1152 | 7.87k | struct lyxml_ns *ns; |
1153 | 7.87k | uint32_t i; |
1154 | | |
1155 | 14.9k | for (i = 0; i < xmlctx->ns.count; ++i) { |
1156 | 7.09k | ns = xmlctx->ns.objs[i]; |
1157 | | |
1158 | 7.09k | free(ns->prefix); |
1159 | 7.09k | free(ns->uri); |
1160 | 7.09k | free(ns); |
1161 | 7.09k | } |
1162 | 7.87k | ly_set_erase(&xmlctx->ns, NULL); |
1163 | 7.87k | } |
1164 | | |
1165 | | void |
1166 | | lyxml_ctx_free(struct lyxml_ctx *xmlctx) |
1167 | 9.45k | { |
1168 | 9.45k | if (!xmlctx) { |
1169 | 1.58k | return; |
1170 | 1.58k | } |
1171 | | |
1172 | 7.87k | ly_log_location_revert(0, 0, 0, 1); |
1173 | | |
1174 | 7.87k | if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { |
1175 | 5 | free((char *)xmlctx->value); |
1176 | 5 | } |
1177 | 7.87k | ly_set_erase(&xmlctx->elements, free); |
1178 | 7.87k | lyxml_ns_rm_all(xmlctx); |
1179 | 7.87k | free(xmlctx); |
1180 | 7.87k | } |
1181 | | |
1182 | | /** |
1183 | | * @brief Duplicate an XML element. |
1184 | | * |
1185 | | * @param[in] elem Element to duplicate. |
1186 | | * @return Element duplicate. |
1187 | | * @return NULL on error. |
1188 | | */ |
1189 | | static struct lyxml_elem * |
1190 | | lyxml_elem_dup(const struct lyxml_elem *elem) |
1191 | 0 | { |
1192 | 0 | struct lyxml_elem *dup; |
1193 | |
|
1194 | 0 | dup = malloc(sizeof *dup); |
1195 | 0 | LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL); |
1196 | |
|
1197 | 0 | memcpy(dup, elem, sizeof *dup); |
1198 | |
|
1199 | 0 | return dup; |
1200 | 0 | } |
1201 | | |
1202 | | /** |
1203 | | * @brief Duplicate an XML namespace. |
1204 | | * |
1205 | | * @param[in] ns Namespace to duplicate. |
1206 | | * @return Namespace duplicate. |
1207 | | * @return NULL on error. |
1208 | | */ |
1209 | | static struct lyxml_ns * |
1210 | | lyxml_ns_dup(const struct lyxml_ns *ns) |
1211 | 0 | { |
1212 | 0 | struct lyxml_ns *dup; |
1213 | |
|
1214 | 0 | dup = malloc(sizeof *dup); |
1215 | 0 | LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL); |
1216 | |
|
1217 | 0 | if (ns->prefix) { |
1218 | 0 | dup->prefix = strdup(ns->prefix); |
1219 | 0 | LY_CHECK_ERR_RET(!dup->prefix, LOGMEM(NULL); free(dup), NULL); |
1220 | 0 | } else { |
1221 | 0 | dup->prefix = NULL; |
1222 | 0 | } |
1223 | 0 | dup->uri = strdup(ns->uri); |
1224 | 0 | LY_CHECK_ERR_RET(!dup->uri, LOGMEM(NULL); free(dup->prefix); free(dup), NULL); |
1225 | 0 | dup->depth = ns->depth; |
1226 | |
|
1227 | 0 | return dup; |
1228 | 0 | } |
1229 | | |
1230 | | LY_ERR |
1231 | | lyxml_ctx_backup(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup) |
1232 | 0 | { |
1233 | 0 | uint32_t i; |
1234 | | |
1235 | | /* first make shallow copy */ |
1236 | 0 | memcpy(backup, xmlctx, sizeof *backup); |
1237 | |
|
1238 | 0 | if ((xmlctx->status == LYXML_ELEM_CONTENT) && xmlctx->dynamic) { |
1239 | | /* it was backed up, do not free */ |
1240 | 0 | xmlctx->dynamic = 0; |
1241 | 0 | } |
1242 | | |
1243 | | /* backup in */ |
1244 | 0 | backup->b_current = xmlctx->in->current; |
1245 | 0 | backup->b_line = xmlctx->in->line; |
1246 | | |
1247 | | /* duplicate elements */ |
1248 | 0 | backup->elements.objs = malloc(xmlctx->elements.size * sizeof(struct lyxml_elem)); |
1249 | 0 | LY_CHECK_ERR_RET(!backup->elements.objs, LOGMEM(xmlctx->ctx), LY_EMEM); |
1250 | 0 | for (i = 0; i < xmlctx->elements.count; ++i) { |
1251 | 0 | backup->elements.objs[i] = lyxml_elem_dup(xmlctx->elements.objs[i]); |
1252 | 0 | LY_CHECK_RET(!backup->elements.objs[i], LY_EMEM); |
1253 | 0 | } |
1254 | | |
1255 | | /* duplicate ns */ |
1256 | 0 | backup->ns.objs = malloc(xmlctx->ns.size * sizeof(struct lyxml_ns)); |
1257 | 0 | LY_CHECK_ERR_RET(!backup->ns.objs, LOGMEM(xmlctx->ctx), LY_EMEM); |
1258 | 0 | for (i = 0; i < xmlctx->ns.count; ++i) { |
1259 | 0 | backup->ns.objs[i] = lyxml_ns_dup(xmlctx->ns.objs[i]); |
1260 | 0 | LY_CHECK_RET(!backup->ns.objs[i], LY_EMEM); |
1261 | 0 | } |
1262 | | |
1263 | 0 | return LY_SUCCESS; |
1264 | 0 | } |
1265 | | |
1266 | | void |
1267 | | lyxml_ctx_restore(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup) |
1268 | 0 | { |
1269 | 0 | if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { |
1270 | | /* free dynamic value */ |
1271 | 0 | free((char *)xmlctx->value); |
1272 | 0 | } |
1273 | | |
1274 | | /* free elements */ |
1275 | 0 | ly_set_erase(&xmlctx->elements, free); |
1276 | | |
1277 | | /* free ns */ |
1278 | 0 | lyxml_ns_rm_all(xmlctx); |
1279 | | |
1280 | | /* restore in */ |
1281 | 0 | xmlctx->in->current = backup->b_current; |
1282 | 0 | xmlctx->in->line = backup->b_line; |
1283 | 0 | backup->in = xmlctx->in; |
1284 | | |
1285 | | /* restore backup */ |
1286 | 0 | memcpy(xmlctx, backup, sizeof *xmlctx); |
1287 | 0 | } |
1288 | | |
1289 | | LY_ERR |
1290 | | lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute) |
1291 | 0 | { |
1292 | 0 | LY_ERR ret; |
1293 | |
|
1294 | 0 | if (!text) { |
1295 | 0 | return 0; |
1296 | 0 | } |
1297 | | |
1298 | 0 | for (uint64_t u = 0; text[u]; u++) { |
1299 | 0 | switch (text[u]) { |
1300 | 0 | case '&': |
1301 | 0 | ret = ly_print_(out, "&"); |
1302 | 0 | break; |
1303 | 0 | case '<': |
1304 | 0 | ret = ly_print_(out, "<"); |
1305 | 0 | break; |
1306 | 0 | case '>': |
1307 | | /* not needed, just for readability */ |
1308 | 0 | ret = ly_print_(out, ">"); |
1309 | 0 | break; |
1310 | 0 | case '"': |
1311 | 0 | if (attribute) { |
1312 | 0 | ret = ly_print_(out, """); |
1313 | 0 | break; |
1314 | 0 | } |
1315 | | /* fall through */ |
1316 | 0 | default: |
1317 | 0 | ret = ly_write_(out, &text[u], 1); |
1318 | 0 | break; |
1319 | 0 | } |
1320 | 0 | LY_CHECK_RET(ret); |
1321 | 0 | } |
1322 | | |
1323 | 0 | return LY_SUCCESS; |
1324 | 0 | } |
1325 | | |
1326 | | LY_ERR |
1327 | | lyxml_value_compare(const struct ly_ctx *ctx1, const char *value1, void *val_prefix_data1, |
1328 | | const struct ly_ctx *ctx2, const char *value2, void *val_prefix_data2) |
1329 | 0 | { |
1330 | 0 | const char *value1_iter, *value2_iter; |
1331 | 0 | const char *value1_next, *value2_next; |
1332 | 0 | uint32_t value1_len, value2_len; |
1333 | 0 | ly_bool is_prefix1, is_prefix2; |
1334 | 0 | const struct lys_module *mod1, *mod2; |
1335 | 0 | LY_ERR ret; |
1336 | |
|
1337 | 0 | if (!value1 && !value2) { |
1338 | 0 | return LY_SUCCESS; |
1339 | 0 | } |
1340 | 0 | if ((value1 && !value2) || (!value1 && value2)) { |
1341 | 0 | return LY_ENOT; |
1342 | 0 | } |
1343 | | |
1344 | 0 | if (!ctx2) { |
1345 | 0 | ctx2 = ctx1; |
1346 | 0 | } |
1347 | |
|
1348 | 0 | ret = LY_SUCCESS; |
1349 | 0 | for (value1_iter = value1, value2_iter = value2; |
1350 | 0 | value1_iter && value2_iter; |
1351 | 0 | value1_iter = value1_next, value2_iter = value2_next) { |
1352 | 0 | if ((ret = ly_value_prefix_next(value1_iter, NULL, &value1_len, &is_prefix1, &value1_next))) { |
1353 | 0 | break; |
1354 | 0 | } |
1355 | 0 | if ((ret = ly_value_prefix_next(value2_iter, NULL, &value2_len, &is_prefix2, &value2_next))) { |
1356 | 0 | break; |
1357 | 0 | } |
1358 | | |
1359 | 0 | if (is_prefix1 != is_prefix2) { |
1360 | 0 | ret = LY_ENOT; |
1361 | 0 | break; |
1362 | 0 | } |
1363 | | |
1364 | 0 | if (!is_prefix1) { |
1365 | 0 | if (value1_len != value2_len) { |
1366 | 0 | ret = LY_ENOT; |
1367 | 0 | break; |
1368 | 0 | } |
1369 | 0 | if (strncmp(value1_iter, value2_iter, value1_len)) { |
1370 | 0 | ret = LY_ENOT; |
1371 | 0 | break; |
1372 | 0 | } |
1373 | 0 | continue; |
1374 | 0 | } |
1375 | | |
1376 | 0 | mod1 = mod2 = NULL; |
1377 | 0 | if (val_prefix_data1) { |
1378 | | /* find module of the first prefix, if any */ |
1379 | 0 | mod1 = ly_resolve_prefix(ctx1, value1_iter, value1_len, LY_VALUE_XML, val_prefix_data1); |
1380 | 0 | } |
1381 | 0 | if (val_prefix_data2) { |
1382 | 0 | mod2 = ly_resolve_prefix(ctx2, value2_iter, value2_len, LY_VALUE_XML, val_prefix_data2); |
1383 | 0 | } |
1384 | 0 | if (!mod1 || !mod2) { |
1385 | | /* not a prefix or maps to different namespaces */ |
1386 | 0 | ret = LY_ENOT; |
1387 | 0 | break; |
1388 | 0 | } |
1389 | | |
1390 | 0 | if (mod1->ctx == mod2->ctx) { |
1391 | | /* same contexts */ |
1392 | 0 | if ((mod1->name != mod2->name) || (mod1->revision != mod2->revision)) { |
1393 | 0 | ret = LY_ENOT; |
1394 | 0 | break; |
1395 | 0 | } |
1396 | 0 | } else { |
1397 | | /* different contexts */ |
1398 | 0 | if (strcmp(mod1->name, mod2->name)) { |
1399 | 0 | ret = LY_ENOT; |
1400 | 0 | break; |
1401 | 0 | } |
1402 | | |
1403 | 0 | if (mod1->revision || mod2->revision) { |
1404 | 0 | if (!mod1->revision || !mod2->revision) { |
1405 | 0 | ret = LY_ENOT; |
1406 | 0 | break; |
1407 | 0 | } |
1408 | 0 | if (strcmp(mod1->revision, mod2->revision)) { |
1409 | 0 | ret = LY_ENOT; |
1410 | 0 | break; |
1411 | 0 | } |
1412 | 0 | } |
1413 | 0 | } |
1414 | 0 | } |
1415 | |
|
1416 | 0 | if (value1_iter || value2_iter) { |
1417 | 0 | ret = LY_ENOT; |
1418 | 0 | } |
1419 | |
|
1420 | 0 | return ret; |
1421 | 0 | } |