/src/httpd/srclib/apr/xml/apr_xml.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Licensed to the Apache Software Foundation (ASF) under one or more |
2 | | * contributor license agreements. See the NOTICE file distributed with |
3 | | * this work for additional information regarding copyright ownership. |
4 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
5 | | * (the "License"); you may not use this file except in compliance with |
6 | | * the License. You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include "apr.h" |
18 | | #include "apr_private.h" |
19 | | #include "apr_strings.h" |
20 | | |
21 | | #define APR_WANT_STDIO /* for sprintf() */ |
22 | | #define APR_WANT_STRFUNC |
23 | | #include "apr_want.h" |
24 | | |
25 | | #include "apr_xml.h" |
26 | | typedef void* XML_Parser; |
27 | | typedef int XML_Error; |
28 | | typedef unsigned char XML_Char; |
29 | | #include "apr_xml_internal.h" |
30 | | |
/* line terminator appended to generated markup (see apr_xml_empty_elem) */
#define DEBUG_CR "\r\n"

/*
 * Attribute-name keywords, spelled as explicit byte values rather than
 * character literals.  In ASCII the first is "xmlns" and the second is
 * "xml:lang" (despite the "xmlns_lang" identifier).
 * NOTE(review): presumably hex is used so the bytes match the parser's
 * output even when the compiler's execution charset is not ASCII
 * (see the APR_CHARSET_EBCDIC section of this file) -- confirm.
 */
static const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' };
static const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' };

/* errors related to namespace processing */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL   (-1001)

/*
 * Test for a name that begins with [Xx][Mm][Ll].
 *
 * The argument is evaluated several times, so it must be free of side
 * effects.  It is fully parenthesized so that any pointer expression
 * (e.g. "p + 1") can be passed, not only a plain identifier.  The &&
 * chain stops at the first mismatch, so a NUL-terminated string shorter
 * than three bytes is never read past its terminator.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
45 | | |
46 | | |
/*
 * One namespace declaration (xmlns="..." or xmlns:prefix="...") attached
 * to an element.  Each element carries a singly linked list of these;
 * start_handler() pushes new declarations onto the head of the list.
 */
typedef struct apr_xml_ns_scope {
    const char *prefix;     /* prefix used for this ns; "" for a default
                             * (xmlns=) declaration */
    int ns;                 /* index into namespace table, as returned by
                             * apr_xml_insert_uri() */
    int emptyURI;           /* the namespace URI is the empty string */
    struct apr_xml_ns_scope *next;  /* next scoped namespace */
} apr_xml_ns_scope;
54 | | |
55 | | |
56 | | /* return namespace table index for a given prefix */ |
57 | | static int find_prefix(apr_xml_parser *parser, const char *prefix) |
58 | 0 | { |
59 | 0 | apr_xml_elem *elem = parser->cur_elem; |
60 | | |
61 | | /* |
62 | | ** Walk up the tree, looking for a namespace scope that defines this |
63 | | ** prefix. |
64 | | */ |
65 | 0 | for (; elem; elem = elem->parent) { |
66 | 0 | apr_xml_ns_scope *ns_scope; |
67 | |
|
68 | 0 | for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) { |
69 | 0 | if (strcmp(prefix, ns_scope->prefix) == 0) { |
70 | 0 | if (ns_scope->emptyURI) { |
71 | | /* |
72 | | ** It is possible to set the default namespace to an |
73 | | ** empty URI string; this resets the default namespace |
74 | | ** to mean "no namespace." We just found the prefix |
75 | | ** refers to an empty URI, so return "no namespace." |
76 | | */ |
77 | 0 | return APR_XML_NS_NONE; |
78 | 0 | } |
79 | | |
80 | 0 | return ns_scope->ns; |
81 | 0 | } |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | /* |
86 | | * If the prefix is empty (""), this means that a prefix was not |
87 | | * specified in the element/attribute. The search that was performed |
88 | | * just above did not locate a default namespace URI (which is stored |
89 | | * into ns_scope with an empty prefix). This means the element/attribute |
90 | | * has "no namespace". We have a reserved value for this. |
91 | | */ |
92 | 0 | if (*prefix == '\0') { |
93 | 0 | return APR_XML_NS_NONE; |
94 | 0 | } |
95 | | |
96 | | /* not found */ |
97 | 0 | return APR_XML_NS_ERROR_UNKNOWN_PREFIX; |
98 | 0 | } |
99 | | |
100 | | /* return original prefix given ns index */ |
101 | | static const char * find_prefix_name(const apr_xml_elem *elem, int ns, int parent) |
102 | 0 | { |
103 | | /* |
104 | | ** Walk up the tree, looking for a namespace scope that defines this |
105 | | ** prefix. |
106 | | */ |
107 | 0 | for (; elem; elem = parent ? elem->parent : NULL) { |
108 | 0 | apr_xml_ns_scope *ns_scope = elem->ns_scope; |
109 | |
|
110 | 0 | for (; ns_scope; ns_scope = ns_scope->next) { |
111 | 0 | if (ns_scope->ns == ns) |
112 | 0 | return ns_scope->prefix; |
113 | 0 | } |
114 | 0 | } |
115 | | /* not found */ |
116 | 0 | return ""; |
117 | 0 | } |
118 | | |
119 | | static void start_handler(void *userdata, const char *name, const char **attrs) |
120 | 0 | { |
121 | 0 | apr_xml_parser *parser = userdata; |
122 | 0 | apr_xml_elem *elem; |
123 | 0 | apr_xml_attr *attr; |
124 | 0 | apr_xml_attr *prev; |
125 | 0 | char *colon; |
126 | 0 | const char *quoted; |
127 | 0 | char *elem_name; |
128 | | |
129 | | /* punt once we find an error */ |
130 | 0 | if (parser->error) |
131 | 0 | return; |
132 | | |
133 | 0 | elem = apr_pcalloc(parser->p, sizeof(*elem)); |
134 | | |
135 | | /* prep the element */ |
136 | 0 | elem->name = elem_name = apr_pstrdup(parser->p, name); |
137 | | |
138 | | /* fill in the attributes (note: ends up in reverse order) */ |
139 | 0 | while (attrs && *attrs) { |
140 | 0 | attr = apr_palloc(parser->p, sizeof(*attr)); |
141 | 0 | attr->name = apr_pstrdup(parser->p, *attrs++); |
142 | 0 | attr->value = apr_pstrdup(parser->p, *attrs++); |
143 | 0 | attr->next = elem->attr; |
144 | 0 | elem->attr = attr; |
145 | 0 | } |
146 | | |
147 | | /* hook the element into the tree */ |
148 | 0 | if (parser->cur_elem == NULL) { |
149 | | /* no current element; this also becomes the root */ |
150 | 0 | parser->cur_elem = parser->doc->root = elem; |
151 | 0 | } |
152 | 0 | else { |
153 | | /* this element appeared within the current elem */ |
154 | 0 | elem->parent = parser->cur_elem; |
155 | | |
156 | | /* set up the child/sibling links */ |
157 | 0 | if (elem->parent->last_child == NULL) { |
158 | | /* no first child either */ |
159 | 0 | elem->parent->first_child = elem->parent->last_child = elem; |
160 | 0 | } |
161 | 0 | else { |
162 | | /* hook onto the end of the parent's children */ |
163 | 0 | elem->parent->last_child->next = elem; |
164 | 0 | elem->parent->last_child = elem; |
165 | 0 | } |
166 | | |
167 | | /* this element is now the current element */ |
168 | 0 | parser->cur_elem = elem; |
169 | 0 | } |
170 | | |
171 | | /* scan the attributes for namespace declarations */ |
172 | 0 | for (prev = NULL, attr = elem->attr; |
173 | 0 | attr; |
174 | 0 | attr = attr->next) { |
175 | 0 | if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) { |
176 | 0 | const char *prefix = &attr->name[5]; |
177 | 0 | apr_xml_ns_scope *ns_scope; |
178 | | |
179 | | /* test for xmlns:foo= form and xmlns= form */ |
180 | 0 | if (*prefix == 0x3A) { |
181 | | /* a namespace prefix declaration must have a |
182 | | non-empty value. */ |
183 | 0 | if (attr->value[0] == '\0') { |
184 | 0 | parser->error = APR_XML_NS_ERROR_INVALID_DECL; |
185 | 0 | return; |
186 | 0 | } |
187 | 0 | ++prefix; |
188 | 0 | } |
189 | 0 | else if (*prefix != '\0') { |
190 | | /* advance "prev" since "attr" is still present */ |
191 | 0 | prev = attr; |
192 | 0 | continue; |
193 | 0 | } |
194 | | |
195 | | /* quote the URI before we ever start working with it */ |
196 | 0 | quoted = apr_xml_quote_string(parser->p, attr->value, 1); |
197 | | |
198 | | /* build and insert the new scope */ |
199 | 0 | ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope)); |
200 | 0 | ns_scope->prefix = prefix; |
201 | 0 | ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted); |
202 | 0 | ns_scope->emptyURI = *quoted == '\0'; |
203 | 0 | ns_scope->next = elem->ns_scope; |
204 | 0 | elem->ns_scope = ns_scope; |
205 | | |
206 | | /* remove this attribute from the element */ |
207 | 0 | if (prev == NULL) |
208 | 0 | elem->attr = attr->next; |
209 | 0 | else |
210 | 0 | prev->next = attr->next; |
211 | | |
212 | | /* Note: prev will not be advanced since we just removed "attr" */ |
213 | 0 | } |
214 | 0 | else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) { |
215 | | /* save away the language (in quoted form) */ |
216 | 0 | elem->lang = apr_xml_quote_string(parser->p, attr->value, 1); |
217 | | |
218 | | /* remove this attribute from the element */ |
219 | 0 | if (prev == NULL) |
220 | 0 | elem->attr = attr->next; |
221 | 0 | else |
222 | 0 | prev->next = attr->next; |
223 | | |
224 | | /* Note: prev will not be advanced since we just removed "attr" */ |
225 | 0 | } |
226 | 0 | else { |
227 | | /* advance "prev" since "attr" is still present */ |
228 | 0 | prev = attr; |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | /* |
233 | | ** If an xml:lang attribute didn't exist (lang==NULL), then copy the |
234 | | ** language from the parent element (if present). |
235 | | ** |
236 | | ** NOTE: elem_size() *depends* upon this pointer equality. |
237 | | */ |
238 | 0 | if (elem->lang == NULL && elem->parent != NULL) |
239 | 0 | elem->lang = elem->parent->lang; |
240 | | |
241 | | /* adjust the element's namespace */ |
242 | 0 | colon = strchr(elem_name, 0x3A); |
243 | 0 | if (colon == NULL) { |
244 | | /* |
245 | | * The element is using the default namespace, which will always |
246 | | * be found. Either it will be "no namespace", or a default |
247 | | * namespace URI has been specified at some point. |
248 | | */ |
249 | 0 | elem->ns = find_prefix(parser, ""); |
250 | 0 | } |
251 | 0 | else if (APR_XML_NS_IS_RESERVED(elem->name)) { |
252 | 0 | elem->ns = APR_XML_NS_NONE; |
253 | 0 | } |
254 | 0 | else { |
255 | 0 | *colon = '\0'; |
256 | 0 | elem->ns = find_prefix(parser, elem->name); |
257 | 0 | elem->name = colon + 1; |
258 | |
|
259 | 0 | if (APR_XML_NS_IS_ERROR(elem->ns)) { |
260 | 0 | parser->error = elem->ns; |
261 | 0 | return; |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | | /* adjust all remaining attributes' namespaces */ |
266 | 0 | for (attr = elem->attr; attr; attr = attr->next) { |
267 | | /* |
268 | | * apr_xml_attr defines this as "const" but we dup'd it, so we |
269 | | * know that we can change it. a bit hacky, but the existing |
270 | | * structure def is best. |
271 | | */ |
272 | 0 | char *attr_name = (char *)attr->name; |
273 | |
|
274 | 0 | colon = strchr(attr_name, 0x3A); |
275 | 0 | if (colon == NULL) { |
276 | | /* |
277 | | * Attributes do NOT use the default namespace. Therefore, |
278 | | * we place them into the "no namespace" category. |
279 | | */ |
280 | 0 | attr->ns = APR_XML_NS_NONE; |
281 | 0 | } |
282 | 0 | else if (APR_XML_NS_IS_RESERVED(attr->name)) { |
283 | 0 | attr->ns = APR_XML_NS_NONE; |
284 | 0 | } |
285 | 0 | else { |
286 | 0 | *colon = '\0'; |
287 | 0 | attr->ns = find_prefix(parser, attr->name); |
288 | 0 | attr->name = colon + 1; |
289 | |
|
290 | 0 | if (APR_XML_NS_IS_ERROR(attr->ns)) { |
291 | 0 | parser->error = attr->ns; |
292 | 0 | return; |
293 | 0 | } |
294 | 0 | } |
295 | 0 | } |
296 | 0 | } |
297 | | |
298 | | static void end_handler(void *userdata, const char *name) |
299 | 0 | { |
300 | 0 | apr_xml_parser *parser = userdata; |
301 | | |
302 | | /* punt once we find an error */ |
303 | 0 | if (parser->error) |
304 | 0 | return; |
305 | | |
306 | | /* pop up one level */ |
307 | 0 | parser->cur_elem = parser->cur_elem->parent; |
308 | 0 | } |
309 | | |
310 | | static void cdata_handler(void *userdata, const char *data, int len) |
311 | 0 | { |
312 | 0 | apr_xml_parser *parser = userdata; |
313 | 0 | apr_xml_elem *elem; |
314 | 0 | apr_text_header *hdr; |
315 | 0 | const char *s; |
316 | | |
317 | | /* punt once we find an error */ |
318 | 0 | if (parser->error) |
319 | 0 | return; |
320 | | |
321 | 0 | elem = parser->cur_elem; |
322 | 0 | s = apr_pstrndup(parser->p, data, len); |
323 | |
|
324 | 0 | if (elem->last_child == NULL) { |
325 | | /* no children yet. this cdata follows the start tag */ |
326 | 0 | hdr = &elem->first_cdata; |
327 | 0 | } |
328 | 0 | else { |
329 | | /* child elements exist. this cdata follows the last child. */ |
330 | 0 | hdr = &elem->last_child->following_cdata; |
331 | 0 | } |
332 | |
|
333 | 0 | apr_text_append(parser->p, hdr, s); |
334 | 0 | } |
335 | | |
336 | | APR_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool) |
337 | 0 | { |
338 | 0 | return apr_xml_parser_create_internal(pool, &start_handler, &end_handler, &cdata_handler); |
339 | 0 | } |
340 | | |
341 | | APR_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser, |
342 | | const char *data, |
343 | | apr_size_t len) |
344 | 0 | { |
345 | 0 | return parser->impl->Parse(parser, data, len, 0 /* is_final */); |
346 | 0 | } |
347 | | |
348 | | APR_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser, |
349 | | apr_xml_doc **pdoc) |
350 | 0 | { |
351 | 0 | apr_status_t status = parser->impl->Parse(parser, "", 0, 1 /* is_final */); |
352 | | |
353 | | /* get rid of the parser */ |
354 | 0 | (void) apr_pool_cleanup_run(parser->p, parser, parser->impl->cleanup); |
355 | |
|
356 | 0 | if (status) |
357 | 0 | return status; |
358 | | |
359 | 0 | if (pdoc != NULL) |
360 | 0 | *pdoc = parser->doc; |
361 | 0 | return APR_SUCCESS; |
362 | 0 | } |
363 | | |
364 | | APR_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser, |
365 | | char *errbuf, |
366 | | apr_size_t errbufsize) |
367 | 0 | { |
368 | 0 | int error = parser->error; |
369 | 0 | const char *msg; |
370 | | |
371 | | /* clear our record of an error */ |
372 | 0 | parser->error = 0; |
373 | |
|
374 | 0 | switch (error) { |
375 | 0 | case 0: |
376 | 0 | msg = "No error."; |
377 | 0 | break; |
378 | | |
379 | 0 | case APR_XML_NS_ERROR_UNKNOWN_PREFIX: |
380 | 0 | msg = "An undefined namespace prefix was used."; |
381 | 0 | break; |
382 | | |
383 | 0 | case APR_XML_NS_ERROR_INVALID_DECL: |
384 | 0 | msg = "A namespace prefix was defined with an empty URI."; |
385 | 0 | break; |
386 | | |
387 | 0 | case APR_XML_ERROR_EXPAT: |
388 | 0 | (void) apr_snprintf(errbuf, errbufsize, |
389 | 0 | "XML parser error code: %s (%d)", |
390 | 0 | parser->xp_msg, parser->xp_err); |
391 | 0 | return errbuf; |
392 | | |
393 | 0 | case APR_XML_ERROR_PARSE_DONE: |
394 | 0 | msg = "The parser is not active."; |
395 | 0 | break; |
396 | | |
397 | 0 | default: |
398 | 0 | msg = "There was an unknown error within the XML body."; |
399 | 0 | break; |
400 | 0 | } |
401 | | |
402 | 0 | (void) apr_cpystrn(errbuf, msg, errbufsize); |
403 | 0 | return errbuf; |
404 | 0 | } |
405 | | |
406 | | APR_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p, |
407 | | apr_xml_parser **parser, |
408 | | apr_xml_doc **ppdoc, |
409 | | apr_file_t *xmlfd, |
410 | | apr_size_t buffer_length) |
411 | 0 | { |
412 | 0 | apr_status_t rv; |
413 | 0 | char *buffer; |
414 | 0 | apr_size_t length; |
415 | |
|
416 | 0 | *parser = apr_xml_parser_create(p); |
417 | 0 | if (*parser == NULL) { |
418 | | /* FIXME: returning an error code would be nice, |
419 | | * but we dont get one ;( */ |
420 | 0 | return APR_EGENERAL; |
421 | 0 | } |
422 | 0 | buffer = apr_palloc(p, buffer_length); |
423 | 0 | length = buffer_length; |
424 | |
|
425 | 0 | rv = apr_file_read(xmlfd, buffer, &length); |
426 | |
|
427 | 0 | while (rv == APR_SUCCESS) { |
428 | 0 | rv = apr_xml_parser_feed(*parser, buffer, length); |
429 | 0 | if (rv != APR_SUCCESS) { |
430 | 0 | return rv; |
431 | 0 | } |
432 | | |
433 | 0 | length = buffer_length; |
434 | 0 | rv = apr_file_read(xmlfd, buffer, &length); |
435 | 0 | } |
436 | 0 | if (rv != APR_EOF) { |
437 | 0 | return rv; |
438 | 0 | } |
439 | 0 | rv = apr_xml_parser_done(*parser, ppdoc); |
440 | 0 | *parser = NULL; |
441 | 0 | return rv; |
442 | 0 | } |
443 | | |
444 | | APR_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr, |
445 | | const char *text) |
446 | 0 | { |
447 | 0 | apr_text *t = apr_palloc(p, sizeof(*t)); |
448 | |
|
449 | 0 | t->text = text; |
450 | 0 | t->next = NULL; |
451 | |
|
452 | 0 | if (hdr->first == NULL) { |
453 | | /* no text elements yet */ |
454 | 0 | hdr->first = hdr->last = t; |
455 | 0 | } |
456 | 0 | else { |
457 | | /* append to the last text element */ |
458 | 0 | hdr->last->next = t; |
459 | 0 | hdr->last = t; |
460 | 0 | } |
461 | 0 | } |
462 | | |
463 | | |
464 | | /* --------------------------------------------------------------- |
465 | | ** |
466 | | ** XML UTILITY FUNCTIONS |
467 | | */ |
468 | | |
469 | | /* |
470 | | ** apr_xml_quote_string: quote an XML string |
471 | | ** |
472 | | ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
473 | | ** If quotes is true, then replace '"' with '&quot;'.
474 | | ** |
475 | | ** quotes is typically set to true for XML strings that will occur within |
476 | | ** double quotes -- attribute values. |
477 | | */ |
478 | | APR_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s, |
479 | | int quotes) |
480 | 0 | { |
481 | 0 | const char *scan; |
482 | 0 | apr_size_t len = 0; |
483 | 0 | apr_size_t extra = 0; |
484 | 0 | char *qstr; |
485 | 0 | char *qscan; |
486 | 0 | char c; |
487 | |
|
488 | 0 | for (scan = s; (c = *scan) != '\0'; ++scan, ++len) { |
489 | 0 | if (c == '<' || c == '>') |
490 | 0 | extra += 3; /* < or > */ |
491 | 0 | else if (c == '&') |
492 | 0 | extra += 4; /* & */ |
493 | 0 | else if (quotes && c == '"') |
494 | 0 | extra += 5; /* " */ |
495 | 0 | } |
496 | | |
497 | | /* nothing to do? */ |
498 | 0 | if (extra == 0) |
499 | 0 | return s; |
500 | | |
501 | 0 | qstr = apr_palloc(p, len + extra + 1); |
502 | 0 | for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) { |
503 | 0 | if (c == '<') { |
504 | 0 | *qscan++ = '&'; |
505 | 0 | *qscan++ = 'l'; |
506 | 0 | *qscan++ = 't'; |
507 | 0 | *qscan++ = ';'; |
508 | 0 | } |
509 | 0 | else if (c == '>') { |
510 | 0 | *qscan++ = '&'; |
511 | 0 | *qscan++ = 'g'; |
512 | 0 | *qscan++ = 't'; |
513 | 0 | *qscan++ = ';'; |
514 | 0 | } |
515 | 0 | else if (c == '&') { |
516 | 0 | *qscan++ = '&'; |
517 | 0 | *qscan++ = 'a'; |
518 | 0 | *qscan++ = 'm'; |
519 | 0 | *qscan++ = 'p'; |
520 | 0 | *qscan++ = ';'; |
521 | 0 | } |
522 | 0 | else if (quotes && c == '"') { |
523 | 0 | *qscan++ = '&'; |
524 | 0 | *qscan++ = 'q'; |
525 | 0 | *qscan++ = 'u'; |
526 | 0 | *qscan++ = 'o'; |
527 | 0 | *qscan++ = 't'; |
528 | 0 | *qscan++ = ';'; |
529 | 0 | } |
530 | 0 | else { |
531 | 0 | *qscan++ = c; |
532 | 0 | } |
533 | 0 | } |
534 | |
|
535 | 0 | *qscan = '\0'; |
536 | 0 | return qstr; |
537 | 0 | } |
538 | | |
/*
 * Number of decimal digits needed to print a namespace index.  Intended
 * for non-negative values (anything below 10 yields 1).  The argument is
 * evaluated more than once, so it must be free of side effects.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) < 10         ? 1 : \
     (ns) < 100        ? 2 : \
     (ns) < 1000       ? 3 : \
     (ns) < 10000      ? 4 : \
     (ns) < 100000     ? 5 : \
     (ns) < 1000000    ? 6 : \
     (ns) < 10000000   ? 7 : \
     (ns) < 100000000  ? 8 : \
     (ns) < 1000000000 ? 9 : 10)
544 | | |
545 | | static apr_size_t text_size(const apr_text *t) |
546 | 0 | { |
547 | 0 | apr_size_t size = 0; |
548 | |
|
549 | 0 | for (; t; t = t->next) |
550 | 0 | size += strlen(t->text); |
551 | 0 | return size; |
552 | 0 | } |
553 | | |
554 | | static apr_size_t elem_size(const apr_xml_elem *elem, int style, |
555 | | apr_array_header_t *namespaces, int *ns_map) |
556 | 0 | { |
557 | 0 | apr_size_t size; |
558 | |
|
559 | 0 | if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG || |
560 | 0 | style == APR_XML_X2T_PARSED) { |
561 | 0 | const apr_xml_attr *attr; |
562 | |
|
563 | 0 | size = 0; |
564 | |
|
565 | 0 | if (style == APR_XML_X2T_FULL_NS_LANG) { |
566 | 0 | int i; |
567 | | |
568 | | /* |
569 | | ** The outer element will contain xmlns:ns%d="%s" attributes |
570 | | ** and an xml:lang attribute, if applicable. |
571 | | */ |
572 | |
|
573 | 0 | for (i = namespaces->nelts; i--;) { |
574 | | /* compute size of: ' xmlns:ns%d="%s"' */ |
575 | 0 | size += (9 + APR_XML_NS_LEN(i) + 2 + |
576 | 0 | strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1); |
577 | 0 | } |
578 | |
|
579 | 0 | if (elem->lang != NULL) { |
580 | | /* compute size of: ' xml:lang="%s"' */ |
581 | 0 | size += 11 + strlen(elem->lang) + 1; |
582 | 0 | } |
583 | 0 | } |
584 | 0 | else if (style == APR_XML_X2T_PARSED) { |
585 | 0 | apr_xml_ns_scope *ns_scope = elem->ns_scope; |
586 | | |
587 | | /* compute size of: ' xmlns:%s="%s"' */ |
588 | 0 | for (; ns_scope; ns_scope = ns_scope->next) { |
589 | 0 | size += 10 + strlen(find_prefix_name(elem, ns_scope->ns, 0)) + |
590 | 0 | strlen(APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns)); |
591 | 0 | } |
592 | |
|
593 | 0 | if (elem->lang != NULL) { |
594 | | /* compute size of: ' xml:lang="%s"' */ |
595 | 0 | size += 11 + strlen(elem->lang) + 1; |
596 | 0 | } |
597 | 0 | } |
598 | |
|
599 | 0 | if (elem->ns == APR_XML_NS_NONE) { |
600 | | /* compute size of: <%s> */ |
601 | 0 | size += 1 + strlen(elem->name) + 1; |
602 | 0 | } |
603 | 0 | else if (style == APR_XML_X2T_PARSED) { |
604 | | /* compute size of: <%s:%s> */ |
605 | 0 | size += 3 + strlen(find_prefix_name(elem, elem->ns, 1)) + strlen(elem->name); |
606 | 0 | } |
607 | 0 | else { |
608 | 0 | int ns = ns_map ? ns_map[elem->ns] : elem->ns; |
609 | | |
610 | | /* compute size of: <ns%d:%s> */ |
611 | 0 | size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1; |
612 | 0 | } |
613 | |
|
614 | 0 | if (APR_XML_ELEM_IS_EMPTY(elem)) { |
615 | | /* insert a closing "/" */ |
616 | 0 | size += 1; |
617 | 0 | } |
618 | 0 | else { |
619 | | /* |
620 | | * two of above plus "/": |
621 | | * <ns%d:%s> ... </ns%d:%s> |
622 | | * OR <%s> ... </%s> |
623 | | */ |
624 | 0 | size = 2 * size + 1; |
625 | 0 | } |
626 | |
|
627 | 0 | for (attr = elem->attr; attr; attr = attr->next) { |
628 | 0 | if (attr->ns == APR_XML_NS_NONE) { |
629 | | /* compute size of: ' %s="%s"' */ |
630 | 0 | size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1; |
631 | 0 | } |
632 | 0 | else if (style == APR_XML_X2T_PARSED) { |
633 | | /* compute size of: ' %s:%s="%s"' */ |
634 | 0 | size += 5 + strlen(find_prefix_name(elem, attr->ns, 1)) + strlen(attr->name) + strlen(attr->value); |
635 | 0 | } |
636 | 0 | else { |
637 | | /* compute size of: ' ns%d:%s="%s"' */ |
638 | 0 | int ns = ns_map ? ns_map[attr->ns] : attr->ns; |
639 | 0 | size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1; |
640 | 0 | } |
641 | 0 | } |
642 | | |
643 | | /* |
644 | | ** If the element has an xml:lang value that is *different* from |
645 | | ** its parent, then add the thing in: ' xml:lang="%s"'. |
646 | | ** |
647 | | ** NOTE: we take advantage of the pointer equality established by |
648 | | ** the parsing for "inheriting" the xml:lang values from parents. |
649 | | */ |
650 | 0 | if (elem->lang != NULL && |
651 | 0 | (elem->parent == NULL || elem->lang != elem->parent->lang)) { |
652 | 0 | size += 11 + strlen(elem->lang) + 1; |
653 | 0 | } |
654 | 0 | } |
655 | 0 | else if (style == APR_XML_X2T_LANG_INNER) { |
656 | | /* |
657 | | * This style prepends the xml:lang value plus a null terminator. |
658 | | * If a lang value is not present, then we insert a null term. |
659 | | */ |
660 | 0 | size = elem->lang ? strlen(elem->lang) + 1 : 1; |
661 | 0 | } |
662 | 0 | else |
663 | 0 | size = 0; |
664 | |
|
665 | 0 | size += text_size(elem->first_cdata.first); |
666 | |
|
667 | 0 | for (elem = elem->first_child; elem; elem = elem->next) { |
668 | | /* the size of the child element plus the CDATA that follows it */ |
669 | 0 | size += (elem_size(elem, style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL, NULL, ns_map) + |
670 | 0 | text_size(elem->following_cdata.first)); |
671 | 0 | } |
672 | |
|
673 | 0 | return size; |
674 | 0 | } |
675 | | |
676 | | static char *write_text(char *s, const apr_text *t) |
677 | 0 | { |
678 | 0 | for (; t; t = t->next) { |
679 | 0 | apr_size_t len = strlen(t->text); |
680 | 0 | memcpy(s, t->text, len); |
681 | 0 | s += len; |
682 | 0 | } |
683 | 0 | return s; |
684 | 0 | } |
685 | | |
686 | | static char *write_elem(char *s, const apr_xml_elem *elem, int style, |
687 | | apr_array_header_t *namespaces, int *ns_map) |
688 | 0 | { |
689 | 0 | const apr_xml_elem *child; |
690 | 0 | apr_size_t len; |
691 | 0 | int ns; |
692 | |
|
693 | 0 | if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG || |
694 | 0 | style == APR_XML_X2T_PARSED) { |
695 | 0 | int empty = APR_XML_ELEM_IS_EMPTY(elem); |
696 | 0 | const apr_xml_attr *attr; |
697 | |
|
698 | 0 | if (elem->ns == APR_XML_NS_NONE) { |
699 | 0 | len = sprintf(s, "<%s", elem->name); |
700 | 0 | } |
701 | 0 | else if (style == APR_XML_X2T_PARSED) { |
702 | 0 | len = sprintf(s, "<%s:%s", find_prefix_name(elem, elem->ns, 1), elem->name); |
703 | 0 | } |
704 | 0 | else { |
705 | 0 | ns = ns_map ? ns_map[elem->ns] : elem->ns; |
706 | 0 | len = sprintf(s, "<ns%d:%s", ns, elem->name); |
707 | 0 | } |
708 | 0 | s += len; |
709 | |
|
710 | 0 | for (attr = elem->attr; attr; attr = attr->next) { |
711 | 0 | if (attr->ns == APR_XML_NS_NONE) { |
712 | 0 | len = sprintf(s, " %s=\"%s\"", attr->name, attr->value); |
713 | 0 | } |
714 | 0 | else if (style == APR_XML_X2T_PARSED) { |
715 | 0 | len = sprintf(s, " %s:%s=\"%s\"", |
716 | 0 | find_prefix_name(elem, attr->ns, 1), attr->name, attr->value); |
717 | 0 | } |
718 | 0 | else { |
719 | 0 | ns = ns_map ? ns_map[attr->ns] : attr->ns; |
720 | 0 | len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value); |
721 | 0 | } |
722 | 0 | s += len; |
723 | 0 | } |
724 | | |
725 | | /* add the xml:lang value if necessary */ |
726 | 0 | if (elem->lang != NULL && |
727 | 0 | (style == APR_XML_X2T_FULL_NS_LANG || |
728 | 0 | elem->parent == NULL || |
729 | 0 | elem->lang != elem->parent->lang)) { |
730 | 0 | len = sprintf(s, " xml:lang=\"%s\"", elem->lang); |
731 | 0 | s += len; |
732 | 0 | } |
733 | | |
734 | | /* add namespace definitions, if required */ |
735 | 0 | if (style == APR_XML_X2T_FULL_NS_LANG) { |
736 | 0 | int i; |
737 | |
|
738 | 0 | for (i = namespaces->nelts; i--;) { |
739 | 0 | len = sprintf(s, " xmlns:ns%d=\"%s\"", i, |
740 | 0 | APR_XML_GET_URI_ITEM(namespaces, i)); |
741 | 0 | s += len; |
742 | 0 | } |
743 | 0 | } |
744 | | |
745 | 0 | else if (style == APR_XML_X2T_PARSED) { |
746 | 0 | apr_xml_ns_scope *ns_scope = elem->ns_scope; |
747 | |
|
748 | 0 | for (; ns_scope; ns_scope = ns_scope->next) { |
749 | 0 | const char *prefix = find_prefix_name(elem, ns_scope->ns, 0); |
750 | |
|
751 | 0 | len = sprintf(s, " xmlns%s%s=\"%s\"", |
752 | 0 | *prefix ? ":" : "", *prefix ? prefix : "", |
753 | 0 | APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns)); |
754 | 0 | s += len; |
755 | 0 | } |
756 | 0 | } |
757 | | |
758 | | /* no more to do. close it up and go. */ |
759 | 0 | if (empty) { |
760 | 0 | *s++ = '/'; |
761 | 0 | *s++ = '>'; |
762 | 0 | return s; |
763 | 0 | } |
764 | | |
765 | | /* just close it */ |
766 | 0 | *s++ = '>'; |
767 | 0 | } |
768 | 0 | else if (style == APR_XML_X2T_LANG_INNER) { |
769 | | /* prepend the xml:lang value */ |
770 | 0 | if (elem->lang != NULL) { |
771 | 0 | len = strlen(elem->lang); |
772 | 0 | memcpy(s, elem->lang, len); |
773 | 0 | s += len; |
774 | 0 | } |
775 | 0 | *s++ = '\0'; |
776 | 0 | } |
777 | | |
778 | 0 | s = write_text(s, elem->first_cdata.first); |
779 | |
|
780 | 0 | for (child = elem->first_child; child; child = child->next) { |
781 | 0 | s = write_elem(s, child, |
782 | 0 | style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL, |
783 | 0 | NULL, ns_map); |
784 | 0 | s = write_text(s, child->following_cdata.first); |
785 | 0 | } |
786 | |
|
787 | 0 | if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG || |
788 | 0 | style == APR_XML_X2T_PARSED) { |
789 | 0 | if (elem->ns == APR_XML_NS_NONE) { |
790 | 0 | len = sprintf(s, "</%s>", elem->name); |
791 | 0 | } |
792 | 0 | else if (style == APR_XML_X2T_PARSED) { |
793 | 0 | len = sprintf(s, "</%s:%s>", find_prefix_name(elem, elem->ns, 1), elem->name); |
794 | 0 | } |
795 | 0 | else { |
796 | 0 | ns = ns_map ? ns_map[elem->ns] : elem->ns; |
797 | 0 | len = sprintf(s, "</ns%d:%s>", ns, elem->name); |
798 | 0 | } |
799 | 0 | s += len; |
800 | 0 | } |
801 | |
|
802 | 0 | return s; |
803 | 0 | } |
804 | | |
805 | | APR_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem) |
806 | 0 | { |
807 | 0 | apr_text *scan_txt; |
808 | 0 | apr_xml_attr *scan_attr; |
809 | 0 | apr_xml_elem *scan_elem; |
810 | | |
811 | | /* convert the element's text */ |
812 | 0 | for (scan_txt = elem->first_cdata.first; |
813 | 0 | scan_txt != NULL; |
814 | 0 | scan_txt = scan_txt->next) { |
815 | 0 | scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0); |
816 | 0 | } |
817 | 0 | for (scan_txt = elem->following_cdata.first; |
818 | 0 | scan_txt != NULL; |
819 | 0 | scan_txt = scan_txt->next) { |
820 | 0 | scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0); |
821 | 0 | } |
822 | | |
823 | | /* convert the attribute values */ |
824 | 0 | for (scan_attr = elem->attr; |
825 | 0 | scan_attr != NULL; |
826 | 0 | scan_attr = scan_attr->next) { |
827 | 0 | scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1); |
828 | 0 | } |
829 | | |
830 | | /* convert the child elements */ |
831 | 0 | for (scan_elem = elem->first_child; |
832 | 0 | scan_elem != NULL; |
833 | 0 | scan_elem = scan_elem->next) { |
834 | 0 | apr_xml_quote_elem(p, scan_elem); |
835 | 0 | } |
836 | 0 | } |
837 | | |
838 | | /* convert an element to a text string */ |
839 | | APR_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem, |
840 | | int style, apr_array_header_t *namespaces, |
841 | | int *ns_map, const char **pbuf, |
842 | | apr_size_t *psize) |
843 | 0 | { |
844 | | /* get the exact size, plus a null terminator */ |
845 | 0 | apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1; |
846 | 0 | char *s = apr_palloc(p, size); |
847 | |
|
848 | 0 | (void) write_elem(s, elem, style, namespaces, ns_map); |
849 | 0 | s[size - 1] = '\0'; |
850 | |
|
851 | 0 | *pbuf = s; |
852 | 0 | if (psize) |
853 | 0 | *psize = size; |
854 | 0 | } |
855 | | |
856 | | APR_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p, |
857 | | const apr_xml_elem *elem) |
858 | 0 | { |
859 | 0 | if (elem->ns == APR_XML_NS_NONE) { |
860 | | /* |
861 | | * The prefix (xml...) is already within the prop name, or |
862 | | * the element simply has no prefix. |
863 | | */ |
864 | 0 | return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name); |
865 | 0 | } |
866 | | |
867 | 0 | return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name); |
868 | 0 | } |
869 | | |
870 | | /* return the URI's (existing) index, or insert it and return a new index */ |
871 | | APR_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array, |
872 | | const char *uri) |
873 | 0 | { |
874 | 0 | int i; |
875 | 0 | const char **pelt; |
876 | | |
877 | | /* never insert an empty URI; this index is always APR_XML_NS_NONE */ |
878 | 0 | if (*uri == '\0') |
879 | 0 | return APR_XML_NS_NONE; |
880 | | |
881 | 0 | for (i = uri_array->nelts; i--;) { |
882 | 0 | if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0) |
883 | 0 | return i; |
884 | 0 | } |
885 | | |
886 | 0 | pelt = apr_array_push(uri_array); |
887 | 0 | *pelt = uri; /* assume uri is const or in a pool */ |
888 | 0 | return uri_array->nelts - 1; |
889 | 0 | } |
890 | | |
891 | | /* convert the element to EBCDIC */ |
892 | | #if APR_CHARSET_EBCDIC |
893 | | static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e, |
894 | | apr_xlate_t *convset) |
895 | | { |
896 | | apr_xml_attr *a; |
897 | | apr_xml_elem *ec; |
898 | | apr_text *t; |
899 | | apr_size_t inbytes_left, outbytes_left; |
900 | | apr_status_t status; |
901 | | |
902 | | inbytes_left = outbytes_left = strlen(e->name); |
903 | | status = apr_xlate_conv_buffer(convset, e->name, &inbytes_left, (char *) e->name, &outbytes_left); |
904 | | if (status) { |
905 | | return status; |
906 | | } |
907 | | |
908 | | for (t = e->first_cdata.first; t != NULL; t = t->next) { |
909 | | inbytes_left = outbytes_left = strlen(t->text); |
910 | | status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left); |
911 | | if (status) { |
912 | | return status; |
913 | | } |
914 | | } |
915 | | |
916 | | for (t = e->following_cdata.first; t != NULL; t = t->next) { |
917 | | inbytes_left = outbytes_left = strlen(t->text); |
918 | | status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left); |
919 | | if (status) { |
920 | | return status; |
921 | | } |
922 | | } |
923 | | |
924 | | for (a = e->attr; a != NULL; a = a->next) { |
925 | | inbytes_left = outbytes_left = strlen(a->name); |
926 | | status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left); |
927 | | if (status) { |
928 | | return status; |
929 | | } |
930 | | inbytes_left = outbytes_left = strlen(a->value); |
931 | | status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left); |
932 | | if (status) { |
933 | | return status; |
934 | | } |
935 | | } |
936 | | |
937 | | for (ec = e->first_child; ec != NULL; ec = ec->next) { |
938 | | status = apr_xml_parser_convert_elem(ec, convset); |
939 | | if (status) { |
940 | | return status; |
941 | | } |
942 | | } |
943 | | return APR_SUCCESS; |
944 | | } |
945 | | |
946 | | /* convert the whole document to EBCDIC */ |
947 | | APR_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool, |
948 | | apr_xml_doc *pdoc, |
949 | | apr_xlate_t *convset) |
950 | | { |
951 | | apr_status_t status; |
952 | | /* Don't convert the namespaces: they are constant! */ |
953 | | if (pdoc->namespaces != NULL) { |
954 | | int i; |
955 | | apr_array_header_t *namespaces; |
956 | | namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *)); |
957 | | if (namespaces == NULL) |
958 | | return APR_ENOMEM; |
959 | | for (i = 0; i < pdoc->namespaces->nelts; i++) { |
960 | | apr_size_t inbytes_left, outbytes_left; |
961 | | char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i); |
962 | | ptr = apr_pstrdup(pool, ptr); |
963 | | if ( ptr == NULL) |
964 | | return APR_ENOMEM; |
965 | | inbytes_left = outbytes_left = strlen(ptr); |
966 | | status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left); |
967 | | if (status) { |
968 | | return status; |
969 | | } |
970 | | apr_xml_insert_uri(namespaces, ptr); |
971 | | } |
972 | | pdoc->namespaces = namespaces; |
973 | | } |
974 | | return apr_xml_parser_convert_elem(pdoc->root, convset); |
975 | | } |
976 | | #endif |