/src/mupdf/thirdparty/extract/src/xml.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "extract/alloc.h" |
2 | | |
3 | | #include "mem.h" |
4 | | #include "outf.h" |
5 | | #include "xml.h" |
6 | | |
7 | | #include <assert.h> |
8 | | #include <errno.h> |
9 | | #include <float.h> |
10 | | #include <limits.h> |
11 | | |
12 | | #include "compat_stdint.h" |
13 | | |
14 | | #include <stdlib.h> |
15 | | #include <string.h> |
16 | | |
17 | | |
18 | | /* These str_*() functions realloc buffer as required. All return 0 or -1 with |
19 | | errno set. */ |
20 | | |
21 | | /* Appends first <s_len> chars of string <s> to *p. */ |
22 | | static int str_catl(extract_alloc_t *alloc, char **p, const char *s, int s_len) |
23 | 0 | { |
24 | 0 | size_t p_len = (*p) ? strlen(*p) : 0; |
25 | |
|
26 | 0 | if (extract_realloc2(alloc, |
27 | 0 | p, |
28 | 0 | p_len + 1, |
29 | 0 | p_len + s_len + 1)) return -1; |
30 | 0 | memcpy(*p + p_len, s, s_len); |
31 | 0 | (*p)[p_len + s_len] = 0; |
32 | |
|
33 | 0 | return 0; |
34 | 0 | } |
35 | | |
36 | | /* Appends a char. */ |
37 | | static int str_catc(extract_alloc_t *alloc, char **p, char c) |
38 | 0 | { |
39 | 0 | return str_catl(alloc, p, &c, 1); |
40 | 0 | } |
41 | | |
/* Not compiled at present; retained because it may be useful later. */
#if 0
/* Appends the whole NUL-terminated string <s> to *p via str_catl(). */
static int str_cat(extract_alloc_t *alloc, char **p, const char *s)
{
    size_t s_len = strlen(s);

    return str_catl(alloc, p, s, s_len);
}
#endif
50 | | |
51 | | char *extract_xml_tag_attributes_find(extract_xml_tag_t *tag, const char *name) |
52 | 0 | { |
53 | 0 | int i; |
54 | |
|
55 | 0 | for (i=0; i<tag->attributes_num; ++i) { |
56 | 0 | if (!strcmp(tag->attributes[i].name, name)) { |
57 | 0 | char* ret = tag->attributes[i].value; |
58 | 0 | return ret; |
59 | 0 | } |
60 | 0 | } |
61 | 0 | outf("Failed to find attribute '%s'",name); |
62 | |
|
63 | 0 | return NULL; |
64 | 0 | } |
65 | | |
66 | | int extract_xml_tag_attributes_find_float( |
67 | | extract_xml_tag_t *tag, |
68 | | const char *name, |
69 | | float *o_out) |
70 | 0 | { |
71 | 0 | const char *value = extract_xml_tag_attributes_find(tag, name); |
72 | |
|
73 | 0 | if (!value) { |
74 | 0 | errno = ESRCH; |
75 | 0 | return -1; |
76 | 0 | } |
77 | 0 | if (extract_xml_str_to_float(value, o_out)) return -1; |
78 | | |
79 | 0 | return 0; |
80 | 0 | } |
81 | | |
82 | | int extract_xml_tag_attributes_find_double( |
83 | | extract_xml_tag_t *tag, |
84 | | const char *name, |
85 | | double *o_out) |
86 | 0 | { |
87 | 0 | const char *value = extract_xml_tag_attributes_find(tag, name); |
88 | |
|
89 | 0 | if (!value) { |
90 | 0 | errno = ESRCH; |
91 | 0 | return -1; |
92 | 0 | } |
93 | 0 | if (extract_xml_str_to_double(value, o_out)) return -1; |
94 | | |
95 | 0 | return 0; |
96 | 0 | } |
97 | | |
98 | | int extract_xml_tag_attributes_find_int( |
99 | | extract_xml_tag_t *tag, |
100 | | const char *name, |
101 | | int *o_out) |
102 | 0 | { |
103 | 0 | const char *text = extract_xml_tag_attributes_find(tag, name); |
104 | |
|
105 | 0 | return extract_xml_str_to_int(text, o_out); |
106 | 0 | } |
107 | | |
108 | | int extract_xml_tag_attributes_find_uint( |
109 | | extract_xml_tag_t *tag, |
110 | | const char *name, |
111 | | unsigned *o_out) |
112 | 0 | { |
113 | 0 | const char *text = extract_xml_tag_attributes_find(tag, name); |
114 | |
|
115 | 0 | return extract_xml_str_to_uint(text, o_out); |
116 | 0 | } |
117 | | |
118 | | int extract_xml_tag_attributes_find_size( |
119 | | extract_xml_tag_t *tag, |
120 | | const char *name, |
121 | | size_t *o_out) |
122 | 0 | { |
123 | 0 | const char *text = extract_xml_tag_attributes_find(tag, name); |
124 | |
|
125 | 0 | return extract_xml_str_to_size(text, o_out); |
126 | 0 | } |
127 | | |
/* Parses <text> as a base-10 long long and stores it in *o_out. Returns 0 on
success, otherwise -1 with errno set: ESRCH if <text> is NULL, EINVAL if it is
empty or has characters left over after the number, or whatever strtoll() set
(e.g. ERANGE). *o_out is written only on success. */
int extract_xml_str_to_llint(const char *text, long long*o_out)
{
    char *tail = NULL;
    long long parsed;

    if (text == NULL) {
        errno = ESRCH;
        return -1;
    }
    if (*text == '\0') {
        errno = EINVAL;
        return -1;
    }

    /* strtoll() reports failure only through errno, so clear it first. */
    errno = 0;
    parsed = strtoll(text, &tail, 10 /*base*/);
    if (errno != 0) {
        return -1;
    }
    if (*tail != '\0') {
        /* Trailing junk, or no digits at all. */
        errno = EINVAL;
        return -1;
    }

    *o_out = parsed;
    return 0;
}
154 | | |
/* Parses <text> as a base-10 unsigned long long and stores it in *o_out.
Returns 0 on success, otherwise -1 with errno set: ESRCH if <text> is NULL,
EINVAL if it is empty or has characters left over after the number, ERANGE if
the value is negative, or whatever strtoull() set (e.g. ERANGE on overflow).
*o_out is written only on success. */
int extract_xml_str_to_ullint(const char *text, unsigned long long *o_out)
{
    char *endptr;
    unsigned long long x;
    const char *first;

    if (!text) {
        errno = ESRCH;
        return -1;
    }
    if (text[0] == 0) {
        errno = EINVAL;
        return -1;
    }
    /* strtoull() reports failure only through errno, so clear it first. */
    errno = 0;
    x = strtoull(text, &endptr, 10 /*base*/);
    if (errno) {
        return -1;
    }
    if (*endptr) {
        errno = EINVAL;
        return -1;
    }
    /* strtoull() accepts a leading '-' and returns the negated value wrapped
    into the unsigned range, so "-1" would otherwise come back as ULLONG_MAX.
    Treat any non-zero negative input as out of range; "-0" remains zero. */
    first = text + strspn(text, " \t\n\v\f\r");
    if (*first == '-' && x != 0) {
        errno = ERANGE;
        return -1;
    }
    *o_out = x;

    return 0;
}
181 | | |
/* Parses <text> as a base-10 int via extract_xml_str_to_llint() and stores it
in *o_out. Returns 0 on success, -1 if the underlying parse fails, or -1 with
errno=ERANGE if the value does not fit in an int. */
int extract_xml_str_to_int(const char *text, int *o_out)
{
    long long wide;

    if (extract_xml_str_to_llint(text, &wide) != 0) {
        return -1;
    }
    if (wide < INT_MIN || wide > INT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (int) wide;
    return 0;
}
195 | | |
/* Parses <text> as a base-10 unsigned via extract_xml_str_to_ullint() and
stores it in *o_out. Returns 0 on success, -1 if the underlying parse fails, or
-1 with errno=ERANGE if the value exceeds UINT_MAX. */
int extract_xml_str_to_uint(const char *text, unsigned *o_out)
{
    unsigned long long wide;

    if (extract_xml_str_to_ullint(text, &wide) != 0) {
        return -1;
    }
    if (wide > UINT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (unsigned) wide;
    return 0;
}
209 | | |
/* Parses <text> as a base-10 size_t via extract_xml_str_to_ullint() and stores
it in *o_out. Returns 0 on success, -1 if the underlying parse fails, or -1
with errno=ERANGE if the value exceeds SIZE_MAX. */
int extract_xml_str_to_size(const char *text, size_t *o_out)
{
    unsigned long long wide;

    if (extract_xml_str_to_ullint(text, &wide) != 0) {
        return -1;
    }
    if (wide > SIZE_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (size_t) wide;
    return 0;
}
223 | | |
/* Parses <text> as a double with strtod() and stores it in *o_out. Returns 0
on success, otherwise -1 with errno set: ESRCH if <text> is NULL, EINVAL if it
is empty or has characters left over after the number, or whatever strtod()
set (e.g. ERANGE). *o_out is written only on success. */
int extract_xml_str_to_double(const char *text, double *o_out)
{
    char *tail = NULL;
    double parsed;

    if (text == NULL) {
        errno = ESRCH;
        return -1;
    }
    if (*text == '\0') {
        errno = EINVAL;
        return -1;
    }

    /* strtod() reports failure only through errno, so clear it first. */
    errno = 0;
    parsed = strtod(text, &tail);
    if (errno != 0) {
        return -1;
    }
    if (*tail != '\0') {
        /* Trailing junk, or nothing that could be converted. */
        errno = EINVAL;
        return -1;
    }

    *o_out = parsed;
    return 0;
}
250 | | |
/* Parses <text> as a float via extract_xml_str_to_double() and stores it in
*o_out. Returns 0 on success, -1 if the underlying parse fails, or -1 with
errno=ERANGE if the value lies outside [-FLT_MAX, FLT_MAX]. */
int extract_xml_str_to_float(const char *text, float *o_out)
{
    double wide;

    if (extract_xml_str_to_double(text, &wide) != 0) {
        return -1;
    }
    if (wide < -FLT_MAX || wide > FLT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (float) wide;
    return 0;
}
266 | | |
267 | | static int |
268 | | extract_xml_tag_attributes_append( |
269 | | extract_alloc_t *alloc, |
270 | | extract_xml_tag_t *tag, |
271 | | char *name, |
272 | | char *value) |
273 | 0 | { |
274 | 0 | if (extract_realloc2(alloc, |
275 | 0 | &tag->attributes, |
276 | 0 | sizeof(extract_xml_attribute_t) * tag->attributes_num, |
277 | 0 | sizeof(extract_xml_attribute_t) * (tag->attributes_num+1))) |
278 | 0 | { |
279 | 0 | return -1; |
280 | 0 | } |
281 | 0 | tag->attributes[tag->attributes_num].name = name; |
282 | 0 | tag->attributes[tag->attributes_num].value = value; |
283 | 0 | tag->attributes_num += 1; |
284 | |
|
285 | 0 | return 0; |
286 | 0 | } |
287 | | |
288 | | void extract_xml_tag_init(extract_xml_tag_t *tag) |
289 | 0 | { |
290 | 0 | tag->name = NULL; |
291 | 0 | tag->attributes = NULL; |
292 | 0 | tag->attributes_num = 0; |
293 | 0 | extract_astring_init(&tag->text); |
294 | 0 | } |
295 | | |
296 | | void extract_xml_tag_free(extract_alloc_t *alloc, extract_xml_tag_t *tag) |
297 | 0 | { |
298 | 0 | int i; |
299 | |
|
300 | 0 | if (tag == NULL) |
301 | 0 | return; |
302 | | |
303 | 0 | extract_free(alloc, &tag->name); |
304 | 0 | for (i=0; i<tag->attributes_num; ++i) { |
305 | 0 | extract_xml_attribute_t* attribute = &tag->attributes[i]; |
306 | 0 | extract_free(alloc, &attribute->name); |
307 | 0 | extract_free(alloc, &attribute->value); |
308 | 0 | } |
309 | 0 | extract_free(alloc, &tag->attributes); |
310 | 0 | extract_astring_free(alloc, &tag->text); |
311 | 0 | extract_xml_tag_init(tag); |
312 | 0 | } |
313 | | |
/* Not compiled at present; retained because it may be useful later. */
#if 0
/* strcmp() that also accepts NULL: two NULLs compare equal, and a NULL sorts
before any non-NULL string. */
static int extract_xml_strcmp_null(const char *a, const char *b)
{
    if (a && b) return strcmp(a, b);
    if (a) return 1;
    if (b) return -1;
    return 0;
}
#endif
325 | | |
/* Not compiled at present; retained because it may be useful later. */
#if 0
/* Orders two tags: first by name, then attribute by attribute (name, then
value), and finally by attribute count. Returns a negative value, 0 or a
positive value. The extract_xml_tag_t::text members are not compared. */
int extract_xml_compare_tags(const extract_xml_tag_t *lhs, const extract_xml_tag_t *rhs)
{
    int common = (lhs->attributes_num < rhs->attributes_num)
            ? lhs->attributes_num
            : rhs->attributes_num;
    int d = extract_xml_strcmp_null(lhs->name, rhs->name);
    int i;

    if (d) return d;
    for (i = 0; i < common; ++i) {
        const extract_xml_attribute_t *l = &lhs->attributes[i];
        const extract_xml_attribute_t *r = &rhs->attributes[i];

        d = extract_xml_strcmp_null(l->name, r->name);
        if (d) return d;
        d = extract_xml_strcmp_null(l->value, r->value);
        if (d) return d;
    }
    if (lhs->attributes_num > rhs->attributes_num) return +1;
    if (lhs->attributes_num < rhs->attributes_num) return -1;
    return 0;
}
#endif
352 | | |
353 | | |
354 | | int extract_xml_pparse_init(extract_alloc_t *alloc, extract_buffer_t *buffer, const char *first_line) |
355 | 0 | { |
356 | 0 | char *first_line_buffer = NULL; |
357 | 0 | int e = -1; |
358 | |
|
359 | 0 | if (first_line) { |
360 | 0 | size_t first_line_len = strlen(first_line); |
361 | 0 | size_t actual; |
362 | 0 | if (extract_malloc(alloc, &first_line_buffer, first_line_len + 1)) goto end; |
363 | | |
364 | 0 | if (extract_buffer_read(buffer, first_line_buffer, first_line_len, &actual)) { |
365 | 0 | outf("error: failed to read first line."); |
366 | 0 | goto end; |
367 | 0 | } |
368 | 0 | first_line_buffer[actual] = 0; |
369 | 0 | if (strcmp(first_line, first_line_buffer)) { |
370 | 0 | outf("Unrecognised prefix: %s", first_line_buffer); |
371 | 0 | errno = ESRCH; |
372 | 0 | goto end; |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | 0 | for(;;) { |
377 | 0 | char c; |
378 | 0 | int ee = extract_buffer_read(buffer, &c, 1, NULL); |
379 | 0 | if (ee) { |
380 | 0 | if (ee==1) errno = ESRCH; /* EOF. */ |
381 | 0 | goto end; |
382 | 0 | } |
383 | 0 | if (c == '<') { |
384 | 0 | break; |
385 | 0 | } |
386 | 0 | else if (c == ' ' || c == '\n') {} |
387 | 0 | else { |
388 | 0 | outf("Expected '<' but found c=%i", c); |
389 | 0 | goto end; |
390 | 0 | } |
391 | 0 | } |
392 | | |
393 | 0 | e = 0; |
394 | 0 | end: |
395 | |
|
396 | 0 | extract_free(alloc, &first_line_buffer); |
397 | |
|
398 | 0 | return e; |
399 | 0 | } |
400 | | |
401 | | static int s_next(extract_buffer_t *buffer, int *ret, char *o_c) |
402 | | /* Reads next char, but if EOF sets *ret=+1, errno=ESRCH and returns +1. */ |
403 | 0 | { |
404 | 0 | int e = extract_buffer_read(buffer, o_c, 1, NULL); |
405 | |
|
406 | 0 | if (e == +1) { |
407 | 0 | *ret = +1; |
408 | 0 | errno = ESRCH; |
409 | 0 | } |
410 | |
|
411 | 0 | return e; |
412 | 0 | } |
413 | | |
414 | | static const char * |
415 | | extract_xml_tag_string(extract_alloc_t *alloc, extract_xml_tag_t *tag) |
416 | 0 | { |
417 | 0 | static char *buffer = NULL; |
418 | 0 |
|
419 | 0 | extract_free(alloc, &buffer); |
420 | 0 | if (extract_asprintf(alloc, &buffer, "<name=%s>", tag->name ? tag->name : "")) |
421 | 0 | { |
422 | 0 | return ""; |
423 | 0 | } |
424 | 0 |
|
425 | 0 | return buffer; |
426 | 0 | } |
427 | | |
428 | | int extract_xml_pparse_next(extract_buffer_t *buffer, extract_xml_tag_t *out) |
429 | 0 | { |
430 | 0 | int ret = -1; |
431 | 0 | char *attribute_name = NULL; |
432 | 0 | char *attribute_value = NULL; |
433 | 0 | char c; |
434 | 0 | extract_alloc_t *alloc = extract_buffer_alloc(buffer); |
435 | |
|
436 | 0 | if (0) outf("out is: %s", extract_xml_tag_string(extract_buffer_alloc(buffer), out)); |
437 | 0 | assert(buffer); |
438 | 0 | extract_xml_tag_free(alloc, out); |
439 | | |
440 | | /* Read tag name. Initialise it to empty string so we never return |
441 | | out->name==null on success. */ |
442 | 0 | if (str_catl( alloc, &out->name, NULL, 0)) goto end; |
443 | 0 | for(;;) { |
444 | 0 | int e = extract_buffer_read(buffer, &c, 1, NULL); |
445 | 0 | if (e) { |
446 | 0 | if (e == +1) ret = 1; /* EOF is not an error here. */ |
447 | 0 | goto end; |
448 | 0 | } |
449 | 0 | if (c == '>' || c == ' ') break; |
450 | 0 | if (str_catc(alloc, &out->name, c)) goto end; |
451 | 0 | } |
452 | 0 | if (c == ' ') { |
453 | | |
454 | | /* Read attributes. */ |
455 | 0 | for(;;) { |
456 | | |
457 | | /* Read attribute name. */ |
458 | 0 | for(;;) { |
459 | 0 | if (s_next(buffer, &ret, &c)) goto end; |
460 | 0 | if (c == '=' || c == '>' || c == ' ') break; |
461 | 0 | if (str_catc(alloc, &attribute_name, c)) goto end; |
462 | 0 | } |
463 | 0 | if (c == '>') break; |
464 | | |
465 | 0 | if (c == '=') { |
466 | | /* Read attribute value. */ |
467 | 0 | int quote_single = 0; |
468 | 0 | int quote_double = 0; |
469 | 0 | size_t l; |
470 | 0 | if (str_catl( alloc, &attribute_value, NULL, 0)) goto end; |
471 | 0 | for(;;) { |
472 | 0 | if (s_next(buffer, &ret, &c)) goto end; |
473 | 0 | if (c == '\'') quote_single = !quote_single; |
474 | 0 | else if (c == '"') quote_double = !quote_double; |
475 | 0 | else if (!quote_single && !quote_double |
476 | 0 | && (c == ' ' || c == '/' || c == '>') |
477 | 0 | ) { |
478 | | /* We are at end of attribute value. */ |
479 | 0 | break; |
480 | 0 | } |
481 | 0 | else if (c == '\\') { |
482 | | // Escape next character. |
483 | 0 | if (s_next(buffer, &ret, &c)) goto end; |
484 | 0 | } |
485 | 0 | if (str_catc(alloc, &attribute_value, c)) goto end; |
486 | 0 | } |
487 | | |
488 | | /* Remove any enclosing quotes. */ |
489 | 0 | l = strlen(attribute_value); |
490 | 0 | if (l >= 2) { |
491 | 0 | if ( |
492 | 0 | (attribute_value[0] == '"' && attribute_value[l-1] == '"') |
493 | 0 | || |
494 | 0 | (attribute_value[0] == '\'' && attribute_value[l-1] == '\'') |
495 | 0 | ) { |
496 | 0 | memmove(attribute_value, attribute_value+1, l-2); |
497 | 0 | attribute_value[l-2] = 0; |
498 | 0 | } |
499 | 0 | } |
500 | 0 | } |
501 | | |
502 | | /* Ensure name and value are not NULL. */ |
503 | 0 | if (str_catl( alloc, &attribute_name, NULL, 0)) goto end; |
504 | 0 | if (str_catl( alloc, &attribute_value, NULL, 0)) goto end; |
505 | | |
506 | 0 | if (extract_xml_tag_attributes_append(alloc, out, attribute_name, attribute_value)) goto end; |
507 | 0 | attribute_name = NULL; |
508 | 0 | attribute_value = NULL; |
509 | 0 | if (c == '/') { |
510 | 0 | if (s_next(buffer, &ret, &c)) goto end; |
511 | 0 | } |
512 | 0 | if (c == '>') break; |
513 | 0 | } |
514 | 0 | } |
515 | | |
516 | | /* Read plain text until next '<'. */ |
517 | 0 | for(;;) { |
518 | | /* We don't use s_next() here because EOF is not an error. */ |
519 | 0 | int e = extract_buffer_read(buffer, &c, 1, NULL); |
520 | 0 | if (e == +1) { |
521 | 0 | break; /* EOF is not an error here. */ |
522 | 0 | } |
523 | 0 | if (e) goto end; |
524 | 0 | if (c == '<') break; |
525 | 0 | if (extract_astring_catc(alloc, &out->text, c)) goto end; |
526 | 0 | } |
527 | | |
528 | 0 | ret = 0; |
529 | 0 | end: |
530 | |
|
531 | 0 | extract_free(alloc, &attribute_name); |
532 | 0 | extract_free(alloc, &attribute_value); |
533 | 0 | if (ret) { |
534 | 0 | extract_xml_tag_free(alloc, out); |
535 | 0 | } |
536 | |
|
537 | 0 | return ret; |
538 | 0 | } |