Coverage Report

Created: 2025-01-11 06:55

/src/mupdf/thirdparty/extract/src/xml.c
Line
Count
Source (jump to first uncovered line)
1
#include "extract/alloc.h"
2
3
#include "mem.h"
4
#include "outf.h"
5
#include "xml.h"
6
7
#include <assert.h>
8
#include <errno.h>
9
#include <float.h>
10
#include <limits.h>
11
12
#include "compat_stdint.h"
13
14
#include <stdlib.h>
15
#include <string.h>
16
17
18
/* These str_*() functions realloc buffer as required. All return 0 or -1 with
19
errno set. */
20
21
/* Appends first <s_len> chars of string <s> to *p. */
22
static int str_catl(extract_alloc_t *alloc, char **p, const char *s, int s_len)
23
0
{
24
0
  size_t p_len = (*p) ? strlen(*p) : 0;
25
26
0
  if (extract_realloc2(alloc,
27
0
             p,
28
0
             p_len + 1,
29
0
             p_len + s_len + 1)) return -1;
30
0
  memcpy(*p + p_len, s, s_len);
31
0
  (*p)[p_len + s_len] = 0;
32
33
0
  return 0;
34
0
}
35
36
/* Appends a char.  */
37
static int str_catc(extract_alloc_t *alloc, char **p, char c)
38
0
{
39
0
  return str_catl(alloc, p, &c, 1);
40
0
}
41
42
/* Unused but useful to keep code here. */
43
#if 0
44
/* Appends a string. */
45
static int str_cat(extract_alloc_t *alloc, char **p, const char *s)
46
{
47
  return str_catl(alloc, p, s, strlen(s));
48
}
49
#endif
50
51
/* Returns the value of the first attribute of <tag> whose name equals <name>.
The returned pointer is the one stored in the tag, not a copy. If no attribute
matches, logs a message with outf() and returns NULL. */
char *extract_xml_tag_attributes_find(extract_xml_tag_t *tag, const char *name)
{
    int index = 0;

    while (index < tag->attributes_num) {
        if (strcmp(tag->attributes[index].name, name) == 0) {
            return tag->attributes[index].value;
        }
        ++index;
    }

    outf("Failed to find attribute '%s'",name);
    return NULL;
}
65
66
int extract_xml_tag_attributes_find_float(
67
    extract_xml_tag_t *tag,
68
    const char        *name,
69
    float             *o_out)
70
0
{
71
0
  const char *value = extract_xml_tag_attributes_find(tag, name);
72
73
0
  if (!value) {
74
0
    errno = ESRCH;
75
0
    return -1;
76
0
  }
77
0
  if (extract_xml_str_to_float(value, o_out)) return -1;
78
79
0
  return 0;
80
0
}
81
82
int extract_xml_tag_attributes_find_double(
83
    extract_xml_tag_t *tag,
84
    const char        *name,
85
    double            *o_out)
86
0
{
87
0
  const char *value = extract_xml_tag_attributes_find(tag, name);
88
89
0
  if (!value) {
90
0
    errno = ESRCH;
91
0
    return -1;
92
0
  }
93
0
  if (extract_xml_str_to_double(value, o_out)) return -1;
94
95
0
  return 0;
96
0
}
97
98
int extract_xml_tag_attributes_find_int(
99
    extract_xml_tag_t *tag,
100
    const char        *name,
101
    int               *o_out)
102
0
{
103
0
  const char *text = extract_xml_tag_attributes_find(tag, name);
104
105
0
  return extract_xml_str_to_int(text, o_out);
106
0
}
107
108
int extract_xml_tag_attributes_find_uint(
109
    extract_xml_tag_t *tag,
110
    const char        *name,
111
    unsigned          *o_out)
112
0
{
113
0
  const char *text = extract_xml_tag_attributes_find(tag, name);
114
115
0
  return extract_xml_str_to_uint(text, o_out);
116
0
}
117
118
int extract_xml_tag_attributes_find_size(
119
    extract_xml_tag_t *tag,
120
    const char        *name,
121
    size_t            *o_out)
122
0
{
123
0
  const char *text = extract_xml_tag_attributes_find(tag, name);
124
125
0
  return extract_xml_str_to_size(text, o_out);
126
0
}
127
128
/* Parses <text> as a base-10 long long and stores it in *o_out. Returns 0 on
success. Returns -1 with errno=ESRCH if <text> is NULL, with errno=EINVAL if
<text> is empty or has characters left over after the number, or with the
errno set by strtoll() if it reported an error. *o_out is only written on
success. */
int extract_xml_str_to_llint(const char *text, long long*o_out)
{
    char      *tail = NULL;
    long long  value;

    if (text == NULL) {
        errno = ESRCH;
        return -1;
    }
    if (*text == '\0') {
        errno = EINVAL;
        return -1;
    }

    /* strtoll() only ever sets errno, so clear it to detect a failure. */
    errno = 0;
    value = strtoll(text, &tail, 10);
    if (errno != 0) return -1;

    /* Anything left unparsed means <text> was not purely a number. */
    if (*tail != '\0') {
        errno = EINVAL;
        return -1;
    }

    *o_out = value;
    return 0;
}
154
155
/* Parses <text> as a base-10 unsigned long long and stores it in *o_out.
Returns 0 on success. Returns -1 with errno=ESRCH if <text> is NULL, with
errno=EINVAL if <text> is empty or has characters left over after the number,
with errno=ERANGE if <text> denotes a negative non-zero value, or with the
errno set by strtoull() if it reported an error. *o_out is only written on
success. */
int extract_xml_str_to_ullint(const char *text, unsigned long long *o_out)
{
    char               *endptr;
    unsigned long long  x;

    if (!text) {
        errno = ESRCH;
        return -1;
    }
    if (text[0] == 0) {
        errno = EINVAL;
        return -1;
    }
    errno = 0;
    x = strtoull(text, &endptr, 10 /*base*/);
    if (errno) {
        return -1;
    }
    if (*endptr) {
        errno = EINVAL;
        return -1;
    }
    /* strtoull() accepts a leading '-' and returns the negated value wrapped
    to unsigned, so "-1" would otherwise be reported as ULLONG_MAX. The whole
    string was consumed as a base-10 number, so any '-' in it can only be that
    sign. "-0" is still accepted because it is exactly zero. */
    if (x != 0 && strchr(text, '-')) {
        errno = ERANGE;
        return -1;
    }
    *o_out = x;

    return 0;
}
181
182
/* Parses <text> as a base-10 int and stores it in *o_out. The text is first
parsed with extract_xml_str_to_llint(); if that fails this returns -1. If the
parsed value lies outside [INT_MIN, INT_MAX] this returns -1 with
errno=ERANGE. Returns 0 on success. */
int extract_xml_str_to_int(const char *text, int *o_out)
{
    long long wide;

    if (extract_xml_str_to_llint(text, &wide) != 0) return -1;

    if (wide < INT_MIN || wide > INT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (int) wide;
    return 0;
}
195
196
/* Parses <text> as a base-10 unsigned and stores it in *o_out. The text is
first parsed with extract_xml_str_to_ullint(); if that fails this returns -1.
If the parsed value exceeds UINT_MAX this returns -1 with errno=ERANGE.
Returns 0 on success. */
int extract_xml_str_to_uint(const char *text, unsigned *o_out)
{
    unsigned long long wide;

    if (extract_xml_str_to_ullint(text, &wide) != 0) return -1;

    if (wide > UINT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (unsigned) wide;
    return 0;
}
209
210
/* Parses <text> as a base-10 size_t and stores it in *o_out. The text is
first parsed with extract_xml_str_to_ullint(); if that fails this returns -1.
If the parsed value exceeds SIZE_MAX this returns -1 with errno=ERANGE.
Returns 0 on success. */
int extract_xml_str_to_size(const char *text, size_t *o_out)
{
    unsigned long long wide;

    if (extract_xml_str_to_ullint(text, &wide) != 0) return -1;

    if (wide > SIZE_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (size_t) wide;
    return 0;
}
223
224
/* Parses <text> with strtod() and stores the result in *o_out. Returns 0 on
success. Returns -1 with errno=ESRCH if <text> is NULL, with errno=EINVAL if
<text> is empty or has characters left over after the number, or with the
errno set by strtod() if it reported an error. *o_out is only written on
success. */
int extract_xml_str_to_double(const char *text, double *o_out)
{
    char   *tail = NULL;
    double  value;

    if (text == NULL) {
        errno = ESRCH;
        return -1;
    }
    if (*text == '\0') {
        errno = EINVAL;
        return -1;
    }

    /* strtod() only ever sets errno, so clear it to detect a failure. */
    errno = 0;
    value = strtod(text, &tail);
    if (errno != 0) return -1;

    /* Anything left unparsed means <text> was not purely a number. */
    if (*tail != '\0') {
        errno = EINVAL;
        return -1;
    }

    *o_out = value;
    return 0;
}
250
251
/* Parses <text> as a float and stores it in *o_out. The text is first parsed
with extract_xml_str_to_double(); if that fails this returns -1. If the parsed
value is greater than FLT_MAX or less than -FLT_MAX this returns -1 with
errno=ERANGE. Returns 0 on success. */
int extract_xml_str_to_float(const char *text, float *o_out)
{
    double wide;

    if (extract_xml_str_to_double(text, &wide) != 0) return -1;

    if (wide < -FLT_MAX || wide > FLT_MAX) {
        errno = ERANGE;
        return -1;
    }

    *o_out = (float) wide;
    return 0;
}
266
267
static int
268
extract_xml_tag_attributes_append(
269
    extract_alloc_t   *alloc,
270
    extract_xml_tag_t *tag,
271
    char              *name,
272
    char              *value)
273
0
{
274
0
  if (extract_realloc2(alloc,
275
0
             &tag->attributes,
276
0
             sizeof(extract_xml_attribute_t) * tag->attributes_num,
277
0
             sizeof(extract_xml_attribute_t) * (tag->attributes_num+1)))
278
0
  {
279
0
    return -1;
280
0
  }
281
0
  tag->attributes[tag->attributes_num].name = name;
282
0
  tag->attributes[tag->attributes_num].value = value;
283
0
  tag->attributes_num += 1;
284
285
0
  return 0;
286
0
}
287
288
/* Puts <tag> into the empty state: no name, no attributes, and a freshly
initialised text string. Nothing is freed, so any memory <tag> previously
referred to is not released here. */
void extract_xml_tag_init(extract_xml_tag_t *tag)
{
    extract_astring_init(&tag->text);
    tag->attributes_num = 0;
    tag->attributes = NULL;
    tag->name = NULL;
}
295
296
/* Releases everything owned by <tag> (its name, each attribute's name and
value, the attribute array and the text) and then resets it with
extract_xml_tag_init(). Does nothing if <tag> is NULL. */
void extract_xml_tag_free(extract_alloc_t *alloc, extract_xml_tag_t *tag)
{
    int index;

    if (!tag) return;

    for (index = 0; index < tag->attributes_num; ++index) {
        extract_free(alloc, &tag->attributes[index].name);
        extract_free(alloc, &tag->attributes[index].value);
    }
    extract_free(alloc, &tag->attributes);
    extract_free(alloc, &tag->name);
    extract_astring_free(alloc, &tag->text);

    /* Leave the tag in the empty state. */
    extract_xml_tag_init(tag);
}
313
314
/* Unused but useful to keep code here. */
315
#if 0
316
/* strcmp() variant that tolerates NULL arguments: two NULLs compare equal and
a NULL orders before any non-NULL string. */
static int extract_xml_strcmp_null(const char *a, const char *b)
{
    if (a == NULL) return (b == NULL) ? 0 : -1;
    if (b == NULL) return 1;

    return strcmp(a, b);
}
324
#endif
325
326
/* Unused but useful to keep code here. */
327
#if 0
328
/* Compares tag name, then attributes; returns -1, 0 or +1. Does not compare
329
extract_xml_tag_t::text members. */
330
int extract_xml_compare_tags(const extract_xml_tag_t *lhs, const extract_xml_tag_t *rhs)
331
{
332
  int d;
333
  int i;
334
  d = extract_xml_strcmp_null(lhs->name, rhs->name);
335
  if (d)  return d;
336
  for(i=0;; ++i) {
337
    if (i >= lhs->attributes_num || i >= rhs->attributes_num) {
338
      break;
339
    }
340
    const extract_xml_attribute_t* lhs_attribute = &lhs->attributes[i];
341
    const extract_xml_attribute_t* rhs_attribute = &rhs->attributes[i];
342
    d = extract_xml_strcmp_null(lhs_attribute->name, rhs_attribute->name);
343
    if (d)  return d;
344
    d = extract_xml_strcmp_null(lhs_attribute->value, rhs_attribute->value);
345
    if (d)  return d;
346
  }
347
  if (lhs->attributes_num > rhs->attributes_num) return +1;
348
  if (lhs->attributes_num < rhs->attributes_num) return -1;
349
  return 0;
350
}
351
#endif
352
353
354
/* Prepares <buffer> for parsing. If <first_line> is not NULL, reads
strlen(first_line) bytes from <buffer> and requires them to equal
<first_line>. Then consumes spaces and newlines up to and including the first
'<'.

Returns 0 on success and -1 on failure. errno is set to ESRCH when the input
ends early, when the prefix does not match <first_line>, or when a character
other than space, newline or '<' precedes the first tag; for other failures
errno is whatever the failing call left. */
int extract_xml_pparse_init(extract_alloc_t *alloc, extract_buffer_t *buffer, const char *first_line)
{
    char *first_line_buffer = NULL;
    int   e = -1;

    if (first_line) {
        size_t first_line_len = strlen(first_line);
        size_t actual;
        int    ee;
        if (extract_malloc(alloc, &first_line_buffer, first_line_len + 1)) goto end;

        ee = extract_buffer_read(buffer, first_line_buffer, first_line_len, &actual);
        if (ee) {
            outf("error: failed to read first line.");
            /* Match the EOF handling of the loop below so that callers do not
            see a stale errno. */
            if (ee==1) errno = ESRCH;   /* EOF. */
            goto end;
        }
        first_line_buffer[actual] = 0;
        if (strcmp(first_line, first_line_buffer)) {
            outf("Unrecognised prefix: %s", first_line_buffer);
            errno = ESRCH;
            goto end;
        }
    }

    /* Skip whitespace until the opening '<' of the first tag. */
    for(;;) {
        char c;
        int ee = extract_buffer_read(buffer, &c, 1, NULL);
        if (ee) {
            if (ee==1) errno = ESRCH;   /* EOF. */
            goto end;
        }
        if (c == '<') {
            break;
        }
        else if (c == ' ' || c == '\n') {}
        else {
            outf("Expected '<' but found c=%i", c);
            /* Same errno as the prefix mismatch above: the expected start of
            the document was not found. */
            errno = ESRCH;
            goto end;
        }
    }

    e = 0;
end:

    extract_free(alloc, &first_line_buffer);

    return e;
}
400
401
/* Reads the next char from <buffer> into *o_c and returns the result of
extract_buffer_read(). When that result is +1 (EOF) this additionally sets
*ret=+1 and errno=ESRCH; for any other result *ret and errno are not touched
here. */
static int s_next(extract_buffer_t *buffer, int *ret, char *o_c)
{
    int status = extract_buffer_read(buffer, o_c, 1, NULL);

    if (status != +1) return status;

    *ret = +1;
    errno = ESRCH;
    return status;
}
413
414
static const char *
415
extract_xml_tag_string(extract_alloc_t *alloc, extract_xml_tag_t *tag)
416
0
{
417
0
  static char *buffer = NULL;
418
0
419
0
  extract_free(alloc, &buffer);
420
0
  if (extract_asprintf(alloc, &buffer, "<name=%s>", tag->name ? tag->name : ""))
421
0
  {
422
0
    return "";
423
0
  }
424
0
425
0
  return buffer;
426
0
}
427
428
/* Pull-parser step: reads one tag from <buffer> into <out>.

The read position is expected to be just after a '<'. This reads the tag name,
then any attributes, then the plain text that follows the closing '>' up to
(and consuming) the next '<' or EOF.

<out> is freed with extract_xml_tag_free() before parsing, so it must already
be in a valid state (for example from extract_xml_tag_init() or a previous
call).

Returns 0 on success, with out->name never NULL. Returns +1 if EOF is reached
before the tag is complete, or -1 on error. On any non-zero return <out> is
freed and reset. */
int extract_xml_pparse_next(extract_buffer_t *buffer, extract_xml_tag_t *out)
{
    int              ret = -1;
    char            *attribute_name = NULL;
    char            *attribute_value = NULL;
    char             c;
    extract_alloc_t *alloc;

    /* Check <buffer> before its first use, not after. */
    assert(buffer);
    alloc = extract_buffer_alloc(buffer);

    /* Disabled diagnostic; it also keeps extract_xml_tag_string() referenced. */
    if (0) outf("out is: %s", extract_xml_tag_string(alloc, out));
    extract_xml_tag_free(alloc, out);

    /* Read tag name. Initialise it to empty string so we never return
    out->name==null on success. */
    if (str_catl( alloc, &out->name, NULL, 0)) goto end;
    for(;;) {
        int e = extract_buffer_read(buffer, &c, 1, NULL);
        if (e) {
            if (e == +1) ret = 1;   /* EOF is not an error here. */
            goto end;
        }
        if (c == '>' || c == ' ')  break;
        if (str_catc(alloc, &out->name, c)) goto end;
    }
    if (c == ' ') {

        /* Read attributes. */
        for(;;) {

            /* Read attribute name. */
            for(;;) {
                if (s_next(buffer, &ret, &c)) goto end;
                if (c == '=' || c == '>' || c == ' ') break;
                if (str_catc(alloc, &attribute_name, c)) goto end;
            }
            if (c == '>') break;

            if (c == '=') {
                /* Read attribute value. */
                int quote_single = 0;
                int quote_double = 0;
                size_t l;
                if (str_catl( alloc, &attribute_value, NULL, 0)) goto end;
                for(;;) {
                    if (s_next(buffer, &ret, &c)) goto end;
                    if (c == '\'')      quote_single = !quote_single;
                    else if (c == '"')  quote_double = !quote_double;
                    else if (!quote_single && !quote_double
                            && (c == ' ' || c == '/' || c == '>')
                            ) {
                        /* We are at end of attribute value. */
                        break;
                    }
                    else if (c == '\\') {
                        /* Escape next character: it is appended as-is. */
                        if (s_next(buffer, &ret, &c)) goto end;
                    }
                    if (str_catc(alloc, &attribute_value, c)) goto end;
                }

                /* Remove any enclosing quotes. */
                l = strlen(attribute_value);
                if (l >= 2) {
                    if (
                            (attribute_value[0] == '"' && attribute_value[l-1] == '"')
                            ||
                            (attribute_value[0] == '\'' && attribute_value[l-1] == '\'')
                            ) {
                        memmove(attribute_value, attribute_value+1, l-2);
                        attribute_value[l-2] = 0;
                    }
                }
            }

            /* Ensure name and value are not NULL. */
            if (str_catl( alloc, &attribute_name, NULL, 0)) goto end;
            if (str_catl( alloc, &attribute_value, NULL, 0)) goto end;

            if (extract_xml_tag_attributes_append(alloc, out, attribute_name, attribute_value)) goto end;
            /* The tag now holds both strings; clear the locals so that the
            cleanup below does not free them. */
            attribute_name = NULL;
            attribute_value = NULL;
            if (c == '/') {
                if (s_next(buffer, &ret, &c)) goto end;
            }
            if (c == '>') break;
        }
    }

    /* Read plain text until next '<'. */
    for(;;) {
        /* We don't use s_next() here because EOF is not an error. */
        int e = extract_buffer_read(buffer, &c, 1, NULL);
        if (e == +1) {
            break;   /* EOF is not an error here. */
        }
        if (e) goto end;
        if (c == '<') break;
        if (extract_astring_catc(alloc, &out->text, c)) goto end;
    }

    ret = 0;
end:

    extract_free(alloc, &attribute_name);
    extract_free(alloc, &attribute_value);
    if (ret) {
        extract_xml_tag_free(alloc, out);
    }

    return ret;
}