Coverage Report

Created: 2025-12-03 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mysql-server/strings/xml.cc
Line
Count
Source
1
/* Copyright (c) 2003, 2025, Oracle and/or its affiliates.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License, version 2.0,
5
   as published by the Free Software Foundation.
6
7
   This program is designed to work with certain software (including
8
   but not limited to OpenSSL) that is licensed under separate terms,
9
   as designated in a particular file or component or in included license
10
   documentation.  The authors of MySQL hereby grant you an additional
11
   permission to link the program and your derivative works with the
12
   separately licensed software that they have either included with
13
   the program or referenced in the documentation.
14
15
   Without limiting anything contained in the foregoing, this file,
16
   which is part of C Driver for MySQL (Connector/C), is also subject to the
17
   Universal FOSS Exception, version 1.0, a copy of which can be found at
18
   http://oss.oracle.com/licenses/universal-foss-exception.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License, version 2.0, for more details.
24
25
   You should have received a copy of the GNU General Public License
26
   along with this program; if not, write to the Free Software
27
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
28
29
#include <cstdint>
30
#include <cstdio>
31
#include <cstring>
32
#include <limits>
33
34
#include "my_xml.h"
35
#include "string_with_len.h"
36
#include "strings/str_alloc.h"
37
38
0
/*
  Token codes used by the scanner (my_xml_scan) and named by lex2str().
  Punctuation tokens use the punctuation character itself as their code,
  which is what lets the scanner return the input byte directly.
*/

/* Multi-character tokens. */
#define MY_XML_UNKNOWN 'U' /* input that starts no known token */
#define MY_XML_EOF 'E'     /* end of input */
#define MY_XML_STRING 'S'  /* quoted string */
#define MY_XML_IDENT 'I'   /* identifier */
#define MY_XML_COMMENT 'C' /* "<!--" comment */
#define MY_XML_TEXT 'T'    /* text; named by lex2str(), never returned by my_xml_scan */
#define MY_XML_CDATA 'D'   /* "<![CDATA[" section */

/* Single-character punctuation tokens. */
#define MY_XML_EQ '='
#define MY_XML_LT '<'
#define MY_XML_GT '>'
#define MY_XML_SLASH '/'
#define MY_XML_QUESTION '?'
#define MY_XML_EXCLAM '!'
51
52
/*
  A slice of the text being parsed, described by two pointers into the input.
  Its length is computed as end - beg; no bytes are copied.
*/
struct MY_XML_ATTR {
  const char *beg; /* first byte of the slice */
  const char *end; /* one past the last byte of the slice */
};
56
57
/*
58
  XML ctype:
59
*/
60
0
/* Bit flags stored per byte value in the my_xml_ctype table. */
#define MY_XML_ID0 0x01 /* byte may start an identifier */
#define MY_XML_ID1 0x02 /* byte may appear inside an identifier */
#define MY_XML_SPC 0x08 /* byte is whitespace */
63
64
/*
65
 http://www.w3.org/TR/REC-xml/
66
 [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
67
                  CombiningChar | Extender
68
 [5] Name ::= (Letter | '_' | ':') (NameChar)*
69
*/
70
71
/*
  Character classes for every byte value, as a combination of the
  MY_XML_ID0 (0x01), MY_XML_ID1 (0x02) and MY_XML_SPC (0x08) flags:

    8  whitespace: TAB, LF, CR and SPACE
    2  identifier-medial only: '-', '.' and the digits
    3  identifier-initial and medial: letters, ':', '_' and all bytes >= 0x80
    0  everything else

  The table is only ever read, hence const.
*/
static const char my_xml_ctype[256] = {
    /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 0, 8, 0, 0,
    /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20 */ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, /* SPACE .. '/' */
    /* 30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, /* '0' .. '?'   */
    /* 40 */ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* '@' .. 'O'   */
    /* 50 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, /* 'P' .. '_'   */
    /* 60 */ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* '`' .. 'o'   */
    /* 70 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, /* 'p' .. DEL   */
    /* 80 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 90 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* A0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* B0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* C0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* D0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* E0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* F0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
104
105
0
/*
  Character-class predicates. Each evaluates to non-zero when the byte c
  carries the given flag in my_xml_ctype. The argument is converted to
  uint8_t so that it always indexes the table with a value in 0..255.
*/
#define my_xml_ctype_has(c, flag) (my_xml_ctype[(uint8_t)(c)] & (flag))
#define my_xml_is_space(c) my_xml_ctype_has((c), MY_XML_SPC)
#define my_xml_is_id0(c) my_xml_ctype_has((c), MY_XML_ID0)
#define my_xml_is_id1(c) my_xml_ctype_has((c), MY_XML_ID1)
108
109
0
static const char *lex2str(int lex) {
110
0
  switch (lex) {
111
0
    case MY_XML_EOF:
112
0
      return "END-OF-INPUT";
113
0
    case MY_XML_STRING:
114
0
      return "STRING";
115
0
    case MY_XML_IDENT:
116
0
      return "IDENT";
117
0
    case MY_XML_CDATA:
118
0
      return "CDATA";
119
0
    case MY_XML_EQ:
120
0
      return "'='";
121
0
    case MY_XML_LT:
122
0
      return "'<'";
123
0
    case MY_XML_GT:
124
0
      return "'>'";
125
0
    case MY_XML_SLASH:
126
0
      return "'/'";
127
0
    case MY_XML_COMMENT:
128
0
      return "COMMENT";
129
0
    case MY_XML_TEXT:
130
0
      return "TEXT";
131
0
    case MY_XML_QUESTION:
132
0
      return "'?'";
133
0
    case MY_XML_EXCLAM:
134
0
      return "'!'";
135
0
  }
136
0
  return "unknown token";
137
0
}
138
139
0
/**
  Trim whitespace from both ends of a slice, in place.

  Only the two pointers are adjusted; the underlying text is not modified.

  @param a  slice to trim; may end up empty (beg == end)
*/
static void my_xml_norm_text(MY_XML_ATTR *a) {
  /* Drop leading whitespace. */
  while ((a->beg < a->end) && my_xml_is_space(*a->beg)) ++a->beg;

  /* Drop trailing whitespace. */
  while ((a->beg < a->end) && my_xml_is_space(*(a->end - 1))) --a->end;
}
145
146
static inline bool my_xml_parser_prefix_cmp(MY_XML_PARSER *p, const char *s,
147
0
                                            size_t slen) {
148
0
  return (p->cur + slen > p->end) || memcmp(p->cur, s, slen) != 0;
149
0
}
150
151
0
/**
  Read the next token from the parser input.

  Leading whitespace is skipped first. On return a->beg and a->end delimit
  the token and p->cur points past the input that was consumed:

  - MY_XML_EOF:      nothing left; a is the empty slice at p->end.
  - MY_XML_COMMENT:  from "<!--" through "-->" (or to the end of input when
                     the comment is not closed).
  - MY_XML_CDATA:    from "<![CDATA[" through "]]>". When the closing "]]>"
                     is missing, a->end is left equal to a->beg.
  - punctuation:     one of ? = / < > !, returned as the character itself.
  - MY_XML_STRING:   the text between matching quotes (quotes excluded),
                     trimmed unless MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is
                     set. A missing closing quote ends the string at the end
                     of input.
  - MY_XML_IDENT:    an identifier.
  - MY_XML_UNKNOWN:  anything else; no input is consumed.

  @param p  the parser instance; p->cur is advanced
  @param a  receives the extent of the token
  @return the token code
*/
static int my_xml_scan(MY_XML_PARSER *p, MY_XML_ATTR *a) {
  int lex;

  while ((p->cur < p->end) && my_xml_is_space(p->cur[0])) p->cur++;

  if (p->cur >= p->end) {
    a->beg = p->end;
    a->end = p->end;
    return MY_XML_EOF;
  }

  a->beg = p->cur;
  a->end = p->cur;

  if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("<!--"))) {
    for (; p->cur < p->end; p->cur++) {
      if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("-->"))) {
        p->cur += 3;
        break;
      }
    }
    a->end = p->cur;
    lex = MY_XML_COMMENT;
  } else if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("<![CDATA["))) {
    p->cur += 9;
    /* Stop two bytes early so that the three-byte look-ahead stays in range. */
    for (; p->cur < p->end - 2; p->cur++) {
      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>') {
        p->cur += 3;
        a->end = p->cur;
        break;
      }
    }
    lex = MY_XML_CDATA;
  } else if ((p->cur[0] != '\0') && strchr("?=/<>!", p->cur[0])) {
    /*
      The explicit NUL test matters: strchr() also matches the terminator of
      the search string, so a NUL byte in the input would otherwise be taken
      for a punctuation token with code 0.
    */
    p->cur++;
    a->end = p->cur;
    lex = a->beg[0];
  } else if ((p->cur[0] == '"') || (p->cur[0] == '\'')) {
    /*
      "string" or 'string' found.
      Scan until the closing quote/doublequote, or until the END-OF-INPUT.
    */
    p->cur++;
    while ((p->cur < p->end) && (p->cur[0] != a->beg[0])) p->cur++;
    a->end = p->cur;
    if (p->cur < p->end) /* Closing quote or doublequote has been found */
      p->cur++;
    a->beg++; /* Exclude the opening quote from the token. */
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION)) my_xml_norm_text(a);
    lex = MY_XML_STRING;
  } else if (my_xml_is_id0(p->cur[0])) {
    p->cur++;
    while (p->cur < p->end && my_xml_is_id1(p->cur[0])) p->cur++;
    a->end = p->cur;
    my_xml_norm_text(a);
    lex = MY_XML_IDENT;
  } else {
    lex = MY_XML_UNKNOWN;
  }

  return lex;
}
220
221
0
/**
  Pass a value to the registered value handler, if there is one.

  @param st   the parser instance
  @param str  start of the value
  @param len  length of the value in bytes
  @return the handler's return value, or MY_XML_OK when no handler is set
*/
static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len) {
  if (!st->value) return MY_XML_OK;
  return (st->value)(st, str, len);
}
224
225
/**
226
  Ensure the attr buffer is wide enough to hold the new value
227
228
  Expand and/or allocate dynamic buffer as needed to hold the concatenated
229
  path and the terminating zero.
230
231
  @param st   the parser instance
232
  @param len  the length of the attribute to be added
233
  @return state
234
  @retval 1  failed
235
  @retval 0  success
236
*/
237
0
static int my_xml_attr_ensure_space(MY_XML_PARSER *st, size_t len) {
238
0
  size_t const ofs = st->attr.end - st->attr.start;
239
0
  len++;  // Add terminating zero.
240
0
  if (ofs + len > st->attr.buffer_size) {
241
0
    st->attr.buffer_size =
242
0
        (std::numeric_limits<size_t>::max() - len) / 2 > st->attr.buffer_size
243
0
            ? st->attr.buffer_size * 2 + len
244
0
            : std::numeric_limits<size_t>::max();
245
246
0
    if (!st->attr.buffer) {
247
0
      st->attr.buffer = (char *)my_str_malloc(st->attr.buffer_size);
248
0
      if (st->attr.buffer)
249
0
        memcpy(st->attr.buffer, st->attr.static_buffer,
250
0
               ofs + 1 /*term. zero */);
251
0
    } else
252
0
      st->attr.buffer =
253
0
          (char *)my_str_realloc(st->attr.buffer, st->attr.buffer_size);
254
0
    st->attr.start = st->attr.buffer;
255
0
    st->attr.end = st->attr.start + ofs;
256
257
0
    return st->attr.buffer ? MY_XML_OK : MY_XML_ERROR;
258
0
  }
259
0
  return MY_XML_OK;
260
0
}
261
262
/** rewind the attr buffer to initial state */
263
0
static void my_xml_attr_rewind(MY_XML_PARSER *p) {
264
  /* keep the buffer already allocated */
265
0
  p->attr.end = p->attr.start;
266
0
}
267
268
0
/**
  Append a node name to the current path and notify the enter handler.

  The path is kept as '/'-separated names followed by a terminating zero;
  a separator is written only when the path is not empty.

  @param st   the parser instance
  @param str  name of the node being entered
  @param len  length of str in bytes
  @return MY_XML_ERROR when the path buffer cannot be grown; otherwise the
          handler's return value, or MY_XML_OK when no handler is set.
          With MY_XML_FLAG_RELATIVE_NAMES the handler receives the bare
          name, otherwise the whole path.
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len) {
  if (my_xml_attr_ensure_space(st, len + 1 /* the separator char */))
    return MY_XML_ERROR;

  char *tail = st->attr.end;
  if (tail > st->attr.start) *tail++ = '/';
  memcpy(tail, str, len);
  tail += len;
  *tail = '\0';
  st->attr.end = tail;

  if (!st->enter) return MY_XML_OK;

  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES) return st->enter(st, str, len);

  return st->enter(st, st->attr.start, st->attr.end - st->attr.start);
}
285
286
0
/**
  Copy at most min(l1, l2) bytes of src into s and zero-terminate the result.

  @param s    destination; must have room for min(l1, l2) + 1 bytes
  @param src  source bytes
  @param l1   first length limit
  @param l2   second length limit
*/
static void mstr(char *s, const char *src, size_t l1, size_t l2) {
  const size_t count = (l2 <= l1) ? l2 : l1;
  memcpy(s, src, count);
  *(s + count) = '\0';
}
291
292
0
/**
  Leave the innermost open node: check the closing name, notify the leave
  handler and remove the last component from the current path.

  @param p     the parser instance
  @param str   name given in the closing tag, or nullptr when there is no
               name to check
  @param slen  length of str in bytes
  @return MY_XML_ERROR (with p->errstr set) when str is given and its length
          differs from that of the innermost open name; otherwise the
          handler's return value, or MY_XML_OK when no handler is set.
          With MY_XML_FLAG_RELATIVE_NAMES the handler receives (str, slen),
          otherwise the whole path as it was before the component is removed.
*/
static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen) {
  char *e;
  size_t glen;
  char s[32];
  char g[32];
  int rc;

  /* Find previous '/' or beginning */
  for (e = p->attr.end; (e > p->attr.start) && (e[0] != '/'); e--)
    ;

  /*
    The innermost name starts after the '/', or at e itself when the path
    has a single component and therefore no separator.
  */
  const char *tag = (e[0] == '/') ? e + 1 : e;
  glen = (size_t)(p->attr.end - tag);

  /*
    NOTE(review): only the lengths are compared, so a closing tag whose name
    differs but has the same length is accepted. Left unchanged here.
  */
  if (str && (slen != glen)) {
    mstr(s, str, sizeof(s) - 1, slen);
    if (glen) {
      mstr(g, tag, sizeof(g) - 1, glen);
      sprintf(p->errstr, "'</%s>' unexpected ('</%s>' wanted)", s, g);
    } else {
      sprintf(p->errstr, "'</%s>' unexpected (END-OF-INPUT wanted)", s);
    }
    return MY_XML_ERROR;
  }

  if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
    rc = p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
  else
    rc = (p->leave_xml
              ? p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start)
              : MY_XML_OK);

  /* Cut the path back to the parent node. */
  *e = '\0';
  p->attr.end = e;

  return rc;
}
326
327
0
/**
  Parse XML text and report its structure through the registered handlers.

  Tags and attributes are reported with the enter/leave handlers; text,
  CDATA content and attribute values are reported with the value handler.
  Comments are skipped.

  An unterminated CDATA section is reported as a syntax error. The scanner
  leaves such a token empty, so stripping the "<![CDATA[" and "]]>"
  delimiters from it would produce a negative length.

  NOTE: the value handler's return value is ignored for text and CDATA
  content; it is only checked for attribute values.

  @param p    the parser instance
  @param str  start of the text to parse
  @param len  number of bytes to parse
  @return MY_XML_OK on success. MY_XML_ERROR on a syntax error (p->errstr
          then holds a message), when an enter/leave handler or an
          attribute value handler does not return MY_XML_OK, or when the
          path buffer cannot be grown.
*/
int my_xml_parse(MY_XML_PARSER *p, const char *str, size_t len) {
  /* Lengths of the "<![CDATA[" opener and the "]]>" closer. */
  const size_t cdata_open_len = 9;
  const size_t cdata_close_len = 3;

  my_xml_attr_rewind(p);

  p->beg = str;
  p->cur = str;
  p->end = str + len;

  while (p->cur < p->end) {
    MY_XML_ATTR a;
    if (p->cur[0] == '<') {
      int lex;
      int question = 0;
      int exclam = 0;

      lex = my_xml_scan(p, &a);

      if (MY_XML_COMMENT == lex) continue;

      if (lex == MY_XML_CDATA) {
        /* A complete section holds at least the opener and the closer. */
        if ((size_t)(a.end - a.beg) < cdata_open_len + cdata_close_len) {
          sprintf(p->errstr, "%s unexpected (']]>' wanted)",
                  lex2str(MY_XML_EOF));
          return MY_XML_ERROR;
        }
        a.beg += cdata_open_len;
        a.end -= cdata_close_len;
        my_xml_value(p, a.beg, (size_t)(a.end - a.beg));
        continue;
      }

      /* The first scan consumed '<'; now look at what follows it. */
      lex = my_xml_scan(p, &a);

      if (MY_XML_SLASH == lex) {
        /* Closing tag: "</ident>" */
        if (MY_XML_IDENT != (lex = my_xml_scan(p, &a))) {
          sprintf(p->errstr, "%s unexpected (ident wanted)", lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg)))
          return MY_XML_ERROR;
        lex = my_xml_scan(p, &a);
        goto gt;
      }

      if (MY_XML_EXCLAM == lex) {
        lex = my_xml_scan(p, &a);
        exclam = 1;
      } else if (MY_XML_QUESTION == lex) {
        lex = my_xml_scan(p, &a);
        question = 1;
      }

      if (MY_XML_IDENT == lex) {
        p->current_node_type = MY_XML_NODE_TAG;
        if (MY_XML_OK != my_xml_enter(p, a.beg, (size_t)(a.end - a.beg)))
          return MY_XML_ERROR;
      } else {
        sprintf(p->errstr, "%s unexpected (ident or '/' wanted)", lex2str(lex));
        return MY_XML_ERROR;
      }

      /* Attributes; inside "<!...>" quoted strings are accepted as well. */
      while ((MY_XML_IDENT == (lex = my_xml_scan(p, &a))) ||
             ((MY_XML_STRING == lex && exclam))) {
        MY_XML_ATTR b;
        if (MY_XML_EQ == (lex = my_xml_scan(p, &b))) {
          /* name=value */
          lex = my_xml_scan(p, &b);
          if ((lex == MY_XML_IDENT) || (lex == MY_XML_STRING)) {
            p->current_node_type = MY_XML_NODE_ATTR;
            if ((MY_XML_OK !=
                 my_xml_enter(p, a.beg, (size_t)(a.end - a.beg))) ||
                (MY_XML_OK !=
                 my_xml_value(p, b.beg, (size_t)(b.end - b.beg))) ||
                (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg))))
              return MY_XML_ERROR;
          } else {
            sprintf(p->errstr, "%s unexpected (ident or string wanted)",
                    lex2str(lex));
            return MY_XML_ERROR;
          }
        } else if (MY_XML_IDENT == lex) {
          /* A name followed by another name: report it without a value. */
          p->current_node_type = MY_XML_NODE_ATTR;
          if ((MY_XML_OK != my_xml_enter(p, a.beg, (size_t)(a.end - a.beg))) ||
              (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg))))
            return MY_XML_ERROR;
        } else if ((MY_XML_STRING == lex) && exclam) {
          /*
            We are in <!DOCTYPE>, e.g.
            <!DOCTYPE name SYSTEM "SystemLiteral">
            <!DOCTYPE name PUBLIC "PubidLiteral" "SystemLiteral">
            Just skip "SystemLiteral" and "PubidLiteral"
          */
        } else
          break;
      }

      if (lex == MY_XML_SLASH) {
        /* Self-closing tag: "<ident ... />" */
        if (MY_XML_OK != my_xml_leave(p, nullptr, 0)) return MY_XML_ERROR;
        lex = my_xml_scan(p, &a);
      }

    gt:
      if (question) {
        /* "<?ident ... ?>" must end with '?' before the '>'. */
        if (lex != MY_XML_QUESTION) {
          sprintf(p->errstr, "%s unexpected ('?' wanted)", lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, nullptr, 0)) return MY_XML_ERROR;
        lex = my_xml_scan(p, &a);
      }

      if (exclam) {
        /* "<!ident ...>" has no separate closing tag. */
        if (MY_XML_OK != my_xml_leave(p, nullptr, 0)) return MY_XML_ERROR;
      }

      if (lex != MY_XML_GT) {
        sprintf(p->errstr, "%s unexpected ('>' wanted)", lex2str(lex));
        return MY_XML_ERROR;
      }
    } else {
      /* Text up to the next '<' or the end of input. */
      a.beg = p->cur;
      for (; (p->cur < p->end) && (p->cur[0] != '<'); p->cur++)
        ;
      a.end = p->cur;

      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
        my_xml_norm_text(&a);
      if (a.beg != a.end) {
        my_xml_value(p, a.beg, (size_t)(a.end - a.beg));
      }
    }
  }

  /* A non-empty path means that some element was never closed. */
  if (p->attr.start[0]) {
    sprintf(p->errstr, "unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}
459
460
0
/**
  Initialise a parser object.

  The whole object is zeroed and the path buffer is pointed at the
  parser's built-in static buffer.

  @param p  the parser instance to initialise
*/
void my_xml_parser_create(MY_XML_PARSER *p) {
  memset(p, 0, sizeof(*p));

  /* Use static buffer while it's sufficient. */
  p->attr.buffer_size = sizeof(p->attr.static_buffer);
  p->attr.start = p->attr.static_buffer;
  p->attr.end = p->attr.static_buffer;
}
468
469
0
/**
  Release the dynamically allocated path buffer, if any.

  After the buffer is freed the path is pointed back at the static buffer
  and emptied, so that attr.start, attr.end and attr.buffer_size no longer
  describe freed memory.

  @param p  the parser instance
*/
void my_xml_parser_free(MY_XML_PARSER *p) {
  if (!p->attr.buffer) return;

  my_str_free(p->attr.buffer);
  p->attr.buffer = nullptr;

  p->attr.static_buffer[0] = '\0';
  p->attr.start = p->attr.end = p->attr.static_buffer;
  p->attr.buffer_size = sizeof(p->attr.static_buffer);
}
475
476
void my_xml_set_value_handler(MY_XML_PARSER *p,
477
                              int (*action)(MY_XML_PARSER *p, const char *s,
478
0
                                            size_t l)) {
479
0
  p->value = action;
480
0
}
481
482
void my_xml_set_enter_handler(MY_XML_PARSER *p,
483
                              int (*action)(MY_XML_PARSER *p, const char *s,
484
0
                                            size_t l)) {
485
0
  p->enter = action;
486
0
}
487
488
void my_xml_set_leave_handler(MY_XML_PARSER *p,
489
                              int (*action)(MY_XML_PARSER *p, const char *s,
490
0
                                            size_t l)) {
491
0
  p->leave_xml = action;
492
0
}
493
494
0
/**
  Store a caller-supplied pointer in the parser.

  @param p          the parser instance
  @param user_data  pointer to store; this file does not interpret it
*/
void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data) {
  p->user_data = user_data;
}
497
498
0
/**
  Get the parser's error message buffer.

  @param p  the parser instance
  @return p->errstr, which my_xml_parse() fills in when it reports a syntax
          error
*/
const char *my_xml_error_string(MY_XML_PARSER *p) {
  return p->errstr;
}
499
500
0
/**
  Position of the parser cursor within its line.

  The scan walks backwards from the cursor and never forms a pointer before
  the start of the input.

  @param p  the parser instance
  @return on the first line, the number of bytes between the start of the
          input and the cursor; on later lines, the distance from the
          preceding '\n' character to the cursor
*/
size_t my_xml_error_pos(MY_XML_PARSER *p) {
  const char *s = p->cur;

  /* Step back until s follows a newline or reaches the start of the input. */
  while ((s > p->beg) && (s[-1] != '\n')) s--;

  if (s > p->beg) {
    /* Measured from the newline character itself. */
    return (size_t)(p->cur - (s - 1));
  }
  return (size_t)(p->cur - p->beg);
}
511
512
0
/**
  Count the newline characters between the start of the input and the cursor.

  @param st  the parser instance
  @return the number of '\n' bytes before st->cur, i.e. a zero-based line
          number
*/
unsigned my_xml_error_lineno(MY_XML_PARSER *st) {
  unsigned newlines = 0;
  const char *s = st->beg;

  while (s < st->cur) {
    if (*s == '\n') ++newlines;
    ++s;
  }
  return newlines;
}