Coverage Report

Created: 2024-06-18 07:03

/src/server/strings/xml.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2003, 2011, Oracle and/or its affiliates.
2
   Copyright (c) 2011 Monty Program Ab
3
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; version 2 of the License.
7
8
   This program is distributed in the hope that it will be useful,
9
   but WITHOUT ANY WARRANTY; without even the implied warranty of
10
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
   GNU General Public License for more details.
12
13
   You should have received a copy of the GNU General Public License
14
   along with this program; if not, write to the Free Software
15
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
16
17
#include "strings_def.h"
18
#include "m_string.h"
19
#include "my_xml.h"
20
#include "my_sys.h"
21
22
23
0
/*
  Token codes returned by my_xml_scan().

  The punctuation tokens (EQ, LT, GT, SLASH, QUESTION, EXCLAM) are equal to
  the character they stand for: for any of "?=/<>!" the scanner returns the
  input byte itself as the token code.  The remaining codes are arbitrary
  distinct letters.  MY_XML_TEXT is not produced by the scanner in this file;
  only lex2str() refers to it.
*/
#define MY_XML_UNKNOWN  'U'
24
0
#define MY_XML_EOF  'E'
25
0
#define MY_XML_STRING 'S'
26
0
#define MY_XML_IDENT  'I'
27
0
#define MY_XML_EQ '='
28
0
#define MY_XML_LT '<'
29
0
#define MY_XML_GT '>'
30
0
#define MY_XML_SLASH  '/'
31
0
#define MY_XML_COMMENT  'C'
32
0
#define MY_XML_TEXT 'T'
33
0
#define MY_XML_QUESTION '?'
34
0
#define MY_XML_EXCLAM   '!'
35
0
#define MY_XML_CDATA    'D'
36
37
/*
  A span of the input text, used for the token most recently scanned.
  The span is half-open: 'beg' is its first byte and 'end' is one past its
  last byte, so its length is (end - beg).  It points into the caller's
  input and is not NUL-terminated.
*/
typedef struct xml_attr_st
38
{
39
  const char *beg;  /* first byte of the span */
40
  const char *end;  /* one past the last byte of the span */
41
} MY_XML_ATTR;
42
43
44
/*
45
  XML ctype:
46
*/
47
0
/*
  Bit flags stored per byte value in my_xml_ctype[] and tested by the
  my_xml_is_id0() / my_xml_is_id1() / my_xml_is_space() macros.
  A table entry of 3 means ID0|ID1, 2 means ID1 only, 8 means SPC.
*/
#define MY_XML_ID0  0x01 /* Identifier initial character */
48
0
#define MY_XML_ID1  0x02 /* Identifier medial  character */
49
0
#define MY_XML_SPC  0x08 /* Spacing character */
50
51
52
/*
53
 http://www.w3.org/TR/REC-xml/ 
54
 [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
55
                  CombiningChar | Extender
56
 [5] Name ::= (Letter | '_' | ':') (NameChar)*
57
*/
58
59
/*
  Per-byte classification table, indexed by the unsigned byte value.
  Each entry is a combination of MY_XML_ID0 (0x01), MY_XML_ID1 (0x02) and
  MY_XML_SPC (0x08):
    - TAB, LF, CR and SPACE are spacing characters (8);
    - letters, '_', ':' and every byte >= 0x80 may start or continue an
      identifier (3);
    - digits, '-' and '.' may only continue an identifier (2).
  The table is read-only, hence 'const'.
*/
static const char my_xml_ctype[256]=
{
/*00*/  0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/  8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,  /*  !"#$%&'()*+,-./ */
/*30*/  2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,  /* 0123456789:;<=>? */
/*40*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* @ABCDEFGHIJKLMNO */
/*50*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,  /* PQRSTUVWXYZ[\]^_ */
/*60*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* `abcdefghijklmno */
/*70*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,  /* pqrstuvwxyz{|}~  */
/*80*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};
78
79
0
/*
  Character classification helpers backed by my_xml_ctype[].
  Each yields a non-zero value (the flag bit itself, not necessarily 1)
  when the byte belongs to the class, and 0 otherwise.  The argument is
  cast to uchar before being used as the table index.
*/
#define my_xml_is_space(c)  (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
80
0
#define my_xml_is_id0(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
81
0
#define my_xml_is_id1(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
82
83
84
static const char *lex2str(int lex)
85
0
{
86
0
  switch(lex)
87
0
  {
88
0
    case MY_XML_EOF:      return "END-OF-INPUT";
89
0
    case MY_XML_STRING:   return "STRING";
90
0
    case MY_XML_IDENT:    return "IDENT";
91
0
    case MY_XML_CDATA:    return "CDATA";
92
0
    case MY_XML_EQ:       return "'='";
93
0
    case MY_XML_LT:       return "'<'";
94
0
    case MY_XML_GT:       return "'>'";
95
0
    case MY_XML_SLASH:    return "'/'";
96
0
    case MY_XML_COMMENT:  return "COMMENT";
97
0
    case MY_XML_TEXT:     return "TEXT";
98
0
    case MY_XML_QUESTION: return "'?'";
99
0
    case MY_XML_EXCLAM:   return "'!'";
100
0
  }
101
0
  return "unknown token";
102
0
}
103
104
/*
  Trim spacing characters from both ends of the span *a, in place.
  Only the beg/end pointers move; the underlying text is not modified.
  A span consisting solely of spacing characters becomes empty.
*/
static void my_xml_norm_text(MY_XML_ATTR *a)
{
  /* Leading whitespace */
  while (a->beg < a->end && my_xml_is_space(*a->beg))
    a->beg++;

  /* Trailing whitespace */
  while (a->beg < a->end && my_xml_is_space(*(a->end - 1)))
    a->end--;
}
109
110
111
static inline my_bool
112
my_xml_parser_prefix_cmp(MY_XML_PARSER *p, const char *s, size_t slen)
113
0
{
114
0
  return (p->cur + slen > p->end) || memcmp(p->cur, s, slen);
115
0
}
116
117
118
/*
  Scan the next token from the input.

  Leading spacing characters are skipped, then one token is recognized
  starting at p->cur.  On return *a describes the token text and p->cur
  points past whatever was consumed.

  @param p  the parser instance (p->cur is advanced)
  @param a  receives the span of the token

  @return one of the MY_XML_xxx token codes:
    - MY_XML_EOF      no input left; a is the empty span at p->end
    - MY_XML_COMMENT  "<!-- ... -->"; an unterminated comment runs to the
                      end of the input
    - MY_XML_CDATA    "<![CDATA[ ... ]]>"; a covers the whole construct
                      including the delimiters.  If the terminator is
                      missing, a is left EMPTY (a->end == a->beg) and the
                      caller has to detect that.
    - one of "?=/<>!" returned as the character itself
    - MY_XML_STRING   quoted string; a excludes the quotes and is
                      whitespace-trimmed unless
                      MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is set.  A missing
                      closing quote makes the string run to end of input.
    - MY_XML_IDENT    identifier
    - MY_XML_UNKNOWN  anything else; nothing is consumed in this case
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  int lex;

  /* Skip leading whitespace */
  for (; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ;  p->cur++);

  if (p->cur >= p->end)
  {
    a->beg=p->end;
    a->end=p->end;
    return MY_XML_EOF;
  }

  a->beg=p->cur;
  a->end=p->cur;

  if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<!--")))
  {
    for (; p->cur < p->end; p->cur++)
    {
      if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("-->")))
      {
        p->cur+= 3;
        break;
      }
    }
    a->end=p->cur;
    lex=MY_XML_COMMENT;
  }
  else if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<![CDATA[")))
  {
    p->cur+= 9;                                 /* strlen("<![CDATA[") */
    for (; p->cur < p->end - 2 ; p->cur++)
    {
      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
      {
        p->cur+= 3;
        a->end= p->cur;
        break;
      }
    }
    /* If "]]>" was not found, a->end is still equal to a->beg here. */
    lex= MY_XML_CDATA;
  }
  /*
    strchr() also matches the terminating NUL of its first argument, so a
    NUL byte in the input must be excluded explicitly; otherwise it would
    be consumed as a punctuation token with code 0.
  */
  else if (p->cur[0] != '\0' && strchr("?=/<>!",p->cur[0]))
  {
    p->cur++;
    a->end=p->cur;
    lex=a->beg[0];
  }
  else if ( (p->cur[0] == '"') || (p->cur[0] == '\'') )
  {
    /*
      "string" or 'string' found.
      Scan until the closing quote/doublequote, or until the END-OF-INPUT.
    */
    p->cur++;
    for (; ( p->cur < p->end ) && (p->cur[0] != a->beg[0]); p->cur++)
    {}
    a->end=p->cur;
    if (p->cur < p->end) /* Closing quote or doublequote has been found */
      p->cur++;
    a->beg++;            /* Exclude the opening quote from the span */
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
      my_xml_norm_text(a);
    lex=MY_XML_STRING;
  }
  else if (my_xml_is_id0(p->cur[0]))
  {
    p->cur++;
    while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
      p->cur++;
    a->end=p->cur;
    my_xml_norm_text(a);
    lex=MY_XML_IDENT;
  }
  else
    lex= MY_XML_UNKNOWN;

  return lex;
}
204
205
206
/*
  Pass a value of 'len' bytes at 'str' to the user's value handler.

  @return the handler's return value, or MY_XML_OK when no value handler
          has been set
*/
static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
{
  if (!st->value)
    return MY_XML_OK;
  return st->value(st, str, len);
}
210
211
212
/**
213
  Ensure the attr buffer is wide enough to hold the new value
214
215
  Expand and/or allocate dynamic buffer as needed to hold the concatenated
216
  path and the terminating zero.
217
218
  @attr st   the parser instance
219
  @attr len  the length of the attribute to be added
220
  @return state
221
  @retval 1  failed
222
  @retval 0  success
223
*/
224
static int my_xml_attr_ensure_space(MY_XML_PARSER *st, size_t len)
225
0
{
226
0
  size_t ofs= st->attr.end - st->attr.start;
227
0
  len++; // Add terminating zero.
228
0
  if (ofs + len > st->attr.buffer_size)
229
0
  {
230
0
    st->attr.buffer_size= (SIZE_T_MAX - len) / 2 > st->attr.buffer_size ?
231
0
                            st->attr.buffer_size * 2 + len : SIZE_T_MAX;
232
233
0
    if (!st->attr.buffer)
234
0
    {
235
0
      st->attr.buffer= (char *) my_malloc(PSI_INSTRUMENT_ME, st->attr.buffer_size, MYF(0));
236
0
      if (st->attr.buffer)
237
0
        memcpy(st->attr.buffer, st->attr.static_buffer, ofs + 1 /*term. zero */);
238
0
    }
239
0
    else
240
0
      st->attr.buffer= (char *) my_realloc(PSI_INSTRUMENT_ME, st->attr.buffer,
241
0
                                           st->attr.buffer_size, MYF(0));
242
0
    st->attr.start= st->attr.buffer;
243
0
    st->attr.end= st->attr.start + ofs;
244
    
245
0
    return st->attr.buffer ? MY_XML_OK : MY_XML_ERROR;
246
0
  }
247
0
  return MY_XML_OK;
248
0
}
249
250
251
/** rewind the attr buffer to initial state */
252
static void my_xml_attr_rewind(MY_XML_PARSER *p)
253
0
{
254
  /* keep the buffer already allocated */
255
0
  p->attr.end= p->attr.start;
256
0
}
257
258
259
/*
  Enter an element or attribute: append its name to the current path and
  notify the user's enter handler.

  Path components are separated by '/', and the path is kept
  NUL-terminated.  With MY_XML_FLAG_RELATIVE_NAMES the handler receives
  only the name being entered; otherwise it receives the whole path.

  @param st   the parser instance
  @param str  the name being entered (not NUL-terminated)
  @param len  length of the name

  @return MY_XML_ERROR if the path buffer cannot be grown, otherwise the
          handler's return value, or MY_XML_OK when no handler is set
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
{
  /* One extra byte is requested for the '/' separator. */
  if (my_xml_attr_ensure_space(st, len + 1))
    return MY_XML_ERROR;

  /* A separator is needed only when the path is not empty. */
  if (st->attr.end > st->attr.start)
    *st->attr.end++= '/';

  memcpy(st->attr.end, str, len);
  st->attr.end+= len;
  *st->attr.end= '\0';

  if (!st->enter)
    return MY_XML_OK;

  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
    return st->enter(st, str, len);

  return st->enter(st, st->attr.start, st->attr.end - st->attr.start);
}
278
279
280
/*
  Copy min(l1, l2) bytes from 'src' into 's' and NUL-terminate the result.
  The destination must have room for min(l1, l2) + 1 bytes.
*/
static void mstr(char *s,const char *src,size_t l1, size_t l2)
{
  size_t n= l2;

  if (l1 < l2)
    n= l1;

  memcpy(s, src, n);
  s[n]= '\0';
}
286
287
288
/*
  Leave the innermost element or attribute: optionally verify its name,
  notify the user's leave handler and drop the last path component.

  @param p     the parser instance
  @param str   the name from the closing tag, or NULL to leave without
               checking the name
  @param slen  length of 'str'

  @return MY_XML_ERROR (with p->errstr filled in) if 'str' does not match
          the last path component; otherwise the handler's return value,
          or MY_XML_OK when no handler is set.

  With MY_XML_FLAG_RELATIVE_NAMES the handler receives (str, slen);
  otherwise it receives the whole path as it was before being shortened.
*/
static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
{
  char *sep= p->attr.end;
  char *last;
  size_t last_len;
  int rc;

  /* Walk back to the previous '/' or to the beginning of the path */
  while (sep > p->attr.start && *sep != '/')
    sep--;

  /* The last component starts right after the separator, if there is one */
  if (*sep == '/')
    last= sep + 1;
  else
    last= sep;
  last_len= (size_t) (p->attr.end - last);

  if (str && (slen != last_len || memcmp(str, last, slen)))
  {
    /* Names are truncated to 31 bytes for the error message */
    char got[32];
    char wanted[32];

    mstr(got, str, sizeof(got)-1, slen);
    if (last_len)
    {
      mstr(wanted, last, sizeof(wanted)-1, last_len);
      snprintf(p->errstr,sizeof(p->errstr),"'</%s>' unexpected ('</%s>' wanted)",got,wanted);
    }
    else
      snprintf(p->errstr,sizeof(p->errstr),"'</%s>' unexpected (END-OF-INPUT wanted)", got);
    return MY_XML_ERROR;
  }

  if (!p->leave_xml)
    rc= MY_XML_OK;
  else if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
    rc= p->leave_xml(p, str, slen);
  else
    rc= p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start);

  /* Cut the last component (and its separator) off the path */
  *sep= '\0';
  p->attr.end= sep;

  return rc;
}
326
327
328
/*
  Parse an XML text, reporting its structure through the user's handlers.

  Elements and attributes are reported through my_xml_enter() and
  my_xml_leave(); attribute values, CDATA contents and text between tags
  are reported through my_xml_value().  Comments are skipped.  Text is
  whitespace-trimmed unless MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is set, and
  text that is empty after trimming is not reported.

  @param p    the parser instance
  @param str  the text to parse (need not be NUL-terminated)
  @param len  length of the text in bytes

  @retval MY_XML_OK     the whole input was parsed
  @retval MY_XML_ERROR  syntax error, allocation failure or a handler
                        failure; for syntax errors p->errstr holds a
                        message and p->cur the position reached

  An unterminated "<![CDATA[" section is reported as an error.  The scanner
  returns an empty span for it, so stripping the delimiters would otherwise
  yield a negative length that wraps around to a huge size_t.

  The return value of the value handler is ignored for CDATA and text,
  unlike for attribute values.
  NOTE(review): this asymmetry is kept as it was; confirm it is intended.
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{

  my_xml_attr_rewind(p);

  p->beg=str;
  p->cur=str;
  p->end=str+len;

  while ( p->cur < p->end )
  {
    MY_XML_ATTR a;
    if (p->cur[0] == '<')
    {
      int lex;
      int question=0;
      int exclam=0;

      lex=my_xml_scan(p,&a);

      if (MY_XML_COMMENT == lex)
        continue;

      if (lex == MY_XML_CDATA)
      {
        /*
          A terminated section spans at least strlen("<![CDATA[") +
          strlen("]]>") = 12 bytes.  Anything shorter means that "]]>"
          was never found.
        */
        if (a.end - a.beg < 12)
        {
          snprintf(p->errstr,sizeof(p->errstr),"%s unexpected (']]>' wanted)",
                   lex2str(MY_XML_EOF));
          return MY_XML_ERROR;
        }
        a.beg+= 9;   /* skip "<![CDATA[" */
        a.end-= 3;   /* drop "]]>" */
        my_xml_value(p, a.beg, (size_t) (a.end-a.beg));
        continue;
      }

      /* The first scan consumed '<'; fetch the token that follows it */
      lex=my_xml_scan(p,&a);

      if (MY_XML_SLASH == lex)
      {
        /* Closing tag: </ident> */
        if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
        {
          snprintf(p->errstr,sizeof(p->errstr),"%s unexpected (ident wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg)))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
        goto gt;
      }

      if (MY_XML_EXCLAM == lex)
      {
        lex=my_xml_scan(p,&a);
        exclam=1;
      }
      else if (MY_XML_QUESTION == lex)
      {
        lex=my_xml_scan(p,&a);
        question=1;
      }

      if (MY_XML_IDENT == lex)
      {
        p->current_node_type= MY_XML_NODE_TAG;
        if (MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))
          return MY_XML_ERROR;
      }
      else
      {
        snprintf(p->errstr,sizeof(p->errstr),"%s unexpected (ident or '/' wanted)",
                 lex2str(lex));
        return MY_XML_ERROR;
      }

      /* Attributes: name=value, bare names and (in <!...>) bare strings */
      while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
             ((MY_XML_STRING == lex && exclam)))
      {
        MY_XML_ATTR b;
        if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
        {
          lex=my_xml_scan(p,&b);
          if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) )
          {
            p->current_node_type= MY_XML_NODE_ATTR;
            if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))  ||
                (MY_XML_OK != my_xml_value(p,b.beg,(size_t) (b.end-b.beg)))  ||
                (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
              return MY_XML_ERROR;
          }
          else
          {
            snprintf(p->errstr,sizeof(p->errstr),"%s unexpected (ident or string wanted)",
                     lex2str(lex));
            return MY_XML_ERROR;
          }
        }
        else if (MY_XML_IDENT == lex)
        {
          p->current_node_type= MY_XML_NODE_ATTR;
          if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
              (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
           return MY_XML_ERROR;
        }
        else if ((MY_XML_STRING == lex) && exclam)
        {
          /*
            We are in <!DOCTYPE>, e.g.
            <!DOCTYPE name SYSTEM "SystemLiteral">
            <!DOCTYPE name PUBLIC "PubidLiteral" "SystemLiteral">
            Just skip "SystemLiteral" and "PubidLiteral"
          */
        }
        else
          break;
      }

      if (lex == MY_XML_SLASH)
      {
        /* Self-closing tag: <ident ... /> */
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
      }

gt:
      if (question)
      {
        if (lex != MY_XML_QUESTION)
        {
          snprintf(p->errstr,sizeof(p->errstr),"%s unexpected ('?' wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
      }

      if (exclam)
      {
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
      }

      if (lex != MY_XML_GT)
      {
        snprintf(p->errstr,sizeof(p->errstr),"%s unexpected ('>' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
    }
    else
    {
      /* Text up to the next '<' or the end of the input */
      a.beg=p->cur;
      for ( ; (p->cur < p->end) && (p->cur[0] != '<')  ; p->cur++);
      a.end=p->cur;

      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
        my_xml_norm_text(&a);
      if (a.beg != a.end)
      {
        my_xml_value(p,a.beg,(size_t) (a.end-a.beg));
      }
    }
  }

  /* A non-empty path here means that some element was never closed */
  if (p->attr.start[0])
  {
    snprintf(p->errstr,sizeof(p->errstr),"unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}
494
495
496
/*
  Initialize a parser instance.

  The whole structure is zeroed (so no handlers, flags or user data are
  set), and the path buffer is pointed at the embedded static buffer, which
  is used for as long as it is large enough.
*/
void my_xml_parser_create(MY_XML_PARSER *p)
{
  memset(p, 0, sizeof(*p));

  p->attr.buffer_size= sizeof(p->attr.static_buffer);
  p->attr.start= p->attr.static_buffer;
  p->attr.end= p->attr.static_buffer;
}
505
506
507
/*
  Release the memory owned by a parser instance.

  Only the heap-allocated path buffer, if any, is freed.  The path state is
  then pointed back at the embedded static buffer, so attr.start, attr.end
  and attr.buffer_size never refer to the freed block, and the instance
  stays safe to reuse or to free a second time.
*/
void my_xml_parser_free(MY_XML_PARSER *p)
{
  if (p->attr.buffer)
  {
    my_free(p->attr.buffer);
    p->attr.buffer= NULL;

    /* start/end pointed into the block that has just been freed */
    p->attr.static_buffer[0]= '\0';
    p->attr.start= p->attr.end= p->attr.static_buffer;
    p->attr.buffer_size= sizeof(p->attr.static_buffer);
  }
}
515
516
517
/*
  Set the handler that receives values: attribute values, CDATA contents
  and text between tags.  Passing NULL removes the handler.
*/
void my_xml_set_value_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p, const char *s,
                                            size_t l))
{
  p->value= action;
}
523
524
/*
  Set the handler called whenever an element or attribute is entered.
  Passing NULL removes the handler.
*/
void my_xml_set_enter_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p, const char *s,
                                            size_t l))
{
  p->enter= action;
}
530
531
532
/*
  Set the handler called whenever an element or attribute is left.
  Passing NULL removes the handler.
*/
void my_xml_set_leave_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p, const char *s,
                                            size_t l))
{
  p->leave_xml= action;
}
538
539
540
/*
  Attach an opaque pointer to the parser instance.  It is stored in
  p->user_data and is not interpreted by the code in this file.
*/
void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data)
{
  p->user_data= user_data;
}
544
545
546
/*
  Return the parser's error message buffer (p->errstr).  The pointer refers
  to storage inside *p.
*/
const char *my_xml_error_string(MY_XML_PARSER *p)
{
  return p->errstr;
}
550
551
552
/*
  Return the position of p->cur within its line.

  The distance is measured from the last '\n' found before p->cur (from
  the newline character itself), or from the beginning of the input when
  there is no newline before p->cur.
*/
size_t my_xml_error_pos(MY_XML_PARSER *p)
{
  const char *line_start= p->beg;
  const char *s= p->beg;

  while (s < p->cur)
  {
    if (*s == '\n')
      line_start= s;
    s++;
  }
  return (size_t) (p->cur - line_start);
}
563
564
/*
  Return the zero-based line number of p->cur, i.e. the number of '\n'
  characters between the beginning of the input and p->cur.
*/
uint my_xml_error_lineno(MY_XML_PARSER *p)
{
  uint newlines= 0;
  const char *s= p->beg;

  while (s < p->cur)
  {
    if (*s == '\n')
      newlines++;
    s++;
  }
  return newlines;
}