Coverage Report

Created: 2026-01-09 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gpac/src/utils/xml_parser.c
Line
Count
Source
1
/*
2
 *      GPAC - Multimedia Framework C SDK
3
 *
4
 *      Authors: Jean Le Feuvre
5
 *      Copyright (c) Telecom ParisTech 2005-2024
6
 *      All rights reserved
7
 *
8
 *  This file is part of GPAC / common tools sub-project
9
 *
10
 *  GPAC is free software; you can redistribute it and/or modify
11
 *  it under the terms of the GNU Lesser General Public License as published by
12
 *  the Free Software Foundation; either version 2, or (at your option)
13
 *  any later version.
14
 *
15
 *  GPAC is distributed in the hope that it will be useful,
16
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 *  GNU Lesser General Public License for more details.
19
 *
20
 *  You should have received a copy of the GNU Lesser General Public
21
 *  License along with this library; see the file COPYING.  If not, write to
22
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
 *
24
 */
25
26
#include <gpac/xml.h>
27
#include <gpac/utf.h>
28
29
#ifndef GPAC_DISABLE_ZLIB
30
/*since 0.2.2, we use zlib for xmt/x3d reading to handle gz files*/
31
#include <zlib.h>
32
33
#if (defined(WIN32) || defined(_WIN32_WCE)) && !defined(__GNUC__)
34
#pragma comment(lib, "zlib")
35
#endif
36
#else
37
#define NO_GZIP
38
#endif
39
40
41
0
/*NOTE(review): not referenced in the visible part of this file - presumably the chunk size used when reading input; confirm against the file-based parsing code*/
#define XML_INPUT_SIZE  4096
42
43
/*limit compared against the buffered line size in xml_sax_parse when no '<' is found before the end of the buffer
(the associated log message refers to the `-xml-max-csize` option).
NOTE(review): zero-initialized here, presumably assigned elsewhere before parsing starts - confirm*/
static u32 XML_MAX_CONTENT_SIZE = 0;
44
45
46
/*forward declaration: xml_sax_parse calls this to re-parse the remaining buffer once entities have been declared*/
static GF_Err gf_xml_sax_parse_intern(GF_SAXParser *parser, char *current);
47
48
GF_STATIC char *xml_translate_xml_string(char *str)
49
0
{
50
0
  char *value;
51
0
  u32 size, i, j;
52
0
  if (!str || !strlen(str)) return NULL;
53
0
  value = (char *)gf_malloc(sizeof(char) * 500);
54
0
  size = 500;
55
0
  i = j = 0;
56
0
  while (str[i]) {
57
0
    if (j+20 >= size) {
58
0
      size += 500;
59
0
      value = (char *)gf_realloc(value, sizeof(char)*size);
60
0
    }
61
0
    if (str[i] == '&') {
62
0
      if (str[i+1]=='#') {
63
0
        char szChar[20], *end;
64
0
        u16 wchar[2];
65
0
        u32 val=0, _len;
66
0
        const unsigned short *srcp;
67
0
        strncpy(szChar, str+i, 10);
68
0
        szChar[10] = 0;
69
0
        end = strchr(szChar, ';');
70
0
        if (!end) break;
71
0
        end[1] = 0;
72
0
        i += (u32) strlen(szChar);
73
0
        wchar[1] = 0;
74
0
        if (szChar[2]=='x')
75
0
          sscanf(szChar, "&#x%x;", &val);
76
0
        else
77
0
          sscanf(szChar, "&#%u;", &val);
78
0
        wchar[0] = val;
79
0
        srcp = wchar;
80
0
        _len = gf_utf8_wcstombs(&value[j], 20, &srcp);
81
0
        if (_len == GF_UTF8_FAIL) _len = 0;
82
0
        j += _len;
83
0
      }
84
0
      else if (!strnicmp(&str[i], "&amp;", sizeof(char)*5)) {
85
0
        value[j] = '&';
86
0
        j++;
87
0
        i+= 5;
88
0
      }
89
0
      else if (!strnicmp(&str[i], "&lt;", sizeof(char)*4)) {
90
0
        value[j] = '<';
91
0
        j++;
92
0
        i+= 4;
93
0
      }
94
0
      else if (!strnicmp(&str[i], "&gt;", sizeof(char)*4)) {
95
0
        value[j] = '>';
96
0
        j++;
97
0
        i+= 4;
98
0
      }
99
0
      else if (!strnicmp(&str[i], "&apos;", sizeof(char)*6)) {
100
0
        value[j] = '\'';
101
0
        j++;
102
0
        i+= 6;
103
0
      }
104
0
      else if (!strnicmp(&str[i], "&quot;", sizeof(char)*6)) {
105
0
        value[j] = '\"';
106
0
        j++;
107
0
        i+= 6;
108
0
      } else {
109
0
        value[j] = str[i];
110
0
        j++;
111
0
        i++;
112
0
      }
113
0
    } else {
114
0
      value[j] = str[i];
115
0
      j++;
116
0
      i++;
117
0
    }
118
0
  }
119
0
  value[j] = 0;
120
0
  return value;
121
0
}
122
123
124
/*parser states stored in the sax_state field of the SAX parser.
The order matters: xml_sax_node_end compares the current state with "< SAX_STATE_SYNTAX_ERROR"
to detect that no error has been raised*/
enum
{
  /*parsing an attribute name*/
  SAX_STATE_ATT_NAME = 0,
  /*parsing an attribute value*/
  SAX_STATE_ATT_VALUE,
  /*looking for the next element*/
  SAX_STATE_ELEMENT,
  /*inside a comment*/
  SAX_STATE_COMMENT,
  /*inside text content*/
  SAX_STATE_TEXT_CONTENT,
  /*parsing an <!ENTITY declaration*/
  SAX_STATE_ENTITY,
  /*skipping up to the next '>' (used for <!ATTLIST, <!ELEMENT and after entities)*/
  SAX_STATE_SKIP_DOCTYPE,
  /*inside a CDATA section*/
  SAX_STATE_CDATA,
  /*root element closed*/
  SAX_STATE_DONE,
  /*skipping a processing instruction up to "?>"*/
  SAX_STATE_XML_PROC,
  /*syntax error raised*/
  SAX_STATE_SYNTAX_ERROR,
  /*memory allocation error raised*/
  SAX_STATE_ALLOC_ERROR,
};
139
140
typedef struct
141
{
142
  u32 name_start, name_end;
143
  u32 val_start, val_end;
144
  Bool has_entities;
145
} GF_XMLSaxAttribute;
146
147
148
/* #define NO_GZIP */
149
150
151
struct _tag_sax_parser
152
{
153
  /*0: UTF-8, 1: UTF-16 BE, 2: UTF-16 LE. String input is always converted back to utf8*/
154
  s32 unicode_type;
155
  char *buffer;
156
  /*alloc size, line size and current position*/
157
  u32 alloc_size, line_size, current_pos;
158
  /*current node depth*/
159
  u32 node_depth;
160
161
  /*gz input file*/
162
#ifdef NO_GZIP
163
  FILE *f_in;
164
#else
165
  gzFile gz_in;
166
#endif
167
  /*current line , file size and pos for user notif*/
168
  u32 line, file_size, file_pos;
169
170
  /*SAX callbacks*/
171
  gf_xml_sax_node_start sax_node_start;
172
  gf_xml_sax_node_end sax_node_end;
173
  gf_xml_sax_text_content sax_text_content;
174
  void *sax_cbck;
175
  gf_xml_sax_progress on_progress;
176
177
  u32 sax_state;
178
  u32 init_state;
179
  GF_List *entities;
180
  char att_sep;
181
  Bool in_entity, suspended;
182
  u32 in_quote;
183
184
  u32 elt_start_pos, elt_end_pos;
185
186
  /*last error found*/
187
  char err_msg[1000];
188
189
  u32 att_name_start, elt_name_start, elt_name_end, text_start, text_end;
190
  u32 text_check_escapes;
191
192
  GF_XMLAttribute *attrs;
193
  GF_XMLSaxAttribute *sax_attrs;
194
  u32 nb_attrs, nb_alloc_attrs;
195
  u32 ent_rec_level;
196
};
197
198
/*
 * Reserves the next attribute slot and returns it.
 *
 * When all allocated slots are in use, both the sax_attrs and attrs arrays are grown
 * by one entry. Each array pointer is only replaced once its reallocation succeeded,
 * so a failure never loses the existing arrays, and the slot count is only updated
 * when both arrays have been grown.
 *
 * Returns NULL if memory cannot be allocated (nb_attrs is left unchanged).
 */
static GF_XMLSaxAttribute *xml_get_sax_attribute(GF_SAXParser *parser)
{
  if (parser->nb_attrs==parser->nb_alloc_attrs) {
    u32 new_count = parser->nb_alloc_attrs + 1;
    GF_XMLSaxAttribute *sax_attrs;
    GF_XMLAttribute *attrs;

    sax_attrs = (GF_XMLSaxAttribute *)gf_realloc(parser->sax_attrs, sizeof(GF_XMLSaxAttribute)*new_count);
    if (!sax_attrs) return NULL;
    parser->sax_attrs = sax_attrs;

    attrs = (GF_XMLAttribute *)gf_realloc(parser->attrs, sizeof(GF_XMLAttribute)*new_count);
    if (!attrs) return NULL;
    parser->attrs = attrs;

    parser->nb_alloc_attrs = new_count;
  }
  return &parser->sax_attrs[parser->nb_attrs++];
}
207
208
/*
 * Discards the part of the buffer located before the current position.
 *
 * Only done while in text content or comment state, and only if some data has been
 * consumed. The remaining bytes are moved to the start of the buffer, the buffer is
 * re-terminated, file_pos is advanced by the number of discarded bytes and
 * current_pos is reset to 0.
 */
static void xml_sax_swap(GF_SAXParser *parser)
{
  u32 consumed = parser->current_pos;

  if (!consumed) return;
  if ((parser->sax_state!=SAX_STATE_TEXT_CONTENT) && (parser->sax_state!=SAX_STATE_COMMENT)) return;
  if (parser->line_size < consumed) return;

  parser->line_size -= consumed;
  parser->file_pos += consumed;
  if (parser->line_size) {
    memmove(parser->buffer, parser->buffer + consumed, sizeof(char)*parser->line_size);
  }
  parser->buffer[parser->line_size] = 0;
  parser->current_pos = 0;
}
220
221
/*
 * Formats an error message in parser->err_msg and moves the parser to SAX_STATE_SYNTAX_ERROR.
 *
 * parser:  the parser, nothing is done if NULL
 * linepos: buffer offset of the faulty data, or 0 to use the current parsing position
 * fmt,...: printf-style message
 *
 * If at least 30 bytes remain in err_msg, " - Line N: " is appended followed by up to
 * 10 characters of the buffer at the faulty position. The excerpt is skipped when there
 * is no buffer or when the position is not inside the valid data.
 */
static void format_sax_error(GF_SAXParser *parser, u32 linepos, const char* fmt, ...)
{
  va_list args;
  u32 len;

  if (!parser) return;

  va_start(args, fmt);
  vsnprintf(parser->err_msg, GF_ARRAY_LENGTH(parser->err_msg), fmt, args);
  va_end(args);

  len = (u32) strlen(parser->err_msg);
  if (len+30 < GF_ARRAY_LENGTH(parser->err_msg)) {
    char szM[20];
    u32 pos = linepos ? linepos : parser->current_pos;

    /*line is unsigned*/
    snprintf(szM, sizeof(szM), " - Line %u: ", parser->line + 1);
    strcat(parser->err_msg, szM);
    len = (u32) strlen(parser->err_msg);

    /*never read outside of the valid part of the buffer*/
    if (parser->buffer && (pos < parser->line_size)) {
      u32 avail = parser->line_size - pos;
      if (avail > 10) avail = 10;
      snprintf(parser->err_msg + len, GF_ARRAY_LENGTH(parser->err_msg) - len, "%.*s", (int) avail, parser->buffer + pos);
    }
  }
  parser->sax_state = SAX_STATE_SYNTAX_ERROR;
}
242
243
/*
 * Signals the end of the current element to the node end callback.
 *
 * The element name is located through elt_name_start / elt_name_end and temporarily
 * NUL-terminated in the buffer. If the name contains ':', the callback receives the
 * part after ':' as name and the part before as second argument, otherwise the second
 * argument is NULL.
 * Raises a "Markup error" if no element is currently open.
 * When the depth drops to 0 outside of the prolog and no error was raised, the parser
 * moves to SAX_STATE_DONE. Pending text positions are reset.
 *
 * had_children is not used.
 */
static void xml_sax_node_end(GF_SAXParser *parser, Bool had_children)
{
  char *elt_name, saved;

  gf_assert(parser->elt_name_start);
  gf_assert(parser->elt_name_end);
  if (!parser->node_depth) {
    format_sax_error(parser, 0, "Markup error");
    return;
  }

  /*temporarily terminate the element name in place*/
  saved = parser->buffer[parser->elt_name_end - 1];
  parser->buffer[parser->elt_name_end - 1] = 0;
  elt_name = parser->buffer + parser->elt_name_start - 1;

  if (parser->sax_node_end) {
    char *colon = strchr(elt_name, ':');
    if (!colon) {
      parser->sax_node_end(parser->sax_cbck, elt_name, NULL);
    } else {
      colon[0] = 0;
      parser->sax_node_end(parser->sax_cbck, colon+1, elt_name);
      colon[0] = ':';
    }
  }
  /*restore the buffer*/
  parser->buffer[parser->elt_name_end - 1] = saved;

  parser->node_depth--;
  if (!parser->init_state && !parser->node_depth && (parser->sax_state<SAX_STATE_SYNTAX_ERROR)) {
    parser->sax_state = SAX_STATE_DONE;
  }
  xml_sax_swap(parser);
  parser->text_end = 0;
  parser->text_start = 0;
}
273
274
/*
 * Signals the start of the current element, with its attributes, to the node start callback.
 *
 * The element name and each attribute name/value are NUL-terminated in place in the buffer.
 * Attribute values containing '&' are replaced by a decoded copy from xml_translate_xml_string
 * for the duration of the callback; the copies are freed afterwards. If decoding fails, the raw
 * value from the buffer is kept so that the callback never receives a NULL value.
 * If the name contains ':', the callback receives the part after ':' as name and the part
 * before as second argument, otherwise the second argument is NULL.
 *
 * After the callback the node depth is increased, the attribute count is reset to 0 and
 * pending text positions are cleared.
 */
static void xml_sax_node_start(GF_SAXParser *parser)
{
  Bool has_entities = GF_FALSE;
  u32 i;
  char c, *name;

  gf_assert(parser->elt_name_start && parser->elt_name_end);
  c = parser->buffer[parser->elt_name_end - 1];
  parser->buffer[parser->elt_name_end - 1] = 0;
  name = parser->buffer + parser->elt_name_start - 1;

  for (i=0; i<parser->nb_attrs; i++) {
    parser->attrs[i].name = parser->buffer + parser->sax_attrs[i].name_start - 1;
    parser->buffer[parser->sax_attrs[i].name_end-1] = 0;
    parser->attrs[i].value = parser->buffer + parser->sax_attrs[i].val_start - 1;
    parser->buffer[parser->sax_attrs[i].val_end-1] = 0;

    if (strchr(parser->attrs[i].value, '&')) {
      char *decoded = xml_translate_xml_string(parser->attrs[i].value);
      /*only flag the attribute when we own a decoded copy, so that the cleanup below never frees a buffer pointer*/
      if (decoded) {
        parser->sax_attrs[i].has_entities = GF_TRUE;
        has_entities = GF_TRUE;
        parser->attrs[i].value = decoded;
      }
    }
    /*store first char pos after current attrib for node peeking*/
    parser->att_name_start = parser->sax_attrs[i].val_end;
  }

  if (parser->sax_node_start) {
    char *sep = strchr(name, ':');
    if (sep) {
      sep[0] = 0;
      parser->sax_node_start(parser->sax_cbck, sep+1, name, parser->attrs, parser->nb_attrs);
      sep[0] = ':';
    } else {
      parser->sax_node_start(parser->sax_cbck, name, NULL, parser->attrs, parser->nb_attrs);
    }
  }
  parser->att_name_start = 0;
  parser->buffer[parser->elt_name_end - 1] = c;
  parser->node_depth++;
  if (has_entities) {
    /*release decoded attribute values*/
    for (i=0; i<parser->nb_attrs; i++) {
      if (parser->sax_attrs[i].has_entities) {
        parser->sax_attrs[i].has_entities = GF_FALSE;
        gf_free(parser->attrs[i].value);
      }
    }
  }
  parser->nb_attrs = 0;
  xml_sax_swap(parser);
  parser->text_start = parser->text_end = 0;
}
325
326
/*
 * Parses the attribute part of a tag, one step at a time (states SAX_STATE_ATT_NAME and
 * SAX_STATE_ATT_VALUE).
 *
 * In name state, whitespace is skipped and the end of the tag is handled: "/>" (or "?>" in the
 * XML declaration) and '>' trigger the node start / node end notifications when in regular
 * content. Otherwise the attribute name is located up to '=', trimmed of trailing whitespace
 * (space, tab, CR, LF) and validated. An empty name or an invalid character raises a syntax error.
 * In value state, the opening delimiter (' or ") is located and the value is read up to the
 * matching delimiter. An attribute whose name starts with "style" is moved to the first slot.
 *
 * Returns GF_TRUE when parsing must stop for now (more data needed or error raised), GF_FALSE
 * when the caller can go on parsing with the updated state.
 */
static Bool xml_sax_parse_attribute(GF_SAXParser *parser)
{
  char *sep;
  GF_XMLSaxAttribute *att = NULL;

  /*looking for attribute name*/
  if (parser->sax_state==SAX_STATE_ATT_NAME) {
    /*looking for start*/
    if (!parser->att_name_start) {
      while (parser->current_pos < parser->line_size) {
        u8 c = parser->buffer[parser->current_pos];
        switch (c) {
        case '\n':
          parser->line++;
          /*fallthrough*/
        case ' ':
        case '\r':
        case '\t':
          parser->current_pos++;
          continue;
        /*end of element*/
        case '?':
          if (parser->init_state!=1) break;
          /*fallthrough*/
        case '/':
          /*not enough data*/
          if (parser->current_pos+1 == parser->line_size) return GF_TRUE;
          if (parser->buffer[parser->current_pos+1]=='>') {
            parser->current_pos+=2;
            parser->elt_end_pos = parser->file_pos + parser->current_pos - 1;
            /*done parsing attr AND elements*/
            if (!parser->init_state) {
              xml_sax_node_start(parser);
              /*move to SAX_STATE_TEXT_CONTENT to force text flush*/
              parser->sax_state = SAX_STATE_TEXT_CONTENT;
              xml_sax_node_end(parser, GF_FALSE);
            } else {
              parser->nb_attrs = 0;
            }
            parser->sax_state = (parser->init_state) ? SAX_STATE_ELEMENT : SAX_STATE_TEXT_CONTENT;
            parser->text_start = parser->text_end = 0;
            return GF_FALSE;
          }
          if (!parser->in_quote && (c=='/')) {
            if (!parser->init_state) {
              format_sax_error(parser, 0, "Markup error");
              return GF_TRUE;
            }
          }
          break;
        case '"':
          if (parser->sax_state==SAX_STATE_ATT_VALUE) break;
          if (parser->in_quote && (parser->in_quote!=c) ) {
            format_sax_error(parser, 0, "Markup error");
            return GF_TRUE;
          }
          if (parser->in_quote) parser->in_quote = 0;
          else parser->in_quote = c;
          break;
        case '>':
          parser->current_pos+=1;
          /*end of <!DOCTYPE>*/
          if (parser->init_state) {
            if (parser->init_state==1) {
              format_sax_error(parser, 0, "Invalid <!DOCTYPE...> or <?xml...?>");
              return GF_TRUE;
            }
            parser->sax_state = SAX_STATE_ELEMENT;
            return GF_FALSE;
          }
          /*done parsing attr*/
          parser->sax_state = SAX_STATE_TEXT_CONTENT;
          xml_sax_node_start(parser);
          return GF_FALSE;
        case '[':
          if (parser->init_state) {
            parser->current_pos+=1;
            if (parser->init_state==1) {
              format_sax_error(parser, 0, "Invalid <!DOCTYPE...> or <?xml...?>");
              return GF_TRUE;
            }
            parser->sax_state = SAX_STATE_ELEMENT;
            return GF_FALSE;
          }
          break;
        case '<':
          /*the error state set here stops the caller on its next iteration*/
          format_sax_error(parser, 0, "Invalid character '<'");
          return GF_FALSE;
        /*first char of attr name*/
        default:
          parser->att_name_start = parser->current_pos + 1;
          break;
        }
        parser->current_pos++;
        if (parser->att_name_start) break;
      }
      if (parser->current_pos == parser->line_size) return GF_TRUE;
    }

    if (parser->init_state==2) {
      /*inside <!DOCTYPE: skip the current token (up to next space) or quoted string (up to closing quote)*/
      sep = strchr(parser->buffer + parser->att_name_start - 1, parser->in_quote ?  parser->in_quote : ' ');
      /*not enough data*/
      if (!sep) return GF_TRUE;
      parser->current_pos = (u32) (sep - parser->buffer);
      parser->att_name_start = 0;
      if (parser->in_quote) {
        parser->current_pos++;
        parser->in_quote = 0;
      }
      return GF_FALSE;
    }

    /*looking for '"'*/
    if (parser->att_name_start) {
      u32 i, first=1;
      sep = strchr(parser->buffer + parser->att_name_start - 1, '=');
      /*not enough data*/
      if (!sep) return GF_TRUE;

      parser->current_pos = (u32) (sep - parser->buffer);
      att = xml_get_sax_attribute(parser);
      if (!att) {
        parser->sax_state = SAX_STATE_ALLOC_ERROR;
        return GF_TRUE;
      }
      att->name_start = parser->att_name_start;
      att->name_end = parser->current_pos + 1;
      /*trim whitespace between the name and '=', never walking back before the start of the name.
      Explicit comparisons are used since strchr() would also match the NUL terminator*/
      while (att->name_end > att->name_start) {
        char prev = parser->buffer[att->name_end - 2];
        if ((prev!=' ') && (prev!='\n') && (prev!='\t') && (prev!='\r')) break;
        att->name_end --;
      }
      att->has_entities = GF_FALSE;

      /*'=' found without any name before it*/
      if (att->name_end <= att->name_start) {
        format_sax_error(parser, att->name_start-1, "Invalid character \'%c\' for attribute name", '=');
        return GF_TRUE;
      }

      for (i=att->name_start; i<att->name_end; i++) {
        char c = parser->buffer[i-1];
        if ((c>='a') && (c<='z')) {}
        else if ((c>='A') && (c<='Z')) {}
        else if ((c==':') || (c=='_')) {}

        /*digits, '-' and '.' are not allowed as first character*/
        else if (!first && ((c=='-') || (c=='.') || ((c>='0') && (c<='9')) )) {}

        else {
          format_sax_error(parser, att->name_start-1, "Invalid character \'%c\' for attribute name", c);
          return GF_TRUE;
        }

        first=0;
      }

      parser->att_name_start = 0;
      parser->current_pos++;
      parser->sax_state = SAX_STATE_ATT_VALUE;

    }
  }

  if (parser->sax_state == SAX_STATE_ATT_VALUE) {
    att = &parser->sax_attrs[parser->nb_attrs-1];
    /*looking for first delimiter*/
    if (!parser->att_sep) {
      while (parser->current_pos < parser->line_size) {
        u8 c = parser->buffer[parser->current_pos];
        switch (c) {
        case '\n':
          parser->line++;
          /*fallthrough*/
        case ' ':
        case '\r':
        case '\t':
          parser->current_pos++;
          continue;
        case '\'':
        case '"':
          parser->att_sep = c;
          att->val_start = parser->current_pos + 2;
          break;
        default:
          // garbage char before value separator -> error
          goto att_retry;
        }
        parser->current_pos++;
        if (parser->att_sep) break;
      }
      if (parser->current_pos == parser->line_size) return GF_TRUE;
    }

att_retry:

    if (!parser->att_sep) {
      format_sax_error(parser, parser->current_pos, "Invalid character %c before attribute value separator", parser->buffer[parser->current_pos]);
      return GF_TRUE;
    }
    sep = strchr(parser->buffer + parser->current_pos, parser->att_sep);
    /*closing delimiter or the character following it not available yet*/
    if (!sep || !sep[1]) return GF_TRUE;

    if (sep[1]==parser->att_sep) {
      format_sax_error(parser, (u32) (sep - parser->buffer), "Invalid character %c after attribute value separator %c ", sep[1], parser->att_sep);
      return GF_TRUE;
    }

    /*in regular content, a delimiter not followed by whitespace, '/' or '>' is not the end of the value: look for the next one*/
    if (!parser->init_state && (strchr(" />\n\t\r", sep[1])==NULL)) {
      parser->current_pos = (u32) (sep - parser->buffer + 1);
      goto att_retry;
    }

    parser->current_pos = (u32) (sep - parser->buffer);
    att->val_end = parser->current_pos + 1;
    parser->current_pos++;

    /*"style" always at the beginning of the attributes for ease of parsing*/
    if (!strncmp(parser->buffer + att->name_start-1, "style", 5)) {
      GF_XMLSaxAttribute prev = parser->sax_attrs[0];
      parser->sax_attrs[0] = *att;
      *att = prev;
    }
    parser->att_sep = 0;
    parser->sax_state = SAX_STATE_ATT_NAME;
    parser->att_name_start = 0;
    return GF_FALSE;
  }
  return GF_TRUE;
}
541
542
543
typedef struct
544
{
545
  char *name;
546
  char *value;
547
  u32 namelen;
548
  u8 sep;
549
} XML_Entity;
550
551
/*
 * Sends the pending text, if any, to the text content callback and resets the pending text.
 *
 * Nothing is done if there is no pending text, if the parser is still in the prolog or if no
 * text callback is set. The text is NUL-terminated in place for the duration of the call.
 * If both '&' and ';' were seen in the text (text_check_escapes == 0x3), a decoded copy is
 * sent instead; in that case nothing is sent if the copy cannot be created.
 * The last callback argument tells whether the parser is in CDATA state.
 */
static void xml_sax_flush_text(GF_SAXParser *parser)
{
  char *raw, saved;
  Bool is_cdata;

  if (!parser->text_start) return;
  if (parser->init_state) return;
  if (!parser->sax_text_content) return;

  gf_assert(parser->text_start < parser->text_end);

  /*temporarily terminate the text in place*/
  saved = parser->buffer[parser->text_end-1];
  parser->buffer[parser->text_end-1] = 0;
  raw = parser->buffer + parser->text_start-1;

  is_cdata = (parser->sax_state==SAX_STATE_CDATA) ? GF_TRUE : GF_FALSE;

  if (parser->text_check_escapes!=0x3) {
    parser->sax_text_content(parser->sax_cbck, raw, is_cdata);
  } else {
    /*solve XML built-in entities*/
    char *decoded = xml_translate_xml_string(raw);
    if (decoded) {
      parser->sax_text_content(parser->sax_cbck, decoded, is_cdata);
      gf_free(decoded);
    }
  }

  parser->buffer[parser->text_end-1] = saved;
  parser->text_end = 0;
  parser->text_start = 0;
  parser->text_check_escapes = 0;
}
578
579
/*
 * Marks the next txt_len bytes at the current position as pending text and moves the
 * current position after them.
 *
 * If there is already some pending text ending exactly at the current position, it is
 * extended. Otherwise any existing pending text is flushed first and a new pending range
 * is started. The escape tracking flags are reset only when there was no pending text.
 */
static void xml_sax_store_text(GF_SAXParser *parser, u32 txt_len)
{
  if (!txt_len) return;

  if (parser->text_start) {
    /*contiguous text*/
    if (parser->text_end && (parser->text_end-1 == parser->current_pos)) {
      parser->text_end += txt_len;
      parser->current_pos += txt_len;
      gf_assert(parser->current_pos <= parser->line_size);
      return;
    }
    /*need to flush*/
    xml_sax_flush_text(parser);
  } else {
    parser->text_check_escapes = 0;
  }

  /*start a new range, positions are 1-based*/
  parser->text_start = parser->current_pos + 1;
  parser->text_end = parser->text_start + txt_len;
  parser->current_pos += txt_len;
  gf_assert(parser->current_pos <= parser->line_size);
}
606
607
/*
 * Returns a copy (made with gf_strdup) of the pending text and resets the pending text.
 * Returns NULL if there is no pending text.
 */
static char *xml_get_current_text(GF_SAXParser *parser)
{
  char *copy, saved;
  u32 end_idx;

  if (!parser->text_start) return NULL;

  /*terminate the text in place while copying it*/
  end_idx = parser->text_end-1;
  saved = parser->buffer[end_idx];
  parser->buffer[end_idx] = 0;
  copy = gf_strdup(parser->buffer + parser->text_start-1);
  parser->buffer[end_idx] = saved;

  parser->text_end = 0;
  parser->text_start = 0;
  return copy;
}
619
620
/*
 * Skips data up to and including the next '>' and moves to SAX_STATE_ELEMENT.
 * If no '>' is found, all available data is consumed and the state is unchanged.
 */
static void xml_sax_skip_doctype(GF_SAXParser *parser)
{
  for (; parser->current_pos < parser->line_size; parser->current_pos++) {
    if (parser->buffer[parser->current_pos]!='>') continue;

    parser->sax_state = SAX_STATE_ELEMENT;
    parser->current_pos++;
    xml_sax_swap(parser);
    return;
  }
}
632
633
/*
 * Skips data up to the next "?>" and moves to SAX_STATE_ELEMENT.
 * On success the current position is moved by one only, i.e. it is left on the '>'.
 * If no "?>" is found, all available data is consumed and the state is unchanged.
 */
static void xml_sax_skip_xml_proc(GF_SAXParser *parser)
{
  for (; parser->current_pos < parser->line_size; parser->current_pos++) {
    u32 pos = parser->current_pos;

    if (pos + 1 >= parser->line_size) continue;
    if (parser->buffer[pos]!='?') continue;
    if (parser->buffer[pos+1]!='>') continue;

    parser->sax_state = SAX_STATE_ELEMENT;
    parser->current_pos++;
    xml_sax_swap(parser);
    return;
  }
}
645
646
647
/*
 * Parses an <!ENTITY declaration (name followed by a quoted value).
 *
 * If the last entity of the list has no value yet, parsing resumes inside its value.
 * Otherwise whitespace is skipped, the name is accumulated up to the opening quote and a new
 * entity is added to the list; the value is then read up to the matching quote.
 * A '%' found before the opening quote makes the parser skip the declaration.
 *
 * On completion the parser moves to SAX_STATE_SKIP_DOCTYPE. If the available data ends before
 * the end of the value, the parser moves to SAX_STATE_SYNTAX_ERROR. Allocation failures move
 * the parser to SAX_STATE_ALLOC_ERROR.
 */
static void xml_sax_parse_entity(GF_SAXParser *parser)
{
  char szC[2];
  char *ent_name=NULL;
  u32 i = 0;
  XML_Entity *ent = (XML_Entity *)gf_list_last(parser->entities);
  char *skip_chars = " \t\n\r";
  /*last entity is complete: we are starting a new one*/
  if (ent && ent->value) ent = NULL;
  /*inside an entity value, whitespace is kept*/
  if (ent) skip_chars = NULL;
  szC[1]=0;

  while (parser->current_pos+i < parser->line_size) {
    u8 c = parser->buffer[parser->current_pos+i];
    if (skip_chars && strchr(skip_chars, c)) {
      if (c=='\n') parser->line++;
      parser->current_pos++;
      continue;
    }
    if (!ent && (c=='%')) {
      parser->current_pos+=i+1;
      parser->sax_state = SAX_STATE_SKIP_DOCTYPE;
      if (ent_name) gf_free(ent_name);
      return;
    }
    else if (!ent && ((c=='\"') || (c=='\'')) ) {
      GF_SAFEALLOC(ent, XML_Entity);
      if (!ent) {
        parser->sax_state = SAX_STATE_ALLOC_ERROR;
        if (ent_name) gf_free(ent_name);
        return;
      }
      /*no name given: use an empty one*/
      if (!ent_name) gf_dynstrcat(&ent_name, "", NULL);
      /*the name could not be allocated, do not use it*/
      if (!ent_name) {
        gf_free(ent);
        parser->sax_state = SAX_STATE_ALLOC_ERROR;
        return;
      }

      ent->name = ent_name;
      ent_name=NULL;
      ent->namelen = (u32) strlen(ent->name);
      ent->sep = c;
      parser->current_pos += 1+i;
      gf_assert(parser->current_pos < parser->line_size);
      xml_sax_swap(parser);
      i=0;
      gf_list_add(parser->entities, ent);
      skip_chars = NULL;
    } else if (ent && c==ent->sep) {
      /*closing quote: the i bytes before it are the value*/
      if (ent_name) gf_free(ent_name);
      xml_sax_store_text(parser, i);

      ent->value = xml_get_current_text(parser);
      if (!ent->value) ent->value = gf_strdup("");

      parser->current_pos += 1;
      gf_assert(parser->current_pos < parser->line_size);
      xml_sax_swap(parser);
      parser->sax_state = SAX_STATE_SKIP_DOCTYPE;
      return;
    } else if (!ent) {
      /*accumulate the entity name one character at a time*/
      szC[0] = c;
      gf_dynstrcat(&ent_name, szC, NULL);
      i++;
    } else {
      i++;
    }
  }
  if (ent_name) gf_free(ent_name);
  if (ent && !ent->value)
    parser->sax_state = SAX_STATE_SYNTAX_ERROR;
  xml_sax_store_text(parser, i);
}
716
717
/*
 * Handles data while in a CDATA section.
 *
 * If the closing "]]>" is found, the data before it is stored and flushed as text, the
 * marker is skipped and the parser goes back to SAX_STATE_TEXT_CONTENT. Otherwise all the
 * remaining data is stored as pending text.
 */
static void xml_sax_cdata(GF_SAXParser *parser)
{
  char *from = parser->buffer + parser->current_pos;
  char *cd_end = strstr(from, "]]>");

  if (cd_end) {
    xml_sax_store_text(parser, (u32) (cd_end - from));
    xml_sax_flush_text(parser);
    /*skip the closing marker*/
    parser->current_pos += 3;
    gf_assert(parser->current_pos <= parser->line_size);
    parser->sax_state = SAX_STATE_TEXT_CONTENT;
    return;
  }
  xml_sax_store_text(parser, parser->line_size - parser->current_pos);
}
731
732
/*
 * Skips a comment.
 *
 * Returns GF_TRUE if the closing "-->" was found: the position is moved after it, pending
 * text is reset and the parser goes back to SAX_STATE_TEXT_CONTENT.
 * Returns GF_FALSE otherwise: if more than 3 bytes are buffered, only the last 3 are kept
 * so that a closing marker split across two chunks can still be found.
 */
static Bool xml_sax_parse_comments(GF_SAXParser *parser)
{
  char *from = parser->buffer + parser->current_pos;
  char *end = strstr(from, "-->");

  if (end) {
    parser->current_pos += 3 + (u32) (end - from);
    gf_assert(parser->current_pos <= parser->line_size);
    parser->sax_state = SAX_STATE_TEXT_CONTENT;
    parser->text_end = 0;
    parser->text_start = 0;
    xml_sax_swap(parser);
    return GF_TRUE;
  }

  if (parser->line_size>3) {
    parser->current_pos = parser->line_size-3;
  }
  xml_sax_swap(parser);
  return GF_FALSE;
}
749
750
751
752
/*
 * Main parsing loop: consumes the data available in the parser buffer according to the
 * current state.
 *
 * parser:      the parser
 * force_parse: if set, parsing goes on even if the parser is suspended
 *
 * Returns GF_OK when all available data has been processed or more data is needed,
 * GF_CORRUPTED_DATA on syntax error, GF_OUT_OF_MEM on allocation error, GF_EOS once the
 * root element is closed, GF_NOT_SUPPORTED if more than 100 nested re-parsing passes are
 * needed for entities, or the result of the re-parsing pass when entities are declared.
 */
static GF_Err xml_sax_parse(GF_SAXParser *parser, Bool force_parse)
{
  u32 i = 0;
  Bool is_text;
  u32 is_end;
  u8 c;
  char *elt, sep;
  u32 cdata_sep;

  while (parser->current_pos<parser->line_size) {
    if (!force_parse && parser->suspended) goto exit;

restart:
    is_text = GF_FALSE;
    switch (parser->sax_state) {
    /*load an XML element*/
    case SAX_STATE_TEXT_CONTENT:
      is_text = GF_TRUE;
      /*fallthrough*/
    case SAX_STATE_ELEMENT:
      elt = NULL;
      i=0;
      /*look for the next '<'*/
      while ((c = parser->buffer[parser->current_pos+i]) !='<') {
        /*end of the internal subset of <!DOCTYPE*/
        if ((parser->init_state==2) && (c ==']')) {
          parser->sax_state = SAX_STATE_ATT_NAME;
          parser->current_pos+=i+1;
          goto restart;
        }
        i++;
        if (c=='\n') parser->line++;
        if (is_text) {
          if (c=='&') parser->text_check_escapes |= 1;
          else if (c==';') parser->text_check_escapes |= 2;
        }

        if (parser->current_pos+i==parser->line_size) {
          if ((parser->line_size >= XML_MAX_CONTENT_SIZE) && !parser->init_state) {
            GF_LOG(GF_LOG_ERROR, GF_LOG_CORE, ("[XML] Content size larger than max allowed %u, try increasing limit using `-xml-max-csize`\n", XML_MAX_CONTENT_SIZE));
            parser->sax_state = SAX_STATE_SYNTAX_ERROR;
          }

          goto exit;
        }
      }
      if (is_text && i) {
        /*storing text may reset the escape flags, keep the ones just collected*/
        u32 has_esc = parser->text_check_escapes;
        xml_sax_store_text(parser, i);
        parser->text_check_escapes = has_esc;
        parser->sax_state = SAX_STATE_ELEMENT;
      } else if (i) {
        parser->current_pos += i;
        gf_assert(parser->current_pos < parser->line_size);
      }
      is_end = 0;
      i = 0;
      cdata_sep = 0;
      /*current_pos is on '<': look for the end of the element name*/
      while (1) {
        c = parser->buffer[parser->current_pos+1+i];
        if (!strncmp(parser->buffer+parser->current_pos+1+i, "!--", 3)) {
          parser->sax_state = SAX_STATE_COMMENT;
          i += 3;
          break;
        }
        if (!c) {
          goto exit;
        }
        if ((c=='\t') || (c=='\r') || (c==' ') ) {
          if (i) break;
          else parser->current_pos++;
        }
        else if (c=='\n') {
          parser->line++;
          if (i) break;
          else parser->current_pos++;
        }
        else if (c=='>') break;
        else if (c=='=') break;
        else if (c=='[') {
          i++;
          if (!cdata_sep) cdata_sep = 1;
          else {
            break;
          }
        }
        else if (c=='/') {
          /*1: closing tag "</name", 2: '/' found after the name*/
          is_end = !i ? 1 : 2;
          i++;
        } else if (c=='<') {
          if (parser->sax_state != SAX_STATE_COMMENT) {
            parser->sax_state = SAX_STATE_SYNTAX_ERROR;
            return GF_CORRUPTED_DATA;
          }
        } else {
          i++;
        }
        /*        if ((c=='[') && (parser->buffer[parser->elt_name_start-1 + i-2]=='A') ) break; */
        if (parser->current_pos+1+i==parser->line_size) {
          goto exit;
        }
      }
      if (i) {
        parser->elt_name_start = parser->current_pos+1 + 1;
        if (is_end==1) parser->elt_name_start ++;
        if (is_end==2) parser->elt_name_end = parser->current_pos+1+i;
        else parser->elt_name_end = parser->current_pos+1+i + 1;
      }
      if (is_end) {
        xml_sax_flush_text(parser);
        parser->elt_end_pos = parser->file_pos + parser->current_pos + i;
        if (is_end==2) {
          parser->sax_state = SAX_STATE_ELEMENT;
          xml_sax_node_start(parser);
          xml_sax_node_end(parser, GF_FALSE);
        } else {
          parser->elt_end_pos += parser->elt_name_end - parser->elt_name_start;
          xml_sax_node_end(parser, GF_TRUE);
        }
        if (parser->sax_state == SAX_STATE_SYNTAX_ERROR) break;
        parser->current_pos+=2+i;
        parser->sax_state = SAX_STATE_TEXT_CONTENT;
        break;
      }
      if (!parser->elt_name_end) {
        return GF_CORRUPTED_DATA;
      }
      /*temporarily terminate the element name in place*/
      sep = parser->buffer[parser->elt_name_end-1];
      parser->buffer[parser->elt_name_end-1] = 0;
      elt = parser->buffer + parser->elt_name_start-1;

      parser->sax_state = SAX_STATE_ATT_NAME;
      gf_assert(parser->elt_start_pos <= parser->file_pos + parser->current_pos);
      parser->elt_start_pos = parser->file_pos + parser->current_pos;

      if (!strncmp(elt, "!--", 3)) {
        xml_sax_flush_text(parser);
        parser->sax_state = SAX_STATE_COMMENT;
        if (i>3) parser->current_pos -= (i-3);
      }
      else if (!strcmp(elt, "?xml")) parser->init_state = 1;
      else if (!strcmp(elt, "!DOCTYPE")) parser->init_state = 2;
      else if (!strcmp(elt, "!ENTITY")) parser->sax_state = SAX_STATE_ENTITY;
      else if (!strcmp(elt, "!ATTLIST") || !strcmp(elt, "!ELEMENT")) parser->sax_state = SAX_STATE_SKIP_DOCTYPE;
      else if (!strcmp(elt, "![CDATA["))
        parser->sax_state = SAX_STATE_CDATA;
      else if (elt[0]=='?') {
        i--;
        parser->sax_state = SAX_STATE_XML_PROC;
      }
      /*node found*/
      else {
        xml_sax_flush_text(parser);
        if (parser->init_state) {
          parser->init_state = 0;
          /*that's a bit ugly: since we solve entities when appending text, we need to
          reparse the current buffer*/
          if (gf_list_count(parser->entities)) {
            char *orig_buf;
            GF_Err e;
            parser->buffer[parser->elt_name_end-1] = sep;
            orig_buf = gf_strdup(parser->buffer + parser->current_pos);
            if (!orig_buf) {
              parser->sax_state = SAX_STATE_ALLOC_ERROR;
              return GF_OUT_OF_MEM;
            }
            parser->current_pos = 0;
            parser->line_size = 0;
            parser->elt_start_pos = 0;
            parser->sax_state = SAX_STATE_TEXT_CONTENT;
            parser->ent_rec_level++;
            if (parser->ent_rec_level>100) {
              GF_LOG(GF_LOG_WARNING, GF_LOG_CORE, ("[XML] Too many recursions in entity solving, max 100 allowed\n"));
              e = GF_NOT_SUPPORTED;
            } else {
              e = gf_xml_sax_parse_intern(parser, orig_buf);
            }
            /*always balance the increment above, including on the error path*/
            parser->ent_rec_level--;
            gf_free(orig_buf);
            return e;
          }
        }
      }
      parser->current_pos+=1+i;
      parser->buffer[parser->elt_name_end-1] = sep;
      break;
    case SAX_STATE_COMMENT:
      if (!xml_sax_parse_comments(parser)) {
        xml_sax_swap(parser);
        goto exit;
      }
      break;
    case SAX_STATE_ATT_NAME:
    case SAX_STATE_ATT_VALUE:
      if (xml_sax_parse_attribute(parser))
        goto exit;
      break;
    case SAX_STATE_ENTITY:
      xml_sax_parse_entity(parser);
      break;
    case SAX_STATE_SKIP_DOCTYPE:
      xml_sax_skip_doctype(parser);
      break;
    case SAX_STATE_XML_PROC:
      xml_sax_skip_xml_proc(parser);
      break;
    case SAX_STATE_CDATA:
      xml_sax_cdata(parser);
      break;
    case SAX_STATE_SYNTAX_ERROR:
      return GF_CORRUPTED_DATA;
    case SAX_STATE_ALLOC_ERROR:
      return GF_OUT_OF_MEM;
    case SAX_STATE_DONE:
      return GF_EOS;
    }
  }
exit:
  /*DON'T FLUSH TEXT HERE, wait for next '<' to do so otherwise we may corrupt xml base entities (&apos;, ...)*/
  xml_sax_swap(parser);

  if (parser->sax_state==SAX_STATE_SYNTAX_ERROR)
    return GF_CORRUPTED_DATA;
  else
    return GF_OK;
}
976
977
/*
 * Appends a NUL-terminated string at the end of the parser working buffer.
 *
 * The buffer is grown to 1.5 times the required size when it is too small.
 *
 * parser: SAX parser whose buffer, line_size and alloc_size are updated
 * string: text to append; NULL or an empty string is a no-op
 *
 * Returns GF_OK on success or GF_OUT_OF_MEM when the buffer cannot be grown.
 * On failure the previous buffer and its bookkeeping are left untouched, so
 * the parser never ends up with a NULL buffer and a non-zero line_size.
 */
static GF_Err xml_sax_append_string(GF_SAXParser *parser, char *string)
{
  u32 size = parser->line_size;
  u32 nl_size = string ? (u32) strlen(string) : 0;

  if (!nl_size) return GF_OK;

  if (parser->alloc_size < size+nl_size+1) {
    u32 new_alloc = 3 * (size+nl_size+1) / 2;
    /*grow through a temporary: overwriting parser->buffer directly would lose
    the only reference to the old block if the reallocation fails*/
    char *new_buf = (char*)gf_realloc(parser->buffer, sizeof(char) * new_alloc);
    if (!new_buf) return GF_OUT_OF_MEM;
    parser->buffer = new_buf;
    parser->alloc_size = new_alloc;
  }
  memcpy(parser->buffer+size, string, sizeof(char)*nl_size);
  parser->buffer[size+nl_size] = 0;
  parser->line_size = size+nl_size;
  return GF_OK;
}
998
999
/*
 * Looks up the entity referenced at ent_start (text right after a '&').
 *
 * parser:     SAX parser holding the list of declared entities
 * ent_start:  NUL-terminated text following the '&'
 * needs_text: set to GF_TRUE when ent_start is entirely a prefix of a
 *             declared entity name (more input is needed to decide),
 *             GF_FALSE otherwise
 *
 * Returns the entity whose name is immediately followed by ';' at ent_start,
 * or NULL when there is no such entity (check needs_text in that case).
 */
static XML_Entity *gf_xml_locate_entity(GF_SAXParser *parser, char *ent_start, Bool *needs_text)
{
  u32 i, count;
  u32 len = (u32) strlen(ent_start);

  *needs_text = GF_FALSE;
  count = gf_list_count(parser->entities);

  for (i=0; i<count; i++) {
    XML_Entity *ent = (XML_Entity *)gf_list_get(parser->entities, i);
    if (len < ent->namelen + 1) {
      /*input is too short to hold "name;" for this entity. A mismatch only
      rules out this entity: keep scanning the others instead of giving up*/
      if (strncmp(ent->name, ent_start, len))
        continue;

      /*everything available so far matches the beginning of the name*/
      *needs_text = GF_TRUE;
      return NULL;
    }
    if (!strncmp(ent->name, ent_start, ent->namelen) && (ent_start[ent->namelen]==';')) {
      return ent;
    }
  }
  return NULL;
}
1022
1023
1024
/*
 * Feeds a chunk of text to the parser, expanding references to the entities
 * declared in parser->entities.
 *
 * When no entity is declared the chunk is appended to the working buffer and
 * parsed as is. Otherwise the chunk is cut at each '&': the text before it is
 * appended and parsed, then the entity value (if the reference resolves) is
 * appended and parsed in its place. A reference split across two chunks is
 * handled through parser->in_entity: the beginning of the reference is kept
 * in the working buffer until the terminating ';' shows up.
 *
 * parser:  SAX parser
 * current: NUL-terminated text; it is temporarily modified in place (a NUL is
 *          written over '&' or ';' and restored afterwards)
 *
 * Returns the error of the last parsing step, GF_OUT_OF_MEM on allocation
 * failure, or GF_CORRUPTED_DATA when a split entity cannot be resolved.
 */
static GF_Err gf_xml_sax_parse_intern(GF_SAXParser *parser, char *current)
{
  GF_Err e;
  u32 count;
  /*solve entities*/
  count = gf_list_count(parser->entities);
  while (count) {
    char *entityEnd;
    XML_Entity *ent;
    char *entityStart = strstr(current, "&");
    Bool needs_text;
    u32 line_num;

    /*if in entity, the start of the entity is in the buffer !!*/
    if (parser->in_entity) {
      u32 len;
      char *name;
      entityEnd = strstr(current, ";");
      if (!entityEnd) return xml_sax_append_string(parser, current);

      entityStart = strrchr(parser->buffer, '&');
      if (!entityStart) return xml_sax_append_string(parser, current);

      /*rebuild "name;" from the part stored in the buffer and the part in this chunk*/
      entityEnd[0] = 0;
      len = (u32) strlen(entityStart) + (u32) strlen(current) + 1;
      name = (char*)gf_malloc(sizeof(char)*len);
      if (!name) {
        entityEnd[0] = ';';
        return GF_OUT_OF_MEM;
      }
      sprintf(name, "%s%s;", entityStart+1, current);

      ent = gf_xml_locate_entity(parser, name, &needs_text);
      gf_free(name);

      //entity not found, parse as regular string
      if (!ent && !needs_text) {
        e = xml_sax_append_string(parser, current);
        if (!e) xml_sax_parse(parser, GF_TRUE);
        entityEnd[0] = ';';
        if (e) return e;
        current = entityEnd;
        parser->in_entity = GF_FALSE;
        continue;
      }
      if (!ent) {
        /*give the input back unmodified before bailing out*/
        entityEnd[0] = ';';
        GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[SAX] Entity not found\n"));
        return GF_CORRUPTED_DATA;
      }
      /*truncate input buffer*/
      parser->line_size -= (u32) strlen(entityStart);
      entityStart[0] = 0;

      parser->in_entity = GF_FALSE;
      entityEnd[0] = ';';
      current = entityEnd+1;
    } else {
      if (!entityStart) break;

      ent = gf_xml_locate_entity(parser, entityStart+1, &needs_text);

      /*store current string before entity start*/
      entityStart[0] = 0;
      e = xml_sax_append_string(parser, current);
      if (!e) xml_sax_parse(parser, GF_TRUE);
      entityStart[0] = '&';
      if (e) return e;

      /*this is not an entity*/
      if (!ent && !needs_text) {
        e = xml_sax_append_string(parser, "&");
        if (e) return e;
        current = entityStart+1;
        continue;
      }

      if (!ent) {
        parser->in_entity = GF_TRUE;
        /*store entity start*/
        return xml_sax_append_string(parser, entityStart);
      }
      /*skip '&', the entity name and ';'*/
      current = entityStart + ent->namelen + 2;
    }
    /*append entity, keeping the line counter of the including document*/
    line_num = parser->line;
    e = xml_sax_append_string(parser, ent->value);
    if (e) return e;
    e = xml_sax_parse(parser, GF_TRUE);
    parser->line = line_num;
    if (e) return e;

  }
  e = xml_sax_append_string(parser, current);
  if (e) return e;
  return xml_sax_parse(parser, GF_FALSE);
}
1110
1111
GF_EXPORT
1112
GF_Err gf_xml_sax_parse(GF_SAXParser *parser, const void *string)
1113
0
{
1114
0
  GF_Err e;
1115
0
  char *current;
1116
0
  char *utf_conv = NULL;
1117
1118
0
  if (parser->unicode_type < 0) return GF_BAD_PARAM;
1119
1120
0
  if (parser->unicode_type>1) {
1121
0
    const u16 *sptr = (const u16 *)string;
1122
0
    u32 len = 2 * gf_utf8_wcslen(sptr);
1123
0
    utf_conv = (char *)gf_malloc(sizeof(char)*(len+1));
1124
0
    len = gf_utf8_wcstombs(utf_conv, len, &sptr);
1125
0
    if (len == GF_UTF8_FAIL) {
1126
0
      parser->sax_state = SAX_STATE_SYNTAX_ERROR;
1127
0
      gf_free(utf_conv);
1128
0
      return GF_CORRUPTED_DATA;
1129
0
    }
1130
0
    utf_conv[len] = 0;
1131
0
    current = utf_conv;
1132
0
  } else {
1133
0
    current = (char *)string;
1134
0
  }
1135
1136
0
  e = gf_xml_sax_parse_intern(parser, current);
1137
0
  if (utf_conv) gf_free(utf_conv);
1138
0
  return e;
1139
0
}
1140
1141
1142
GF_EXPORT
1143
GF_Err gf_xml_sax_init(GF_SAXParser *parser, unsigned char *BOM)
1144
0
{
1145
0
  u32 offset;
1146
0
  if (!BOM) {
1147
0
    parser->unicode_type = 0;
1148
0
    parser->sax_state = SAX_STATE_ELEMENT;
1149
0
    return GF_OK;
1150
0
  }
1151
1152
0
  if (parser->unicode_type >= 0) return gf_xml_sax_parse(parser, BOM);
1153
1154
0
  if ((BOM[0]==0xFF) && (BOM[1]==0xFE)) {
1155
0
    if (!BOM[2] && !BOM[3]) return GF_NOT_SUPPORTED;
1156
0
    parser->unicode_type = 2;
1157
0
    offset = 2;
1158
0
  } else if ((BOM[0]==0xFE) && (BOM[1]==0xFF)) {
1159
0
    if (!BOM[2] && !BOM[3]) return GF_NOT_SUPPORTED;
1160
0
    parser->unicode_type = 1;
1161
0
    offset = 2;
1162
0
  } else if ((BOM[0]==0xEF) && (BOM[1]==0xBB) && (BOM[2]==0xBF)) {
1163
    /*we handle UTF8 as asci*/
1164
0
    parser->unicode_type = 0;
1165
0
    offset = 3;
1166
0
  } else {
1167
0
    parser->unicode_type = 0;
1168
0
    offset = 0;
1169
0
  }
1170
1171
#ifdef GPAC_ENABLE_COVERAGE
1172
  if (gf_sys_is_cov_mode()) {
1173
    format_sax_error(NULL, 0, "");
1174
  }
1175
#endif
1176
1177
0
  parser->sax_state = SAX_STATE_ELEMENT;
1178
0
  return gf_xml_sax_parse(parser, BOM + offset);
1179
0
}
1180
1181
/*
 * Releases everything owned by the parser except the parser object and the
 * entity list container: all declared entities, the working buffer and the
 * attribute arrays.
 */
static void xml_sax_reset(GF_SAXParser *parser)
{
  XML_Entity *cur;

  /*pop and destroy entities from the tail until the list is empty*/
  while ((cur = (XML_Entity *)gf_list_last(parser->entities)) != NULL) {
    gf_list_rem_last(parser->entities);
    if (cur->name) gf_free(cur->name);
    if (cur->value) gf_free(cur->value);
    gf_free(cur);
  }

  if (parser->buffer) gf_free(parser->buffer);
  parser->buffer = NULL;
  parser->current_pos = 0;

  gf_free(parser->attrs);
  parser->attrs = NULL;

  gf_free(parser->sax_attrs);
  parser->sax_attrs = NULL;

  parser->nb_attrs = 0;
  parser->nb_alloc_attrs = 0;
}
1200
1201
1202
/*
 * Reads the input file attached to the parser by chunks of XML_INPUT_SIZE
 * bytes and parses each chunk, until the parser gets suspended, a read
 * returns no data or parsing reports an error.
 *
 * Once the end of file is reached the file is closed, the progress callback
 * is notified of completion and the position bookkeeping is cleared.
 *
 * Returns GF_BAD_PARAM when no file is attached, GF_EOS when the end of the
 * file was reached without error, otherwise the last parsing status.
 */
static GF_Err xml_sax_read_file(GF_SAXParser *parser)
{
  unsigned char szLine[XML_INPUT_SIZE+2]={0};
  GF_Err e = GF_EOS;
  Bool at_end;

#ifdef NO_GZIP
  if (!parser->f_in) return GF_BAD_PARAM;
#else
  if (!parser->gz_in) return GF_BAD_PARAM;
#endif

  for (;;) {
    s32 nb_read;
    if (parser->suspended) break;

#ifdef NO_GZIP
    nb_read = (s32)gf_fread(szLine, XML_INPUT_SIZE, parser->f_in);
#else
    nb_read = gf_gzread(parser->gz_in, szLine, XML_INPUT_SIZE);
#endif
    if (nb_read<=0) break;

    /*double NUL so that the chunk is terminated for 16-bit input as well*/
    szLine[nb_read] = 0;
    szLine[nb_read+1] = 0;

    e = gf_xml_sax_parse(parser, szLine);
    if (e) break;

    if (parser->file_pos > parser->file_size)
      parser->file_size = parser->file_pos + 1;
    if (parser->on_progress)
      parser->on_progress(parser->sax_cbck, parser->file_pos, parser->file_size);
  }

#ifdef NO_GZIP
  at_end = gf_feof(parser->f_in) ? GF_TRUE : GF_FALSE;
#else
  at_end = gf_gzeof(parser->gz_in) ? GF_TRUE : GF_FALSE;
#endif
  if (!at_end) return e;

  if (!e) e = GF_EOS;
  if (parser->on_progress)
    parser->on_progress(parser->sax_cbck, parser->file_size, parser->file_size);

#ifdef NO_GZIP
  gf_fclose(parser->f_in);
  parser->f_in = NULL;
#else
  gf_gzclose(parser->gz_in);
  parser->gz_in = 0;
#endif

  /*file is done: clear all positions*/
  parser->elt_start_pos = 0;
  parser->elt_end_pos = 0;
  parser->elt_name_start = 0;
  parser->elt_name_end = 0;
  parser->att_name_start = 0;
  parser->current_pos = 0;
  parser->line_size = 0;
  parser->att_sep = 0;
  parser->file_pos = 0;
  parser->file_size = 0;
  return e;
}
1257
1258
GF_EXPORT
1259
GF_Err gf_xml_sax_parse_file(GF_SAXParser *parser, const char *fileName, gf_xml_sax_progress OnProgress)
1260
0
{
1261
0
  FILE *test;
1262
0
  GF_Err e;
1263
0
  u64 filesize;
1264
0
#ifndef NO_GZIP
1265
0
  gzFile gzInput;
1266
0
#endif
1267
0
  unsigned char szLine[6];
1268
1269
0
  parser->on_progress = OnProgress;
1270
1271
0
  if (!strncmp(fileName, "gmem://", 7)) {
1272
0
    u32 size;
1273
0
    u8 *xml_mem_address;
1274
0
    e = gf_blob_get(fileName, &xml_mem_address, &size, NULL);
1275
0
    if (e) return e;
1276
1277
0
    parser->file_size = size;
1278
    //copy possible BOM
1279
0
    memcpy(szLine, xml_mem_address, 4);
1280
0
    szLine[4] = szLine[5] = 0;
1281
1282
0
    parser->file_pos = 0;
1283
0
    parser->elt_start_pos = 0;
1284
0
    parser->current_pos = 0;
1285
1286
0
    e = gf_xml_sax_init(parser, szLine);
1287
0
    if (!e) {
1288
0
      e = gf_xml_sax_parse(parser, xml_mem_address+4);
1289
0
      if (parser->on_progress) parser->on_progress(parser->sax_cbck, parser->file_pos, parser->file_size);
1290
0
    }
1291
0
    gf_blob_release(fileName);
1292
1293
0
    parser->elt_start_pos = parser->elt_end_pos = 0;
1294
0
    parser->elt_name_start = parser->elt_name_end = 0;
1295
0
    parser->att_name_start = 0;
1296
0
    parser->current_pos = 0;
1297
0
    parser->line_size = 0;
1298
0
    parser->att_sep = 0;
1299
0
    parser->file_pos = 0;
1300
0
    parser->file_size = 0;
1301
0
    parser->line_size = 0;
1302
0
    return e;
1303
0
  }
1304
1305
  /*check file exists and gets its size (zlib doesn't support SEEK_END)*/
1306
0
  test = gf_fopen(fileName, "rb");
1307
0
  if (!test) return GF_URL_ERROR;
1308
1309
0
  filesize = gf_fsize(test);
1310
0
  gf_fatal_assert(filesize < 0x80000000);
1311
0
  parser->file_size = (u32) filesize;
1312
0
  gf_fclose(test);
1313
1314
0
  parser->file_pos = 0;
1315
0
  parser->elt_start_pos = 0;
1316
0
  parser->current_pos = 0;
1317
  //open file and copy possible BOM
1318
#ifdef NO_GZIP
1319
  parser->f_in = gf_fopen(fileName, "rt");
1320
  if (gf_fread(szLine, 4, parser->f_in) != 4) {
1321
    GF_LOG(GF_LOG_WARNING, GF_LOG_CORE, ("[XML] Error loading BOM\n"));
1322
  }
1323
#else
1324
0
  gzInput = gf_gzopen(fileName, "rb");
1325
0
  if (!gzInput) return GF_IO_ERR;
1326
0
  parser->gz_in = gzInput;
1327
  /*init SAX parser (unicode setup)*/
1328
0
  gf_gzread(gzInput, szLine, 4);
1329
0
#endif
1330
1331
0
  szLine[4] = szLine[5] = 0;
1332
0
  e = gf_xml_sax_init(parser, szLine);
1333
0
  if (e) return e;
1334
1335
0
  return xml_sax_read_file(parser);
1336
0
}
1337
1338
GF_EXPORT
1339
Bool gf_xml_sax_binary_file(GF_SAXParser *parser)
1340
0
{
1341
0
  if (!parser) return GF_FALSE;
1342
#ifdef NO_GZIP
1343
  return GF_FALSE;
1344
#else
1345
0
  if (!parser->gz_in) return GF_FALSE;
1346
0
  return (((z_stream*)parser->gz_in)->data_type==Z_BINARY) ? GF_TRUE : GF_FALSE;
1347
0
#endif
1348
0
}
1349
1350
GF_EXPORT
1351
GF_SAXParser *gf_xml_sax_new(gf_xml_sax_node_start on_node_start,
1352
                             gf_xml_sax_node_end on_node_end,
1353
                             gf_xml_sax_text_content on_text_content,
1354
                             void *cbck)
1355
0
{
1356
0
  GF_SAXParser *parser;
1357
0
  GF_SAFEALLOC(parser, GF_SAXParser);
1358
0
  if (!parser) return NULL;
1359
0
  parser->entities = gf_list_new();
1360
0
  parser->unicode_type = -1;
1361
0
  parser->sax_node_start = on_node_start;
1362
0
  parser->sax_node_end = on_node_end;
1363
0
  parser->sax_text_content = on_text_content;
1364
0
  parser->sax_cbck = cbck;
1365
0
  if (!XML_MAX_CONTENT_SIZE) {
1366
0
    XML_MAX_CONTENT_SIZE = gf_opts_get_int("core", "xml-max-csize");
1367
0
  }
1368
0
  return parser;
1369
0
}
1370
1371
GF_EXPORT
1372
void gf_xml_sax_del(GF_SAXParser *parser)
1373
0
{
1374
0
  xml_sax_reset(parser);
1375
0
  gf_list_del(parser->entities);
1376
#ifdef NO_GZIP
1377
  if (parser->f_in) gf_fclose(parser->f_in);
1378
#else
1379
0
  if (parser->gz_in) gf_gzclose(parser->gz_in);
1380
0
#endif
1381
0
  gf_free(parser);
1382
0
}
1383
1384
GF_EXPORT
1385
GF_Err gf_xml_sax_suspend(GF_SAXParser *parser, Bool do_suspend)
1386
0
{
1387
0
  parser->suspended = do_suspend;
1388
0
  if (!do_suspend) {
1389
#ifdef NO_GZIP
1390
    if (parser->f_in) return xml_sax_read_file(parser);
1391
#else
1392
0
    if (parser->gz_in) return xml_sax_read_file(parser);
1393
0
#endif
1394
0
    return xml_sax_parse(parser, GF_FALSE);
1395
0
  }
1396
0
  return GF_OK;
1397
0
}
1398
1399
1400
GF_EXPORT
1401
0
u32 gf_xml_sax_get_line(GF_SAXParser *parser) {
1402
0
  return parser->line + 1 ;
1403
0
}
1404
1405
#if 0 //unused
/*returns the recorded file size, or 0 when no input file is attached*/
u32 gf_xml_sax_get_file_size(GF_SAXParser *parser)
{
#ifdef NO_GZIP
  if (!parser->f_in) return 0;
#else
  if (!parser->gz_in) return 0;
#endif
  return parser->file_size;
}

/*returns the recorded file position, or 0 when no input file is attached*/
u32 gf_xml_sax_get_file_pos(GF_SAXParser *parser)
{
#ifdef NO_GZIP
  if (!parser->f_in) return 0;
#else
  if (!parser->gz_in) return 0;
#endif
  return parser->file_pos;
}
#endif
1424
1425
1426
1427
GF_EXPORT
1428
char *gf_xml_sax_peek_node(GF_SAXParser *parser, char *att_name, char *att_value, char *substitute, char *get_attr, char *end_pattern, Bool *is_substitute)
1429
0
{
1430
0
  u32 state, att_len, alloc_size, _len;
1431
#ifdef NO_GZIP
1432
  u64 pos;
1433
#else
1434
0
  z_off_t pos;
1435
0
#endif
1436
0
  Bool from_buffer;
1437
0
  Bool dobreak=GF_FALSE;
1438
0
  char *szLine1, *szLine2, *szLine, *cur_line, *sep, *start, first_c, *result;
1439
1440
1441
0
#define CPYCAT_ALLOC(__str, __is_copy) _len = (u32) strlen(__str);\
1442
0
              if ( _len + (__is_copy ? 0 : strlen(szLine))>=alloc_size) {\
1443
0
                alloc_size = 1 + (u32) strlen(__str); \
1444
0
                if (!__is_copy) alloc_size += (u32) strlen(szLine); \
1445
0
                szLine = gf_realloc(szLine, alloc_size);  \
1446
0
              }\
1447
0
              if (__is_copy) { memmove(szLine, __str, sizeof(char)*_len); szLine[_len] = 0; }\
1448
0
              else strcat(szLine, __str); \
1449
0
1450
0
  from_buffer=GF_FALSE;
1451
#ifdef NO_GZIP
1452
  if (!parser->f_in) from_buffer=GF_TRUE;
1453
#else
1454
0
  if (!parser->gz_in) from_buffer=GF_TRUE;
1455
0
#endif
1456
1457
0
  result = NULL;
1458
1459
0
  szLine1 = gf_malloc(sizeof(char)*(XML_INPUT_SIZE+2));
1460
0
  if (!szLine1) return NULL;
1461
0
  szLine2 = gf_malloc(sizeof(char)*(XML_INPUT_SIZE+2));
1462
0
  if (!szLine2) {
1463
0
    gf_free(szLine1);
1464
0
    return NULL;
1465
0
  }
1466
0
  szLine1[0] = szLine2[0] = 0;
1467
0
  pos=0;
1468
0
  if (!from_buffer) {
1469
#ifdef NO_GZIP
1470
    pos = gf_ftell(parser->f_in);
1471
#else
1472
0
    pos = (u32) gf_gztell(parser->gz_in);
1473
0
#endif
1474
0
  }
1475
0
  att_len = (u32) strlen(parser->buffer + parser->att_name_start);
1476
0
  if (att_len<2*XML_INPUT_SIZE) att_len = 2*XML_INPUT_SIZE;
1477
0
  alloc_size = att_len;
1478
0
  szLine = (char *) gf_malloc(sizeof(char)*alloc_size);
1479
0
  if (!szLine) {
1480
0
    gf_free(szLine1);
1481
0
    gf_free(szLine2);
1482
0
    return NULL;
1483
0
  }
1484
0
  strcpy(szLine, parser->buffer + parser->att_name_start);
1485
0
  cur_line = szLine;
1486
0
  att_len = (u32) strlen(att_value);
1487
0
  state = 0;
1488
0
  goto retry;
1489
1490
0
  while (1) {
1491
0
    u32 read;
1492
0
    u8 sep_char;
1493
0
    if (!from_buffer) {
1494
#ifdef NO_GZIP
1495
      if (gf_feof(parser->f_in)) break;
1496
#else
1497
0
      if (gf_gzeof(parser->gz_in)) break;
1498
0
#endif
1499
0
    }
1500
1501
0
    if (dobreak) break;
1502
1503
0
    if (cur_line == szLine2) {
1504
0
      cur_line = szLine1;
1505
0
    } else {
1506
0
      cur_line = szLine2;
1507
0
    }
1508
0
    if (from_buffer) {
1509
0
      dobreak=GF_TRUE;
1510
0
    } else {
1511
#ifdef NO_GZIP
1512
      read = (u32)gf_fread(cur_line, XML_INPUT_SIZE, parser->f_in);
1513
#else
1514
0
      read = gf_gzread(parser->gz_in, cur_line, XML_INPUT_SIZE);
1515
0
#endif
1516
0
      cur_line[read] = cur_line[read+1] = 0;
1517
1518
0
      CPYCAT_ALLOC(cur_line, 0);
1519
0
    }
1520
1521
0
    if (end_pattern) {
1522
0
      start  = strstr(szLine, end_pattern);
1523
0
      if (start) {
1524
0
        start[0] = 0;
1525
0
        dobreak = GF_TRUE;
1526
0
      }
1527
0
    }
1528
1529
0
retry:
1530
0
    if (state == 2) goto fetch_attr;
1531
0
    sep = strstr(szLine, att_name);
1532
0
    if (!sep && !state) {
1533
0
      state = 0;
1534
0
      start = strrchr(szLine, '<');
1535
0
      if (start) {
1536
0
        CPYCAT_ALLOC(start, 1);
1537
0
      } else {
1538
0
        CPYCAT_ALLOC(cur_line, 1);
1539
0
      }
1540
0
      continue;
1541
0
    }
1542
0
    if (!state) {
1543
0
      state = 1;
1544
      /*load next line*/
1545
0
      first_c = sep[0];
1546
0
      sep[0] = 0;
1547
0
      start = strrchr(szLine, '<');
1548
0
      if (!start)
1549
0
        goto exit;
1550
0
      sep[0] = first_c;
1551
0
      CPYCAT_ALLOC(start, 1);
1552
0
      sep = strstr(szLine, att_name);
1553
0
    }
1554
0
    sep = sep ? strchr(sep, '=') : NULL;
1555
0
    if (!sep) {
1556
0
      state = 0;
1557
0
      CPYCAT_ALLOC(cur_line, 1);
1558
0
      continue;
1559
0
    }
1560
0
    while (sep[0] && (sep[0] != '\"') && (sep[0] != '\'') ) sep++;
1561
0
    if (!sep[0]) continue;
1562
0
    sep_char = sep[0];
1563
0
    sep++;
1564
0
    while (sep[0] && strchr(" \n\r\t", sep[0]) ) sep++;
1565
0
    if (!sep[0]) continue;
1566
0
    if (!strchr(sep, sep_char))
1567
0
      continue;
1568
1569
    /*found*/
1570
0
    if (!strncmp(sep, att_value, att_len)) {
1571
0
      u32 sub_pos;
1572
0
      sep = szLine + 1;
1573
0
      while (strchr(" \t\r\n", sep[0])) sep++;
1574
0
      sub_pos = 0;
1575
0
      while (!strchr(" \t\r\n", sep[sub_pos])) sub_pos++;
1576
0
      first_c = sep[sub_pos];
1577
0
      sep[sub_pos] = 0;
1578
0
      state = 2;
1579
0
      if (!substitute || !get_attr || strcmp(sep, substitute) ) {
1580
0
        if (is_substitute) *is_substitute = GF_FALSE;
1581
0
        result = gf_strdup(sep);
1582
0
        sep[sub_pos] = first_c;
1583
0
        goto exit;
1584
0
      }
1585
0
      sep[sub_pos] = first_c;
1586
0
fetch_attr:
1587
0
      sep = strstr(szLine + 1, get_attr);
1588
0
      if (!sep) {
1589
0
        CPYCAT_ALLOC(cur_line, 1);
1590
0
        continue;
1591
0
      }
1592
0
      sep += strlen(get_attr);
1593
0
      while (strchr("= \t\r\n", sep[0])) sep++;
1594
0
      sep++;
1595
0
      sub_pos = 0;
1596
0
      while (!strchr(" \t\r\n/>", sep[sub_pos])) sub_pos++;
1597
0
      sep[sub_pos-1] = 0;
1598
0
      result = gf_strdup(sep);
1599
0
      if (is_substitute) *is_substitute = GF_TRUE;
1600
0
      goto exit;
1601
0
    }
1602
0
    state = 0;
1603
0
    CPYCAT_ALLOC(sep, 1);
1604
0
    goto retry;
1605
0
  }
1606
0
exit:
1607
0
  gf_free(szLine);
1608
0
  gf_free(szLine1);
1609
0
  gf_free(szLine2);
1610
1611
0
  if (!from_buffer) {
1612
#ifdef NO_GZIP
1613
    gf_fseek(parser->f_in, pos, SEEK_SET);
1614
#else
1615
0
    gf_gzrewind(parser->gz_in);
1616
0
    gf_gzseek(parser->gz_in, pos, SEEK_SET);
1617
0
#endif
1618
0
  }
1619
0
  return result;
1620
0
}
1621
1622
GF_EXPORT
1623
const char *gf_xml_sax_get_error(GF_SAXParser *parser)
1624
0
{
1625
0
  return parser->err_msg;
1626
0
}
1627
1628
1629
/*
 * Callback context used by gf_xml_get_root_type():
 *  - parser: SAX parser driving the callback, suspended by on_peek_node_start
 *  - res:    copy of the last element name seen, allocated with gf_strdup
 */
struct _peek_type
1630
{
1631
  GF_SAXParser *parser;
1632
  char *res;
1633
};
1634
1635
/*
 * Element start callback of gf_xml_get_root_type(): records a copy of the
 * element name (replacing any previous one) and suspends the parser.
 */
static void on_peek_node_start(void *cbk, const char *name, const char *ns, const GF_XMLAttribute *attributes, u32 nb_attributes)
{
  struct _peek_type *ctx = (struct _peek_type*)cbk;

  if (ctx->res)
    gf_free(ctx->res);
  ctx->res = gf_strdup(name);

  /*one element is all we need*/
  ctx->parser->suspended = GF_TRUE;
}
1642
1643
GF_EXPORT
1644
char *gf_xml_get_root_type(const char *file, GF_Err *ret)
1645
0
{
1646
0
  GF_Err e;
1647
0
  struct _peek_type pt;
1648
0
  pt.res = NULL;
1649
0
  pt.parser = gf_xml_sax_new(on_peek_node_start, NULL, NULL, &pt);
1650
0
  e = gf_xml_sax_parse_file(pt.parser, file, NULL);
1651
0
  if (ret) *ret = e;
1652
0
  gf_xml_sax_del(pt.parser);
1653
0
  return pt.res;
1654
0
}
1655
1656
1657
GF_EXPORT
1658
u32 gf_xml_sax_get_node_start_pos(GF_SAXParser *parser)
1659
0
{
1660
0
  return parser->elt_start_pos;
1661
0
}
1662
1663
GF_EXPORT
1664
u32 gf_xml_sax_get_node_end_pos(GF_SAXParser *parser)
1665
0
{
1666
0
  return parser->elt_end_pos;
1667
0
}
1668
1669
/*
 * DOM parser state, built on top of a SAX parser:
 *  - parser:     SAX parser created by gf_xml_dom_parse / gf_xml_dom_parse_string
 *  - stack:      elements currently open; pushed by on_dom_node_start and
 *                popped by on_dom_node_end
 *  - root, root_nodes: see the comments on the fields
 *  - depth:      not used by the DOM callbacks visible next to this
 *                definition - TODO confirm it is still needed
 *  - keep_valid: copied into valid_content of each element when it is closed
 *  - OnProgress, cbk: user progress callback and its argument, invoked
 *                through dom_on_progress
 */
struct _tag_dom_parser
1670
{
1671
  GF_SAXParser *parser;
1672
  GF_List *stack;
1673
  //root node being parsed
1674
  GF_XMLNode *root;
1675
  //usually only one :)
1676
  GF_List *root_nodes;
1677
  u32 depth;
1678
  Bool keep_valid;
1679
  void (*OnProgress)(void *cbck, u64 done, u64 tot);
1680
  void *cbk;
1681
};
1682
1683
1684
GF_EXPORT
1685
void gf_xml_dom_node_reset(GF_XMLNode *node, Bool reset_attribs, Bool reset_children)
1686
0
{
1687
0
  if (!node) return;
1688
0
  if (node->attributes && reset_attribs) {
1689
0
    while (gf_list_count(node->attributes)) {
1690
0
      GF_XMLAttribute *att = (GF_XMLAttribute *)gf_list_last(node->attributes);
1691
0
      gf_list_rem_last(node->attributes);
1692
0
      if (att->name) gf_free(att->name);
1693
0
      if (att->value) gf_free(att->value);
1694
0
      gf_free(att);
1695
0
    }
1696
0
  }
1697
1698
0
  if (reset_children && node->content) {
1699
0
    while (gf_list_count(node->content)) {
1700
0
      GF_XMLNode *child = (GF_XMLNode *)gf_list_last(node->content);
1701
0
      gf_list_rem_last(node->content);
1702
0
      gf_xml_dom_node_del(child);
1703
0
    }
1704
0
  }
1705
0
}
1706
1707
GF_EXPORT
1708
void gf_xml_dom_node_del(GF_XMLNode *node)
1709
0
{
1710
0
  if (!node) return;
1711
0
  gf_xml_dom_node_reset(node, GF_TRUE, GF_TRUE);
1712
0
  if (node->attributes) gf_list_del(node->attributes);
1713
0
  if (node->content) gf_list_del(node->content);
1714
0
  if (node->ns) gf_free(node->ns);
1715
0
  if (node->name) gf_free(node->name);
1716
0
  gf_free(node);
1717
0
}
1718
1719
GF_List * gf_list_new_prealloc(u32 nb_prealloc);
1720
1721
/*
 * SAX element start callback of the DOM parser.
 *
 * Creates a node for the element, pushes it on the stack of open elements
 * and copies its attributes, skipping attributes whose name was already
 * copied for this element. The first node created while no root is set
 * becomes the root and is registered in root_nodes.
 *
 * If a root is set and no element is open, the parser is suspended and
 * nothing is created. Allocation failures put the SAX parser in
 * SAX_STATE_ALLOC_ERROR.
 */
static void on_dom_node_start(void *cbk, const char *name, const char *ns, const GF_XMLAttribute *attributes, u32 nb_attributes)
{
  GF_DOMParser *dom = (GF_DOMParser *) cbk;
  GF_XMLNode *elt;
  u32 idx;

  if (dom->root && !gf_list_count(dom->stack)) {
    dom->parser->suspended = GF_TRUE;
    return;
  }

  GF_SAFEALLOC(elt, GF_XMLNode);
  if (!elt) {
    dom->parser->sax_state = SAX_STATE_ALLOC_ERROR;
    return;
  }
  elt->attributes = gf_list_new_prealloc(nb_attributes);
  //don't allocate content yet
  elt->name = gf_strdup(name);
  if (ns) elt->ns = gf_strdup(ns);
  gf_list_add(dom->stack, elt);
  if (!dom->root) {
    dom->root = elt;
    gf_list_add(dom->root_nodes, elt);
  }

  for (idx=0; idx<nb_attributes; idx++) {
    const GF_XMLAttribute *src = attributes + idx;
    GF_XMLAttribute *copy;
    Bool already_set = GF_FALSE;
    u32 k = 0;

    /*look for the same name among the attributes copied so far*/
    while (k<idx) {
      GF_XMLAttribute *prev = gf_list_get(elt->attributes, k);
      if (!prev) break;
      if (!strcmp(prev->name, src->name)) {
        already_set = GF_TRUE;
        GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[SAX] Duplicated attribute \"%s\" on node \"%s\", ignoring\n", src->name, name));
        break;
      }
      k++;
    }
    if (already_set) continue;

    GF_SAFEALLOC(copy, GF_XMLAttribute);
    if (! copy) {
      GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[SAX] Failed to allocate attribute\n"));
      dom->parser->sax_state = SAX_STATE_ALLOC_ERROR;
      return;
    }
    copy->name = gf_strdup(src->name);
    copy->value = gf_strdup(src->value);
    gf_list_add(elt->attributes, copy);
  }
}
1774
1775
static void on_dom_node_end(void *cbk, const char *name, const char *ns)
1776
0
{
1777
0
  GF_DOMParser *par = (GF_DOMParser *)cbk;
1778
0
  GF_XMLNode *last = (GF_XMLNode *)gf_list_last(par->stack);
1779
0
  gf_list_rem_last(par->stack);
1780
1781
0
  if (!last || (strlen(last->name)!=strlen(name)) || strcmp(last->name, name) || (!ns && last->ns) || (ns && !last->ns) || (ns && strcmp(last->ns, ns) ) ) {
1782
0
    s32 idx;
1783
0
    format_sax_error(par->parser, 0, "Invalid node stack: closing node is %s but %s was expected", name, last ? last->name : "unknown");
1784
0
    par->parser->suspended = GF_TRUE;
1785
0
    gf_xml_dom_node_del(last);
1786
0
    if (last == par->root)
1787
0
      par->root=NULL;
1788
0
    idx = gf_list_find(par->root_nodes, last);
1789
0
    if (idx != -1)
1790
0
      gf_list_rem(par->root_nodes, idx);
1791
0
    return;
1792
0
  }
1793
0
  if (last != par->root) {
1794
0
    GF_XMLNode *node = (GF_XMLNode *)gf_list_last(par->stack);
1795
0
    if (!node->content)
1796
0
      node->content = gf_list_new();
1797
1798
0
    gf_list_add(node->content, last);
1799
0
  }
1800
0
  last->valid_content = par->keep_valid;
1801
0
}
1802
1803
/*
 * SAX text callback of the DOM parser: adds a text (or CDATA) child holding
 * a copy of content to the innermost open element. Text received while no
 * element is open is dropped.
 */
static void on_dom_text_content(void *cbk, const char *content, Bool is_cdata)
{
  GF_DOMParser *dom = (GF_DOMParser *)cbk;
  GF_XMLNode *owner = (GF_XMLNode *)gf_list_last(dom->stack);
  GF_XMLNode *txt;

  if (!owner) return;

  /*content lists are created lazily*/
  if (!owner->content)
    owner->content = gf_list_new();

  GF_SAFEALLOC(txt, GF_XMLNode);
  if (!txt) {
    GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[SAX] Failed to allocate XML node"));
    dom->parser->sax_state = SAX_STATE_ALLOC_ERROR;
    return;
  }

  if (is_cdata)
    txt->type = GF_XML_CDATA_TYPE;
  else
    txt->type = GF_XML_TEXT_TYPE;
  /*for text nodes the text is stored in the name field*/
  txt->name = gf_strdup(content);
  gf_list_add(owner->content, txt);
}
1822
1823
GF_EXPORT
1824
GF_DOMParser *gf_xml_dom_new()
1825
3.05k
{
1826
3.05k
  GF_DOMParser *dom;
1827
3.05k
  GF_SAFEALLOC(dom, GF_DOMParser);
1828
3.05k
  if (!dom) return NULL;
1829
1830
3.05k
  dom->root_nodes = gf_list_new();
1831
3.05k
  dom->keep_valid = 0;
1832
3.05k
  return dom;
1833
3.05k
}
1834
1835
/*
 * Resets a DOM parser.
 *
 * dom:        DOM parser
 * full_reset: when set, the SAX parser and all root nodes are destroyed too
 *
 * In all cases the stack of open elements is destroyed along with the nodes
 * still on it; if the current root is among them it is unregistered from
 * root_nodes first so that it is not destroyed twice.
 */
static void gf_xml_dom_reset(GF_DOMParser *dom, Bool full_reset)
{
  GF_XMLNode *cur;

  if (full_reset) {
    if (dom->parser) {
      gf_xml_sax_del(dom->parser);
      dom->parser = NULL;
    }
  }

  if (dom->stack) {
    while (gf_list_count(dom->stack)) {
      cur = (GF_XMLNode *)gf_list_last(dom->stack);
      gf_list_rem_last(dom->stack);
      if (cur == dom->root) {
        gf_list_del_item(dom->root_nodes, cur);
        dom->root = NULL;
      }
      gf_xml_dom_node_del(cur);
    }
    gf_list_del(dom->stack);
    dom->stack = NULL;
  }

  if (!full_reset) return;
  if (!gf_list_count(dom->root_nodes)) return;

  do {
    cur = (GF_XMLNode *)gf_list_last(dom->root_nodes);
    gf_list_rem_last(dom->root_nodes);
    gf_xml_dom_node_del(cur);
  } while (gf_list_count(dom->root_nodes));
  dom->root = NULL;
}
1864
1865
GF_EXPORT
1866
void gf_xml_dom_del(GF_DOMParser *parser)
1867
3.05k
{
1868
3.05k
  if (!parser)
1869
0
    return;
1870
1871
3.05k
  gf_xml_dom_reset(parser, GF_TRUE);
1872
3.05k
  gf_list_del(parser->root_nodes);
1873
3.05k
  gf_free(parser);
1874
3.05k
}
1875
1876
GF_EXPORT
1877
GF_XMLNode *gf_xml_dom_detach_root(GF_DOMParser *parser)
1878
0
{
1879
0
  GF_XMLNode *root;
1880
0
  if (!parser)
1881
0
    return NULL;
1882
0
  root = parser->root;
1883
0
  gf_list_del_item(parser->root_nodes, root);
1884
0
  parser->root = gf_list_get(parser->root_nodes, 0);
1885
0
  return root;
1886
0
}
1887
1888
/*
 * Progress callback given to the SAX parser: forwards the notification to
 * the user callback stored in the DOM parser, with the user argument.
 */
static void dom_on_progress(void *cbck, u64 done, u64 tot)
{
  GF_DOMParser *owner = (GF_DOMParser *)cbck;
  void *user_arg = owner->cbk;

  owner->OnProgress(user_arg, done, tot);
}
1893
1894
GF_EXPORT
1895
GF_Err gf_xml_dom_parse(GF_DOMParser *dom, const char *file, gf_xml_sax_progress OnProgress, void *cbk)
1896
0
{
1897
0
  GF_Err e;
1898
0
  gf_xml_dom_reset(dom, GF_TRUE);
1899
0
  dom->stack = gf_list_new();
1900
0
  dom->parser = gf_xml_sax_new(on_dom_node_start, on_dom_node_end, on_dom_text_content, dom);
1901
0
  dom->OnProgress = OnProgress;
1902
0
  dom->cbk = cbk;
1903
0
  e = gf_xml_sax_parse_file(dom->parser, file, OnProgress ? dom_on_progress : NULL);
1904
0
  gf_xml_dom_reset(dom, GF_FALSE);
1905
0
  return e<0 ? e : GF_OK;
1906
0
}
1907
1908
GF_EXPORT
1909
GF_Err gf_xml_dom_parse_string(GF_DOMParser *dom, char *string)
1910
0
{
1911
0
  GF_Err e;
1912
0
  gf_xml_dom_reset(dom, GF_TRUE);
1913
0
  dom->stack = gf_list_new();
1914
0
  dom->parser = gf_xml_sax_new(on_dom_node_start, on_dom_node_end, on_dom_text_content, dom);
1915
0
  e = gf_xml_sax_init(dom->parser, (unsigned char *) string);
1916
0
  gf_xml_dom_reset(dom, GF_FALSE);
1917
0
  return e<0 ? e : GF_OK;
1918
0
}
1919
1920
GF_EXPORT
1921
GF_Err gf_xml_dom_enable_passthrough(GF_DOMParser *dom)
1922
0
{
1923
0
  if (!dom) return GF_BAD_PARAM;
1924
0
  dom->keep_valid = 1;
1925
0
  return GF_OK;
1926
0
}
1927
1928
#if 0 //unused
/*allocates a standalone node named after name; the node is not registered in the parser*/
GF_XMLNode *gf_xml_dom_create_root(GF_DOMParser *parser, const char* name)
{
  GF_XMLNode *node;

  if (!parser)
    return NULL;

  GF_SAFEALLOC(node, GF_XMLNode);
  if (!node)
    return NULL;

  node->name = gf_strdup(name);
  return node;
}
#endif
1940
1941
GF_EXPORT
1942
GF_XMLNode *gf_xml_dom_get_root(GF_DOMParser *parser)
1943
0
{
1944
0
  return parser ? parser->root : NULL;
1945
0
}
1946
GF_EXPORT
1947
const char *gf_xml_dom_get_error(GF_DOMParser *parser)
1948
0
{
1949
0
  return gf_xml_sax_get_error(parser->parser);
1950
0
}
1951
GF_EXPORT
1952
u32 gf_xml_dom_get_line(GF_DOMParser *parser)
1953
0
{
1954
0
  return gf_xml_sax_get_line(parser->parser);
1955
0
}
1956
1957
GF_EXPORT
1958
u32 gf_xml_dom_get_root_nodes_count(GF_DOMParser *parser)
1959
0
{
1960
0
  return parser? gf_list_count(parser->root_nodes) : 0;
1961
0
}
1962
1963
GF_EXPORT
1964
GF_XMLNode *gf_xml_dom_get_root_idx(GF_DOMParser *parser, u32 idx)
1965
0
{
1966
0
  return parser ? (GF_XMLNode*)gf_list_get(parser->root_nodes, idx) : NULL;
1967
0
}
1968
1969
1970
/*
 * Appends len bytes of v to the growing NUL-terminated buffer *str.
 *
 * *size is the current string length and *alloc_size the allocated size; both
 * are updated. The buffer grows to MAX(needed, 2*alloc_size)+1 bytes.
 *
 * If reallocation fails the buffer is released, *str is set to NULL and
 * *alloc_size is left non-zero. A NULL buffer with a non-zero allocation size
 * marks the failed state: every later append is then a no-op, so the
 * serialization entry points end up returning NULL instead of crashing.
 * A NULL v is ignored.
 */
static void xml_dom_str_append_n(char **str, u32 *alloc_size, u32 *size, const char *v, u32 len)
{
  u32 needed;

  if (!v) return;
  /*sticky failure state, see above*/
  if (!(*str) && (*alloc_size)) return;

  needed = len + (*size);
  if (needed >= (*alloc_size)) {
    u32 new_alloc = MAX(needed, 2 * (*alloc_size)) + 1;
    char *grown = (char *) gf_realloc((*str), new_alloc);
    if (!grown) {
      GF_LOG(GF_LOG_ERROR, GF_LOG_CORE, ("[XML] Failed to allocate %u bytes while serializing\n", new_alloc));
      if (*str) gf_free(*str);
      *str = NULL;
      *size = 0;
      *alloc_size = new_alloc;
      return;
    }
    *str = grown;
    *alloc_size = new_alloc;
  }
  memcpy((*str) + (*size), v, len);
  *size += len;
  (*str)[*size] = 0;
}

/* Appends the NUL-terminated string v (ignored if NULL) to the buffer. */
static void xml_dom_str_append(char **str, u32 *alloc_size, u32 *size, const char *v)
{
  if (!v) return;
  xml_dom_str_append_n(str, alloc_size, size, v, (u32) strlen(v));
}

/*
 * Appends v with the five XML predefined entities escaped
 * (& < > ' " become &amp; &lt; &gt; &apos; &quot;).
 * Runs of ordinary characters are appended in one go; an empty or NULL v
 * appends nothing and does not allocate.
 */
static void xml_dom_str_append_escaped(char **str, u32 *alloc_size, u32 *size, const char *v)
{
  const char *run_start, *cur;

  if (!v) return;

  run_start = v;
  for (cur = v; *cur; cur++) {
    const char *entity;
    switch (*cur) {
    case '&':
      entity = "&amp;";
      break;
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '\'':
      entity = "&apos;";
      break;
    case '\"':
      entity = "&quot;";
      break;
    default:
      entity = NULL;
      break;
    }
    if (!entity) continue;

    /*flush the pending run of unescaped characters, then the entity*/
    if (cur > run_start)
      xml_dom_str_append_n(str, alloc_size, size, run_start, (u32) (cur - run_start));
    xml_dom_str_append(str, alloc_size, size, entity);
    run_start = cur + 1;
  }
  if (cur > run_start)
    xml_dom_str_append_n(str, alloc_size, size, run_start, (u32) (cur - run_start));
}

/*the macro names are kept and stay defined after the function, as before; they expect str, alloc_size and size in scope*/
#define SET_STRING(v)          xml_dom_str_append(str, alloc_size, size, (v))
#define SET_STRING_ESCAPED(uv) xml_dom_str_append_escaped(str, alloc_size, size, (uv))

/*
 * Serializes node (recursively) at the end of the buffer *str.
 *
 * node:         node to serialize; NULL is ignored
 * content_only: if set, only the children are written, not the node's own tags
 * no_escape:    for a text node, write the text as is instead of escaping it;
 *               children are serialized with the parent's valid_content flag
 *               in place of this argument
 * str, alloc_size, size: growing output buffer, its allocated size and the
 *               current string length
 *
 * On allocation failure *str is NULL when the function returns.
 */
static void gf_xml_dom_node_serialize(GF_XMLNode *node, Bool content_only, Bool no_escape, char **str, u32 *alloc_size, u32 *size)
{
  u32 i, count;
  const char *name;

  if (!node) return;

  switch (node->type) {
  case GF_XML_CDATA_TYPE:
    /*NOTE(review): the opening marker is written without a leading '<', whereas XML CDATA sections start with "<![CDATA[" - kept as is, confirm whether this is intended*/
    SET_STRING("![CDATA[");
    SET_STRING(node->name);
    SET_STRING("]]>");
    return;
  case GF_XML_TEXT_TYPE:
    name = node->name;
    /*a text node may have no name yet (gf_xml_dom_node_new with a NULL name)*/
    if (!name) return;
    /*skip the CR of a leading CRLF*/
    if ((name[0]=='\r') && (name[1]=='\n'))
      name++;

    if (no_escape) {
      SET_STRING(name);
    } else {
      SET_STRING_ESCAPED(name);
    }
    return;
  default:
    break;
  }

  if (!content_only) {
    SET_STRING("<");
    if (node->ns) {
      SET_STRING(node->ns);
      SET_STRING(":");
    }
    SET_STRING(node->name);
    count = gf_list_count(node->attributes);
    if (count > 0) {
      SET_STRING(" ");
    }
    for (i=0; i<count; i++) {
      GF_XMLAttribute *att = (GF_XMLAttribute*)gf_list_get(node->attributes, i);
      if (!att) continue;
      SET_STRING(att->name);
      SET_STRING("=\"");
      SET_STRING_ESCAPED(att->value);
      SET_STRING("\" ");
    }

    /*no children: self-closing element*/
    if (!gf_list_count(node->content)) {
      SET_STRING("/>");
      return;
    }
    SET_STRING(">");
  }

  count = gf_list_count(node->content);
  for (i=0; i<count; i++) {
    GF_XMLNode *child = (GF_XMLNode*)gf_list_get(node->content, i);
    gf_xml_dom_node_serialize(child, GF_FALSE, node->valid_content, str, alloc_size, size);
  }
  if (!content_only) {
    SET_STRING("</");
    if (node->ns) {
      SET_STRING(node->ns);
      SET_STRING(":");
    }
    SET_STRING(node->name);
    SET_STRING(">");
  }
}
2080
2081
GF_EXPORT
2082
char *gf_xml_dom_serialize(GF_XMLNode *node, Bool content_only, Bool no_escape)
2083
0
{
2084
0
  u32 alloc_size = 0;
2085
0
  u32 size = 0;
2086
0
  char *str = NULL;
2087
0
  gf_xml_dom_node_serialize(node, content_only, no_escape, &str, &alloc_size, &size);
2088
0
  return str;
2089
0
}
2090
2091
GF_EXPORT
2092
char *gf_xml_dom_serialize_root(GF_XMLNode *node, Bool content_only, Bool no_escape)
2093
0
{
2094
0
  u32 alloc_size, size;
2095
0
  char *str = NULL;
2096
0
  gf_dynstrcat(&str, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", NULL);
2097
0
  if (!str) return NULL;
2098
2099
0
  alloc_size = size = (u32) strlen(str);
2100
0
  alloc_size = size + 1;
2101
0
  gf_xml_dom_node_serialize(node, content_only, no_escape, &str, &alloc_size, &size);
2102
0
  return str;
2103
0
}
2104
2105
#if 0 //unused
/*
 * Creates an attribute (name, value) and adds it to the node's attribute list,
 * creating the list if the node has none.
 * Returns the new attribute, or NULL if name or value is NULL or an allocation fails.
 */
GF_XMLAttribute *gf_xml_dom_set_attribute(GF_XMLNode *node, const char* name, const char* value) {
  GF_XMLAttribute *new_att;

  if (!(name && value)) return NULL;

  if (node->attributes == NULL) {
    node->attributes = gf_list_new();
    if (node->attributes == NULL) return NULL;
  }

  new_att = gf_xml_dom_create_attribute(name, value);
  if (new_att) {
    gf_list_add(node->attributes, new_att);
  }
  return new_att;
}

/*
 * Looks up the first attribute of node whose name equals name.
 * Returns NULL if node or name is NULL, or if no attribute matches.
 */
GF_XMLAttribute *gf_xml_dom_get_attribute(GF_XMLNode *node, const char* name) {
  u32 pos = 0;

  if (!node || !name) return NULL;

  for (;;) {
    GF_XMLAttribute *cur = (GF_XMLAttribute*)gf_list_enum(node->attributes, &pos);
    if (!cur) break;
    if (strcmp(cur->name, name) == 0) return cur;
  }
  return NULL;
}

#endif
2135
2136
GF_EXPORT
2137
0
GF_XMLAttribute *gf_xml_dom_create_attribute(const char* name, const char* value) {
2138
0
  GF_XMLAttribute *att;
2139
0
  GF_SAFEALLOC(att, GF_XMLAttribute);
2140
0
  if (!att) return NULL;
2141
2142
0
  att->name = gf_strdup(name);
2143
0
  att->value = gf_strdup(value);
2144
0
  return att;
2145
0
}
2146
2147
2148
GF_EXPORT
2149
0
GF_Err gf_xml_dom_append_child(GF_XMLNode *node, GF_XMLNode *child) {
2150
0
  if (!node || !child) return GF_BAD_PARAM;
2151
0
  if (!node->content) {
2152
0
    node->content = gf_list_new();
2153
0
    if (!node->content) return GF_OUT_OF_MEM;
2154
0
  }
2155
0
  return gf_list_add(node->content, child);
2156
0
}
2157
2158
#if 0
/*!
\brief Removes a node from the list of children of this node.

Removes the given child from the content list of the node.
\warning Doesn't free the memory of the removed child.

\param node the GF_XMLNode node
\param child the GF_XMLNode child to remove
\return GF_BAD_PARAM if an argument is NULL, the node has no content list or
        the child is not in it; otherwise the result of gf_list_rem()
 */
GF_EXPORT
GF_Err gf_xml_dom_rem_child(GF_XMLNode *node, GF_XMLNode *child) {
  s32 child_pos;

  if (!node || !child) return GF_BAD_PARAM;
  if (!node->content) return GF_BAD_PARAM;

  child_pos = gf_list_find(node->content, child);
  if (child_pos != -1) {
    return gf_list_rem(node->content, child_pos);
  }
  return GF_BAD_PARAM;
}
#endif //unused
2178
2179
2180
GF_XMLNode *gf_xml_dom_node_new(const char* ns, const char* name)
2181
0
{
2182
0
  GF_XMLNode* node;
2183
0
  GF_SAFEALLOC(node, GF_XMLNode);
2184
0
  if (!node) return NULL;
2185
0
  if (ns) {
2186
0
    node->ns = gf_strdup(ns);
2187
0
    if (!node->ns) {
2188
0
      gf_free(node);
2189
0
      return NULL;
2190
0
    }
2191
0
  }
2192
2193
0
  if (name) {
2194
0
    node->name = gf_strdup(name);
2195
0
    if (!node->name) {
2196
0
      gf_free(node->ns);
2197
0
      gf_free(node);
2198
0
      return NULL;
2199
0
    }
2200
0
    node->type = GF_XML_NODE_TYPE;
2201
0
  } else {
2202
0
    node->type = GF_XML_TEXT_TYPE;
2203
0
  }
2204
0
  return node;
2205
0
}
2206
2207
0
GF_Err gf_xml_dom_node_check_namespace(const GF_XMLNode *n, const char *expected_node_name, const char *expected_ns_prefix) {
2208
0
  u32 i;
2209
0
  GF_XMLAttribute *att;
2210
2211
  /*check we are processing the expected node*/
2212
0
  if (expected_node_name && strcmp(expected_node_name, n->name)) {
2213
0
    return GF_SG_UNKNOWN_NODE;
2214
0
  }
2215
2216
  /*check for previously declared prefix (to be manually provided)*/
2217
0
  if (!n->ns) {
2218
0
    return GF_OK;
2219
0
  }
2220
0
  if (expected_ns_prefix && !strcmp(expected_ns_prefix, n->ns)) {
2221
0
    return GF_OK;
2222
0
  }
2223
2224
  /*look for new namespace in attributes*/
2225
0
  i = 0;
2226
0
  while ( (att = (GF_XMLAttribute*)gf_list_enum(n->attributes, &i)) ) {
2227
0
    const char *ns;
2228
0
    ns = strstr(att->name, ":");
2229
0
    if (!ns) continue;
2230
2231
0
    if (!strncmp(att->name, "xmlns", 5)) {
2232
0
      if (!strcmp(ns+1, n->ns)) {
2233
0
        return GF_OK;
2234
0
      }
2235
0
    } else {
2236
0
      GF_LOG(GF_LOG_DEBUG, GF_LOG_CORE, ("[XML] Unsupported attribute namespace \"%s\": ignoring\n", att->name));
2237
0
      continue;
2238
0
    }
2239
0
  }
2240
2241
0
  GF_LOG(GF_LOG_WARNING, GF_LOG_CORE, ("[XML] Unresolved namespace \"%s\" for node \"%s\"\n", n->ns, n->name));
2242
0
  return GF_BAD_PARAM;
2243
0
}
2244
2245
/*
 * Writes str to file with the XML special characters escaped
 * (& < > ' " become &amp; &lt; &gt; &apos; &quot;).
 *
 * file:   destination stream
 * before: optional text written unescaped before str
 * str:    optional text to escape; NULL writes nothing
 * after:  optional text written unescaped after str
 */
void gf_xml_dump_string(FILE* file, const char *before, const char *str, const char *after)
{
  const char *cur;

  if (before) {
    gf_fprintf(file, "%s", before);
  }

  for (cur = str; cur && *cur; cur++) {
    const char *entity = NULL;

    switch (*cur) {
    case '&':
      entity = "&amp;";
      break;
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '\'':
      entity = "&apos;";
      break;
    case '\"':
      entity = "&quot;";
      break;
    default:
      break;
    }

    if (entity) {
      gf_fprintf(file, "%s", entity);
    } else {
      gf_fprintf(file, "%c", *cur);
    }
  }

  if (after) {
    gf_fprintf(file, "%s", after);
  }
}
2282
2283
2284
/*
 * Creates a deep copy of a node: type, valid_content, orig_pos, name,
 * namespace prefix, attributes and (recursively) children.
 *
 * Returns the copy, or NULL if node is NULL or any allocation fails; in the
 * failure case everything allocated so far is released.
 *
 * NOTE(review): the error path relies on gf_xml_dom_node_del() accepting a
 * partially built node (missing name or lists), as the original error paths
 * already did for a missing content list.
 */
GF_XMLNode *gf_xml_dom_node_clone(GF_XMLNode *node)
{
  GF_XMLNode *clone, *child;
  GF_XMLAttribute *att;
  u32 i;

  if (!node) return NULL;

  GF_SAFEALLOC(clone, GF_XMLNode);
  if (!clone) return NULL;

  clone->type = node->type;
  clone->valid_content = node->valid_content;
  clone->orig_pos = node->orig_pos;
  if (node->name) {
    clone->name = gf_strdup(node->name);
    if (!clone->name) goto fail;
  }
  if (node->ns) {
    clone->ns = gf_strdup(node->ns);
    if (!clone->ns) goto fail;
  }

  clone->attributes = gf_list_new();
  if (!clone->attributes) goto fail;
  i = 0;
  while ((att = gf_list_enum(node->attributes, &i))) {
    GF_XMLAttribute *att_clone;
    GF_SAFEALLOC(att_clone, GF_XMLAttribute);
    if (!att_clone) goto fail;

    if (att->name) att_clone->name = gf_strdup(att->name);
    if (att->value) att_clone->value = gf_strdup(att->value);

    /*the attribute copy is not owned by the clone until it is in the list: release it by hand on failure*/
    if ((att->name && !att_clone->name)
            || (att->value && !att_clone->value)
            || (gf_list_add(clone->attributes, att_clone) != GF_OK)) {
      if (att_clone->name) gf_free(att_clone->name);
      if (att_clone->value) gf_free(att_clone->value);
      gf_free(att_clone);
      goto fail;
    }
  }

  clone->content = gf_list_new();
  if (!clone->content) goto fail;
  i=0;
  while ((child = gf_list_enum(node->content, &i))) {
    GF_XMLNode *child_clone = gf_xml_dom_node_clone(child);
    if (!child_clone) goto fail;

    if (gf_list_add(clone->content, child_clone) != GF_OK) {
      gf_xml_dom_node_del(child_clone);
      goto fail;
    }
  }
  return clone;

fail:
  gf_xml_dom_node_del(clone);
  return NULL;
}