Coverage Report

Created: 2023-06-07 06:05

/src/libxml2-2.10.3/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
/* Directory separator character: a backslash when _WIN32 is defined, a slash otherwise. */
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
9.36M
/*
 * Shorthands for the read cursor and the end pointer of the context's
 * current input. VALID_CTXT is true as long as the cursor has not moved
 * past the end pointer.
 */
#define CUR(ctxt) ctxt->input->cur
43
9.36M
#define END(ctxt) ctxt->input->end
44
9.36M
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "buf.h"
47
#include "enc.h"
48
49
/*
50
 * Various global defaults for parsing
51
 */
52
53
/**
54
 * xmlCheckVersion:
55
 * @version: the include version number
56
 *
57
 * Check the compiled library version (LIBXML_VERSION) against the include one.
58
 * A difference in (version / 10000) is reported as "Fatal" through
 * xmlGenericError and on stderr; a library whose (version / 100) is lower
 * than the include's only produces a warning through xmlGenericError.
 * Neither branch visible here terminates the application.
 * xmlInitParser() is called before the comparison.
59
 */
60
void
61
0
xmlCheckVersion(int version) {
62
0
    int myversion = (int) LIBXML_VERSION;
63
64
0
    xmlInitParser();
65
66
0
    if ((myversion / 10000) != (version / 10000)) {
67
0
  xmlGenericError(xmlGenericErrorContext,
68
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
69
0
    (version / 10000), (myversion / 10000));
70
0
  fprintf(stderr,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
    }
74
0
    if ((myversion / 100) < (version / 100)) {
75
0
  xmlGenericError(xmlGenericErrorContext,
76
0
    "Warning: program compiled against libxml %d using older %d\n",
77
0
    (version / 100), (myversion / 100));
78
0
    }
79
0
}
80
81
82
/************************************************************************
83
 *                  *
84
 *    Some factorized error routines        *
85
 *                  *
86
 ************************************************************************/
87
88
89
/**
90
 * xmlErrMemory:
91
 * @ctxt:  an XML parser context (may be NULL)
92
 * @extra:  extra information appended to the message (may be NULL)
93
 *
94
 * Handle a memory allocation failure.
 *
 * Nothing is reported when @ctxt already has SAX disabled and is in the
 * XML_PARSER_EOF state. Otherwise a non-NULL @ctxt gets errNo set to
 * XML_ERR_NO_MEMORY, is moved to XML_PARSER_EOF and has SAX disabled,
 * and a fatal XML_ERR_NO_MEMORY error is raised through __xmlRaiseError.
95
 */
96
void
97
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
98
0
{
99
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
100
0
        (ctxt->instate == XML_PARSER_EOF))
101
0
  return;
102
0
    if (ctxt != NULL) {
103
0
        ctxt->errNo = XML_ERR_NO_MEMORY;
104
0
        ctxt->instate = XML_PARSER_EOF;
105
0
        ctxt->disableSAX = 1;
106
0
    }
107
0
    if (extra)
108
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
109
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
110
0
                        NULL, NULL, 0, 0,
111
0
                        "Memory allocation failed : %s\n", extra);
112
0
    else
113
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
114
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
115
0
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
116
0
}
117
118
/**
119
 * __xmlErrEncoding:
120
 * @ctxt:  an XML parser context (may be NULL)
121
 * @xmlerr:  the error number
122
 * @msg:  the error message, used as the format string for @str1 and @str2
123
 * @str1:  a string info
124
 * @str2:  a string info
125
 *
126
 * Handle an encoding error.
 *
 * Nothing is reported when @ctxt already has SAX disabled and is in the
 * XML_PARSER_EOF state. Otherwise the error is raised as XML_ERR_FATAL;
 * a non-NULL @ctxt gets errNo set to @xmlerr, is marked as not
 * well-formed, and has SAX disabled unless it is in recovery mode.
127
 */
128
void
129
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
130
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
131
3.71k
{
132
3.71k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
133
3.71k
        (ctxt->instate == XML_PARSER_EOF))
134
0
  return;
135
3.71k
    if (ctxt != NULL)
136
3.71k
        ctxt->errNo = xmlerr;
137
3.71k
    __xmlRaiseError(NULL, NULL, NULL,
138
3.71k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
139
3.71k
                    NULL, 0, (const char *) str1, (const char *) str2,
140
3.71k
                    NULL, 0, 0, msg, str1, str2);
141
3.71k
    if (ctxt != NULL) {
142
3.71k
        ctxt->wellFormed = 0;
143
3.71k
        if (ctxt->recovery == 0)
144
3.71k
            ctxt->disableSAX = 1;
145
3.71k
    }
146
3.71k
}
147
148
/**
149
 * xmlErrInternal:
150
 * @ctxt:  an XML parser context (may be NULL)
151
 * @msg:  the error message, used as the format string for @str
152
 * @str:  error information
153
 *
154
 * Handle an internal error.
 *
 * Nothing is reported when @ctxt already has SAX disabled and is in the
 * XML_PARSER_EOF state. Otherwise a fatal XML_ERR_INTERNAL_ERROR is
 * raised; a non-NULL @ctxt gets errNo set, is marked as not well-formed,
 * and has SAX disabled unless it is in recovery mode.
155
 */
156
static void LIBXML_ATTR_FORMAT(2,0)
157
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
158
5
{
159
5
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
160
5
        (ctxt->instate == XML_PARSER_EOF))
161
0
  return;
162
5
    if (ctxt != NULL)
163
5
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
164
5
    __xmlRaiseError(NULL, NULL, NULL,
165
5
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
166
5
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
167
5
                    0, 0, msg, str);
168
5
    if (ctxt != NULL) {
169
5
        ctxt->wellFormed = 0;
170
5
        if (ctxt->recovery == 0)
171
5
            ctxt->disableSAX = 1;
172
5
    }
173
5
}
174
175
/**
176
 * xmlErrEncodingInt:
177
 * @ctxt:  an XML parser context (may be NULL)
178
 * @error:  the error number
179
 * @msg:  the error message, used as the format string for @val
180
 * @val:  an integer value
181
 *
182
 * Handle an encoding error whose message carries an integer argument.
 *
 * Nothing is reported when @ctxt already has SAX disabled and is in the
 * XML_PARSER_EOF state. Otherwise the error is raised as XML_ERR_FATAL;
 * a non-NULL @ctxt gets errNo set to @error, is marked as not
 * well-formed, and has SAX disabled unless it is in recovery mode.
183
 */
184
static void LIBXML_ATTR_FORMAT(3,0)
185
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
186
                  const char *msg, int val)
187
1.73k
{
188
1.73k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
189
1.73k
        (ctxt->instate == XML_PARSER_EOF))
190
0
  return;
191
1.73k
    if (ctxt != NULL)
192
1.73k
        ctxt->errNo = error;
193
1.73k
    __xmlRaiseError(NULL, NULL, NULL,
194
1.73k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
195
1.73k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
196
1.73k
    if (ctxt != NULL) {
197
1.73k
        ctxt->wellFormed = 0;
198
1.73k
        if (ctxt->recovery == 0)
199
1.73k
            ctxt->disableSAX = 1;
200
1.73k
    }
201
1.73k
}
202
203
/**
204
 * xmlIsLetter:
205
 * @c:  a Unicode character (int)
206
 *
207
 * Check whether the character is allowed by the production
208
 * [84] Letter ::= BaseChar | Ideographic
209
 * by combining the IS_BASECHAR and IS_IDEOGRAPHIC tests.
 *
210
 * Returns 0 if not, non-zero otherwise
211
 */
212
int
213
0
xmlIsLetter(int c) {
214
0
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
215
0
}
216
217
/************************************************************************
218
 *                  *
219
 *    Input handling functions for progressive parsing  *
220
 *                  *
221
 ************************************************************************/
222
223
/* #define DEBUG_INPUT */
224
/* #define DEBUG_STACK */
225
/* #define DEBUG_PUSH */
226
227
228
/*
 * We need to keep enough input to show errors in context: when
 * xmlParserInputShrink drops consumed data it leaves the last LINE_LEN
 * consumed bytes in the buffer.
 */
229
7.61k
#define LINE_LEN        80
230
231
#ifdef DEBUG_INPUT
232
#define CHECK_BUFFER(in) check_buffer(in)
233
234
/*
 * Debug-only consistency check of a parser input against its buffer:
 * reports through xmlGenericError when base does not match the buffer
 * content, when cur lies before base, or when cur lies beyond the used
 * part of the buffer, then prints a one-line summary of the input.
 * Without DEBUG_INPUT, CHECK_BUFFER expands to nothing.
 *
 * NOTE(review): the summary passes pointers cast to int for "%x" and a
 * pointer difference for "%d"; the return type of xmlBufUse is not visible
 * here. Confirm the specifiers match the argument types before enabling.
 */
static
235
void check_buffer(xmlParserInputPtr in) {
236
    if (in->base != xmlBufContent(in->buf->buffer)) {
237
        xmlGenericError(xmlGenericErrorContext,
238
    "xmlParserInput: base mismatch problem\n");
239
    }
240
    if (in->cur < in->base) {
241
        xmlGenericError(xmlGenericErrorContext,
242
    "xmlParserInput: cur < base problem\n");
243
    }
244
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
245
        xmlGenericError(xmlGenericErrorContext,
246
    "xmlParserInput: cur > base + use problem\n");
247
    }
248
    xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
249
            (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
250
      xmlBufUse(in->buf->buffer));
251
}
252
253
#else
254
#define CHECK_BUFFER(in)
255
#endif
256
257
258
/**
259
 * xmlParserInputRead:
260
 * @in:  an XML parser input (unused)
261
 * @len:  an indicative size for the lookahead (unused)
262
 *
263
 * This function was internal and is deprecated. It performs no work and
 * ignores both arguments.
264
 *
265
 * Returns -1 as this is an error to use it.
266
 */
267
int
268
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
269
0
    return(-1);
270
0
}
271
272
/**
273
 * xmlParserInputGrow:
274
 * @in:  an XML parser input
275
 * @len:  an indicative size for the lookahead
276
 *
277
 * This function increases the input for the parser. It tries to
278
 * preserve pointers to the input buffer, and keeps already read data:
 * the cursor is saved as an offset from base and base, cur and end are
 * recomputed from the buffer after growing.
279
 *
280
 * Returns the value returned by xmlParserInputBufferGrow when data was
 * requested, -1 in case of error (NULL @in, negative @len, or an input
 * without buf, base, cur or buffer), and 0 without reading anything
281
 * when more than INPUT_CHUNK bytes are already available past the cursor
 * or when the input buffer has no read callback.
282
 */
283
int
284
182k
xmlParserInputGrow(xmlParserInputPtr in, int len) {
285
182k
    int ret;
286
182k
    size_t indx;
287
288
182k
    if ((in == NULL) || (len < 0)) return(-1);
289
#ifdef DEBUG_INPUT
290
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
291
#endif
292
182k
    if (in->buf == NULL) return(-1);
293
182k
    if (in->base == NULL) return(-1);
294
182k
    if (in->cur == NULL) return(-1);
295
182k
    if (in->buf->buffer == NULL) return(-1);
296
297
182k
    CHECK_BUFFER(in);
298
299
    /* keep the cursor as an offset so it can be rebuilt from the new base */
182k
    indx = in->cur - in->base;
300
182k
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
301
302
1.25k
  CHECK_BUFFER(in);
303
304
1.25k
        return(0);
305
1.25k
    }
306
181k
    if (in->buf->readcallback != NULL) {
307
0
  ret = xmlParserInputBufferGrow(in->buf, len);
308
0
    } else
309
181k
        return(0);
310
311
0
    in->base = xmlBufContent(in->buf->buffer);
312
0
    in->cur = in->base + indx;
313
0
    in->end = xmlBufEnd(in->buf->buffer);
314
315
0
    CHECK_BUFFER(in);
316
317
0
    return(ret);
318
181k
}
319
320
/**
321
 * xmlParserInputShrink:
322
 * @in:  an XML parser input
323
 *
324
 * This function removes used input for the parser.
 *
 * Does nothing when @in, or its buf, base, cur or buffer, is NULL.
 * When more than INPUT_CHUNK bytes were consumed, all but the last
 * LINE_LEN consumed bytes are dropped from the buffer and the number of
 * bytes actually removed is added to in->consumed. If the buffer then
 * holds at most INPUT_CHUNK bytes, a read of 2 * INPUT_CHUNK is requested
 * (its result is not checked here). Finally base, cur and end are
 * recomputed from the buffer.
325
 */
326
void
327
7.61k
xmlParserInputShrink(xmlParserInputPtr in) {
328
7.61k
    size_t used;
329
7.61k
    size_t ret;
330
331
#ifdef DEBUG_INPUT
332
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
333
#endif
334
7.61k
    if (in == NULL) return;
335
7.61k
    if (in->buf == NULL) return;
336
7.61k
    if (in->base == NULL) return;
337
7.61k
    if (in->cur == NULL) return;
338
7.61k
    if (in->buf->buffer == NULL) return;
339
340
7.61k
    CHECK_BUFFER(in);
341
342
7.61k
    used = in->cur - in->base;
343
    /*
344
     * Do not shrink large buffers of which only a tiny fraction
345
     * was consumed
346
     */
347
7.61k
    if (used > INPUT_CHUNK) {
348
7.61k
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
349
7.61k
  if (ret > 0) {
350
7.61k
            used -= ret;
351
7.61k
      in->consumed += ret;
352
7.61k
  }
353
7.61k
    }
354
355
7.61k
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
356
584
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
357
584
    }
358
359
7.61k
    in->base = xmlBufContent(in->buf->buffer);
360
7.61k
    in->cur = in->base + used;
361
7.61k
    in->end = xmlBufEnd(in->buf->buffer);
362
363
7.61k
    CHECK_BUFFER(in);
364
7.61k
}
365
366
/************************************************************************
367
 *                  *
368
 *    UTF8 character input and related functions    *
369
 *                  *
370
 ************************************************************************/
371
372
/**
373
 * xmlNextChar:
374
 * @ctxt:  the XML parser context
375
 *
376
 * Skip to the next input char: advance the cursor of the current input
 * past one character and update the line/column counters.
 *
 * Does nothing when @ctxt or its input is NULL or the parser is in the
 * XML_PARSER_EOF state. A cursor beyond the end pointer is reported as an
 * internal error and stops the parser. When the cursor sits on a 0 byte
 * and xmlParserInputGrow returns <= 0, the cursor is left in place.
 *
 * With the UTF-8 charset the cursor moves by 1 to 4 bytes after checking
 * the continuation bytes; a 3- or 4-byte value in 0xD800-0xDFFF,
 * 0xFFFE-0xFFFF or >= 0x110000 is reported but the cursor has already been
 * advanced. With any other charset the cursor moves by one byte. On a
 * malformed UTF-8 sequence an XML_ERR_INVALID_CHAR error is raised, the
 * charset is switched to XML_CHAR_ENCODING_8859_1 and one byte is skipped.
 *
 * NOTE(review): the only lead byte rejected up front here is 0xC0, whereas
 * xmlCurrentChar also rejects lead bytes with bit 0x40 clear; a
 * continuation byte followed by another continuation byte is therefore
 * skipped as a 2-byte code. Confirm whether this difference is intended.
377
 */
378
379
void
380
xmlNextChar(xmlParserCtxtPtr ctxt)
381
9.36M
{
382
9.36M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
383
9.36M
        (ctxt->input == NULL))
384
0
        return;
385
386
9.36M
    if (!(VALID_CTXT(ctxt))) {
387
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
388
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
389
0
        xmlStopParser(ctxt);
390
0
  return;
391
0
    }
392
393
9.36M
    if ((*ctxt->input->cur == 0) &&
394
9.36M
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
395
628
        return;
396
628
    }
397
398
9.36M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
399
7.78M
        const unsigned char *cur;
400
7.78M
        unsigned char c;
401
402
        /*
403
         *   2.11 End-of-Line Handling
404
         *   the literal two-character sequence "#xD#xA" or a standalone
405
         *   literal #xD, an XML processor must pass to the application
406
         *   the single character #xA.
407
         */
        /* only '\n' starts a new line here; anything else counts as one column */
408
7.78M
        if (*(ctxt->input->cur) == '\n') {
409
4.57k
            ctxt->input->line++; ctxt->input->col = 1;
410
4.57k
        } else
411
7.77M
            ctxt->input->col++;
412
413
        /*
414
         * We are supposed to handle UTF8, check it's valid
415
         * From rfc2044: encoding of the Unicode values on UTF-8:
416
         *
417
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
418
         * 0000 0000-0000 007F   0xxxxxxx
419
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
420
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
421
         *
422
         * Check for the 0x110000 limit too
423
         */
424
7.78M
        cur = ctxt->input->cur;
425
426
7.78M
        c = *cur;
427
7.78M
        if (c & 0x80) {
428
138
            if (c == 0xC0)
429
1
          goto encoding_error;
            /* a 0 byte may mean more data is needed: grow, then reload cur */
430
137
            if (cur[1] == 0) {
431
20
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
432
20
                cur = ctxt->input->cur;
433
20
            }
434
137
            if ((cur[1] & 0xc0) != 0x80)
435
24
                goto encoding_error;
436
113
            if ((c & 0xe0) == 0xe0) {
437
108
                unsigned int val;
438
439
108
                if (cur[2] == 0) {
440
1
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
441
1
                    cur = ctxt->input->cur;
442
1
                }
443
108
                if ((cur[2] & 0xc0) != 0x80)
444
1
                    goto encoding_error;
445
107
                if ((c & 0xf0) == 0xf0) {
446
51
                    if (cur[3] == 0) {
447
3
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
448
3
                        cur = ctxt->input->cur;
449
3
                    }
450
51
                    if (((c & 0xf8) != 0xf0) ||
451
51
                        ((cur[3] & 0xc0) != 0x80))
452
5
                        goto encoding_error;
453
                    /* 4-byte code */
454
46
                    ctxt->input->cur += 4;
455
46
                    val = (cur[0] & 0x7) << 18;
456
46
                    val |= (cur[1] & 0x3f) << 12;
457
46
                    val |= (cur[2] & 0x3f) << 6;
458
46
                    val |= cur[3] & 0x3f;
459
56
                } else {
460
                    /* 3-byte code */
461
56
                    ctxt->input->cur += 3;
462
56
                    val = (cur[0] & 0xf) << 12;
463
56
                    val |= (cur[1] & 0x3f) << 6;
464
56
                    val |= cur[2] & 0x3f;
465
56
                }
                /* surrogates, 0xFFFE/0xFFFF and values past 0x10FFFF are reported */
466
102
                if (((val > 0xd7ff) && (val < 0xe000)) ||
467
102
                    ((val > 0xfffd) && (val < 0x10000)) ||
468
102
                    (val >= 0x110000)) {
469
52
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
470
52
          "Char 0x%X out of allowed range\n",
471
52
          val);
472
52
                }
473
102
            } else
474
                /* 2-byte code */
475
5
                ctxt->input->cur += 2;
476
113
        } else
477
            /* 1-byte code */
478
7.78M
            ctxt->input->cur++;
479
7.78M
    } else {
480
        /*
481
         * Assume it's a fixed length encoding (1) with
482
         * a compatible encoding for the ASCII set, since
483
         * XML constructs only use < 128 chars
484
         */
485
486
1.57M
        if (*(ctxt->input->cur) == '\n') {
487
7.52k
            ctxt->input->line++; ctxt->input->col = 1;
488
7.52k
        } else
489
1.57M
            ctxt->input->col++;
490
1.57M
        ctxt->input->cur++;
491
1.57M
    }
    /* landing on a 0 byte: try to pull in more input for the next call */
492
9.36M
    if (*ctxt->input->cur == 0)
493
2.75k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
494
9.36M
    return;
495
31
encoding_error:
496
    /*
497
     * If we detect an UTF8 error that probably mean that the
498
     * input encoding didn't get properly advertised in the
499
     * declaration header. Report the error and switch the encoding
500
     * to ISO-Latin-1 (if you don't like this policy, just declare the
501
     * encoding !)
502
     */
    /* the 4-byte hex dump is only produced when at least 4 bytes remain */
503
31
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
504
31
        (ctxt->input->end - ctxt->input->cur < 4)) {
505
28
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
506
28
         "Input is not proper UTF-8, indicate encoding !\n",
507
28
         NULL, NULL);
508
28
    } else {
509
3
        char buffer[150];
510
511
3
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
512
3
      ctxt->input->cur[0], ctxt->input->cur[1],
513
3
      ctxt->input->cur[2], ctxt->input->cur[3]);
514
3
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
515
3
         "Input is not proper UTF-8, indicate encoding !\n%s",
516
3
         BAD_CAST buffer, NULL);
517
3
    }
518
31
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
519
31
    ctxt->input->cur++;
520
31
    return;
521
9.36M
}
522
523
/**
524
 * xmlCurrentChar:
525
 * @ctxt:  the XML parser context
526
 * @len:  pointer to the length of the char read
527
 *
528
 * The current char value, if using UTF-8 this may actually span multiple
529
 * bytes in the input buffer. Implement the end of line normalization:
530
 * 2.11 End-of-Line Handling
531
 * Wherever an external parsed entity or the literal entity value
532
 * of an internal parsed entity contains either the literal two-character
533
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
534
 * must pass to the application the single character #xA.
535
 * This behavior can conveniently be produced by normalizing all
536
 * line breaks to #xA on input, before parsing.)
 *
 * Side effects visible in this function: when the current byte is #xD and
 * the next one is #xA, the input cursor is advanced onto the #xA; the
 * input may be grown through xmlParserInputGrow; on a malformed UTF-8
 * sequence with at least 4 bytes left an error is raised and the context
 * charset is switched to XML_CHAR_ENCODING_8859_1.
537
 *
538
 * Returns the current char value and stores its length in bytes in @len.
 * Returns 0 without touching @len when @ctxt, @len or the input is NULL or
 * the parser is in the XML_PARSER_EOF state. Returns 0 with *len set to 0
 * when a malformed sequence is found with fewer than 4 bytes left before
 * the end of the input. A decoded value failing IS_CHAR is reported but
 * still returned.
539
 */
540
541
int
542
502M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
543
502M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
544
502M
    if (ctxt->instate == XML_PARSER_EOF)
545
0
  return(0);
546
547
    /* printable ASCII needs neither decoding nor end-of-line handling */
502M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
548
5.89M
      *len = 1;
549
5.89M
      return((int) *ctxt->input->cur);
550
5.89M
    }
551
496M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
552
  /*
553
   * We are supposed to handle UTF8, check it's valid
554
   * From rfc2044: encoding of the Unicode values on UTF-8:
555
   *
556
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
557
   * 0000 0000-0000 007F   0xxxxxxx
558
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
559
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
560
   *
561
   * Check for the 0x110000 limit too
562
   */
563
494M
  const unsigned char *cur = ctxt->input->cur;
564
494M
  unsigned char c;
565
494M
  unsigned int val;
566
567
494M
  c = *cur;
568
494M
  if (c & 0x80) {
      /* a lead byte must have bit 0x40 set; 0xC0 is rejected as well */
569
493M
      if (((c & 0x40) == 0) || (c == 0xC0))
570
601
    goto encoding_error;
      /* a 0 byte may mean more data is needed: grow, then reload cur */
571
493M
      if (cur[1] == 0) {
572
211
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
573
211
                cur = ctxt->input->cur;
574
211
            }
575
493M
      if ((cur[1] & 0xc0) != 0x80)
576
3.34k
    goto encoding_error;
577
493M
      if ((c & 0xe0) == 0xe0) {
578
491M
    if (cur[2] == 0) {
579
19
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
580
19
                    cur = ctxt->input->cur;
581
19
                }
582
491M
    if ((cur[2] & 0xc0) != 0x80)
583
42
        goto encoding_error;
584
491M
    if ((c & 0xf0) == 0xf0) {
585
10.5k
        if (cur[3] == 0) {
586
17
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
587
17
                        cur = ctxt->input->cur;
588
17
                    }
589
10.5k
        if (((c & 0xf8) != 0xf0) ||
590
10.5k
      ((cur[3] & 0xc0) != 0x80))
591
45
      goto encoding_error;
592
        /* 4-byte code */
593
10.5k
        *len = 4;
594
10.5k
        val = (cur[0] & 0x7) << 18;
595
10.5k
        val |= (cur[1] & 0x3f) << 12;
596
10.5k
        val |= (cur[2] & 0x3f) << 6;
597
10.5k
        val |= cur[3] & 0x3f;
        /* reject overlong encodings: the value would fit in fewer bytes */
598
10.5k
        if (val < 0x10000)
599
16
      goto encoding_error;
600
491M
    } else {
601
      /* 3-byte code */
602
491M
        *len = 3;
603
491M
        val = (cur[0] & 0xf) << 12;
604
491M
        val |= (cur[1] & 0x3f) << 6;
605
491M
        val |= cur[2] & 0x3f;
606
491M
        if (val < 0x800)
607
18
      goto encoding_error;
608
491M
    }
609
491M
      } else {
610
        /* 2-byte code */
611
2.32M
    *len = 2;
612
2.32M
    val = (cur[0] & 0x1f) << 6;
613
2.32M
    val |= cur[1] & 0x3f;
614
2.32M
    if (val < 0x80)
615
11
        goto encoding_error;
616
2.32M
      }
617
493M
      if (!IS_CHAR(val)) {
618
757
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
619
757
          "Char 0x%X out of allowed range\n", val);
620
757
      }
621
493M
      return(val);
622
493M
  } else {
623
      /* 1-byte code */
624
678k
      *len = 1;
625
678k
      if (*ctxt->input->cur == 0)
626
9.47k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
      /* a 0 byte located before the end pointer is a real NUL in the data */
627
678k
      if ((*ctxt->input->cur == 0) &&
628
678k
          (ctxt->input->end > ctxt->input->cur)) {
629
923
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
630
923
          "Char 0x0 out of allowed range\n", 0);
631
923
      }
      /* #xD is returned as #xA; for #xD#xA the cursor moves onto the #xA */
632
678k
      if (*ctxt->input->cur == 0xD) {
633
645k
    if (ctxt->input->cur[1] == 0xA) {
634
2.53k
        ctxt->input->cur++;
635
2.53k
    }
636
645k
    return(0xA);
637
645k
      }
638
33.6k
      return((int) *ctxt->input->cur);
639
678k
  }
640
494M
    }
641
    /*
642
     * Assume it's a fixed length encoding (1) with
643
     * a compatible encoding for the ASCII set, since
644
     * XML constructs only use < 128 chars
645
     */
646
2.81M
    *len = 1;
647
2.81M
    if (*ctxt->input->cur == 0xD) {
648
344k
  if (ctxt->input->cur[1] == 0xA) {
649
1.58k
      ctxt->input->cur++;
650
1.58k
  }
651
344k
  return(0xA);
652
344k
    }
653
2.46M
    return((int) *ctxt->input->cur);
654
4.07k
encoding_error:
655
    /*
656
     * An encoding problem may arise from a truncated input buffer
657
     * splitting a character in the middle. In that case do not raise
658
     * an error but return 0 to indicate an end of stream problem
659
     */
660
4.07k
    if (ctxt->input->end - ctxt->input->cur < 4) {
661
507
  *len = 0;
662
507
  return(0);
663
507
    }
664
665
    /*
666
     * If we detect an UTF8 error that probably mean that the
667
     * input encoding didn't get properly advertised in the
668
     * declaration header. Report the error and switch the encoding
669
     * to ISO-Latin-1 (if you don't like this policy, just declare the
670
     * encoding !)
671
     */
672
3.56k
    {
673
3.56k
        char buffer[150];
674
675
3.56k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
676
3.56k
      ctxt->input->cur[0], ctxt->input->cur[1],
677
3.56k
      ctxt->input->cur[2], ctxt->input->cur[3]);
678
3.56k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
679
3.56k
         "Input is not proper UTF-8, indicate encoding !\n%s",
680
3.56k
         BAD_CAST buffer, NULL);
681
3.56k
    }
682
3.56k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
683
3.56k
    *len = 1;
684
3.56k
    return((int) *ctxt->input->cur);
685
4.07k
}
686
687
/**
688
 * xmlStringCurrentChar:
689
 * @ctxt:  the XML parser context (may be NULL, in which case UTF-8 is assumed)
690
 * @cur:  pointer to the beginning of the char
691
 * @len:  pointer to the length of the char read
692
 *
693
 * The current char value, if using UTF-8 this may actually span multiple
694
 * bytes in the input buffer. Unlike xmlCurrentChar, the bytes are read
 * from @cur, no end-of-line normalization is applied and no check for
 * overlong encodings is made. When @ctxt is non-NULL and its charset is
 * not UTF-8, a single byte is returned as is.
695
 *
696
 * Returns the current char value and stores its length in bytes in @len.
 * Returns 0 without touching @len when @len or @cur is NULL. On a
 * malformed sequence, returns 0 with *len set to 0 when @ctxt or its input
 * is NULL or fewer than 4 bytes remain in the context input; otherwise an
 * error is raised and the raw byte at @cur is returned with *len set to 1.
 * A decoded value failing IS_CHAR is reported but still returned.
 *
 * NOTE(review): the error path sizes and dumps the bytes at
 * ctxt->input->cur, not at @cur, so the reported bytes may not be the
 * offending ones. Confirm whether @cur always points into the current input.
697
 */
698
699
int
700
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
701
39.0M
{
702
39.0M
    if ((len == NULL) || (cur == NULL)) return(0);
703
39.0M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
704
        /*
705
         * We are supposed to handle UTF8, check it's valid
706
         * From rfc2044: encoding of the Unicode values on UTF-8:
707
         *
708
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
709
         * 0000 0000-0000 007F   0xxxxxxx
710
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
711
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
712
         *
713
         * Check for the 0x110000 limit too
714
         */
715
38.9M
        unsigned char c;
716
38.9M
        unsigned int val;
717
718
38.9M
        c = *cur;
719
38.9M
        if (c & 0x80) {
            /* a 0 terminator fails the continuation test, so reads stop there */
720
38.8M
            if ((cur[1] & 0xc0) != 0x80)
721
0
                goto encoding_error;
722
38.8M
            if ((c & 0xe0) == 0xe0) {
723
724
38.8M
                if ((cur[2] & 0xc0) != 0x80)
725
0
                    goto encoding_error;
726
38.8M
                if ((c & 0xf0) == 0xf0) {
727
1.01k
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
728
0
                        goto encoding_error;
729
                    /* 4-byte code */
730
1.01k
                    *len = 4;
731
1.01k
                    val = (cur[0] & 0x7) << 18;
732
1.01k
                    val |= (cur[1] & 0x3f) << 12;
733
1.01k
                    val |= (cur[2] & 0x3f) << 6;
734
1.01k
                    val |= cur[3] & 0x3f;
735
38.8M
                } else {
736
                    /* 3-byte code */
737
38.8M
                    *len = 3;
738
38.8M
                    val = (cur[0] & 0xf) << 12;
739
38.8M
                    val |= (cur[1] & 0x3f) << 6;
740
38.8M
                    val |= cur[2] & 0x3f;
741
38.8M
                }
742
38.8M
            } else {
743
                /* 2-byte code */
744
28.4k
                *len = 2;
745
28.4k
                val = (cur[0] & 0x1f) << 6;
746
28.4k
                val |= cur[1] & 0x3f;
747
28.4k
            }
748
38.8M
            if (!IS_CHAR(val)) {
749
0
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
750
0
          "Char 0x%X out of allowed range\n", val);
751
0
            }
752
38.8M
            return (val);
753
38.8M
        } else {
754
            /* 1-byte code */
755
72.8k
            *len = 1;
756
72.8k
            return ((int) *cur);
757
72.8k
        }
758
38.9M
    }
759
    /*
760
     * Assume it's a fixed length encoding (1) with
761
     * a compatible encoding for the ASCII set, since
762
     * XML constructs only use < 128 chars
763
     */
764
79.9k
    *len = 1;
765
79.9k
    return ((int) *cur);
766
0
encoding_error:
767
768
    /*
769
     * An encoding problem may arise from a truncated input buffer
770
     * splitting a character in the middle. In that case do not raise
771
     * an error but return 0 to indicate an end of stream problem
772
     */
773
0
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
774
0
        (ctxt->input->end - ctxt->input->cur < 4)) {
775
0
  *len = 0;
776
0
  return(0);
777
0
    }
778
    /*
779
     * If we detect an UTF8 error that probably mean that the
780
     * input encoding didn't get properly advertised in the
781
     * declaration header. Report the error and switch the encoding
782
     * to ISO-Latin-1 (if you don't like this policy, just declare the
783
     * encoding !)
784
     */
    /* NOTE(review): unlike xmlCurrentChar, ctxt->charset is not changed here */
785
0
    {
786
0
        char buffer[150];
787
788
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
789
0
      ctxt->input->cur[0], ctxt->input->cur[1],
790
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
791
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
792
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
793
0
         BAD_CAST buffer, NULL);
794
0
    }
795
0
    *len = 1;
796
0
    return ((int) *cur);
797
0
}
798
799
/**
800
 * xmlCopyCharMultiByte:
801
 * @out:  pointer to an array of xmlChar
802
 * @val:  the char value
803
 *
804
 * Append the char value in the array, encoded as UTF-8. Up to 4 bytes are
 * written at @out and no terminator is added. Any @val below 0x80,
 * including a negative one, is stored as a single byte cast to xmlChar.
805
 *
806
 * Returns the number of xmlChar written, or 0 when @out is NULL or @val is
 * 0x110000 or above (the latter is also reported as an error).
807
 */
808
int
809
324M
xmlCopyCharMultiByte(xmlChar *out, int val) {
810
324M
    if (out == NULL) return(0);
811
    /*
812
     * We are supposed to handle UTF8, check it's valid
813
     * From rfc2044: encoding of the Unicode values on UTF-8:
814
     *
815
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
816
     * 0000 0000-0000 007F   0xxxxxxx
817
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
818
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
819
     */
820
324M
    if  (val >= 0x80) {
821
324M
  xmlChar *savedout = out;
822
324M
  int bits;
  /* write the lead byte; bits is the shift of the first continuation byte */
823
324M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
824
321M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
825
16.6k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
826
0
  else {
827
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
828
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
829
0
            val);
830
0
      return(0);
831
0
  }
  /* emit the continuation bytes, 6 bits at a time, most significant first */
832
970M
  for ( ; bits >= 0; bits-= 6)
833
646M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
834
324M
  return (out - savedout);
835
324M
    }
836
1.84k
    *out = (xmlChar) val;
837
1.84k
    return 1;
838
324M
}
839
840
/**
841
 * xmlCopyChar:
842
 * @len:  Ignored, compatibility
843
 * @out:  pointer to an array of xmlChar
844
 * @val:  the char value
845
 *
846
 * Append the char value in the array. Values of 0x80 and above are
 * delegated to xmlCopyCharMultiByte; smaller values are stored directly
 * as a single byte.
847
 *
848
 * Returns the number of xmlChar written, or 0 when @out is NULL.
849
 */
850
851
int
852
129k
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
853
129k
    if (out == NULL) return(0);
854
    /* the len parameter is ignored */
855
129k
    if  (val >= 0x80) {
856
13.9k
  return(xmlCopyCharMultiByte (out, val));
857
13.9k
    }
858
115k
    *out = (xmlChar) val;
859
115k
    return 1;
860
129k
}
861
862
/************************************************************************
863
 *                  *
864
 *    Commodity functions to switch encodings     *
865
 *                  *
866
 ************************************************************************/
867
868
static int
869
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
870
                          xmlCharEncodingHandlerPtr handler, int len);
871
/**
872
 * xmlSwitchEncoding:
873
 * @ctxt:  the parser context
874
 * @enc:  the encoding value (number)
875
 *
876
 * change the input functions when discovering the character encoding
877
 * of a given entity.
878
 *
879
 * Returns 0 in case of success, -1 otherwise
880
 */
881
int
882
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
883
4.66k
{
884
4.66k
    xmlCharEncodingHandlerPtr handler;
885
4.66k
    int len = -1;
886
4.66k
    int ret;
887
888
4.66k
    if (ctxt == NULL) return(-1);
889
4.66k
    switch (enc) {
890
0
  case XML_CHAR_ENCODING_ERROR:
891
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
892
0
                     "encoding unknown\n", NULL, NULL);
893
0
      return(-1);
894
0
  case XML_CHAR_ENCODING_NONE:
895
      /* let's assume it's UTF-8 without the XML decl */
896
0
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
897
0
      return(0);
898
3.84k
  case XML_CHAR_ENCODING_UTF8:
899
      /* default encoding, no conversion should be needed */
900
3.84k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
901
902
      /*
903
       * Errata on XML-1.0 June 20 2001
904
       * Specific handling of the Byte Order Mark for
905
       * UTF-8
906
       */
907
3.84k
      if ((ctxt->input != NULL) &&
908
3.84k
    (ctxt->input->cur[0] == 0xEF) &&
909
3.84k
    (ctxt->input->cur[1] == 0xBB) &&
910
3.84k
    (ctxt->input->cur[2] == 0xBF)) {
911
7
    ctxt->input->cur += 3;
912
7
      }
913
3.84k
      return(0);
914
365
    case XML_CHAR_ENCODING_UTF16LE:
915
726
    case XML_CHAR_ENCODING_UTF16BE:
916
        /*The raw input characters are encoded
917
         *in UTF-16. As we expect this function
918
         *to be called after xmlCharEncInFunc, we expect
919
         *ctxt->input->cur to contain UTF-8 encoded characters.
920
         *So the raw UTF16 Byte Order Mark
921
         *has also been converted into
922
         *an UTF-8 BOM. Let's skip that BOM.
923
         */
924
726
        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
925
726
            (ctxt->input->cur[0] == 0xEF) &&
926
726
            (ctxt->input->cur[1] == 0xBB) &&
927
726
            (ctxt->input->cur[2] == 0xBF)) {
928
442
            ctxt->input->cur += 3;
929
442
        }
930
726
        len = 90;
931
726
  break;
932
0
    case XML_CHAR_ENCODING_UCS2:
933
0
        len = 90;
934
0
  break;
935
18
    case XML_CHAR_ENCODING_UCS4BE:
936
25
    case XML_CHAR_ENCODING_UCS4LE:
937
26
    case XML_CHAR_ENCODING_UCS4_2143:
938
28
    case XML_CHAR_ENCODING_UCS4_3412:
939
28
        len = 180;
940
28
  break;
941
63
    case XML_CHAR_ENCODING_EBCDIC:
942
63
    case XML_CHAR_ENCODING_8859_1:
943
63
    case XML_CHAR_ENCODING_8859_2:
944
63
    case XML_CHAR_ENCODING_8859_3:
945
63
    case XML_CHAR_ENCODING_8859_4:
946
63
    case XML_CHAR_ENCODING_8859_5:
947
63
    case XML_CHAR_ENCODING_8859_6:
948
63
    case XML_CHAR_ENCODING_8859_7:
949
63
    case XML_CHAR_ENCODING_8859_8:
950
63
    case XML_CHAR_ENCODING_8859_9:
951
63
    case XML_CHAR_ENCODING_ASCII:
952
63
    case XML_CHAR_ENCODING_2022_JP:
953
63
    case XML_CHAR_ENCODING_SHIFT_JIS:
954
63
    case XML_CHAR_ENCODING_EUC_JP:
955
63
        len = 45;
956
63
  break;
957
4.66k
    }
958
817
    handler = xmlGetCharEncodingHandler(enc);
959
817
    if (handler == NULL) {
960
  /*
961
   * Default handlers.
962
   */
963
3
  switch (enc) {
964
0
      case XML_CHAR_ENCODING_ASCII:
965
    /* default encoding, no conversion should be needed */
966
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
967
0
    return(0);
968
0
      case XML_CHAR_ENCODING_8859_1:
969
0
    if ((ctxt->inputNr == 1) &&
970
0
        (ctxt->encoding == NULL) &&
971
0
        (ctxt->input != NULL) &&
972
0
        (ctxt->input->encoding != NULL)) {
973
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
974
0
    }
975
0
    ctxt->charset = enc;
976
0
    return(0);
977
3
      default:
978
3
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
979
3
                        "encoding not supported: %s\n",
980
3
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
981
                /*
982
                 * TODO: We could recover from errors in external entities
983
                 * if we didn't stop the parser. But most callers of this
984
                 * function don't check the return value.
985
                 */
986
3
                xmlStopParser(ctxt);
987
3
                return(-1);
988
3
        }
989
3
    }
990
814
    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
991
814
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
992
        /*
993
   * on encoding conversion errors, stop the parser
994
   */
995
0
        xmlStopParser(ctxt);
996
0
  ctxt->errNo = XML_I18N_CONV_FAILED;
997
0
    }
998
814
    return(ret);
999
817
}
1000
1001
/**
1002
 * xmlSwitchInputEncodingInt:
1003
 * @ctxt:  the parser context
1004
 * @input:  the input stream
1005
 * @handler:  the encoding handler
1006
 * @len:  the number of bytes to convert for the first line or -1
1007
 *
1008
 * change the input functions when discovering the character encoding
1009
 * of a given entity.
1010
 *
1011
 * Returns 0 in case of success, -1 otherwise
1012
 */
1013
static int
1014
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1015
                          xmlCharEncodingHandlerPtr handler, int len)
1016
3.74k
{
1017
3.74k
    int nbchars;
1018
1019
3.74k
    if (handler == NULL)
1020
0
        return (-1);
1021
3.74k
    if (input == NULL)
1022
0
        return (-1);
1023
3.74k
    if (input->buf != NULL) {
1024
3.74k
  ctxt->charset = XML_CHAR_ENCODING_UTF8;
1025
1026
3.74k
        if (input->buf->encoder != NULL) {
1027
            /*
1028
             * Check in case the auto encoding detection triggered
1029
             * in already.
1030
             */
1031
817
            if (input->buf->encoder == handler)
1032
727
                return (0);
1033
1034
            /*
1035
             * "UTF-16" can be used for both LE and BE
1036
             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1037
             BAD_CAST "UTF-16", 6)) &&
1038
             (!xmlStrncmp(BAD_CAST handler->name,
1039
             BAD_CAST "UTF-16", 6))) {
1040
             return(0);
1041
             }
1042
             */
1043
1044
            /*
1045
             * Note: this is a bit dangerous, but that's what it
1046
             * takes to use nearly compatible signature for different
1047
             * encodings.
1048
             *
1049
             * FIXME: Encoders might buffer partial byte sequences, so
1050
             * this probably can't work. We should return an error and
1051
             * make sure that callers never try to switch the encoding
1052
             * twice.
1053
             */
1054
90
            xmlCharEncCloseFunc(input->buf->encoder);
1055
90
            input->buf->encoder = handler;
1056
90
            return (0);
1057
817
        }
1058
2.92k
        input->buf->encoder = handler;
1059
1060
        /*
1061
         * Is there already some content down the pipe to convert ?
1062
         */
1063
2.92k
        if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1064
2.92k
            int processed;
1065
2.92k
      unsigned int use;
1066
1067
            /*
1068
             * Specific handling of the Byte Order Mark for
1069
             * UTF-16
1070
             */
1071
2.92k
            if ((handler->name != NULL) &&
1072
2.92k
                (!strcmp(handler->name, "UTF-16LE") ||
1073
2.92k
                 !strcmp(handler->name, "UTF-16")) &&
1074
2.92k
                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1075
1
                input->cur += 2;
1076
1
            }
1077
2.92k
            if ((handler->name != NULL) &&
1078
2.92k
                (!strcmp(handler->name, "UTF-16BE")) &&
1079
2.92k
                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1080
1
                input->cur += 2;
1081
1
            }
1082
            /*
1083
             * Errata on XML-1.0 June 20 2001
1084
             * Specific handling of the Byte Order Mark for
1085
             * UTF-8
1086
             */
1087
2.92k
            if ((handler->name != NULL) &&
1088
2.92k
                (!strcmp(handler->name, "UTF-8")) &&
1089
2.92k
                (input->cur[0] == 0xEF) &&
1090
2.92k
                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1091
0
                input->cur += 3;
1092
0
            }
1093
1094
            /*
1095
             * Shrink the current input buffer.
1096
             * Move it as the raw buffer and create a new input buffer
1097
             */
1098
2.92k
            processed = input->cur - input->base;
1099
2.92k
            xmlBufShrink(input->buf->buffer, processed);
1100
2.92k
            input->buf->raw = input->buf->buffer;
1101
2.92k
            input->buf->buffer = xmlBufCreate();
1102
2.92k
      input->buf->rawconsumed = processed;
1103
2.92k
      use = xmlBufUse(input->buf->raw);
1104
1105
2.92k
            if (ctxt->html) {
1106
                /*
1107
                 * convert as much as possible of the buffer
1108
                 */
1109
0
                nbchars = xmlCharEncInput(input->buf, 1);
1110
2.92k
            } else {
1111
                /*
1112
                 * convert just enough to get
1113
                 * '<?xml version="1.0" encoding="xxx"?>'
1114
                 * parsed with the autodetected encoding
1115
                 * into the parser reading buffer.
1116
                 */
1117
2.92k
                nbchars = xmlCharEncFirstLineInput(input->buf, len);
1118
2.92k
            }
1119
2.92k
            xmlBufResetInput(input->buf->buffer, input);
1120
2.92k
            if (nbchars < 0) {
1121
5
                xmlErrInternal(ctxt,
1122
5
                               "switching encoding: encoder error\n",
1123
5
                               NULL);
1124
5
                return (-1);
1125
5
            }
1126
2.92k
      input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1127
2.92k
        }
1128
2.92k
        return (0);
1129
2.92k
    } else {
1130
0
  xmlErrInternal(ctxt,
1131
0
                "static memory buffer doesn't support encoding\n", NULL);
1132
        /*
1133
         * Callers assume that the input buffer takes ownership of the
1134
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1135
         * handlers and avoids a memory leak.
1136
         */
1137
0
        xmlCharEncCloseFunc(handler);
1138
0
  return (-1);
1139
0
    }
1140
3.74k
}
1141
1142
/**
1143
 * xmlSwitchInputEncoding:
1144
 * @ctxt:  the parser context
1145
 * @input:  the input stream
1146
 * @handler:  the encoding handler
1147
 *
1148
 * DEPRECATED: Use xmlSwitchToEncoding
1149
 *
1150
 * change the input functions when discovering the character encoding
1151
 * of a given entity.
1152
 *
1153
 * Returns 0 in case of success, -1 otherwise
1154
 */
1155
int
1156
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1157
0
                          xmlCharEncodingHandlerPtr handler) {
1158
0
    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1159
0
}
1160
1161
/**
1162
 * xmlSwitchToEncoding:
1163
 * @ctxt:  the parser context
1164
 * @handler:  the encoding handler
1165
 *
1166
 * change the input functions when discovering the character encoding
1167
 * of a given entity.
1168
 *
1169
 * Returns 0 in case of success, -1 otherwise
1170
 */
1171
int
1172
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1173
2.93k
{
1174
2.93k
    if (ctxt == NULL)
1175
0
        return(-1);
1176
2.93k
    return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
1177
2.93k
}
1178
1179
/************************************************************************
1180
 *                  *
1181
 *  Commodity functions to handle entities processing   *
1182
 *                  *
1183
 ************************************************************************/
1184
1185
/**
1186
 * xmlFreeInputStream:
1187
 * @input:  an xmlParserInputPtr
1188
 *
1189
 * Free up an input stream.
1190
 */
1191
void
1192
18.3k
xmlFreeInputStream(xmlParserInputPtr input) {
1193
18.3k
    if (input == NULL) return;
1194
1195
18.3k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1196
18.3k
    if (input->directory != NULL) xmlFree((char *) input->directory);
1197
18.3k
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1198
18.3k
    if (input->version != NULL) xmlFree((char *) input->version);
1199
18.3k
    if ((input->free != NULL) && (input->base != NULL))
1200
0
        input->free((xmlChar *) input->base);
1201
18.3k
    if (input->buf != NULL)
1202
16.0k
        xmlFreeParserInputBuffer(input->buf);
1203
18.3k
    xmlFree(input);
1204
18.3k
}
1205
1206
/**
1207
 * xmlNewInputStream:
1208
 * @ctxt:  an XML parser context
1209
 *
1210
 * Create a new input stream structure.
1211
 *
1212
 * Returns the new input stream or NULL
1213
 */
1214
xmlParserInputPtr
1215
18.3k
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1216
18.3k
    xmlParserInputPtr input;
1217
1218
18.3k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1219
18.3k
    if (input == NULL) {
1220
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1221
0
  return(NULL);
1222
0
    }
1223
18.3k
    memset(input, 0, sizeof(xmlParserInput));
1224
18.3k
    input->line = 1;
1225
18.3k
    input->col = 1;
1226
18.3k
    input->standalone = -1;
1227
1228
    /*
1229
     * If the context is NULL the id cannot be initialized, but that
1230
     * should not happen while parsing which is the situation where
1231
     * the id is actually needed.
1232
     */
1233
18.3k
    if (ctxt != NULL)
1234
18.3k
        input->id = ctxt->input_id++;
1235
1236
18.3k
    return(input);
1237
18.3k
}
1238
1239
/**
1240
 * xmlNewIOInputStream:
1241
 * @ctxt:  an XML parser context
1242
 * @input:  an I/O Input
1243
 * @enc:  the charset encoding if known
1244
 *
1245
 * Create a new input stream structure encapsulating the @input into
1246
 * a stream suitable for the parser.
1247
 *
1248
 * Returns the new input stream or NULL
1249
 */
1250
xmlParserInputPtr
1251
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1252
0
              xmlCharEncoding enc) {
1253
0
    xmlParserInputPtr inputStream;
1254
1255
0
    if (input == NULL) return(NULL);
1256
0
    if (xmlParserDebugEntities)
1257
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1258
0
    inputStream = xmlNewInputStream(ctxt);
1259
0
    if (inputStream == NULL) {
1260
0
  return(NULL);
1261
0
    }
1262
0
    inputStream->filename = NULL;
1263
0
    inputStream->buf = input;
1264
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1265
1266
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1267
0
        xmlSwitchEncoding(ctxt, enc);
1268
0
    }
1269
1270
0
    return(inputStream);
1271
0
}
1272
1273
/**
1274
 * xmlNewEntityInputStream:
1275
 * @ctxt:  an XML parser context
1276
 * @entity:  an Entity pointer
1277
 *
1278
 * Create a new input stream based on an xmlEntityPtr
1279
 *
1280
 * Returns the new input stream or NULL
1281
 */
1282
xmlParserInputPtr
1283
0
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1284
0
    xmlParserInputPtr input;
1285
1286
0
    if (entity == NULL) {
1287
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1288
0
                 NULL);
1289
0
  return(NULL);
1290
0
    }
1291
0
    if (xmlParserDebugEntities)
1292
0
  xmlGenericError(xmlGenericErrorContext,
1293
0
    "new input from entity: %s\n", entity->name);
1294
0
    if (entity->content == NULL) {
1295
0
  switch (entity->etype) {
1296
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1297
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1298
0
                   entity->name);
1299
0
                break;
1300
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1301
0
            case XML_EXTERNAL_PARAMETER_ENTITY:
1302
0
    return(xmlLoadExternalEntity((char *) entity->URI,
1303
0
           (char *) entity->ExternalID, ctxt));
1304
0
            case XML_INTERNAL_GENERAL_ENTITY:
1305
0
          xmlErrInternal(ctxt,
1306
0
          "Internal entity %s without content !\n",
1307
0
                   entity->name);
1308
0
                break;
1309
0
            case XML_INTERNAL_PARAMETER_ENTITY:
1310
0
          xmlErrInternal(ctxt,
1311
0
          "Internal parameter entity %s without content !\n",
1312
0
                   entity->name);
1313
0
                break;
1314
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1315
0
          xmlErrInternal(ctxt,
1316
0
          "Predefined entity %s without content !\n",
1317
0
                   entity->name);
1318
0
                break;
1319
0
  }
1320
0
  return(NULL);
1321
0
    }
1322
0
    input = xmlNewInputStream(ctxt);
1323
0
    if (input == NULL) {
1324
0
  return(NULL);
1325
0
    }
1326
0
    if (entity->URI != NULL)
1327
0
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1328
0
    input->base = entity->content;
1329
0
    if (entity->length == 0)
1330
0
        entity->length = xmlStrlen(entity->content);
1331
0
    input->cur = entity->content;
1332
0
    input->length = entity->length;
1333
0
    input->end = &entity->content[input->length];
1334
0
    return(input);
1335
0
}
1336
1337
/**
1338
 * xmlNewStringInputStream:
1339
 * @ctxt:  an XML parser context
1340
 * @buffer:  an memory buffer
1341
 *
1342
 * Create a new input stream based on a memory buffer.
1343
 * Returns the new input stream
1344
 */
1345
xmlParserInputPtr
1346
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1347
0
    xmlParserInputPtr input;
1348
1349
0
    if (buffer == NULL) {
1350
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1351
0
                 NULL);
1352
0
  return(NULL);
1353
0
    }
1354
0
    if (xmlParserDebugEntities)
1355
0
  xmlGenericError(xmlGenericErrorContext,
1356
0
    "new fixed input: %.30s\n", buffer);
1357
0
    input = xmlNewInputStream(ctxt);
1358
0
    if (input == NULL) {
1359
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1360
0
  return(NULL);
1361
0
    }
1362
0
    input->base = buffer;
1363
0
    input->cur = buffer;
1364
0
    input->length = xmlStrlen(buffer);
1365
0
    input->end = &buffer[input->length];
1366
0
    return(input);
1367
0
}
1368
1369
/**
1370
 * xmlNewInputFromFile:
1371
 * @ctxt:  an XML parser context
1372
 * @filename:  the filename to use as entity
1373
 *
1374
 * Create a new input stream based on a file or an URL.
1375
 *
1376
 * Returns the new input stream or NULL in case of error
1377
 */
1378
xmlParserInputPtr
1379
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1380
0
    xmlParserInputBufferPtr buf;
1381
0
    xmlParserInputPtr inputStream;
1382
0
    char *directory = NULL;
1383
0
    xmlChar *URI = NULL;
1384
1385
0
    if (xmlParserDebugEntities)
1386
0
  xmlGenericError(xmlGenericErrorContext,
1387
0
    "new input from file: %s\n", filename);
1388
0
    if (ctxt == NULL) return(NULL);
1389
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1390
0
    if (buf == NULL) {
1391
0
  if (filename == NULL)
1392
0
      __xmlLoaderErr(ctxt,
1393
0
                     "failed to load external entity: NULL filename \n",
1394
0
         NULL);
1395
0
  else
1396
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1397
0
         (const char *) filename);
1398
0
  return(NULL);
1399
0
    }
1400
1401
0
    inputStream = xmlNewInputStream(ctxt);
1402
0
    if (inputStream == NULL) {
1403
0
  xmlFreeParserInputBuffer(buf);
1404
0
  return(NULL);
1405
0
    }
1406
1407
0
    inputStream->buf = buf;
1408
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1409
0
    if (inputStream == NULL)
1410
0
        return(NULL);
1411
1412
0
    if (inputStream->filename == NULL)
1413
0
  URI = xmlStrdup((xmlChar *) filename);
1414
0
    else
1415
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1416
0
    directory = xmlParserGetDirectory((const char *) URI);
1417
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1418
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1419
0
    if (URI != NULL) xmlFree((char *) URI);
1420
0
    inputStream->directory = directory;
1421
1422
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1423
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1424
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1425
0
    return(inputStream);
1426
0
}
1427
1428
/************************************************************************
1429
 *                  *
1430
 *    Commodity functions to handle parser contexts   *
1431
 *                  *
1432
 ************************************************************************/
1433
1434
/**
1435
 * xmlInitParserCtxt:
1436
 * @ctxt:  an XML parser context
1437
 *
1438
 * Initialize a parser context
1439
 *
1440
 * Returns 0 in case of success and -1 in case of error
1441
 */
1442
1443
int
1444
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1445
18.3k
{
1446
18.3k
    xmlParserInputPtr input;
1447
1448
18.3k
    if(ctxt==NULL) {
1449
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1450
0
        return(-1);
1451
0
    }
1452
1453
18.3k
    xmlInitParser();
1454
1455
18.3k
    if (ctxt->dict == NULL)
1456
18.3k
  ctxt->dict = xmlDictCreate();
1457
18.3k
    if (ctxt->dict == NULL) {
1458
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1459
0
  return(-1);
1460
0
    }
1461
18.3k
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1462
1463
18.3k
    if (ctxt->sax == NULL)
1464
18.3k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1465
18.3k
    if (ctxt->sax == NULL) {
1466
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1467
0
  return(-1);
1468
0
    }
1469
18.3k
    else
1470
18.3k
        xmlSAXVersion(ctxt->sax, 2);
1471
1472
18.3k
    ctxt->maxatts = 0;
1473
18.3k
    ctxt->atts = NULL;
1474
    /* Allocate the Input stack */
1475
18.3k
    if (ctxt->inputTab == NULL) {
1476
18.3k
  ctxt->inputTab = (xmlParserInputPtr *)
1477
18.3k
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1478
18.3k
  ctxt->inputMax = 5;
1479
18.3k
    }
1480
18.3k
    if (ctxt->inputTab == NULL) {
1481
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1482
0
  ctxt->inputNr = 0;
1483
0
  ctxt->inputMax = 0;
1484
0
  ctxt->input = NULL;
1485
0
  return(-1);
1486
0
    }
1487
18.3k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1488
0
        xmlFreeInputStream(input);
1489
0
    }
1490
18.3k
    ctxt->inputNr = 0;
1491
18.3k
    ctxt->input = NULL;
1492
1493
18.3k
    ctxt->version = NULL;
1494
18.3k
    ctxt->encoding = NULL;
1495
18.3k
    ctxt->standalone = -1;
1496
18.3k
    ctxt->hasExternalSubset = 0;
1497
18.3k
    ctxt->hasPErefs = 0;
1498
18.3k
    ctxt->html = 0;
1499
18.3k
    ctxt->external = 0;
1500
18.3k
    ctxt->instate = XML_PARSER_START;
1501
18.3k
    ctxt->token = 0;
1502
18.3k
    ctxt->directory = NULL;
1503
1504
    /* Allocate the Node stack */
1505
18.3k
    if (ctxt->nodeTab == NULL) {
1506
18.3k
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1507
18.3k
  ctxt->nodeMax = 10;
1508
18.3k
    }
1509
18.3k
    if (ctxt->nodeTab == NULL) {
1510
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1511
0
  ctxt->nodeNr = 0;
1512
0
  ctxt->nodeMax = 0;
1513
0
  ctxt->node = NULL;
1514
0
  ctxt->inputNr = 0;
1515
0
  ctxt->inputMax = 0;
1516
0
  ctxt->input = NULL;
1517
0
  return(-1);
1518
0
    }
1519
18.3k
    ctxt->nodeNr = 0;
1520
18.3k
    ctxt->node = NULL;
1521
1522
    /* Allocate the Name stack */
1523
18.3k
    if (ctxt->nameTab == NULL) {
1524
18.3k
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1525
18.3k
  ctxt->nameMax = 10;
1526
18.3k
    }
1527
18.3k
    if (ctxt->nameTab == NULL) {
1528
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1529
0
  ctxt->nodeNr = 0;
1530
0
  ctxt->nodeMax = 0;
1531
0
  ctxt->node = NULL;
1532
0
  ctxt->inputNr = 0;
1533
0
  ctxt->inputMax = 0;
1534
0
  ctxt->input = NULL;
1535
0
  ctxt->nameNr = 0;
1536
0
  ctxt->nameMax = 0;
1537
0
  ctxt->name = NULL;
1538
0
  return(-1);
1539
0
    }
1540
18.3k
    ctxt->nameNr = 0;
1541
18.3k
    ctxt->name = NULL;
1542
1543
    /* Allocate the space stack */
1544
18.3k
    if (ctxt->spaceTab == NULL) {
1545
18.3k
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1546
18.3k
  ctxt->spaceMax = 10;
1547
18.3k
    }
1548
18.3k
    if (ctxt->spaceTab == NULL) {
1549
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1550
0
  ctxt->nodeNr = 0;
1551
0
  ctxt->nodeMax = 0;
1552
0
  ctxt->node = NULL;
1553
0
  ctxt->inputNr = 0;
1554
0
  ctxt->inputMax = 0;
1555
0
  ctxt->input = NULL;
1556
0
  ctxt->nameNr = 0;
1557
0
  ctxt->nameMax = 0;
1558
0
  ctxt->name = NULL;
1559
0
  ctxt->spaceNr = 0;
1560
0
  ctxt->spaceMax = 0;
1561
0
  ctxt->space = NULL;
1562
0
  return(-1);
1563
0
    }
1564
18.3k
    ctxt->spaceNr = 1;
1565
18.3k
    ctxt->spaceMax = 10;
1566
18.3k
    ctxt->spaceTab[0] = -1;
1567
18.3k
    ctxt->space = &ctxt->spaceTab[0];
1568
18.3k
    ctxt->userData = ctxt;
1569
18.3k
    ctxt->myDoc = NULL;
1570
18.3k
    ctxt->wellFormed = 1;
1571
18.3k
    ctxt->nsWellFormed = 1;
1572
18.3k
    ctxt->valid = 1;
1573
18.3k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1574
18.3k
    if (ctxt->loadsubset) {
1575
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1576
0
    }
1577
18.3k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1578
18.3k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1579
18.3k
    if (ctxt->pedantic) {
1580
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1581
0
    }
1582
18.3k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1583
18.3k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1584
18.3k
    if (ctxt->keepBlanks == 0) {
1585
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1586
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1587
0
    }
1588
1589
18.3k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1590
18.3k
    ctxt->vctxt.userData = ctxt;
1591
18.3k
    ctxt->vctxt.error = xmlParserValidityError;
1592
18.3k
    ctxt->vctxt.warning = xmlParserValidityWarning;
1593
18.3k
    if (ctxt->validate) {
1594
0
  if (xmlGetWarningsDefaultValue == 0)
1595
0
      ctxt->vctxt.warning = NULL;
1596
0
  else
1597
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1598
0
  ctxt->vctxt.nodeMax = 0;
1599
0
        ctxt->options |= XML_PARSE_DTDVALID;
1600
0
    }
1601
18.3k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1602
18.3k
    if (ctxt->replaceEntities) {
1603
0
        ctxt->options |= XML_PARSE_NOENT;
1604
0
    }
1605
18.3k
    ctxt->record_info = 0;
1606
18.3k
    ctxt->checkIndex = 0;
1607
18.3k
    ctxt->inSubset = 0;
1608
18.3k
    ctxt->errNo = XML_ERR_OK;
1609
18.3k
    ctxt->depth = 0;
1610
18.3k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1611
18.3k
    ctxt->catalogs = NULL;
1612
18.3k
    ctxt->nbentities = 0;
1613
18.3k
    ctxt->sizeentities = 0;
1614
18.3k
    ctxt->sizeentcopy = 0;
1615
18.3k
    ctxt->input_id = 1;
1616
18.3k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1617
18.3k
    return(0);
1618
18.3k
}
1619
1620
/**
1621
 * xmlFreeParserCtxt:
1622
 * @ctxt:  an XML parser context
1623
 *
1624
 * Free all the memory used by a parser context. However the parsed
1625
 * document in ctxt->myDoc is not freed.
1626
 */
1627
1628
void
1629
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1630
18.3k
{
1631
18.3k
    xmlParserInputPtr input;
1632
1633
18.3k
    if (ctxt == NULL) return;
1634
1635
36.6k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1636
18.3k
        xmlFreeInputStream(input);
1637
18.3k
    }
1638
18.3k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1639
18.3k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1640
18.3k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1641
18.3k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1642
18.3k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1643
18.3k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1644
18.3k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1645
18.3k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1646
18.3k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1647
18.3k
#ifdef LIBXML_SAX1_ENABLED
1648
18.3k
    if ((ctxt->sax != NULL) &&
1649
18.3k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1650
#else
1651
    if (ctxt->sax != NULL)
1652
#endif /* LIBXML_SAX1_ENABLED */
1653
18.3k
        xmlFree(ctxt->sax);
1654
18.3k
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1655
18.3k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1656
18.3k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1657
18.3k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1658
18.3k
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1659
18.3k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1660
18.3k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1661
18.3k
    if (ctxt->attsDefault != NULL)
1662
1.18k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1663
18.3k
    if (ctxt->attsSpecial != NULL)
1664
1.29k
        xmlHashFree(ctxt->attsSpecial, NULL);
1665
18.3k
    if (ctxt->freeElems != NULL) {
1666
0
        xmlNodePtr cur, next;
1667
1668
0
  cur = ctxt->freeElems;
1669
0
  while (cur != NULL) {
1670
0
      next = cur->next;
1671
0
      xmlFree(cur);
1672
0
      cur = next;
1673
0
  }
1674
0
    }
1675
18.3k
    if (ctxt->freeAttrs != NULL) {
1676
0
        xmlAttrPtr cur, next;
1677
1678
0
  cur = ctxt->freeAttrs;
1679
0
  while (cur != NULL) {
1680
0
      next = cur->next;
1681
0
      xmlFree(cur);
1682
0
      cur = next;
1683
0
  }
1684
0
    }
1685
    /*
1686
     * cleanup the error strings
1687
     */
1688
18.3k
    if (ctxt->lastError.message != NULL)
1689
17.4k
        xmlFree(ctxt->lastError.message);
1690
18.3k
    if (ctxt->lastError.file != NULL)
1691
0
        xmlFree(ctxt->lastError.file);
1692
18.3k
    if (ctxt->lastError.str1 != NULL)
1693
9.62k
        xmlFree(ctxt->lastError.str1);
1694
18.3k
    if (ctxt->lastError.str2 != NULL)
1695
177
        xmlFree(ctxt->lastError.str2);
1696
18.3k
    if (ctxt->lastError.str3 != NULL)
1697
3
        xmlFree(ctxt->lastError.str3);
1698
1699
18.3k
#ifdef LIBXML_CATALOG_ENABLED
1700
18.3k
    if (ctxt->catalogs != NULL)
1701
39
  xmlCatalogFreeLocal(ctxt->catalogs);
1702
18.3k
#endif
1703
18.3k
    xmlFree(ctxt);
1704
18.3k
}
1705
1706
/**
1707
 * xmlNewParserCtxt:
1708
 *
1709
 * Allocate and initialize a new parser context.
1710
 *
1711
 * Returns the xmlParserCtxtPtr or NULL
1712
 */
1713
1714
xmlParserCtxtPtr
1715
xmlNewParserCtxt(void)
1716
18.3k
{
1717
18.3k
    xmlParserCtxtPtr ctxt;
1718
1719
18.3k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1720
18.3k
    if (ctxt == NULL) {
1721
0
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1722
0
  return(NULL);
1723
0
    }
1724
18.3k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1725
18.3k
    if (xmlInitParserCtxt(ctxt) < 0) {
1726
0
        xmlFreeParserCtxt(ctxt);
1727
0
  return(NULL);
1728
0
    }
1729
18.3k
    return(ctxt);
1730
18.3k
}
1731
1732
/************************************************************************
1733
 *                  *
1734
 *    Handling of node information        *
1735
 *                  *
1736
 ************************************************************************/
1737
1738
/**
1739
 * xmlClearParserCtxt:
1740
 * @ctxt:  an XML parser context
1741
 *
1742
 * Clear (release owned resources) and reinitialize a parser context
1743
 */
1744
1745
void
1746
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1747
0
{
1748
0
  if (ctxt==NULL)
1749
0
    return;
1750
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1751
0
  xmlCtxtReset(ctxt);
1752
0
}
1753
1754
1755
/**
1756
 * xmlParserFindNodeInfo:
1757
 * @ctx:  an XML parser context
1758
 * @node:  an XML node within the tree
1759
 *
1760
 * Find the parser node info struct for a given node
1761
 *
1762
 * Returns an xmlParserNodeInfo block pointer or NULL
1763
 */
1764
const xmlParserNodeInfo *
1765
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1766
0
{
1767
0
    unsigned long pos;
1768
1769
0
    if ((ctx == NULL) || (node == NULL))
1770
0
        return (NULL);
1771
    /* Find position where node should be at */
1772
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1773
0
    if (pos < ctx->node_seq.length
1774
0
        && ctx->node_seq.buffer[pos].node == node)
1775
0
        return &ctx->node_seq.buffer[pos];
1776
0
    else
1777
0
        return NULL;
1778
0
}
1779
1780
1781
/**
1782
 * xmlInitNodeInfoSeq:
1783
 * @seq:  a node info sequence pointer
1784
 *
1785
 * -- Initialize (set to initial state) node info sequence
1786
 */
1787
void
1788
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1789
18.3k
{
1790
18.3k
    if (seq == NULL)
1791
0
        return;
1792
18.3k
    seq->length = 0;
1793
18.3k
    seq->maximum = 0;
1794
18.3k
    seq->buffer = NULL;
1795
18.3k
}
1796
1797
/**
1798
 * xmlClearNodeInfoSeq:
1799
 * @seq:  a node info sequence pointer
1800
 *
1801
 * -- Clear (release memory and reinitialize) node
1802
 *   info sequence
1803
 */
1804
void
1805
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1806
0
{
1807
0
    if (seq == NULL)
1808
0
        return;
1809
0
    if (seq->buffer != NULL)
1810
0
        xmlFree(seq->buffer);
1811
0
    xmlInitNodeInfoSeq(seq);
1812
0
}
1813
1814
/**
1815
 * xmlParserFindNodeInfoIndex:
1816
 * @seq:  a node info sequence pointer
1817
 * @node:  an XML node pointer
1818
 *
1819
 *
1820
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1821
 *   the given node is or should be at in a sorted sequence
1822
 *
1823
 * Returns a long indicating the position of the record
1824
 */
1825
unsigned long
1826
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1827
                           const xmlNodePtr node)
1828
0
{
1829
0
    unsigned long upper, lower, middle;
1830
0
    int found = 0;
1831
1832
0
    if ((seq == NULL) || (node == NULL))
1833
0
        return ((unsigned long) -1);
1834
1835
    /* Do a binary search for the key */
1836
0
    lower = 1;
1837
0
    upper = seq->length;
1838
0
    middle = 0;
1839
0
    while (lower <= upper && !found) {
1840
0
        middle = lower + (upper - lower) / 2;
1841
0
        if (node == seq->buffer[middle - 1].node)
1842
0
            found = 1;
1843
0
        else if (node < seq->buffer[middle - 1].node)
1844
0
            upper = middle - 1;
1845
0
        else
1846
0
            lower = middle + 1;
1847
0
    }
1848
1849
    /* Return position */
1850
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
1851
0
        return middle;
1852
0
    else
1853
0
        return middle - 1;
1854
0
}
1855
1856
1857
/**
1858
 * xmlParserAddNodeInfo:
1859
 * @ctxt:  an XML parser context
1860
 * @info:  a node info sequence pointer
1861
 *
1862
 * Insert node info record into the sorted sequence
1863
 */
1864
void
1865
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1866
                     const xmlParserNodeInfoPtr info)
1867
0
{
1868
0
    unsigned long pos;
1869
1870
0
    if ((ctxt == NULL) || (info == NULL)) return;
1871
1872
    /* Find pos and check to see if node is already in the sequence */
1873
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1874
0
                                     info->node);
1875
1876
0
    if ((pos < ctxt->node_seq.length) &&
1877
0
        (ctxt->node_seq.buffer != NULL) &&
1878
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
1879
0
        ctxt->node_seq.buffer[pos] = *info;
1880
0
    }
1881
1882
    /* Otherwise, we need to add new node to buffer */
1883
0
    else {
1884
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
1885
0
      (ctxt->node_seq.buffer == NULL)) {
1886
0
            xmlParserNodeInfo *tmp_buffer;
1887
0
            unsigned int byte_size;
1888
1889
0
            if (ctxt->node_seq.maximum == 0)
1890
0
                ctxt->node_seq.maximum = 2;
1891
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
1892
0
      (2 * ctxt->node_seq.maximum));
1893
1894
0
            if (ctxt->node_seq.buffer == NULL)
1895
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1896
0
            else
1897
0
                tmp_buffer =
1898
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1899
0
                                                     byte_size);
1900
1901
0
            if (tmp_buffer == NULL) {
1902
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
1903
0
                return;
1904
0
            }
1905
0
            ctxt->node_seq.buffer = tmp_buffer;
1906
0
            ctxt->node_seq.maximum *= 2;
1907
0
        }
1908
1909
        /* If position is not at end, move elements out of the way */
1910
0
        if (pos != ctxt->node_seq.length) {
1911
0
            unsigned long i;
1912
1913
0
            for (i = ctxt->node_seq.length; i > pos; i--)
1914
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1915
0
        }
1916
1917
        /* Copy element and increase length */
1918
0
        ctxt->node_seq.buffer[pos] = *info;
1919
0
        ctxt->node_seq.length++;
1920
0
    }
1921
0
}
1922
1923
/************************************************************************
1924
 *                  *
1925
 *    Default settings          *
1926
 *                  *
1927
 ************************************************************************/
1928
/**
1929
 * xmlPedanticParserDefault:
1930
 * @val:  int 0 or 1
1931
 *
1932
 * Set and return the previous value for enabling pedantic warnings.
1933
 *
1934
 * Returns the last value for 0 for no substitution, 1 for substitution.
1935
 */
1936
1937
int
1938
0
xmlPedanticParserDefault(int val) {
1939
0
    int old = xmlPedanticParserDefaultValue;
1940
1941
0
    xmlPedanticParserDefaultValue = val;
1942
0
    return(old);
1943
0
}
1944
1945
/**
1946
 * xmlLineNumbersDefault:
1947
 * @val:  int 0 or 1
1948
 *
1949
 * Set and return the previous value for enabling line numbers in elements
1950
 * contents. This may break on old application and is turned off by default.
1951
 *
1952
 * Returns the last value for 0 for no substitution, 1 for substitution.
1953
 */
1954
1955
int
1956
0
xmlLineNumbersDefault(int val) {
1957
0
    int old = xmlLineNumbersDefaultValue;
1958
1959
0
    xmlLineNumbersDefaultValue = val;
1960
0
    return(old);
1961
0
}
1962
1963
/**
1964
 * xmlSubstituteEntitiesDefault:
1965
 * @val:  int 0 or 1
1966
 *
1967
 * Set and return the previous value for default entity support.
1968
 * Initially the parser always keep entity references instead of substituting
1969
 * entity values in the output. This function has to be used to change the
1970
 * default parser behavior
1971
 * SAX::substituteEntities() has to be used for changing that on a file by
1972
 * file basis.
1973
 *
1974
 * Returns the last value for 0 for no substitution, 1 for substitution.
1975
 */
1976
1977
int
1978
0
xmlSubstituteEntitiesDefault(int val) {
1979
0
    int old = xmlSubstituteEntitiesDefaultValue;
1980
1981
0
    xmlSubstituteEntitiesDefaultValue = val;
1982
0
    return(old);
1983
0
}
1984
1985
/**
1986
 * xmlKeepBlanksDefault:
1987
 * @val:  int 0 or 1
1988
 *
1989
 * Set and return the previous value for default blanks text nodes support.
1990
 * The 1.x version of the parser used an heuristic to try to detect
1991
 * ignorable white spaces. As a result the SAX callback was generating
1992
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
1993
 * using the DOM output text nodes containing those blanks were not generated.
1994
 * The 2.x and later version will switch to the XML standard way and
1995
 * ignorableWhitespace() are only generated when running the parser in
1996
 * validating mode and when the current element doesn't allow CDATA or
1997
 * mixed content.
1998
 * This function is provided as a way to force the standard behavior
1999
 * on 1.X libs and to switch back to the old mode for compatibility when
2000
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2001
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2002
 * nodes generated.
2003
 * This value also affect autogeneration of indentation when saving code
2004
 * if blanks sections are kept, indentation is not generated.
2005
 *
2006
 * Returns the last value for 0 for no substitution, 1 for substitution.
2007
 */
2008
2009
int
2010
0
xmlKeepBlanksDefault(int val) {
2011
0
    int old = xmlKeepBlanksDefaultValue;
2012
2013
0
    xmlKeepBlanksDefaultValue = val;
2014
0
    if (!val) xmlIndentTreeOutput = 1;
2015
0
    return(old);
2016
0
}
2017