Coverage Report

Created: 2024-01-17 17:01

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
2.24G
/*
 * Shorthand accessors for the current input of a parser context.
 * The argument is parenthesized so that any pointer expression
 * (not only a plain identifier) expands correctly.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not moved past the input end. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * check the compiled lib version against the include one.
61
 * This can warn or immediately kill the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a redefinition of attribute error
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
0
{
102
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
0
        (ctxt->instate == XML_PARSER_EOF))
104
0
  return;
105
0
    if (ctxt != NULL) {
106
0
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
0
        ctxt->instate = XML_PARSER_EOF;
108
0
        ctxt->disableSAX = 1;
109
0
    }
110
0
    if (extra)
111
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
0
                        NULL, NULL, 0, 0,
114
0
                        "Memory allocation failed : %s\n", extra);
115
0
    else
116
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
0
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
0
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  an string info
127
 * @str2:  an string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
3.17M
{
135
3.17M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
3.17M
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
3.17M
    if (ctxt != NULL)
139
3.17M
        ctxt->errNo = xmlerr;
140
3.17M
    __xmlRaiseError(NULL, NULL, NULL,
141
3.17M
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
3.17M
                    NULL, 0, (const char *) str1, (const char *) str2,
143
3.17M
                    NULL, 0, 0, msg, str1, str2);
144
3.17M
    if (ctxt != NULL) {
145
3.17M
        ctxt->wellFormed = 0;
146
3.17M
        if (ctxt->recovery == 0)
147
131k
            ctxt->disableSAX = 1;
148
3.17M
    }
149
3.17M
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
229
{
162
229
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
229
        (ctxt->instate == XML_PARSER_EOF))
164
0
  return;
165
229
    if (ctxt != NULL)
166
229
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
229
    __xmlRaiseError(NULL, NULL, NULL,
168
229
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
229
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
229
                    0, 0, msg, str);
171
229
    if (ctxt != NULL) {
172
229
        ctxt->wellFormed = 0;
173
229
        if (ctxt->recovery == 0)
174
151
            ctxt->disableSAX = 1;
175
229
    }
176
229
}
177
178
/**
179
 * xmlErrEncodingInt:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @msg:  the error message
183
 * @val:  an integer value
184
 *
185
 * n encoding error
186
 */
187
static void LIBXML_ATTR_FORMAT(3,0)
188
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189
                  const char *msg, int val)
190
575k
{
191
575k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
575k
        (ctxt->instate == XML_PARSER_EOF))
193
0
  return;
194
575k
    if (ctxt != NULL)
195
570k
        ctxt->errNo = error;
196
575k
    __xmlRaiseError(NULL, NULL, NULL,
197
575k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198
575k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199
575k
    if (ctxt != NULL) {
200
570k
        ctxt->wellFormed = 0;
201
570k
        if (ctxt->recovery == 0)
202
129k
            ctxt->disableSAX = 1;
203
570k
    }
204
575k
}
205
206
/**
 * xmlIsLetter:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
219
220
/************************************************************************
221
 *                  *
222
 *    Input handling functions for progressive parsing  *
223
 *                  *
224
 ************************************************************************/
225
226
/* #define DEBUG_INPUT */
227
/* #define DEBUG_STACK */
228
/* #define DEBUG_PUSH */
229
230
231
/* we need to keep enough input to show errors in context */
232
633k
#define LINE_LEN        80
233
234
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input (must have a buffer attached)
 *
 * Debugging aid, only compiled with DEBUG_INPUT: report through
 * xmlGenericError when the cached base/cur pointers of the input no
 * longer agree with the underlying buffer, then dump the buffer state.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Print pointers with %p and cast the offset and the size to types
     * that match their conversion specifiers; passing a pointer for %x
     * or a ptrdiff_t/size_t for %d is undefined behaviour.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (void *) in, (void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
259
260
261
/**
262
 * xmlParserInputRead:
263
 * @in:  an XML parser input
264
 * @len:  an indicative size for the lookahead
265
 *
266
 * DEPRECATED: This function was internal and is deprecated.
267
 *
268
 * Returns -1 as this is an error to use it.
269
 */
270
int
271
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
272
0
    return(-1);
273
0
}
274
275
/**
276
 * xmlParserInputGrow:
277
 * @in:  an XML parser input
278
 * @len:  an indicative size for the lookahead
279
 *
280
 * DEPRECATED: Don't use.
281
 *
282
 * This function increase the input for the parser. It tries to
283
 * preserve pointers to the input buffer, and keep already read data
284
 *
285
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
286
 * end of this entity
287
 */
288
int
289
1.21G
xmlParserInputGrow(xmlParserInputPtr in, int len) {
290
1.21G
    int ret;
291
1.21G
    size_t indx;
292
293
1.21G
    if ((in == NULL) || (len < 0)) return(-1);
294
#ifdef DEBUG_INPUT
295
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
296
#endif
297
1.21G
    if (in->buf == NULL) return(-1);
298
19.2M
    if (in->base == NULL) return(-1);
299
19.2M
    if (in->cur == NULL) return(-1);
300
19.2M
    if (in->buf->buffer == NULL) return(-1);
301
302
    /* Don't grow memory buffers. */
303
19.2M
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
304
17.4M
        return(0);
305
306
1.76M
    CHECK_BUFFER(in);
307
308
1.76M
    indx = in->cur - in->base;
309
1.76M
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
310
311
127k
  CHECK_BUFFER(in);
312
313
127k
        return(0);
314
127k
    }
315
1.64M
    ret = xmlParserInputBufferGrow(in->buf, len);
316
317
1.64M
    in->base = xmlBufContent(in->buf->buffer);
318
1.64M
    if (in->base == NULL) {
319
0
        in->base = BAD_CAST "";
320
0
        in->cur = in->base;
321
0
        in->end = in->base;
322
0
        return(-1);
323
0
    }
324
1.64M
    in->cur = in->base + indx;
325
1.64M
    in->end = xmlBufEnd(in->buf->buffer);
326
327
1.64M
    CHECK_BUFFER(in);
328
329
1.64M
    return(ret);
330
1.64M
}
331
332
/**
333
 * xmlParserInputShrink:
334
 * @in:  an XML parser input
335
 *
336
 * This function removes used input for the parser.
337
 */
338
void
339
633k
xmlParserInputShrink(xmlParserInputPtr in) {
340
633k
    size_t used;
341
633k
    size_t ret;
342
343
#ifdef DEBUG_INPUT
344
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
345
#endif
346
633k
    if (in == NULL) return;
347
633k
    if (in->buf == NULL) return;
348
633k
    if (in->base == NULL) return;
349
633k
    if (in->cur == NULL) return;
350
633k
    if (in->buf->buffer == NULL) return;
351
352
633k
    CHECK_BUFFER(in);
353
354
633k
    used = in->cur - in->base;
355
    /*
356
     * Do not shrink on large buffers whose only a tiny fraction
357
     * was consumed
358
     */
359
633k
    if (used > INPUT_CHUNK) {
360
633k
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
361
633k
  if (ret > 0) {
362
633k
            used -= ret;
363
633k
            if ((ret > ULONG_MAX) ||
364
633k
                (in->consumed > ULONG_MAX - (unsigned long)ret))
365
0
                in->consumed = ULONG_MAX;
366
633k
            else
367
633k
                in->consumed += ret;
368
633k
  }
369
633k
    }
370
371
633k
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
372
381k
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
373
381k
    }
374
375
633k
    in->base = xmlBufContent(in->buf->buffer);
376
633k
    in->cur = in->base + used;
377
633k
    in->end = xmlBufEnd(in->buf->buffer);
378
379
633k
    CHECK_BUFFER(in);
380
633k
}
381
382
/************************************************************************
383
 *                  *
384
 *    UTF8 character input and related functions    *
385
 *                  *
386
 ************************************************************************/
387
388
/**
389
 * xmlNextChar:
390
 * @ctxt:  the XML parser context
391
 *
392
 * Skip to the next char input char.
393
 */
394
395
void
396
xmlNextChar(xmlParserCtxtPtr ctxt)
397
2.24G
{
398
2.24G
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
399
2.24G
        (ctxt->input == NULL))
400
35
        return;
401
402
2.24G
    if (!(VALID_CTXT(ctxt))) {
403
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
404
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
405
0
        xmlStopParser(ctxt);
406
0
  return;
407
0
    }
408
409
2.24G
    if ((ctxt->input->cur >= ctxt->input->end) &&
410
2.24G
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
411
841
        return;
412
841
    }
413
414
2.24G
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
415
1.90G
        const unsigned char *cur;
416
1.90G
        unsigned char c;
417
418
        /*
419
         *   2.11 End-of-Line Handling
420
         *   the literal two-character sequence "#xD#xA" or a standalone
421
         *   literal #xD, an XML processor must pass to the application
422
         *   the single character #xA.
423
         */
424
1.90G
        if (*(ctxt->input->cur) == '\n') {
425
90.9M
            ctxt->input->line++; ctxt->input->col = 1;
426
90.9M
        } else
427
1.81G
            ctxt->input->col++;
428
429
        /*
430
         * We are supposed to handle UTF8, check it's valid
431
         * From rfc2044: encoding of the Unicode values on UTF-8:
432
         *
433
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
434
         * 0000 0000-0000 007F   0xxxxxxx
435
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
436
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
437
         *
438
         * Check for the 0x110000 limit too
439
         */
440
1.90G
        cur = ctxt->input->cur;
441
442
1.90G
        c = *cur;
443
1.90G
        if (c & 0x80) {
444
892k
            if (c == 0xC0)
445
426
          goto encoding_error;
446
891k
            if (cur[1] == 0) {
447
1.26k
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
448
1.26k
                cur = ctxt->input->cur;
449
1.26k
            }
450
891k
            if ((cur[1] & 0xc0) != 0x80)
451
36.3k
                goto encoding_error;
452
855k
            if ((c & 0xe0) == 0xe0) {
453
626k
                unsigned int val;
454
455
626k
                if (cur[2] == 0) {
456
278
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
457
278
                    cur = ctxt->input->cur;
458
278
                }
459
626k
                if ((cur[2] & 0xc0) != 0x80)
460
1.16k
                    goto encoding_error;
461
625k
                if ((c & 0xf0) == 0xf0) {
462
11.1k
                    if (cur[3] == 0) {
463
255
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464
255
                        cur = ctxt->input->cur;
465
255
                    }
466
11.1k
                    if (((c & 0xf8) != 0xf0) ||
467
11.1k
                        ((cur[3] & 0xc0) != 0x80))
468
762
                        goto encoding_error;
469
                    /* 4-byte code */
470
10.3k
                    ctxt->input->cur += 4;
471
10.3k
                    val = (cur[0] & 0x7) << 18;
472
10.3k
                    val |= (cur[1] & 0x3f) << 12;
473
10.3k
                    val |= (cur[2] & 0x3f) << 6;
474
10.3k
                    val |= cur[3] & 0x3f;
475
613k
                } else {
476
                    /* 3-byte code */
477
613k
                    ctxt->input->cur += 3;
478
613k
                    val = (cur[0] & 0xf) << 12;
479
613k
                    val |= (cur[1] & 0x3f) << 6;
480
613k
                    val |= cur[2] & 0x3f;
481
613k
                }
482
624k
                if (((val > 0xd7ff) && (val < 0xe000)) ||
483
624k
                    ((val > 0xfffd) && (val < 0x10000)) ||
484
624k
                    (val >= 0x110000)) {
485
156k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
486
156k
          "Char 0x%X out of allowed range\n",
487
156k
          val);
488
156k
                }
489
624k
            } else
490
                /* 2-byte code */
491
229k
                ctxt->input->cur += 2;
492
855k
        } else
493
            /* 1-byte code */
494
1.90G
            ctxt->input->cur++;
495
1.90G
    } else {
496
        /*
497
         * Assume it's a fixed length encoding (1) with
498
         * a compatible encoding for the ASCII set, since
499
         * XML constructs only use < 128 chars
500
         */
501
502
331M
        if (*(ctxt->input->cur) == '\n') {
503
7.82M
            ctxt->input->line++; ctxt->input->col = 1;
504
7.82M
        } else
505
324M
            ctxt->input->col++;
506
331M
        ctxt->input->cur++;
507
331M
    }
508
2.24G
    if (*ctxt->input->cur == 0)
509
167M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
510
2.24G
    return;
511
38.6k
encoding_error:
512
    /*
513
     * If we detect an UTF8 error that probably mean that the
514
     * input encoding didn't get properly advertised in the
515
     * declaration header. Report the error and switch the encoding
516
     * to ISO-Latin-1 (if you don't like this policy, just declare the
517
     * encoding !)
518
     */
519
38.6k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
520
38.6k
        (ctxt->input->end - ctxt->input->cur < 4)) {
521
1.81k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
522
1.81k
         "Input is not proper UTF-8, indicate encoding !\n",
523
1.81k
         NULL, NULL);
524
36.8k
    } else {
525
36.8k
        char buffer[150];
526
527
36.8k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
528
36.8k
      ctxt->input->cur[0], ctxt->input->cur[1],
529
36.8k
      ctxt->input->cur[2], ctxt->input->cur[3]);
530
36.8k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
531
36.8k
         "Input is not proper UTF-8, indicate encoding !\n%s",
532
36.8k
         BAD_CAST buffer, NULL);
533
36.8k
    }
534
38.6k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
535
38.6k
    ctxt->input->cur++;
536
38.6k
    return;
537
2.24G
}
538
539
/**
540
 * xmlCurrentChar:
541
 * @ctxt:  the XML parser context
542
 * @len:  pointer to the length of the char read
543
 *
544
 * The current char value, if using UTF-8 this may actually span multiple
545
 * bytes in the input buffer. Implement the end of line normalization:
546
 * 2.11 End-of-Line Handling
547
 * Wherever an external parsed entity or the literal entity value
548
 * of an internal parsed entity contains either the literal two-character
549
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
550
 * must pass to the application the single character #xA.
551
 * This behavior can conveniently be produced by normalizing all
552
 * line breaks to #xA on input, before parsing.)
553
 *
554
 * Returns the current char value and its length
555
 */
556
557
int
558
1.13G
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
559
1.13G
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
560
1.13G
    if (ctxt->instate == XML_PARSER_EOF)
561
2.61k
  return(0);
562
563
1.13G
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
564
947M
      *len = 1;
565
947M
      return(*ctxt->input->cur);
566
947M
    }
567
184M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
568
  /*
569
   * We are supposed to handle UTF8, check it's valid
570
   * From rfc2044: encoding of the Unicode values on UTF-8:
571
   *
572
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
573
   * 0000 0000-0000 007F   0xxxxxxx
574
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
575
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
576
   *
577
   * Check for the 0x110000 limit too
578
   */
579
82.1M
  const unsigned char *cur = ctxt->input->cur;
580
82.1M
  unsigned char c;
581
82.1M
  unsigned int val;
582
583
82.1M
  c = *cur;
584
82.1M
  if (c & 0x80) {
585
54.9M
      if (((c & 0x40) == 0) || (c == 0xC0))
586
129k
    goto encoding_error;
587
54.8M
      if (cur[1] == 0) {
588
6.26k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589
6.26k
                cur = ctxt->input->cur;
590
6.26k
            }
591
54.8M
      if ((cur[1] & 0xc0) != 0x80)
592
130k
    goto encoding_error;
593
54.7M
      if ((c & 0xe0) == 0xe0) {
594
27.5M
    if (cur[2] == 0) {
595
743
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
596
743
                    cur = ctxt->input->cur;
597
743
                }
598
27.5M
    if ((cur[2] & 0xc0) != 0x80)
599
4.99k
        goto encoding_error;
600
27.5M
    if ((c & 0xf0) == 0xf0) {
601
581k
        if (cur[3] == 0) {
602
803
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
603
803
                        cur = ctxt->input->cur;
604
803
                    }
605
581k
        if (((c & 0xf8) != 0xf0) ||
606
581k
      ((cur[3] & 0xc0) != 0x80))
607
2.58k
      goto encoding_error;
608
        /* 4-byte code */
609
578k
        *len = 4;
610
578k
        val = (cur[0] & 0x7) << 18;
611
578k
        val |= (cur[1] & 0x3f) << 12;
612
578k
        val |= (cur[2] & 0x3f) << 6;
613
578k
        val |= cur[3] & 0x3f;
614
578k
        if (val < 0x10000)
615
326
      goto encoding_error;
616
26.9M
    } else {
617
      /* 3-byte code */
618
26.9M
        *len = 3;
619
26.9M
        val = (cur[0] & 0xf) << 12;
620
26.9M
        val |= (cur[1] & 0x3f) << 6;
621
26.9M
        val |= cur[2] & 0x3f;
622
26.9M
        if (val < 0x800)
623
343
      goto encoding_error;
624
26.9M
    }
625
27.5M
      } else {
626
        /* 2-byte code */
627
27.1M
    *len = 2;
628
27.1M
    val = (cur[0] & 0x1f) << 6;
629
27.1M
    val |= cur[1] & 0x3f;
630
27.1M
    if (val < 0x80)
631
26
        goto encoding_error;
632
27.1M
      }
633
54.7M
      if (!IS_CHAR(val)) {
634
150k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
635
150k
          "Char 0x%X out of allowed range\n", val);
636
150k
      }
637
54.7M
      return(val);
638
54.7M
  } else {
639
      /* 1-byte code */
640
27.1M
      *len = 1;
641
27.1M
      if (*ctxt->input->cur == 0)
642
10.8M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
643
27.1M
      if ((*ctxt->input->cur == 0) &&
644
27.1M
          (ctxt->input->end > ctxt->input->cur)) {
645
262k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
646
262k
          "Char 0x0 out of allowed range\n", 0);
647
262k
      }
648
27.1M
      if (*ctxt->input->cur == 0xD) {
649
4.32M
    if (ctxt->input->cur[1] == 0xA) {
650
2.72M
        ctxt->input->cur++;
651
2.72M
    }
652
4.32M
    return(0xA);
653
4.32M
      }
654
22.8M
      return(*ctxt->input->cur);
655
27.1M
  }
656
82.1M
    }
657
    /*
658
     * Assume it's a fixed length encoding (1) with
659
     * a compatible encoding for the ASCII set, since
660
     * XML constructs only use < 128 chars
661
     */
662
101M
    *len = 1;
663
101M
    if (*ctxt->input->cur == 0xD) {
664
4.42M
  if (ctxt->input->cur[1] == 0xA) {
665
815k
      ctxt->input->cur++;
666
815k
  }
667
4.42M
  return(0xA);
668
4.42M
    }
669
97.4M
    return(*ctxt->input->cur);
670
268k
encoding_error:
671
    /*
672
     * An encoding problem may arise from a truncated input buffer
673
     * splitting a character in the middle. In that case do not raise
674
     * an error but return 0 to indicate an end of stream problem
675
     */
676
268k
    if (ctxt->input->end - ctxt->input->cur < 4) {
677
5.50k
  *len = 0;
678
5.50k
  return(0);
679
5.50k
    }
680
681
    /*
682
     * If we detect an UTF8 error that probably mean that the
683
     * input encoding didn't get properly advertised in the
684
     * declaration header. Report the error and switch the encoding
685
     * to ISO-Latin-1 (if you don't like this policy, just declare the
686
     * encoding !)
687
     */
688
262k
    {
689
262k
        char buffer[150];
690
691
262k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
692
262k
      ctxt->input->cur[0], ctxt->input->cur[1],
693
262k
      ctxt->input->cur[2], ctxt->input->cur[3]);
694
262k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
695
262k
         "Input is not proper UTF-8, indicate encoding !\n%s",
696
262k
         BAD_CAST buffer, NULL);
697
262k
    }
698
262k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
699
262k
    *len = 1;
700
262k
    return(*ctxt->input->cur);
701
268k
}
702
703
/**
704
 * xmlStringCurrentChar:
705
 * @ctxt:  the XML parser context
706
 * @cur:  pointer to the beginning of the char
707
 * @len:  pointer to the length of the char read
708
 *
709
 * The current char value, if using UTF-8 this may actually span multiple
710
 * bytes in the input buffer.
711
 *
712
 * Returns the current char value and its length
713
 */
714
715
int
716
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
717
24.5G
{
718
24.5G
    if ((len == NULL) || (cur == NULL)) return(0);
719
24.5G
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
720
        /*
721
         * We are supposed to handle UTF8, check it's valid
722
         * From rfc2044: encoding of the Unicode values on UTF-8:
723
         *
724
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
725
         * 0000 0000-0000 007F   0xxxxxxx
726
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
727
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
728
         *
729
         * Check for the 0x110000 limit too
730
         */
731
19.4G
        unsigned char c;
732
19.4G
        unsigned int val;
733
734
19.4G
        c = *cur;
735
19.4G
        if (c & 0x80) {
736
2.35M
            if ((cur[1] & 0xc0) != 0x80)
737
19.0k
                goto encoding_error;
738
2.34M
            if ((c & 0xe0) == 0xe0) {
739
740
63.5k
                if ((cur[2] & 0xc0) != 0x80)
741
829
                    goto encoding_error;
742
62.6k
                if ((c & 0xf0) == 0xf0) {
743
34.9k
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
744
1.75k
                        goto encoding_error;
745
                    /* 4-byte code */
746
33.2k
                    *len = 4;
747
33.2k
                    val = (cur[0] & 0x7) << 18;
748
33.2k
                    val |= (cur[1] & 0x3f) << 12;
749
33.2k
                    val |= (cur[2] & 0x3f) << 6;
750
33.2k
                    val |= cur[3] & 0x3f;
751
33.2k
                } else {
752
                    /* 3-byte code */
753
27.7k
                    *len = 3;
754
27.7k
                    val = (cur[0] & 0xf) << 12;
755
27.7k
                    val |= (cur[1] & 0x3f) << 6;
756
27.7k
                    val |= cur[2] & 0x3f;
757
27.7k
                }
758
2.27M
            } else {
759
                /* 2-byte code */
760
2.27M
                *len = 2;
761
2.27M
                val = (cur[0] & 0x1f) << 6;
762
2.27M
                val |= cur[1] & 0x3f;
763
2.27M
            }
764
2.33M
            if (!IS_CHAR(val)) {
765
5.62k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
766
5.62k
          "Char 0x%X out of allowed range\n", val);
767
5.62k
            }
768
2.33M
            return (val);
769
19.4G
        } else {
770
            /* 1-byte code */
771
19.4G
            *len = 1;
772
19.4G
            return (*cur);
773
19.4G
        }
774
19.4G
    }
775
    /*
776
     * Assume it's a fixed length encoding (1) with
777
     * a compatible encoding for the ASCII set, since
778
     * XML constructs only use < 128 chars
779
     */
780
5.07G
    *len = 1;
781
5.07G
    return (*cur);
782
21.5k
encoding_error:
783
784
    /*
785
     * An encoding problem may arise from a truncated input buffer
786
     * splitting a character in the middle. In that case do not raise
787
     * an error but return 0 to indicate an end of stream problem
788
     */
789
21.5k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
790
21.5k
        (ctxt->input->end - ctxt->input->cur < 4)) {
791
17.4k
  *len = 0;
792
17.4k
  return(0);
793
17.4k
    }
794
    /*
795
     * If we detect an UTF8 error that probably mean that the
796
     * input encoding didn't get properly advertised in the
797
     * declaration header. Report the error and switch the encoding
798
     * to ISO-Latin-1 (if you don't like this policy, just declare the
799
     * encoding !)
800
     */
801
4.09k
    {
802
4.09k
        char buffer[150];
803
804
4.09k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
805
4.09k
      ctxt->input->cur[0], ctxt->input->cur[1],
806
4.09k
      ctxt->input->cur[2], ctxt->input->cur[3]);
807
4.09k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
808
4.09k
         "Input is not proper UTF-8, indicate encoding !\n%s",
809
4.09k
         BAD_CAST buffer, NULL);
810
4.09k
    }
811
4.09k
    *len = 1;
812
4.09k
    return (*cur);
813
21.5k
}
814
815
/**
816
 * xmlCopyCharMultiByte:
817
 * @out:  pointer to an array of xmlChar
818
 * @val:  the char value
819
 *
820
 * append the char value in the array
821
 *
822
 * Returns the number of xmlChar written
823
 */
824
int
825
47.0M
xmlCopyCharMultiByte(xmlChar *out, int val) {
826
47.0M
    if ((out == NULL) || (val < 0)) return(0);
827
    /*
828
     * We are supposed to handle UTF8, check it's valid
829
     * From rfc2044: encoding of the Unicode values on UTF-8:
830
     *
831
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
832
     * 0000 0000-0000 007F   0xxxxxxx
833
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
834
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
835
     */
836
47.0M
    if  (val >= 0x80) {
837
45.5M
  xmlChar *savedout = out;
838
45.5M
  int bits;
839
45.5M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
840
27.1M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
841
593k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
842
0
  else {
843
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
844
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
845
0
            val);
846
0
      return(0);
847
0
  }
848
118M
  for ( ; bits >= 0; bits-= 6)
849
73.2M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
850
45.5M
  return (out - savedout);
851
45.5M
    }
852
1.57M
    *out = val;
853
1.57M
    return 1;
854
47.0M
}
855
856
/**
857
 * xmlCopyChar:
858
 * @len:  Ignored, compatibility
859
 * @out:  pointer to an array of xmlChar
860
 * @val:  the char value
861
 *
862
 * append the char value in the array
863
 *
864
 * Returns the number of xmlChar written
865
 */
866
867
int
868
502k
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
869
502k
    if ((out == NULL) || (val < 0)) return(0);
870
    /* the len parameter is ignored */
871
502k
    if  (val >= 0x80) {
872
96.1k
  return(xmlCopyCharMultiByte (out, val));
873
96.1k
    }
874
405k
    *out = val;
875
405k
    return 1;
876
502k
}
877
878
/************************************************************************
879
 *                  *
880
 *    Commodity functions to switch encodings     *
881
 *                  *
882
 ************************************************************************/
883
884
static int
885
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
886
                          xmlCharEncodingHandlerPtr handler, int len);
887
/**
888
 * xmlSwitchEncoding:
889
 * @ctxt:  the parser context
890
 * @enc:  the encoding value (number)
891
 *
892
 * change the input functions when discovering the character encoding
893
 * of a given entity.
894
 *
895
 * Returns 0 in case of success, -1 otherwise
896
 */
897
int
898
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
899
790k
{
900
790k
    xmlCharEncodingHandlerPtr handler;
901
790k
    int len = -1;
902
790k
    int ret;
903
904
790k
    if (ctxt == NULL) return(-1);
905
790k
    switch (enc) {
906
0
  case XML_CHAR_ENCODING_ERROR:
907
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
908
0
                     "encoding unknown\n", NULL, NULL);
909
0
      return(-1);
910
233k
  case XML_CHAR_ENCODING_NONE:
911
      /* let's assume it's UTF-8 without the XML decl */
912
233k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
913
233k
      return(0);
914
532k
  case XML_CHAR_ENCODING_UTF8:
915
      /* default encoding, no conversion should be needed */
916
532k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
917
918
      /*
919
       * Errata on XML-1.0 June 20 2001
920
       * Specific handling of the Byte Order Mark for
921
       * UTF-8
922
       */
923
532k
      if ((ctxt->input != NULL) &&
924
532k
    (ctxt->input->cur[0] == 0xEF) &&
925
532k
    (ctxt->input->cur[1] == 0xBB) &&
926
532k
    (ctxt->input->cur[2] == 0xBF)) {
927
2.93k
    ctxt->input->cur += 3;
928
2.93k
      }
929
532k
      return(0);
930
6.21k
    case XML_CHAR_ENCODING_UTF16LE:
931
10.8k
    case XML_CHAR_ENCODING_UTF16BE:
932
        /*The raw input characters are encoded
933
         *in UTF-16. As we expect this function
934
         *to be called after xmlCharEncInFunc, we expect
935
         *ctxt->input->cur to contain UTF-8 encoded characters.
936
         *So the raw UTF16 Byte Order Mark
937
         *has also been converted into
938
         *an UTF-8 BOM. Let's skip that BOM.
939
         */
940
10.8k
        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
941
10.8k
            (ctxt->input->cur[0] == 0xEF) &&
942
10.8k
            (ctxt->input->cur[1] == 0xBB) &&
943
10.8k
            (ctxt->input->cur[2] == 0xBF)) {
944
2.63k
            ctxt->input->cur += 3;
945
2.63k
        }
946
10.8k
        len = 90;
947
10.8k
  break;
948
0
    case XML_CHAR_ENCODING_UCS2:
949
0
        len = 90;
950
0
  break;
951
427
    case XML_CHAR_ENCODING_UCS4BE:
952
1.41k
    case XML_CHAR_ENCODING_UCS4LE:
953
1.58k
    case XML_CHAR_ENCODING_UCS4_2143:
954
1.75k
    case XML_CHAR_ENCODING_UCS4_3412:
955
1.75k
        len = 180;
956
1.75k
  break;
957
12.1k
    case XML_CHAR_ENCODING_EBCDIC:
958
12.1k
    case XML_CHAR_ENCODING_8859_1:
959
12.1k
    case XML_CHAR_ENCODING_8859_2:
960
12.1k
    case XML_CHAR_ENCODING_8859_3:
961
12.1k
    case XML_CHAR_ENCODING_8859_4:
962
12.1k
    case XML_CHAR_ENCODING_8859_5:
963
12.1k
    case XML_CHAR_ENCODING_8859_6:
964
12.1k
    case XML_CHAR_ENCODING_8859_7:
965
12.1k
    case XML_CHAR_ENCODING_8859_8:
966
12.1k
    case XML_CHAR_ENCODING_8859_9:
967
12.1k
    case XML_CHAR_ENCODING_ASCII:
968
12.1k
    case XML_CHAR_ENCODING_2022_JP:
969
12.1k
    case XML_CHAR_ENCODING_SHIFT_JIS:
970
12.1k
    case XML_CHAR_ENCODING_EUC_JP:
971
12.1k
        len = 45;
972
12.1k
  break;
973
790k
    }
974
24.7k
    handler = xmlGetCharEncodingHandler(enc);
975
24.7k
    if (handler == NULL) {
976
  /*
977
   * Default handlers.
978
   */
979
337
  switch (enc) {
980
0
      case XML_CHAR_ENCODING_ASCII:
981
    /* default encoding, no conversion should be needed */
982
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
983
0
    return(0);
984
0
      case XML_CHAR_ENCODING_8859_1:
985
0
    if ((ctxt->inputNr == 1) &&
986
0
        (ctxt->encoding == NULL) &&
987
0
        (ctxt->input != NULL) &&
988
0
        (ctxt->input->encoding != NULL)) {
989
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
990
0
    }
991
0
    ctxt->charset = enc;
992
0
    return(0);
993
337
      default:
994
337
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
995
337
                        "encoding not supported: %s\n",
996
337
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
997
                /*
998
                 * TODO: We could recover from errors in external entities
999
                 * if we didn't stop the parser. But most callers of this
1000
                 * function don't check the return value.
1001
                 */
1002
337
                xmlStopParser(ctxt);
1003
337
                return(-1);
1004
337
        }
1005
337
    }
1006
24.4k
    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1007
24.4k
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1008
        /*
1009
   * on encoding conversion errors, stop the parser
1010
   */
1011
109
        xmlStopParser(ctxt);
1012
109
  ctxt->errNo = XML_I18N_CONV_FAILED;
1013
109
    }
1014
24.4k
    return(ret);
1015
24.7k
}
1016
1017
/**
1018
 * xmlSwitchInputEncodingInt:
1019
 * @ctxt:  the parser context
1020
 * @input:  the input stream
1021
 * @handler:  the encoding handler
1022
 * @len:  the number of bytes to convert for the first line or -1
1023
 *
1024
 * change the input functions when discovering the character encoding
1025
 * of a given entity.
1026
 *
1027
 * Returns 0 in case of success, -1 otherwise
1028
 */
1029
static int
1030
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1031
                          xmlCharEncodingHandlerPtr handler, int len)
1032
160k
{
1033
160k
    int nbchars;
1034
160k
    xmlParserInputBufferPtr in;
1035
1036
160k
    if (handler == NULL)
1037
0
        return (-1);
1038
160k
    if (input == NULL)
1039
0
        return (-1);
1040
160k
    in = input->buf;
1041
160k
    if (in == NULL) {
1042
0
  xmlErrInternal(ctxt,
1043
0
                "static memory buffer doesn't support encoding\n", NULL);
1044
        /*
1045
         * Callers assume that the input buffer takes ownership of the
1046
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1047
         * handlers and avoids a memory leak.
1048
         */
1049
0
        xmlCharEncCloseFunc(handler);
1050
0
  return (-1);
1051
0
    }
1052
1053
160k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1054
1055
160k
    if (in->encoder != NULL) {
1056
        /*
1057
         * Check in case the auto encoding detection triggered
1058
         * in already.
1059
         */
1060
8.23k
        if (in->encoder == handler)
1061
3.20k
            return (0);
1062
1063
        /*
1064
         * Note: this is a bit dangerous, but that's what it
1065
         * takes to use nearly compatible signature for different
1066
         * encodings.
1067
         *
1068
         * FIXME: Encoders might buffer partial byte sequences, so
1069
         * this probably can't work. We should return an error and
1070
         * make sure that callers never try to switch the encoding
1071
         * twice.
1072
         */
1073
5.02k
        xmlCharEncCloseFunc(in->encoder);
1074
5.02k
        in->encoder = handler;
1075
5.02k
        return (0);
1076
8.23k
    }
1077
151k
    in->encoder = handler;
1078
1079
    /*
1080
     * Is there already some content down the pipe to convert ?
1081
     */
1082
151k
    if (xmlBufIsEmpty(in->buffer) == 0) {
1083
151k
        size_t processed, use, consumed;
1084
1085
        /*
1086
         * Specific handling of the Byte Order Mark for
1087
         * UTF-16
1088
         */
1089
151k
        if ((handler->name != NULL) &&
1090
151k
            (!strcmp(handler->name, "UTF-16LE") ||
1091
151k
             !strcmp(handler->name, "UTF-16")) &&
1092
151k
            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1093
3.14k
            input->cur += 2;
1094
3.14k
        }
1095
151k
        if ((handler->name != NULL) &&
1096
151k
            (!strcmp(handler->name, "UTF-16BE")) &&
1097
151k
            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1098
3.38k
            input->cur += 2;
1099
3.38k
        }
1100
        /*
1101
         * Errata on XML-1.0 June 20 2001
1102
         * Specific handling of the Byte Order Mark for
1103
         * UTF-8
1104
         */
1105
151k
        if ((handler->name != NULL) &&
1106
151k
            (!strcmp(handler->name, "UTF-8")) &&
1107
151k
            (input->cur[0] == 0xEF) &&
1108
151k
            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1109
0
            input->cur += 3;
1110
0
        }
1111
1112
        /*
1113
         * Shrink the current input buffer.
1114
         * Move it as the raw buffer and create a new input buffer
1115
         */
1116
151k
        processed = input->cur - input->base;
1117
151k
        xmlBufShrink(in->buffer, processed);
1118
151k
        input->consumed += processed;
1119
151k
        in->raw = in->buffer;
1120
151k
        in->buffer = xmlBufCreate();
1121
151k
        in->rawconsumed = processed;
1122
151k
        use = xmlBufUse(in->raw);
1123
1124
151k
        if (ctxt->html) {
1125
            /*
1126
             * convert as much as possible of the buffer
1127
             */
1128
0
            nbchars = xmlCharEncInput(in, 1);
1129
151k
        } else {
1130
            /*
1131
             * convert just enough to get
1132
             * '<?xml version="1.0" encoding="xxx"?>'
1133
             * parsed with the autodetected encoding
1134
             * into the parser reading buffer.
1135
             */
1136
151k
            nbchars = xmlCharEncFirstLineInput(in, len);
1137
151k
        }
1138
151k
        xmlBufResetInput(in->buffer, input);
1139
151k
        if (nbchars < 0) {
1140
229
            xmlErrInternal(ctxt,
1141
229
                           "switching encoding: encoder error\n",
1142
229
                           NULL);
1143
229
            return (-1);
1144
229
        }
1145
151k
        consumed = use - xmlBufUse(in->raw);
1146
151k
        if ((consumed > ULONG_MAX) ||
1147
151k
            (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1148
0
            in->rawconsumed = ULONG_MAX;
1149
151k
        else
1150
151k
      in->rawconsumed += consumed;
1151
151k
    }
1152
151k
    return (0);
1153
151k
}
1154
1155
/**
1156
 * xmlSwitchInputEncoding:
1157
 * @ctxt:  the parser context
1158
 * @input:  the input stream
1159
 * @handler:  the encoding handler
1160
 *
1161
 * DEPRECATED: Use xmlSwitchToEncoding
1162
 *
1163
 * change the input functions when discovering the character encoding
1164
 * of a given entity.
1165
 *
1166
 * Returns 0 in case of success, -1 otherwise
1167
 */
1168
int
1169
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1170
0
                          xmlCharEncodingHandlerPtr handler) {
1171
0
    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1172
0
}
1173
1174
/**
1175
 * xmlSwitchToEncoding:
1176
 * @ctxt:  the parser context
1177
 * @handler:  the encoding handler
1178
 *
1179
 * change the input functions when discovering the character encoding
1180
 * of a given entity.
1181
 *
1182
 * Returns 0 in case of success, -1 otherwise
1183
 */
1184
int
1185
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1186
135k
{
1187
135k
    if (ctxt == NULL)
1188
0
        return(-1);
1189
135k
    return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
1190
135k
}
1191
1192
/************************************************************************
1193
 *                  *
1194
 *  Commodity functions to handle entities processing   *
1195
 *                  *
1196
 ************************************************************************/
1197
1198
/**
1199
 * xmlFreeInputStream:
1200
 * @input:  an xmlParserInputPtr
1201
 *
1202
 * Free up an input stream.
1203
 */
1204
void
1205
306M
xmlFreeInputStream(xmlParserInputPtr input) {
1206
306M
    if (input == NULL) return;
1207
1208
306M
    if (input->filename != NULL) xmlFree((char *) input->filename);
1209
306M
    if (input->directory != NULL) xmlFree((char *) input->directory);
1210
306M
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1211
306M
    if (input->version != NULL) xmlFree((char *) input->version);
1212
306M
    if ((input->free != NULL) && (input->base != NULL))
1213
0
        input->free((xmlChar *) input->base);
1214
306M
    if (input->buf != NULL)
1215
1.08M
        xmlFreeParserInputBuffer(input->buf);
1216
306M
    xmlFree(input);
1217
306M
}
1218
1219
/**
1220
 * xmlNewInputStream:
1221
 * @ctxt:  an XML parser context
1222
 *
1223
 * Create a new input stream structure.
1224
 *
1225
 * Returns the new input stream or NULL
1226
 */
1227
xmlParserInputPtr
1228
306M
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1229
306M
    xmlParserInputPtr input;
1230
1231
306M
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1232
306M
    if (input == NULL) {
1233
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1234
0
  return(NULL);
1235
0
    }
1236
306M
    memset(input, 0, sizeof(xmlParserInput));
1237
306M
    input->line = 1;
1238
306M
    input->col = 1;
1239
306M
    input->standalone = -1;
1240
1241
    /*
1242
     * If the context is NULL the id cannot be initialized, but that
1243
     * should not happen while parsing which is the situation where
1244
     * the id is actually needed.
1245
     */
1246
306M
    if (ctxt != NULL) {
1247
306M
        if (input->id >= INT_MAX) {
1248
0
            xmlErrMemory(ctxt, "Input ID overflow\n");
1249
0
            return(NULL);
1250
0
        }
1251
306M
        input->id = ctxt->input_id++;
1252
306M
    }
1253
1254
306M
    return(input);
1255
306M
}
1256
1257
/**
1258
 * xmlNewIOInputStream:
1259
 * @ctxt:  an XML parser context
1260
 * @input:  an I/O Input
1261
 * @enc:  the charset encoding if known
1262
 *
1263
 * Create a new input stream structure encapsulating the @input into
1264
 * a stream suitable for the parser.
1265
 *
1266
 * Returns the new input stream or NULL
1267
 */
1268
xmlParserInputPtr
1269
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1270
0
              xmlCharEncoding enc) {
1271
0
    xmlParserInputPtr inputStream;
1272
1273
0
    if (input == NULL) return(NULL);
1274
0
    if (xmlParserDebugEntities)
1275
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1276
0
    inputStream = xmlNewInputStream(ctxt);
1277
0
    if (inputStream == NULL) {
1278
0
  return(NULL);
1279
0
    }
1280
0
    inputStream->filename = NULL;
1281
0
    inputStream->buf = input;
1282
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1283
1284
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1285
0
        xmlSwitchEncoding(ctxt, enc);
1286
0
    }
1287
1288
0
    return(inputStream);
1289
0
}
1290
1291
/**
1292
 * xmlNewEntityInputStream:
1293
 * @ctxt:  an XML parser context
1294
 * @entity:  an Entity pointer
1295
 *
1296
 * Create a new input stream based on an xmlEntityPtr
1297
 *
1298
 * Returns the new input stream or NULL
1299
 */
1300
xmlParserInputPtr
1301
304M
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1302
304M
    xmlParserInputPtr input;
1303
1304
304M
    if (entity == NULL) {
1305
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1306
0
                 NULL);
1307
0
  return(NULL);
1308
0
    }
1309
304M
    if (xmlParserDebugEntities)
1310
0
  xmlGenericError(xmlGenericErrorContext,
1311
0
    "new input from entity: %s\n", entity->name);
1312
304M
    if (entity->content == NULL) {
1313
48.3k
  switch (entity->etype) {
1314
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1315
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1316
0
                   entity->name);
1317
0
                break;
1318
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1319
48.3k
            case XML_EXTERNAL_PARAMETER_ENTITY:
1320
48.3k
    input = xmlLoadExternalEntity((char *) entity->URI,
1321
48.3k
           (char *) entity->ExternalID, ctxt);
1322
48.3k
                if (input != NULL)
1323
37.7k
                    input->entity = entity;
1324
48.3k
                return(input);
1325
0
            case XML_INTERNAL_GENERAL_ENTITY:
1326
0
          xmlErrInternal(ctxt,
1327
0
          "Internal entity %s without content !\n",
1328
0
                   entity->name);
1329
0
                break;
1330
0
            case XML_INTERNAL_PARAMETER_ENTITY:
1331
0
          xmlErrInternal(ctxt,
1332
0
          "Internal parameter entity %s without content !\n",
1333
0
                   entity->name);
1334
0
                break;
1335
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1336
0
          xmlErrInternal(ctxt,
1337
0
          "Predefined entity %s without content !\n",
1338
0
                   entity->name);
1339
0
                break;
1340
48.3k
  }
1341
0
  return(NULL);
1342
48.3k
    }
1343
304M
    input = xmlNewInputStream(ctxt);
1344
304M
    if (input == NULL) {
1345
0
  return(NULL);
1346
0
    }
1347
304M
    if (entity->URI != NULL)
1348
0
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1349
304M
    input->base = entity->content;
1350
304M
    if (entity->length == 0)
1351
168k
        entity->length = xmlStrlen(entity->content);
1352
304M
    input->cur = entity->content;
1353
304M
    input->length = entity->length;
1354
304M
    input->end = &entity->content[input->length];
1355
304M
    input->entity = entity;
1356
304M
    return(input);
1357
304M
}
1358
1359
/**
1360
 * xmlNewStringInputStream:
1361
 * @ctxt:  an XML parser context
1362
 * @buffer:  an memory buffer
1363
 *
1364
 * Create a new input stream based on a memory buffer.
1365
 * Returns the new input stream
1366
 */
1367
xmlParserInputPtr
1368
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1369
0
    xmlParserInputPtr input;
1370
0
    xmlParserInputBufferPtr buf;
1371
1372
0
    if (buffer == NULL) {
1373
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1374
0
                 NULL);
1375
0
  return(NULL);
1376
0
    }
1377
0
    if (xmlParserDebugEntities)
1378
0
  xmlGenericError(xmlGenericErrorContext,
1379
0
    "new fixed input: %.30s\n", buffer);
1380
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1381
0
                                        xmlStrlen(buffer),
1382
0
                                        XML_CHAR_ENCODING_NONE);
1383
0
    if (buf == NULL) {
1384
0
  xmlErrMemory(ctxt, NULL);
1385
0
        return(NULL);
1386
0
    }
1387
0
    input = xmlNewInputStream(ctxt);
1388
0
    if (input == NULL) {
1389
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1390
0
  xmlFreeParserInputBuffer(buf);
1391
0
  return(NULL);
1392
0
    }
1393
0
    input->buf = buf;
1394
0
    xmlBufResetInput(input->buf->buffer, input);
1395
0
    return(input);
1396
0
}
1397
1398
/**
1399
 * xmlNewInputFromFile:
1400
 * @ctxt:  an XML parser context
1401
 * @filename:  the filename to use as entity
1402
 *
1403
 * Create a new input stream based on a file or an URL.
1404
 *
1405
 * Returns the new input stream or NULL in case of error
1406
 */
1407
xmlParserInputPtr
1408
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1409
0
    xmlParserInputBufferPtr buf;
1410
0
    xmlParserInputPtr inputStream;
1411
0
    char *directory = NULL;
1412
0
    xmlChar *URI = NULL;
1413
1414
0
    if (xmlParserDebugEntities)
1415
0
  xmlGenericError(xmlGenericErrorContext,
1416
0
    "new input from file: %s\n", filename);
1417
0
    if (ctxt == NULL) return(NULL);
1418
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1419
0
    if (buf == NULL) {
1420
0
  if (filename == NULL)
1421
0
      __xmlLoaderErr(ctxt,
1422
0
                     "failed to load external entity: NULL filename \n",
1423
0
         NULL);
1424
0
  else
1425
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1426
0
         (const char *) filename);
1427
0
  return(NULL);
1428
0
    }
1429
1430
0
    inputStream = xmlNewInputStream(ctxt);
1431
0
    if (inputStream == NULL) {
1432
0
  xmlFreeParserInputBuffer(buf);
1433
0
  return(NULL);
1434
0
    }
1435
1436
0
    inputStream->buf = buf;
1437
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1438
0
    if (inputStream == NULL)
1439
0
        return(NULL);
1440
1441
0
    if (inputStream->filename == NULL)
1442
0
  URI = xmlStrdup((xmlChar *) filename);
1443
0
    else
1444
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1445
0
    directory = xmlParserGetDirectory((const char *) URI);
1446
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1447
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1448
0
    if (URI != NULL) xmlFree((char *) URI);
1449
0
    inputStream->directory = directory;
1450
1451
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1452
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1453
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1454
0
    return(inputStream);
1455
0
}
1456
1457
/************************************************************************
1458
 *                  *
1459
 *    Commodity functions to handle parser contexts   *
1460
 *                  *
1461
 ************************************************************************/
1462
1463
/**
1464
 * xmlInitSAXParserCtxt:
1465
 * @ctxt:  XML parser context
1466
 * @sax:  SAX handlert
1467
 * @userData:  user data
1468
 *
1469
 * Initialize a SAX parser context
1470
 *
1471
 * Returns 0 in case of success and -1 in case of error
1472
 */
1473
1474
static int
1475
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1476
                     void *userData)
1477
1.91M
{
1478
1.91M
    xmlParserInputPtr input;
1479
1480
1.91M
    if(ctxt==NULL) {
1481
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1482
0
        return(-1);
1483
0
    }
1484
1485
1.91M
    xmlInitParser();
1486
1487
1.91M
    if (ctxt->dict == NULL)
1488
1.91M
  ctxt->dict = xmlDictCreate();
1489
1.91M
    if (ctxt->dict == NULL) {
1490
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1491
0
  return(-1);
1492
0
    }
1493
1.91M
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1494
1495
1.91M
    if (ctxt->sax == NULL)
1496
1.91M
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1497
1.91M
    if (ctxt->sax == NULL) {
1498
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1499
0
  return(-1);
1500
0
    }
1501
1.91M
    if (sax == NULL) {
1502
998k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1503
998k
        xmlSAXVersion(ctxt->sax, 2);
1504
998k
        ctxt->userData = ctxt;
1505
998k
    } else {
1506
917k
  if (sax->initialized == XML_SAX2_MAGIC) {
1507
784k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1508
784k
        } else {
1509
132k
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1510
132k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1511
132k
        }
1512
917k
        ctxt->userData = userData ? userData : ctxt;
1513
917k
    }
1514
1515
1.91M
    ctxt->maxatts = 0;
1516
1.91M
    ctxt->atts = NULL;
1517
    /* Allocate the Input stack */
1518
1.91M
    if (ctxt->inputTab == NULL) {
1519
1.91M
  ctxt->inputTab = (xmlParserInputPtr *)
1520
1.91M
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1521
1.91M
  ctxt->inputMax = 5;
1522
1.91M
    }
1523
1.91M
    if (ctxt->inputTab == NULL) {
1524
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1525
0
  ctxt->inputNr = 0;
1526
0
  ctxt->inputMax = 0;
1527
0
  ctxt->input = NULL;
1528
0
  return(-1);
1529
0
    }
1530
1.91M
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1531
0
        xmlFreeInputStream(input);
1532
0
    }
1533
1.91M
    ctxt->inputNr = 0;
1534
1.91M
    ctxt->input = NULL;
1535
1536
1.91M
    ctxt->version = NULL;
1537
1.91M
    ctxt->encoding = NULL;
1538
1.91M
    ctxt->standalone = -1;
1539
1.91M
    ctxt->hasExternalSubset = 0;
1540
1.91M
    ctxt->hasPErefs = 0;
1541
1.91M
    ctxt->html = 0;
1542
1.91M
    ctxt->external = 0;
1543
1.91M
    ctxt->instate = XML_PARSER_START;
1544
1.91M
    ctxt->token = 0;
1545
1.91M
    ctxt->directory = NULL;
1546
1547
    /* Allocate the Node stack */
1548
1.91M
    if (ctxt->nodeTab == NULL) {
1549
1.91M
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1550
1.91M
  ctxt->nodeMax = 10;
1551
1.91M
    }
1552
1.91M
    if (ctxt->nodeTab == NULL) {
1553
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1554
0
  ctxt->nodeNr = 0;
1555
0
  ctxt->nodeMax = 0;
1556
0
  ctxt->node = NULL;
1557
0
  ctxt->inputNr = 0;
1558
0
  ctxt->inputMax = 0;
1559
0
  ctxt->input = NULL;
1560
0
  return(-1);
1561
0
    }
1562
1.91M
    ctxt->nodeNr = 0;
1563
1.91M
    ctxt->node = NULL;
1564
1565
    /* Allocate the Name stack */
1566
1.91M
    if (ctxt->nameTab == NULL) {
1567
1.91M
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1568
1.91M
  ctxt->nameMax = 10;
1569
1.91M
    }
1570
1.91M
    if (ctxt->nameTab == NULL) {
1571
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1572
0
  ctxt->nodeNr = 0;
1573
0
  ctxt->nodeMax = 0;
1574
0
  ctxt->node = NULL;
1575
0
  ctxt->inputNr = 0;
1576
0
  ctxt->inputMax = 0;
1577
0
  ctxt->input = NULL;
1578
0
  ctxt->nameNr = 0;
1579
0
  ctxt->nameMax = 0;
1580
0
  ctxt->name = NULL;
1581
0
  return(-1);
1582
0
    }
1583
1.91M
    ctxt->nameNr = 0;
1584
1.91M
    ctxt->name = NULL;
1585
1586
    /* Allocate the space stack */
1587
1.91M
    if (ctxt->spaceTab == NULL) {
1588
1.91M
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1589
1.91M
  ctxt->spaceMax = 10;
1590
1.91M
    }
1591
1.91M
    if (ctxt->spaceTab == NULL) {
1592
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1593
0
  ctxt->nodeNr = 0;
1594
0
  ctxt->nodeMax = 0;
1595
0
  ctxt->node = NULL;
1596
0
  ctxt->inputNr = 0;
1597
0
  ctxt->inputMax = 0;
1598
0
  ctxt->input = NULL;
1599
0
  ctxt->nameNr = 0;
1600
0
  ctxt->nameMax = 0;
1601
0
  ctxt->name = NULL;
1602
0
  ctxt->spaceNr = 0;
1603
0
  ctxt->spaceMax = 0;
1604
0
  ctxt->space = NULL;
1605
0
  return(-1);
1606
0
    }
1607
1.91M
    ctxt->spaceNr = 1;
1608
1.91M
    ctxt->spaceMax = 10;
1609
1.91M
    ctxt->spaceTab[0] = -1;
1610
1.91M
    ctxt->space = &ctxt->spaceTab[0];
1611
1.91M
    ctxt->myDoc = NULL;
1612
1.91M
    ctxt->wellFormed = 1;
1613
1.91M
    ctxt->nsWellFormed = 1;
1614
1.91M
    ctxt->valid = 1;
1615
1.91M
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1616
1.91M
    if (ctxt->loadsubset) {
1617
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1618
0
    }
1619
1.91M
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1620
1.91M
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1621
1.91M
    if (ctxt->pedantic) {
1622
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1623
0
    }
1624
1.91M
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1625
1.91M
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1626
1.91M
    if (ctxt->keepBlanks == 0) {
1627
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1628
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1629
0
    }
1630
1631
1.91M
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1632
1.91M
    ctxt->vctxt.userData = ctxt;
1633
1.91M
    ctxt->vctxt.error = xmlParserValidityError;
1634
1.91M
    ctxt->vctxt.warning = xmlParserValidityWarning;
1635
1.91M
    if (ctxt->validate) {
1636
0
  if (xmlGetWarningsDefaultValue == 0)
1637
0
      ctxt->vctxt.warning = NULL;
1638
0
  else
1639
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1640
0
  ctxt->vctxt.nodeMax = 0;
1641
0
        ctxt->options |= XML_PARSE_DTDVALID;
1642
0
    }
1643
1.91M
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1644
1.91M
    if (ctxt->replaceEntities) {
1645
0
        ctxt->options |= XML_PARSE_NOENT;
1646
0
    }
1647
1.91M
    ctxt->record_info = 0;
1648
1.91M
    ctxt->checkIndex = 0;
1649
1.91M
    ctxt->inSubset = 0;
1650
1.91M
    ctxt->errNo = XML_ERR_OK;
1651
1.91M
    ctxt->depth = 0;
1652
1.91M
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1653
1.91M
    ctxt->catalogs = NULL;
1654
1.91M
    ctxt->sizeentities = 0;
1655
1.91M
    ctxt->sizeentcopy = 0;
1656
1.91M
    ctxt->input_id = 1;
1657
1.91M
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1658
1.91M
    return(0);
1659
1.91M
}
1660
1661
/**
1662
 * xmlInitParserCtxt:
1663
 * @ctxt:  an XML parser context
1664
 *
1665
 * DEPRECATED: Internal function which will be made private in a future
1666
 * version.
1667
 *
1668
 * Initialize a parser context
1669
 *
1670
 * Returns 0 in case of success and -1 in case of error
1671
 */
1672
1673
int
1674
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1675
0
{
1676
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1677
0
}
1678
1679
/**
1680
 * xmlFreeParserCtxt:
1681
 * @ctxt:  an XML parser context
1682
 *
1683
 * Free all the memory used by a parser context. However the parsed
1684
 * document in ctxt->myDoc is not freed.
1685
 */
1686
1687
void
1688
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1689
1.91M
{
1690
1.91M
    xmlParserInputPtr input;
1691
1692
1.91M
    if (ctxt == NULL) return;
1693
1694
3.33M
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1695
1.41M
        xmlFreeInputStream(input);
1696
1.41M
    }
1697
1.91M
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1698
1.91M
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1699
1.91M
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1700
1.91M
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1701
1.91M
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1702
1.91M
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1703
1.91M
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1704
1.91M
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1705
1.91M
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1706
1.91M
#ifdef LIBXML_SAX1_ENABLED
1707
1.91M
    if ((ctxt->sax != NULL) &&
1708
1.91M
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1709
#else
1710
    if (ctxt->sax != NULL)
1711
#endif /* LIBXML_SAX1_ENABLED */
1712
1.91M
        xmlFree(ctxt->sax);
1713
1.91M
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1714
1.91M
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1715
1.91M
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1716
1.91M
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1717
1.91M
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1718
1.91M
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1719
1.91M
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1720
1.91M
    if (ctxt->attsDefault != NULL)
1721
95.2k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1722
1.91M
    if (ctxt->attsSpecial != NULL)
1723
133k
        xmlHashFree(ctxt->attsSpecial, NULL);
1724
1.91M
    if (ctxt->freeElems != NULL) {
1725
144k
        xmlNodePtr cur, next;
1726
1727
144k
  cur = ctxt->freeElems;
1728
289k
  while (cur != NULL) {
1729
144k
      next = cur->next;
1730
144k
      xmlFree(cur);
1731
144k
      cur = next;
1732
144k
  }
1733
144k
    }
1734
1.91M
    if (ctxt->freeAttrs != NULL) {
1735
73.2k
        xmlAttrPtr cur, next;
1736
1737
73.2k
  cur = ctxt->freeAttrs;
1738
146k
  while (cur != NULL) {
1739
73.2k
      next = cur->next;
1740
73.2k
      xmlFree(cur);
1741
73.2k
      cur = next;
1742
73.2k
  }
1743
73.2k
    }
1744
    /*
1745
     * cleanup the error strings
1746
     */
1747
1.91M
    if (ctxt->lastError.message != NULL)
1748
910k
        xmlFree(ctxt->lastError.message);
1749
1.91M
    if (ctxt->lastError.file != NULL)
1750
597k
        xmlFree(ctxt->lastError.file);
1751
1.91M
    if (ctxt->lastError.str1 != NULL)
1752
445k
        xmlFree(ctxt->lastError.str1);
1753
1.91M
    if (ctxt->lastError.str2 != NULL)
1754
54.2k
        xmlFree(ctxt->lastError.str2);
1755
1.91M
    if (ctxt->lastError.str3 != NULL)
1756
5.09k
        xmlFree(ctxt->lastError.str3);
1757
1758
1.91M
#ifdef LIBXML_CATALOG_ENABLED
1759
1.91M
    if (ctxt->catalogs != NULL)
1760
0
  xmlCatalogFreeLocal(ctxt->catalogs);
1761
1.91M
#endif
1762
1.91M
    xmlFree(ctxt);
1763
1.91M
}
1764
1765
/**
1766
 * xmlNewParserCtxt:
1767
 *
1768
 * Allocate and initialize a new parser context.
1769
 *
1770
 * Returns the xmlParserCtxtPtr or NULL
1771
 */
1772
1773
xmlParserCtxtPtr
1774
xmlNewParserCtxt(void)
1775
699k
{
1776
699k
    return(xmlNewSAXParserCtxt(NULL, NULL));
1777
699k
}
1778
1779
/**
1780
 * xmlNewSAXParserCtxt:
1781
 * @sax:  SAX handler
1782
 * @userData:  user data
1783
 *
1784
 * Allocate and initialize a new SAX parser context. If userData is NULL,
1785
 * the parser context will be passed as user data.
1786
 *
1787
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1788
 */
1789
1790
xmlParserCtxtPtr
1791
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1792
1.91M
{
1793
1.91M
    xmlParserCtxtPtr ctxt;
1794
1795
1.91M
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1796
1.91M
    if (ctxt == NULL) {
1797
0
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1798
0
  return(NULL);
1799
0
    }
1800
1.91M
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1801
1.91M
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1802
0
        xmlFreeParserCtxt(ctxt);
1803
0
  return(NULL);
1804
0
    }
1805
1.91M
    return(ctxt);
1806
1.91M
}
1807
1808
/************************************************************************
1809
 *                  *
1810
 *    Handling of node information        *
1811
 *                  *
1812
 ************************************************************************/
1813
1814
/**
1815
 * xmlClearParserCtxt:
1816
 * @ctxt:  an XML parser context
1817
 *
1818
 * Clear (release owned resources) and reinitialize a parser context
1819
 */
1820
1821
void
1822
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1823
0
{
1824
0
  if (ctxt==NULL)
1825
0
    return;
1826
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1827
0
  xmlCtxtReset(ctxt);
1828
0
}
1829
1830
1831
/**
1832
 * xmlParserFindNodeInfo:
1833
 * @ctx:  an XML parser context
1834
 * @node:  an XML node within the tree
1835
 *
1836
 * DEPRECATED: Don't use.
1837
 *
1838
 * Find the parser node info struct for a given node
1839
 *
1840
 * Returns an xmlParserNodeInfo block pointer or NULL
1841
 */
1842
const xmlParserNodeInfo *
1843
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1844
0
{
1845
0
    unsigned long pos;
1846
1847
0
    if ((ctx == NULL) || (node == NULL))
1848
0
        return (NULL);
1849
    /* Find position where node should be at */
1850
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1851
0
    if (pos < ctx->node_seq.length
1852
0
        && ctx->node_seq.buffer[pos].node == node)
1853
0
        return &ctx->node_seq.buffer[pos];
1854
0
    else
1855
0
        return NULL;
1856
0
}
1857
1858
1859
/**
1860
 * xmlInitNodeInfoSeq:
1861
 * @seq:  a node info sequence pointer
1862
 *
1863
 * DEPRECATED: Don't use.
1864
 *
1865
 * -- Initialize (set to initial state) node info sequence
1866
 */
1867
void
1868
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1869
1.91M
{
1870
1.91M
    if (seq == NULL)
1871
0
        return;
1872
1.91M
    seq->length = 0;
1873
1.91M
    seq->maximum = 0;
1874
1.91M
    seq->buffer = NULL;
1875
1.91M
}
1876
1877
/**
1878
 * xmlClearNodeInfoSeq:
1879
 * @seq:  a node info sequence pointer
1880
 *
1881
 * DEPRECATED: Don't use.
1882
 *
1883
 * -- Clear (release memory and reinitialize) node
1884
 *   info sequence
1885
 */
1886
void
1887
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1888
0
{
1889
0
    if (seq == NULL)
1890
0
        return;
1891
0
    if (seq->buffer != NULL)
1892
0
        xmlFree(seq->buffer);
1893
0
    xmlInitNodeInfoSeq(seq);
1894
0
}
1895
1896
/**
1897
 * xmlParserFindNodeInfoIndex:
1898
 * @seq:  a node info sequence pointer
1899
 * @node:  an XML node pointer
1900
 *
1901
 * DEPRECATED: Don't use.
1902
 *
1903
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1904
 *   the given node is or should be at in a sorted sequence
1905
 *
1906
 * Returns a long indicating the position of the record
1907
 */
1908
unsigned long
1909
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1910
                           const xmlNodePtr node)
1911
0
{
1912
0
    unsigned long upper, lower, middle;
1913
0
    int found = 0;
1914
1915
0
    if ((seq == NULL) || (node == NULL))
1916
0
        return ((unsigned long) -1);
1917
1918
    /* Do a binary search for the key */
1919
0
    lower = 1;
1920
0
    upper = seq->length;
1921
0
    middle = 0;
1922
0
    while (lower <= upper && !found) {
1923
0
        middle = lower + (upper - lower) / 2;
1924
0
        if (node == seq->buffer[middle - 1].node)
1925
0
            found = 1;
1926
0
        else if (node < seq->buffer[middle - 1].node)
1927
0
            upper = middle - 1;
1928
0
        else
1929
0
            lower = middle + 1;
1930
0
    }
1931
1932
    /* Return position */
1933
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
1934
0
        return middle;
1935
0
    else
1936
0
        return middle - 1;
1937
0
}
1938
1939
1940
/**
1941
 * xmlParserAddNodeInfo:
1942
 * @ctxt:  an XML parser context
1943
 * @info:  a node info sequence pointer
1944
 *
1945
 * DEPRECATED: Don't use.
1946
 *
1947
 * Insert node info record into the sorted sequence
1948
 */
1949
void
1950
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1951
                     const xmlParserNodeInfoPtr info)
1952
0
{
1953
0
    unsigned long pos;
1954
1955
0
    if ((ctxt == NULL) || (info == NULL)) return;
1956
1957
    /* Find pos and check to see if node is already in the sequence */
1958
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1959
0
                                     info->node);
1960
1961
0
    if ((pos < ctxt->node_seq.length) &&
1962
0
        (ctxt->node_seq.buffer != NULL) &&
1963
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
1964
0
        ctxt->node_seq.buffer[pos] = *info;
1965
0
    }
1966
1967
    /* Otherwise, we need to add new node to buffer */
1968
0
    else {
1969
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
1970
0
      (ctxt->node_seq.buffer == NULL)) {
1971
0
            xmlParserNodeInfo *tmp_buffer;
1972
0
            unsigned int byte_size;
1973
1974
0
            if (ctxt->node_seq.maximum == 0)
1975
0
                ctxt->node_seq.maximum = 2;
1976
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
1977
0
      (2 * ctxt->node_seq.maximum));
1978
1979
0
            if (ctxt->node_seq.buffer == NULL)
1980
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1981
0
            else
1982
0
                tmp_buffer =
1983
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1984
0
                                                     byte_size);
1985
1986
0
            if (tmp_buffer == NULL) {
1987
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
1988
0
                return;
1989
0
            }
1990
0
            ctxt->node_seq.buffer = tmp_buffer;
1991
0
            ctxt->node_seq.maximum *= 2;
1992
0
        }
1993
1994
        /* If position is not at end, move elements out of the way */
1995
0
        if (pos != ctxt->node_seq.length) {
1996
0
            unsigned long i;
1997
1998
0
            for (i = ctxt->node_seq.length; i > pos; i--)
1999
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2000
0
        }
2001
2002
        /* Copy element and increase length */
2003
0
        ctxt->node_seq.buffer[pos] = *info;
2004
0
        ctxt->node_seq.length++;
2005
0
    }
2006
0
}
2007
2008
/************************************************************************
2009
 *                  *
2010
 *    Default settings          *
2011
 *                  *
2012
 ************************************************************************/
2013
/**
2014
 * xmlPedanticParserDefault:
2015
 * @val:  int 0 or 1
2016
 *
2017
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2018
 *
2019
 * Set and return the previous value for enabling pedantic warnings.
2020
 *
2021
 * Returns the last value for 0 for no substitution, 1 for substitution.
2022
 */
2023
2024
int
2025
0
xmlPedanticParserDefault(int val) {
2026
0
    int old = xmlPedanticParserDefaultValue;
2027
2028
0
    xmlPedanticParserDefaultValue = val;
2029
0
    return(old);
2030
0
}
2031
2032
/**
2033
 * xmlLineNumbersDefault:
2034
 * @val:  int 0 or 1
2035
 *
2036
 * DEPRECATED: The modern options API always enables line numbers.
2037
 *
2038
 * Set and return the previous value for enabling line numbers in elements
2039
 * contents. This may break on old application and is turned off by default.
2040
 *
2041
 * Returns the last value for 0 for no substitution, 1 for substitution.
2042
 */
2043
2044
int
2045
0
xmlLineNumbersDefault(int val) {
2046
0
    int old = xmlLineNumbersDefaultValue;
2047
2048
0
    xmlLineNumbersDefaultValue = val;
2049
0
    return(old);
2050
0
}
2051
2052
/**
2053
 * xmlSubstituteEntitiesDefault:
2054
 * @val:  int 0 or 1
2055
 *
2056
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2057
 *
2058
 * Set and return the previous value for default entity support.
2059
 * Initially the parser always keep entity references instead of substituting
2060
 * entity values in the output. This function has to be used to change the
2061
 * default parser behavior
2062
 * SAX::substituteEntities() has to be used for changing that on a file by
2063
 * file basis.
2064
 *
2065
 * Returns the last value for 0 for no substitution, 1 for substitution.
2066
 */
2067
2068
int
2069
0
xmlSubstituteEntitiesDefault(int val) {
2070
0
    int old = xmlSubstituteEntitiesDefaultValue;
2071
2072
0
    xmlSubstituteEntitiesDefaultValue = val;
2073
0
    return(old);
2074
0
}
2075
2076
/**
2077
 * xmlKeepBlanksDefault:
2078
 * @val:  int 0 or 1
2079
 *
2080
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2081
 *
2082
 * Set and return the previous value for default blanks text nodes support.
2083
 * The 1.x version of the parser used an heuristic to try to detect
2084
 * ignorable white spaces. As a result the SAX callback was generating
2085
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2086
 * using the DOM output text nodes containing those blanks were not generated.
2087
 * The 2.x and later version will switch to the XML standard way and
2088
 * ignorableWhitespace() are only generated when running the parser in
2089
 * validating mode and when the current element doesn't allow CDATA or
2090
 * mixed content.
2091
 * This function is provided as a way to force the standard behavior
2092
 * on 1.X libs and to switch back to the old mode for compatibility when
2093
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2094
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2095
 * nodes generated.
2096
 * This value also affect autogeneration of indentation when saving code
2097
 * if blanks sections are kept, indentation is not generated.
2098
 *
2099
 * Returns the last value for 0 for no substitution, 1 for substitution.
2100
 */
2101
2102
int
2103
0
xmlKeepBlanksDefault(int val) {
2104
0
    int old = xmlKeepBlanksDefaultValue;
2105
2106
0
    xmlKeepBlanksDefaultValue = val;
2107
0
    if (!val) xmlIndentTreeOutput = 1;
2108
0
    return(old);
2109
0
}
2110