Coverage Report

Created: 2023-03-26 06:14

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
24.5M
/*
 * Accessors for the current input of a parser context.
 * The macro argument and the whole expansion are parenthesized so that
 * any pointer-valued expression (e.g. "&local" or "p + 1") can be passed.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero as long as the read position has not moved past the input end. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * check the compiled lib version against the include one.
61
 * This can warn or immediately kill the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a redefinition of attribute error
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
2.42k
{
102
2.42k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
2.42k
        (ctxt->instate == XML_PARSER_EOF))
104
271
  return;
105
2.15k
    if (ctxt != NULL) {
106
1.61k
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
1.61k
        ctxt->instate = XML_PARSER_EOF;
108
1.61k
        ctxt->disableSAX = 1;
109
1.61k
    }
110
2.15k
    if (extra)
111
808
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
808
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
808
                        NULL, NULL, 0, 0,
114
808
                        "Memory allocation failed : %s\n", extra);
115
1.34k
    else
116
1.34k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
1.34k
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
1.34k
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
2.15k
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  an string info
127
 * @str2:  an string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
37.6k
{
135
37.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
37.6k
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
37.6k
    if (ctxt != NULL)
139
37.6k
        ctxt->errNo = xmlerr;
140
37.6k
    __xmlRaiseError(NULL, NULL, NULL,
141
37.6k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
37.6k
                    NULL, 0, (const char *) str1, (const char *) str2,
143
37.6k
                    NULL, 0, 0, msg, str1, str2);
144
37.6k
    if (ctxt != NULL) {
145
37.6k
        ctxt->wellFormed = 0;
146
37.6k
        if (ctxt->recovery == 0)
147
37.6k
            ctxt->disableSAX = 1;
148
37.6k
    }
149
37.6k
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
12.3k
{
162
12.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
12.3k
        (ctxt->instate == XML_PARSER_EOF))
164
1
  return;
165
12.3k
    if (ctxt != NULL)
166
12.3k
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
12.3k
    __xmlRaiseError(NULL, NULL, NULL,
168
12.3k
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
12.3k
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
12.3k
                    0, 0, msg, str);
171
12.3k
    if (ctxt != NULL) {
172
12.3k
        ctxt->wellFormed = 0;
173
12.3k
        if (ctxt->recovery == 0)
174
12.3k
            ctxt->disableSAX = 1;
175
12.3k
    }
176
12.3k
}
177
178
/**
179
 * xmlErrEncodingInt:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @msg:  the error message
183
 * @val:  an integer value
184
 *
185
 * n encoding error
186
 */
187
static void LIBXML_ATTR_FORMAT(3,0)
188
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189
                  const char *msg, int val)
190
384k
{
191
384k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
384k
        (ctxt->instate == XML_PARSER_EOF))
193
0
  return;
194
384k
    if (ctxt != NULL)
195
384k
        ctxt->errNo = error;
196
384k
    __xmlRaiseError(NULL, NULL, NULL,
197
384k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198
384k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199
384k
    if (ctxt != NULL) {
200
384k
        ctxt->wellFormed = 0;
201
384k
        if (ctxt->recovery == 0)
202
150k
            ctxt->disableSAX = 1;
203
384k
    }
204
384k
}
205
206
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test the character against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
219
220
/************************************************************************
221
 *                  *
222
 *    Input handling functions for progressive parsing  *
223
 *                  *
224
 ************************************************************************/
225
226
/* #define DEBUG_INPUT */
227
/* #define DEBUG_STACK */
228
/* #define DEBUG_PUSH */
229
230
231
/* we need to keep enough input to show errors in context */
232
39.0k
#define LINE_LEN        80
233
234
#ifdef DEBUG_INPUT
235
#define CHECK_BUFFER(in) check_buffer(in)
236
237
static
238
void check_buffer(xmlParserInputPtr in) {
239
    if (in->base != xmlBufContent(in->buf->buffer)) {
240
        xmlGenericError(xmlGenericErrorContext,
241
    "xmlParserInput: base mismatch problem\n");
242
    }
243
    if (in->cur < in->base) {
244
        xmlGenericError(xmlGenericErrorContext,
245
    "xmlParserInput: cur < base problem\n");
246
    }
247
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
248
        xmlGenericError(xmlGenericErrorContext,
249
    "xmlParserInput: cur > base + use problem\n");
250
    }
251
    xmlGenericError(xmlGenericErrorContext,"buffer %p : content %x, cur %d, use %d\n",
252
            (void *) in, (int) xmlBufContent(in->buf->buffer),
253
            in->cur - in->base, xmlBufUse(in->buf->buffer));
254
}
255
256
#else
257
#define CHECK_BUFFER(in)
258
#endif
259
260
261
/**
262
 * xmlHaltParser:
263
 * @ctxt:  an XML parser context
264
 *
265
 * Blocks further parser processing don't override error
266
 * for internal use
267
 */
268
void
269
46.2k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
270
46.2k
    if (ctxt == NULL)
271
0
        return;
272
46.2k
    ctxt->instate = XML_PARSER_EOF;
273
46.2k
    ctxt->disableSAX = 1;
274
50.4k
    while (ctxt->inputNr > 1)
275
4.21k
        xmlFreeInputStream(inputPop(ctxt));
276
46.2k
    if (ctxt->input != NULL) {
277
        /*
278
   * in case there was a specific allocation deallocate before
279
   * overriding base
280
   */
281
46.2k
        if (ctxt->input->free != NULL) {
282
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
283
0
      ctxt->input->free = NULL;
284
0
  }
285
46.2k
        if (ctxt->input->buf != NULL) {
286
45.5k
            xmlFreeParserInputBuffer(ctxt->input->buf);
287
45.5k
            ctxt->input->buf = NULL;
288
45.5k
        }
289
46.2k
  ctxt->input->cur = BAD_CAST"";
290
46.2k
        ctxt->input->length = 0;
291
46.2k
  ctxt->input->base = ctxt->input->cur;
292
46.2k
        ctxt->input->end = ctxt->input->cur;
293
46.2k
    }
294
46.2k
}
295
296
/**
297
 * xmlParserInputRead:
298
 * @in:  an XML parser input
299
 * @len:  an indicative size for the lookahead
300
 *
301
 * DEPRECATED: This function was internal and is deprecated.
302
 *
303
 * Returns -1 as this is an error to use it.
304
 */
305
int
306
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
307
0
    return(-1);
308
0
}
309
310
/**
311
 * xmlParserGrow:
312
 * @ctxt:  an XML parser context
313
 */
314
int
315
15.8M
xmlParserGrow(xmlParserCtxtPtr ctxt) {
316
15.8M
    xmlParserInputPtr in = ctxt->input;
317
15.8M
    xmlParserInputBufferPtr buf = in->buf;
318
15.8M
    ptrdiff_t curEnd = in->end - in->cur;
319
15.8M
    ptrdiff_t curBase = in->cur - in->base;
320
15.8M
    int ret;
321
322
15.8M
    if (buf == NULL)
323
3.34M
        return(0);
324
    /* Don't grow memory buffers. */
325
12.5M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
326
8.09M
        return(0);
327
328
4.41M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
329
4.41M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
330
4.41M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
331
4
        xmlErrInternal(ctxt, "Huge input lookup", NULL);
332
4
        xmlHaltParser(ctxt);
333
4
  return(-1);
334
4
    }
335
336
4.41M
    if (curEnd >= INPUT_CHUNK)
337
9.22k
        return(0);
338
339
4.40M
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
340
4.40M
    xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
341
342
    /* TODO: Get error code from xmlParserInputBufferGrow */
343
4.40M
    if (ret < 0) {
344
11.8k
        xmlErrInternal(ctxt, "Growing input buffer", NULL);
345
11.8k
        xmlHaltParser(ctxt);
346
11.8k
    }
347
348
4.40M
    return(ret);
349
4.41M
}
350
351
/**
352
 * xmlParserInputGrow:
353
 * @in:  an XML parser input
354
 * @len:  an indicative size for the lookahead
355
 *
356
 * DEPRECATED: Don't use.
357
 *
358
 * This function increase the input for the parser. It tries to
359
 * preserve pointers to the input buffer, and keep already read data
360
 *
361
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
362
 * end of this entity
363
 */
364
int
365
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
366
0
    int ret;
367
0
    size_t indx;
368
369
0
    if ((in == NULL) || (len < 0)) return(-1);
370
#ifdef DEBUG_INPUT
371
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
372
#endif
373
0
    if (in->buf == NULL) return(-1);
374
0
    if (in->base == NULL) return(-1);
375
0
    if (in->cur == NULL) return(-1);
376
0
    if (in->buf->buffer == NULL) return(-1);
377
378
    /* Don't grow memory buffers. */
379
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
380
0
        return(0);
381
382
0
    CHECK_BUFFER(in);
383
384
0
    indx = in->cur - in->base;
385
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
386
387
0
  CHECK_BUFFER(in);
388
389
0
        return(0);
390
0
    }
391
0
    ret = xmlParserInputBufferGrow(in->buf, len);
392
393
0
    in->base = xmlBufContent(in->buf->buffer);
394
0
    if (in->base == NULL) {
395
0
        in->base = BAD_CAST "";
396
0
        in->cur = in->base;
397
0
        in->end = in->base;
398
0
        return(-1);
399
0
    }
400
0
    in->cur = in->base + indx;
401
0
    in->end = xmlBufEnd(in->buf->buffer);
402
403
0
    CHECK_BUFFER(in);
404
405
0
    return(ret);
406
0
}
407
408
/**
409
 * xmlParserShrink:
410
 * @ctxt:  an XML parser context
411
 */
412
void
413
595k
xmlParserShrink(xmlParserCtxtPtr ctxt) {
414
595k
    xmlParserInputPtr in = ctxt->input;
415
595k
    xmlParserInputBufferPtr buf = in->buf;
416
595k
    size_t used;
417
418
    /* Don't shrink memory buffers. */
419
595k
    if ((buf == NULL) ||
420
595k
        ((buf->encoder == NULL) && (buf->readcallback == NULL)))
421
556k
        return;
422
423
39.0k
    used = in->cur - in->base;
424
    /*
425
     * Do not shrink on large buffers whose only a tiny fraction
426
     * was consumed
427
     */
428
39.0k
    if (used > INPUT_CHUNK) {
429
39.0k
  size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
430
431
39.0k
  if (res > 0) {
432
39.0k
            used -= res;
433
39.0k
            if ((res > ULONG_MAX) ||
434
39.0k
                (in->consumed > ULONG_MAX - (unsigned long)res))
435
0
                in->consumed = ULONG_MAX;
436
39.0k
            else
437
39.0k
                in->consumed += res;
438
39.0k
  }
439
39.0k
    }
440
441
39.0k
    xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
442
39.0k
}
443
444
/**
445
 * xmlParserInputShrink:
446
 * @in:  an XML parser input
447
 *
448
 * DEPRECATED: Don't use.
449
 *
450
 * This function removes used input for the parser.
451
 */
452
void
453
0
xmlParserInputShrink(xmlParserInputPtr in) {
454
0
    size_t used;
455
0
    size_t ret;
456
457
#ifdef DEBUG_INPUT
458
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
459
#endif
460
0
    if (in == NULL) return;
461
0
    if (in->buf == NULL) return;
462
0
    if (in->base == NULL) return;
463
0
    if (in->cur == NULL) return;
464
0
    if (in->buf->buffer == NULL) return;
465
466
0
    CHECK_BUFFER(in);
467
468
0
    used = in->cur - in->base;
469
    /*
470
     * Do not shrink on large buffers whose only a tiny fraction
471
     * was consumed
472
     */
473
0
    if (used > INPUT_CHUNK) {
474
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
475
0
  if (ret > 0) {
476
0
            used -= ret;
477
0
            if ((ret > ULONG_MAX) ||
478
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
479
0
                in->consumed = ULONG_MAX;
480
0
            else
481
0
                in->consumed += ret;
482
0
  }
483
0
    }
484
485
0
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
486
0
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
487
0
    }
488
489
0
    in->base = xmlBufContent(in->buf->buffer);
490
0
    if (in->base == NULL) {
491
        /* TODO: raise error */
492
0
        in->base = BAD_CAST "";
493
0
        in->cur = in->base;
494
0
        in->end = in->base;
495
0
        return;
496
0
    }
497
0
    in->cur = in->base + used;
498
0
    in->end = xmlBufEnd(in->buf->buffer);
499
500
0
    CHECK_BUFFER(in);
501
0
}
502
503
/************************************************************************
504
 *                  *
505
 *    UTF8 character input and related functions    *
506
 *                  *
507
 ************************************************************************/
508
509
/**
510
 * xmlNextChar:
511
 * @ctxt:  the XML parser context
512
 *
513
 * Skip to the next char input char.
514
 */
515
516
void
517
xmlNextChar(xmlParserCtxtPtr ctxt)
518
24.5M
{
519
24.5M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
520
24.5M
        (ctxt->input == NULL))
521
99
        return;
522
523
24.5M
    if (!(VALID_CTXT(ctxt))) {
524
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
525
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
526
0
        xmlStopParser(ctxt);
527
0
  return;
528
0
    }
529
530
24.5M
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
531
2.36M
        if (xmlParserGrow(ctxt) < 0)
532
1.57k
            return;
533
2.35M
        if (ctxt->input->cur >= ctxt->input->end)
534
634
            return;
535
2.35M
    }
536
537
24.5M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
538
19.1M
        const unsigned char *cur;
539
19.1M
        unsigned char c;
540
541
        /*
542
         *   2.11 End-of-Line Handling
543
         *   the literal two-character sequence "#xD#xA" or a standalone
544
         *   literal #xD, an XML processor must pass to the application
545
         *   the single character #xA.
546
         */
547
19.1M
        if (*(ctxt->input->cur) == '\n') {
548
393k
            ctxt->input->line++; ctxt->input->col = 1;
549
393k
        } else
550
18.7M
            ctxt->input->col++;
551
552
        /*
553
         * We are supposed to handle UTF8, check it's valid
554
         * From rfc2044: encoding of the Unicode values on UTF-8:
555
         *
556
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
557
         * 0000 0000-0000 007F   0xxxxxxx
558
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
559
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
560
         *
561
         * Check for the 0x110000 limit too
562
         */
563
19.1M
        cur = ctxt->input->cur;
564
565
19.1M
        c = *cur;
566
19.1M
        if (c & 0x80) {
567
7.74M
            size_t avail;
568
569
7.74M
            if (c == 0xC0)
570
263
          goto encoding_error;
571
572
7.74M
            avail = ctxt->input->end - ctxt->input->cur;
573
574
7.74M
            if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
575
5.33k
                goto encoding_error;
576
7.73M
            if ((c & 0xe0) == 0xe0) {
577
6.41M
                unsigned int val;
578
579
6.41M
                if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
580
643
                    goto encoding_error;
581
6.41M
                if ((c & 0xf0) == 0xf0) {
582
6.90k
                    if (((c & 0xf8) != 0xf0) ||
583
6.90k
                        (avail < 4) || ((cur[3] & 0xc0) != 0x80))
584
648
                        goto encoding_error;
585
                    /* 4-byte code */
586
6.25k
                    ctxt->input->cur += 4;
587
6.25k
                    val = (cur[0] & 0x7) << 18;
588
6.25k
                    val |= (cur[1] & 0x3f) << 12;
589
6.25k
                    val |= (cur[2] & 0x3f) << 6;
590
6.25k
                    val |= cur[3] & 0x3f;
591
6.41M
                } else {
592
                    /* 3-byte code */
593
6.41M
                    ctxt->input->cur += 3;
594
6.41M
                    val = (cur[0] & 0xf) << 12;
595
6.41M
                    val |= (cur[1] & 0x3f) << 6;
596
6.41M
                    val |= cur[2] & 0x3f;
597
6.41M
                }
598
6.41M
                if (((val > 0xd7ff) && (val < 0xe000)) ||
599
6.41M
                    ((val > 0xfffd) && (val < 0x10000)) ||
600
6.41M
                    (val >= 0x110000)) {
601
171k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
602
171k
          "Char 0x%X out of allowed range\n",
603
171k
          val);
604
171k
                }
605
6.41M
            } else
606
                /* 2-byte code */
607
1.31M
                ctxt->input->cur += 2;
608
7.73M
        } else
609
            /* 1-byte code */
610
11.4M
            ctxt->input->cur++;
611
19.1M
    } else {
612
        /*
613
         * Assume it's a fixed length encoding (1) with
614
         * a compatible encoding for the ASCII set, since
615
         * XML constructs only use < 128 chars
616
         */
617
618
5.42M
        if (*(ctxt->input->cur) == '\n') {
619
158k
            ctxt->input->line++; ctxt->input->col = 1;
620
158k
        } else
621
5.26M
            ctxt->input->col++;
622
5.42M
        ctxt->input->cur++;
623
5.42M
    }
624
24.5M
    return;
625
24.5M
encoding_error:
626
    /*
627
     * If we detect an UTF8 error that probably mean that the
628
     * input encoding didn't get properly advertised in the
629
     * declaration header. Report the error and switch the encoding
630
     * to ISO-Latin-1 (if you don't like this policy, just declare the
631
     * encoding !)
632
     */
633
6.89k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
634
6.89k
        (ctxt->input->end - ctxt->input->cur < 4)) {
635
1.53k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
636
1.53k
         "Input is not proper UTF-8, indicate encoding !\n",
637
1.53k
         NULL, NULL);
638
5.36k
    } else {
639
5.36k
        char buffer[150];
640
641
5.36k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
642
5.36k
      ctxt->input->cur[0], ctxt->input->cur[1],
643
5.36k
      ctxt->input->cur[2], ctxt->input->cur[3]);
644
5.36k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
645
5.36k
         "Input is not proper UTF-8, indicate encoding !\n%s",
646
5.36k
         BAD_CAST buffer, NULL);
647
5.36k
    }
648
6.89k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
649
6.89k
    ctxt->input->cur++;
650
6.89k
    return;
651
24.5M
}
652
653
/**
654
 * xmlCurrentChar:
655
 * @ctxt:  the XML parser context
656
 * @len:  pointer to the length of the char read
657
 *
658
 * The current char value, if using UTF-8 this may actually span multiple
659
 * bytes in the input buffer. Implement the end of line normalization:
660
 * 2.11 End-of-Line Handling
661
 * Wherever an external parsed entity or the literal entity value
662
 * of an internal parsed entity contains either the literal two-character
663
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
664
 * must pass to the application the single character #xA.
665
 * This behavior can conveniently be produced by normalizing all
666
 * line breaks to #xA on input, before parsing.)
667
 *
668
 * Returns the current char value and its length
669
 */
670
671
int
672
983M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
673
983M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
674
983M
    if (ctxt->instate == XML_PARSER_EOF)
675
1.91k
  return(0);
676
677
983M
    if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
678
983M
        (xmlParserGrow(ctxt) < 0))
679
5.19k
        return(0);
680
681
983M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
682
159M
      *len = 1;
683
159M
      return(*ctxt->input->cur);
684
159M
    }
685
823M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
686
  /*
687
   * We are supposed to handle UTF8, check it's valid
688
   * From rfc2044: encoding of the Unicode values on UTF-8:
689
   *
690
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
691
   * 0000 0000-0000 007F   0xxxxxxx
692
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
693
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
694
   *
695
   * Check for the 0x110000 limit too
696
   */
697
810M
  const unsigned char *cur = ctxt->input->cur;
698
810M
  unsigned char c;
699
810M
  unsigned int val;
700
701
810M
  c = *cur;
702
810M
  if (c & 0x80) {
703
731M
            size_t avail;
704
705
731M
      if (((c & 0x40) == 0) || (c == 0xC0))
706
8.09k
    goto encoding_error;
707
708
731M
            avail = ctxt->input->end - ctxt->input->cur;
709
710
731M
      if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
711
21.3k
    goto encoding_error;
712
731M
      if ((c & 0xe0) == 0xe0) {
713
650M
    if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
714
795
        goto encoding_error;
715
650M
    if ((c & 0xf0) == 0xf0) {
716
71.4k
        if (((c & 0xf8) != 0xf0) ||
717
71.4k
      (avail < 4) || ((cur[3] & 0xc0) != 0x80))
718
787
      goto encoding_error;
719
        /* 4-byte code */
720
70.6k
        *len = 4;
721
70.6k
        val = (cur[0] & 0x7) << 18;
722
70.6k
        val |= (cur[1] & 0x3f) << 12;
723
70.6k
        val |= (cur[2] & 0x3f) << 6;
724
70.6k
        val |= cur[3] & 0x3f;
725
70.6k
        if (val < 0x10000)
726
247
      goto encoding_error;
727
650M
    } else {
728
      /* 3-byte code */
729
650M
        *len = 3;
730
650M
        val = (cur[0] & 0xf) << 12;
731
650M
        val |= (cur[1] & 0x3f) << 6;
732
650M
        val |= cur[2] & 0x3f;
733
650M
        if (val < 0x800)
734
288
      goto encoding_error;
735
650M
    }
736
650M
      } else {
737
        /* 2-byte code */
738
80.7M
    *len = 2;
739
80.7M
    val = (cur[0] & 0x1f) << 6;
740
80.7M
    val |= cur[1] & 0x3f;
741
80.7M
    if (val < 0x80)
742
270
        goto encoding_error;
743
80.7M
      }
744
731M
      if (!IS_CHAR(val)) {
745
194k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
746
194k
          "Char 0x%X out of allowed range\n", val);
747
194k
      }
748
731M
      return(val);
749
731M
  } else {
750
      /* 1-byte code */
751
79.2M
      *len = 1;
752
79.2M
      if ((*ctxt->input->cur == 0) &&
753
79.2M
          (ctxt->input->end > ctxt->input->cur)) {
754
17.7k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
755
17.7k
          "Char 0x0 out of allowed range\n", 0);
756
17.7k
      }
757
79.2M
      if (*ctxt->input->cur == 0xD) {
758
1.58M
    if (ctxt->input->cur[1] == 0xA) {
759
20.2k
        ctxt->input->cur++;
760
20.2k
    }
761
1.58M
    return(0xA);
762
1.58M
      }
763
77.6M
      return(*ctxt->input->cur);
764
79.2M
  }
765
810M
    }
766
    /*
767
     * Assume it's a fixed length encoding (1) with
768
     * a compatible encoding for the ASCII set, since
769
     * XML constructs only use < 128 chars
770
     */
771
13.0M
    *len = 1;
772
13.0M
    if (*ctxt->input->cur == 0xD) {
773
72.0k
  if (ctxt->input->cur[1] == 0xA) {
774
2.97k
      ctxt->input->cur++;
775
2.97k
  }
776
72.0k
  return(0xA);
777
72.0k
    }
778
12.9M
    return(*ctxt->input->cur);
779
31.8k
encoding_error:
780
    /*
781
     * An encoding problem may arise from a truncated input buffer
782
     * splitting a character in the middle. In that case do not raise
783
     * an error but return 0 to indicate an end of stream problem
784
     */
785
31.8k
    if (ctxt->input->end - ctxt->input->cur < 4) {
786
1.61k
  *len = 0;
787
1.61k
  return(0);
788
1.61k
    }
789
790
    /*
791
     * If we detect an UTF8 error that probably mean that the
792
     * input encoding didn't get properly advertised in the
793
     * declaration header. Report the error and switch the encoding
794
     * to ISO-Latin-1 (if you don't like this policy, just declare the
795
     * encoding !)
796
     */
797
30.2k
    {
798
30.2k
        char buffer[150];
799
800
30.2k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
801
30.2k
      ctxt->input->cur[0], ctxt->input->cur[1],
802
30.2k
      ctxt->input->cur[2], ctxt->input->cur[3]);
803
30.2k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
804
30.2k
         "Input is not proper UTF-8, indicate encoding !\n%s",
805
30.2k
         BAD_CAST buffer, NULL);
806
30.2k
    }
807
30.2k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
808
30.2k
    *len = 1;
809
30.2k
    return(*ctxt->input->cur);
810
31.8k
}
811
812
/**
813
 * xmlStringCurrentChar:
814
 * @ctxt:  the XML parser context
815
 * @cur:  pointer to the beginning of the char
816
 * @len:  pointer to the length of the char read
817
 *
818
 * The current char value, if using UTF-8 this may actually span multiple
819
 * bytes in the input buffer.
820
 *
821
 * Returns the current char value and its length
822
 */
823
824
int
825
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
826
427M
{
827
427M
    if ((len == NULL) || (cur == NULL)) return(0);
828
427M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
829
        /*
830
         * We are supposed to handle UTF8, check it's valid
831
         * From rfc2044: encoding of the Unicode values on UTF-8:
832
         *
833
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
834
         * 0000 0000-0000 007F   0xxxxxxx
835
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
836
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
837
         *
838
         * Check for the 0x110000 limit too
839
         */
840
420M
        unsigned char c;
841
420M
        unsigned int val;
842
843
420M
        c = *cur;
844
420M
        if (c & 0x80) {
845
397M
            if ((cur[1] & 0xc0) != 0x80)
846
73
                goto encoding_error;
847
397M
            if ((c & 0xe0) == 0xe0) {
848
849
395M
                if ((cur[2] & 0xc0) != 0x80)
850
0
                    goto encoding_error;
851
395M
                if ((c & 0xf0) == 0xf0) {
852
30.5k
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
853
0
                        goto encoding_error;
854
                    /* 4-byte code */
855
30.5k
                    *len = 4;
856
30.5k
                    val = (cur[0] & 0x7) << 18;
857
30.5k
                    val |= (cur[1] & 0x3f) << 12;
858
30.5k
                    val |= (cur[2] & 0x3f) << 6;
859
30.5k
                    val |= cur[3] & 0x3f;
860
395M
                } else {
861
                    /* 3-byte code */
862
395M
                    *len = 3;
863
395M
                    val = (cur[0] & 0xf) << 12;
864
395M
                    val |= (cur[1] & 0x3f) << 6;
865
395M
                    val |= cur[2] & 0x3f;
866
395M
                }
867
395M
            } else {
868
                /* 2-byte code */
869
1.13M
                *len = 2;
870
1.13M
                val = (cur[0] & 0x1f) << 6;
871
1.13M
                val |= cur[1] & 0x3f;
872
1.13M
            }
873
397M
            if (!IS_CHAR(val)) {
874
0
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
875
0
          "Char 0x%X out of allowed range\n", val);
876
0
            }
877
397M
            return (val);
878
397M
        } else {
879
            /* 1-byte code */
880
23.0M
            *len = 1;
881
23.0M
            return (*cur);
882
23.0M
        }
883
420M
    }
884
    /*
885
     * Assume it's a fixed length encoding (1) with
886
     * a compatible encoding for the ASCII set, since
887
     * XML constructs only use < 128 chars
888
     */
889
7.31M
    *len = 1;
890
7.31M
    return (*cur);
891
73
encoding_error:
892
893
    /*
894
     * An encoding problem may arise from a truncated input buffer
895
     * splitting a character in the middle. In that case do not raise
896
     * an error but return 0 to indicate an end of stream problem
897
     */
898
73
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
899
73
        (ctxt->input->end - ctxt->input->cur < 4)) {
900
73
  *len = 0;
901
73
  return(0);
902
73
    }
903
    /*
904
     * If we detect an UTF8 error that probably mean that the
905
     * input encoding didn't get properly advertised in the
906
     * declaration header. Report the error and switch the encoding
907
     * to ISO-Latin-1 (if you don't like this policy, just declare the
908
     * encoding !)
909
     */
910
0
    {
911
0
        char buffer[150];
912
913
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
914
0
      ctxt->input->cur[0], ctxt->input->cur[1],
915
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
916
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
917
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
918
0
         BAD_CAST buffer, NULL);
919
0
    }
920
0
    *len = 1;
921
0
    return (*cur);
922
73
}
923
924
/**
925
 * xmlCopyCharMultiByte:
926
 * @out:  pointer to an array of xmlChar
927
 * @val:  the char value
928
 *
929
 * append the char value in the array
930
 *
931
 * Returns the number of xmlChar written
932
 */
933
int
934
1.03G
xmlCopyCharMultiByte(xmlChar *out, int val) {
935
1.03G
    if ((out == NULL) || (val < 0)) return(0);
936
    /*
937
     * We are supposed to handle UTF8, check it's valid
938
     * From rfc2044: encoding of the Unicode values on UTF-8:
939
     *
940
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
941
     * 0000 0000-0000 007F   0xxxxxxx
942
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
943
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
944
     */
945
1.03G
    if  (val >= 0x80) {
946
1.03G
  xmlChar *savedout = out;
947
1.03G
  int bits;
948
1.03G
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
949
953M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
950
134k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
951
0
  else {
952
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
953
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
954
0
            val);
955
0
      return(0);
956
0
  }
957
3.02G
  for ( ; bits >= 0; bits-= 6)
958
1.99G
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
959
1.03G
  return (out - savedout);
960
1.03G
    }
961
42.5k
    *out = val;
962
42.5k
    return 1;
963
1.03G
}
964
965
/**
966
 * xmlCopyChar:
967
 * @len:  Ignored, compatibility
968
 * @out:  pointer to an array of xmlChar
969
 * @val:  the char value
970
 *
971
 * append the char value in the array
972
 *
973
 * Returns the number of xmlChar written
974
 */
975
976
int
977
2.64M
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
978
2.64M
    if ((out == NULL) || (val < 0)) return(0);
979
    /* the len parameter is ignored */
980
2.64M
    if  (val >= 0x80) {
981
2.54M
  return(xmlCopyCharMultiByte (out, val));
982
2.54M
    }
983
99.2k
    *out = val;
984
99.2k
    return 1;
985
2.64M
}
986
987
/************************************************************************
988
 *                  *
989
 *    Commodity functions to switch encodings     *
990
 *                  *
991
 ************************************************************************/
992
993
static xmlCharEncodingHandlerPtr
994
722
xmlDetectEBCDIC(xmlParserInputPtr input) {
995
722
    xmlChar out[200];
996
722
    xmlCharEncodingHandlerPtr handler;
997
722
    int inlen, outlen, res, i;
998
999
    /*
1000
     * To detect the EBCDIC code page, we convert the first 200 bytes
1001
     * to EBCDIC-US and try to find the encoding declaration.
1002
     */
1003
722
    handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1004
722
    if (handler == NULL)
1005
1
        return(NULL);
1006
721
    outlen = sizeof(out);
1007
721
    inlen = input->end - input->cur;
1008
721
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1009
721
    if (res < 0)
1010
249
        return(handler);
1011
1012
19.3k
    for (i = 0; i < outlen; i++) {
1013
19.2k
        if (out[i] == '>')
1014
2
            break;
1015
19.2k
        if ((out[i] == 'e') &&
1016
19.2k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1017
363
            int start, cur, quote;
1018
1019
363
            i += 8;
1020
363
            while (IS_BLANK_CH(out[i]))
1021
113
                i += 1;
1022
363
            if (out[i++] != '=')
1023
12
                break;
1024
351
            while (IS_BLANK_CH(out[i]))
1025
12
                i += 1;
1026
351
            quote = out[i++];
1027
351
            if ((quote != '\'') && (quote != '"'))
1028
311
                break;
1029
40
            start = i;
1030
40
            cur = out[i];
1031
1.27k
            while (((cur >= 'a') && (cur <= 'z')) ||
1032
1.27k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1033
1.27k
                   ((cur >= '0') && (cur <= '9')) ||
1034
1.27k
                   (cur == '.') || (cur == '_') ||
1035
1.27k
                   (cur == '-'))
1036
1.23k
                cur = out[++i];
1037
40
            if (cur != quote)
1038
32
                break;
1039
8
            out[i] = 0;
1040
8
            xmlCharEncCloseFunc(handler);
1041
8
            handler = xmlFindCharEncodingHandler((char *) out + start);
1042
8
            break;
1043
40
        }
1044
19.2k
    }
1045
1046
472
    return(handler);
1047
721
}
1048
1049
/**
1050
 * xmlSwitchEncoding:
1051
 * @ctxt:  the parser context
1052
 * @enc:  the encoding value (number)
1053
 *
1054
 * change the input functions when discovering the character encoding
1055
 * of a given entity.
1056
 *
1057
 * Returns 0 in case of success, -1 otherwise
1058
 */
1059
int
1060
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1061
115k
{
1062
115k
    xmlCharEncodingHandlerPtr handler;
1063
115k
    int ret;
1064
1065
115k
    if (ctxt == NULL) return(-1);
1066
115k
    switch (enc) {
1067
0
  case XML_CHAR_ENCODING_ERROR:
1068
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1069
0
                     "encoding unknown\n", NULL, NULL);
1070
0
      return(-1);
1071
0
  case XML_CHAR_ENCODING_NONE:
1072
      /* let's assume it's UTF-8 without the XML decl */
1073
0
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1074
0
      return(0);
1075
111k
  case XML_CHAR_ENCODING_UTF8:
1076
      /* default encoding, no conversion should be needed */
1077
111k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1078
1079
      /*
1080
       * Errata on XML-1.0 June 20 2001
1081
       * Specific handling of the Byte Order Mark for
1082
       * UTF-8
1083
       */
1084
111k
      if ((ctxt->input != NULL) &&
1085
111k
    (ctxt->input->cur[0] == 0xEF) &&
1086
111k
    (ctxt->input->cur[1] == 0xBB) &&
1087
111k
    (ctxt->input->cur[2] == 0xBF)) {
1088
5.93k
    ctxt->input->cur += 3;
1089
5.93k
      }
1090
111k
      return(0);
1091
722
        case XML_CHAR_ENCODING_EBCDIC:
1092
722
            handler = xmlDetectEBCDIC(ctxt->input);
1093
722
            break;
1094
4.00k
        default:
1095
4.00k
            handler = xmlGetCharEncodingHandler(enc);
1096
4.00k
            break;
1097
115k
    }
1098
4.72k
    if (handler == NULL) {
1099
  /*
1100
   * Default handlers.
1101
   */
1102
587
  switch (enc) {
1103
0
      case XML_CHAR_ENCODING_ASCII:
1104
    /* default encoding, no conversion should be needed */
1105
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1106
0
    return(0);
1107
0
      case XML_CHAR_ENCODING_8859_1:
1108
0
    if ((ctxt->inputNr == 1) &&
1109
0
        (ctxt->encoding == NULL) &&
1110
0
        (ctxt->input != NULL) &&
1111
0
        (ctxt->input->encoding != NULL)) {
1112
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1113
0
    }
1114
0
    ctxt->charset = enc;
1115
0
    return(0);
1116
587
      default:
1117
587
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1118
587
                        "encoding not supported: %s\n",
1119
587
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
1120
                /*
1121
                 * TODO: We could recover from errors in external entities
1122
                 * if we didn't stop the parser. But most callers of this
1123
                 * function don't check the return value.
1124
                 */
1125
587
                xmlStopParser(ctxt);
1126
587
                return(-1);
1127
587
        }
1128
587
    }
1129
4.14k
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1130
4.14k
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1131
        /*
1132
   * on encoding conversion errors, stop the parser
1133
   */
1134
20
        xmlStopParser(ctxt);
1135
20
  ctxt->errNo = XML_I18N_CONV_FAILED;
1136
20
    }
1137
4.14k
    return(ret);
1138
4.72k
}
1139
1140
/**
1141
 * xmlSwitchInputEncoding:
1142
 * @ctxt:  the parser context
1143
 * @input:  the input stream
1144
 * @handler:  the encoding handler
1145
 *
1146
 * change the input functions when discovering the character encoding
1147
 * of a given entity.
1148
 *
1149
 * Returns 0 in case of success, -1 otherwise
1150
 */
1151
int
1152
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1153
                       xmlCharEncodingHandlerPtr handler)
1154
70.3k
{
1155
70.3k
    int nbchars;
1156
70.3k
    xmlParserInputBufferPtr in;
1157
1158
70.3k
    if (handler == NULL)
1159
0
        return (-1);
1160
70.3k
    if (input == NULL)
1161
0
        return (-1);
1162
70.3k
    in = input->buf;
1163
70.3k
    if (in == NULL) {
1164
0
  xmlErrInternal(ctxt,
1165
0
                "static memory buffer doesn't support encoding\n", NULL);
1166
        /*
1167
         * Callers assume that the input buffer takes ownership of the
1168
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1169
         * handlers and avoids a memory leak.
1170
         */
1171
0
        xmlCharEncCloseFunc(handler);
1172
0
  return (-1);
1173
0
    }
1174
1175
70.3k
    if (in->encoder != NULL) {
1176
        /*
1177
         * TODO: Detect encoding mismatch. We should start by comparing
1178
         * in->encoder->name and handler->name, but there are a few
1179
         * compatible encodings like UTF-16 and UCS-2 or UTF-32 and UCS-4.
1180
         */
1181
0
        xmlCharEncCloseFunc(handler);
1182
0
        return (0);
1183
0
    }
1184
1185
70.3k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1186
70.3k
    in->encoder = handler;
1187
1188
    /*
1189
     * Is there already some content down the pipe to convert ?
1190
     */
1191
70.3k
    if (xmlBufIsEmpty(in->buffer) == 0) {
1192
70.3k
        size_t processed, use, consumed;
1193
1194
        /*
1195
         * Specific handling of the Byte Order Mark for
1196
         * UTF-16
1197
         */
1198
70.3k
        if ((handler->name != NULL) &&
1199
70.3k
            (!strcmp(handler->name, "UTF-16LE") ||
1200
70.3k
             !strcmp(handler->name, "UTF-16")) &&
1201
70.3k
            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1202
75
            input->cur += 2;
1203
75
        }
1204
70.3k
        if ((handler->name != NULL) &&
1205
70.3k
            (!strcmp(handler->name, "UTF-16BE")) &&
1206
70.3k
            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1207
658
            input->cur += 2;
1208
658
        }
1209
        /*
1210
         * Errata on XML-1.0 June 20 2001
1211
         * Specific handling of the Byte Order Mark for
1212
         * UTF-8
1213
         */
1214
70.3k
        if ((handler->name != NULL) &&
1215
70.3k
            (!strcmp(handler->name, "UTF-8")) &&
1216
70.3k
            (input->cur[0] == 0xEF) &&
1217
70.3k
            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1218
0
            input->cur += 3;
1219
0
        }
1220
1221
        /*
1222
         * Shrink the current input buffer.
1223
         * Move it as the raw buffer and create a new input buffer
1224
         */
1225
70.3k
        processed = input->cur - input->base;
1226
70.3k
        xmlBufShrink(in->buffer, processed);
1227
70.3k
        input->consumed += processed;
1228
70.3k
        in->raw = in->buffer;
1229
70.3k
        in->buffer = xmlBufCreate();
1230
70.3k
        in->rawconsumed = processed;
1231
70.3k
        use = xmlBufUse(in->raw);
1232
1233
70.3k
        nbchars = xmlCharEncInput(in, 0);
1234
70.3k
        xmlBufResetInput(in->buffer, input);
1235
70.3k
        if (nbchars < 0) {
1236
449
            xmlErrInternal(ctxt,
1237
449
                           "switching encoding: encoder error\n",
1238
449
                           NULL);
1239
449
            return (-1);
1240
449
        }
1241
69.8k
        consumed = use - xmlBufUse(in->raw);
1242
69.8k
        if ((consumed > ULONG_MAX) ||
1243
69.8k
            (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1244
0
            in->rawconsumed = ULONG_MAX;
1245
69.8k
        else
1246
69.8k
      in->rawconsumed += consumed;
1247
69.8k
    }
1248
69.8k
    return (0);
1249
70.3k
}
1250
1251
/**
1252
 * xmlSwitchToEncoding:
1253
 * @ctxt:  the parser context
1254
 * @handler:  the encoding handler
1255
 *
1256
 * change the input functions when discovering the character encoding
1257
 * of a given entity.
1258
 *
1259
 * Returns 0 in case of success, -1 otherwise
1260
 */
1261
int
1262
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1263
66.1k
{
1264
66.1k
    if (ctxt == NULL)
1265
0
        return(-1);
1266
66.1k
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1267
66.1k
}
1268
1269
/************************************************************************
1270
 *                  *
1271
 *  Commodity functions to handle entities processing   *
1272
 *                  *
1273
 ************************************************************************/
1274
1275
/**
1276
 * xmlFreeInputStream:
1277
 * @input:  an xmlParserInputPtr
1278
 *
1279
 * Free up an input stream.
1280
 */
1281
void
1282
267k
xmlFreeInputStream(xmlParserInputPtr input) {
1283
267k
    if (input == NULL) return;
1284
1285
263k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1286
263k
    if (input->directory != NULL) xmlFree((char *) input->directory);
1287
263k
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1288
263k
    if (input->version != NULL) xmlFree((char *) input->version);
1289
263k
    if ((input->free != NULL) && (input->base != NULL))
1290
0
        input->free((xmlChar *) input->base);
1291
263k
    if (input->buf != NULL)
1292
170k
        xmlFreeParserInputBuffer(input->buf);
1293
263k
    xmlFree(input);
1294
263k
}
1295
1296
/**
1297
 * xmlNewInputStream:
1298
 * @ctxt:  an XML parser context
1299
 *
1300
 * Create a new input stream structure.
1301
 *
1302
 * Returns the new input stream or NULL
1303
 */
1304
xmlParserInputPtr
1305
263k
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1306
263k
    xmlParserInputPtr input;
1307
1308
263k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1309
263k
    if (input == NULL) {
1310
34
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1311
34
  return(NULL);
1312
34
    }
1313
263k
    memset(input, 0, sizeof(xmlParserInput));
1314
263k
    input->line = 1;
1315
263k
    input->col = 1;
1316
263k
    input->standalone = -1;
1317
1318
    /*
1319
     * If the context is NULL the id cannot be initialized, but that
1320
     * should not happen while parsing which is the situation where
1321
     * the id is actually needed.
1322
     */
1323
263k
    if (ctxt != NULL) {
1324
263k
        if (input->id >= INT_MAX) {
1325
0
            xmlErrMemory(ctxt, "Input ID overflow\n");
1326
0
            return(NULL);
1327
0
        }
1328
263k
        input->id = ctxt->input_id++;
1329
263k
    }
1330
1331
263k
    return(input);
1332
263k
}
1333
1334
/**
1335
 * xmlNewIOInputStream:
1336
 * @ctxt:  an XML parser context
1337
 * @input:  an I/O Input
1338
 * @enc:  the charset encoding if known
1339
 *
1340
 * Create a new input stream structure encapsulating the @input into
1341
 * a stream suitable for the parser.
1342
 *
1343
 * Returns the new input stream or NULL
1344
 */
1345
xmlParserInputPtr
1346
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1347
0
              xmlCharEncoding enc) {
1348
0
    xmlParserInputPtr inputStream;
1349
1350
0
    if (input == NULL) return(NULL);
1351
0
    if (xmlParserDebugEntities)
1352
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1353
0
    inputStream = xmlNewInputStream(ctxt);
1354
0
    if (inputStream == NULL) {
1355
0
  return(NULL);
1356
0
    }
1357
0
    inputStream->filename = NULL;
1358
0
    inputStream->buf = input;
1359
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1360
1361
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1362
0
        xmlSwitchEncoding(ctxt, enc);
1363
0
    }
1364
1365
0
    return(inputStream);
1366
0
}
1367
1368
/**
1369
 * xmlNewEntityInputStream:
1370
 * @ctxt:  an XML parser context
1371
 * @entity:  an Entity pointer
1372
 *
1373
 * Create a new input stream based on an xmlEntityPtr
1374
 *
1375
 * Returns the new input stream or NULL
1376
 */
1377
xmlParserInputPtr
1378
69.5k
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1379
69.5k
    xmlParserInputPtr input;
1380
1381
69.5k
    if (entity == NULL) {
1382
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1383
0
                 NULL);
1384
0
  return(NULL);
1385
0
    }
1386
69.5k
    if (xmlParserDebugEntities)
1387
0
  xmlGenericError(xmlGenericErrorContext,
1388
0
    "new input from entity: %s\n", entity->name);
1389
69.5k
    if (entity->content == NULL) {
1390
21.5k
  switch (entity->etype) {
1391
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1392
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1393
0
                   entity->name);
1394
0
                break;
1395
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1396
21.5k
            case XML_EXTERNAL_PARAMETER_ENTITY:
1397
21.5k
    input = xmlLoadExternalEntity((char *) entity->URI,
1398
21.5k
           (char *) entity->ExternalID, ctxt);
1399
21.5k
                if (input != NULL)
1400
17.8k
                    input->entity = entity;
1401
21.5k
                return(input);
1402
0
            case XML_INTERNAL_GENERAL_ENTITY:
1403
0
          xmlErrInternal(ctxt,
1404
0
          "Internal entity %s without content !\n",
1405
0
                   entity->name);
1406
0
                break;
1407
35
            case XML_INTERNAL_PARAMETER_ENTITY:
1408
35
          xmlErrInternal(ctxt,
1409
35
          "Internal parameter entity %s without content !\n",
1410
35
                   entity->name);
1411
35
                break;
1412
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1413
0
          xmlErrInternal(ctxt,
1414
0
          "Predefined entity %s without content !\n",
1415
0
                   entity->name);
1416
0
                break;
1417
21.5k
  }
1418
35
  return(NULL);
1419
21.5k
    }
1420
48.0k
    input = xmlNewInputStream(ctxt);
1421
48.0k
    if (input == NULL) {
1422
20
  return(NULL);
1423
20
    }
1424
47.9k
    if (entity->URI != NULL)
1425
0
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1426
47.9k
    input->base = entity->content;
1427
47.9k
    if (entity->length == 0)
1428
202
        entity->length = xmlStrlen(entity->content);
1429
47.9k
    input->cur = entity->content;
1430
47.9k
    input->length = entity->length;
1431
47.9k
    input->end = &entity->content[input->length];
1432
47.9k
    input->entity = entity;
1433
47.9k
    return(input);
1434
48.0k
}
1435
1436
/**
1437
 * xmlNewStringInputStream:
1438
 * @ctxt:  an XML parser context
1439
 * @buffer:  an memory buffer
1440
 *
1441
 * Create a new input stream based on a memory buffer.
1442
 * Returns the new input stream
1443
 */
1444
xmlParserInputPtr
1445
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1446
0
    xmlParserInputPtr input;
1447
0
    xmlParserInputBufferPtr buf;
1448
1449
0
    if (buffer == NULL) {
1450
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1451
0
                 NULL);
1452
0
  return(NULL);
1453
0
    }
1454
0
    if (xmlParserDebugEntities)
1455
0
  xmlGenericError(xmlGenericErrorContext,
1456
0
    "new fixed input: %.30s\n", buffer);
1457
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1458
0
                                        xmlStrlen(buffer),
1459
0
                                        XML_CHAR_ENCODING_NONE);
1460
0
    if (buf == NULL) {
1461
0
  xmlErrMemory(ctxt, NULL);
1462
0
        return(NULL);
1463
0
    }
1464
0
    input = xmlNewInputStream(ctxt);
1465
0
    if (input == NULL) {
1466
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1467
0
  xmlFreeParserInputBuffer(buf);
1468
0
  return(NULL);
1469
0
    }
1470
0
    input->buf = buf;
1471
0
    xmlBufResetInput(input->buf->buffer, input);
1472
0
    return(input);
1473
0
}
1474
1475
/**
1476
 * xmlNewInputFromFile:
1477
 * @ctxt:  an XML parser context
1478
 * @filename:  the filename to use as entity
1479
 *
1480
 * Create a new input stream based on a file or an URL.
1481
 *
1482
 * Returns the new input stream or NULL in case of error
1483
 */
1484
xmlParserInputPtr
1485
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1486
0
    xmlParserInputBufferPtr buf;
1487
0
    xmlParserInputPtr inputStream;
1488
0
    char *directory = NULL;
1489
0
    xmlChar *URI = NULL;
1490
1491
0
    if (xmlParserDebugEntities)
1492
0
  xmlGenericError(xmlGenericErrorContext,
1493
0
    "new input from file: %s\n", filename);
1494
0
    if (ctxt == NULL) return(NULL);
1495
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1496
0
    if (buf == NULL) {
1497
0
  if (filename == NULL)
1498
0
      __xmlLoaderErr(ctxt,
1499
0
                     "failed to load external entity: NULL filename \n",
1500
0
         NULL);
1501
0
  else
1502
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1503
0
         (const char *) filename);
1504
0
  return(NULL);
1505
0
    }
1506
1507
0
    inputStream = xmlNewInputStream(ctxt);
1508
0
    if (inputStream == NULL) {
1509
0
  xmlFreeParserInputBuffer(buf);
1510
0
  return(NULL);
1511
0
    }
1512
1513
0
    inputStream->buf = buf;
1514
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1515
0
    if (inputStream == NULL)
1516
0
        return(NULL);
1517
1518
0
    if (inputStream->filename == NULL)
1519
0
  URI = xmlStrdup((xmlChar *) filename);
1520
0
    else
1521
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1522
0
    directory = xmlParserGetDirectory((const char *) URI);
1523
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1524
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1525
0
    if (URI != NULL) xmlFree((char *) URI);
1526
0
    inputStream->directory = directory;
1527
1528
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1529
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1530
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1531
0
    return(inputStream);
1532
0
}
1533
1534
/************************************************************************
1535
 *                  *
1536
 *    Commodity functions to handle parser contexts   *
1537
 *                  *
1538
 ************************************************************************/
1539
1540
/**
1541
 * xmlInitSAXParserCtxt:
1542
 * @ctxt:  XML parser context
1543
 * @sax:  SAX handlert
1544
 * @userData:  user data
1545
 *
1546
 * Initialize a SAX parser context
1547
 *
1548
 * Returns 0 in case of success and -1 in case of error
1549
 */
1550
1551
static int
1552
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1553
                     void *userData)
1554
370k
{
1555
370k
    xmlParserInputPtr input;
1556
1557
370k
    if(ctxt==NULL) {
1558
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1559
0
        return(-1);
1560
0
    }
1561
1562
370k
    xmlInitParser();
1563
1564
370k
    if (ctxt->dict == NULL)
1565
370k
  ctxt->dict = xmlDictCreate();
1566
370k
    if (ctxt->dict == NULL) {
1567
43
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1568
43
  return(-1);
1569
43
    }
1570
369k
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1571
1572
369k
    if (ctxt->sax == NULL)
1573
369k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1574
369k
    if (ctxt->sax == NULL) {
1575
28
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1576
28
  return(-1);
1577
28
    }
1578
369k
    if (sax == NULL) {
1579
360k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1580
360k
        xmlSAXVersion(ctxt->sax, 2);
1581
360k
        ctxt->userData = ctxt;
1582
360k
    } else {
1583
9.45k
  if (sax->initialized == XML_SAX2_MAGIC) {
1584
9.45k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1585
9.45k
        } else {
1586
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1587
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1588
0
        }
1589
9.45k
        ctxt->userData = userData ? userData : ctxt;
1590
9.45k
    }
1591
1592
369k
    ctxt->maxatts = 0;
1593
369k
    ctxt->atts = NULL;
1594
    /* Allocate the Input stack */
1595
369k
    if (ctxt->inputTab == NULL) {
1596
369k
  ctxt->inputTab = (xmlParserInputPtr *)
1597
369k
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1598
369k
  ctxt->inputMax = 5;
1599
369k
    }
1600
369k
    if (ctxt->inputTab == NULL) {
1601
20
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1602
20
  ctxt->inputNr = 0;
1603
20
  ctxt->inputMax = 0;
1604
20
  ctxt->input = NULL;
1605
20
  return(-1);
1606
20
    }
1607
369k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1608
0
        xmlFreeInputStream(input);
1609
0
    }
1610
369k
    ctxt->inputNr = 0;
1611
369k
    ctxt->input = NULL;
1612
1613
369k
    ctxt->version = NULL;
1614
369k
    ctxt->encoding = NULL;
1615
369k
    ctxt->standalone = -1;
1616
369k
    ctxt->hasExternalSubset = 0;
1617
369k
    ctxt->hasPErefs = 0;
1618
369k
    ctxt->html = 0;
1619
369k
    ctxt->external = 0;
1620
369k
    ctxt->instate = XML_PARSER_START;
1621
369k
    ctxt->token = 0;
1622
369k
    ctxt->directory = NULL;
1623
1624
    /* Allocate the Node stack */
1625
369k
    if (ctxt->nodeTab == NULL) {
1626
369k
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1627
369k
  ctxt->nodeMax = 10;
1628
369k
    }
1629
369k
    if (ctxt->nodeTab == NULL) {
1630
15
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1631
15
  ctxt->nodeNr = 0;
1632
15
  ctxt->nodeMax = 0;
1633
15
  ctxt->node = NULL;
1634
15
  ctxt->inputNr = 0;
1635
15
  ctxt->inputMax = 0;
1636
15
  ctxt->input = NULL;
1637
15
  return(-1);
1638
15
    }
1639
369k
    ctxt->nodeNr = 0;
1640
369k
    ctxt->node = NULL;
1641
1642
    /* Allocate the Name stack */
1643
369k
    if (ctxt->nameTab == NULL) {
1644
369k
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1645
369k
  ctxt->nameMax = 10;
1646
369k
    }
1647
369k
    if (ctxt->nameTab == NULL) {
1648
17
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1649
17
  ctxt->nodeNr = 0;
1650
17
  ctxt->nodeMax = 0;
1651
17
  ctxt->node = NULL;
1652
17
  ctxt->inputNr = 0;
1653
17
  ctxt->inputMax = 0;
1654
17
  ctxt->input = NULL;
1655
17
  ctxt->nameNr = 0;
1656
17
  ctxt->nameMax = 0;
1657
17
  ctxt->name = NULL;
1658
17
  return(-1);
1659
17
    }
1660
369k
    ctxt->nameNr = 0;
1661
369k
    ctxt->name = NULL;
1662
1663
    /* Allocate the space stack */
1664
369k
    if (ctxt->spaceTab == NULL) {
1665
369k
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1666
369k
  ctxt->spaceMax = 10;
1667
369k
    }
1668
369k
    if (ctxt->spaceTab == NULL) {
1669
24
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1670
24
  ctxt->nodeNr = 0;
1671
24
  ctxt->nodeMax = 0;
1672
24
  ctxt->node = NULL;
1673
24
  ctxt->inputNr = 0;
1674
24
  ctxt->inputMax = 0;
1675
24
  ctxt->input = NULL;
1676
24
  ctxt->nameNr = 0;
1677
24
  ctxt->nameMax = 0;
1678
24
  ctxt->name = NULL;
1679
24
  ctxt->spaceNr = 0;
1680
24
  ctxt->spaceMax = 0;
1681
24
  ctxt->space = NULL;
1682
24
  return(-1);
1683
24
    }
1684
369k
    ctxt->spaceNr = 1;
1685
369k
    ctxt->spaceMax = 10;
1686
369k
    ctxt->spaceTab[0] = -1;
1687
369k
    ctxt->space = &ctxt->spaceTab[0];
1688
369k
    ctxt->myDoc = NULL;
1689
369k
    ctxt->wellFormed = 1;
1690
369k
    ctxt->nsWellFormed = 1;
1691
369k
    ctxt->valid = 1;
1692
369k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1693
369k
    if (ctxt->loadsubset) {
1694
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1695
0
    }
1696
369k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1697
369k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1698
369k
    if (ctxt->pedantic) {
1699
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1700
0
    }
1701
369k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1702
369k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1703
369k
    if (ctxt->keepBlanks == 0) {
1704
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1705
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1706
0
    }
1707
1708
369k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1709
369k
    ctxt->vctxt.userData = ctxt;
1710
369k
    ctxt->vctxt.error = xmlParserValidityError;
1711
369k
    ctxt->vctxt.warning = xmlParserValidityWarning;
1712
369k
    if (ctxt->validate) {
1713
0
  if (xmlGetWarningsDefaultValue == 0)
1714
0
      ctxt->vctxt.warning = NULL;
1715
0
  else
1716
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1717
0
  ctxt->vctxt.nodeMax = 0;
1718
0
        ctxt->options |= XML_PARSE_DTDVALID;
1719
0
    }
1720
369k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1721
369k
    if (ctxt->replaceEntities) {
1722
0
        ctxt->options |= XML_PARSE_NOENT;
1723
0
    }
1724
369k
    ctxt->record_info = 0;
1725
369k
    ctxt->checkIndex = 0;
1726
369k
    ctxt->inSubset = 0;
1727
369k
    ctxt->errNo = XML_ERR_OK;
1728
369k
    ctxt->depth = 0;
1729
369k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1730
369k
    ctxt->catalogs = NULL;
1731
369k
    ctxt->sizeentities = 0;
1732
369k
    ctxt->sizeentcopy = 0;
1733
369k
    ctxt->input_id = 1;
1734
369k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1735
369k
    return(0);
1736
369k
}
1737
1738
/**
1739
 * xmlInitParserCtxt:
1740
 * @ctxt:  an XML parser context
1741
 *
1742
 * DEPRECATED: Internal function which will be made private in a future
1743
 * version.
1744
 *
1745
 * Initialize a parser context
1746
 *
1747
 * Returns 0 in case of success and -1 in case of error
1748
 */
1749
1750
int
1751
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1752
0
{
1753
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1754
0
}
1755
1756
/**
1757
 * xmlFreeParserCtxt:
1758
 * @ctxt:  an XML parser context
1759
 *
1760
 * Free all the memory used by a parser context. However the parsed
1761
 * document in ctxt->myDoc is not freed.
1762
 */
1763
1764
void
1765
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1766
370k
{
1767
370k
    xmlParserInputPtr input;
1768
1769
370k
    if (ctxt == NULL) return;
1770
1771
565k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1772
195k
        xmlFreeInputStream(input);
1773
195k
    }
1774
370k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1775
370k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1776
370k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1777
370k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1778
370k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1779
370k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1780
370k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1781
370k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1782
370k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1783
#ifdef LIBXML_SAX1_ENABLED
1784
    if ((ctxt->sax != NULL) &&
1785
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1786
#else
1787
370k
    if (ctxt->sax != NULL)
1788
369k
#endif /* LIBXML_SAX1_ENABLED */
1789
369k
        xmlFree(ctxt->sax);
1790
370k
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1791
370k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1792
370k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1793
370k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1794
370k
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1795
370k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1796
370k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1797
370k
    if (ctxt->attsDefault != NULL)
1798
14.6k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1799
370k
    if (ctxt->attsSpecial != NULL)
1800
15.0k
        xmlHashFree(ctxt->attsSpecial, NULL);
1801
370k
    if (ctxt->freeElems != NULL) {
1802
0
        xmlNodePtr cur, next;
1803
1804
0
  cur = ctxt->freeElems;
1805
0
  while (cur != NULL) {
1806
0
      next = cur->next;
1807
0
      xmlFree(cur);
1808
0
      cur = next;
1809
0
  }
1810
0
    }
1811
370k
    if (ctxt->freeAttrs != NULL) {
1812
0
        xmlAttrPtr cur, next;
1813
1814
0
  cur = ctxt->freeAttrs;
1815
0
  while (cur != NULL) {
1816
0
      next = cur->next;
1817
0
      xmlFree(cur);
1818
0
      cur = next;
1819
0
  }
1820
0
    }
1821
    /*
1822
     * cleanup the error strings
1823
     */
1824
370k
    if (ctxt->lastError.message != NULL)
1825
158k
        xmlFree(ctxt->lastError.message);
1826
370k
    if (ctxt->lastError.file != NULL)
1827
155k
        xmlFree(ctxt->lastError.file);
1828
370k
    if (ctxt->lastError.str1 != NULL)
1829
105k
        xmlFree(ctxt->lastError.str1);
1830
370k
    if (ctxt->lastError.str2 != NULL)
1831
41.2k
        xmlFree(ctxt->lastError.str2);
1832
370k
    if (ctxt->lastError.str3 != NULL)
1833
16.7k
        xmlFree(ctxt->lastError.str3);
1834
1835
370k
#ifdef LIBXML_CATALOG_ENABLED
1836
370k
    if (ctxt->catalogs != NULL)
1837
326
  xmlCatalogFreeLocal(ctxt->catalogs);
1838
370k
#endif
1839
370k
    xmlFree(ctxt);
1840
370k
}
1841
1842
/**
1843
 * xmlNewParserCtxt:
1844
 *
1845
 * Allocate and initialize a new parser context.
1846
 *
1847
 * Returns the xmlParserCtxtPtr or NULL
1848
 */
1849
1850
xmlParserCtxtPtr
1851
xmlNewParserCtxt(void)
1852
360k
{
1853
360k
    return(xmlNewSAXParserCtxt(NULL, NULL));
1854
360k
}
1855
1856
/**
1857
 * xmlNewSAXParserCtxt:
1858
 * @sax:  SAX handler
1859
 * @userData:  user data
1860
 *
1861
 * Allocate and initialize a new SAX parser context. If userData is NULL,
1862
 * the parser context will be passed as user data.
1863
 *
1864
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1865
 */
1866
1867
xmlParserCtxtPtr
1868
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1869
370k
{
1870
370k
    xmlParserCtxtPtr ctxt;
1871
1872
370k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1873
370k
    if (ctxt == NULL) {
1874
394
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1875
394
  return(NULL);
1876
394
    }
1877
370k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1878
370k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1879
147
        xmlFreeParserCtxt(ctxt);
1880
147
  return(NULL);
1881
147
    }
1882
369k
    return(ctxt);
1883
370k
}
1884
1885
/************************************************************************
1886
 *                  *
1887
 *    Handling of node information        *
1888
 *                  *
1889
 ************************************************************************/
1890
1891
/**
1892
 * xmlClearParserCtxt:
1893
 * @ctxt:  an XML parser context
1894
 *
1895
 * Clear (release owned resources) and reinitialize a parser context
1896
 */
1897
1898
void
1899
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1900
0
{
1901
0
  if (ctxt==NULL)
1902
0
    return;
1903
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1904
0
  xmlCtxtReset(ctxt);
1905
0
}
1906
1907
1908
/**
1909
 * xmlParserFindNodeInfo:
1910
 * @ctx:  an XML parser context
1911
 * @node:  an XML node within the tree
1912
 *
1913
 * DEPRECATED: Don't use.
1914
 *
1915
 * Find the parser node info struct for a given node
1916
 *
1917
 * Returns an xmlParserNodeInfo block pointer or NULL
1918
 */
1919
const xmlParserNodeInfo *
1920
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1921
0
{
1922
0
    unsigned long pos;
1923
1924
0
    if ((ctx == NULL) || (node == NULL))
1925
0
        return (NULL);
1926
    /* Find position where node should be at */
1927
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1928
0
    if (pos < ctx->node_seq.length
1929
0
        && ctx->node_seq.buffer[pos].node == node)
1930
0
        return &ctx->node_seq.buffer[pos];
1931
0
    else
1932
0
        return NULL;
1933
0
}
1934
1935
1936
/**
1937
 * xmlInitNodeInfoSeq:
1938
 * @seq:  a node info sequence pointer
1939
 *
1940
 * DEPRECATED: Don't use.
1941
 *
1942
 * -- Initialize (set to initial state) node info sequence
1943
 */
1944
void
1945
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1946
369k
{
1947
369k
    if (seq == NULL)
1948
0
        return;
1949
369k
    seq->length = 0;
1950
369k
    seq->maximum = 0;
1951
369k
    seq->buffer = NULL;
1952
369k
}
1953
1954
/**
1955
 * xmlClearNodeInfoSeq:
1956
 * @seq:  a node info sequence pointer
1957
 *
1958
 * DEPRECATED: Don't use.
1959
 *
1960
 * -- Clear (release memory and reinitialize) node
1961
 *   info sequence
1962
 */
1963
void
1964
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1965
0
{
1966
0
    if (seq == NULL)
1967
0
        return;
1968
0
    if (seq->buffer != NULL)
1969
0
        xmlFree(seq->buffer);
1970
0
    xmlInitNodeInfoSeq(seq);
1971
0
}
1972
1973
/**
1974
 * xmlParserFindNodeInfoIndex:
1975
 * @seq:  a node info sequence pointer
1976
 * @node:  an XML node pointer
1977
 *
1978
 * DEPRECATED: Don't use.
1979
 *
1980
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1981
 *   the given node is or should be at in a sorted sequence
1982
 *
1983
 * Returns a long indicating the position of the record
1984
 */
1985
unsigned long
1986
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1987
                           const xmlNodePtr node)
1988
0
{
1989
0
    unsigned long upper, lower, middle;
1990
0
    int found = 0;
1991
1992
0
    if ((seq == NULL) || (node == NULL))
1993
0
        return ((unsigned long) -1);
1994
1995
    /* Do a binary search for the key */
1996
0
    lower = 1;
1997
0
    upper = seq->length;
1998
0
    middle = 0;
1999
0
    while (lower <= upper && !found) {
2000
0
        middle = lower + (upper - lower) / 2;
2001
0
        if (node == seq->buffer[middle - 1].node)
2002
0
            found = 1;
2003
0
        else if (node < seq->buffer[middle - 1].node)
2004
0
            upper = middle - 1;
2005
0
        else
2006
0
            lower = middle + 1;
2007
0
    }
2008
2009
    /* Return position */
2010
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2011
0
        return middle;
2012
0
    else
2013
0
        return middle - 1;
2014
0
}
2015
2016
2017
/**
2018
 * xmlParserAddNodeInfo:
2019
 * @ctxt:  an XML parser context
2020
 * @info:  a node info sequence pointer
2021
 *
2022
 * DEPRECATED: Don't use.
2023
 *
2024
 * Insert node info record into the sorted sequence
2025
 */
2026
void
2027
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2028
                     const xmlParserNodeInfoPtr info)
2029
0
{
2030
0
    unsigned long pos;
2031
2032
0
    if ((ctxt == NULL) || (info == NULL)) return;
2033
2034
    /* Find pos and check to see if node is already in the sequence */
2035
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2036
0
                                     info->node);
2037
2038
0
    if ((pos < ctxt->node_seq.length) &&
2039
0
        (ctxt->node_seq.buffer != NULL) &&
2040
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2041
0
        ctxt->node_seq.buffer[pos] = *info;
2042
0
    }
2043
2044
    /* Otherwise, we need to add new node to buffer */
2045
0
    else {
2046
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2047
0
      (ctxt->node_seq.buffer == NULL)) {
2048
0
            xmlParserNodeInfo *tmp_buffer;
2049
0
            unsigned int byte_size;
2050
2051
0
            if (ctxt->node_seq.maximum == 0)
2052
0
                ctxt->node_seq.maximum = 2;
2053
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2054
0
      (2 * ctxt->node_seq.maximum));
2055
2056
0
            if (ctxt->node_seq.buffer == NULL)
2057
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2058
0
            else
2059
0
                tmp_buffer =
2060
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2061
0
                                                     byte_size);
2062
2063
0
            if (tmp_buffer == NULL) {
2064
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
2065
0
                return;
2066
0
            }
2067
0
            ctxt->node_seq.buffer = tmp_buffer;
2068
0
            ctxt->node_seq.maximum *= 2;
2069
0
        }
2070
2071
        /* If position is not at end, move elements out of the way */
2072
0
        if (pos != ctxt->node_seq.length) {
2073
0
            unsigned long i;
2074
2075
0
            for (i = ctxt->node_seq.length; i > pos; i--)
2076
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2077
0
        }
2078
2079
        /* Copy element and increase length */
2080
0
        ctxt->node_seq.buffer[pos] = *info;
2081
0
        ctxt->node_seq.length++;
2082
0
    }
2083
0
}
2084
2085
/************************************************************************
2086
 *                  *
2087
 *    Defaults settings         *
2088
 *                  *
2089
 ************************************************************************/
2090
/**
2091
 * xmlPedanticParserDefault:
2092
 * @val:  int 0 or 1
2093
 *
2094
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2095
 *
2096
 * Set and return the previous value for enabling pedantic warnings.
2097
 *
2098
 * Returns the last value for 0 for no substitution, 1 for substitution.
2099
 */
2100
2101
int
2102
0
xmlPedanticParserDefault(int val) {
2103
0
    int old = xmlPedanticParserDefaultValue;
2104
2105
0
    xmlPedanticParserDefaultValue = val;
2106
0
    return(old);
2107
0
}
2108
2109
/**
2110
 * xmlLineNumbersDefault:
2111
 * @val:  int 0 or 1
2112
 *
2113
 * DEPRECATED: The modern options API always enables line numbers.
2114
 *
2115
 * Set and return the previous value for enabling line numbers in elements
2116
 * contents. This may break on old application and is turned off by default.
2117
 *
2118
 * Returns the last value for 0 for no substitution, 1 for substitution.
2119
 */
2120
2121
int
2122
0
xmlLineNumbersDefault(int val) {
2123
0
    int old = xmlLineNumbersDefaultValue;
2124
2125
0
    xmlLineNumbersDefaultValue = val;
2126
0
    return(old);
2127
0
}
2128
2129
/**
2130
 * xmlSubstituteEntitiesDefault:
2131
 * @val:  int 0 or 1
2132
 *
2133
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2134
 *
2135
 * Set and return the previous value for default entity support.
2136
 * Initially the parser always keep entity references instead of substituting
2137
 * entity values in the output. This function has to be used to change the
2138
 * default parser behavior
2139
 * SAX::substituteEntities() has to be used for changing that on a file by
2140
 * file basis.
2141
 *
2142
 * Returns the last value for 0 for no substitution, 1 for substitution.
2143
 */
2144
2145
int
2146
0
xmlSubstituteEntitiesDefault(int val) {
2147
0
    int old = xmlSubstituteEntitiesDefaultValue;
2148
2149
0
    xmlSubstituteEntitiesDefaultValue = val;
2150
0
    return(old);
2151
0
}
2152
2153
/**
2154
 * xmlKeepBlanksDefault:
2155
 * @val:  int 0 or 1
2156
 *
2157
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2158
 *
2159
 * Set and return the previous value for default blanks text nodes support.
2160
 * The 1.x version of the parser used an heuristic to try to detect
2161
 * ignorable white spaces. As a result the SAX callback was generating
2162
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2163
 * using the DOM output text nodes containing those blanks were not generated.
2164
 * The 2.x and later version will switch to the XML standard way and
2165
 * ignorableWhitespace() are only generated when running the parser in
2166
 * validating mode and when the current element doesn't allow CDATA or
2167
 * mixed content.
2168
 * This function is provided as a way to force the standard behavior
2169
 * on 1.X libs and to switch back to the old mode for compatibility when
2170
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2171
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2172
 * nodes generated.
2173
 * This value also affect autogeneration of indentation when saving code
2174
 * if blanks sections are kept, indentation is not generated.
2175
 *
2176
 * Returns the last value for 0 for no substitution, 1 for substitution.
2177
 */
2178
2179
int
2180
0
xmlKeepBlanksDefault(int val) {
2181
0
    int old = xmlKeepBlanksDefaultValue;
2182
2183
0
    xmlKeepBlanksDefaultValue = val;
2184
0
    if (!val) xmlIndentTreeOutput = 1;
2185
0
    return(old);
2186
0
}
2187