Coverage Report

Created: 2023-11-19 06:13

/src/libxml2-2.11.5/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
8.85M
#define CUR(ctxt) ctxt->input->cur
43
8.85M
#define END(ctxt) ctxt->input->end
44
8.85M
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * check the compiled lib version against the include one.
61
 * This can warn or immediately kill the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a memory allocation failure error
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
0
{
102
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
0
        (ctxt->instate == XML_PARSER_EOF))
104
0
  return;
105
0
    if (ctxt != NULL) {
106
0
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
0
        ctxt->instate = XML_PARSER_EOF;
108
0
        ctxt->disableSAX = 1;
109
0
    }
110
0
    if (extra)
111
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
0
                        NULL, NULL, 0, 0,
114
0
                        "Memory allocation failed : %s\n", extra);
115
0
    else
116
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
0
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
0
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  a string info
127
 * @str2:  a string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
3.18k
{
135
3.18k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
3.18k
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
3.18k
    if (ctxt != NULL)
139
3.18k
        ctxt->errNo = xmlerr;
140
3.18k
    __xmlRaiseError(NULL, NULL, NULL,
141
3.18k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
3.18k
                    NULL, 0, (const char *) str1, (const char *) str2,
143
3.18k
                    NULL, 0, 0, msg, str1, str2);
144
3.18k
    if (ctxt != NULL) {
145
3.18k
        ctxt->wellFormed = 0;
146
3.18k
        if (ctxt->recovery == 0)
147
3.18k
            ctxt->disableSAX = 1;
148
3.18k
    }
149
3.18k
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
13
{
162
13
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
13
        (ctxt->instate == XML_PARSER_EOF))
164
0
  return;
165
13
    if (ctxt != NULL)
166
13
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
13
    __xmlRaiseError(NULL, NULL, NULL,
168
13
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
13
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
13
                    0, 0, msg, str);
171
13
    if (ctxt != NULL) {
172
13
        ctxt->wellFormed = 0;
173
13
        if (ctxt->recovery == 0)
174
13
            ctxt->disableSAX = 1;
175
13
    }
176
13
}
177
178
/**
179
 * xmlErrEncodingInt:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @msg:  the error message
183
 * @val:  an integer value
184
 *
185
 * Handle an encoding error carrying an integer value
186
 */
187
static void LIBXML_ATTR_FORMAT(3,0)
188
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189
                  const char *msg, int val)
190
2.82k
{
191
2.82k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
2.82k
        (ctxt->instate == XML_PARSER_EOF))
193
0
  return;
194
2.82k
    if (ctxt != NULL)
195
2.82k
        ctxt->errNo = error;
196
2.82k
    __xmlRaiseError(NULL, NULL, NULL,
197
2.82k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198
2.82k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199
2.82k
    if (ctxt != NULL) {
200
2.82k
        ctxt->wellFormed = 0;
201
2.82k
        if (ctxt->recovery == 0)
202
2.82k
            ctxt->disableSAX = 1;
203
2.82k
    }
204
2.82k
}
205
206
/**
207
 * xmlIsLetter:
208
 * @c:  a Unicode character (int)
209
 *
210
 * Check whether the character is allowed by the production
211
 * [84] Letter ::= BaseChar | Ideographic
212
 *
213
 * Returns 0 if not, non-zero otherwise
214
 */
215
int
216
0
xmlIsLetter(int c) {
217
0
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
218
0
}
219
220
/************************************************************************
221
 *                  *
222
 *    Input handling functions for progressive parsing  *
223
 *                  *
224
 ************************************************************************/
225
226
/* #define DEBUG_INPUT */
227
/* #define DEBUG_STACK */
228
/* #define DEBUG_PUSH */
229
230
231
/* we need to keep enough input to show errors in context */
232
9.19k
#define LINE_LEN        80
233
234
#ifdef DEBUG_INPUT
235
#define CHECK_BUFFER(in) check_buffer(in)
236
237
static
238
void check_buffer(xmlParserInputPtr in) {
239
    if (in->base != xmlBufContent(in->buf->buffer)) {
240
        xmlGenericError(xmlGenericErrorContext,
241
    "xmlParserInput: base mismatch problem\n");
242
    }
243
    if (in->cur < in->base) {
244
        xmlGenericError(xmlGenericErrorContext,
245
    "xmlParserInput: cur < base problem\n");
246
    }
247
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
248
        xmlGenericError(xmlGenericErrorContext,
249
    "xmlParserInput: cur > base + use problem\n");
250
    }
251
    xmlGenericError(xmlGenericErrorContext,"buffer %p : content %x, cur %d, use %d\n",
252
            (void *) in, (int) xmlBufContent(in->buf->buffer),
253
            in->cur - in->base, xmlBufUse(in->buf->buffer));
254
}
255
256
#else
257
#define CHECK_BUFFER(in)
258
#endif
259
260
261
/**
262
 * xmlHaltParser:
263
 * @ctxt:  an XML parser context
264
 *
265
 * Blocks further parser processing without overriding the error;
266
 * for internal use
267
 */
268
void
269
3.15k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
270
3.15k
    if (ctxt == NULL)
271
0
        return;
272
3.15k
    ctxt->instate = XML_PARSER_EOF;
273
3.15k
    ctxt->disableSAX = 1;
274
3.15k
    while (ctxt->inputNr > 1)
275
0
        xmlFreeInputStream(inputPop(ctxt));
276
3.15k
    if (ctxt->input != NULL) {
277
        /*
278
   * in case there was a specific allocation deallocate before
279
   * overriding base
280
   */
281
3.15k
        if (ctxt->input->free != NULL) {
282
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
283
0
      ctxt->input->free = NULL;
284
0
  }
285
3.15k
        if (ctxt->input->buf != NULL) {
286
3.13k
            xmlFreeParserInputBuffer(ctxt->input->buf);
287
3.13k
            ctxt->input->buf = NULL;
288
3.13k
        }
289
3.15k
  ctxt->input->cur = BAD_CAST"";
290
3.15k
        ctxt->input->length = 0;
291
3.15k
  ctxt->input->base = ctxt->input->cur;
292
3.15k
        ctxt->input->end = ctxt->input->cur;
293
3.15k
    }
294
3.15k
}
295
296
/**
297
 * xmlParserInputRead:
298
 * @in:  an XML parser input
299
 * @len:  an indicative size for the lookahead
300
 *
301
 * DEPRECATED: This function was internal and is deprecated.
302
 *
303
 * Returns -1 as this is an error to use it.
304
 */
305
int
306
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
307
0
    return(-1);
308
0
}
309
310
/**
311
 * xmlParserGrow:
312
 * @ctxt:  an XML parser context
313
 */
314
int
315
743k
xmlParserGrow(xmlParserCtxtPtr ctxt) {
316
743k
    xmlParserInputPtr in = ctxt->input;
317
743k
    xmlParserInputBufferPtr buf = in->buf;
318
743k
    ptrdiff_t curEnd = in->end - in->cur;
319
743k
    ptrdiff_t curBase = in->cur - in->base;
320
743k
    int ret;
321
322
743k
    if (buf == NULL)
323
0
        return(0);
324
    /* Don't grow push parser buffer. */
325
743k
    if (ctxt->progressive)
326
743k
        return(0);
327
    /* Don't grow memory buffers. */
328
0
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
329
0
        return(0);
330
331
0
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
332
0
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
333
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
334
0
        xmlErrInternal(ctxt, "Huge input lookup", NULL);
335
0
        xmlHaltParser(ctxt);
336
0
  return(-1);
337
0
    }
338
339
0
    if (curEnd >= INPUT_CHUNK)
340
0
        return(0);
341
342
0
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
343
0
    xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
344
345
    /* TODO: Get error code from xmlParserInputBufferGrow */
346
0
    if (ret < 0) {
347
0
        xmlErrInternal(ctxt, "Growing input buffer", NULL);
348
0
        xmlHaltParser(ctxt);
349
0
    }
350
351
0
    return(ret);
352
0
}
353
354
/**
355
 * xmlParserInputGrow:
356
 * @in:  an XML parser input
357
 * @len:  an indicative size for the lookahead
358
 *
359
 * DEPRECATED: Don't use.
360
 *
361
 * This function increases the input for the parser. It tries to
362
 * preserve pointers to the input buffer, and keep already read data
363
 *
364
 * Returns the number of chars read, or -1 in case of error; 0 indicates the
365
 * end of this entity
366
 */
367
int
368
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
369
0
    int ret;
370
0
    size_t indx;
371
372
0
    if ((in == NULL) || (len < 0)) return(-1);
373
#ifdef DEBUG_INPUT
374
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
375
#endif
376
0
    if (in->buf == NULL) return(-1);
377
0
    if (in->base == NULL) return(-1);
378
0
    if (in->cur == NULL) return(-1);
379
0
    if (in->buf->buffer == NULL) return(-1);
380
381
    /* Don't grow memory buffers. */
382
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
383
0
        return(0);
384
385
0
    CHECK_BUFFER(in);
386
387
0
    indx = in->cur - in->base;
388
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
389
390
0
  CHECK_BUFFER(in);
391
392
0
        return(0);
393
0
    }
394
0
    ret = xmlParserInputBufferGrow(in->buf, len);
395
396
0
    in->base = xmlBufContent(in->buf->buffer);
397
0
    if (in->base == NULL) {
398
0
        in->base = BAD_CAST "";
399
0
        in->cur = in->base;
400
0
        in->end = in->base;
401
0
        return(-1);
402
0
    }
403
0
    in->cur = in->base + indx;
404
0
    in->end = xmlBufEnd(in->buf->buffer);
405
406
0
    CHECK_BUFFER(in);
407
408
0
    return(ret);
409
0
}
410
411
/**
412
 * xmlParserShrink:
413
 * @ctxt:  an XML parser context
414
 */
415
void
416
9.19k
xmlParserShrink(xmlParserCtxtPtr ctxt) {
417
9.19k
    xmlParserInputPtr in = ctxt->input;
418
9.19k
    xmlParserInputBufferPtr buf = in->buf;
419
9.19k
    size_t used;
420
421
    /* Don't shrink pull parser memory buffers. */
422
9.19k
    if ((buf == NULL) ||
423
9.19k
        ((ctxt->progressive == 0) &&
424
9.19k
         (buf->encoder == NULL) && (buf->readcallback == NULL)))
425
0
        return;
426
427
9.19k
    used = in->cur - in->base;
428
    /*
429
     * Do not shrink large buffers of which only a tiny fraction
430
     * was consumed
431
     */
432
9.19k
    if (used > INPUT_CHUNK) {
433
9.19k
  size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
434
435
9.19k
  if (res > 0) {
436
9.19k
            used -= res;
437
9.19k
            if ((res > ULONG_MAX) ||
438
9.19k
                (in->consumed > ULONG_MAX - (unsigned long)res))
439
0
                in->consumed = ULONG_MAX;
440
9.19k
            else
441
9.19k
                in->consumed += res;
442
9.19k
  }
443
9.19k
    }
444
445
9.19k
    xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
446
9.19k
}
447
448
/**
449
 * xmlParserInputShrink:
450
 * @in:  an XML parser input
451
 *
452
 * DEPRECATED: Don't use.
453
 *
454
 * This function removes used input for the parser.
455
 */
456
void
457
0
xmlParserInputShrink(xmlParserInputPtr in) {
458
0
    size_t used;
459
0
    size_t ret;
460
461
#ifdef DEBUG_INPUT
462
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
463
#endif
464
0
    if (in == NULL) return;
465
0
    if (in->buf == NULL) return;
466
0
    if (in->base == NULL) return;
467
0
    if (in->cur == NULL) return;
468
0
    if (in->buf->buffer == NULL) return;
469
470
0
    CHECK_BUFFER(in);
471
472
0
    used = in->cur - in->base;
473
    /*
474
     * Do not shrink large buffers of which only a tiny fraction
475
     * was consumed
476
     */
477
0
    if (used > INPUT_CHUNK) {
478
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
479
0
  if (ret > 0) {
480
0
            used -= ret;
481
0
            if ((ret > ULONG_MAX) ||
482
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
483
0
                in->consumed = ULONG_MAX;
484
0
            else
485
0
                in->consumed += ret;
486
0
  }
487
0
    }
488
489
0
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
490
0
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
491
0
    }
492
493
0
    in->base = xmlBufContent(in->buf->buffer);
494
0
    if (in->base == NULL) {
495
        /* TODO: raise error */
496
0
        in->base = BAD_CAST "";
497
0
        in->cur = in->base;
498
0
        in->end = in->base;
499
0
        return;
500
0
    }
501
0
    in->cur = in->base + used;
502
0
    in->end = xmlBufEnd(in->buf->buffer);
503
504
0
    CHECK_BUFFER(in);
505
0
}
506
507
/************************************************************************
508
 *                  *
509
 *    UTF8 character input and related functions    *
510
 *                  *
511
 ************************************************************************/
512
513
/**
514
 * xmlNextChar:
515
 * @ctxt:  the XML parser context
516
 *
517
 * DEPRECATED: Internal function, do not use.
518
 *
519
 * Skip to the next input char.
520
 */
521
522
void
523
xmlNextChar(xmlParserCtxtPtr ctxt)
524
8.85M
{
525
8.85M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
526
8.85M
        (ctxt->input == NULL))
527
0
        return;
528
529
8.85M
    if (!(VALID_CTXT(ctxt))) {
530
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
531
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
532
0
        xmlStopParser(ctxt);
533
0
  return;
534
0
    }
535
536
8.85M
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
537
260k
        if (xmlParserGrow(ctxt) < 0)
538
0
            return;
539
260k
        if (ctxt->input->cur >= ctxt->input->end)
540
62
            return;
541
260k
    }
542
543
8.85M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
544
8.31M
        const unsigned char *cur;
545
8.31M
        unsigned char c;
546
547
        /*
548
         *   2.11 End-of-Line Handling
549
         *   the literal two-character sequence "#xD#xA" or a standalone
550
         *   literal #xD, an XML processor must pass to the application
551
         *   the single character #xA.
552
         */
553
8.31M
        if (*(ctxt->input->cur) == '\n') {
554
188k
            ctxt->input->line++; ctxt->input->col = 1;
555
188k
        } else
556
8.12M
            ctxt->input->col++;
557
558
        /*
559
         * We are supposed to handle UTF8, check it's valid
560
         * From rfc2044: encoding of the Unicode values on UTF-8:
561
         *
562
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
563
         * 0000 0000-0000 007F   0xxxxxxx
564
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
565
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
566
         *
567
         * Check for the 0x110000 limit too
568
         */
569
8.31M
        cur = ctxt->input->cur;
570
571
8.31M
        c = *cur;
572
8.31M
        if (c & 0x80) {
573
1.33M
            size_t avail;
574
575
1.33M
            if (c == 0xC0)
576
1
          goto encoding_error;
577
578
1.33M
            avail = ctxt->input->end - ctxt->input->cur;
579
580
1.33M
            if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
581
70
                goto encoding_error;
582
1.33M
            if ((c & 0xe0) == 0xe0) {
583
1.31M
                unsigned int val;
584
585
1.31M
                if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
586
6
                    goto encoding_error;
587
1.31M
                if ((c & 0xf0) == 0xf0) {
588
291
                    if (((c & 0xf8) != 0xf0) ||
589
291
                        (avail < 4) || ((cur[3] & 0xc0) != 0x80))
590
4
                        goto encoding_error;
591
                    /* 4-byte code */
592
287
                    ctxt->input->cur += 4;
593
287
                    val = (cur[0] & 0x7) << 18;
594
287
                    val |= (cur[1] & 0x3f) << 12;
595
287
                    val |= (cur[2] & 0x3f) << 6;
596
287
                    val |= cur[3] & 0x3f;
597
1.31M
                } else {
598
                    /* 3-byte code */
599
1.31M
                    ctxt->input->cur += 3;
600
1.31M
                    val = (cur[0] & 0xf) << 12;
601
1.31M
                    val |= (cur[1] & 0x3f) << 6;
602
1.31M
                    val |= cur[2] & 0x3f;
603
1.31M
                }
604
1.31M
                if (((val > 0xd7ff) && (val < 0xe000)) ||
605
1.31M
                    ((val > 0xfffd) && (val < 0x10000)) ||
606
1.31M
                    (val >= 0x110000)) {
607
2.11k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
608
2.11k
          "Char 0x%X out of allowed range\n",
609
2.11k
          val);
610
2.11k
                }
611
1.31M
            } else
612
                /* 2-byte code */
613
15.4k
                ctxt->input->cur += 2;
614
1.33M
        } else
615
            /* 1-byte code */
616
6.98M
            ctxt->input->cur++;
617
8.31M
    } else {
618
        /*
619
         * Assume it's a fixed length encoding (1) with
620
         * a compatible encoding for the ASCII set, since
621
         * XML constructs only use < 128 chars
622
         */
623
624
544k
        if (*(ctxt->input->cur) == '\n') {
625
113k
            ctxt->input->line++; ctxt->input->col = 1;
626
113k
        } else
627
431k
            ctxt->input->col++;
628
544k
        ctxt->input->cur++;
629
544k
    }
630
8.85M
    return;
631
8.85M
encoding_error:
632
    /*
633
     * If we detect a UTF-8 error, that probably means that the
634
     * input encoding didn't get properly advertised in the
635
     * declaration header. Report the error and switch the encoding
636
     * to ISO-Latin-1 (if you don't like this policy, just declare the
637
     * encoding !)
638
     */
639
81
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
640
81
        (ctxt->input->end - ctxt->input->cur < 4)) {
641
44
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
642
44
         "Input is not proper UTF-8, indicate encoding !\n",
643
44
         NULL, NULL);
644
44
    } else {
645
37
        char buffer[150];
646
647
37
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
648
37
      ctxt->input->cur[0], ctxt->input->cur[1],
649
37
      ctxt->input->cur[2], ctxt->input->cur[3]);
650
37
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
651
37
         "Input is not proper UTF-8, indicate encoding !\n%s",
652
37
         BAD_CAST buffer, NULL);
653
37
    }
654
81
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
655
81
    ctxt->input->cur++;
656
81
    return;
657
8.85M
}
658
659
/**
660
 * xmlCurrentChar:
661
 * @ctxt:  the XML parser context
662
 * @len:  pointer to the length of the char read
663
 *
664
 * DEPRECATED: Internal function, do not use.
665
 *
666
 * The current char value, if using UTF-8 this may actually span multiple
667
 * bytes in the input buffer. Implement the end of line normalization:
668
 * 2.11 End-of-Line Handling
669
 * Wherever an external parsed entity or the literal entity value
670
 * of an internal parsed entity contains either the literal two-character
671
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
672
 * must pass to the application the single character #xA.
673
 * This behavior can conveniently be produced by normalizing all
674
 * line breaks to #xA on input, before parsing.
675
 *
676
 * Returns the current char value and its length
677
 */
678
679
int
680
270M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
681
270M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
682
270M
    if (ctxt->instate == XML_PARSER_EOF)
683
0
  return(0);
684
685
270M
    if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
686
270M
        (xmlParserGrow(ctxt) < 0))
687
0
        return(0);
688
689
270M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
690
9.17M
      *len = 1;
691
9.17M
      return(*ctxt->input->cur);
692
9.17M
    }
693
261M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
694
  /*
695
   * We are supposed to handle UTF8, check it's valid
696
   * From rfc2044: encoding of the Unicode values on UTF-8:
697
   *
698
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
699
   * 0000 0000-0000 007F   0xxxxxxx
700
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
701
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
702
   *
703
   * Check for the 0x110000 limit too
704
   */
705
257M
  const unsigned char *cur = ctxt->input->cur;
706
257M
  unsigned char c;
707
257M
  unsigned int val;
708
709
257M
  c = *cur;
710
257M
  if (c & 0x80) {
711
256M
            size_t avail;
712
713
256M
      if (((c & 0x40) == 0) || (c == 0xC0))
714
718
    goto encoding_error;
715
716
256M
            avail = ctxt->input->end - ctxt->input->cur;
717
718
256M
            if (avail < 2)
719
129
                goto incomplete_sequence;
720
256M
      if ((cur[1] & 0xc0) != 0x80)
721
2.15k
    goto encoding_error;
722
256M
      if ((c & 0xe0) == 0xe0) {
723
256M
                if (avail < 3)
724
62
                    goto incomplete_sequence;
725
256M
    if ((cur[2] & 0xc0) != 0x80)
726
18
        goto encoding_error;
727
256M
    if ((c & 0xf0) == 0xf0) {
728
60.2k
                    if (avail < 4)
729
31
                        goto incomplete_sequence;
730
60.2k
        if (((c & 0xf8) != 0xf0) ||
731
60.2k
      ((cur[3] & 0xc0) != 0x80))
732
9
      goto encoding_error;
733
        /* 4-byte code */
734
60.2k
        *len = 4;
735
60.2k
        val = (cur[0] & 0x7) << 18;
736
60.2k
        val |= (cur[1] & 0x3f) << 12;
737
60.2k
        val |= (cur[2] & 0x3f) << 6;
738
60.2k
        val |= cur[3] & 0x3f;
739
60.2k
        if (val < 0x10000)
740
11
      goto encoding_error;
741
256M
    } else {
742
      /* 3-byte code */
743
256M
        *len = 3;
744
256M
        val = (cur[0] & 0xf) << 12;
745
256M
        val |= (cur[1] & 0x3f) << 6;
746
256M
        val |= cur[2] & 0x3f;
747
256M
        if (val < 0x800)
748
4
      goto encoding_error;
749
256M
    }
750
256M
      } else {
751
        /* 2-byte code */
752
44.2k
    *len = 2;
753
44.2k
    val = (cur[0] & 0x1f) << 6;
754
44.2k
    val |= cur[1] & 0x3f;
755
44.2k
    if (val < 0x80)
756
4
        goto encoding_error;
757
44.2k
      }
758
256M
      if (!IS_CHAR(val)) {
759
471
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
760
471
          "Char 0x%X out of allowed range\n", val);
761
471
      }
762
256M
      return(val);
763
256M
  } else {
764
      /* 1-byte code */
765
667k
      *len = 1;
766
667k
      if ((*ctxt->input->cur == 0) &&
767
667k
          (ctxt->input->end > ctxt->input->cur)) {
768
239
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
769
239
          "Char 0x0 out of allowed range\n", 0);
770
239
      }
771
667k
      if (*ctxt->input->cur == 0xD) {
772
531k
    if (ctxt->input->cur[1] == 0xA) {
773
73.9k
        ctxt->input->cur++;
774
73.9k
    }
775
531k
    return(0xA);
776
531k
      }
777
136k
      return(*ctxt->input->cur);
778
667k
  }
779
257M
    }
780
    /*
781
     * Assume it's a fixed length encoding (1) with
782
     * a compatible encoding for the ASCII set, since
783
     * XML constructs only use < 128 chars
784
     */
785
3.79M
    *len = 1;
786
3.79M
    if (*ctxt->input->cur == 0xD) {
787
207k
  if (ctxt->input->cur[1] == 0xA) {
788
1.43k
      ctxt->input->cur++;
789
1.43k
  }
790
207k
  return(0xA);
791
207k
    }
792
3.59M
    return(*ctxt->input->cur);
793
794
2.92k
encoding_error:
795
    /*
796
     * If we detect a UTF-8 error, that probably means that the
797
     * input encoding didn't get properly advertised in the
798
     * declaration header. Report the error and switch the encoding
799
     * to ISO-Latin-1 (if you don't like this policy, just declare the
800
     * encoding !)
801
     */
802
2.92k
    if (ctxt->input->end - ctxt->input->cur < 4) {
803
431
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
804
431
         "Input is not proper UTF-8, indicate encoding !\n",
805
431
         NULL, NULL);
806
2.49k
    } else {
807
2.49k
        char buffer[150];
808
809
2.49k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
810
2.49k
      ctxt->input->cur[0], ctxt->input->cur[1],
811
2.49k
      ctxt->input->cur[2], ctxt->input->cur[3]);
812
2.49k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
813
2.49k
         "Input is not proper UTF-8, indicate encoding !\n%s",
814
2.49k
         BAD_CAST buffer, NULL);
815
2.49k
    }
816
2.92k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
817
2.92k
    *len = 1;
818
2.92k
    return(*ctxt->input->cur);
819
820
222
incomplete_sequence:
821
    /*
822
     * An encoding problem may arise from a truncated input buffer
823
     * splitting a character in the middle. In that case do not raise
824
     * an error but return 0. This should only happen when push parsing
825
     * char data.
826
     */
827
222
    *len = 0;
828
222
    return(0);
829
3.79M
}
830
831
/**
832
 * xmlStringCurrentChar:
833
 * @ctxt:  the XML parser context
834
 * @cur:  pointer to the beginning of the char
835
 * @len:  pointer to the length of the char read
836
 *
837
 * DEPRECATED: Internal function, do not use.
838
 *
839
 * The current char value, if using UTF-8 this may actually span multiple
840
 * bytes in the input buffer.
841
 *
842
 * Returns the current char value and its length
843
 */
844
845
int
846
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
847
24.3M
{
848
24.3M
    if ((len == NULL) || (cur == NULL)) return(0);
849
24.3M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
850
        /*
851
         * We are supposed to handle UTF8, check it's valid
852
         * From rfc2044: encoding of the Unicode values on UTF-8:
853
         *
854
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
855
         * 0000 0000-0000 007F   0xxxxxxx
856
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
857
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
858
         *
859
         * Check for the 0x110000 limit too
860
         */
861
24.0M
        unsigned char c;
862
24.0M
        unsigned int val;
863
864
24.0M
        c = *cur;
865
24.0M
        if (c & 0x80) {
866
23.9M
            if ((cur[1] & 0xc0) != 0x80)
867
0
                goto encoding_error;
868
23.9M
            if ((c & 0xe0) == 0xe0) {
869
870
23.9M
                if ((cur[2] & 0xc0) != 0x80)
871
0
                    goto encoding_error;
872
23.9M
                if ((c & 0xf0) == 0xf0) {
873
522
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
874
0
                        goto encoding_error;
875
                    /* 4-byte code */
876
522
                    *len = 4;
877
522
                    val = (cur[0] & 0x7) << 18;
878
522
                    val |= (cur[1] & 0x3f) << 12;
879
522
                    val |= (cur[2] & 0x3f) << 6;
880
522
                    val |= cur[3] & 0x3f;
881
23.9M
                } else {
882
                    /* 3-byte code */
883
23.9M
                    *len = 3;
884
23.9M
                    val = (cur[0] & 0xf) << 12;
885
23.9M
                    val |= (cur[1] & 0x3f) << 6;
886
23.9M
                    val |= cur[2] & 0x3f;
887
23.9M
                }
888
23.9M
            } else {
889
                /* 2-byte code */
890
340
                *len = 2;
891
340
                val = (cur[0] & 0x1f) << 6;
892
340
                val |= cur[1] & 0x3f;
893
340
            }
894
23.9M
            if (!IS_CHAR(val)) {
895
0
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
896
0
          "Char 0x%X out of allowed range\n", val);
897
0
            }
898
23.9M
            return (val);
899
23.9M
        } else {
900
            /* 1-byte code */
901
149k
            *len = 1;
902
149k
            return (*cur);
903
149k
        }
904
24.0M
    }
905
    /*
906
     * Assume it's a fixed length encoding (1) with
907
     * a compatible encoding for the ASCII set, since
908
     * XML constructs only use < 128 chars
909
     */
910
280k
    *len = 1;
911
280k
    return (*cur);
912
0
encoding_error:
913
914
    /*
915
     * An encoding problem may arise from a truncated input buffer
916
     * splitting a character in the middle. In that case do not raise
917
     * an error but return 0 to indicate an end of stream problem
918
     */
919
0
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
920
0
        (ctxt->input->end - ctxt->input->cur < 4)) {
921
0
  *len = 0;
922
0
  return(0);
923
0
    }
924
    /*
925
     * If we detect an UTF8 error that probably mean that the
926
     * input encoding didn't get properly advertised in the
927
     * declaration header. Report the error and switch the encoding
928
     * to ISO-Latin-1 (if you don't like this policy, just declare the
929
     * encoding !)
930
     */
931
0
    {
932
0
        char buffer[150];
933
934
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
935
0
      ctxt->input->cur[0], ctxt->input->cur[1],
936
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
937
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
938
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
939
0
         BAD_CAST buffer, NULL);
940
0
    }
941
0
    *len = 1;
942
0
    return (*cur);
943
0
}
944
945
/**
946
 * xmlCopyCharMultiByte:
947
 * @out:  pointer to an array of xmlChar
948
 * @val:  the char value
949
 *
950
 * append the char value in the array
951
 *
952
 * Returns the number of xmlChar written
953
 */
954
int
955
139M
xmlCopyCharMultiByte(xmlChar *out, int val) {
956
139M
    if ((out == NULL) || (val < 0)) return(0);
957
    /*
958
     * We are supposed to handle UTF8, check it's valid
959
     * From rfc2044: encoding of the Unicode values on UTF-8:
960
     *
961
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
962
     * 0000 0000-0000 007F   0xxxxxxx
963
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
964
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
965
     */
966
139M
    if  (val >= 0x80) {
967
139M
  xmlChar *savedout = out;
968
139M
  int bits;
969
139M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
970
139M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
971
62.4k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
972
0
  else {
973
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
974
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
975
0
            val);
976
0
      return(0);
977
0
  }
978
419M
  for ( ; bits >= 0; bits-= 6)
979
279M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
980
139M
  return (out - savedout);
981
139M
    }
982
795
    *out = val;
983
795
    return 1;
984
139M
}
985
986
/**
987
 * xmlCopyChar:
988
 * @len:  Ignored, compatibility
989
 * @out:  pointer to an array of xmlChar
990
 * @val:  the char value
991
 *
992
 * append the char value in the array
993
 *
994
 * Returns the number of xmlChar written
995
 */
996
997
int
998
18.1k
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
999
18.1k
    if ((out == NULL) || (val < 0)) return(0);
1000
    /* the len parameter is ignored */
1001
18.1k
    if  (val >= 0x80) {
1002
16.8k
  return(xmlCopyCharMultiByte (out, val));
1003
16.8k
    }
1004
1.28k
    *out = val;
1005
1.28k
    return 1;
1006
18.1k
}
1007
1008
/************************************************************************
1009
 *                  *
1010
 *    Commodity functions to switch encodings     *
1011
 *                  *
1012
 ************************************************************************/
1013
1014
static xmlCharEncodingHandlerPtr
1015
204
xmlDetectEBCDIC(xmlParserInputPtr input) {
1016
204
    xmlChar out[200];
1017
204
    xmlCharEncodingHandlerPtr handler;
1018
204
    int inlen, outlen, res, i;
1019
1020
    /*
1021
     * To detect the EBCDIC code page, we convert the first 200 bytes
1022
     * to EBCDIC-US and try to find the encoding declaration.
1023
     */
1024
204
    handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1025
204
    if (handler == NULL)
1026
0
        return(NULL);
1027
204
    outlen = sizeof(out) - 1;
1028
204
    inlen = input->end - input->cur;
1029
204
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1030
204
    if (res < 0)
1031
16
        return(handler);
1032
188
    out[outlen] = 0;
1033
1034
2.42k
    for (i = 0; i < outlen; i++) {
1035
2.35k
        if (out[i] == '>')
1036
2
            break;
1037
2.35k
        if ((out[i] == 'e') &&
1038
2.35k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1039
117
            int start, cur, quote;
1040
1041
117
            i += 8;
1042
117
            while (IS_BLANK_CH(out[i]))
1043
691
                i += 1;
1044
117
            if (out[i++] != '=')
1045
27
                break;
1046
90
            while (IS_BLANK_CH(out[i]))
1047
485
                i += 1;
1048
90
            quote = out[i++];
1049
90
            if ((quote != '\'') && (quote != '"'))
1050
30
                break;
1051
60
            start = i;
1052
60
            cur = out[i];
1053
1.40k
            while (((cur >= 'a') && (cur <= 'z')) ||
1054
1.40k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1055
1.40k
                   ((cur >= '0') && (cur <= '9')) ||
1056
1.40k
                   (cur == '.') || (cur == '_') ||
1057
1.40k
                   (cur == '-'))
1058
1.34k
                cur = out[++i];
1059
60
            if (cur != quote)
1060
53
                break;
1061
7
            out[i] = 0;
1062
7
            xmlCharEncCloseFunc(handler);
1063
7
            handler = xmlFindCharEncodingHandler((char *) out + start);
1064
7
            break;
1065
60
        }
1066
2.35k
    }
1067
1068
188
    return(handler);
1069
204
}
1070
1071
/**
1072
 * xmlSwitchEncoding:
1073
 * @ctxt:  the parser context
1074
 * @enc:  the encoding value (number)
1075
 *
1076
 * change the input functions when discovering the character encoding
1077
 * of a given entity.
1078
 *
1079
 * Returns 0 in case of success, -1 otherwise
1080
 */
1081
int
1082
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1083
12.4k
{
1084
12.4k
    xmlCharEncodingHandlerPtr handler;
1085
12.4k
    int ret;
1086
1087
12.4k
    if (ctxt == NULL) return(-1);
1088
1089
    /*
1090
     * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1091
     *
1092
     * Note that we look for a decoded UTF-8 BOM when switching to UTF-16.
1093
     * This is mostly useless but Webkit/Chromium relies on this behavior.
1094
     * See https://bugs.chromium.org/p/chromium/issues/detail?id=1451026
1095
     */
1096
12.4k
    if ((ctxt->input != NULL) &&
1097
12.4k
        (ctxt->input->consumed == 0) &&
1098
12.4k
        (ctxt->input->cur != NULL) &&
1099
12.4k
        (ctxt->input->cur == ctxt->input->base) &&
1100
12.4k
        ((enc == XML_CHAR_ENCODING_UTF8) ||
1101
12.4k
         (enc == XML_CHAR_ENCODING_UTF16LE) ||
1102
12.4k
         (enc == XML_CHAR_ENCODING_UTF16BE))) {
1103
        /*
1104
         * Errata on XML-1.0 June 20 2001
1105
         * Specific handling of the Byte Order Mark for
1106
         * UTF-8
1107
         */
1108
3.17k
        if ((ctxt->input->cur[0] == 0xEF) &&
1109
3.17k
            (ctxt->input->cur[1] == 0xBB) &&
1110
3.17k
            (ctxt->input->cur[2] == 0xBF)) {
1111
333
            ctxt->input->cur += 3;
1112
333
        }
1113
3.17k
    }
1114
1115
12.4k
    switch (enc) {
1116
0
  case XML_CHAR_ENCODING_ERROR:
1117
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1118
0
                     "encoding unknown\n", NULL, NULL);
1119
0
      return(-1);
1120
9.05k
  case XML_CHAR_ENCODING_NONE:
1121
      /* let's assume it's UTF-8 without the XML decl */
1122
9.05k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1123
9.05k
      return(0);
1124
2.69k
  case XML_CHAR_ENCODING_UTF8:
1125
      /* default encoding, no conversion should be needed */
1126
2.69k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1127
2.69k
      return(0);
1128
204
        case XML_CHAR_ENCODING_EBCDIC:
1129
204
            handler = xmlDetectEBCDIC(ctxt->input);
1130
204
            break;
1131
497
        default:
1132
497
            handler = xmlGetCharEncodingHandler(enc);
1133
497
            break;
1134
12.4k
    }
1135
701
    if (handler == NULL) {
1136
  /*
1137
   * Default handlers.
1138
   */
1139
8
  switch (enc) {
1140
0
      case XML_CHAR_ENCODING_ASCII:
1141
    /* default encoding, no conversion should be needed */
1142
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1143
0
    return(0);
1144
0
      case XML_CHAR_ENCODING_8859_1:
1145
0
    if ((ctxt->inputNr == 1) &&
1146
0
        (ctxt->encoding == NULL) &&
1147
0
        (ctxt->input != NULL) &&
1148
0
        (ctxt->input->encoding != NULL)) {
1149
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1150
0
    }
1151
0
    ctxt->charset = enc;
1152
0
    return(0);
1153
8
      default:
1154
8
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1155
8
                        "encoding not supported: %s\n",
1156
8
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
1157
                /*
1158
                 * TODO: We could recover from errors in external entities
1159
                 * if we didn't stop the parser. But most callers of this
1160
                 * function don't check the return value.
1161
                 */
1162
8
                xmlStopParser(ctxt);
1163
8
                return(-1);
1164
8
        }
1165
8
    }
1166
693
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1167
693
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1168
        /*
1169
   * on encoding conversion errors, stop the parser
1170
   */
1171
9
        xmlStopParser(ctxt);
1172
9
  ctxt->errNo = XML_I18N_CONV_FAILED;
1173
9
    }
1174
693
    return(ret);
1175
701
}
1176
1177
/**
1178
 * xmlSwitchInputEncoding:
1179
 * @ctxt:  the parser context
1180
 * @input:  the input stream
1181
 * @handler:  the encoding handler
1182
 *
1183
 * change the input functions when discovering the character encoding
1184
 * of a given entity.
1185
 *
1186
 * Returns 0 in case of success, -1 otherwise
1187
 */
1188
int
1189
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1190
                       xmlCharEncodingHandlerPtr handler)
1191
1.90k
{
1192
1.90k
    int nbchars;
1193
1.90k
    xmlParserInputBufferPtr in;
1194
1195
1.90k
    if (handler == NULL)
1196
0
        return (-1);
1197
1.90k
    if (input == NULL)
1198
0
        return (-1);
1199
1.90k
    in = input->buf;
1200
1.90k
    if (in == NULL) {
1201
0
  xmlErrInternal(ctxt,
1202
0
                "static memory buffer doesn't support encoding\n", NULL);
1203
        /*
1204
         * Callers assume that the input buffer takes ownership of the
1205
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1206
         * handlers and avoids a memory leak.
1207
         */
1208
0
        xmlCharEncCloseFunc(handler);
1209
0
  return (-1);
1210
0
    }
1211
1212
1.90k
    if (in->encoder != NULL) {
1213
1
        if (in->encoder == handler)
1214
0
            return (0);
1215
1216
        /*
1217
         * Switching encodings during parsing is a really bad idea,
1218
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1219
         * separate calls to xmlParseChunk.
1220
         *
1221
         * TODO: We should check whether the "raw" input buffer is empty and
1222
         * convert the old content using the old encoder.
1223
         */
1224
1225
1
        xmlCharEncCloseFunc(in->encoder);
1226
1
        in->encoder = handler;
1227
1
        return (0);
1228
1
    }
1229
1230
1.90k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1231
1.90k
    in->encoder = handler;
1232
1233
    /*
1234
     * Is there already some content down the pipe to convert ?
1235
     */
1236
1.90k
    if (xmlBufIsEmpty(in->buffer) == 0) {
1237
1.90k
        size_t processed, use, consumed;
1238
1239
        /*
1240
         * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1241
         */
1242
1243
        /*
1244
         * Specific handling of the Byte Order Mark for
1245
         * UTF-16
1246
         */
1247
1.90k
        if ((handler->name != NULL) &&
1248
1.90k
            (!strcmp(handler->name, "UTF-16LE") ||
1249
1.90k
             !strcmp(handler->name, "UTF-16")) &&
1250
1.90k
            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1251
129
            input->cur += 2;
1252
129
        }
1253
1.90k
        if ((handler->name != NULL) &&
1254
1.90k
            (!strcmp(handler->name, "UTF-16BE")) &&
1255
1.90k
            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1256
160
            input->cur += 2;
1257
160
        }
1258
        /*
1259
         * Errata on XML-1.0 June 20 2001
1260
         * Specific handling of the Byte Order Mark for
1261
         * UTF-8
1262
         */
1263
1.90k
        if ((handler->name != NULL) &&
1264
1.90k
            (!strcmp(handler->name, "UTF-8")) &&
1265
1.90k
            (input->cur[0] == 0xEF) &&
1266
1.90k
            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1267
0
            input->cur += 3;
1268
0
        }
1269
1270
        /*
1271
         * Shrink the current input buffer.
1272
         * Move it as the raw buffer and create a new input buffer
1273
         */
1274
1.90k
        processed = input->cur - input->base;
1275
1.90k
        xmlBufShrink(in->buffer, processed);
1276
1.90k
        input->consumed += processed;
1277
1.90k
        in->raw = in->buffer;
1278
1.90k
        in->buffer = xmlBufCreate();
1279
1.90k
        in->rawconsumed = processed;
1280
1.90k
        use = xmlBufUse(in->raw);
1281
1282
        /*
1283
         * TODO: We must flush and decode the whole buffer to make functions
1284
         * like xmlReadMemory work with a user-provided encoding. If the
1285
         * encoding is specified directly, we should probably set
1286
         * XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1287
         * twice. Then we could set "flush" to false which should save
1288
         * a considerable amount of memory when parsing from memory.
1289
         * It's probably even possible to remove this whole if-block
1290
         * completely.
1291
         */
1292
1.90k
        nbchars = xmlCharEncInput(in, 1);
1293
1.90k
        xmlBufResetInput(in->buffer, input);
1294
1.90k
        if (nbchars < 0) {
1295
            /* TODO: This could be an out of memory or an encoding error. */
1296
13
            xmlErrInternal(ctxt,
1297
13
                           "switching encoding: encoder error\n",
1298
13
                           NULL);
1299
13
            xmlHaltParser(ctxt);
1300
13
            return (-1);
1301
13
        }
1302
1.89k
        consumed = use - xmlBufUse(in->raw);
1303
1.89k
        if ((consumed > ULONG_MAX) ||
1304
1.89k
            (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1305
0
            in->rawconsumed = ULONG_MAX;
1306
1.89k
        else
1307
1.89k
      in->rawconsumed += consumed;
1308
1.89k
    }
1309
1.89k
    return (0);
1310
1.90k
}
1311
1312
/**
1313
 * xmlSwitchToEncoding:
1314
 * @ctxt:  the parser context
1315
 * @handler:  the encoding handler
1316
 *
1317
 * change the input functions when discovering the character encoding
1318
 * of a given entity.
1319
 *
1320
 * Returns 0 in case of success, -1 otherwise
1321
 */
1322
int
1323
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1324
1.21k
{
1325
1.21k
    if (ctxt == NULL)
1326
0
        return(-1);
1327
1.21k
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1328
1.21k
}
1329
1330
/************************************************************************
1331
 *                  *
1332
 *  Commodity functions to handle entities processing   *
1333
 *                  *
1334
 ************************************************************************/
1335
1336
/**
1337
 * xmlFreeInputStream:
1338
 * @input:  an xmlParserInputPtr
1339
 *
1340
 * Free up an input stream.
1341
 */
1342
void
1343
12.4k
xmlFreeInputStream(xmlParserInputPtr input) {
1344
12.4k
    if (input == NULL) return;
1345
1346
12.4k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1347
12.4k
    if (input->directory != NULL) xmlFree((char *) input->directory);
1348
12.4k
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1349
12.4k
    if (input->version != NULL) xmlFree((char *) input->version);
1350
12.4k
    if ((input->free != NULL) && (input->base != NULL))
1351
0
        input->free((xmlChar *) input->base);
1352
12.4k
    if (input->buf != NULL)
1353
9.31k
        xmlFreeParserInputBuffer(input->buf);
1354
12.4k
    xmlFree(input);
1355
12.4k
}
1356
1357
/**
1358
 * xmlNewInputStream:
1359
 * @ctxt:  an XML parser context
1360
 *
1361
 * Create a new input stream structure.
1362
 *
1363
 * Returns the new input stream or NULL
1364
 */
1365
xmlParserInputPtr
1366
12.4k
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1367
12.4k
    xmlParserInputPtr input;
1368
1369
12.4k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1370
12.4k
    if (input == NULL) {
1371
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1372
0
  return(NULL);
1373
0
    }
1374
12.4k
    memset(input, 0, sizeof(xmlParserInput));
1375
12.4k
    input->line = 1;
1376
12.4k
    input->col = 1;
1377
12.4k
    input->standalone = -1;
1378
1379
    /*
1380
     * If the context is NULL the id cannot be initialized, but that
1381
     * should not happen while parsing which is the situation where
1382
     * the id is actually needed.
1383
     */
1384
12.4k
    if (ctxt != NULL) {
1385
12.4k
        if (input->id >= INT_MAX) {
1386
0
            xmlErrMemory(ctxt, "Input ID overflow\n");
1387
0
            return(NULL);
1388
0
        }
1389
12.4k
        input->id = ctxt->input_id++;
1390
12.4k
    }
1391
1392
12.4k
    return(input);
1393
12.4k
}
1394
1395
/**
1396
 * xmlNewIOInputStream:
1397
 * @ctxt:  an XML parser context
1398
 * @input:  an I/O Input
1399
 * @enc:  the charset encoding if known
1400
 *
1401
 * Create a new input stream structure encapsulating the @input into
1402
 * a stream suitable for the parser.
1403
 *
1404
 * Returns the new input stream or NULL
1405
 */
1406
xmlParserInputPtr
1407
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1408
0
              xmlCharEncoding enc) {
1409
0
    xmlParserInputPtr inputStream;
1410
1411
0
    if (input == NULL) return(NULL);
1412
0
    if (xmlParserDebugEntities)
1413
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1414
0
    inputStream = xmlNewInputStream(ctxt);
1415
0
    if (inputStream == NULL) {
1416
0
  return(NULL);
1417
0
    }
1418
0
    inputStream->filename = NULL;
1419
0
    inputStream->buf = input;
1420
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1421
1422
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1423
0
        xmlSwitchEncoding(ctxt, enc);
1424
0
    }
1425
1426
0
    return(inputStream);
1427
0
}
1428
1429
/**
1430
 * xmlNewEntityInputStream:
1431
 * @ctxt:  an XML parser context
1432
 * @entity:  an Entity pointer
1433
 *
1434
 * DEPRECATED: Internal function, do not use.
1435
 *
1436
 * Create a new input stream based on an xmlEntityPtr
1437
 *
1438
 * Returns the new input stream or NULL
1439
 */
1440
xmlParserInputPtr
1441
0
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1442
0
    xmlParserInputPtr input;
1443
1444
0
    if (entity == NULL) {
1445
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1446
0
                 NULL);
1447
0
  return(NULL);
1448
0
    }
1449
0
    if (xmlParserDebugEntities)
1450
0
  xmlGenericError(xmlGenericErrorContext,
1451
0
    "new input from entity: %s\n", entity->name);
1452
0
    if (entity->content == NULL) {
1453
0
  switch (entity->etype) {
1454
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1455
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1456
0
                   entity->name);
1457
0
                break;
1458
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1459
0
            case XML_EXTERNAL_PARAMETER_ENTITY:
1460
0
    input = xmlLoadExternalEntity((char *) entity->URI,
1461
0
           (char *) entity->ExternalID, ctxt);
1462
0
                if (input != NULL)
1463
0
                    input->entity = entity;
1464
0
                return(input);
1465
0
            case XML_INTERNAL_GENERAL_ENTITY:
1466
0
          xmlErrInternal(ctxt,
1467
0
          "Internal entity %s without content !\n",
1468
0
                   entity->name);
1469
0
                break;
1470
0
            case XML_INTERNAL_PARAMETER_ENTITY:
1471
0
          xmlErrInternal(ctxt,
1472
0
          "Internal parameter entity %s without content !\n",
1473
0
                   entity->name);
1474
0
                break;
1475
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1476
0
          xmlErrInternal(ctxt,
1477
0
          "Predefined entity %s without content !\n",
1478
0
                   entity->name);
1479
0
                break;
1480
0
  }
1481
0
  return(NULL);
1482
0
    }
1483
0
    input = xmlNewInputStream(ctxt);
1484
0
    if (input == NULL) {
1485
0
  return(NULL);
1486
0
    }
1487
0
    if (entity->URI != NULL)
1488
0
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1489
0
    input->base = entity->content;
1490
0
    if (entity->length == 0)
1491
0
        entity->length = xmlStrlen(entity->content);
1492
0
    input->cur = entity->content;
1493
0
    input->length = entity->length;
1494
0
    input->end = &entity->content[input->length];
1495
0
    input->entity = entity;
1496
0
    return(input);
1497
0
}
1498
1499
/**
1500
 * xmlNewStringInputStream:
1501
 * @ctxt:  an XML parser context
1502
 * @buffer:  an memory buffer
1503
 *
1504
 * Create a new input stream based on a memory buffer.
1505
 * Returns the new input stream
1506
 */
1507
xmlParserInputPtr
1508
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1509
0
    xmlParserInputPtr input;
1510
0
    xmlParserInputBufferPtr buf;
1511
1512
0
    if (buffer == NULL) {
1513
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1514
0
                 NULL);
1515
0
  return(NULL);
1516
0
    }
1517
0
    if (xmlParserDebugEntities)
1518
0
  xmlGenericError(xmlGenericErrorContext,
1519
0
    "new fixed input: %.30s\n", buffer);
1520
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1521
0
                                        xmlStrlen(buffer),
1522
0
                                        XML_CHAR_ENCODING_NONE);
1523
0
    if (buf == NULL) {
1524
0
  xmlErrMemory(ctxt, NULL);
1525
0
        return(NULL);
1526
0
    }
1527
0
    input = xmlNewInputStream(ctxt);
1528
0
    if (input == NULL) {
1529
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1530
0
  xmlFreeParserInputBuffer(buf);
1531
0
  return(NULL);
1532
0
    }
1533
0
    input->buf = buf;
1534
0
    xmlBufResetInput(input->buf->buffer, input);
1535
0
    return(input);
1536
0
}
1537
1538
/**
1539
 * xmlNewInputFromFile:
1540
 * @ctxt:  an XML parser context
1541
 * @filename:  the filename to use as entity
1542
 *
1543
 * Create a new input stream based on a file or an URL.
1544
 *
1545
 * Returns the new input stream or NULL in case of error
1546
 */
1547
xmlParserInputPtr
1548
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1549
0
    xmlParserInputBufferPtr buf;
1550
0
    xmlParserInputPtr inputStream;
1551
0
    char *directory = NULL;
1552
0
    xmlChar *URI = NULL;
1553
1554
0
    if (xmlParserDebugEntities)
1555
0
  xmlGenericError(xmlGenericErrorContext,
1556
0
    "new input from file: %s\n", filename);
1557
0
    if (ctxt == NULL) return(NULL);
1558
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1559
0
    if (buf == NULL) {
1560
0
  if (filename == NULL)
1561
0
      __xmlLoaderErr(ctxt,
1562
0
                     "failed to load external entity: NULL filename \n",
1563
0
         NULL);
1564
0
  else
1565
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1566
0
         (const char *) filename);
1567
0
  return(NULL);
1568
0
    }
1569
1570
0
    inputStream = xmlNewInputStream(ctxt);
1571
0
    if (inputStream == NULL) {
1572
0
  xmlFreeParserInputBuffer(buf);
1573
0
  return(NULL);
1574
0
    }
1575
1576
0
    inputStream->buf = buf;
1577
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1578
0
    if (inputStream == NULL)
1579
0
        return(NULL);
1580
1581
0
    if (inputStream->filename == NULL)
1582
0
  URI = xmlStrdup((xmlChar *) filename);
1583
0
    else
1584
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1585
0
    directory = xmlParserGetDirectory((const char *) URI);
1586
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1587
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1588
0
    if (URI != NULL) xmlFree((char *) URI);
1589
0
    inputStream->directory = directory;
1590
1591
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1592
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1593
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1594
0
    return(inputStream);
1595
0
}
1596
1597
/************************************************************************
1598
 *                  *
1599
 *    Commodity functions to handle parser contexts   *
1600
 *                  *
1601
 ************************************************************************/
1602
1603
/**
1604
 * xmlInitSAXParserCtxt:
1605
 * @ctxt:  XML parser context
1606
 * @sax:  SAX handlert
1607
 * @userData:  user data
1608
 *
1609
 * Initialize a SAX parser context
1610
 *
1611
 * Returns 0 in case of success and -1 in case of error
1612
 */
1613
1614
static int
1615
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1616
                     void *userData)
1617
12.4k
{
1618
12.4k
    xmlParserInputPtr input;
1619
1620
12.4k
    if(ctxt==NULL) {
1621
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1622
0
        return(-1);
1623
0
    }
1624
1625
12.4k
    xmlInitParser();
1626
1627
12.4k
    if (ctxt->dict == NULL)
1628
12.4k
  ctxt->dict = xmlDictCreate();
1629
12.4k
    if (ctxt->dict == NULL) {
1630
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1631
0
  return(-1);
1632
0
    }
1633
12.4k
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1634
1635
12.4k
    if (ctxt->sax == NULL)
1636
12.4k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1637
12.4k
    if (ctxt->sax == NULL) {
1638
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1639
0
  return(-1);
1640
0
    }
1641
12.4k
    if (sax == NULL) {
1642
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1643
0
        xmlSAXVersion(ctxt->sax, 2);
1644
0
        ctxt->userData = ctxt;
1645
12.4k
    } else {
1646
12.4k
  if (sax->initialized == XML_SAX2_MAGIC) {
1647
12.4k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1648
12.4k
        } else {
1649
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1650
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1651
0
        }
1652
12.4k
        ctxt->userData = userData ? userData : ctxt;
1653
12.4k
    }
1654
1655
12.4k
    ctxt->maxatts = 0;
1656
12.4k
    ctxt->atts = NULL;
1657
    /* Allocate the Input stack */
1658
12.4k
    if (ctxt->inputTab == NULL) {
1659
12.4k
  ctxt->inputTab = (xmlParserInputPtr *)
1660
12.4k
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1661
12.4k
  ctxt->inputMax = 5;
1662
12.4k
    }
1663
12.4k
    if (ctxt->inputTab == NULL) {
1664
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1665
0
  ctxt->inputNr = 0;
1666
0
  ctxt->inputMax = 0;
1667
0
  ctxt->input = NULL;
1668
0
  return(-1);
1669
0
    }
1670
12.4k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1671
0
        xmlFreeInputStream(input);
1672
0
    }
1673
12.4k
    ctxt->inputNr = 0;
1674
12.4k
    ctxt->input = NULL;
1675
1676
12.4k
    ctxt->version = NULL;
1677
12.4k
    ctxt->encoding = NULL;
1678
12.4k
    ctxt->standalone = -1;
1679
12.4k
    ctxt->hasExternalSubset = 0;
1680
12.4k
    ctxt->hasPErefs = 0;
1681
12.4k
    ctxt->html = 0;
1682
12.4k
    ctxt->external = 0;
1683
12.4k
    ctxt->instate = XML_PARSER_START;
1684
12.4k
    ctxt->token = 0;
1685
12.4k
    ctxt->directory = NULL;
1686
1687
    /* Allocate the Node stack */
1688
12.4k
    if (ctxt->nodeTab == NULL) {
1689
12.4k
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1690
12.4k
  ctxt->nodeMax = 10;
1691
12.4k
    }
1692
12.4k
    if (ctxt->nodeTab == NULL) {
1693
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1694
0
  ctxt->nodeNr = 0;
1695
0
  ctxt->nodeMax = 0;
1696
0
  ctxt->node = NULL;
1697
0
  ctxt->inputNr = 0;
1698
0
  ctxt->inputMax = 0;
1699
0
  ctxt->input = NULL;
1700
0
  return(-1);
1701
0
    }
1702
12.4k
    ctxt->nodeNr = 0;
1703
12.4k
    ctxt->node = NULL;
1704
1705
    /* Allocate the Name stack */
1706
12.4k
    if (ctxt->nameTab == NULL) {
1707
12.4k
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1708
12.4k
  ctxt->nameMax = 10;
1709
12.4k
    }
1710
12.4k
    if (ctxt->nameTab == NULL) {
1711
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1712
0
  ctxt->nodeNr = 0;
1713
0
  ctxt->nodeMax = 0;
1714
0
  ctxt->node = NULL;
1715
0
  ctxt->inputNr = 0;
1716
0
  ctxt->inputMax = 0;
1717
0
  ctxt->input = NULL;
1718
0
  ctxt->nameNr = 0;
1719
0
  ctxt->nameMax = 0;
1720
0
  ctxt->name = NULL;
1721
0
  return(-1);
1722
0
    }
1723
12.4k
    ctxt->nameNr = 0;
1724
12.4k
    ctxt->name = NULL;
1725
1726
    /* Allocate the space stack */
1727
12.4k
    if (ctxt->spaceTab == NULL) {
1728
12.4k
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1729
12.4k
  ctxt->spaceMax = 10;
1730
12.4k
    }
1731
12.4k
    if (ctxt->spaceTab == NULL) {
1732
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1733
0
  ctxt->nodeNr = 0;
1734
0
  ctxt->nodeMax = 0;
1735
0
  ctxt->node = NULL;
1736
0
  ctxt->inputNr = 0;
1737
0
  ctxt->inputMax = 0;
1738
0
  ctxt->input = NULL;
1739
0
  ctxt->nameNr = 0;
1740
0
  ctxt->nameMax = 0;
1741
0
  ctxt->name = NULL;
1742
0
  ctxt->spaceNr = 0;
1743
0
  ctxt->spaceMax = 0;
1744
0
  ctxt->space = NULL;
1745
0
  return(-1);
1746
0
    }
1747
12.4k
    ctxt->spaceNr = 1;
1748
12.4k
    ctxt->spaceMax = 10;
1749
12.4k
    ctxt->spaceTab[0] = -1;
1750
12.4k
    ctxt->space = &ctxt->spaceTab[0];
1751
12.4k
    ctxt->myDoc = NULL;
1752
12.4k
    ctxt->wellFormed = 1;
1753
12.4k
    ctxt->nsWellFormed = 1;
1754
12.4k
    ctxt->valid = 1;
1755
12.4k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1756
12.4k
    if (ctxt->loadsubset) {
1757
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1758
0
    }
1759
12.4k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1760
12.4k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1761
12.4k
    if (ctxt->pedantic) {
1762
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1763
0
    }
1764
12.4k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1765
12.4k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1766
12.4k
    if (ctxt->keepBlanks == 0) {
1767
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1768
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1769
0
    }
1770
1771
12.4k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1772
12.4k
    ctxt->vctxt.userData = ctxt;
1773
12.4k
    ctxt->vctxt.error = xmlParserValidityError;
1774
12.4k
    ctxt->vctxt.warning = xmlParserValidityWarning;
1775
12.4k
    if (ctxt->validate) {
1776
0
  if (xmlGetWarningsDefaultValue == 0)
1777
0
      ctxt->vctxt.warning = NULL;
1778
0
  else
1779
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1780
0
  ctxt->vctxt.nodeMax = 0;
1781
0
        ctxt->options |= XML_PARSE_DTDVALID;
1782
0
    }
1783
12.4k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1784
12.4k
    if (ctxt->replaceEntities) {
1785
0
        ctxt->options |= XML_PARSE_NOENT;
1786
0
    }
1787
12.4k
    ctxt->record_info = 0;
1788
12.4k
    ctxt->checkIndex = 0;
1789
12.4k
    ctxt->inSubset = 0;
1790
12.4k
    ctxt->errNo = XML_ERR_OK;
1791
12.4k
    ctxt->depth = 0;
1792
12.4k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1793
12.4k
    ctxt->catalogs = NULL;
1794
12.4k
    ctxt->sizeentities = 0;
1795
12.4k
    ctxt->sizeentcopy = 0;
1796
12.4k
    ctxt->input_id = 1;
1797
12.4k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1798
12.4k
    return(0);
1799
12.4k
}
1800
1801
/**
1802
 * xmlInitParserCtxt:
1803
 * @ctxt:  an XML parser context
1804
 *
1805
 * DEPRECATED: Internal function which will be made private in a future
1806
 * version.
1807
 *
1808
 * Initialize a parser context
1809
 *
1810
 * Returns 0 in case of success and -1 in case of error
1811
 */
1812
1813
int
1814
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1815
0
{
1816
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1817
0
}
1818
1819
/**
1820
 * xmlFreeParserCtxt:
1821
 * @ctxt:  an XML parser context
1822
 *
1823
 * Free all the memory used by a parser context. However the parsed
1824
 * document in ctxt->myDoc is not freed.
1825
 */
1826
1827
void
1828
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1829
12.4k
{
1830
12.4k
    xmlParserInputPtr input;
1831
1832
12.4k
    if (ctxt == NULL) return;
1833
1834
24.9k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1835
12.4k
        xmlFreeInputStream(input);
1836
12.4k
    }
1837
12.4k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1838
12.4k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1839
12.4k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1840
12.4k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1841
12.4k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1842
12.4k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1843
12.4k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1844
12.4k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1845
12.4k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1846
12.4k
#ifdef LIBXML_SAX1_ENABLED
1847
12.4k
    if ((ctxt->sax != NULL) &&
1848
12.4k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1849
#else
1850
    if (ctxt->sax != NULL)
1851
#endif /* LIBXML_SAX1_ENABLED */
1852
12.4k
        xmlFree(ctxt->sax);
1853
12.4k
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1854
12.4k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1855
12.4k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1856
12.4k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1857
12.4k
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1858
12.4k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1859
12.4k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1860
12.4k
    if (ctxt->attsDefault != NULL)
1861
814
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1862
12.4k
    if (ctxt->attsSpecial != NULL)
1863
907
        xmlHashFree(ctxt->attsSpecial, NULL);
1864
12.4k
    if (ctxt->freeElems != NULL) {
1865
0
        xmlNodePtr cur, next;
1866
1867
0
  cur = ctxt->freeElems;
1868
0
  while (cur != NULL) {
1869
0
      next = cur->next;
1870
0
      xmlFree(cur);
1871
0
      cur = next;
1872
0
  }
1873
0
    }
1874
12.4k
    if (ctxt->freeAttrs != NULL) {
1875
0
        xmlAttrPtr cur, next;
1876
1877
0
  cur = ctxt->freeAttrs;
1878
0
  while (cur != NULL) {
1879
0
      next = cur->next;
1880
0
      xmlFree(cur);
1881
0
      cur = next;
1882
0
  }
1883
0
    }
1884
    /*
1885
     * cleanup the error strings
1886
     */
1887
12.4k
    if (ctxt->lastError.message != NULL)
1888
12.0k
        xmlFree(ctxt->lastError.message);
1889
12.4k
    if (ctxt->lastError.file != NULL)
1890
0
        xmlFree(ctxt->lastError.file);
1891
12.4k
    if (ctxt->lastError.str1 != NULL)
1892
8.80k
        xmlFree(ctxt->lastError.str1);
1893
12.4k
    if (ctxt->lastError.str2 != NULL)
1894
232
        xmlFree(ctxt->lastError.str2);
1895
12.4k
    if (ctxt->lastError.str3 != NULL)
1896
6
        xmlFree(ctxt->lastError.str3);
1897
1898
12.4k
#ifdef LIBXML_CATALOG_ENABLED
1899
12.4k
    if (ctxt->catalogs != NULL)
1900
28
  xmlCatalogFreeLocal(ctxt->catalogs);
1901
12.4k
#endif
1902
12.4k
    xmlFree(ctxt);
1903
12.4k
}
1904
1905
/**
1906
 * xmlNewParserCtxt:
1907
 *
1908
 * Allocate and initialize a new parser context.
1909
 *
1910
 * Returns the xmlParserCtxtPtr or NULL
1911
 */
1912
1913
xmlParserCtxtPtr
1914
xmlNewParserCtxt(void)
1915
0
{
1916
0
    return(xmlNewSAXParserCtxt(NULL, NULL));
1917
0
}
1918
1919
/**
1920
 * xmlNewSAXParserCtxt:
1921
 * @sax:  SAX handler
1922
 * @userData:  user data
1923
 *
1924
 * Allocate and initialize a new SAX parser context. If userData is NULL,
1925
 * the parser context will be passed as user data.
1926
 *
1927
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1928
 */
1929
1930
xmlParserCtxtPtr
1931
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1932
12.4k
{
1933
12.4k
    xmlParserCtxtPtr ctxt;
1934
1935
12.4k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1936
12.4k
    if (ctxt == NULL) {
1937
0
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1938
0
  return(NULL);
1939
0
    }
1940
12.4k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1941
12.4k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1942
0
        xmlFreeParserCtxt(ctxt);
1943
0
  return(NULL);
1944
0
    }
1945
12.4k
    return(ctxt);
1946
12.4k
}
1947
1948
/************************************************************************
1949
 *                  *
1950
 *    Handling of node information        *
1951
 *                  *
1952
 ************************************************************************/
1953
1954
/**
1955
 * xmlClearParserCtxt:
1956
 * @ctxt:  an XML parser context
1957
 *
1958
 * Clear (release owned resources) and reinitialize a parser context
1959
 */
1960
1961
void
1962
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1963
0
{
1964
0
  if (ctxt==NULL)
1965
0
    return;
1966
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1967
0
  xmlCtxtReset(ctxt);
1968
0
}
1969
1970
1971
/**
1972
 * xmlParserFindNodeInfo:
1973
 * @ctx:  an XML parser context
1974
 * @node:  an XML node within the tree
1975
 *
1976
 * DEPRECATED: Don't use.
1977
 *
1978
 * Find the parser node info struct for a given node
1979
 *
1980
 * Returns an xmlParserNodeInfo block pointer or NULL
1981
 */
1982
const xmlParserNodeInfo *
1983
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1984
0
{
1985
0
    unsigned long pos;
1986
1987
0
    if ((ctx == NULL) || (node == NULL))
1988
0
        return (NULL);
1989
    /* Find position where node should be at */
1990
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1991
0
    if (pos < ctx->node_seq.length
1992
0
        && ctx->node_seq.buffer[pos].node == node)
1993
0
        return &ctx->node_seq.buffer[pos];
1994
0
    else
1995
0
        return NULL;
1996
0
}
1997
1998
1999
/**
2000
 * xmlInitNodeInfoSeq:
2001
 * @seq:  a node info sequence pointer
2002
 *
2003
 * DEPRECATED: Don't use.
2004
 *
2005
 * -- Initialize (set to initial state) node info sequence
2006
 */
2007
void
2008
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2009
12.4k
{
2010
12.4k
    if (seq == NULL)
2011
0
        return;
2012
12.4k
    seq->length = 0;
2013
12.4k
    seq->maximum = 0;
2014
12.4k
    seq->buffer = NULL;
2015
12.4k
}
2016
2017
/**
2018
 * xmlClearNodeInfoSeq:
2019
 * @seq:  a node info sequence pointer
2020
 *
2021
 * DEPRECATED: Don't use.
2022
 *
2023
 * -- Clear (release memory and reinitialize) node
2024
 *   info sequence
2025
 */
2026
void
2027
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2028
0
{
2029
0
    if (seq == NULL)
2030
0
        return;
2031
0
    if (seq->buffer != NULL)
2032
0
        xmlFree(seq->buffer);
2033
0
    xmlInitNodeInfoSeq(seq);
2034
0
}
2035
2036
/**
2037
 * xmlParserFindNodeInfoIndex:
2038
 * @seq:  a node info sequence pointer
2039
 * @node:  an XML node pointer
2040
 *
2041
 * DEPRECATED: Don't use.
2042
 *
2043
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2044
 *   the given node is or should be at in a sorted sequence
2045
 *
2046
 * Returns a long indicating the position of the record
2047
 */
2048
unsigned long
2049
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2050
                           const xmlNodePtr node)
2051
0
{
2052
0
    unsigned long upper, lower, middle;
2053
0
    int found = 0;
2054
2055
0
    if ((seq == NULL) || (node == NULL))
2056
0
        return ((unsigned long) -1);
2057
2058
    /* Do a binary search for the key */
2059
0
    lower = 1;
2060
0
    upper = seq->length;
2061
0
    middle = 0;
2062
0
    while (lower <= upper && !found) {
2063
0
        middle = lower + (upper - lower) / 2;
2064
0
        if (node == seq->buffer[middle - 1].node)
2065
0
            found = 1;
2066
0
        else if (node < seq->buffer[middle - 1].node)
2067
0
            upper = middle - 1;
2068
0
        else
2069
0
            lower = middle + 1;
2070
0
    }
2071
2072
    /* Return position */
2073
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2074
0
        return middle;
2075
0
    else
2076
0
        return middle - 1;
2077
0
}
2078
2079
2080
/**
2081
 * xmlParserAddNodeInfo:
2082
 * @ctxt:  an XML parser context
2083
 * @info:  a node info sequence pointer
2084
 *
2085
 * DEPRECATED: Don't use.
2086
 *
2087
 * Insert node info record into the sorted sequence
2088
 */
2089
void
2090
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2091
                     const xmlParserNodeInfoPtr info)
2092
0
{
2093
0
    unsigned long pos;
2094
2095
0
    if ((ctxt == NULL) || (info == NULL)) return;
2096
2097
    /* Find pos and check to see if node is already in the sequence */
2098
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2099
0
                                     info->node);
2100
2101
0
    if ((pos < ctxt->node_seq.length) &&
2102
0
        (ctxt->node_seq.buffer != NULL) &&
2103
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2104
0
        ctxt->node_seq.buffer[pos] = *info;
2105
0
    }
2106
2107
    /* Otherwise, we need to add new node to buffer */
2108
0
    else {
2109
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2110
0
      (ctxt->node_seq.buffer == NULL)) {
2111
0
            xmlParserNodeInfo *tmp_buffer;
2112
0
            unsigned int byte_size;
2113
2114
0
            if (ctxt->node_seq.maximum == 0)
2115
0
                ctxt->node_seq.maximum = 2;
2116
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2117
0
      (2 * ctxt->node_seq.maximum));
2118
2119
0
            if (ctxt->node_seq.buffer == NULL)
2120
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2121
0
            else
2122
0
                tmp_buffer =
2123
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2124
0
                                                     byte_size);
2125
2126
0
            if (tmp_buffer == NULL) {
2127
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
2128
0
                return;
2129
0
            }
2130
0
            ctxt->node_seq.buffer = tmp_buffer;
2131
0
            ctxt->node_seq.maximum *= 2;
2132
0
        }
2133
2134
        /* If position is not at end, move elements out of the way */
2135
0
        if (pos != ctxt->node_seq.length) {
2136
0
            unsigned long i;
2137
2138
0
            for (i = ctxt->node_seq.length; i > pos; i--)
2139
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2140
0
        }
2141
2142
        /* Copy element and increase length */
2143
0
        ctxt->node_seq.buffer[pos] = *info;
2144
0
        ctxt->node_seq.length++;
2145
0
    }
2146
0
}
2147
2148
/************************************************************************
2149
 *                  *
2150
 *    Defaults settings         *
2151
 *                  *
2152
 ************************************************************************/
2153
/**
2154
 * xmlPedanticParserDefault:
2155
 * @val:  int 0 or 1
2156
 *
2157
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2158
 *
2159
 * Set and return the previous value for enabling pedantic warnings.
2160
 *
2161
 * Returns the last value for 0 for no substitution, 1 for substitution.
2162
 */
2163
2164
int
2165
0
xmlPedanticParserDefault(int val) {
2166
0
    int old = xmlPedanticParserDefaultValue;
2167
2168
0
    xmlPedanticParserDefaultValue = val;
2169
0
    return(old);
2170
0
}
2171
2172
/**
2173
 * xmlLineNumbersDefault:
2174
 * @val:  int 0 or 1
2175
 *
2176
 * DEPRECATED: The modern options API always enables line numbers.
2177
 *
2178
 * Set and return the previous value for enabling line numbers in elements
2179
 * contents. This may break on old application and is turned off by default.
2180
 *
2181
 * Returns the last value for 0 for no substitution, 1 for substitution.
2182
 */
2183
2184
int
2185
0
xmlLineNumbersDefault(int val) {
2186
0
    int old = xmlLineNumbersDefaultValue;
2187
2188
0
    xmlLineNumbersDefaultValue = val;
2189
0
    return(old);
2190
0
}
2191
2192
/**
2193
 * xmlSubstituteEntitiesDefault:
2194
 * @val:  int 0 or 1
2195
 *
2196
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2197
 *
2198
 * Set and return the previous value for default entity support.
2199
 * Initially the parser always keep entity references instead of substituting
2200
 * entity values in the output. This function has to be used to change the
2201
 * default parser behavior
2202
 * SAX::substituteEntities() has to be used for changing that on a file by
2203
 * file basis.
2204
 *
2205
 * Returns the last value for 0 for no substitution, 1 for substitution.
2206
 */
2207
2208
int
2209
0
xmlSubstituteEntitiesDefault(int val) {
2210
0
    int old = xmlSubstituteEntitiesDefaultValue;
2211
2212
0
    xmlSubstituteEntitiesDefaultValue = val;
2213
0
    return(old);
2214
0
}
2215
2216
/**
2217
 * xmlKeepBlanksDefault:
2218
 * @val:  int 0 or 1
2219
 *
2220
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2221
 *
2222
 * Set and return the previous value for default blanks text nodes support.
2223
 * The 1.x version of the parser used an heuristic to try to detect
2224
 * ignorable white spaces. As a result the SAX callback was generating
2225
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2226
 * using the DOM output text nodes containing those blanks were not generated.
2227
 * The 2.x and later version will switch to the XML standard way and
2228
 * ignorableWhitespace() are only generated when running the parser in
2229
 * validating mode and when the current element doesn't allow CDATA or
2230
 * mixed content.
2231
 * This function is provided as a way to force the standard behavior
2232
 * on 1.X libs and to switch back to the old mode for compatibility when
2233
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2234
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2235
 * nodes generated.
2236
 * This value also affect autogeneration of indentation when saving code
2237
 * if blanks sections are kept, indentation is not generated.
2238
 *
2239
 * Returns the last value for 0 for no substitution, 1 for substitution.
2240
 */
2241
2242
int
2243
0
xmlKeepBlanksDefault(int val) {
2244
0
    int old = xmlKeepBlanksDefaultValue;
2245
2246
0
    xmlKeepBlanksDefaultValue = val;
2247
0
    if (!val) xmlIndentTreeOutput = 1;
2248
0
    return(old);
2249
0
}
2250