Coverage Report

Created: 2023-09-25 06:03

/src/libxml2-2.11.5/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
3.07M
#define CUR(ctxt) ctxt->input->cur
43
3.07M
#define END(ctxt) ctxt->input->end
44
3.07M
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * check the compiled lib version against the include one.
61
 * This only reports a mismatch (a "Fatal" or "Warning" message); it does not stop the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a memory allocation error
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
0
{
102
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
0
        (ctxt->instate == XML_PARSER_EOF))
104
0
  return;
105
0
    if (ctxt != NULL) {
106
0
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
0
        ctxt->instate = XML_PARSER_EOF;
108
0
        ctxt->disableSAX = 1;
109
0
    }
110
0
    if (extra)
111
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
0
                        NULL, NULL, 0, 0,
114
0
                        "Memory allocation failed : %s\n", extra);
115
0
    else
116
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
0
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
0
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  a string info
127
 * @str2:  a string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
3.10k
{
135
3.10k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
3.10k
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
3.10k
    if (ctxt != NULL)
139
3.10k
        ctxt->errNo = xmlerr;
140
3.10k
    __xmlRaiseError(NULL, NULL, NULL,
141
3.10k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
3.10k
                    NULL, 0, (const char *) str1, (const char *) str2,
143
3.10k
                    NULL, 0, 0, msg, str1, str2);
144
3.10k
    if (ctxt != NULL) {
145
3.10k
        ctxt->wellFormed = 0;
146
3.10k
        if (ctxt->recovery == 0)
147
3.10k
            ctxt->disableSAX = 1;
148
3.10k
    }
149
3.10k
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
8
{
162
8
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
8
        (ctxt->instate == XML_PARSER_EOF))
164
0
  return;
165
8
    if (ctxt != NULL)
166
8
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
8
    __xmlRaiseError(NULL, NULL, NULL,
168
8
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
8
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
8
                    0, 0, msg, str);
171
8
    if (ctxt != NULL) {
172
8
        ctxt->wellFormed = 0;
173
8
        if (ctxt->recovery == 0)
174
8
            ctxt->disableSAX = 1;
175
8
    }
176
8
}
177
178
/**
179
 * xmlErrEncodingInt:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @msg:  the error message
183
 * @val:  an integer value
184
 *
185
 * Handle an encoding error
186
 */
187
static void LIBXML_ATTR_FORMAT(3,0)
188
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189
                  const char *msg, int val)
190
3.21k
{
191
3.21k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
3.21k
        (ctxt->instate == XML_PARSER_EOF))
193
0
  return;
194
3.21k
    if (ctxt != NULL)
195
3.21k
        ctxt->errNo = error;
196
3.21k
    __xmlRaiseError(NULL, NULL, NULL,
197
3.21k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198
3.21k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199
3.21k
    if (ctxt != NULL) {
200
3.21k
        ctxt->wellFormed = 0;
201
3.21k
        if (ctxt->recovery == 0)
202
3.21k
            ctxt->disableSAX = 1;
203
3.21k
    }
204
3.21k
}
205
206
/**
207
 * xmlIsLetter:
208
 * @c:  a Unicode character (int)
209
 *
210
 * Check whether the character is allowed by the production
211
 * [84] Letter ::= BaseChar | Ideographic
212
 *
213
 * Returns 0 if not, non-zero otherwise
214
 */
215
int
216
0
xmlIsLetter(int c) {
217
0
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
218
0
}
219
220
/************************************************************************
221
 *                  *
222
 *    Input handling functions for progressive parsing  *
223
 *                  *
224
 ************************************************************************/
225
226
/* #define DEBUG_INPUT */
227
/* #define DEBUG_STACK */
228
/* #define DEBUG_PUSH */
229
230
231
/* we need to keep enough input to show errors in context */
232
2.77k
#define LINE_LEN        80
233
234
#ifdef DEBUG_INPUT
235
#define CHECK_BUFFER(in) check_buffer(in)
236
237
static
238
void check_buffer(xmlParserInputPtr in) {
239
    if (in->base != xmlBufContent(in->buf->buffer)) {
240
        xmlGenericError(xmlGenericErrorContext,
241
    "xmlParserInput: base mismatch problem\n");
242
    }
243
    if (in->cur < in->base) {
244
        xmlGenericError(xmlGenericErrorContext,
245
    "xmlParserInput: cur < base problem\n");
246
    }
247
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
248
        xmlGenericError(xmlGenericErrorContext,
249
    "xmlParserInput: cur > base + use problem\n");
250
    }
251
    xmlGenericError(xmlGenericErrorContext,"buffer %p : content %x, cur %d, use %d\n",
252
            (void *) in, (int) xmlBufContent(in->buf->buffer),
253
            in->cur - in->base, xmlBufUse(in->buf->buffer));
254
}
255
256
#else
257
#define CHECK_BUFFER(in)
258
#endif
259
260
261
/**
262
 * xmlHaltParser:
263
 * @ctxt:  an XML parser context
264
 *
265
 * Blocks further parser processing without overriding the error;
266
 * for internal use
267
 */
268
void
269
2.58k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
270
2.58k
    if (ctxt == NULL)
271
0
        return;
272
2.58k
    ctxt->instate = XML_PARSER_EOF;
273
2.58k
    ctxt->disableSAX = 1;
274
2.58k
    while (ctxt->inputNr > 1)
275
0
        xmlFreeInputStream(inputPop(ctxt));
276
2.58k
    if (ctxt->input != NULL) {
277
        /*
278
   * in case there was a specific allocation deallocate before
279
   * overriding base
280
   */
281
2.58k
        if (ctxt->input->free != NULL) {
282
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
283
0
      ctxt->input->free = NULL;
284
0
  }
285
2.58k
        if (ctxt->input->buf != NULL) {
286
2.57k
            xmlFreeParserInputBuffer(ctxt->input->buf);
287
2.57k
            ctxt->input->buf = NULL;
288
2.57k
        }
289
2.58k
  ctxt->input->cur = BAD_CAST"";
290
2.58k
        ctxt->input->length = 0;
291
2.58k
  ctxt->input->base = ctxt->input->cur;
292
2.58k
        ctxt->input->end = ctxt->input->cur;
293
2.58k
    }
294
2.58k
}
295
296
/**
297
 * xmlParserInputRead:
298
 * @in:  an XML parser input
299
 * @len:  an indicative size for the lookahead
300
 *
301
 * DEPRECATED: This function was internal and is deprecated.
302
 *
303
 * Returns -1 as this is an error to use it.
304
 */
305
int
306
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
307
0
    return(-1);
308
0
}
309
310
/**
311
 * xmlParserGrow:
312
 * @ctxt:  an XML parser context
313
 */
314
int
315
537k
xmlParserGrow(xmlParserCtxtPtr ctxt) {
316
537k
    xmlParserInputPtr in = ctxt->input;
317
537k
    xmlParserInputBufferPtr buf = in->buf;
318
537k
    ptrdiff_t curEnd = in->end - in->cur;
319
537k
    ptrdiff_t curBase = in->cur - in->base;
320
537k
    int ret;
321
322
537k
    if (buf == NULL)
323
0
        return(0);
324
    /* Don't grow push parser buffer. */
325
537k
    if (ctxt->progressive)
326
537k
        return(0);
327
    /* Don't grow memory buffers. */
328
0
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
329
0
        return(0);
330
331
0
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
332
0
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
333
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
334
0
        xmlErrInternal(ctxt, "Huge input lookup", NULL);
335
0
        xmlHaltParser(ctxt);
336
0
  return(-1);
337
0
    }
338
339
0
    if (curEnd >= INPUT_CHUNK)
340
0
        return(0);
341
342
0
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
343
0
    xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
344
345
    /* TODO: Get error code from xmlParserInputBufferGrow */
346
0
    if (ret < 0) {
347
0
        xmlErrInternal(ctxt, "Growing input buffer", NULL);
348
0
        xmlHaltParser(ctxt);
349
0
    }
350
351
0
    return(ret);
352
0
}
353
354
/**
355
 * xmlParserInputGrow:
356
 * @in:  an XML parser input
357
 * @len:  an indicative size for the lookahead
358
 *
359
 * DEPRECATED: Don't use.
360
 *
361
 * This function increases the input for the parser. It tries to
362
 * preserve pointers to the input buffer, and keeps already read data
363
 *
364
 * Returns the number of chars read, or -1 in case of error; 0 indicates the
365
 * end of this entity
366
 */
367
int
368
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
369
0
    int ret;
370
0
    size_t indx;
371
372
0
    if ((in == NULL) || (len < 0)) return(-1);
373
#ifdef DEBUG_INPUT
374
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
375
#endif
376
0
    if (in->buf == NULL) return(-1);
377
0
    if (in->base == NULL) return(-1);
378
0
    if (in->cur == NULL) return(-1);
379
0
    if (in->buf->buffer == NULL) return(-1);
380
381
    /* Don't grow memory buffers. */
382
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
383
0
        return(0);
384
385
0
    CHECK_BUFFER(in);
386
387
0
    indx = in->cur - in->base;
388
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
389
390
0
  CHECK_BUFFER(in);
391
392
0
        return(0);
393
0
    }
394
0
    ret = xmlParserInputBufferGrow(in->buf, len);
395
396
0
    in->base = xmlBufContent(in->buf->buffer);
397
0
    if (in->base == NULL) {
398
0
        in->base = BAD_CAST "";
399
0
        in->cur = in->base;
400
0
        in->end = in->base;
401
0
        return(-1);
402
0
    }
403
0
    in->cur = in->base + indx;
404
0
    in->end = xmlBufEnd(in->buf->buffer);
405
406
0
    CHECK_BUFFER(in);
407
408
0
    return(ret);
409
0
}
410
411
/**
412
 * xmlParserShrink:
413
 * @ctxt:  an XML parser context
414
 */
415
void
416
2.77k
xmlParserShrink(xmlParserCtxtPtr ctxt) {
417
2.77k
    xmlParserInputPtr in = ctxt->input;
418
2.77k
    xmlParserInputBufferPtr buf = in->buf;
419
2.77k
    size_t used;
420
421
    /* Don't shrink pull parser memory buffers. */
422
2.77k
    if ((buf == NULL) ||
423
2.77k
        ((ctxt->progressive == 0) &&
424
2.77k
         (buf->encoder == NULL) && (buf->readcallback == NULL)))
425
0
        return;
426
427
2.77k
    used = in->cur - in->base;
428
    /*
429
     * Do not shrink large buffers of which only a tiny fraction
430
     * was consumed
431
     */
432
2.77k
    if (used > INPUT_CHUNK) {
433
2.77k
  size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
434
435
2.77k
  if (res > 0) {
436
2.77k
            used -= res;
437
2.77k
            if ((res > ULONG_MAX) ||
438
2.77k
                (in->consumed > ULONG_MAX - (unsigned long)res))
439
0
                in->consumed = ULONG_MAX;
440
2.77k
            else
441
2.77k
                in->consumed += res;
442
2.77k
  }
443
2.77k
    }
444
445
2.77k
    xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
446
2.77k
}
447
448
/**
449
 * xmlParserInputShrink:
450
 * @in:  an XML parser input
451
 *
452
 * DEPRECATED: Don't use.
453
 *
454
 * This function removes used input for the parser.
455
 */
456
void
457
0
xmlParserInputShrink(xmlParserInputPtr in) {
458
0
    size_t used;
459
0
    size_t ret;
460
461
#ifdef DEBUG_INPUT
462
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
463
#endif
464
0
    if (in == NULL) return;
465
0
    if (in->buf == NULL) return;
466
0
    if (in->base == NULL) return;
467
0
    if (in->cur == NULL) return;
468
0
    if (in->buf->buffer == NULL) return;
469
470
0
    CHECK_BUFFER(in);
471
472
0
    used = in->cur - in->base;
473
    /*
474
     * Do not shrink large buffers of which only a tiny fraction
475
     * was consumed
476
     */
477
0
    if (used > INPUT_CHUNK) {
478
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
479
0
  if (ret > 0) {
480
0
            used -= ret;
481
0
            if ((ret > ULONG_MAX) ||
482
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
483
0
                in->consumed = ULONG_MAX;
484
0
            else
485
0
                in->consumed += ret;
486
0
  }
487
0
    }
488
489
0
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
490
0
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
491
0
    }
492
493
0
    in->base = xmlBufContent(in->buf->buffer);
494
0
    if (in->base == NULL) {
495
        /* TODO: raise error */
496
0
        in->base = BAD_CAST "";
497
0
        in->cur = in->base;
498
0
        in->end = in->base;
499
0
        return;
500
0
    }
501
0
    in->cur = in->base + used;
502
0
    in->end = xmlBufEnd(in->buf->buffer);
503
504
0
    CHECK_BUFFER(in);
505
0
}
506
507
/************************************************************************
508
 *                  *
509
 *    UTF8 character input and related functions    *
510
 *                  *
511
 ************************************************************************/
512
513
/**
514
 * xmlNextChar:
515
 * @ctxt:  the XML parser context
516
 *
517
 * DEPRECATED: Internal function, do not use.
518
 *
519
 * Skip to the next input char.
520
 */
521
522
void
523
xmlNextChar(xmlParserCtxtPtr ctxt)
524
3.07M
{
525
3.07M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
526
3.07M
        (ctxt->input == NULL))
527
0
        return;
528
529
3.07M
    if (!(VALID_CTXT(ctxt))) {
530
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
531
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
532
0
        xmlStopParser(ctxt);
533
0
  return;
534
0
    }
535
536
3.07M
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
537
151k
        if (xmlParserGrow(ctxt) < 0)
538
0
            return;
539
151k
        if (ctxt->input->cur >= ctxt->input->end)
540
35
            return;
541
151k
    }
542
543
3.07M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
544
1.96M
        const unsigned char *cur;
545
1.96M
        unsigned char c;
546
547
        /*
548
         *   2.11 End-of-Line Handling
549
         *   the literal two-character sequence "#xD#xA" or a standalone
550
         *   literal #xD, an XML processor must pass to the application
551
         *   the single character #xA.
552
         */
553
1.96M
        if (*(ctxt->input->cur) == '\n') {
554
7.45k
            ctxt->input->line++; ctxt->input->col = 1;
555
7.45k
        } else
556
1.95M
            ctxt->input->col++;
557
558
        /*
559
         * We are supposed to handle UTF8, check it's valid
560
         * From rfc2044: encoding of the Unicode values on UTF-8:
561
         *
562
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
563
         * 0000 0000-0000 007F   0xxxxxxx
564
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
565
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
566
         *
567
         * Check for the 0x110000 limit too
568
         */
569
1.96M
        cur = ctxt->input->cur;
570
571
1.96M
        c = *cur;
572
1.96M
        if (c & 0x80) {
573
247k
            size_t avail;
574
575
247k
            if (c == 0xC0)
576
1
          goto encoding_error;
577
578
247k
            avail = ctxt->input->end - ctxt->input->cur;
579
580
247k
            if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
581
68
                goto encoding_error;
582
247k
            if ((c & 0xe0) == 0xe0) {
583
247k
                unsigned int val;
584
585
247k
                if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
586
5
                    goto encoding_error;
587
247k
                if ((c & 0xf0) == 0xf0) {
588
117
                    if (((c & 0xf8) != 0xf0) ||
589
117
                        (avail < 4) || ((cur[3] & 0xc0) != 0x80))
590
4
                        goto encoding_error;
591
                    /* 4-byte code */
592
113
                    ctxt->input->cur += 4;
593
113
                    val = (cur[0] & 0x7) << 18;
594
113
                    val |= (cur[1] & 0x3f) << 12;
595
113
                    val |= (cur[2] & 0x3f) << 6;
596
113
                    val |= cur[3] & 0x3f;
597
246k
                } else {
598
                    /* 3-byte code */
599
246k
                    ctxt->input->cur += 3;
600
246k
                    val = (cur[0] & 0xf) << 12;
601
246k
                    val |= (cur[1] & 0x3f) << 6;
602
246k
                    val |= cur[2] & 0x3f;
603
246k
                }
604
247k
                if (((val > 0xd7ff) && (val < 0xe000)) ||
605
247k
                    ((val > 0xfffd) && (val < 0x10000)) ||
606
247k
                    (val >= 0x110000)) {
607
2.51k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
608
2.51k
          "Char 0x%X out of allowed range\n",
609
2.51k
          val);
610
2.51k
                }
611
247k
            } else
612
                /* 2-byte code */
613
737
                ctxt->input->cur += 2;
614
247k
        } else
615
            /* 1-byte code */
616
1.71M
            ctxt->input->cur++;
617
1.96M
    } else {
618
        /*
619
         * Assume it's a fixed length encoding (1) with
620
         * a compatible encoding for the ASCII set, since
621
         * XML constructs only use < 128 chars
622
         */
623
624
1.11M
        if (*(ctxt->input->cur) == '\n') {
625
12.1k
            ctxt->input->line++; ctxt->input->col = 1;
626
12.1k
        } else
627
1.10M
            ctxt->input->col++;
628
1.11M
        ctxt->input->cur++;
629
1.11M
    }
630
3.07M
    return;
631
3.07M
encoding_error:
632
    /*
633
     * If we detect a UTF-8 error, that probably means that the
634
     * input encoding didn't get properly advertised in the
635
     * declaration header. Report the error and switch the encoding
636
     * to ISO-Latin-1 (if you don't like this policy, just declare the
637
     * encoding !)
638
     */
639
78
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
640
78
        (ctxt->input->end - ctxt->input->cur < 4)) {
641
47
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
642
47
         "Input is not proper UTF-8, indicate encoding !\n",
643
47
         NULL, NULL);
644
47
    } else {
645
31
        char buffer[150];
646
647
31
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
648
31
      ctxt->input->cur[0], ctxt->input->cur[1],
649
31
      ctxt->input->cur[2], ctxt->input->cur[3]);
650
31
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
651
31
         "Input is not proper UTF-8, indicate encoding !\n%s",
652
31
         BAD_CAST buffer, NULL);
653
31
    }
654
78
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
655
78
    ctxt->input->cur++;
656
78
    return;
657
3.07M
}
658
659
/**
660
 * xmlCurrentChar:
661
 * @ctxt:  the XML parser context
662
 * @len:  pointer to the length of the char read
663
 *
664
 * DEPRECATED: Internal function, do not use.
665
 *
666
 * The current char value, if using UTF-8 this may actually span multiple
667
 * bytes in the input buffer. Implement the end of line normalization:
668
 * 2.11 End-of-Line Handling
669
 * Wherever an external parsed entity or the literal entity value
670
 * of an internal parsed entity contains either the literal two-character
671
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
672
 * must pass to the application the single character #xA.
673
 * (This behavior can conveniently be produced by normalizing all
674
 * line breaks to #xA on input, before parsing.)
675
 *
676
 * Returns the current char value and its length
677
 */
678
679
int
680
137M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
681
137M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
682
137M
    if (ctxt->instate == XML_PARSER_EOF)
683
0
  return(0);
684
685
137M
    if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
686
137M
        (xmlParserGrow(ctxt) < 0))
687
0
        return(0);
688
689
137M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
690
14.1M
      *len = 1;
691
14.1M
      return(*ctxt->input->cur);
692
14.1M
    }
693
123M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
694
  /*
695
   * We are supposed to handle UTF8, check it's valid
696
   * From rfc2044: encoding of the Unicode values on UTF-8:
697
   *
698
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
699
   * 0000 0000-0000 007F   0xxxxxxx
700
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
701
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
702
   *
703
   * Check for the 0x110000 limit too
704
   */
705
117M
  const unsigned char *cur = ctxt->input->cur;
706
117M
  unsigned char c;
707
117M
  unsigned int val;
708
709
117M
  c = *cur;
710
117M
  if (c & 0x80) {
711
116M
            size_t avail;
712
713
116M
      if (((c & 0x40) == 0) || (c == 0xC0))
714
695
    goto encoding_error;
715
716
116M
            avail = ctxt->input->end - ctxt->input->cur;
717
718
116M
            if (avail < 2)
719
102
                goto incomplete_sequence;
720
116M
      if ((cur[1] & 0xc0) != 0x80)
721
2.11k
    goto encoding_error;
722
116M
      if ((c & 0xe0) == 0xe0) {
723
115M
                if (avail < 3)
724
9
                    goto incomplete_sequence;
725
115M
    if ((cur[2] & 0xc0) != 0x80)
726
30
        goto encoding_error;
727
115M
    if ((c & 0xf0) == 0xf0) {
728
4.75k
                    if (avail < 4)
729
7
                        goto incomplete_sequence;
730
4.74k
        if (((c & 0xf8) != 0xf0) ||
731
4.74k
      ((cur[3] & 0xc0) != 0x80))
732
10
      goto encoding_error;
733
        /* 4-byte code */
734
4.73k
        *len = 4;
735
4.73k
        val = (cur[0] & 0x7) << 18;
736
4.73k
        val |= (cur[1] & 0x3f) << 12;
737
4.73k
        val |= (cur[2] & 0x3f) << 6;
738
4.73k
        val |= cur[3] & 0x3f;
739
4.73k
        if (val < 0x10000)
740
8
      goto encoding_error;
741
115M
    } else {
742
      /* 3-byte code */
743
115M
        *len = 3;
744
115M
        val = (cur[0] & 0xf) << 12;
745
115M
        val |= (cur[1] & 0x3f) << 6;
746
115M
        val |= cur[2] & 0x3f;
747
115M
        if (val < 0x800)
748
3
      goto encoding_error;
749
115M
    }
750
115M
      } else {
751
        /* 2-byte code */
752
745k
    *len = 2;
753
745k
    val = (cur[0] & 0x1f) << 6;
754
745k
    val |= cur[1] & 0x3f;
755
745k
    if (val < 0x80)
756
3
        goto encoding_error;
757
745k
      }
758
116M
      if (!IS_CHAR(val)) {
759
429
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
760
429
          "Char 0x%X out of allowed range\n", val);
761
429
      }
762
116M
      return(val);
763
116M
  } else {
764
      /* 1-byte code */
765
477k
      *len = 1;
766
477k
      if ((*ctxt->input->cur == 0) &&
767
477k
          (ctxt->input->end > ctxt->input->cur)) {
768
277
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
769
277
          "Char 0x0 out of allowed range\n", 0);
770
277
      }
771
477k
      if (*ctxt->input->cur == 0xD) {
772
375k
    if (ctxt->input->cur[1] == 0xA) {
773
1.30k
        ctxt->input->cur++;
774
1.30k
    }
775
375k
    return(0xA);
776
375k
      }
777
102k
      return(*ctxt->input->cur);
778
477k
  }
779
117M
    }
780
    /*
781
     * Assume it's a fixed length encoding (1) with
782
     * a compatible encoding for the ASCII set, since
783
     * XML constructs only use < 128 chars
784
     */
785
6.08M
    *len = 1;
786
6.08M
    if (*ctxt->input->cur == 0xD) {
787
569k
  if (ctxt->input->cur[1] == 0xA) {
788
6.09k
      ctxt->input->cur++;
789
6.09k
  }
790
569k
  return(0xA);
791
569k
    }
792
5.51M
    return(*ctxt->input->cur);
793
794
2.86k
encoding_error:
795
    /*
796
     * If we detect a UTF-8 error, that probably means that the
797
     * input encoding didn't get properly advertised in the
798
     * declaration header. Report the error and switch the encoding
799
     * to ISO-Latin-1 (if you don't like this policy, just declare the
800
     * encoding !)
801
     */
802
2.86k
    if (ctxt->input->end - ctxt->input->cur < 4) {
803
344
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
804
344
         "Input is not proper UTF-8, indicate encoding !\n",
805
344
         NULL, NULL);
806
2.51k
    } else {
807
2.51k
        char buffer[150];
808
809
2.51k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
810
2.51k
      ctxt->input->cur[0], ctxt->input->cur[1],
811
2.51k
      ctxt->input->cur[2], ctxt->input->cur[3]);
812
2.51k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
813
2.51k
         "Input is not proper UTF-8, indicate encoding !\n%s",
814
2.51k
         BAD_CAST buffer, NULL);
815
2.51k
    }
816
2.86k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
817
2.86k
    *len = 1;
818
2.86k
    return(*ctxt->input->cur);
819
820
118
incomplete_sequence:
821
    /*
822
     * An encoding problem may arise from a truncated input buffer
823
     * splitting a character in the middle. In that case do not raise
824
     * an error but return 0. This should only happen when push parsing
825
     * char data.
826
     */
827
118
    *len = 0;
828
118
    return(0);
829
6.08M
}
830
831
/**
832
 * xmlStringCurrentChar:
833
 * @ctxt:  the XML parser context
834
 * @cur:  pointer to the beginning of the char
835
 * @len:  pointer to the length of the char read
836
 *
837
 * DEPRECATED: Internal function, do not use.
838
 *
839
 * The current char value, if using UTF-8 this may actually span multiple
840
 * bytes in the input buffer.
841
 *
842
 * Returns the current char value and its length
843
 */
844
845
int
846
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
847
5.08M
{
848
5.08M
    if ((len == NULL) || (cur == NULL)) return(0);
849
5.08M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
850
        /*
851
         * We are supposed to handle UTF8, check it's valid
852
         * From rfc2044: encoding of the Unicode values on UTF-8:
853
         *
854
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
855
         * 0000 0000-0000 007F   0xxxxxxx
856
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
857
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
858
         *
859
         * Check for the 0x110000 limit too
860
         */
861
928k
        unsigned char c;
862
928k
        unsigned int val;
863
864
928k
        c = *cur;
865
928k
        if (c & 0x80) {
866
80.5k
            if ((cur[1] & 0xc0) != 0x80)
867
0
                goto encoding_error;
868
80.5k
            if ((c & 0xe0) == 0xe0) {
869
870
78.9k
                if ((cur[2] & 0xc0) != 0x80)
871
0
                    goto encoding_error;
872
78.9k
                if ((c & 0xf0) == 0xf0) {
873
92
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
874
0
                        goto encoding_error;
875
                    /* 4-byte code */
876
92
                    *len = 4;
877
92
                    val = (cur[0] & 0x7) << 18;
878
92
                    val |= (cur[1] & 0x3f) << 12;
879
92
                    val |= (cur[2] & 0x3f) << 6;
880
92
                    val |= cur[3] & 0x3f;
881
78.8k
                } else {
882
                    /* 3-byte code */
883
78.8k
                    *len = 3;
884
78.8k
                    val = (cur[0] & 0xf) << 12;
885
78.8k
                    val |= (cur[1] & 0x3f) << 6;
886
78.8k
                    val |= cur[2] & 0x3f;
887
78.8k
                }
888
78.9k
            } else {
889
                /* 2-byte code */
890
1.62k
                *len = 2;
891
1.62k
                val = (cur[0] & 0x1f) << 6;
892
1.62k
                val |= cur[1] & 0x3f;
893
1.62k
            }
894
80.5k
            if (!IS_CHAR(val)) {
895
0
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
896
0
          "Char 0x%X out of allowed range\n", val);
897
0
            }
898
80.5k
            return (val);
899
848k
        } else {
900
            /* 1-byte code */
901
848k
            *len = 1;
902
848k
            return (*cur);
903
848k
        }
904
928k
    }
905
    /*
906
     * Assume it's a fixed length encoding (1) with
907
     * a compatible encoding for the ASCII set, since
908
     * XML constructs only use < 128 chars
909
     */
910
4.15M
    *len = 1;
911
4.15M
    return (*cur);
912
0
encoding_error:
913
914
    /*
915
     * An encoding problem may arise from a truncated input buffer
916
     * splitting a character in the middle. In that case do not raise
917
     * an error but return 0 to indicate an end of stream problem
918
     */
919
0
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
920
0
        (ctxt->input->end - ctxt->input->cur < 4)) {
921
0
  *len = 0;
922
0
  return(0);
923
0
    }
924
    /*
925
     * If we detect an UTF8 error that probably mean that the
926
     * input encoding didn't get properly advertised in the
927
     * declaration header. Report the error and switch the encoding
928
     * to ISO-Latin-1 (if you don't like this policy, just declare the
929
     * encoding !)
930
     */
931
0
    {
932
0
        char buffer[150];
933
934
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
935
0
      ctxt->input->cur[0], ctxt->input->cur[1],
936
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
937
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
938
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
939
0
         BAD_CAST buffer, NULL);
940
0
    }
941
0
    *len = 1;
942
0
    return (*cur);
943
0
}
944
945
/**
946
 * xmlCopyCharMultiByte:
947
 * @out:  pointer to an array of xmlChar
948
 * @val:  the char value
949
 *
950
 * Append the char value to the array
951
 *
952
 * Returns the number of xmlChar written
953
 */
954
int
955
36.6M
xmlCopyCharMultiByte(xmlChar *out, int val) {
956
36.6M
    if ((out == NULL) || (val < 0)) return(0);
957
    /*
958
     * We are supposed to handle UTF8, check it's valid
959
     * From rfc2044: encoding of the Unicode values on UTF-8:
960
     *
961
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
962
     * 0000 0000-0000 007F   0xxxxxxx
963
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
964
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
965
     */
966
36.6M
    if  (val >= 0x80) {
967
36.6M
  xmlChar *savedout = out;
968
36.6M
  int bits;
969
36.6M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
970
35.9M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
971
10.6k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
972
0
  else {
973
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
974
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
975
0
            val);
976
0
      return(0);
977
0
  }
978
109M
  for ( ; bits >= 0; bits-= 6)
979
72.5M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
980
36.6M
  return (out - savedout);
981
36.6M
    }
982
7.52k
    *out = val;
983
7.52k
    return 1;
984
36.6M
}
985
986
/**
987
 * xmlCopyChar:
988
 * @len:  Ignored, compatibility
989
 * @out:  pointer to an array of xmlChar
990
 * @val:  the char value
991
 *
992
 * Append the char value to the array
993
 *
994
 * Returns the number of xmlChar written
995
 */
996
997
int
998
16.6k
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
999
16.6k
    if ((out == NULL) || (val < 0)) return(0);
1000
    /* the len parameter is ignored */
1001
16.6k
    if  (val >= 0x80) {
1002
11.7k
  return(xmlCopyCharMultiByte (out, val));
1003
11.7k
    }
1004
4.91k
    *out = val;
1005
4.91k
    return 1;
1006
16.6k
}
1007
1008
/************************************************************************
1009
 *                  *
1010
 *    Commodity functions to switch encodings     *
1011
 *                  *
1012
 ************************************************************************/
1013
1014
static xmlCharEncodingHandlerPtr
1015
178
xmlDetectEBCDIC(xmlParserInputPtr input) {
1016
178
    xmlChar out[200];
1017
178
    xmlCharEncodingHandlerPtr handler;
1018
178
    int inlen, outlen, res, i;
1019
1020
    /*
1021
     * To detect the EBCDIC code page, we convert the first 200 bytes
1022
     * to EBCDIC-US and try to find the encoding declaration.
1023
     */
1024
178
    handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1025
178
    if (handler == NULL)
1026
0
        return(NULL);
1027
178
    outlen = sizeof(out) - 1;
1028
178
    inlen = input->end - input->cur;
1029
178
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1030
178
    if (res < 0)
1031
8
        return(handler);
1032
170
    out[outlen] = 0;
1033
1034
2.13k
    for (i = 0; i < outlen; i++) {
1035
2.07k
        if (out[i] == '>')
1036
1
            break;
1037
2.07k
        if ((out[i] == 'e') &&
1038
2.07k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1039
109
            int start, cur, quote;
1040
1041
109
            i += 8;
1042
109
            while (IS_BLANK_CH(out[i]))
1043
427
                i += 1;
1044
109
            if (out[i++] != '=')
1045
24
                break;
1046
85
            while (IS_BLANK_CH(out[i]))
1047
497
                i += 1;
1048
85
            quote = out[i++];
1049
85
            if ((quote != '\'') && (quote != '"'))
1050
25
                break;
1051
60
            start = i;
1052
60
            cur = out[i];
1053
1.27k
            while (((cur >= 'a') && (cur <= 'z')) ||
1054
1.27k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1055
1.27k
                   ((cur >= '0') && (cur <= '9')) ||
1056
1.27k
                   (cur == '.') || (cur == '_') ||
1057
1.27k
                   (cur == '-'))
1058
1.21k
                cur = out[++i];
1059
60
            if (cur != quote)
1060
51
                break;
1061
9
            out[i] = 0;
1062
9
            xmlCharEncCloseFunc(handler);
1063
9
            handler = xmlFindCharEncodingHandler((char *) out + start);
1064
9
            break;
1065
60
        }
1066
2.07k
    }
1067
1068
170
    return(handler);
1069
178
}
1070
1071
/**
1072
 * xmlSwitchEncoding:
1073
 * @ctxt:  the parser context
1074
 * @enc:  the encoding value (number)
1075
 *
1076
 * change the input functions when discovering the character encoding
1077
 * of a given entity.
1078
 *
1079
 * Returns 0 in case of success, -1 otherwise
1080
 */
1081
int
1082
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1083
9.88k
{
1084
9.88k
    xmlCharEncodingHandlerPtr handler;
1085
9.88k
    int ret;
1086
1087
9.88k
    if (ctxt == NULL) return(-1);
1088
1089
    /*
1090
     * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1091
     *
1092
     * Note that we look for a decoded UTF-8 BOM when switching to UTF-16.
1093
     * This is mostly useless but Webkit/Chromium relies on this behavior.
1094
     * See https://bugs.chromium.org/p/chromium/issues/detail?id=1451026
1095
     */
1096
9.88k
    if ((ctxt->input != NULL) &&
1097
9.88k
        (ctxt->input->consumed == 0) &&
1098
9.88k
        (ctxt->input->cur != NULL) &&
1099
9.88k
        (ctxt->input->cur == ctxt->input->base) &&
1100
9.88k
        ((enc == XML_CHAR_ENCODING_UTF8) ||
1101
9.88k
         (enc == XML_CHAR_ENCODING_UTF16LE) ||
1102
9.88k
         (enc == XML_CHAR_ENCODING_UTF16BE))) {
1103
        /*
1104
         * Errata on XML-1.0 June 20 2001
1105
         * Specific handling of the Byte Order Mark for
1106
         * UTF-8
1107
         */
1108
3.41k
        if ((ctxt->input->cur[0] == 0xEF) &&
1109
3.41k
            (ctxt->input->cur[1] == 0xBB) &&
1110
3.41k
            (ctxt->input->cur[2] == 0xBF)) {
1111
577
            ctxt->input->cur += 3;
1112
577
        }
1113
3.41k
    }
1114
1115
9.88k
    switch (enc) {
1116
0
  case XML_CHAR_ENCODING_ERROR:
1117
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1118
0
                     "encoding unknown\n", NULL, NULL);
1119
0
      return(-1);
1120
6.26k
  case XML_CHAR_ENCODING_NONE:
1121
      /* let's assume it's UTF-8 without the XML decl */
1122
6.26k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1123
6.26k
      return(0);
1124
2.89k
  case XML_CHAR_ENCODING_UTF8:
1125
      /* default encoding, no conversion should be needed */
1126
2.89k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1127
2.89k
      return(0);
1128
178
        case XML_CHAR_ENCODING_EBCDIC:
1129
178
            handler = xmlDetectEBCDIC(ctxt->input);
1130
178
            break;
1131
539
        default:
1132
539
            handler = xmlGetCharEncodingHandler(enc);
1133
539
            break;
1134
9.88k
    }
1135
717
    if (handler == NULL) {
1136
  /*
1137
   * Default handlers.
1138
   */
1139
9
  switch (enc) {
1140
0
      case XML_CHAR_ENCODING_ASCII:
1141
    /* default encoding, no conversion should be needed */
1142
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1143
0
    return(0);
1144
0
      case XML_CHAR_ENCODING_8859_1:
1145
0
    if ((ctxt->inputNr == 1) &&
1146
0
        (ctxt->encoding == NULL) &&
1147
0
        (ctxt->input != NULL) &&
1148
0
        (ctxt->input->encoding != NULL)) {
1149
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1150
0
    }
1151
0
    ctxt->charset = enc;
1152
0
    return(0);
1153
9
      default:
1154
9
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1155
9
                        "encoding not supported: %s\n",
1156
9
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
1157
                /*
1158
                 * TODO: We could recover from errors in external entities
1159
                 * if we didn't stop the parser. But most callers of this
1160
                 * function don't check the return value.
1161
                 */
1162
9
                xmlStopParser(ctxt);
1163
9
                return(-1);
1164
9
        }
1165
9
    }
1166
708
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1167
708
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1168
        /*
1169
   * on encoding conversion errors, stop the parser
1170
   */
1171
6
        xmlStopParser(ctxt);
1172
6
  ctxt->errNo = XML_I18N_CONV_FAILED;
1173
6
    }
1174
708
    return(ret);
1175
717
}
1176
1177
/**
1178
 * xmlSwitchInputEncoding:
1179
 * @ctxt:  the parser context
1180
 * @input:  the input stream
1181
 * @handler:  the encoding handler
1182
 *
1183
 * change the input functions when discovering the character encoding
1184
 * of a given entity.
1185
 *
1186
 * Returns 0 in case of success, -1 otherwise
1187
 */
1188
int
1189
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1190
                       xmlCharEncodingHandlerPtr handler)
1191
1.55k
{
1192
1.55k
    int nbchars;
1193
1.55k
    xmlParserInputBufferPtr in;
1194
1195
1.55k
    if (handler == NULL)
1196
0
        return (-1);
1197
1.55k
    if (input == NULL)
1198
0
        return (-1);
1199
1.55k
    in = input->buf;
1200
1.55k
    if (in == NULL) {
1201
0
  xmlErrInternal(ctxt,
1202
0
                "static memory buffer doesn't support encoding\n", NULL);
1203
        /*
1204
         * Callers assume that the input buffer takes ownership of the
1205
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1206
         * handlers and avoids a memory leak.
1207
         */
1208
0
        xmlCharEncCloseFunc(handler);
1209
0
  return (-1);
1210
0
    }
1211
1212
1.55k
    if (in->encoder != NULL) {
1213
1
        if (in->encoder == handler)
1214
0
            return (0);
1215
1216
        /*
1217
         * Switching encodings during parsing is a really bad idea,
1218
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1219
         * separate calls to xmlParseChunk.
1220
         *
1221
         * TODO: We should check whether the "raw" input buffer is empty and
1222
         * convert the old content using the old encoder.
1223
         */
1224
1225
1
        xmlCharEncCloseFunc(in->encoder);
1226
1
        in->encoder = handler;
1227
1
        return (0);
1228
1
    }
1229
1230
1.55k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1231
1.55k
    in->encoder = handler;
1232
1233
    /*
1234
     * Is there already some content down the pipe to convert ?
1235
     */
1236
1.55k
    if (xmlBufIsEmpty(in->buffer) == 0) {
1237
1.55k
        size_t processed, use, consumed;
1238
1239
        /*
1240
         * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1241
         */
1242
1243
        /*
1244
         * Specific handling of the Byte Order Mark for
1245
         * UTF-16
1246
         */
1247
1.55k
        if ((handler->name != NULL) &&
1248
1.55k
            (!strcmp(handler->name, "UTF-16LE") ||
1249
1.55k
             !strcmp(handler->name, "UTF-16")) &&
1250
1.55k
            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1251
53
            input->cur += 2;
1252
53
        }
1253
1.55k
        if ((handler->name != NULL) &&
1254
1.55k
            (!strcmp(handler->name, "UTF-16BE")) &&
1255
1.55k
            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1256
228
            input->cur += 2;
1257
228
        }
1258
        /*
1259
         * Errata on XML-1.0 June 20 2001
1260
         * Specific handling of the Byte Order Mark for
1261
         * UTF-8
1262
         */
1263
1.55k
        if ((handler->name != NULL) &&
1264
1.55k
            (!strcmp(handler->name, "UTF-8")) &&
1265
1.55k
            (input->cur[0] == 0xEF) &&
1266
1.55k
            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1267
0
            input->cur += 3;
1268
0
        }
1269
1270
        /*
1271
         * Shrink the current input buffer.
1272
         * Move it as the raw buffer and create a new input buffer
1273
         */
1274
1.55k
        processed = input->cur - input->base;
1275
1.55k
        xmlBufShrink(in->buffer, processed);
1276
1.55k
        input->consumed += processed;
1277
1.55k
        in->raw = in->buffer;
1278
1.55k
        in->buffer = xmlBufCreate();
1279
1.55k
        in->rawconsumed = processed;
1280
1.55k
        use = xmlBufUse(in->raw);
1281
1282
        /*
1283
         * TODO: We must flush and decode the whole buffer to make functions
1284
         * like xmlReadMemory work with a user-provided encoding. If the
1285
         * encoding is specified directly, we should probably set
1286
         * XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1287
         * twice. Then we could set "flush" to false which should save
1288
         * a considerable amount of memory when parsing from memory.
1289
         * It's probably even possible to remove this whole if-block
1290
         * completely.
1291
         */
1292
1.55k
        nbchars = xmlCharEncInput(in, 1);
1293
1.55k
        xmlBufResetInput(in->buffer, input);
1294
1.55k
        if (nbchars < 0) {
1295
            /* TODO: This could be an out of memory or an encoding error. */
1296
8
            xmlErrInternal(ctxt,
1297
8
                           "switching encoding: encoder error\n",
1298
8
                           NULL);
1299
8
            xmlHaltParser(ctxt);
1300
8
            return (-1);
1301
8
        }
1302
1.54k
        consumed = use - xmlBufUse(in->raw);
1303
1.54k
        if ((consumed > ULONG_MAX) ||
1304
1.54k
            (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1305
0
            in->rawconsumed = ULONG_MAX;
1306
1.54k
        else
1307
1.54k
      in->rawconsumed += consumed;
1308
1.54k
    }
1309
1.54k
    return (0);
1310
1.55k
}
1311
1312
/**
1313
 * xmlSwitchToEncoding:
1314
 * @ctxt:  the parser context
1315
 * @handler:  the encoding handler
1316
 *
1317
 * change the input functions when discovering the character encoding
1318
 * of a given entity.
1319
 *
1320
 * Returns 0 in case of success, -1 otherwise
1321
 */
1322
int
1323
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1324
845
{
1325
845
    if (ctxt == NULL)
1326
0
        return(-1);
1327
845
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1328
845
}
1329
1330
/************************************************************************
1331
 *                  *
1332
 *  Commodity functions to handle entities processing   *
1333
 *                  *
1334
 ************************************************************************/
1335
1336
/**
1337
 * xmlFreeInputStream:
1338
 * @input:  an xmlParserInputPtr
1339
 *
1340
 * Free up an input stream.
1341
 */
1342
void
1343
9.88k
xmlFreeInputStream(xmlParserInputPtr input) {
1344
9.88k
    if (input == NULL) return;
1345
1346
9.88k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1347
9.88k
    if (input->directory != NULL) xmlFree((char *) input->directory);
1348
9.88k
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1349
9.88k
    if (input->version != NULL) xmlFree((char *) input->version);
1350
9.88k
    if ((input->free != NULL) && (input->base != NULL))
1351
0
        input->free((xmlChar *) input->base);
1352
9.88k
    if (input->buf != NULL)
1353
7.31k
        xmlFreeParserInputBuffer(input->buf);
1354
9.88k
    xmlFree(input);
1355
9.88k
}
1356
1357
/**
1358
 * xmlNewInputStream:
1359
 * @ctxt:  an XML parser context
1360
 *
1361
 * Create a new input stream structure.
1362
 *
1363
 * Returns the new input stream or NULL
1364
 */
1365
xmlParserInputPtr
1366
9.88k
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1367
9.88k
    xmlParserInputPtr input;
1368
1369
9.88k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1370
9.88k
    if (input == NULL) {
1371
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1372
0
  return(NULL);
1373
0
    }
1374
9.88k
    memset(input, 0, sizeof(xmlParserInput));
1375
9.88k
    input->line = 1;
1376
9.88k
    input->col = 1;
1377
9.88k
    input->standalone = -1;
1378
1379
    /*
1380
     * If the context is NULL the id cannot be initialized, but that
1381
     * should not happen while parsing which is the situation where
1382
     * the id is actually needed.
1383
     */
1384
9.88k
    if (ctxt != NULL) {
1385
9.88k
        if (input->id >= INT_MAX) {
1386
0
            xmlErrMemory(ctxt, "Input ID overflow\n");
1387
0
            return(NULL);
1388
0
        }
1389
9.88k
        input->id = ctxt->input_id++;
1390
9.88k
    }
1391
1392
9.88k
    return(input);
1393
9.88k
}
1394
1395
/**
1396
 * xmlNewIOInputStream:
1397
 * @ctxt:  an XML parser context
1398
 * @input:  an I/O Input
1399
 * @enc:  the charset encoding if known
1400
 *
1401
 * Create a new input stream structure encapsulating the @input into
1402
 * a stream suitable for the parser.
1403
 *
1404
 * Returns the new input stream or NULL
1405
 */
1406
xmlParserInputPtr
1407
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1408
0
              xmlCharEncoding enc) {
1409
0
    xmlParserInputPtr inputStream;
1410
1411
0
    if (input == NULL) return(NULL);
1412
0
    if (xmlParserDebugEntities)
1413
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1414
0
    inputStream = xmlNewInputStream(ctxt);
1415
0
    if (inputStream == NULL) {
1416
0
  return(NULL);
1417
0
    }
1418
0
    inputStream->filename = NULL;
1419
0
    inputStream->buf = input;
1420
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1421
1422
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1423
0
        xmlSwitchEncoding(ctxt, enc);
1424
0
    }
1425
1426
0
    return(inputStream);
1427
0
}
1428
1429
/**
1430
 * xmlNewEntityInputStream:
1431
 * @ctxt:  an XML parser context
1432
 * @entity:  an Entity pointer
1433
 *
1434
 * DEPRECATED: Internal function, do not use.
1435
 *
1436
 * Create a new input stream based on an xmlEntityPtr
1437
 *
1438
 * Returns the new input stream or NULL
1439
 */
1440
xmlParserInputPtr
1441
0
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1442
0
    xmlParserInputPtr input;
1443
1444
0
    if (entity == NULL) {
1445
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1446
0
                 NULL);
1447
0
  return(NULL);
1448
0
    }
1449
0
    if (xmlParserDebugEntities)
1450
0
  xmlGenericError(xmlGenericErrorContext,
1451
0
    "new input from entity: %s\n", entity->name);
1452
0
    if (entity->content == NULL) {
1453
0
  switch (entity->etype) {
1454
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1455
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1456
0
                   entity->name);
1457
0
                break;
1458
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1459
0
            case XML_EXTERNAL_PARAMETER_ENTITY:
1460
0
    input = xmlLoadExternalEntity((char *) entity->URI,
1461
0
           (char *) entity->ExternalID, ctxt);
1462
0
                if (input != NULL)
1463
0
                    input->entity = entity;
1464
0
                return(input);
1465
0
            case XML_INTERNAL_GENERAL_ENTITY:
1466
0
          xmlErrInternal(ctxt,
1467
0
          "Internal entity %s without content !\n",
1468
0
                   entity->name);
1469
0
                break;
1470
0
            case XML_INTERNAL_PARAMETER_ENTITY:
1471
0
          xmlErrInternal(ctxt,
1472
0
          "Internal parameter entity %s without content !\n",
1473
0
                   entity->name);
1474
0
                break;
1475
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1476
0
          xmlErrInternal(ctxt,
1477
0
          "Predefined entity %s without content !\n",
1478
0
                   entity->name);
1479
0
                break;
1480
0
  }
1481
0
  return(NULL);
1482
0
    }
1483
0
    input = xmlNewInputStream(ctxt);
1484
0
    if (input == NULL) {
1485
0
  return(NULL);
1486
0
    }
1487
0
    if (entity->URI != NULL)
1488
0
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1489
0
    input->base = entity->content;
1490
0
    if (entity->length == 0)
1491
0
        entity->length = xmlStrlen(entity->content);
1492
0
    input->cur = entity->content;
1493
0
    input->length = entity->length;
1494
0
    input->end = &entity->content[input->length];
1495
0
    input->entity = entity;
1496
0
    return(input);
1497
0
}
1498
1499
/**
1500
 * xmlNewStringInputStream:
1501
 * @ctxt:  an XML parser context
1502
 * @buffer:  an memory buffer
1503
 *
1504
 * Create a new input stream based on a memory buffer.
1505
 * Returns the new input stream
1506
 */
1507
xmlParserInputPtr
1508
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1509
0
    xmlParserInputPtr input;
1510
0
    xmlParserInputBufferPtr buf;
1511
1512
0
    if (buffer == NULL) {
1513
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1514
0
                 NULL);
1515
0
  return(NULL);
1516
0
    }
1517
0
    if (xmlParserDebugEntities)
1518
0
  xmlGenericError(xmlGenericErrorContext,
1519
0
    "new fixed input: %.30s\n", buffer);
1520
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1521
0
                                        xmlStrlen(buffer),
1522
0
                                        XML_CHAR_ENCODING_NONE);
1523
0
    if (buf == NULL) {
1524
0
  xmlErrMemory(ctxt, NULL);
1525
0
        return(NULL);
1526
0
    }
1527
0
    input = xmlNewInputStream(ctxt);
1528
0
    if (input == NULL) {
1529
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1530
0
  xmlFreeParserInputBuffer(buf);
1531
0
  return(NULL);
1532
0
    }
1533
0
    input->buf = buf;
1534
0
    xmlBufResetInput(input->buf->buffer, input);
1535
0
    return(input);
1536
0
}
1537
1538
/**
1539
 * xmlNewInputFromFile:
1540
 * @ctxt:  an XML parser context
1541
 * @filename:  the filename to use as entity
1542
 *
1543
 * Create a new input stream based on a file or an URL.
1544
 *
1545
 * Returns the new input stream or NULL in case of error
1546
 */
1547
xmlParserInputPtr
1548
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1549
0
    xmlParserInputBufferPtr buf;
1550
0
    xmlParserInputPtr inputStream;
1551
0
    char *directory = NULL;
1552
0
    xmlChar *URI = NULL;
1553
1554
0
    if (xmlParserDebugEntities)
1555
0
  xmlGenericError(xmlGenericErrorContext,
1556
0
    "new input from file: %s\n", filename);
1557
0
    if (ctxt == NULL) return(NULL);
1558
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1559
0
    if (buf == NULL) {
1560
0
  if (filename == NULL)
1561
0
      __xmlLoaderErr(ctxt,
1562
0
                     "failed to load external entity: NULL filename \n",
1563
0
         NULL);
1564
0
  else
1565
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1566
0
         (const char *) filename);
1567
0
  return(NULL);
1568
0
    }
1569
1570
0
    inputStream = xmlNewInputStream(ctxt);
1571
0
    if (inputStream == NULL) {
1572
0
  xmlFreeParserInputBuffer(buf);
1573
0
  return(NULL);
1574
0
    }
1575
1576
0
    inputStream->buf = buf;
1577
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1578
0
    if (inputStream == NULL)
1579
0
        return(NULL);
1580
1581
0
    if (inputStream->filename == NULL)
1582
0
  URI = xmlStrdup((xmlChar *) filename);
1583
0
    else
1584
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1585
0
    directory = xmlParserGetDirectory((const char *) URI);
1586
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1587
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1588
0
    if (URI != NULL) xmlFree((char *) URI);
1589
0
    inputStream->directory = directory;
1590
1591
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1592
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1593
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1594
0
    return(inputStream);
1595
0
}
1596
1597
/************************************************************************
1598
 *                  *
1599
 *    Commodity functions to handle parser contexts   *
1600
 *                  *
1601
 ************************************************************************/
1602
1603
/**
1604
 * xmlInitSAXParserCtxt:
1605
 * @ctxt:  XML parser context
1606
 * @sax:  SAX handlert
1607
 * @userData:  user data
1608
 *
1609
 * Initialize a SAX parser context
1610
 *
1611
 * Returns 0 in case of success and -1 in case of error
1612
 */
1613
1614
static int
1615
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1616
                     void *userData)
1617
9.88k
{
1618
9.88k
    xmlParserInputPtr input;
1619
1620
9.88k
    if(ctxt==NULL) {
1621
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1622
0
        return(-1);
1623
0
    }
1624
1625
9.88k
    xmlInitParser();
1626
1627
9.88k
    if (ctxt->dict == NULL)
1628
9.88k
  ctxt->dict = xmlDictCreate();
1629
9.88k
    if (ctxt->dict == NULL) {
1630
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1631
0
  return(-1);
1632
0
    }
1633
9.88k
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1634
1635
9.88k
    if (ctxt->sax == NULL)
1636
9.88k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1637
9.88k
    if (ctxt->sax == NULL) {
1638
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1639
0
  return(-1);
1640
0
    }
1641
9.88k
    if (sax == NULL) {
1642
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1643
0
        xmlSAXVersion(ctxt->sax, 2);
1644
0
        ctxt->userData = ctxt;
1645
9.88k
    } else {
1646
9.88k
  if (sax->initialized == XML_SAX2_MAGIC) {
1647
9.88k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1648
9.88k
        } else {
1649
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1650
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1651
0
        }
1652
9.88k
        ctxt->userData = userData ? userData : ctxt;
1653
9.88k
    }
1654
1655
9.88k
    ctxt->maxatts = 0;
1656
9.88k
    ctxt->atts = NULL;
1657
    /* Allocate the Input stack */
1658
9.88k
    if (ctxt->inputTab == NULL) {
1659
9.88k
  ctxt->inputTab = (xmlParserInputPtr *)
1660
9.88k
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1661
9.88k
  ctxt->inputMax = 5;
1662
9.88k
    }
1663
9.88k
    if (ctxt->inputTab == NULL) {
1664
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1665
0
  ctxt->inputNr = 0;
1666
0
  ctxt->inputMax = 0;
1667
0
  ctxt->input = NULL;
1668
0
  return(-1);
1669
0
    }
1670
9.88k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1671
0
        xmlFreeInputStream(input);
1672
0
    }
1673
9.88k
    ctxt->inputNr = 0;
1674
9.88k
    ctxt->input = NULL;
1675
1676
9.88k
    ctxt->version = NULL;
1677
9.88k
    ctxt->encoding = NULL;
1678
9.88k
    ctxt->standalone = -1;
1679
9.88k
    ctxt->hasExternalSubset = 0;
1680
9.88k
    ctxt->hasPErefs = 0;
1681
9.88k
    ctxt->html = 0;
1682
9.88k
    ctxt->external = 0;
1683
9.88k
    ctxt->instate = XML_PARSER_START;
1684
9.88k
    ctxt->token = 0;
1685
9.88k
    ctxt->directory = NULL;
1686
1687
    /* Allocate the Node stack */
1688
9.88k
    if (ctxt->nodeTab == NULL) {
1689
9.88k
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1690
9.88k
  ctxt->nodeMax = 10;
1691
9.88k
    }
1692
9.88k
    if (ctxt->nodeTab == NULL) {
1693
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1694
0
  ctxt->nodeNr = 0;
1695
0
  ctxt->nodeMax = 0;
1696
0
  ctxt->node = NULL;
1697
0
  ctxt->inputNr = 0;
1698
0
  ctxt->inputMax = 0;
1699
0
  ctxt->input = NULL;
1700
0
  return(-1);
1701
0
    }
1702
9.88k
    ctxt->nodeNr = 0;
1703
9.88k
    ctxt->node = NULL;
1704
1705
    /* Allocate the Name stack */
1706
9.88k
    if (ctxt->nameTab == NULL) {
1707
9.88k
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1708
9.88k
  ctxt->nameMax = 10;
1709
9.88k
    }
1710
9.88k
    if (ctxt->nameTab == NULL) {
1711
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1712
0
  ctxt->nodeNr = 0;
1713
0
  ctxt->nodeMax = 0;
1714
0
  ctxt->node = NULL;
1715
0
  ctxt->inputNr = 0;
1716
0
  ctxt->inputMax = 0;
1717
0
  ctxt->input = NULL;
1718
0
  ctxt->nameNr = 0;
1719
0
  ctxt->nameMax = 0;
1720
0
  ctxt->name = NULL;
1721
0
  return(-1);
1722
0
    }
1723
9.88k
    ctxt->nameNr = 0;
1724
9.88k
    ctxt->name = NULL;
1725
1726
    /* Allocate the space stack */
1727
9.88k
    if (ctxt->spaceTab == NULL) {
1728
9.88k
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1729
9.88k
  ctxt->spaceMax = 10;
1730
9.88k
    }
1731
9.88k
    if (ctxt->spaceTab == NULL) {
1732
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1733
0
  ctxt->nodeNr = 0;
1734
0
  ctxt->nodeMax = 0;
1735
0
  ctxt->node = NULL;
1736
0
  ctxt->inputNr = 0;
1737
0
  ctxt->inputMax = 0;
1738
0
  ctxt->input = NULL;
1739
0
  ctxt->nameNr = 0;
1740
0
  ctxt->nameMax = 0;
1741
0
  ctxt->name = NULL;
1742
0
  ctxt->spaceNr = 0;
1743
0
  ctxt->spaceMax = 0;
1744
0
  ctxt->space = NULL;
1745
0
  return(-1);
1746
0
    }
1747
9.88k
    ctxt->spaceNr = 1;
1748
9.88k
    ctxt->spaceMax = 10;
1749
9.88k
    ctxt->spaceTab[0] = -1;
1750
9.88k
    ctxt->space = &ctxt->spaceTab[0];
1751
9.88k
    ctxt->myDoc = NULL;
1752
9.88k
    ctxt->wellFormed = 1;
1753
9.88k
    ctxt->nsWellFormed = 1;
1754
9.88k
    ctxt->valid = 1;
1755
9.88k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1756
9.88k
    if (ctxt->loadsubset) {
1757
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1758
0
    }
1759
9.88k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1760
9.88k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1761
9.88k
    if (ctxt->pedantic) {
1762
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1763
0
    }
1764
9.88k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1765
9.88k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1766
9.88k
    if (ctxt->keepBlanks == 0) {
1767
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1768
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1769
0
    }
1770
1771
9.88k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1772
9.88k
    ctxt->vctxt.userData = ctxt;
1773
9.88k
    ctxt->vctxt.error = xmlParserValidityError;
1774
9.88k
    ctxt->vctxt.warning = xmlParserValidityWarning;
1775
9.88k
    if (ctxt->validate) {
1776
0
  if (xmlGetWarningsDefaultValue == 0)
1777
0
      ctxt->vctxt.warning = NULL;
1778
0
  else
1779
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1780
0
  ctxt->vctxt.nodeMax = 0;
1781
0
        ctxt->options |= XML_PARSE_DTDVALID;
1782
0
    }
1783
9.88k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1784
9.88k
    if (ctxt->replaceEntities) {
1785
0
        ctxt->options |= XML_PARSE_NOENT;
1786
0
    }
1787
9.88k
    ctxt->record_info = 0;
1788
9.88k
    ctxt->checkIndex = 0;
1789
9.88k
    ctxt->inSubset = 0;
1790
9.88k
    ctxt->errNo = XML_ERR_OK;
1791
9.88k
    ctxt->depth = 0;
1792
9.88k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1793
9.88k
    ctxt->catalogs = NULL;
1794
9.88k
    ctxt->sizeentities = 0;
1795
9.88k
    ctxt->sizeentcopy = 0;
1796
9.88k
    ctxt->input_id = 1;
1797
9.88k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1798
9.88k
    return(0);
1799
9.88k
}
1800
1801
/**
1802
 * xmlInitParserCtxt:
1803
 * @ctxt:  an XML parser context
1804
 *
1805
 * DEPRECATED: Internal function which will be made private in a future
1806
 * version.
1807
 *
1808
 * Initialize a parser context
1809
 *
1810
 * Returns 0 in case of success and -1 in case of error
1811
 */
1812
1813
int
1814
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1815
0
{
1816
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1817
0
}
1818
1819
/**
1820
 * xmlFreeParserCtxt:
1821
 * @ctxt:  an XML parser context
1822
 *
1823
 * Free all the memory used by a parser context. However the parsed
1824
 * document in ctxt->myDoc is not freed.
1825
 */
1826
1827
void
1828
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1829
9.88k
{
1830
9.88k
    xmlParserInputPtr input;
1831
1832
9.88k
    if (ctxt == NULL) return;
1833
1834
19.7k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1835
9.88k
        xmlFreeInputStream(input);
1836
9.88k
    }
1837
9.88k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1838
9.88k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1839
9.88k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1840
9.88k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1841
9.88k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1842
9.88k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1843
9.88k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1844
9.88k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1845
9.88k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1846
9.88k
#ifdef LIBXML_SAX1_ENABLED
1847
9.88k
    if ((ctxt->sax != NULL) &&
1848
9.88k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1849
#else
1850
    if (ctxt->sax != NULL)
1851
#endif /* LIBXML_SAX1_ENABLED */
1852
9.88k
        xmlFree(ctxt->sax);
1853
9.88k
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1854
9.88k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1855
9.88k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1856
9.88k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1857
9.88k
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1858
9.88k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1859
9.88k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1860
9.88k
    if (ctxt->attsDefault != NULL)
1861
532
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1862
9.88k
    if (ctxt->attsSpecial != NULL)
1863
664
        xmlHashFree(ctxt->attsSpecial, NULL);
1864
9.88k
    if (ctxt->freeElems != NULL) {
1865
0
        xmlNodePtr cur, next;
1866
1867
0
  cur = ctxt->freeElems;
1868
0
  while (cur != NULL) {
1869
0
      next = cur->next;
1870
0
      xmlFree(cur);
1871
0
      cur = next;
1872
0
  }
1873
0
    }
1874
9.88k
    if (ctxt->freeAttrs != NULL) {
1875
0
        xmlAttrPtr cur, next;
1876
1877
0
  cur = ctxt->freeAttrs;
1878
0
  while (cur != NULL) {
1879
0
      next = cur->next;
1880
0
      xmlFree(cur);
1881
0
      cur = next;
1882
0
  }
1883
0
    }
1884
    /*
1885
     * cleanup the error strings
1886
     */
1887
9.88k
    if (ctxt->lastError.message != NULL)
1888
9.58k
        xmlFree(ctxt->lastError.message);
1889
9.88k
    if (ctxt->lastError.file != NULL)
1890
0
        xmlFree(ctxt->lastError.file);
1891
9.88k
    if (ctxt->lastError.str1 != NULL)
1892
7.00k
        xmlFree(ctxt->lastError.str1);
1893
9.88k
    if (ctxt->lastError.str2 != NULL)
1894
242
        xmlFree(ctxt->lastError.str2);
1895
9.88k
    if (ctxt->lastError.str3 != NULL)
1896
8
        xmlFree(ctxt->lastError.str3);
1897
1898
9.88k
#ifdef LIBXML_CATALOG_ENABLED
1899
9.88k
    if (ctxt->catalogs != NULL)
1900
12
  xmlCatalogFreeLocal(ctxt->catalogs);
1901
9.88k
#endif
1902
9.88k
    xmlFree(ctxt);
1903
9.88k
}
1904
1905
/**
1906
 * xmlNewParserCtxt:
1907
 *
1908
 * Allocate and initialize a new parser context.
1909
 *
1910
 * Returns the xmlParserCtxtPtr or NULL
1911
 */
1912
1913
xmlParserCtxtPtr
1914
xmlNewParserCtxt(void)
1915
0
{
1916
0
    return(xmlNewSAXParserCtxt(NULL, NULL));
1917
0
}
1918
1919
/**
1920
 * xmlNewSAXParserCtxt:
1921
 * @sax:  SAX handler
1922
 * @userData:  user data
1923
 *
1924
 * Allocate and initialize a new SAX parser context. If userData is NULL,
1925
 * the parser context will be passed as user data.
1926
 *
1927
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1928
 */
1929
1930
xmlParserCtxtPtr
1931
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1932
9.88k
{
1933
9.88k
    xmlParserCtxtPtr ctxt;
1934
1935
9.88k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1936
9.88k
    if (ctxt == NULL) {
1937
0
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1938
0
  return(NULL);
1939
0
    }
1940
9.88k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1941
9.88k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1942
0
        xmlFreeParserCtxt(ctxt);
1943
0
  return(NULL);
1944
0
    }
1945
9.88k
    return(ctxt);
1946
9.88k
}
1947
1948
/************************************************************************
1949
 *                  *
1950
 *    Handling of node information        *
1951
 *                  *
1952
 ************************************************************************/
1953
1954
/**
1955
 * xmlClearParserCtxt:
1956
 * @ctxt:  an XML parser context
1957
 *
1958
 * Clear (release owned resources) and reinitialize a parser context
1959
 */
1960
1961
void
1962
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1963
0
{
1964
0
  if (ctxt==NULL)
1965
0
    return;
1966
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1967
0
  xmlCtxtReset(ctxt);
1968
0
}
1969
1970
1971
/**
1972
 * xmlParserFindNodeInfo:
1973
 * @ctx:  an XML parser context
1974
 * @node:  an XML node within the tree
1975
 *
1976
 * DEPRECATED: Don't use.
1977
 *
1978
 * Find the parser node info struct for a given node
1979
 *
1980
 * Returns an xmlParserNodeInfo block pointer or NULL
1981
 */
1982
const xmlParserNodeInfo *
1983
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1984
0
{
1985
0
    unsigned long pos;
1986
1987
0
    if ((ctx == NULL) || (node == NULL))
1988
0
        return (NULL);
1989
    /* Find position where node should be at */
1990
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1991
0
    if (pos < ctx->node_seq.length
1992
0
        && ctx->node_seq.buffer[pos].node == node)
1993
0
        return &ctx->node_seq.buffer[pos];
1994
0
    else
1995
0
        return NULL;
1996
0
}
1997
1998
1999
/**
2000
 * xmlInitNodeInfoSeq:
2001
 * @seq:  a node info sequence pointer
2002
 *
2003
 * DEPRECATED: Don't use.
2004
 *
2005
 * -- Initialize (set to initial state) node info sequence
2006
 */
2007
void
2008
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2009
9.88k
{
2010
9.88k
    if (seq == NULL)
2011
0
        return;
2012
9.88k
    seq->length = 0;
2013
9.88k
    seq->maximum = 0;
2014
9.88k
    seq->buffer = NULL;
2015
9.88k
}
2016
2017
/**
2018
 * xmlClearNodeInfoSeq:
2019
 * @seq:  a node info sequence pointer
2020
 *
2021
 * DEPRECATED: Don't use.
2022
 *
2023
 * -- Clear (release memory and reinitialize) node
2024
 *   info sequence
2025
 */
2026
void
2027
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2028
0
{
2029
0
    if (seq == NULL)
2030
0
        return;
2031
0
    if (seq->buffer != NULL)
2032
0
        xmlFree(seq->buffer);
2033
0
    xmlInitNodeInfoSeq(seq);
2034
0
}
2035
2036
/**
2037
 * xmlParserFindNodeInfoIndex:
2038
 * @seq:  a node info sequence pointer
2039
 * @node:  an XML node pointer
2040
 *
2041
 * DEPRECATED: Don't use.
2042
 *
2043
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2044
 *   the given node is or should be at in a sorted sequence
2045
 *
2046
 * Returns a long indicating the position of the record
2047
 */
2048
unsigned long
2049
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2050
                           const xmlNodePtr node)
2051
0
{
2052
0
    unsigned long upper, lower, middle;
2053
0
    int found = 0;
2054
2055
0
    if ((seq == NULL) || (node == NULL))
2056
0
        return ((unsigned long) -1);
2057
2058
    /* Do a binary search for the key */
2059
0
    lower = 1;
2060
0
    upper = seq->length;
2061
0
    middle = 0;
2062
0
    while (lower <= upper && !found) {
2063
0
        middle = lower + (upper - lower) / 2;
2064
0
        if (node == seq->buffer[middle - 1].node)
2065
0
            found = 1;
2066
0
        else if (node < seq->buffer[middle - 1].node)
2067
0
            upper = middle - 1;
2068
0
        else
2069
0
            lower = middle + 1;
2070
0
    }
2071
2072
    /* Return position */
2073
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2074
0
        return middle;
2075
0
    else
2076
0
        return middle - 1;
2077
0
}
2078
2079
2080
/**
2081
 * xmlParserAddNodeInfo:
2082
 * @ctxt:  an XML parser context
2083
 * @info:  a node info sequence pointer
2084
 *
2085
 * DEPRECATED: Don't use.
2086
 *
2087
 * Insert node info record into the sorted sequence
2088
 */
2089
void
2090
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2091
                     const xmlParserNodeInfoPtr info)
2092
0
{
2093
0
    unsigned long pos;
2094
2095
0
    if ((ctxt == NULL) || (info == NULL)) return;
2096
2097
    /* Find pos and check to see if node is already in the sequence */
2098
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2099
0
                                     info->node);
2100
2101
0
    if ((pos < ctxt->node_seq.length) &&
2102
0
        (ctxt->node_seq.buffer != NULL) &&
2103
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2104
0
        ctxt->node_seq.buffer[pos] = *info;
2105
0
    }
2106
2107
    /* Otherwise, we need to add new node to buffer */
2108
0
    else {
2109
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2110
0
      (ctxt->node_seq.buffer == NULL)) {
2111
0
            xmlParserNodeInfo *tmp_buffer;
2112
0
            unsigned int byte_size;
2113
2114
0
            if (ctxt->node_seq.maximum == 0)
2115
0
                ctxt->node_seq.maximum = 2;
2116
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2117
0
      (2 * ctxt->node_seq.maximum));
2118
2119
0
            if (ctxt->node_seq.buffer == NULL)
2120
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2121
0
            else
2122
0
                tmp_buffer =
2123
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2124
0
                                                     byte_size);
2125
2126
0
            if (tmp_buffer == NULL) {
2127
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
2128
0
                return;
2129
0
            }
2130
0
            ctxt->node_seq.buffer = tmp_buffer;
2131
0
            ctxt->node_seq.maximum *= 2;
2132
0
        }
2133
2134
        /* If position is not at end, move elements out of the way */
2135
0
        if (pos != ctxt->node_seq.length) {
2136
0
            unsigned long i;
2137
2138
0
            for (i = ctxt->node_seq.length; i > pos; i--)
2139
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2140
0
        }
2141
2142
        /* Copy element and increase length */
2143
0
        ctxt->node_seq.buffer[pos] = *info;
2144
0
        ctxt->node_seq.length++;
2145
0
    }
2146
0
}
2147
2148
/************************************************************************
2149
 *                  *
2150
 *    Defaults settings         *
2151
 *                  *
2152
 ************************************************************************/
2153
/**
2154
 * xmlPedanticParserDefault:
2155
 * @val:  int 0 or 1
2156
 *
2157
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2158
 *
2159
 * Set and return the previous value for enabling pedantic warnings.
2160
 *
2161
 * Returns the last value for 0 for no substitution, 1 for substitution.
2162
 */
2163
2164
int
2165
0
xmlPedanticParserDefault(int val) {
2166
0
    int old = xmlPedanticParserDefaultValue;
2167
2168
0
    xmlPedanticParserDefaultValue = val;
2169
0
    return(old);
2170
0
}
2171
2172
/**
2173
 * xmlLineNumbersDefault:
2174
 * @val:  int 0 or 1
2175
 *
2176
 * DEPRECATED: The modern options API always enables line numbers.
2177
 *
2178
 * Set and return the previous value for enabling line numbers in elements
2179
 * contents. This may break on old application and is turned off by default.
2180
 *
2181
 * Returns the last value for 0 for no substitution, 1 for substitution.
2182
 */
2183
2184
int
2185
0
xmlLineNumbersDefault(int val) {
2186
0
    int old = xmlLineNumbersDefaultValue;
2187
2188
0
    xmlLineNumbersDefaultValue = val;
2189
0
    return(old);
2190
0
}
2191
2192
/**
2193
 * xmlSubstituteEntitiesDefault:
2194
 * @val:  int 0 or 1
2195
 *
2196
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2197
 *
2198
 * Set and return the previous value for default entity support.
2199
 * Initially the parser always keep entity references instead of substituting
2200
 * entity values in the output. This function has to be used to change the
2201
 * default parser behavior
2202
 * SAX::substituteEntities() has to be used for changing that on a file by
2203
 * file basis.
2204
 *
2205
 * Returns the last value for 0 for no substitution, 1 for substitution.
2206
 */
2207
2208
int
2209
0
xmlSubstituteEntitiesDefault(int val) {
2210
0
    int old = xmlSubstituteEntitiesDefaultValue;
2211
2212
0
    xmlSubstituteEntitiesDefaultValue = val;
2213
0
    return(old);
2214
0
}
2215
2216
/**
2217
 * xmlKeepBlanksDefault:
2218
 * @val:  int 0 or 1
2219
 *
2220
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2221
 *
2222
 * Set and return the previous value for default blanks text nodes support.
2223
 * The 1.x version of the parser used an heuristic to try to detect
2224
 * ignorable white spaces. As a result the SAX callback was generating
2225
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2226
 * using the DOM output text nodes containing those blanks were not generated.
2227
 * The 2.x and later version will switch to the XML standard way and
2228
 * ignorableWhitespace() are only generated when running the parser in
2229
 * validating mode and when the current element doesn't allow CDATA or
2230
 * mixed content.
2231
 * This function is provided as a way to force the standard behavior
2232
 * on 1.X libs and to switch back to the old mode for compatibility when
2233
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2234
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2235
 * nodes generated.
2236
 * This value also affect autogeneration of indentation when saving code
2237
 * if blanks sections are kept, indentation is not generated.
2238
 *
2239
 * Returns the last value for 0 for no substitution, 1 for substitution.
2240
 */
2241
2242
int
2243
0
xmlKeepBlanksDefault(int val) {
2244
0
    int old = xmlKeepBlanksDefaultValue;
2245
2246
0
    xmlKeepBlanksDefaultValue = val;
2247
0
    if (!val) xmlIndentTreeOutput = 1;
2248
0
    return(old);
2249
0
}
2250