Coverage Report

Created: 2023-12-13 20:03

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
186M
/*
 * Shorthand accessors for the current input of a parser context.
 * The argument and each expansion are parenthesized so the macros stay
 * correct when they are handed an arbitrary pointer expression
 * (e.g. CUR(&local) or CUR(list[i])).
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not moved past the end pointer. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * check the compiled lib version against the include one.
61
 * This can warn or immediately kill the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a redefinition of attribute error
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
0
{
102
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
0
        (ctxt->instate == XML_PARSER_EOF))
104
0
  return;
105
0
    if (ctxt != NULL) {
106
0
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
0
        ctxt->instate = XML_PARSER_EOF;
108
0
        ctxt->disableSAX = 1;
109
0
    }
110
0
    if (extra)
111
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
0
                        NULL, NULL, 0, 0,
114
0
                        "Memory allocation failed : %s\n", extra);
115
0
    else
116
0
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
0
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
0
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  an string info
127
 * @str2:  an string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
855k
{
135
855k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
855k
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
855k
    if (ctxt != NULL)
139
855k
        ctxt->errNo = xmlerr;
140
855k
    __xmlRaiseError(NULL, NULL, NULL,
141
855k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
855k
                    NULL, 0, (const char *) str1, (const char *) str2,
143
855k
                    NULL, 0, 0, msg, str1, str2);
144
855k
    if (ctxt != NULL) {
145
855k
        ctxt->wellFormed = 0;
146
855k
        if (ctxt->recovery == 0)
147
490k
            ctxt->disableSAX = 1;
148
855k
    }
149
855k
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
353
{
162
353
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
353
        (ctxt->instate == XML_PARSER_EOF))
164
0
  return;
165
353
    if (ctxt != NULL)
166
353
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
353
    __xmlRaiseError(NULL, NULL, NULL,
168
353
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
353
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
353
                    0, 0, msg, str);
171
353
    if (ctxt != NULL) {
172
353
        ctxt->wellFormed = 0;
173
353
        if (ctxt->recovery == 0)
174
192
            ctxt->disableSAX = 1;
175
353
    }
176
353
}
177
178
/**
179
 * xmlErrEncodingInt:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @msg:  the error message
183
 * @val:  an integer value
184
 *
185
 * n encoding error
186
 */
187
static void LIBXML_ATTR_FORMAT(3,0)
188
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189
                  const char *msg, int val)
190
206k
{
191
206k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
206k
        (ctxt->instate == XML_PARSER_EOF))
193
0
  return;
194
206k
    if (ctxt != NULL)
195
205k
        ctxt->errNo = error;
196
206k
    __xmlRaiseError(NULL, NULL, NULL,
197
206k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198
206k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199
206k
    if (ctxt != NULL) {
200
205k
        ctxt->wellFormed = 0;
201
205k
        if (ctxt->recovery == 0)
202
152k
            ctxt->disableSAX = 1;
203
205k
    }
204
206k
}
205
206
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test @c against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
219
220
/************************************************************************
221
 *                  *
222
 *    Input handling functions for progressive parsing  *
223
 *                  *
224
 ************************************************************************/
225
226
/* #define DEBUG_INPUT */
227
/* #define DEBUG_STACK */
228
/* #define DEBUG_PUSH */
229
230
231
/*
 * We need to keep enough input to show errors in context:
 * xmlParserInputShrink() asks the buffer to drop all but LINE_LEN of the
 * bytes that precede the read cursor.
 */
232
371k
#define LINE_LEN        80
233
234
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only sanity check (compiled when DEBUG_INPUT is defined).
 * Reports through the generic error handler when the input's base
 * pointer differs from the buffer content, or when the cursor lies
 * before the base or beyond the used part of the buffer, then dumps
 * the current pointers.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Print pointers with %p and cast the offset/size to the exact types
     * named by the conversion specifiers: truncating a pointer to int or
     * passing a pointer difference for %d is undefined on LP64 targets.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (void *) in, (void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
259
260
261
/**
262
 * xmlParserInputRead:
263
 * @in:  an XML parser input
264
 * @len:  an indicative size for the lookahead
265
 *
266
 * DEPRECATED: This function was internal and is deprecated.
267
 *
268
 * Returns -1 as this is an error to use it.
269
 */
270
int
271
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
272
0
    return(-1);
273
0
}
274
275
/**
276
 * xmlParserInputGrow:
277
 * @in:  an XML parser input
278
 * @len:  an indicative size for the lookahead
279
 *
280
 * DEPRECATED: Don't use.
281
 *
282
 * This function increase the input for the parser. It tries to
283
 * preserve pointers to the input buffer, and keep already read data
284
 *
285
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
286
 * end of this entity
287
 */
288
int
289
109M
xmlParserInputGrow(xmlParserInputPtr in, int len) {
290
109M
    int ret;
291
109M
    size_t indx;
292
293
109M
    if ((in == NULL) || (len < 0)) return(-1);
294
#ifdef DEBUG_INPUT
295
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
296
#endif
297
109M
    if (in->buf == NULL) return(-1);
298
91.6M
    if (in->base == NULL) return(-1);
299
91.6M
    if (in->cur == NULL) return(-1);
300
91.6M
    if (in->buf->buffer == NULL) return(-1);
301
302
91.6M
    CHECK_BUFFER(in);
303
304
91.6M
    indx = in->cur - in->base;
305
91.6M
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
306
307
43.4k
  CHECK_BUFFER(in);
308
309
43.4k
        return(0);
310
43.4k
    }
311
91.5M
    if (in->buf->readcallback != NULL) {
312
86.3M
  ret = xmlParserInputBufferGrow(in->buf, len);
313
86.3M
    } else
314
5.20M
        return(0);
315
316
86.3M
    in->base = xmlBufContent(in->buf->buffer);
317
86.3M
    in->cur = in->base + indx;
318
86.3M
    in->end = xmlBufEnd(in->buf->buffer);
319
320
86.3M
    CHECK_BUFFER(in);
321
322
86.3M
    return(ret);
323
91.5M
}
324
325
/**
326
 * xmlParserInputShrink:
327
 * @in:  an XML parser input
328
 *
329
 * This function removes used input for the parser.
330
 */
331
void
332
374k
xmlParserInputShrink(xmlParserInputPtr in) {
333
374k
    size_t used;
334
374k
    size_t ret;
335
336
#ifdef DEBUG_INPUT
337
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
338
#endif
339
374k
    if (in == NULL) return;
340
374k
    if (in->buf == NULL) return;
341
371k
    if (in->base == NULL) return;
342
371k
    if (in->cur == NULL) return;
343
371k
    if (in->buf->buffer == NULL) return;
344
345
371k
    CHECK_BUFFER(in);
346
347
371k
    used = in->cur - in->base;
348
    /*
349
     * Do not shrink on large buffers whose only a tiny fraction
350
     * was consumed
351
     */
352
371k
    if (used > INPUT_CHUNK) {
353
371k
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
354
371k
  if (ret > 0) {
355
371k
            used -= ret;
356
371k
      in->consumed += ret;
357
371k
  }
358
371k
    }
359
360
371k
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
361
193k
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
362
193k
    }
363
364
371k
    in->base = xmlBufContent(in->buf->buffer);
365
371k
    in->cur = in->base + used;
366
371k
    in->end = xmlBufEnd(in->buf->buffer);
367
368
371k
    CHECK_BUFFER(in);
369
371k
}
370
371
/************************************************************************
372
 *                  *
373
 *    UTF8 character input and related functions    *
374
 *                  *
375
 ************************************************************************/
376
377
/**
378
 * xmlNextChar:
379
 * @ctxt:  the XML parser context
380
 *
381
 * Skip to the next char input char.
382
 */
383
384
void
385
xmlNextChar(xmlParserCtxtPtr ctxt)
386
186M
{
387
186M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
388
186M
        (ctxt->input == NULL))
389
0
        return;
390
391
186M
    if (!(VALID_CTXT(ctxt))) {
392
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
393
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
394
0
        xmlStopParser(ctxt);
395
0
  return;
396
0
    }
397
398
186M
    if ((*ctxt->input->cur == 0) &&
399
186M
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
400
4.54k
        return;
401
4.54k
    }
402
403
186M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
404
147M
        const unsigned char *cur;
405
147M
        unsigned char c;
406
407
        /*
408
         *   2.11 End-of-Line Handling
409
         *   the literal two-character sequence "#xD#xA" or a standalone
410
         *   literal #xD, an XML processor must pass to the application
411
         *   the single character #xA.
412
         */
413
147M
        if (*(ctxt->input->cur) == '\n') {
414
9.31M
            ctxt->input->line++; ctxt->input->col = 1;
415
9.31M
        } else
416
138M
            ctxt->input->col++;
417
418
        /*
419
         * We are supposed to handle UTF8, check it's valid
420
         * From rfc2044: encoding of the Unicode values on UTF-8:
421
         *
422
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
423
         * 0000 0000-0000 007F   0xxxxxxx
424
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
425
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
426
         *
427
         * Check for the 0x110000 limit too
428
         */
429
147M
        cur = ctxt->input->cur;
430
431
147M
        c = *cur;
432
147M
        if (c & 0x80) {
433
203k
            if (c == 0xC0)
434
647
          goto encoding_error;
435
203k
            if (cur[1] == 0) {
436
492
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
437
492
                cur = ctxt->input->cur;
438
492
            }
439
203k
            if ((cur[1] & 0xc0) != 0x80)
440
42.4k
                goto encoding_error;
441
160k
            if ((c & 0xe0) == 0xe0) {
442
95.0k
                unsigned int val;
443
444
95.0k
                if (cur[2] == 0) {
445
166
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
446
166
                    cur = ctxt->input->cur;
447
166
                }
448
95.0k
                if ((cur[2] & 0xc0) != 0x80)
449
1.16k
                    goto encoding_error;
450
93.8k
                if ((c & 0xf0) == 0xf0) {
451
14.0k
                    if (cur[3] == 0) {
452
175
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453
175
                        cur = ctxt->input->cur;
454
175
                    }
455
14.0k
                    if (((c & 0xf8) != 0xf0) ||
456
14.0k
                        ((cur[3] & 0xc0) != 0x80))
457
577
                        goto encoding_error;
458
                    /* 4-byte code */
459
13.4k
                    ctxt->input->cur += 4;
460
13.4k
                    val = (cur[0] & 0x7) << 18;
461
13.4k
                    val |= (cur[1] & 0x3f) << 12;
462
13.4k
                    val |= (cur[2] & 0x3f) << 6;
463
13.4k
                    val |= cur[3] & 0x3f;
464
79.8k
                } else {
465
                    /* 3-byte code */
466
79.8k
                    ctxt->input->cur += 3;
467
79.8k
                    val = (cur[0] & 0xf) << 12;
468
79.8k
                    val |= (cur[1] & 0x3f) << 6;
469
79.8k
                    val |= cur[2] & 0x3f;
470
79.8k
                }
471
93.3k
                if (((val > 0xd7ff) && (val < 0xe000)) ||
472
93.3k
                    ((val > 0xfffd) && (val < 0x10000)) ||
473
93.3k
                    (val >= 0x110000)) {
474
12.4k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
475
12.4k
          "Char 0x%X out of allowed range\n",
476
12.4k
          val);
477
12.4k
                }
478
93.3k
            } else
479
                /* 2-byte code */
480
65.5k
                ctxt->input->cur += 2;
481
160k
        } else
482
            /* 1-byte code */
483
147M
            ctxt->input->cur++;
484
147M
    } else {
485
        /*
486
         * Assume it's a fixed length encoding (1) with
487
         * a compatible encoding for the ASCII set, since
488
         * XML constructs only use < 128 chars
489
         */
490
491
39.1M
        if (*(ctxt->input->cur) == '\n') {
492
1.49M
            ctxt->input->line++; ctxt->input->col = 1;
493
1.49M
        } else
494
37.6M
            ctxt->input->col++;
495
39.1M
        ctxt->input->cur++;
496
39.1M
    }
497
186M
    if (*ctxt->input->cur == 0)
498
467k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
499
186M
    return;
500
44.8k
encoding_error:
501
    /*
502
     * If we detect an UTF8 error that probably mean that the
503
     * input encoding didn't get properly advertised in the
504
     * declaration header. Report the error and switch the encoding
505
     * to ISO-Latin-1 (if you don't like this policy, just declare the
506
     * encoding !)
507
     */
508
44.8k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
509
44.8k
        (ctxt->input->end - ctxt->input->cur < 4)) {
510
407
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
511
407
         "Input is not proper UTF-8, indicate encoding !\n",
512
407
         NULL, NULL);
513
44.4k
    } else {
514
44.4k
        char buffer[150];
515
516
44.4k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
517
44.4k
      ctxt->input->cur[0], ctxt->input->cur[1],
518
44.4k
      ctxt->input->cur[2], ctxt->input->cur[3]);
519
44.4k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
520
44.4k
         "Input is not proper UTF-8, indicate encoding !\n%s",
521
44.4k
         BAD_CAST buffer, NULL);
522
44.4k
    }
523
44.8k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
524
44.8k
    ctxt->input->cur++;
525
44.8k
    return;
526
186M
}
527
528
/**
529
 * xmlCurrentChar:
530
 * @ctxt:  the XML parser context
531
 * @len:  pointer to the length of the char read
532
 *
533
 * The current char value, if using UTF-8 this may actually span multiple
534
 * bytes in the input buffer. Implement the end of line normalization:
535
 * 2.11 End-of-Line Handling
536
 * Wherever an external parsed entity or the literal entity value
537
 * of an internal parsed entity contains either the literal two-character
538
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
539
 * must pass to the application the single character #xA.
540
 * This behavior can conveniently be produced by normalizing all
541
 * line breaks to #xA on input, before parsing.)
542
 *
543
 * Returns the current char value and its length
544
 */
545
546
int
547
513M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
548
513M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
549
513M
    if (ctxt->instate == XML_PARSER_EOF)
550
0
  return(0);
551
552
513M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
553
335M
      *len = 1;
554
335M
      return(*ctxt->input->cur);
555
335M
    }
556
177M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
557
  /*
558
   * We are supposed to handle UTF8, check it's valid
559
   * From rfc2044: encoding of the Unicode values on UTF-8:
560
   *
561
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
562
   * 0000 0000-0000 007F   0xxxxxxx
563
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
564
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
565
   *
566
   * Check for the 0x110000 limit too
567
   */
568
27.9M
  const unsigned char *cur = ctxt->input->cur;
569
27.9M
  unsigned char c;
570
27.9M
  unsigned int val;
571
572
27.9M
  c = *cur;
573
27.9M
  if (c & 0x80) {
574
16.5M
      if (((c & 0x40) == 0) || (c == 0xC0))
575
222k
    goto encoding_error;
576
16.3M
      if (cur[1] == 0) {
577
17.0k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
578
17.0k
                cur = ctxt->input->cur;
579
17.0k
            }
580
16.3M
      if ((cur[1] & 0xc0) != 0x80)
581
538k
    goto encoding_error;
582
15.7M
      if ((c & 0xe0) == 0xe0) {
583
3.72M
    if (cur[2] == 0) {
584
3.90k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
585
3.90k
                    cur = ctxt->input->cur;
586
3.90k
                }
587
3.72M
    if ((cur[2] & 0xc0) != 0x80)
588
9.40k
        goto encoding_error;
589
3.71M
    if ((c & 0xf0) == 0xf0) {
590
913k
        if (cur[3] == 0) {
591
771
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
592
771
                        cur = ctxt->input->cur;
593
771
                    }
594
913k
        if (((c & 0xf8) != 0xf0) ||
595
913k
      ((cur[3] & 0xc0) != 0x80))
596
6.07k
      goto encoding_error;
597
        /* 4-byte code */
598
907k
        *len = 4;
599
907k
        val = (cur[0] & 0x7) << 18;
600
907k
        val |= (cur[1] & 0x3f) << 12;
601
907k
        val |= (cur[2] & 0x3f) << 6;
602
907k
        val |= cur[3] & 0x3f;
603
907k
        if (val < 0x10000)
604
425
      goto encoding_error;
605
2.80M
    } else {
606
      /* 3-byte code */
607
2.80M
        *len = 3;
608
2.80M
        val = (cur[0] & 0xf) << 12;
609
2.80M
        val |= (cur[1] & 0x3f) << 6;
610
2.80M
        val |= cur[2] & 0x3f;
611
2.80M
        if (val < 0x800)
612
285
      goto encoding_error;
613
2.80M
    }
614
12.0M
      } else {
615
        /* 2-byte code */
616
12.0M
    *len = 2;
617
12.0M
    val = (cur[0] & 0x1f) << 6;
618
12.0M
    val |= cur[1] & 0x3f;
619
12.0M
    if (val < 0x80)
620
459
        goto encoding_error;
621
12.0M
      }
622
15.7M
      if (!IS_CHAR(val)) {
623
16.6k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
624
16.6k
          "Char 0x%X out of allowed range\n", val);
625
16.6k
      }
626
15.7M
      return(val);
627
15.7M
  } else {
628
      /* 1-byte code */
629
11.3M
      *len = 1;
630
11.3M
      if (*ctxt->input->cur == 0)
631
938k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
632
11.3M
      if ((*ctxt->input->cur == 0) &&
633
11.3M
          (ctxt->input->end > ctxt->input->cur)) {
634
176k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
635
176k
          "Char 0x0 out of allowed range\n", 0);
636
176k
      }
637
11.3M
      if (*ctxt->input->cur == 0xD) {
638
729k
    if (ctxt->input->cur[1] == 0xA) {
639
174k
        ctxt->input->cur++;
640
174k
    }
641
729k
    return(0xA);
642
729k
      }
643
10.6M
      return(*ctxt->input->cur);
644
11.3M
  }
645
27.9M
    }
646
    /*
647
     * Assume it's a fixed length encoding (1) with
648
     * a compatible encoding for the ASCII set, since
649
     * XML constructs only use < 128 chars
650
     */
651
150M
    *len = 1;
652
150M
    if (*ctxt->input->cur == 0xD) {
653
699k
  if (ctxt->input->cur[1] == 0xA) {
654
155k
      ctxt->input->cur++;
655
155k
  }
656
699k
  return(0xA);
657
699k
    }
658
149M
    return(*ctxt->input->cur);
659
777k
encoding_error:
660
    /*
661
     * An encoding problem may arise from a truncated input buffer
662
     * splitting a character in the middle. In that case do not raise
663
     * an error but return 0 to indicate an end of stream problem
664
     */
665
777k
    if (ctxt->input->end - ctxt->input->cur < 4) {
666
74.6k
  *len = 0;
667
74.6k
  return(0);
668
74.6k
    }
669
670
    /*
671
     * If we detect an UTF8 error that probably mean that the
672
     * input encoding didn't get properly advertised in the
673
     * declaration header. Report the error and switch the encoding
674
     * to ISO-Latin-1 (if you don't like this policy, just declare the
675
     * encoding !)
676
     */
677
703k
    {
678
703k
        char buffer[150];
679
680
703k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
681
703k
      ctxt->input->cur[0], ctxt->input->cur[1],
682
703k
      ctxt->input->cur[2], ctxt->input->cur[3]);
683
703k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
684
703k
         "Input is not proper UTF-8, indicate encoding !\n%s",
685
703k
         BAD_CAST buffer, NULL);
686
703k
    }
687
703k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
688
703k
    *len = 1;
689
703k
    return(*ctxt->input->cur);
690
777k
}
691
692
/**
693
 * xmlStringCurrentChar:
694
 * @ctxt:  the XML parser context
695
 * @cur:  pointer to the beginning of the char
696
 * @len:  pointer to the length of the char read
697
 *
698
 * The current char value, if using UTF-8 this may actually span multiple
699
 * bytes in the input buffer.
700
 *
701
 * Returns the current char value and its length
702
 */
703
704
int
705
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
706
502M
{
707
502M
    if ((len == NULL) || (cur == NULL)) return(0);
708
502M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
709
        /*
710
         * We are supposed to handle UTF8, check it's valid
711
         * From rfc2044: encoding of the Unicode values on UTF-8:
712
         *
713
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
714
         * 0000 0000-0000 007F   0xxxxxxx
715
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
716
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
717
         *
718
         * Check for the 0x110000 limit too
719
         */
720
159M
        unsigned char c;
721
159M
        unsigned int val;
722
723
159M
        c = *cur;
724
159M
        if (c & 0x80) {
725
1.37M
            if ((cur[1] & 0xc0) != 0x80)
726
32.6k
                goto encoding_error;
727
1.34M
            if ((c & 0xe0) == 0xe0) {
728
729
211k
                if ((cur[2] & 0xc0) != 0x80)
730
2.18k
                    goto encoding_error;
731
209k
                if ((c & 0xf0) == 0xf0) {
732
150k
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
733
1.03k
                        goto encoding_error;
734
                    /* 4-byte code */
735
149k
                    *len = 4;
736
149k
                    val = (cur[0] & 0x7) << 18;
737
149k
                    val |= (cur[1] & 0x3f) << 12;
738
149k
                    val |= (cur[2] & 0x3f) << 6;
739
149k
                    val |= cur[3] & 0x3f;
740
149k
                } else {
741
                    /* 3-byte code */
742
59.0k
                    *len = 3;
743
59.0k
                    val = (cur[0] & 0xf) << 12;
744
59.0k
                    val |= (cur[1] & 0x3f) << 6;
745
59.0k
                    val |= cur[2] & 0x3f;
746
59.0k
                }
747
1.13M
            } else {
748
                /* 2-byte code */
749
1.13M
                *len = 2;
750
1.13M
                val = (cur[0] & 0x1f) << 6;
751
1.13M
                val |= cur[1] & 0x3f;
752
1.13M
            }
753
1.34M
            if (!IS_CHAR(val)) {
754
1.60k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
755
1.60k
          "Char 0x%X out of allowed range\n", val);
756
1.60k
            }
757
1.34M
            return (val);
758
158M
        } else {
759
            /* 1-byte code */
760
158M
            *len = 1;
761
158M
            return (*cur);
762
158M
        }
763
159M
    }
764
    /*
765
     * Assume it's a fixed length encoding (1) with
766
     * a compatible encoding for the ASCII set, since
767
     * XML constructs only use < 128 chars
768
     */
769
342M
    *len = 1;
770
342M
    return (*cur);
771
35.8k
encoding_error:
772
773
    /*
774
     * An encoding problem may arise from a truncated input buffer
775
     * splitting a character in the middle. In that case do not raise
776
     * an error but return 0 to indicate an end of stream problem
777
     */
778
35.8k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
779
35.8k
        (ctxt->input->end - ctxt->input->cur < 4)) {
780
12.0k
  *len = 0;
781
12.0k
  return(0);
782
12.0k
    }
783
    /*
784
     * If we detect an UTF8 error that probably mean that the
785
     * input encoding didn't get properly advertised in the
786
     * declaration header. Report the error and switch the encoding
787
     * to ISO-Latin-1 (if you don't like this policy, just declare the
788
     * encoding !)
789
     */
790
23.7k
    {
791
23.7k
        char buffer[150];
792
793
23.7k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
794
23.7k
      ctxt->input->cur[0], ctxt->input->cur[1],
795
23.7k
      ctxt->input->cur[2], ctxt->input->cur[3]);
796
23.7k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
797
23.7k
         "Input is not proper UTF-8, indicate encoding !\n%s",
798
23.7k
         BAD_CAST buffer, NULL);
799
23.7k
    }
800
23.7k
    *len = 1;
801
23.7k
    return (*cur);
802
35.8k
}
803
804
/**
805
 * xmlCopyCharMultiByte:
806
 * @out:  pointer to an array of xmlChar
807
 * @val:  the char value
808
 *
809
 * append the char value in the array
810
 *
811
 * Returns the number of xmlChar written
812
 */
813
int
814
18.5M
xmlCopyCharMultiByte(xmlChar *out, int val) {
815
18.5M
    if ((out == NULL) || (val < 0)) return(0);
816
    /*
817
     * We are supposed to handle UTF8, check it's valid
818
     * From rfc2044: encoding of the Unicode values on UTF-8:
819
     *
820
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
821
     * 0000 0000-0000 007F   0xxxxxxx
822
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
823
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
824
     */
825
18.5M
    if  (val >= 0x80) {
826
17.7M
  xmlChar *savedout = out;
827
17.7M
  int bits;
828
17.7M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
829
2.47M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
830
988k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
831
0
  else {
832
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
833
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
834
0
            val);
835
0
      return(0);
836
0
  }
837
39.0M
  for ( ; bits >= 0; bits-= 6)
838
21.2M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
839
17.7M
  return (out - savedout);
840
17.7M
    }
841
799k
    *out = val;
842
799k
    return 1;
843
18.5M
}
844
845
/**
846
 * xmlCopyChar:
847
 * @len:  Ignored, compatibility
848
 * @out:  pointer to an array of xmlChar
849
 * @val:  the char value
850
 *
851
 * append the char value in the array
852
 *
853
 * Returns the number of xmlChar written
854
 */
855
856
int
857
621k
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
858
621k
    if ((out == NULL) || (val < 0)) return(0);
859
    /* the len parameter is ignored */
860
621k
    if  (val >= 0x80) {
861
308k
  return(xmlCopyCharMultiByte (out, val));
862
308k
    }
863
313k
    *out = val;
864
313k
    return 1;
865
621k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Commodity functions to switch encodings     *
870
 *                  *
871
 ************************************************************************/
872
873
static int
874
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
875
                          xmlCharEncodingHandlerPtr handler, int len);
876
/**
877
 * xmlSwitchEncoding:
878
 * @ctxt:  the parser context
879
 * @enc:  the encoding value (number)
880
 *
881
 * change the input functions when discovering the character encoding
882
 * of a given entity.
883
 *
884
 * Returns 0 in case of success, -1 otherwise
885
 */
886
int
887
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
888
723k
{
889
723k
    xmlCharEncodingHandlerPtr handler;
890
723k
    int len = -1;
891
723k
    int ret;
892
893
723k
    if (ctxt == NULL) return(-1);
894
723k
    switch (enc) {
895
0
  case XML_CHAR_ENCODING_ERROR:
896
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
897
0
                     "encoding unknown\n", NULL, NULL);
898
0
      return(-1);
899
160k
  case XML_CHAR_ENCODING_NONE:
900
      /* let's assume it's UTF-8 without the XML decl */
901
160k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
902
160k
      return(0);
903
547k
  case XML_CHAR_ENCODING_UTF8:
904
      /* default encoding, no conversion should be needed */
905
547k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
906
907
      /*
908
       * Errata on XML-1.0 June 20 2001
909
       * Specific handling of the Byte Order Mark for
910
       * UTF-8
911
       */
912
547k
      if ((ctxt->input != NULL) &&
913
547k
    (ctxt->input->cur[0] == 0xEF) &&
914
547k
    (ctxt->input->cur[1] == 0xBB) &&
915
547k
    (ctxt->input->cur[2] == 0xBF)) {
916
5.31k
    ctxt->input->cur += 3;
917
5.31k
      }
918
547k
      return(0);
919
4.37k
    case XML_CHAR_ENCODING_UTF16LE:
920
8.95k
    case XML_CHAR_ENCODING_UTF16BE:
921
        /*The raw input characters are encoded
922
         *in UTF-16. As we expect this function
923
         *to be called after xmlCharEncInFunc, we expect
924
         *ctxt->input->cur to contain UTF-8 encoded characters.
925
         *So the raw UTF16 Byte Order Mark
926
         *has also been converted into
927
         *an UTF-8 BOM. Let's skip that BOM.
928
         */
929
8.95k
        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
930
8.95k
            (ctxt->input->cur[0] == 0xEF) &&
931
8.95k
            (ctxt->input->cur[1] == 0xBB) &&
932
8.95k
            (ctxt->input->cur[2] == 0xBF)) {
933
2.19k
            ctxt->input->cur += 3;
934
2.19k
        }
935
8.95k
        len = 90;
936
8.95k
  break;
937
0
    case XML_CHAR_ENCODING_UCS2:
938
0
        len = 90;
939
0
  break;
940
963
    case XML_CHAR_ENCODING_UCS4BE:
941
1.73k
    case XML_CHAR_ENCODING_UCS4LE:
942
1.91k
    case XML_CHAR_ENCODING_UCS4_2143:
943
2.10k
    case XML_CHAR_ENCODING_UCS4_3412:
944
2.10k
        len = 180;
945
2.10k
  break;
946
4.60k
    case XML_CHAR_ENCODING_EBCDIC:
947
4.60k
    case XML_CHAR_ENCODING_8859_1:
948
4.60k
    case XML_CHAR_ENCODING_8859_2:
949
4.60k
    case XML_CHAR_ENCODING_8859_3:
950
4.60k
    case XML_CHAR_ENCODING_8859_4:
951
4.60k
    case XML_CHAR_ENCODING_8859_5:
952
4.60k
    case XML_CHAR_ENCODING_8859_6:
953
4.60k
    case XML_CHAR_ENCODING_8859_7:
954
4.60k
    case XML_CHAR_ENCODING_8859_8:
955
4.60k
    case XML_CHAR_ENCODING_8859_9:
956
4.60k
    case XML_CHAR_ENCODING_ASCII:
957
4.60k
    case XML_CHAR_ENCODING_2022_JP:
958
4.60k
    case XML_CHAR_ENCODING_SHIFT_JIS:
959
4.60k
    case XML_CHAR_ENCODING_EUC_JP:
960
4.60k
        len = 45;
961
4.60k
  break;
962
723k
    }
963
15.6k
    handler = xmlGetCharEncodingHandler(enc);
964
15.6k
    if (handler == NULL) {
965
  /*
966
   * Default handlers.
967
   */
968
367
  switch (enc) {
969
0
      case XML_CHAR_ENCODING_ASCII:
970
    /* default encoding, no conversion should be needed */
971
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
972
0
    return(0);
973
0
      case XML_CHAR_ENCODING_8859_1:
974
0
    if ((ctxt->inputNr == 1) &&
975
0
        (ctxt->encoding == NULL) &&
976
0
        (ctxt->input != NULL) &&
977
0
        (ctxt->input->encoding != NULL)) {
978
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
979
0
    }
980
0
    ctxt->charset = enc;
981
0
    return(0);
982
367
      default:
983
367
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
984
367
                        "encoding not supported: %s\n",
985
367
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
986
                /*
987
                 * TODO: We could recover from errors in external entities
988
                 * if we didn't stop the parser. But most callers of this
989
                 * function don't check the return value.
990
                 */
991
367
                xmlStopParser(ctxt);
992
367
                return(-1);
993
367
        }
994
367
    }
995
15.2k
    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
996
15.2k
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
997
        /*
998
   * on encoding conversion errors, stop the parser
999
   */
1000
191
        xmlStopParser(ctxt);
1001
191
  ctxt->errNo = XML_I18N_CONV_FAILED;
1002
191
    }
1003
15.2k
    return(ret);
1004
15.6k
}
1005
1006
/**
1007
 * xmlSwitchInputEncodingInt:
1008
 * @ctxt:  the parser context
1009
 * @input:  the input stream
1010
 * @handler:  the encoding handler
1011
 * @len:  the number of bytes to convert for the first line or -1
1012
 *
1013
 * change the input functions when discovering the character encoding
1014
 * of a given entity.
1015
 *
1016
 * Returns 0 in case of success, -1 otherwise
1017
 */
1018
static int
1019
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1020
                          xmlCharEncodingHandlerPtr handler, int len)
1021
61.9k
{
1022
61.9k
    int nbchars;
1023
1024
61.9k
    if (handler == NULL)
1025
0
        return (-1);
1026
61.9k
    if (input == NULL)
1027
0
        return (-1);
1028
61.9k
    if (input->buf != NULL) {
1029
61.9k
  ctxt->charset = XML_CHAR_ENCODING_UTF8;
1030
1031
61.9k
        if (input->buf->encoder != NULL) {
1032
            /*
1033
             * Check in case the auto encoding detection triggered
1034
             * in already.
1035
             */
1036
5.38k
            if (input->buf->encoder == handler)
1037
2.88k
                return (0);
1038
1039
            /*
1040
             * "UTF-16" can be used for both LE and BE
1041
             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1042
             BAD_CAST "UTF-16", 6)) &&
1043
             (!xmlStrncmp(BAD_CAST handler->name,
1044
             BAD_CAST "UTF-16", 6))) {
1045
             return(0);
1046
             }
1047
             */
1048
1049
            /*
1050
             * Note: this is a bit dangerous, but that's what it
1051
             * takes to use nearly compatible signature for different
1052
             * encodings.
1053
             *
1054
             * FIXME: Encoders might buffer partial byte sequences, so
1055
             * this probably can't work. We should return an error and
1056
             * make sure that callers never try to switch the encoding
1057
             * twice.
1058
             */
1059
2.50k
            xmlCharEncCloseFunc(input->buf->encoder);
1060
2.50k
            input->buf->encoder = handler;
1061
2.50k
            return (0);
1062
5.38k
        }
1063
56.5k
        input->buf->encoder = handler;
1064
1065
        /*
1066
         * Is there already some content down the pipe to convert ?
1067
         */
1068
56.5k
        if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1069
56.5k
            int processed;
1070
56.5k
      unsigned int use;
1071
1072
            /*
1073
             * Specific handling of the Byte Order Mark for
1074
             * UTF-16
1075
             */
1076
56.5k
            if ((handler->name != NULL) &&
1077
56.5k
                (!strcmp(handler->name, "UTF-16LE") ||
1078
56.5k
                 !strcmp(handler->name, "UTF-16")) &&
1079
56.5k
                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1080
2.40k
                input->cur += 2;
1081
2.40k
            }
1082
56.5k
            if ((handler->name != NULL) &&
1083
56.5k
                (!strcmp(handler->name, "UTF-16BE")) &&
1084
56.5k
                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1085
2.29k
                input->cur += 2;
1086
2.29k
            }
1087
            /*
1088
             * Errata on XML-1.0 June 20 2001
1089
             * Specific handling of the Byte Order Mark for
1090
             * UTF-8
1091
             */
1092
56.5k
            if ((handler->name != NULL) &&
1093
56.5k
                (!strcmp(handler->name, "UTF-8")) &&
1094
56.5k
                (input->cur[0] == 0xEF) &&
1095
56.5k
                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1096
0
                input->cur += 3;
1097
0
            }
1098
1099
            /*
1100
             * Shrink the current input buffer.
1101
             * Move it as the raw buffer and create a new input buffer
1102
             */
1103
56.5k
            processed = input->cur - input->base;
1104
56.5k
            xmlBufShrink(input->buf->buffer, processed);
1105
56.5k
            input->buf->raw = input->buf->buffer;
1106
56.5k
            input->buf->buffer = xmlBufCreate();
1107
56.5k
      input->buf->rawconsumed = processed;
1108
56.5k
      use = xmlBufUse(input->buf->raw);
1109
1110
56.5k
            if (ctxt->html) {
1111
                /*
1112
                 * convert as much as possible of the buffer
1113
                 */
1114
0
                nbchars = xmlCharEncInput(input->buf, 1);
1115
56.5k
            } else {
1116
                /*
1117
                 * convert just enough to get
1118
                 * '<?xml version="1.0" encoding="xxx"?>'
1119
                 * parsed with the autodetected encoding
1120
                 * into the parser reading buffer.
1121
                 */
1122
56.5k
                nbchars = xmlCharEncFirstLineInput(input->buf, len);
1123
56.5k
            }
1124
56.5k
            xmlBufResetInput(input->buf->buffer, input);
1125
56.5k
            if (nbchars < 0) {
1126
350
                xmlErrInternal(ctxt,
1127
350
                               "switching encoding: encoder error\n",
1128
350
                               NULL);
1129
350
                return (-1);
1130
350
            }
1131
56.1k
      input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1132
56.1k
        }
1133
56.1k
        return (0);
1134
56.5k
    } else {
1135
3
  xmlErrInternal(ctxt,
1136
3
                "static memory buffer doesn't support encoding\n", NULL);
1137
        /*
1138
         * Callers assume that the input buffer takes ownership of the
1139
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1140
         * handlers and avoids a memory leak.
1141
         */
1142
3
        xmlCharEncCloseFunc(handler);
1143
3
  return (-1);
1144
3
    }
1145
61.9k
}
1146
1147
/**
1148
 * xmlSwitchInputEncoding:
1149
 * @ctxt:  the parser context
1150
 * @input:  the input stream
1151
 * @handler:  the encoding handler
1152
 *
1153
 * DEPRECATED: Use xmlSwitchToEncoding
1154
 *
1155
 * change the input functions when discovering the character encoding
1156
 * of a given entity.
1157
 *
1158
 * Returns 0 in case of success, -1 otherwise
1159
 */
1160
int
1161
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1162
0
                          xmlCharEncodingHandlerPtr handler) {
1163
0
    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1164
0
}
1165
1166
/**
1167
 * xmlSwitchToEncoding:
1168
 * @ctxt:  the parser context
1169
 * @handler:  the encoding handler
1170
 *
1171
 * change the input functions when discovering the character encoding
1172
 * of a given entity.
1173
 *
1174
 * Returns 0 in case of success, -1 otherwise
1175
 */
1176
int
1177
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1178
46.6k
{
1179
46.6k
    if (ctxt == NULL)
1180
0
        return(-1);
1181
46.6k
    return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
1182
46.6k
}
1183
1184
/************************************************************************
1185
 *                  *
1186
 *  Commodity functions to handle entities processing   *
1187
 *                  *
1188
 ************************************************************************/
1189
1190
/**
1191
 * xmlFreeInputStream:
1192
 * @input:  an xmlParserInputPtr
1193
 *
1194
 * Free up an input stream.
1195
 */
1196
void
1197
4.34M
xmlFreeInputStream(xmlParserInputPtr input) {
1198
4.34M
    if (input == NULL) return;
1199
1200
4.34M
    if (input->filename != NULL) xmlFree((char *) input->filename);
1201
4.34M
    if (input->directory != NULL) xmlFree((char *) input->directory);
1202
4.34M
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1203
4.34M
    if (input->version != NULL) xmlFree((char *) input->version);
1204
4.34M
    if ((input->free != NULL) && (input->base != NULL))
1205
0
        input->free((xmlChar *) input->base);
1206
4.34M
    if (input->buf != NULL)
1207
2.18M
        xmlFreeParserInputBuffer(input->buf);
1208
4.34M
    xmlFree(input);
1209
4.34M
}
1210
1211
/**
1212
 * xmlNewInputStream:
1213
 * @ctxt:  an XML parser context
1214
 *
1215
 * Create a new input stream structure.
1216
 *
1217
 * Returns the new input stream or NULL
1218
 */
1219
xmlParserInputPtr
1220
4.35M
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1221
4.35M
    xmlParserInputPtr input;
1222
1223
4.35M
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1224
4.35M
    if (input == NULL) {
1225
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1226
0
  return(NULL);
1227
0
    }
1228
4.35M
    memset(input, 0, sizeof(xmlParserInput));
1229
4.35M
    input->line = 1;
1230
4.35M
    input->col = 1;
1231
4.35M
    input->standalone = -1;
1232
1233
    /*
1234
     * If the context is NULL the id cannot be initialized, but that
1235
     * should not happen while parsing which is the situation where
1236
     * the id is actually needed.
1237
     */
1238
4.35M
    if (ctxt != NULL)
1239
4.35M
        input->id = ctxt->input_id++;
1240
1241
4.35M
    return(input);
1242
4.35M
}
1243
1244
/**
1245
 * xmlNewIOInputStream:
1246
 * @ctxt:  an XML parser context
1247
 * @input:  an I/O Input
1248
 * @enc:  the charset encoding if known
1249
 *
1250
 * Create a new input stream structure encapsulating the @input into
1251
 * a stream suitable for the parser.
1252
 *
1253
 * Returns the new input stream or NULL
1254
 */
1255
xmlParserInputPtr
1256
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1257
0
              xmlCharEncoding enc) {
1258
0
    xmlParserInputPtr inputStream;
1259
1260
0
    if (input == NULL) return(NULL);
1261
0
    if (xmlParserDebugEntities)
1262
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1263
0
    inputStream = xmlNewInputStream(ctxt);
1264
0
    if (inputStream == NULL) {
1265
0
  return(NULL);
1266
0
    }
1267
0
    inputStream->filename = NULL;
1268
0
    inputStream->buf = input;
1269
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1270
1271
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1272
0
        xmlSwitchEncoding(ctxt, enc);
1273
0
    }
1274
1275
0
    return(inputStream);
1276
0
}
1277
1278
/**
1279
 * xmlNewEntityInputStream:
1280
 * @ctxt:  an XML parser context
1281
 * @entity:  an Entity pointer
1282
 *
1283
 * Create a new input stream based on an xmlEntityPtr
1284
 *
1285
 * Returns the new input stream or NULL
1286
 */
1287
xmlParserInputPtr
1288
1.19M
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1289
1.19M
    xmlParserInputPtr input;
1290
1291
1.19M
    if (entity == NULL) {
1292
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1293
0
                 NULL);
1294
0
  return(NULL);
1295
0
    }
1296
1.19M
    if (xmlParserDebugEntities)
1297
0
  xmlGenericError(xmlGenericErrorContext,
1298
0
    "new input from entity: %s\n", entity->name);
1299
1.19M
    if (entity->content == NULL) {
1300
302k
  switch (entity->etype) {
1301
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1302
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1303
0
                   entity->name);
1304
0
                break;
1305
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1306
302k
            case XML_EXTERNAL_PARAMETER_ENTITY:
1307
302k
    return(xmlLoadExternalEntity((char *) entity->URI,
1308
302k
           (char *) entity->ExternalID, ctxt));
1309
0
            case XML_INTERNAL_GENERAL_ENTITY:
1310
0
          xmlErrInternal(ctxt,
1311
0
          "Internal entity %s without content !\n",
1312
0
                   entity->name);
1313
0
                break;
1314
0
            case XML_INTERNAL_PARAMETER_ENTITY:
1315
0
          xmlErrInternal(ctxt,
1316
0
          "Internal parameter entity %s without content !\n",
1317
0
                   entity->name);
1318
0
                break;
1319
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1320
0
          xmlErrInternal(ctxt,
1321
0
          "Predefined entity %s without content !\n",
1322
0
                   entity->name);
1323
0
                break;
1324
302k
  }
1325
0
  return(NULL);
1326
302k
    }
1327
891k
    input = xmlNewInputStream(ctxt);
1328
891k
    if (input == NULL) {
1329
0
  return(NULL);
1330
0
    }
1331
891k
    if (entity->URI != NULL)
1332
153
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1333
891k
    input->base = entity->content;
1334
891k
    if (entity->length == 0)
1335
369
        entity->length = xmlStrlen(entity->content);
1336
891k
    input->cur = entity->content;
1337
891k
    input->length = entity->length;
1338
891k
    input->end = &entity->content[input->length];
1339
891k
    return(input);
1340
891k
}
1341
1342
/**
1343
 * xmlNewStringInputStream:
1344
 * @ctxt:  an XML parser context
1345
 * @buffer:  an memory buffer
1346
 *
1347
 * Create a new input stream based on a memory buffer.
1348
 * Returns the new input stream
1349
 */
1350
xmlParserInputPtr
1351
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1352
0
    xmlParserInputPtr input;
1353
0
    xmlParserInputBufferPtr buf;
1354
1355
0
    if (buffer == NULL) {
1356
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1357
0
                 NULL);
1358
0
  return(NULL);
1359
0
    }
1360
0
    if (xmlParserDebugEntities)
1361
0
  xmlGenericError(xmlGenericErrorContext,
1362
0
    "new fixed input: %.30s\n", buffer);
1363
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1364
0
                                        xmlStrlen(buffer),
1365
0
                                        XML_CHAR_ENCODING_NONE);
1366
0
    if (buf == NULL) {
1367
0
  xmlErrMemory(ctxt, NULL);
1368
0
        return(NULL);
1369
0
    }
1370
0
    input = xmlNewInputStream(ctxt);
1371
0
    if (input == NULL) {
1372
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1373
0
  xmlFreeParserInputBuffer(buf);
1374
0
  return(NULL);
1375
0
    }
1376
0
    input->buf = buf;
1377
0
    xmlBufResetInput(input->buf->buffer, input);
1378
0
    return(input);
1379
0
}
1380
1381
/**
1382
 * xmlNewInputFromFile:
1383
 * @ctxt:  an XML parser context
1384
 * @filename:  the filename to use as entity
1385
 *
1386
 * Create a new input stream based on a file or an URL.
1387
 *
1388
 * Returns the new input stream or NULL in case of error
1389
 */
1390
xmlParserInputPtr
1391
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1392
0
    xmlParserInputBufferPtr buf;
1393
0
    xmlParserInputPtr inputStream;
1394
0
    char *directory = NULL;
1395
0
    xmlChar *URI = NULL;
1396
1397
0
    if (xmlParserDebugEntities)
1398
0
  xmlGenericError(xmlGenericErrorContext,
1399
0
    "new input from file: %s\n", filename);
1400
0
    if (ctxt == NULL) return(NULL);
1401
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1402
0
    if (buf == NULL) {
1403
0
  if (filename == NULL)
1404
0
      __xmlLoaderErr(ctxt,
1405
0
                     "failed to load external entity: NULL filename \n",
1406
0
         NULL);
1407
0
  else
1408
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1409
0
         (const char *) filename);
1410
0
  return(NULL);
1411
0
    }
1412
1413
0
    inputStream = xmlNewInputStream(ctxt);
1414
0
    if (inputStream == NULL) {
1415
0
  xmlFreeParserInputBuffer(buf);
1416
0
  return(NULL);
1417
0
    }
1418
1419
0
    inputStream->buf = buf;
1420
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1421
0
    if (inputStream == NULL)
1422
0
        return(NULL);
1423
1424
0
    if (inputStream->filename == NULL)
1425
0
  URI = xmlStrdup((xmlChar *) filename);
1426
0
    else
1427
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1428
0
    directory = xmlParserGetDirectory((const char *) URI);
1429
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1430
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1431
0
    if (URI != NULL) xmlFree((char *) URI);
1432
0
    inputStream->directory = directory;
1433
1434
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1435
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1436
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1437
0
    return(inputStream);
1438
0
}
1439
1440
/************************************************************************
1441
 *                  *
1442
 *    Commodity functions to handle parser contexts   *
1443
 *                  *
1444
 ************************************************************************/
1445
1446
/**
1447
 * xmlInitSAXParserCtxt:
1448
 * @ctxt:  XML parser context
1449
 * @sax:  SAX handlert
1450
 * @userData:  user data
1451
 *
1452
 * Initialize a SAX parser context
1453
 *
1454
 * Returns 0 in case of success and -1 in case of error
1455
 */
1456
1457
static int
1458
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1459
                     void *userData)
1460
3.46M
{
1461
3.46M
    xmlParserInputPtr input;
1462
1463
3.46M
    if(ctxt==NULL) {
1464
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1465
0
        return(-1);
1466
0
    }
1467
1468
3.46M
    xmlInitParser();
1469
1470
3.46M
    if (ctxt->dict == NULL)
1471
3.46M
  ctxt->dict = xmlDictCreate();
1472
3.46M
    if (ctxt->dict == NULL) {
1473
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1474
0
  return(-1);
1475
0
    }
1476
3.46M
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1477
1478
3.46M
    if (ctxt->sax == NULL)
1479
3.46M
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1480
3.46M
    if (ctxt->sax == NULL) {
1481
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1482
0
  return(-1);
1483
0
    }
1484
3.46M
    if (sax == NULL) {
1485
1.51M
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1486
1.51M
        xmlSAXVersion(ctxt->sax, 2);
1487
1.51M
        ctxt->userData = ctxt;
1488
1.95M
    } else {
1489
1.95M
  if (sax->initialized == XML_SAX2_MAGIC) {
1490
838k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1491
1.11M
        } else {
1492
1.11M
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1493
1.11M
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1494
1.11M
        }
1495
1.95M
        ctxt->userData = userData ? userData : ctxt;
1496
1.95M
    }
1497
1498
3.46M
    ctxt->maxatts = 0;
1499
3.46M
    ctxt->atts = NULL;
1500
    /* Allocate the Input stack */
1501
3.46M
    if (ctxt->inputTab == NULL) {
1502
3.46M
  ctxt->inputTab = (xmlParserInputPtr *)
1503
3.46M
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1504
3.46M
  ctxt->inputMax = 5;
1505
3.46M
    }
1506
3.46M
    if (ctxt->inputTab == NULL) {
1507
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1508
0
  ctxt->inputNr = 0;
1509
0
  ctxt->inputMax = 0;
1510
0
  ctxt->input = NULL;
1511
0
  return(-1);
1512
0
    }
1513
3.46M
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1514
0
        xmlFreeInputStream(input);
1515
0
    }
1516
3.46M
    ctxt->inputNr = 0;
1517
3.46M
    ctxt->input = NULL;
1518
1519
3.46M
    ctxt->version = NULL;
1520
3.46M
    ctxt->encoding = NULL;
1521
3.46M
    ctxt->standalone = -1;
1522
3.46M
    ctxt->hasExternalSubset = 0;
1523
3.46M
    ctxt->hasPErefs = 0;
1524
3.46M
    ctxt->html = 0;
1525
3.46M
    ctxt->external = 0;
1526
3.46M
    ctxt->instate = XML_PARSER_START;
1527
3.46M
    ctxt->token = 0;
1528
3.46M
    ctxt->directory = NULL;
1529
1530
    /* Allocate the Node stack */
1531
3.46M
    if (ctxt->nodeTab == NULL) {
1532
3.46M
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1533
3.46M
  ctxt->nodeMax = 10;
1534
3.46M
    }
1535
3.46M
    if (ctxt->nodeTab == NULL) {
1536
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1537
0
  ctxt->nodeNr = 0;
1538
0
  ctxt->nodeMax = 0;
1539
0
  ctxt->node = NULL;
1540
0
  ctxt->inputNr = 0;
1541
0
  ctxt->inputMax = 0;
1542
0
  ctxt->input = NULL;
1543
0
  return(-1);
1544
0
    }
1545
3.46M
    ctxt->nodeNr = 0;
1546
3.46M
    ctxt->node = NULL;
1547
1548
    /* Allocate the Name stack */
1549
3.46M
    if (ctxt->nameTab == NULL) {
1550
3.46M
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1551
3.46M
  ctxt->nameMax = 10;
1552
3.46M
    }
1553
3.46M
    if (ctxt->nameTab == NULL) {
1554
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1555
0
  ctxt->nodeNr = 0;
1556
0
  ctxt->nodeMax = 0;
1557
0
  ctxt->node = NULL;
1558
0
  ctxt->inputNr = 0;
1559
0
  ctxt->inputMax = 0;
1560
0
  ctxt->input = NULL;
1561
0
  ctxt->nameNr = 0;
1562
0
  ctxt->nameMax = 0;
1563
0
  ctxt->name = NULL;
1564
0
  return(-1);
1565
0
    }
1566
3.46M
    ctxt->nameNr = 0;
1567
3.46M
    ctxt->name = NULL;
1568
1569
    /* Allocate the space stack */
1570
3.46M
    if (ctxt->spaceTab == NULL) {
1571
3.46M
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1572
3.46M
  ctxt->spaceMax = 10;
1573
3.46M
    }
1574
3.46M
    if (ctxt->spaceTab == NULL) {
1575
0
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1576
0
  ctxt->nodeNr = 0;
1577
0
  ctxt->nodeMax = 0;
1578
0
  ctxt->node = NULL;
1579
0
  ctxt->inputNr = 0;
1580
0
  ctxt->inputMax = 0;
1581
0
  ctxt->input = NULL;
1582
0
  ctxt->nameNr = 0;
1583
0
  ctxt->nameMax = 0;
1584
0
  ctxt->name = NULL;
1585
0
  ctxt->spaceNr = 0;
1586
0
  ctxt->spaceMax = 0;
1587
0
  ctxt->space = NULL;
1588
0
  return(-1);
1589
0
    }
1590
3.46M
    ctxt->spaceNr = 1;
1591
3.46M
    ctxt->spaceMax = 10;
1592
3.46M
    ctxt->spaceTab[0] = -1;
1593
3.46M
    ctxt->space = &ctxt->spaceTab[0];
1594
3.46M
    ctxt->myDoc = NULL;
1595
3.46M
    ctxt->wellFormed = 1;
1596
3.46M
    ctxt->nsWellFormed = 1;
1597
3.46M
    ctxt->valid = 1;
1598
3.46M
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1599
3.46M
    if (ctxt->loadsubset) {
1600
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1601
0
    }
1602
3.46M
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1603
3.46M
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1604
3.46M
    if (ctxt->pedantic) {
1605
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1606
0
    }
1607
3.46M
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1608
3.46M
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1609
3.46M
    if (ctxt->keepBlanks == 0) {
1610
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1611
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1612
0
    }
1613
1614
3.46M
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1615
3.46M
    ctxt->vctxt.userData = ctxt;
1616
3.46M
    ctxt->vctxt.error = xmlParserValidityError;
1617
3.46M
    ctxt->vctxt.warning = xmlParserValidityWarning;
1618
3.46M
    if (ctxt->validate) {
1619
0
  if (xmlGetWarningsDefaultValue == 0)
1620
0
      ctxt->vctxt.warning = NULL;
1621
0
  else
1622
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1623
0
  ctxt->vctxt.nodeMax = 0;
1624
0
        ctxt->options |= XML_PARSE_DTDVALID;
1625
0
    }
1626
3.46M
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1627
3.46M
    if (ctxt->replaceEntities) {
1628
0
        ctxt->options |= XML_PARSE_NOENT;
1629
0
    }
1630
3.46M
    ctxt->record_info = 0;
1631
3.46M
    ctxt->checkIndex = 0;
1632
3.46M
    ctxt->inSubset = 0;
1633
3.46M
    ctxt->errNo = XML_ERR_OK;
1634
3.46M
    ctxt->depth = 0;
1635
3.46M
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1636
3.46M
    ctxt->catalogs = NULL;
1637
3.46M
    ctxt->nbentities = 0;
1638
3.46M
    ctxt->sizeentities = 0;
1639
3.46M
    ctxt->sizeentcopy = 0;
1640
3.46M
    ctxt->input_id = 1;
1641
3.46M
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1642
3.46M
    return(0);
1643
3.46M
}
1644
1645
/**
1646
 * xmlInitParserCtxt:
1647
 * @ctxt:  an XML parser context
1648
 *
1649
 * DEPRECATED: Internal function which will be made private in a future
1650
 * version.
1651
 *
1652
 * Initialize a parser context
1653
 *
1654
 * Returns 0 in case of success and -1 in case of error
1655
 */
1656
1657
int
1658
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1659
0
{
1660
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1661
0
}
1662
1663
/**
1664
 * xmlFreeParserCtxt:
1665
 * @ctxt:  an XML parser context
1666
 *
1667
 * Free all the memory used by a parser context. However the parsed
1668
 * document in ctxt->myDoc is not freed.
1669
 */
1670
1671
void
1672
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1673
3.46M
{
1674
3.46M
    xmlParserInputPtr input;
1675
1676
3.46M
    if (ctxt == NULL) return;
1677
1678
6.55M
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1679
3.09M
        xmlFreeInputStream(input);
1680
3.09M
    }
1681
3.46M
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1682
3.46M
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1683
3.46M
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1684
3.46M
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1685
3.46M
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1686
3.46M
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1687
3.46M
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1688
3.46M
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1689
3.46M
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1690
3.46M
#ifdef LIBXML_SAX1_ENABLED
1691
3.46M
    if ((ctxt->sax != NULL) &&
1692
3.46M
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1693
#else
1694
    if (ctxt->sax != NULL)
1695
#endif /* LIBXML_SAX1_ENABLED */
1696
3.46M
        xmlFree(ctxt->sax);
1697
3.46M
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1698
3.46M
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1699
3.46M
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1700
3.46M
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1701
3.46M
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1702
3.46M
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1703
3.46M
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1704
3.46M
    if (ctxt->attsDefault != NULL)
1705
57.3k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1706
3.46M
    if (ctxt->attsSpecial != NULL)
1707
93.5k
        xmlHashFree(ctxt->attsSpecial, NULL);
1708
3.46M
    if (ctxt->freeElems != NULL) {
1709
183k
        xmlNodePtr cur, next;
1710
1711
183k
  cur = ctxt->freeElems;
1712
366k
  while (cur != NULL) {
1713
183k
      next = cur->next;
1714
183k
      xmlFree(cur);
1715
183k
      cur = next;
1716
183k
  }
1717
183k
    }
1718
3.46M
    if (ctxt->freeAttrs != NULL) {
1719
115k
        xmlAttrPtr cur, next;
1720
1721
115k
  cur = ctxt->freeAttrs;
1722
231k
  while (cur != NULL) {
1723
115k
      next = cur->next;
1724
115k
      xmlFree(cur);
1725
115k
      cur = next;
1726
115k
  }
1727
115k
    }
1728
    /*
1729
     * cleanup the error strings
1730
     */
1731
3.46M
    if (ctxt->lastError.message != NULL)
1732
2.97M
        xmlFree(ctxt->lastError.message);
1733
3.46M
    if (ctxt->lastError.file != NULL)
1734
572k
        xmlFree(ctxt->lastError.file);
1735
3.46M
    if (ctxt->lastError.str1 != NULL)
1736
782k
        xmlFree(ctxt->lastError.str1);
1737
3.46M
    if (ctxt->lastError.str2 != NULL)
1738
59.2k
        xmlFree(ctxt->lastError.str2);
1739
3.46M
    if (ctxt->lastError.str3 != NULL)
1740
7.80k
        xmlFree(ctxt->lastError.str3);
1741
1742
3.46M
#ifdef LIBXML_CATALOG_ENABLED
1743
3.46M
    if (ctxt->catalogs != NULL)
1744
0
  xmlCatalogFreeLocal(ctxt->catalogs);
1745
3.46M
#endif
1746
3.46M
    xmlFree(ctxt);
1747
3.46M
}
1748
1749
/**
1750
 * xmlNewParserCtxt:
1751
 *
1752
 * Allocate and initialize a new parser context.
1753
 *
1754
 * Returns the xmlParserCtxtPtr or NULL
1755
 */
1756
1757
xmlParserCtxtPtr
1758
xmlNewParserCtxt(void)
1759
1.22M
{
1760
1.22M
    return(xmlNewSAXParserCtxt(NULL, NULL));
1761
1.22M
}
1762
1763
/**
1764
 * xmlNewSAXParserCtxt:
1765
 * @sax:  SAX handler
1766
 * @userData:  user data
1767
 *
1768
 * Allocate and initialize a new SAX parser context. If userData is NULL,
1769
 * the parser context will be passed as user data.
1770
 *
1771
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1772
 */
1773
1774
xmlParserCtxtPtr
1775
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1776
3.46M
{
1777
3.46M
    xmlParserCtxtPtr ctxt;
1778
1779
3.46M
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1780
3.46M
    if (ctxt == NULL) {
1781
0
  xmlErrMemory(NULL, "cannot allocate parser context\n");
1782
0
  return(NULL);
1783
0
    }
1784
3.46M
    memset(ctxt, 0, sizeof(xmlParserCtxt));
1785
3.46M
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1786
0
        xmlFreeParserCtxt(ctxt);
1787
0
  return(NULL);
1788
0
    }
1789
3.46M
    return(ctxt);
1790
3.46M
}
1791
1792
/************************************************************************
1793
 *                  *
1794
 *    Handling of node information        *
1795
 *                  *
1796
 ************************************************************************/
1797
1798
/**
1799
 * xmlClearParserCtxt:
1800
 * @ctxt:  an XML parser context
1801
 *
1802
 * Clear (release owned resources) and reinitialize a parser context
1803
 */
1804
1805
void
1806
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1807
0
{
1808
0
  if (ctxt==NULL)
1809
0
    return;
1810
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
1811
0
  xmlCtxtReset(ctxt);
1812
0
}
1813
1814
1815
/**
1816
 * xmlParserFindNodeInfo:
1817
 * @ctx:  an XML parser context
1818
 * @node:  an XML node within the tree
1819
 *
1820
 * DEPRECATED: Don't use.
1821
 *
1822
 * Find the parser node info struct for a given node
1823
 *
1824
 * Returns an xmlParserNodeInfo block pointer or NULL
1825
 */
1826
const xmlParserNodeInfo *
1827
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1828
0
{
1829
0
    unsigned long pos;
1830
1831
0
    if ((ctx == NULL) || (node == NULL))
1832
0
        return (NULL);
1833
    /* Find position where node should be at */
1834
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1835
0
    if (pos < ctx->node_seq.length
1836
0
        && ctx->node_seq.buffer[pos].node == node)
1837
0
        return &ctx->node_seq.buffer[pos];
1838
0
    else
1839
0
        return NULL;
1840
0
}
1841
1842
1843
/**
1844
 * xmlInitNodeInfoSeq:
1845
 * @seq:  a node info sequence pointer
1846
 *
1847
 * DEPRECATED: Don't use.
1848
 *
1849
 * -- Initialize (set to initial state) node info sequence
1850
 */
1851
void
1852
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1853
3.46M
{
1854
3.46M
    if (seq == NULL)
1855
0
        return;
1856
3.46M
    seq->length = 0;
1857
3.46M
    seq->maximum = 0;
1858
3.46M
    seq->buffer = NULL;
1859
3.46M
}
1860
1861
/**
1862
 * xmlClearNodeInfoSeq:
1863
 * @seq:  a node info sequence pointer
1864
 *
1865
 * DEPRECATED: Don't use.
1866
 *
1867
 * -- Clear (release memory and reinitialize) node
1868
 *   info sequence
1869
 */
1870
void
1871
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1872
0
{
1873
0
    if (seq == NULL)
1874
0
        return;
1875
0
    if (seq->buffer != NULL)
1876
0
        xmlFree(seq->buffer);
1877
0
    xmlInitNodeInfoSeq(seq);
1878
0
}
1879
1880
/**
1881
 * xmlParserFindNodeInfoIndex:
1882
 * @seq:  a node info sequence pointer
1883
 * @node:  an XML node pointer
1884
 *
1885
 * DEPRECATED: Don't use.
1886
 *
1887
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1888
 *   the given node is or should be at in a sorted sequence
1889
 *
1890
 * Returns a long indicating the position of the record
1891
 */
1892
unsigned long
1893
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1894
                           const xmlNodePtr node)
1895
0
{
1896
0
    unsigned long upper, lower, middle;
1897
0
    int found = 0;
1898
1899
0
    if ((seq == NULL) || (node == NULL))
1900
0
        return ((unsigned long) -1);
1901
1902
    /* Do a binary search for the key */
1903
0
    lower = 1;
1904
0
    upper = seq->length;
1905
0
    middle = 0;
1906
0
    while (lower <= upper && !found) {
1907
0
        middle = lower + (upper - lower) / 2;
1908
0
        if (node == seq->buffer[middle - 1].node)
1909
0
            found = 1;
1910
0
        else if (node < seq->buffer[middle - 1].node)
1911
0
            upper = middle - 1;
1912
0
        else
1913
0
            lower = middle + 1;
1914
0
    }
1915
1916
    /* Return position */
1917
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
1918
0
        return middle;
1919
0
    else
1920
0
        return middle - 1;
1921
0
}
1922
1923
1924
/**
1925
 * xmlParserAddNodeInfo:
1926
 * @ctxt:  an XML parser context
1927
 * @info:  a node info sequence pointer
1928
 *
1929
 * DEPRECATED: Don't use.
1930
 *
1931
 * Insert node info record into the sorted sequence
1932
 */
1933
void
1934
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1935
                     const xmlParserNodeInfoPtr info)
1936
0
{
1937
0
    unsigned long pos;
1938
1939
0
    if ((ctxt == NULL) || (info == NULL)) return;
1940
1941
    /* Find pos and check to see if node is already in the sequence */
1942
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1943
0
                                     info->node);
1944
1945
0
    if ((pos < ctxt->node_seq.length) &&
1946
0
        (ctxt->node_seq.buffer != NULL) &&
1947
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
1948
0
        ctxt->node_seq.buffer[pos] = *info;
1949
0
    }
1950
1951
    /* Otherwise, we need to add new node to buffer */
1952
0
    else {
1953
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
1954
0
      (ctxt->node_seq.buffer == NULL)) {
1955
0
            xmlParserNodeInfo *tmp_buffer;
1956
0
            unsigned int byte_size;
1957
1958
0
            if (ctxt->node_seq.maximum == 0)
1959
0
                ctxt->node_seq.maximum = 2;
1960
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
1961
0
      (2 * ctxt->node_seq.maximum));
1962
1963
0
            if (ctxt->node_seq.buffer == NULL)
1964
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1965
0
            else
1966
0
                tmp_buffer =
1967
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1968
0
                                                     byte_size);
1969
1970
0
            if (tmp_buffer == NULL) {
1971
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
1972
0
                return;
1973
0
            }
1974
0
            ctxt->node_seq.buffer = tmp_buffer;
1975
0
            ctxt->node_seq.maximum *= 2;
1976
0
        }
1977
1978
        /* If position is not at end, move elements out of the way */
1979
0
        if (pos != ctxt->node_seq.length) {
1980
0
            unsigned long i;
1981
1982
0
            for (i = ctxt->node_seq.length; i > pos; i--)
1983
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1984
0
        }
1985
1986
        /* Copy element and increase length */
1987
0
        ctxt->node_seq.buffer[pos] = *info;
1988
0
        ctxt->node_seq.length++;
1989
0
    }
1990
0
}
1991
1992
/************************************************************************
1993
 *                  *
1994
 *    Default settings          *
1995
 *                  *
1996
 ************************************************************************/
1997
/**
1998
 * xmlPedanticParserDefault:
1999
 * @val:  int 0 or 1
2000
 *
2001
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2002
 *
2003
 * Set and return the previous value for enabling pedantic warnings.
2004
 *
2005
 * Returns the last value for 0 for no substitution, 1 for substitution.
2006
 */
2007
2008
int
2009
0
xmlPedanticParserDefault(int val) {
2010
0
    int old = xmlPedanticParserDefaultValue;
2011
2012
0
    xmlPedanticParserDefaultValue = val;
2013
0
    return(old);
2014
0
}
2015
2016
/**
2017
 * xmlLineNumbersDefault:
2018
 * @val:  int 0 or 1
2019
 *
2020
 * DEPRECATED: The modern options API always enables line numbers.
2021
 *
2022
 * Set and return the previous value for enabling line numbers in elements
2023
 * contents. This may break on old application and is turned off by default.
2024
 *
2025
 * Returns the last value for 0 for no substitution, 1 for substitution.
2026
 */
2027
2028
int
2029
0
xmlLineNumbersDefault(int val) {
2030
0
    int old = xmlLineNumbersDefaultValue;
2031
2032
0
    xmlLineNumbersDefaultValue = val;
2033
0
    return(old);
2034
0
}
2035
2036
/**
2037
 * xmlSubstituteEntitiesDefault:
2038
 * @val:  int 0 or 1
2039
 *
2040
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2041
 *
2042
 * Set and return the previous value for default entity support.
2043
 * Initially the parser always keep entity references instead of substituting
2044
 * entity values in the output. This function has to be used to change the
2045
 * default parser behavior
2046
 * SAX::substituteEntities() has to be used for changing that on a file by
2047
 * file basis.
2048
 *
2049
 * Returns the last value for 0 for no substitution, 1 for substitution.
2050
 */
2051
2052
int
2053
0
xmlSubstituteEntitiesDefault(int val) {
2054
0
    int old = xmlSubstituteEntitiesDefaultValue;
2055
2056
0
    xmlSubstituteEntitiesDefaultValue = val;
2057
0
    return(old);
2058
0
}
2059
2060
/**
2061
 * xmlKeepBlanksDefault:
2062
 * @val:  int 0 or 1
2063
 *
2064
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2065
 *
2066
 * Set and return the previous value for default blanks text nodes support.
2067
 * The 1.x version of the parser used an heuristic to try to detect
2068
 * ignorable white spaces. As a result the SAX callback was generating
2069
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2070
 * using the DOM output text nodes containing those blanks were not generated.
2071
 * The 2.x and later version will switch to the XML standard way and
2072
 * ignorableWhitespace() are only generated when running the parser in
2073
 * validating mode and when the current element doesn't allow CDATA or
2074
 * mixed content.
2075
 * This function is provided as a way to force the standard behavior
2076
 * on 1.X libs and to switch back to the old mode for compatibility when
2077
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2078
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2079
 * nodes generated.
2080
 * This value also affect autogeneration of indentation when saving code
2081
 * if blanks sections are kept, indentation is not generated.
2082
 *
2083
 * Returns the last value for 0 for no substitution, 1 for substitution.
2084
 */
2085
2086
int
2087
0
xmlKeepBlanksDefault(int val) {
2088
0
    int old = xmlKeepBlanksDefaultValue;
2089
2090
0
    xmlKeepBlanksDefaultValue = val;
2091
0
    if (!val) xmlIndentTreeOutput = 1;
2092
0
    return(old);
2093
0
}
2094