Coverage Report

Created: 2025-07-11 06:46

/src/tinysparql/subprojects/libxml2-2.13.1/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
#include <libxml/nanohttp.h>
39
40
/*
 * Shorthand for the cur and end pointers of the context's current input.
 * The argument and the whole expansion are parenthesized so that the
 * macros stay correct when used with arbitrary expressions.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
#include "private/io.h"
47
#include "private/parser.h"
48
49
0
#define XML_MAX_ERRORS 100
50
51
/*
52
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53
 * factor of serialized output after entity expansion.
54
 */
55
0
#define XML_MAX_AMPLIFICATION_DEFAULT 5
56
57
/*
58
 * Various global defaults for parsing
59
 */
60
61
/**
62
 * xmlCheckVersion:
63
 * @version: the include version number
64
 *
65
 * check the compiled lib version against the include one.
66
 */
67
void
68
0
xmlCheckVersion(int version) {
69
0
    int myversion = LIBXML_VERSION;
70
71
0
    xmlInitParser();
72
73
0
    if ((myversion / 10000) != (version / 10000)) {
74
0
  fprintf(stderr,
75
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
76
0
    (version / 10000), (myversion / 10000));
77
0
    } else if ((myversion / 100) < (version / 100)) {
78
0
  fprintf(stderr,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlCtxtSetErrorHandler:
94
 * @ctxt:  an XML parser context
95
 * @handler:  error handler
96
 * @data:  data for error handler
97
 *
98
 * Register a callback function that will be called on errors and
99
 * warnings. If handler is NULL, the error handler will be deactivated.
100
 *
101
 * This is the recommended way to collect errors from the parser and
102
 * takes precedence over all other error reporting mechanisms.
103
 * These are (in order of precedence):
104
 *
105
 * - per-context structured handler (xmlCtxtSetErrorHandler)
106
 * - per-context structured "serror" SAX handler
107
 * - global structured handler (xmlSetStructuredErrorFunc)
108
 * - per-context generic "error" and "warning" SAX handlers
109
 * - global generic handler (xmlSetGenericErrorFunc)
110
 * - print to stderr
111
 *
112
 * Available since 2.13.0.
113
 */
114
void
115
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116
                       void *data)
117
0
{
118
0
    if (ctxt == NULL)
119
0
        return;
120
0
    ctxt->errorHandler = handler;
121
0
    ctxt->errorCtxt = data;
122
0
}
123
124
/**
125
 * xmlCtxtErrMemory:
126
 * @ctxt:  an XML parser context
127
 *
128
 * Handle an out-of-memory error.
129
 *
130
 * Available since 2.13.0.
131
 */
132
void
133
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
134
0
{
135
0
    xmlStructuredErrorFunc schannel = NULL;
136
0
    xmlGenericErrorFunc channel = NULL;
137
0
    void *data;
138
139
0
    if (ctxt == NULL)
140
0
        return;
141
142
0
    ctxt->errNo = XML_ERR_NO_MEMORY;
143
0
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
144
0
    ctxt->wellFormed = 0;
145
0
    ctxt->disableSAX = 2;
146
147
0
    if (ctxt->errorHandler) {
148
0
        schannel = ctxt->errorHandler;
149
0
        data = ctxt->errorCtxt;
150
0
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
151
0
        (ctxt->sax->serror != NULL)) {
152
0
        schannel = ctxt->sax->serror;
153
0
        data = ctxt->userData;
154
0
    } else {
155
0
        channel = ctxt->sax->error;
156
0
        data = ctxt->userData;
157
0
    }
158
159
0
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
160
0
                        &ctxt->lastError);
161
0
}
162
163
/**
164
 * xmlCtxtErrIO:
165
 * @ctxt:  parser context
166
 * @code:  xmlParserErrors code
167
 * @uri:  filename or URI (optional)
168
 *
169
 * If filename is empty, use the one from context input if available.
170
 *
171
 * Report an IO error to the parser context.
172
 */
173
void
174
xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
175
0
{
176
0
    const char *errstr, *msg, *str1, *str2;
177
0
    xmlErrorLevel level;
178
179
0
    if (ctxt == NULL)
180
0
        return;
181
182
    /*
183
     * Only report a warning if a file could not be found. This should
184
     * only be done for external entities, but the external entity loader
185
     * of xsltproc can try multiple paths and assumes that ENOENT doesn't
186
     * raise an error and aborts parsing.
187
     */
188
0
    if (((code == XML_IO_ENOENT) ||
189
0
         (code == XML_IO_NETWORK_ATTEMPT) ||
190
0
         (code == XML_IO_UNKNOWN))) {
191
0
        if (ctxt->validate == 0)
192
0
            level = XML_ERR_WARNING;
193
0
        else
194
0
            level = XML_ERR_ERROR;
195
0
    } else {
196
0
        level = XML_ERR_FATAL;
197
0
    }
198
199
0
    errstr = xmlErrString(code);
200
201
0
    if (uri == NULL) {
202
0
        msg = "%s\n";
203
0
        str1 = errstr;
204
0
        str2 = NULL;
205
0
    } else {
206
0
        msg = "failed to load \"%s\": %s\n";
207
0
        str1 = uri;
208
0
        str2 = errstr;
209
0
    }
210
211
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
212
0
               (const xmlChar *) uri, NULL, NULL, 0,
213
0
               msg, str1, str2);
214
0
}
215
216
/**
217
 * xmlCtxtVErr:
218
 * @ctxt:  a parser context
219
 * @node: the current node or NULL
220
 * @domain: the domain for the error
221
 * @code: the code for the error
222
 * @level: the xmlErrorLevel for the error
223
 * @str1: extra string info
224
 * @str2: extra string info
225
 * @str3: extra string info
226
 * @int1: extra int info
227
 * @msg:  the message to display/transmit
228
 * @ap:  extra parameters for the message display
229
 *
230
 * Raise a parser error.
231
 */
232
void
233
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
234
            xmlParserErrors code, xmlErrorLevel level,
235
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
236
            int int1, const char *msg, va_list ap)
237
0
{
238
0
    xmlStructuredErrorFunc schannel = NULL;
239
0
    xmlGenericErrorFunc channel = NULL;
240
0
    void *data = NULL;
241
0
    const char *file = NULL;
242
0
    int line = 0;
243
0
    int col = 0;
244
0
    int res;
245
246
0
    if (code == XML_ERR_NO_MEMORY) {
247
0
        xmlCtxtErrMemory(ctxt);
248
0
        return;
249
0
    }
250
251
0
    if (PARSER_STOPPED(ctxt))
252
0
  return;
253
254
0
    if (level == XML_ERR_WARNING) {
255
0
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
256
0
            return;
257
0
        ctxt->nbWarnings += 1;
258
0
    } else {
259
0
        if (ctxt->nbErrors >= XML_MAX_ERRORS)
260
0
            return;
261
0
        ctxt->nbErrors += 1;
262
0
    }
263
264
0
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
265
0
        ((level != XML_ERR_WARNING) ||
266
0
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
267
0
        if (ctxt->errorHandler) {
268
0
            schannel = ctxt->errorHandler;
269
0
            data = ctxt->errorCtxt;
270
0
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
271
0
            (ctxt->sax->serror != NULL)) {
272
0
            schannel = ctxt->sax->serror;
273
0
            data = ctxt->userData;
274
0
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
275
0
            if (level == XML_ERR_WARNING)
276
0
                channel = ctxt->vctxt.warning;
277
0
            else
278
0
                channel = ctxt->vctxt.error;
279
0
            data = ctxt->vctxt.userData;
280
0
        } else {
281
0
            if (level == XML_ERR_WARNING)
282
0
                channel = ctxt->sax->warning;
283
0
            else
284
0
                channel = ctxt->sax->error;
285
0
            data = ctxt->userData;
286
0
        }
287
0
    }
288
289
0
    if (ctxt->input != NULL) {
290
0
        xmlParserInputPtr input = ctxt->input;
291
292
0
        if ((input->filename == NULL) &&
293
0
            (ctxt->inputNr > 1)) {
294
0
            input = ctxt->inputTab[ctxt->inputNr - 2];
295
0
        }
296
0
        file = input->filename;
297
0
        line = input->line;
298
0
        col = input->col;
299
0
    }
300
301
0
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
302
0
                         level, file, line, (const char *) str1,
303
0
                         (const char *) str2, (const char *) str3, int1, col,
304
0
                         msg, ap);
305
306
0
    if (res < 0) {
307
0
        xmlCtxtErrMemory(ctxt);
308
0
        return;
309
0
    }
310
311
0
    if (level >= XML_ERR_ERROR)
312
0
        ctxt->errNo = code;
313
0
    if (level == XML_ERR_FATAL) {
314
0
        ctxt->wellFormed = 0;
315
0
        if (ctxt->recovery == 0)
316
0
            ctxt->disableSAX = 1;
317
0
    }
318
319
0
    return;
320
0
}
321
322
/**
323
 * xmlCtxtErr:
324
 * @ctxt:  a parser context
325
 * @node: the current node or NULL
326
 * @domain: the domain for the error
327
 * @code: the code for the error
328
 * @level: the xmlErrorLevel for the error
329
 * @str1: extra string info
330
 * @str2: extra string info
331
 * @str3: extra string info
332
 * @int1: extra int info
333
 * @msg:  the message to display/transmit
334
 * @...:  extra parameters for the message display
335
 *
336
 * Raise a parser error.
337
 */
338
void
339
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
340
           xmlParserErrors code, xmlErrorLevel level,
341
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
342
           int int1, const char *msg, ...)
343
0
{
344
0
    va_list ap;
345
346
0
    va_start(ap, msg);
347
0
    xmlCtxtVErr(ctxt, node, domain, code, level,
348
0
                str1, str2, str3, int1, msg, ap);
349
0
    va_end(ap);
350
0
}
351
352
/**
353
 * xmlFatalErr:
354
 * @ctxt:  an XML parser context
355
 * @code:  the error number
356
 * @info:  extra information string
357
 *
358
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
359
 */
360
void
361
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
362
0
{
363
0
    const char *errmsg;
364
0
    xmlErrorLevel level;
365
366
0
    if (code == XML_ERR_UNSUPPORTED_ENCODING)
367
0
        level = XML_ERR_WARNING;
368
0
    else
369
0
        level = XML_ERR_FATAL;
370
371
0
    errmsg = xmlErrString(code);
372
373
0
    if (info == NULL) {
374
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
375
0
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
376
0
    } else {
377
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
378
0
                   (const xmlChar *) info, NULL, NULL, 0,
379
0
                   "%s: %s\n", errmsg, info);
380
0
    }
381
0
}
382
383
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
396
397
/************************************************************************
398
 *                  *
399
 *    Input handling functions for progressive parsing  *
400
 *                  *
401
 ************************************************************************/
402
403
/* we need to keep enough input to show errors in context */
404
0
#define LINE_LEN        80
405
406
/**
407
 * xmlHaltParser:
408
 * @ctxt:  an XML parser context
409
 *
410
 * Blocks further parser processing don't override error
411
 * for internal use
412
 */
413
void
414
0
xmlHaltParser(xmlParserCtxtPtr ctxt) {
415
0
    if (ctxt == NULL)
416
0
        return;
417
0
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
418
0
    ctxt->disableSAX = 2;
419
0
}
420
421
/**
422
 * xmlParserInputRead:
423
 * @in:  an XML parser input
424
 * @len:  an indicative size for the lookahead
425
 *
426
 * DEPRECATED: This function was internal and is deprecated.
427
 *
428
 * Returns -1 as this is an error to use it.
429
 */
430
int
431
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
432
0
    return(-1);
433
0
}
434
435
/**
436
 * xmlParserGrow:
437
 * @ctxt:  an XML parser context
438
 *
439
 * Grow the input buffer.
440
 *
441
 * Returns the number of bytes read or -1 in case of error.
442
 */
443
int
444
0
xmlParserGrow(xmlParserCtxtPtr ctxt) {
445
0
    xmlParserInputPtr in = ctxt->input;
446
0
    xmlParserInputBufferPtr buf = in->buf;
447
0
    size_t curEnd = in->end - in->cur;
448
0
    size_t curBase = in->cur - in->base;
449
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
450
0
                       XML_MAX_HUGE_LENGTH :
451
0
                       XML_MAX_LOOKUP_LIMIT;
452
0
    int ret;
453
454
0
    if (buf == NULL)
455
0
        return(0);
456
    /* Don't grow push parser buffer. */
457
0
    if (PARSER_PROGRESSIVE(ctxt))
458
0
        return(0);
459
    /* Don't grow memory buffers. */
460
0
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
461
0
        return(0);
462
0
    if (buf->error != 0)
463
0
        return(-1);
464
465
0
    if (curBase > maxLength) {
466
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
467
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
468
0
        xmlHaltParser(ctxt);
469
0
  return(-1);
470
0
    }
471
472
0
    if (curEnd >= INPUT_CHUNK)
473
0
        return(0);
474
475
0
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
476
0
    xmlBufUpdateInput(buf->buffer, in, curBase);
477
478
0
    if (ret < 0) {
479
0
        xmlCtxtErrIO(ctxt, buf->error, NULL);
480
0
    }
481
482
0
    return(ret);
483
0
}
484
485
/**
486
 * xmlParserInputGrow:
487
 * @in:  an XML parser input
488
 * @len:  an indicative size for the lookahead
489
 *
490
 * DEPRECATED: Don't use.
491
 *
492
 * This function increase the input for the parser. It tries to
493
 * preserve pointers to the input buffer, and keep already read data
494
 *
495
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
496
 * end of this entity
497
 */
498
int
499
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
500
0
    int ret;
501
0
    size_t indx;
502
503
0
    if ((in == NULL) || (len < 0)) return(-1);
504
0
    if (in->buf == NULL) return(-1);
505
0
    if (in->base == NULL) return(-1);
506
0
    if (in->cur == NULL) return(-1);
507
0
    if (in->buf->buffer == NULL) return(-1);
508
509
    /* Don't grow memory buffers. */
510
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
511
0
        return(0);
512
513
0
    indx = in->cur - in->base;
514
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
515
0
        return(0);
516
0
    }
517
0
    ret = xmlParserInputBufferGrow(in->buf, len);
518
519
0
    in->base = xmlBufContent(in->buf->buffer);
520
0
    if (in->base == NULL) {
521
0
        in->base = BAD_CAST "";
522
0
        in->cur = in->base;
523
0
        in->end = in->base;
524
0
        return(-1);
525
0
    }
526
0
    in->cur = in->base + indx;
527
0
    in->end = xmlBufEnd(in->buf->buffer);
528
529
0
    return(ret);
530
0
}
531
532
/**
533
 * xmlParserShrink:
534
 * @ctxt:  an XML parser context
535
 *
536
 * Shrink the input buffer.
537
 */
538
void
539
0
xmlParserShrink(xmlParserCtxtPtr ctxt) {
540
0
    xmlParserInputPtr in = ctxt->input;
541
0
    xmlParserInputBufferPtr buf = in->buf;
542
0
    size_t used;
543
544
0
    if (buf == NULL)
545
0
        return;
546
    /* Don't shrink pull parser memory buffers. */
547
0
    if ((!PARSER_PROGRESSIVE(ctxt)) &&
548
0
        (buf->encoder == NULL) &&
549
0
        (buf->readcallback == NULL))
550
0
        return;
551
552
0
    used = in->cur - in->base;
553
    /*
554
     * Do not shrink on large buffers whose only a tiny fraction
555
     * was consumed
556
     */
557
0
    if (used > INPUT_CHUNK) {
558
0
  size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
559
560
0
  if (res > 0) {
561
0
            used -= res;
562
0
            if ((res > ULONG_MAX) ||
563
0
                (in->consumed > ULONG_MAX - (unsigned long)res))
564
0
                in->consumed = ULONG_MAX;
565
0
            else
566
0
                in->consumed += res;
567
0
  }
568
0
    }
569
570
0
    xmlBufUpdateInput(buf->buffer, in, used);
571
0
}
572
573
/**
574
 * xmlParserInputShrink:
575
 * @in:  an XML parser input
576
 *
577
 * DEPRECATED: Don't use.
578
 *
579
 * This function removes used input for the parser.
580
 */
581
void
582
0
xmlParserInputShrink(xmlParserInputPtr in) {
583
0
    size_t used;
584
0
    size_t ret;
585
586
0
    if (in == NULL) return;
587
0
    if (in->buf == NULL) return;
588
0
    if (in->base == NULL) return;
589
0
    if (in->cur == NULL) return;
590
0
    if (in->buf->buffer == NULL) return;
591
592
0
    used = in->cur - in->base;
593
    /*
594
     * Do not shrink on large buffers whose only a tiny fraction
595
     * was consumed
596
     */
597
0
    if (used > INPUT_CHUNK) {
598
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
599
0
  if (ret > 0) {
600
0
            used -= ret;
601
0
            if ((ret > ULONG_MAX) ||
602
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
603
0
                in->consumed = ULONG_MAX;
604
0
            else
605
0
                in->consumed += ret;
606
0
  }
607
0
    }
608
609
0
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
610
0
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
611
0
    }
612
613
0
    in->base = xmlBufContent(in->buf->buffer);
614
0
    if (in->base == NULL) {
615
        /* TODO: raise error */
616
0
        in->base = BAD_CAST "";
617
0
        in->cur = in->base;
618
0
        in->end = in->base;
619
0
        return;
620
0
    }
621
0
    in->cur = in->base + used;
622
0
    in->end = xmlBufEnd(in->buf->buffer);
623
0
}
624
625
/************************************************************************
626
 *                  *
627
 *    UTF8 character input and related functions    *
628
 *                  *
629
 ************************************************************************/
630
631
/**
632
 * xmlNextChar:
633
 * @ctxt:  the XML parser context
634
 *
635
 * DEPRECATED: Internal function, do not use.
636
 *
637
 * Skip to the next char input char.
638
 */
639
640
void
641
xmlNextChar(xmlParserCtxtPtr ctxt)
642
0
{
643
0
    const unsigned char *cur;
644
0
    size_t avail;
645
0
    int c;
646
647
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
648
0
        return;
649
650
0
    avail = ctxt->input->end - ctxt->input->cur;
651
652
0
    if (avail < INPUT_CHUNK) {
653
0
        xmlParserGrow(ctxt);
654
0
        if (ctxt->input->cur >= ctxt->input->end)
655
0
            return;
656
0
        avail = ctxt->input->end - ctxt->input->cur;
657
0
    }
658
659
0
    cur = ctxt->input->cur;
660
0
    c = *cur;
661
662
0
    if (c < 0x80) {
663
0
        if (c == '\n') {
664
0
            ctxt->input->cur++;
665
0
            ctxt->input->line++;
666
0
            ctxt->input->col = 1;
667
0
        } else if (c == '\r') {
668
            /*
669
             *   2.11 End-of-Line Handling
670
             *   the literal two-character sequence "#xD#xA" or a standalone
671
             *   literal #xD, an XML processor must pass to the application
672
             *   the single character #xA.
673
             */
674
0
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
675
0
            ctxt->input->line++;
676
0
            ctxt->input->col = 1;
677
0
            return;
678
0
        } else {
679
0
            ctxt->input->cur++;
680
0
            ctxt->input->col++;
681
0
        }
682
0
    } else {
683
0
        ctxt->input->col++;
684
685
0
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
686
0
            goto encoding_error;
687
688
0
        if (c < 0xe0) {
689
            /* 2-byte code */
690
0
            if (c < 0xc2)
691
0
                goto encoding_error;
692
0
            ctxt->input->cur += 2;
693
0
        } else {
694
0
            unsigned int val = (c << 8) | cur[1];
695
696
0
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
697
0
                goto encoding_error;
698
699
0
            if (c < 0xf0) {
700
                /* 3-byte code */
701
0
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
702
0
                    goto encoding_error;
703
0
                ctxt->input->cur += 3;
704
0
            } else {
705
0
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
706
0
                    goto encoding_error;
707
708
                /* 4-byte code */
709
0
                if ((val < 0xf090) || (val >= 0xf490))
710
0
                    goto encoding_error;
711
0
                ctxt->input->cur += 4;
712
0
            }
713
0
        }
714
0
    }
715
716
0
    return;
717
718
0
encoding_error:
719
    /* Only report the first error */
720
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
721
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
722
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
723
0
    }
724
0
    ctxt->input->cur++;
725
0
    return;
726
0
}
727
728
/**
729
 * xmlCurrentChar:
730
 * @ctxt:  the XML parser context
731
 * @len:  pointer to the length of the char read
732
 *
733
 * DEPRECATED: Internal function, do not use.
734
 *
735
 * The current char value, if using UTF-8 this may actually span multiple
736
 * bytes in the input buffer. Implement the end of line normalization:
737
 * 2.11 End-of-Line Handling
738
 * Wherever an external parsed entity or the literal entity value
739
 * of an internal parsed entity contains either the literal two-character
740
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
741
 * must pass to the application the single character #xA.
742
 * This behavior can conveniently be produced by normalizing all
743
 * line breaks to #xA on input, before parsing.)
744
 *
745
 * Returns the current char value and its length
746
 */
747
748
int
749
0
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
750
0
    const unsigned char *cur;
751
0
    size_t avail;
752
0
    int c;
753
754
0
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
755
756
0
    avail = ctxt->input->end - ctxt->input->cur;
757
758
0
    if (avail < INPUT_CHUNK) {
759
0
        xmlParserGrow(ctxt);
760
0
        avail = ctxt->input->end - ctxt->input->cur;
761
0
    }
762
763
0
    cur = ctxt->input->cur;
764
0
    c = *cur;
765
766
0
    if (c < 0x80) {
767
  /* 1-byte code */
768
0
        if (c < 0x20) {
769
            /*
770
             *   2.11 End-of-Line Handling
771
             *   the literal two-character sequence "#xD#xA" or a standalone
772
             *   literal #xD, an XML processor must pass to the application
773
             *   the single character #xA.
774
             */
775
0
            if (c == '\r') {
776
                /*
777
                 * TODO: This function shouldn't change the 'cur' pointer
778
                 * as side effect, but the NEXTL macro in parser.c relies
779
                 * on this behavior when incrementing line numbers.
780
                 */
781
0
                if (cur[1] == '\n')
782
0
                    ctxt->input->cur++;
783
0
                *len = 1;
784
0
                c = '\n';
785
0
            } else if (c == 0) {
786
0
                if (ctxt->input->cur >= ctxt->input->end) {
787
0
                    *len = 0;
788
0
                } else {
789
0
                    *len = 1;
790
                    /*
791
                     * TODO: Null bytes should be handled by callers,
792
                     * but this can be tricky.
793
                     */
794
0
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
795
0
                            "Char 0x0 out of allowed range\n");
796
0
                }
797
0
            } else {
798
0
                *len = 1;
799
0
            }
800
0
        } else {
801
0
            *len = 1;
802
0
        }
803
804
0
        return(c);
805
0
    } else {
806
0
        int val;
807
808
0
        if (avail < 2)
809
0
            goto incomplete_sequence;
810
0
        if ((cur[1] & 0xc0) != 0x80)
811
0
            goto encoding_error;
812
813
0
        if (c < 0xe0) {
814
            /* 2-byte code */
815
0
            if (c < 0xc2)
816
0
                goto encoding_error;
817
0
            val = (c & 0x1f) << 6;
818
0
            val |= cur[1] & 0x3f;
819
0
            *len = 2;
820
0
        } else {
821
0
            if (avail < 3)
822
0
                goto incomplete_sequence;
823
0
            if ((cur[2] & 0xc0) != 0x80)
824
0
                goto encoding_error;
825
826
0
            if (c < 0xf0) {
827
                /* 3-byte code */
828
0
                val = (c & 0xf) << 12;
829
0
                val |= (cur[1] & 0x3f) << 6;
830
0
                val |= cur[2] & 0x3f;
831
0
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
832
0
                    goto encoding_error;
833
0
                *len = 3;
834
0
            } else {
835
0
                if (avail < 4)
836
0
                    goto incomplete_sequence;
837
0
                if ((cur[3] & 0xc0) != 0x80)
838
0
                    goto encoding_error;
839
840
                /* 4-byte code */
841
0
                val = (c & 0x0f) << 18;
842
0
                val |= (cur[1] & 0x3f) << 12;
843
0
                val |= (cur[2] & 0x3f) << 6;
844
0
                val |= cur[3] & 0x3f;
845
0
                if ((val < 0x10000) || (val >= 0x110000))
846
0
                    goto encoding_error;
847
0
                *len = 4;
848
0
            }
849
0
        }
850
851
0
        return(val);
852
0
    }
853
854
0
encoding_error:
855
    /* Only report the first error */
856
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
857
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
858
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
859
0
    }
860
0
    *len = 1;
861
0
    return(0xFFFD); /* U+FFFD Replacement Character */
862
863
0
incomplete_sequence:
864
    /*
865
     * An encoding problem may arise from a truncated input buffer
866
     * splitting a character in the middle. In that case do not raise
867
     * an error but return 0. This should only happen when push parsing
868
     * char data.
869
     */
870
0
    *len = 0;
871
0
    return(0);
872
0
}
873
874
/**
875
 * xmlStringCurrentChar:
876
 * @ctxt:  the XML parser context
877
 * @cur:  pointer to the beginning of the char
878
 * @len:  pointer to the length of the char read
879
 *
880
 * DEPRECATED: Internal function, do not use.
881
 *
882
 * The current char value, if using UTF-8 this may actually span multiple
883
 * bytes in the input buffer.
884
 *
885
 * Returns the current char value and its length
886
 */
887
888
int
889
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
890
0
                     const xmlChar *cur, int *len) {
891
0
    int c;
892
893
0
    if ((cur == NULL) || (len == NULL))
894
0
        return(0);
895
896
    /* cur is zero-terminated, so we can lie about its length. */
897
0
    *len = 4;
898
0
    c = xmlGetUTF8Char(cur, len);
899
900
0
    return((c < 0) ? 0 : c);
901
0
}
902
903
/**
904
 * xmlCopyCharMultiByte:
905
 * @out:  pointer to an array of xmlChar
906
 * @val:  the char value
907
 *
908
 * append the char value in the array
909
 *
910
 * Returns the number of xmlChar written
911
 */
912
int
913
0
xmlCopyCharMultiByte(xmlChar *out, int val) {
914
0
    if ((out == NULL) || (val < 0)) return(0);
915
    /*
916
     * We are supposed to handle UTF8, check it's valid
917
     * From rfc2044: encoding of the Unicode values on UTF-8:
918
     *
919
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
920
     * 0000 0000-0000 007F   0xxxxxxx
921
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
922
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
923
     */
924
0
    if  (val >= 0x80) {
925
0
  xmlChar *savedout = out;
926
0
  int bits;
927
0
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
928
0
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
929
0
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
930
0
  else {
931
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
932
0
            fprintf(stderr, "xmlCopyCharMultiByte: codepoint out of range\n");
933
0
            abort();
934
0
#endif
935
0
      return(0);
936
0
  }
937
0
  for ( ; bits >= 0; bits-= 6)
938
0
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
939
0
  return (out - savedout);
940
0
    }
941
0
    *out = val;
942
0
    return 1;
943
0
}
944
945
/**
946
 * xmlCopyChar:
947
 * @len:  Ignored, compatibility
948
 * @out:  pointer to an array of xmlChar
949
 * @val:  the char value
950
 *
951
 * append the char value in the array
952
 *
953
 * Returns the number of xmlChar written
954
 */
955
956
int
957
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
958
0
    if ((out == NULL) || (val < 0)) return(0);
959
    /* the len parameter is ignored */
960
0
    if  (val >= 0x80) {
961
0
  return(xmlCopyCharMultiByte (out, val));
962
0
    }
963
0
    *out = val;
964
0
    return 1;
965
0
}
966
967
/************************************************************************
968
 *                  *
969
 *    Commodity functions to switch encodings     *
970
 *                  *
971
 ************************************************************************/
972
973
static int
974
0
xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
975
0
    xmlChar out[200];
976
0
    xmlCharEncodingHandlerPtr handler;
977
0
    int inlen, outlen, res, i;
978
979
0
    *hout = NULL;
980
981
    /*
982
     * To detect the EBCDIC code page, we convert the first 200 bytes
983
     * to EBCDIC-US and try to find the encoding declaration.
984
     */
985
0
    res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
986
0
    if (res != 0)
987
0
        return(res);
988
0
    outlen = sizeof(out) - 1;
989
0
    inlen = input->end - input->cur;
990
0
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
991
    /*
992
     * Return the EBCDIC handler if decoding failed. The error will
993
     * be reported later.
994
     */
995
0
    if (res < 0)
996
0
        goto done;
997
0
    out[outlen] = 0;
998
999
0
    for (i = 0; i < outlen; i++) {
1000
0
        if (out[i] == '>')
1001
0
            break;
1002
0
        if ((out[i] == 'e') &&
1003
0
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1004
0
            int start, cur, quote;
1005
1006
0
            i += 8;
1007
0
            while (IS_BLANK_CH(out[i]))
1008
0
                i += 1;
1009
0
            if (out[i++] != '=')
1010
0
                break;
1011
0
            while (IS_BLANK_CH(out[i]))
1012
0
                i += 1;
1013
0
            quote = out[i++];
1014
0
            if ((quote != '\'') && (quote != '"'))
1015
0
                break;
1016
0
            start = i;
1017
0
            cur = out[i];
1018
0
            while (((cur >= 'a') && (cur <= 'z')) ||
1019
0
                   ((cur >= 'A') && (cur <= 'Z')) ||
1020
0
                   ((cur >= '0') && (cur <= '9')) ||
1021
0
                   (cur == '.') || (cur == '_') ||
1022
0
                   (cur == '-'))
1023
0
                cur = out[++i];
1024
0
            if (cur != quote)
1025
0
                break;
1026
0
            out[i] = 0;
1027
0
            xmlCharEncCloseFunc(handler);
1028
0
            res = xmlOpenCharEncodingHandler((char *) out + start,
1029
0
                                             /* output */ 0, &handler);
1030
0
            if (res != 0)
1031
0
                return(res);
1032
0
            *hout = handler;
1033
0
            return(0);
1034
0
        }
1035
0
    }
1036
1037
0
done:
1038
    /*
1039
     * Encoding handlers are stateful, so we have to recreate them.
1040
     */
1041
0
    xmlCharEncCloseFunc(handler);
1042
0
    res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
1043
0
    if (res != 0)
1044
0
        return(res);
1045
0
    *hout = handler;
1046
0
    return(0);
1047
0
}
1048
1049
/**
1050
 * xmlSwitchEncoding:
1051
 * @ctxt:  the parser context
1052
 * @enc:  the encoding value (number)
1053
 *
1054
 * Use encoding specified by enum to decode input data. This overrides
1055
 * the encoding found in the XML declaration.
1056
 *
1057
 * This function can also be used to override the encoding of chunks
1058
 * passed to xmlParseChunk.
1059
 *
1060
 * Returns 0 in case of success, -1 otherwise
1061
 */
1062
int
1063
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1064
0
{
1065
0
    xmlCharEncodingHandlerPtr handler = NULL;
1066
0
    int ret;
1067
0
    int res;
1068
1069
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
1070
0
        return(-1);
1071
1072
0
    switch (enc) {
1073
0
  case XML_CHAR_ENCODING_NONE:
1074
0
  case XML_CHAR_ENCODING_UTF8:
1075
0
        case XML_CHAR_ENCODING_ASCII:
1076
0
            res = 0;
1077
0
            break;
1078
0
        case XML_CHAR_ENCODING_EBCDIC:
1079
0
            res = xmlDetectEBCDIC(ctxt->input, &handler);
1080
0
            break;
1081
0
        default:
1082
0
            res = xmlLookupCharEncodingHandler(enc, &handler);
1083
0
            break;
1084
0
    }
1085
1086
0
    if (res != 0) {
1087
0
        const char *name = xmlGetCharEncodingName(enc);
1088
1089
0
        xmlFatalErr(ctxt, res, (name ? name : "<null>"));
1090
0
        return(-1);
1091
0
    }
1092
1093
0
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1094
1095
0
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1096
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1097
0
    }
1098
1099
0
    return(ret);
1100
0
}
1101
1102
/**
1103
 * xmlSwitchEncodingName:
1104
 * @ctxt:  the parser context, only for error reporting
1105
 * @input:  the input strea,
1106
 * @encoding:  the encoding name
1107
 *
1108
 * Available since 2.13.0.
1109
 *
1110
 * Returns 0 in case of success, -1 otherwise
1111
 */
1112
static int
1113
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1114
0
                           const char *encoding) {
1115
0
    xmlCharEncodingHandlerPtr handler;
1116
0
    int res;
1117
1118
0
    if (encoding == NULL)
1119
0
        return(-1);
1120
1121
0
    res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1122
0
    if (res != 0) {
1123
0
        xmlFatalErr(ctxt, res, encoding);
1124
0
        return(-1);
1125
0
    }
1126
1127
0
    return(xmlSwitchInputEncoding(ctxt, input, handler));
1128
0
}
1129
1130
/**
1131
 * xmlSwitchEncodingName:
1132
 * @ctxt:  the parser context
1133
 * @encoding:  the encoding name
1134
 *
1135
 * Use specified encoding to decode input data. This overrides the
1136
 * encoding found in the XML declaration.
1137
 *
1138
 * This function can also be used to override the encoding of chunks
1139
 * passed to xmlParseChunk.
1140
 *
1141
 * Available since 2.13.0.
1142
 *
1143
 * Returns 0 in case of success, -1 otherwise
1144
 */
1145
int
1146
0
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1147
0
    if (ctxt == NULL)
1148
0
        return(-1);
1149
1150
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1151
0
}
1152
1153
/**
1154
 * xmlSwitchInputEncoding:
1155
 * @ctxt:  the parser context, only for error reporting
1156
 * @input:  the input stream
1157
 * @handler:  the encoding handler
1158
 *
1159
 * DEPRECATED: Internal function, don't use.
1160
 *
1161
 * Use encoding handler to decode input data.
1162
 *
1163
 * Returns 0 in case of success, -1 otherwise
1164
 */
1165
int
1166
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1167
                       xmlCharEncodingHandlerPtr handler)
1168
0
{
1169
0
    int nbchars;
1170
0
    xmlParserInputBufferPtr in;
1171
1172
0
    if ((input == NULL) || (input->buf == NULL)) {
1173
0
        xmlCharEncCloseFunc(handler);
1174
0
  return (-1);
1175
0
    }
1176
0
    in = input->buf;
1177
1178
0
    input->flags |= XML_INPUT_HAS_ENCODING;
1179
1180
    /*
1181
     * UTF-8 requires no encoding handler.
1182
     */
1183
0
    if ((handler != NULL) &&
1184
0
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1185
0
        xmlCharEncCloseFunc(handler);
1186
0
        handler = NULL;
1187
0
    }
1188
1189
0
    if (in->encoder == handler)
1190
0
        return (0);
1191
1192
0
    if (in->encoder != NULL) {
1193
        /*
1194
         * Switching encodings during parsing is a really bad idea,
1195
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1196
         * separate calls to xmlParseChunk.
1197
         *
1198
         * TODO: We should check whether the "raw" input buffer is empty and
1199
         * convert the old content using the old encoder.
1200
         */
1201
1202
0
        xmlCharEncCloseFunc(in->encoder);
1203
0
        in->encoder = handler;
1204
0
        return (0);
1205
0
    }
1206
1207
0
    in->encoder = handler;
1208
1209
    /*
1210
     * Is there already some content down the pipe to convert ?
1211
     */
1212
0
    if (xmlBufIsEmpty(in->buffer) == 0) {
1213
0
        xmlBufPtr buf;
1214
0
        size_t processed;
1215
1216
0
        buf = xmlBufCreate();
1217
0
        if (buf == NULL) {
1218
0
            xmlCtxtErrMemory(ctxt);
1219
0
            return(-1);
1220
0
        }
1221
1222
        /*
1223
         * Shrink the current input buffer.
1224
         * Move it as the raw buffer and create a new input buffer
1225
         */
1226
0
        processed = input->cur - input->base;
1227
0
        xmlBufShrink(in->buffer, processed);
1228
0
        input->consumed += processed;
1229
0
        in->raw = in->buffer;
1230
0
        in->buffer = buf;
1231
0
        in->rawconsumed = processed;
1232
1233
0
        nbchars = xmlCharEncInput(in);
1234
0
        xmlBufResetInput(in->buffer, input);
1235
0
        if (nbchars == XML_ENC_ERR_MEMORY) {
1236
0
            xmlCtxtErrMemory(ctxt);
1237
0
        } else if (nbchars < 0) {
1238
0
            xmlCtxtErrIO(ctxt, in->error, NULL);
1239
0
            xmlHaltParser(ctxt);
1240
0
            return (-1);
1241
0
        }
1242
0
    }
1243
0
    return (0);
1244
0
}
1245
1246
/**
1247
 * xmlSwitchToEncoding:
1248
 * @ctxt:  the parser context
1249
 * @handler:  the encoding handler
1250
 *
1251
 * Use encoding handler to decode input data.
1252
 *
1253
 * This function can be used to enforce the encoding of chunks passed
1254
 * to xmlParseChunk.
1255
 *
1256
 * Returns 0 in case of success, -1 otherwise
1257
 */
1258
int
1259
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1260
0
{
1261
0
    if (ctxt == NULL)
1262
0
        return(-1);
1263
0
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1264
0
}
1265
1266
/**
1267
 * xmlDetectEncoding:
1268
 * @ctxt:  the parser context
1269
 *
1270
 * Handle optional BOM, detect and switch to encoding.
1271
 *
1272
 * Assumes that there are at least four bytes in the input buffer.
1273
 */
1274
void
1275
0
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1276
0
    const xmlChar *in;
1277
0
    xmlCharEncoding enc;
1278
0
    int bomSize;
1279
0
    int autoFlag = 0;
1280
1281
0
    if (xmlParserGrow(ctxt) < 0)
1282
0
        return;
1283
0
    in = ctxt->input->cur;
1284
0
    if (ctxt->input->end - in < 4)
1285
0
        return;
1286
1287
0
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1288
        /*
1289
         * If the encoding was already set, only skip the BOM which was
1290
         * possibly decoded to UTF-8.
1291
         */
1292
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1293
0
            ctxt->input->cur += 3;
1294
0
        }
1295
1296
0
        return;
1297
0
    }
1298
1299
0
    enc = XML_CHAR_ENCODING_NONE;
1300
0
    bomSize = 0;
1301
1302
0
    switch (in[0]) {
1303
0
        case 0x00:
1304
0
            if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1305
0
                enc = XML_CHAR_ENCODING_UCS4BE;
1306
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1307
0
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1308
0
                enc = XML_CHAR_ENCODING_UTF16BE;
1309
0
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1310
0
            }
1311
0
            break;
1312
1313
0
        case 0x3C:
1314
0
            if (in[1] == 0x00) {
1315
0
                if ((in[2] == 0x00) && (in[3] == 0x00)) {
1316
0
                    enc = XML_CHAR_ENCODING_UCS4LE;
1317
0
                    autoFlag = XML_INPUT_AUTO_OTHER;
1318
0
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1319
0
                    enc = XML_CHAR_ENCODING_UTF16LE;
1320
0
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1321
0
                }
1322
0
            }
1323
0
            break;
1324
1325
0
        case 0x4C:
1326
0
      if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1327
0
          enc = XML_CHAR_ENCODING_EBCDIC;
1328
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1329
0
            }
1330
0
            break;
1331
1332
0
        case 0xEF:
1333
0
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1334
0
                enc = XML_CHAR_ENCODING_UTF8;
1335
0
                autoFlag = XML_INPUT_AUTO_UTF8;
1336
0
                bomSize = 3;
1337
0
            }
1338
0
            break;
1339
1340
0
        case 0xFE:
1341
0
            if (in[1] == 0xFF) {
1342
0
                enc = XML_CHAR_ENCODING_UTF16BE;
1343
0
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1344
0
                bomSize = 2;
1345
0
            }
1346
0
            break;
1347
1348
0
        case 0xFF:
1349
0
            if (in[1] == 0xFE) {
1350
0
                enc = XML_CHAR_ENCODING_UTF16LE;
1351
0
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1352
0
                bomSize = 2;
1353
0
            }
1354
0
            break;
1355
0
    }
1356
1357
0
    if (bomSize > 0) {
1358
0
        ctxt->input->cur += bomSize;
1359
0
    }
1360
1361
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1362
0
        ctxt->input->flags |= autoFlag;
1363
0
        xmlSwitchEncoding(ctxt, enc);
1364
0
    }
1365
0
}
1366
1367
/**
1368
 * xmlSetDeclaredEncoding:
1369
 * @ctxt:  the parser context
1370
 * @encoding:  declared encoding
1371
 *
1372
 * Set the encoding from a declaration in the document.
1373
 *
1374
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1375
 * about encoding mismatches.
1376
 *
1377
 * Takes ownership of 'encoding'.
1378
 */
1379
void
1380
0
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1381
0
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1382
0
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1383
0
        xmlSwitchEncodingName(ctxt, (const char *) encoding);
1384
0
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1385
0
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1386
0
        static const char *allowedUTF8[] = {
1387
0
            "UTF-8", "UTF8", NULL
1388
0
        };
1389
0
        static const char *allowedUTF16LE[] = {
1390
0
            "UTF-16", "UTF-16LE", "UTF16", NULL
1391
0
        };
1392
0
        static const char *allowedUTF16BE[] = {
1393
0
            "UTF-16", "UTF-16BE", "UTF16", NULL
1394
0
        };
1395
0
        const char **allowed = NULL;
1396
0
        const char *autoEnc = NULL;
1397
1398
0
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1399
0
            case XML_INPUT_AUTO_UTF8:
1400
0
                allowed = allowedUTF8;
1401
0
                autoEnc = "UTF-8";
1402
0
                break;
1403
0
            case XML_INPUT_AUTO_UTF16LE:
1404
0
                allowed = allowedUTF16LE;
1405
0
                autoEnc = "UTF-16LE";
1406
0
                break;
1407
0
            case XML_INPUT_AUTO_UTF16BE:
1408
0
                allowed = allowedUTF16BE;
1409
0
                autoEnc = "UTF-16BE";
1410
0
                break;
1411
0
        }
1412
1413
0
        if (allowed != NULL) {
1414
0
            const char **p;
1415
0
            int match = 0;
1416
1417
0
            for (p = allowed; *p != NULL; p++) {
1418
0
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1419
0
                    match = 1;
1420
0
                    break;
1421
0
                }
1422
0
            }
1423
1424
0
            if (match == 0) {
1425
0
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1426
0
                              "Encoding '%s' doesn't match "
1427
0
                              "auto-detected '%s'\n",
1428
0
                              encoding, BAD_CAST autoEnc);
1429
0
                xmlFree(encoding);
1430
0
                encoding = xmlStrdup(BAD_CAST autoEnc);
1431
0
                if (encoding == NULL)
1432
0
                    xmlCtxtErrMemory(ctxt);
1433
0
            }
1434
0
        }
1435
0
    }
1436
1437
0
    if (ctxt->encoding != NULL)
1438
0
        xmlFree((xmlChar *) ctxt->encoding);
1439
0
    ctxt->encoding = encoding;
1440
0
}
1441
1442
/**
1443
 * xmlGetActualEncoding:
1444
 * @ctxt:  the parser context
1445
 *
1446
 * Returns the actual used to parse the document. This can differ from
1447
 * the declared encoding.
1448
 */
1449
const xmlChar *
1450
0
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1451
0
    const xmlChar *encoding = NULL;
1452
1453
0
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1454
0
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1455
        /* Preserve encoding exactly */
1456
0
        encoding = ctxt->encoding;
1457
0
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1458
0
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1459
0
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1460
0
        encoding = BAD_CAST "UTF-8";
1461
0
    }
1462
1463
0
    return(encoding);
1464
0
}
1465
1466
/************************************************************************
1467
 *                  *
1468
 *  Commodity functions to handle entities processing   *
1469
 *                  *
1470
 ************************************************************************/
1471
1472
/**
1473
 * xmlFreeInputStream:
1474
 * @input:  an xmlParserInputPtr
1475
 *
1476
 * Free up an input stream.
1477
 */
1478
void
1479
0
xmlFreeInputStream(xmlParserInputPtr input) {
1480
0
    if (input == NULL) return;
1481
1482
0
    if (input->filename != NULL) xmlFree((char *) input->filename);
1483
0
    if (input->version != NULL) xmlFree((char *) input->version);
1484
0
    if ((input->free != NULL) && (input->base != NULL))
1485
0
        input->free((xmlChar *) input->base);
1486
0
    if (input->buf != NULL)
1487
0
        xmlFreeParserInputBuffer(input->buf);
1488
0
    xmlFree(input);
1489
0
}
1490
1491
/**
1492
 * xmlNewInputStream:
1493
 * @ctxt:  an XML parser context
1494
 *
1495
 * Create a new input stream structure.
1496
 *
1497
 * Returns the new input stream or NULL
1498
 */
1499
xmlParserInputPtr
1500
0
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1501
0
    xmlParserInputPtr input;
1502
1503
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1504
0
    if (input == NULL) {
1505
0
        xmlCtxtErrMemory(ctxt);
1506
0
  return(NULL);
1507
0
    }
1508
0
    memset(input, 0, sizeof(xmlParserInput));
1509
0
    input->line = 1;
1510
0
    input->col = 1;
1511
1512
    /*
1513
     * If the context is NULL the id cannot be initialized, but that
1514
     * should not happen while parsing which is the situation where
1515
     * the id is actually needed.
1516
     */
1517
0
    if (ctxt != NULL) {
1518
0
        if (input->id >= INT_MAX) {
1519
0
            xmlCtxtErrMemory(ctxt);
1520
0
            return(NULL);
1521
0
        }
1522
0
        input->id = ctxt->input_id++;
1523
0
    }
1524
1525
0
    return(input);
1526
0
}
1527
1528
/**
1529
 * xmlNewInputURL:
1530
 * @ctxt:  parser context
1531
 * @url:  filename or URL
1532
 * @publicId:  publid ID from doctype (optional)
1533
 * @encoding:  character encoding (optional)
1534
 * @flags:  unused, pass 0
1535
 *
1536
 * Creates a new parser input from the filesystem, the network or
1537
 * a user-defined resource loader.
1538
 *
1539
 * Returns a new parser input.
1540
 */
1541
xmlParserInputPtr
1542
xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
1543
0
               const char *encoding, int flags ATTRIBUTE_UNUSED) {
1544
0
    xmlParserInputPtr input;
1545
1546
0
    if ((ctxt == NULL) || (url == NULL))
1547
0
  return(NULL);
1548
1549
0
    input = xmlLoadExternalEntity(url, publicId, ctxt);
1550
0
    if (input == NULL)
1551
0
        return(NULL);
1552
1553
0
    if (encoding != NULL)
1554
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1555
1556
0
    return(input);
1557
0
}
1558
1559
/**
1560
 * xmlNewInputInternal:
1561
 * @ctxt:  parser context
1562
 * @buf:  parser input buffer
1563
 * @filename:  filename or URL
1564
 * @encoding:  character encoding (optional)
1565
 *
1566
 * Internal helper function.
1567
 *
1568
 * Returns a new parser input.
1569
 */
1570
static xmlParserInputPtr
1571
xmlNewInputInternal(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1572
0
                    const char *filename, const char *encoding) {
1573
0
    xmlParserInputPtr input;
1574
1575
0
    input = xmlNewInputStream(ctxt);
1576
0
    if (input == NULL) {
1577
0
  xmlFreeParserInputBuffer(buf);
1578
0
  return(NULL);
1579
0
    }
1580
1581
0
    input->buf = buf;
1582
0
    xmlBufResetInput(input->buf->buffer, input);
1583
1584
0
    if (filename != NULL) {
1585
0
        input->filename = xmlMemStrdup(filename);
1586
0
        if (input->filename == NULL) {
1587
0
            xmlCtxtErrMemory(ctxt);
1588
0
            xmlFreeInputStream(input);
1589
0
            return(NULL);
1590
0
        }
1591
0
    }
1592
1593
0
    if (encoding != NULL)
1594
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1595
1596
0
    return(input);
1597
0
}
1598
1599
/**
1600
 * xmlNewInputMemory:
1601
 * @ctxt:  parser context
1602
 * @url:  base URL (optional)
1603
 * @mem:  pointer to char array
1604
 * @size:  size of array
1605
 * @encoding:  character encoding (optional)
1606
 * @flags:  optimization hints
1607
 *
1608
 * Creates a new parser input to read from a memory area.
1609
 *
1610
 * @url is used as base to resolve external entities and for
1611
 * error reporting.
1612
 *
1613
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1614
 * stay unchanged until parsing has finished. This can avoid
1615
 * temporary copies.
1616
 *
1617
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1618
 * area must contain a zero byte after the buffer at position @size.
1619
 * This can avoid temporary copies.
1620
 *
1621
 * Returns a new parser input.
1622
 */
1623
xmlParserInputPtr
1624
xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url,
1625
                  const void *mem, size_t size,
1626
0
                  const char *encoding, int flags) {
1627
0
    xmlParserInputBufferPtr buf;
1628
1629
0
    if ((ctxt == NULL) || (mem == NULL))
1630
0
  return(NULL);
1631
1632
0
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1633
0
    if (buf == NULL) {
1634
0
  xmlCtxtErrMemory(ctxt);
1635
0
        return(NULL);
1636
0
    }
1637
1638
0
    return(xmlNewInputInternal(ctxt, buf, url, encoding));
1639
0
}
1640
1641
/**
1642
 * xmlNewInputString:
1643
 * @ctxt:  parser context
1644
 * @url:  base URL (optional)
1645
 * @str:  zero-terminated string
1646
 * @encoding:  character encoding (optional)
1647
 * @flags:  optimization hints
1648
 *
1649
 * Creates a new parser input to read from a zero-terminated string.
1650
 *
1651
 * @url is used as base to resolve external entities and for
1652
 * error reporting.
1653
 *
1654
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1655
 * stay unchanged until parsing has finished. This can avoid
1656
 * temporary copies.
1657
 *
1658
 * Returns a new parser input.
1659
 */
1660
xmlParserInputPtr
1661
xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
1662
0
                  const char *str, const char *encoding, int flags) {
1663
0
    xmlParserInputBufferPtr buf;
1664
1665
0
    if ((ctxt == NULL) || (str == NULL))
1666
0
  return(NULL);
1667
1668
0
    buf = xmlNewInputBufferString(str, flags);
1669
0
    if (buf == NULL) {
1670
0
  xmlCtxtErrMemory(ctxt);
1671
0
        return(NULL);
1672
0
    }
1673
1674
0
    return(xmlNewInputInternal(ctxt, buf, url, encoding));
1675
0
}
1676
1677
/**
1678
 * xmlNewInputFd:
1679
 * @ctxt:  parser context
1680
 * @url:  base URL (optional)
1681
 * @fd:  file descriptor
1682
 * @encoding:  character encoding (optional)
1683
 * @flags:  unused, pass 0
1684
 *
1685
 * Creates a new parser input to read from a zero-terminated string.
1686
 *
1687
 * @url is used as base to resolve external entities and for
1688
 * error reporting.
1689
 *
1690
 * @fd is closed after parsing has finished.
1691
 *
1692
 * Returns a new parser input.
1693
 */
1694
xmlParserInputPtr
1695
xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
1696
0
              int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) {
1697
0
    xmlParserInputBufferPtr buf;
1698
1699
0
    if ((ctxt == NULL) || (fd < 0))
1700
0
  return(NULL);
1701
1702
0
    buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1703
0
    if (buf == NULL) {
1704
0
  xmlCtxtErrMemory(ctxt);
1705
0
        return(NULL);
1706
0
    }
1707
1708
0
    return(xmlNewInputInternal(ctxt, buf, url, encoding));
1709
0
}
1710
1711
/**
1712
 * xmlNewInputIO:
1713
 * @ctxt:  parser context
1714
 * @url:  base URL (optional)
1715
 * @ioRead:  read callback
1716
 * @ioClose:  close callback (optional)
1717
 * @ioCtxt:  IO context
1718
 * @encoding:  character encoding (optional)
1719
 * @flags:  unused, pass 0
1720
 *
1721
 * Creates a new parser input to read from input callbacks and
1722
 * cintext.
1723
 *
1724
 * @url is used as base to resolve external entities and for
1725
 * error reporting.
1726
 *
1727
 * @ioRead is called to read new data into a provided buffer.
1728
 * It must return the number of bytes written into the buffer
1729
 * ot a negative xmlParserErrors code on failure.
1730
 *
1731
 * @ioClose is called after parsing has finished.
1732
 *
1733
 * @ioCtxt is an opaque pointer passed to the callbacks.
1734
 *
1735
 * Returns a new parser input.
1736
 */
1737
xmlParserInputPtr
1738
xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url,
1739
              xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose,
1740
              void *ioCtxt,
1741
0
              const char *encoding, int flags ATTRIBUTE_UNUSED) {
1742
0
    xmlParserInputBufferPtr buf;
1743
1744
0
    if ((ctxt == NULL) || (ioRead == NULL))
1745
0
  return(NULL);
1746
1747
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1748
0
    if (buf == NULL) {
1749
0
        xmlCtxtErrMemory(ctxt);
1750
0
        if (ioClose != NULL)
1751
0
            ioClose(ioCtxt);
1752
0
        return(NULL);
1753
0
    }
1754
1755
0
    buf->context = ioCtxt;
1756
0
    buf->readcallback = ioRead;
1757
0
    buf->closecallback = ioClose;
1758
1759
0
    return(xmlNewInputInternal(ctxt, buf, url, encoding));
1760
0
}
1761
1762
/**
1763
 * xmlNewInputPush:
1764
 * @ctxt:  parser context
1765
 * @url:  base URL (optional)
1766
 * @chunk:  pointer to char array
1767
 * @size:  size of array
1768
 * @encoding:  character encoding (optional)
1769
 *
1770
 * Creates a new parser input for a push parser.
1771
 *
1772
 * Returns a new parser input.
1773
 */
1774
xmlParserInputPtr
1775
xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
1776
0
                const char *chunk, int size, const char *encoding) {
1777
0
    xmlParserInputBufferPtr buf;
1778
0
    xmlParserInputPtr input;
1779
1780
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1781
0
    if (buf == NULL) {
1782
0
        xmlCtxtErrMemory(ctxt);
1783
0
        return(NULL);
1784
0
    }
1785
1786
0
    input = xmlNewInputInternal(ctxt, buf, url, encoding);
1787
0
    if (input == NULL)
1788
0
  return(NULL);
1789
1790
0
    input->flags |= XML_INPUT_PROGRESSIVE;
1791
1792
0
    if ((size > 0) && (chunk != NULL)) {
1793
0
        int res;
1794
1795
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
1796
0
        xmlBufResetInput(input->buf->buffer, input);
1797
0
        if (res < 0) {
1798
0
            xmlCtxtErrIO(ctxt, input->buf->error, NULL);
1799
0
            xmlFreeInputStream(input);
1800
0
            return(NULL);
1801
0
        }
1802
0
    }
1803
1804
0
    return(input);
1805
0
}
1806
1807
/**
1808
 * xmlNewIOInputStream:
1809
 * @ctxt:  an XML parser context
1810
 * @buf:  an input buffer
1811
 * @enc:  the charset encoding if known
1812
 *
1813
 * Create a new input stream structure encapsulating the @input into
1814
 * a stream suitable for the parser.
1815
 *
1816
 * Returns the new input stream or NULL
1817
 */
1818
xmlParserInputPtr
1819
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1820
0
              xmlCharEncoding enc) {
1821
0
    const char *encoding;
1822
1823
0
    if (buf == NULL)
1824
0
        return(NULL);
1825
1826
0
    encoding = xmlGetCharEncodingName(enc);
1827
0
    return(xmlNewInputInternal(ctxt, buf, NULL, encoding));
1828
0
}
1829
1830
/**
1831
 * xmlNewEntityInputStream:
1832
 * @ctxt:  an XML parser context
1833
 * @ent:  an Entity pointer
1834
 *
1835
 * DEPRECATED: Internal function, do not use.
1836
 *
1837
 * Create a new input stream based on an xmlEntityPtr
1838
 *
1839
 * Returns the new input stream or NULL
1840
 */
1841
xmlParserInputPtr
1842
0
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
1843
0
    xmlParserInputPtr input;
1844
1845
0
    if ((ctxt == NULL) || (ent == NULL))
1846
0
  return(NULL);
1847
1848
0
    if (ent->content != NULL) {
1849
0
        input = xmlNewInputString(ctxt, NULL, (const char *) ent->content,
1850
0
                                  NULL, XML_INPUT_BUF_STATIC);
1851
0
    } else if (ent->URI != NULL) {
1852
0
        input = xmlLoadExternalEntity((char *) ent->URI,
1853
0
                                      (char *) ent->ExternalID, ctxt);
1854
0
    } else {
1855
0
        return(NULL);
1856
0
    }
1857
1858
0
    if (input == NULL)
1859
0
        return(NULL);
1860
1861
0
    input->entity = ent;
1862
1863
0
    return(input);
1864
0
}
1865
1866
/**
1867
 * xmlNewStringInputStream:
1868
 * @ctxt:  an XML parser context
1869
 * @buffer:  an memory buffer
1870
 *
1871
 * Create a new input stream based on a memory buffer.
1872
 *
1873
 * Returns the new input stream
1874
 */
1875
xmlParserInputPtr
1876
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1877
0
    return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0));
1878
0
}
1879
1880
1881
/****************************************************************
1882
 *                *
1883
 *    External entities loading     *
1884
 *                *
1885
 ****************************************************************/
1886
1887
#ifdef LIBXML_CATALOG_ENABLED
1888
1889
/**
1890
 * xmlResolveResourceFromCatalog:
1891
 * @URL:  the URL for the entity to load
1892
 * @ID:  the System ID for the entity to load
1893
 * @ctxt:  the context in which the entity is called or NULL
1894
 *
1895
 * Resolves the URL and ID against the appropriate catalog.
1896
 * This function is used by xmlDefaultExternalEntityLoader and
1897
 * xmlNoNetExternalEntityLoader.
1898
 *
1899
 * Returns a new allocated URL, or NULL.
1900
 */
1901
static xmlChar *
1902
xmlResolveResourceFromCatalog(const char *URL, const char *ID,
1903
0
                              xmlParserCtxtPtr ctxt) {
1904
0
    xmlChar *resource = NULL;
1905
0
    xmlCatalogAllow pref;
1906
1907
    /*
1908
     * If the resource doesn't exists as a file,
1909
     * try to load it from the resource pointed in the catalogs
1910
     */
1911
0
    pref = xmlCatalogGetDefaults();
1912
1913
0
    if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
1914
  /*
1915
   * Do a local lookup
1916
   */
1917
0
  if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
1918
0
      ((pref == XML_CATA_ALLOW_ALL) ||
1919
0
       (pref == XML_CATA_ALLOW_DOCUMENT))) {
1920
0
      resource = xmlCatalogLocalResolve(ctxt->catalogs,
1921
0
                (const xmlChar *)ID,
1922
0
                (const xmlChar *)URL);
1923
0
        }
1924
  /*
1925
   * Try a global lookup
1926
   */
1927
0
  if ((resource == NULL) &&
1928
0
      ((pref == XML_CATA_ALLOW_ALL) ||
1929
0
       (pref == XML_CATA_ALLOW_GLOBAL))) {
1930
0
      resource = xmlCatalogResolve((const xmlChar *)ID,
1931
0
           (const xmlChar *)URL);
1932
0
  }
1933
0
  if ((resource == NULL) && (URL != NULL))
1934
0
      resource = xmlStrdup((const xmlChar *) URL);
1935
1936
  /*
1937
   * TODO: do an URI lookup on the reference
1938
   */
1939
0
  if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
1940
0
      xmlChar *tmp = NULL;
1941
1942
0
      if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
1943
0
    ((pref == XML_CATA_ALLOW_ALL) ||
1944
0
     (pref == XML_CATA_ALLOW_DOCUMENT))) {
1945
0
    tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
1946
0
      }
1947
0
      if ((tmp == NULL) &&
1948
0
    ((pref == XML_CATA_ALLOW_ALL) ||
1949
0
           (pref == XML_CATA_ALLOW_GLOBAL))) {
1950
0
    tmp = xmlCatalogResolveURI(resource);
1951
0
      }
1952
1953
0
      if (tmp != NULL) {
1954
0
    xmlFree(resource);
1955
0
    resource = tmp;
1956
0
      }
1957
0
  }
1958
0
    }
1959
1960
0
    return resource;
1961
0
}
1962
1963
#endif
1964
1965
/**
1966
 * xmlCheckHTTPInput:
1967
 * @ctxt: an XML parser context
1968
 * @ret: an XML parser input
1969
 *
1970
 * DEPRECATED: Internal function, don't use.
1971
 *
1972
 * Check an input in case it was created from an HTTP stream, in that
1973
 * case it will handle encoding and update of the base URL in case of
1974
 * redirection. It also checks for HTTP errors in which case the input
1975
 * is cleanly freed up and an appropriate error is raised in context
1976
 *
1977
 * Returns the input or NULL in case of HTTP error.
1978
 */
1979
xmlParserInputPtr
1980
0
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
1981
    /* Avoid unused variable warning if features are disabled. */
1982
0
    (void) ctxt;
1983
1984
#ifdef LIBXML_HTTP_ENABLED
1985
    if ((ret != NULL) && (ret->buf != NULL) &&
1986
        (ret->buf->readcallback == xmlIOHTTPRead) &&
1987
        (ret->buf->context != NULL)) {
1988
        const char *encoding;
1989
        const char *redir;
1990
        const char *mime;
1991
        int code;
1992
1993
        code = xmlNanoHTTPReturnCode(ret->buf->context);
1994
        if (code >= 400) {
1995
            /* fatal error */
1996
      if (ret->filename != NULL)
1997
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
1998
      else
1999
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2000
            xmlFreeInputStream(ret);
2001
            ret = NULL;
2002
        } else {
2003
2004
            mime = xmlNanoHTTPMimeType(ret->buf->context);
2005
            if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2006
                (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2007
                encoding = xmlNanoHTTPEncoding(ret->buf->context);
2008
                if (encoding != NULL)
2009
                    xmlSwitchEncodingName(ctxt, encoding);
2010
#if 0
2011
            } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
2012
#endif
2013
            }
2014
            redir = xmlNanoHTTPRedir(ret->buf->context);
2015
            if (redir != NULL) {
2016
                if (ret->filename != NULL)
2017
                    xmlFree((xmlChar *) ret->filename);
2018
                ret->filename =
2019
                    (char *) xmlStrdup((const xmlChar *) redir);
2020
            }
2021
        }
2022
    }
2023
#endif
2024
0
    return(ret);
2025
0
}
2026
2027
/**
2028
 * xmlNewInputFromFile:
2029
 * @ctxt:  an XML parser context
2030
 * @filename:  the filename to use as entity
2031
 *
2032
 * Create a new input stream based on a file or an URL.
2033
 *
2034
 * Returns the new input stream or NULL in case of error
2035
 */
2036
xmlParserInputPtr
2037
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2038
0
    xmlParserInputBufferPtr buf;
2039
0
    xmlParserInputPtr inputStream;
2040
0
    const xmlChar *URI;
2041
0
    xmlChar *canonic;
2042
0
    int code;
2043
2044
0
    if ((ctxt == NULL) || (filename == NULL))
2045
0
        return(NULL);
2046
2047
0
    code = xmlParserInputBufferCreateFilenameSafe(filename,
2048
0
                                                  XML_CHAR_ENCODING_NONE, &buf);
2049
0
    if (buf == NULL) {
2050
0
        xmlCtxtErrIO(ctxt, code, filename);
2051
0
  return(NULL);
2052
0
    }
2053
2054
0
    inputStream = xmlNewInputStream(ctxt);
2055
0
    if (inputStream == NULL) {
2056
0
  xmlFreeParserInputBuffer(buf);
2057
0
  return(NULL);
2058
0
    }
2059
2060
0
    inputStream->buf = buf;
2061
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
2062
0
    if (inputStream == NULL)
2063
0
        return(NULL);
2064
2065
0
    if (inputStream->filename == NULL)
2066
0
  URI = (xmlChar *) filename;
2067
0
    else
2068
0
  URI = (xmlChar *) inputStream->filename;
2069
0
    canonic = xmlCanonicPath(URI);
2070
0
    if (canonic == NULL) {
2071
0
        xmlCtxtErrMemory(ctxt);
2072
0
        xmlFreeInputStream(inputStream);
2073
0
        return(NULL);
2074
0
    }
2075
0
    if (inputStream->filename != NULL)
2076
0
        xmlFree((char *) inputStream->filename);
2077
0
    inputStream->filename = (char *) canonic;
2078
2079
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
2080
2081
0
    return(inputStream);
2082
0
}
2083
2084
/**
2085
 * xmlDefaultExternalEntityLoader:
2086
 * @URL:  the URL for the entity to load
2087
 * @ID:  the System ID for the entity to load
2088
 * @ctxt:  the context in which the entity is called or NULL
2089
 *
2090
 * By default we don't load external entities, yet.
2091
 *
2092
 * Returns a new allocated xmlParserInputPtr, or NULL.
2093
 */
2094
static xmlParserInputPtr
2095
xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
2096
                               xmlParserCtxtPtr ctxt)
2097
0
{
2098
0
    xmlParserInputPtr ret = NULL;
2099
0
    xmlChar *resource = NULL;
2100
2101
0
    if (URL == NULL)
2102
0
        return(NULL);
2103
2104
0
    if ((ctxt != NULL) && (ctxt->options & XML_PARSE_NONET)) {
2105
0
        int options = ctxt->options;
2106
2107
0
  ctxt->options -= XML_PARSE_NONET;
2108
0
        ret = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
2109
0
  ctxt->options = options;
2110
0
  return(ret);
2111
0
    }
2112
0
#ifdef LIBXML_CATALOG_ENABLED
2113
0
    resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
2114
0
#endif
2115
2116
0
    if (resource == NULL)
2117
0
        resource = (xmlChar *) URL;
2118
2119
0
    ret = xmlNewInputFromFile(ctxt, (const char *) resource);
2120
0
    if ((resource != NULL) && (resource != (xmlChar *) URL))
2121
0
        xmlFree(resource);
2122
0
    return (ret);
2123
0
}
2124
2125
/**
2126
 * xmlNoNetExternalEntityLoader:
2127
 * @URL:  the URL for the entity to load
2128
 * @ID:  the System ID for the entity to load
2129
 * @ctxt:  the context in which the entity is called or NULL
2130
 *
2131
 * A specific entity loader disabling network accesses, though still
2132
 * allowing local catalog accesses for resolution.
2133
 *
2134
 * Returns a new allocated xmlParserInputPtr, or NULL.
2135
 */
2136
xmlParserInputPtr
2137
xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2138
0
                             xmlParserCtxtPtr ctxt) {
2139
0
    xmlParserInputPtr input = NULL;
2140
0
    xmlChar *resource = NULL;
2141
2142
0
#ifdef LIBXML_CATALOG_ENABLED
2143
0
    resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
2144
0
#endif
2145
2146
0
    if (resource == NULL)
2147
0
  resource = (xmlChar *) URL;
2148
2149
0
    if (resource != NULL) {
2150
0
        if ((!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "ftp://", 6)) ||
2151
0
            (!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "http://", 7))) {
2152
0
            xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT,
2153
0
                         (const char *) resource);
2154
            /*
2155
             * Also forward the error directly to the global error
2156
             * handler, which the XML::LibXML test suite expects.
2157
             */
2158
0
            __xmlIOErr(XML_FROM_IO, XML_IO_NETWORK_ATTEMPT,
2159
0
                       (const char *) resource);
2160
0
      if (resource != (xmlChar *) URL)
2161
0
    xmlFree(resource);
2162
0
      return(NULL);
2163
0
  }
2164
0
    }
2165
0
    input = xmlDefaultExternalEntityLoader((const char *) resource, ID, ctxt);
2166
0
    if (resource != (xmlChar *) URL)
2167
0
  xmlFree(resource);
2168
0
    return(input);
2169
0
}
2170
2171
/*
2172
 * This global has to die eventually
2173
 */
2174
static xmlExternalEntityLoader
2175
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2176
2177
/**
2178
 * xmlSetExternalEntityLoader:
2179
 * @f:  the new entity resolver function
2180
 *
2181
 * Changes the defaultexternal entity resolver function for the application
2182
 */
2183
void
2184
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2185
0
    xmlCurrentExternalEntityLoader = f;
2186
0
}
2187
2188
/**
2189
 * xmlGetExternalEntityLoader:
2190
 *
2191
 * Get the default external entity resolver function for the application
2192
 *
2193
 * Returns the xmlExternalEntityLoader function pointer
2194
 */
2195
xmlExternalEntityLoader
2196
0
xmlGetExternalEntityLoader(void) {
2197
0
    return(xmlCurrentExternalEntityLoader);
2198
0
}
2199
2200
/**
2201
 * xmlLoadExternalEntity:
2202
 * @URL:  the URL for the entity to load
2203
 * @ID:  the Public ID for the entity to load
2204
 * @ctxt:  the context in which the entity is called or NULL
2205
 *
2206
 * @URL is a filename or URL. If if contains the substring "://",
2207
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2208
 * treated as a filesystem path.
2209
 *
2210
 * @ID is an optional XML public ID, typically from a doctype
2211
 * declaration. It is used for catalog lookups.
2212
 *
2213
 * The following resource loaders will be called if they were
2214
 * registered (in order of precedence):
2215
 *
2216
 * - the global external entity loader set with
2217
 *   xmlSetExternalEntityLoader
2218
 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2219
 *   xmlParserInputBufferCreateFilenameDefault
2220
 * - the default loader which will return
2221
 *   - the result from a matching global input callback set with
2222
 *     xmlRegisterInputCallbacks
2223
 *   - a HTTP resource if support is compiled in.
2224
 *   - a file opened from the filesystem, with automatic detection
2225
 *     of compressed files if support is compiled in.
2226
 *
2227
 * Returns the xmlParserInputPtr or NULL
2228
 */
2229
xmlParserInputPtr
2230
xmlLoadExternalEntity(const char *URL, const char *ID,
2231
0
                      xmlParserCtxtPtr ctxt) {
2232
0
    char *canonicFilename;
2233
0
    xmlParserInputPtr ret;
2234
2235
0
    if (URL == NULL)
2236
0
        return(NULL);
2237
2238
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) URL);
2239
0
    if (canonicFilename == NULL) {
2240
0
        xmlCtxtErrMemory(ctxt);
2241
0
        return(NULL);
2242
0
    }
2243
2244
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, ID, ctxt);
2245
0
    xmlFree(canonicFilename);
2246
0
    return(ret);
2247
0
}
2248
2249
/************************************************************************
2250
 *                  *
2251
 *    Commodity functions to handle parser contexts   *
2252
 *                  *
2253
 ************************************************************************/
2254
2255
/**
2256
 * xmlInitSAXParserCtxt:
2257
 * @ctxt:  XML parser context
2258
 * @sax:  SAX handlert
2259
 * @userData:  user data
2260
 *
2261
 * Initialize a SAX parser context
2262
 *
2263
 * Returns 0 in case of success and -1 in case of error
2264
 */
2265
2266
static int
2267
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2268
                     void *userData)
2269
0
{
2270
0
    xmlParserInputPtr input;
2271
2272
0
    if (ctxt == NULL)
2273
0
        return(-1);
2274
2275
0
    if (ctxt->dict == NULL)
2276
0
  ctxt->dict = xmlDictCreate();
2277
0
    if (ctxt->dict == NULL)
2278
0
  return(-1);
2279
0
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
2280
2281
0
    if (ctxt->sax == NULL)
2282
0
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2283
0
    if (ctxt->sax == NULL)
2284
0
  return(-1);
2285
0
    if (sax == NULL) {
2286
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2287
0
        xmlSAXVersion(ctxt->sax, 2);
2288
0
        ctxt->userData = ctxt;
2289
0
    } else {
2290
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2291
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2292
0
        } else {
2293
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2294
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2295
0
        }
2296
0
        ctxt->userData = userData ? userData : ctxt;
2297
0
    }
2298
2299
0
    ctxt->maxatts = 0;
2300
0
    ctxt->atts = NULL;
2301
    /* Allocate the Input stack */
2302
0
    if (ctxt->inputTab == NULL) {
2303
0
  ctxt->inputTab = (xmlParserInputPtr *)
2304
0
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
2305
0
  ctxt->inputMax = 5;
2306
0
    }
2307
0
    if (ctxt->inputTab == NULL)
2308
0
  return(-1);
2309
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2310
0
        xmlFreeInputStream(input);
2311
0
    }
2312
0
    ctxt->inputNr = 0;
2313
0
    ctxt->input = NULL;
2314
2315
0
    ctxt->version = NULL;
2316
0
    ctxt->encoding = NULL;
2317
0
    ctxt->standalone = -1;
2318
0
    ctxt->hasExternalSubset = 0;
2319
0
    ctxt->hasPErefs = 0;
2320
0
    ctxt->html = 0;
2321
0
    ctxt->instate = XML_PARSER_START;
2322
2323
    /* Allocate the Node stack */
2324
0
    if (ctxt->nodeTab == NULL) {
2325
0
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2326
0
  ctxt->nodeMax = 10;
2327
0
    }
2328
0
    if (ctxt->nodeTab == NULL)
2329
0
  return(-1);
2330
0
    ctxt->nodeNr = 0;
2331
0
    ctxt->node = NULL;
2332
2333
    /* Allocate the Name stack */
2334
0
    if (ctxt->nameTab == NULL) {
2335
0
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2336
0
  ctxt->nameMax = 10;
2337
0
    }
2338
0
    if (ctxt->nameTab == NULL)
2339
0
  return(-1);
2340
0
    ctxt->nameNr = 0;
2341
0
    ctxt->name = NULL;
2342
2343
    /* Allocate the space stack */
2344
0
    if (ctxt->spaceTab == NULL) {
2345
0
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2346
0
  ctxt->spaceMax = 10;
2347
0
    }
2348
0
    if (ctxt->spaceTab == NULL)
2349
0
  return(-1);
2350
0
    ctxt->spaceNr = 1;
2351
0
    ctxt->spaceMax = 10;
2352
0
    ctxt->spaceTab[0] = -1;
2353
0
    ctxt->space = &ctxt->spaceTab[0];
2354
0
    ctxt->myDoc = NULL;
2355
0
    ctxt->wellFormed = 1;
2356
0
    ctxt->nsWellFormed = 1;
2357
0
    ctxt->valid = 1;
2358
2359
0
    ctxt->options = XML_PARSE_NODICT;
2360
2361
    /*
2362
     * Initialize some parser options from deprecated global variables.
2363
     * Note that the "modern" API taking options arguments or
2364
     * xmlCtxtSetOptions will ignore these defaults. They're only
2365
     * relevant if old API functions like xmlParseFile are used.
2366
     */
2367
0
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2368
0
    if (ctxt->loadsubset) {
2369
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2370
0
    }
2371
0
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2372
0
    if (ctxt->validate) {
2373
0
        ctxt->options |= XML_PARSE_DTDVALID;
2374
0
    }
2375
0
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2376
0
    if (ctxt->pedantic) {
2377
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2378
0
    }
2379
0
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
2380
0
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2381
0
    if (ctxt->keepBlanks == 0) {
2382
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2383
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2384
0
    }
2385
0
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2386
0
    if (ctxt->replaceEntities) {
2387
0
        ctxt->options |= XML_PARSE_NOENT;
2388
0
    }
2389
0
    if (xmlGetWarningsDefaultValue == 0)
2390
0
        ctxt->options |= XML_PARSE_NOWARNING;
2391
2392
0
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2393
0
    ctxt->vctxt.userData = ctxt;
2394
0
    ctxt->vctxt.error = xmlParserValidityError;
2395
0
    ctxt->vctxt.warning = xmlParserValidityWarning;
2396
2397
0
    ctxt->record_info = 0;
2398
0
    ctxt->checkIndex = 0;
2399
0
    ctxt->inSubset = 0;
2400
0
    ctxt->errNo = XML_ERR_OK;
2401
0
    ctxt->depth = 0;
2402
0
    ctxt->catalogs = NULL;
2403
0
    ctxt->sizeentities = 0;
2404
0
    ctxt->sizeentcopy = 0;
2405
0
    ctxt->input_id = 1;
2406
0
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2407
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2408
2409
0
    if (ctxt->nsdb == NULL) {
2410
0
        ctxt->nsdb = xmlParserNsCreate();
2411
0
        if (ctxt->nsdb == NULL) {
2412
0
            xmlCtxtErrMemory(ctxt);
2413
0
            return(-1);
2414
0
        }
2415
0
    }
2416
2417
0
    return(0);
2418
0
}
2419
2420
/**
2421
 * xmlInitParserCtxt:
2422
 * @ctxt:  an XML parser context
2423
 *
2424
 * DEPRECATED: Internal function which will be made private in a future
2425
 * version.
2426
 *
2427
 * Initialize a parser context
2428
 *
2429
 * Returns 0 in case of success and -1 in case of error
2430
 */
2431
2432
int
2433
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2434
0
{
2435
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2436
0
}
2437
2438
/**
2439
 * xmlFreeParserCtxt:
2440
 * @ctxt:  an XML parser context
2441
 *
2442
 * Free all the memory used by a parser context. However the parsed
2443
 * document in ctxt->myDoc is not freed.
2444
 */
2445
2446
void
2447
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2448
0
{
2449
0
    xmlParserInputPtr input;
2450
2451
0
    if (ctxt == NULL) return;
2452
2453
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2454
0
        xmlFreeInputStream(input);
2455
0
    }
2456
0
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2457
0
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2458
0
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2459
0
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2460
0
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2461
0
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2462
0
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2463
0
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2464
0
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2465
0
#ifdef LIBXML_SAX1_ENABLED
2466
0
    if ((ctxt->sax != NULL) &&
2467
0
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2468
#else
2469
    if (ctxt->sax != NULL)
2470
#endif /* LIBXML_SAX1_ENABLED */
2471
0
        xmlFree(ctxt->sax);
2472
0
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2473
0
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2474
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2475
0
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2476
0
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2477
0
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2478
0
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2479
0
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2480
0
    if (ctxt->attsDefault != NULL)
2481
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2482
0
    if (ctxt->attsSpecial != NULL)
2483
0
        xmlHashFree(ctxt->attsSpecial, NULL);
2484
0
    if (ctxt->freeElems != NULL) {
2485
0
        xmlNodePtr cur, next;
2486
2487
0
  cur = ctxt->freeElems;
2488
0
  while (cur != NULL) {
2489
0
      next = cur->next;
2490
0
      xmlFree(cur);
2491
0
      cur = next;
2492
0
  }
2493
0
    }
2494
0
    if (ctxt->freeAttrs != NULL) {
2495
0
        xmlAttrPtr cur, next;
2496
2497
0
  cur = ctxt->freeAttrs;
2498
0
  while (cur != NULL) {
2499
0
      next = cur->next;
2500
0
      xmlFree(cur);
2501
0
      cur = next;
2502
0
  }
2503
0
    }
2504
    /*
2505
     * cleanup the error strings
2506
     */
2507
0
    if (ctxt->lastError.message != NULL)
2508
0
        xmlFree(ctxt->lastError.message);
2509
0
    if (ctxt->lastError.file != NULL)
2510
0
        xmlFree(ctxt->lastError.file);
2511
0
    if (ctxt->lastError.str1 != NULL)
2512
0
        xmlFree(ctxt->lastError.str1);
2513
0
    if (ctxt->lastError.str2 != NULL)
2514
0
        xmlFree(ctxt->lastError.str2);
2515
0
    if (ctxt->lastError.str3 != NULL)
2516
0
        xmlFree(ctxt->lastError.str3);
2517
2518
0
#ifdef LIBXML_CATALOG_ENABLED
2519
0
    if (ctxt->catalogs != NULL)
2520
0
  xmlCatalogFreeLocal(ctxt->catalogs);
2521
0
#endif
2522
0
    xmlFree(ctxt);
2523
0
}
2524
2525
/**
2526
 * xmlNewParserCtxt:
2527
 *
2528
 * Allocate and initialize a new parser context.
2529
 *
2530
 * Returns the xmlParserCtxtPtr or NULL
2531
 */
2532
2533
xmlParserCtxtPtr
2534
xmlNewParserCtxt(void)
2535
0
{
2536
0
    return(xmlNewSAXParserCtxt(NULL, NULL));
2537
0
}
2538
2539
/**
2540
 * xmlNewSAXParserCtxt:
2541
 * @sax:  SAX handler
2542
 * @userData:  user data
2543
 *
2544
 * Allocate and initialize a new SAX parser context. If userData is NULL,
2545
 * the parser context will be passed as user data.
2546
 *
2547
 * Available since 2.11.0. If you want support older versions,
2548
 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2549
 * struct assignment.
2550
 *
2551
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2552
 */
2553
2554
xmlParserCtxtPtr
2555
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2556
0
{
2557
0
    xmlParserCtxtPtr ctxt;
2558
2559
0
    xmlInitParser();
2560
2561
0
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2562
0
    if (ctxt == NULL)
2563
0
  return(NULL);
2564
0
    memset(ctxt, 0, sizeof(xmlParserCtxt));
2565
0
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2566
0
        xmlFreeParserCtxt(ctxt);
2567
0
  return(NULL);
2568
0
    }
2569
0
    return(ctxt);
2570
0
}
2571
2572
/************************************************************************
2573
 *                  *
2574
 *    Handling of node information        *
2575
 *                  *
2576
 ************************************************************************/
2577
2578
/**
2579
 * xmlClearParserCtxt:
2580
 * @ctxt:  an XML parser context
2581
 *
2582
 * Clear (release owned resources) and reinitialize a parser context
2583
 */
2584
2585
void
2586
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2587
0
{
2588
0
  if (ctxt==NULL)
2589
0
    return;
2590
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
2591
0
  xmlCtxtReset(ctxt);
2592
0
}
2593
2594
2595
/**
2596
 * xmlParserFindNodeInfo:
2597
 * @ctx:  an XML parser context
2598
 * @node:  an XML node within the tree
2599
 *
2600
 * DEPRECATED: Don't use.
2601
 *
2602
 * Find the parser node info struct for a given node
2603
 *
2604
 * Returns an xmlParserNodeInfo block pointer or NULL
2605
 */
2606
const xmlParserNodeInfo *
2607
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
2608
0
{
2609
0
    unsigned long pos;
2610
2611
0
    if ((ctx == NULL) || (node == NULL))
2612
0
        return (NULL);
2613
    /* Find position where node should be at */
2614
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2615
0
    if (pos < ctx->node_seq.length
2616
0
        && ctx->node_seq.buffer[pos].node == node)
2617
0
        return &ctx->node_seq.buffer[pos];
2618
0
    else
2619
0
        return NULL;
2620
0
}
2621
2622
2623
/**
2624
 * xmlInitNodeInfoSeq:
2625
 * @seq:  a node info sequence pointer
2626
 *
2627
 * DEPRECATED: Don't use.
2628
 *
2629
 * -- Initialize (set to initial state) node info sequence
2630
 */
2631
void
2632
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2633
0
{
2634
0
    if (seq == NULL)
2635
0
        return;
2636
0
    seq->length = 0;
2637
0
    seq->maximum = 0;
2638
0
    seq->buffer = NULL;
2639
0
}
2640
2641
/**
2642
 * xmlClearNodeInfoSeq:
2643
 * @seq:  a node info sequence pointer
2644
 *
2645
 * DEPRECATED: Don't use.
2646
 *
2647
 * -- Clear (release memory and reinitialize) node
2648
 *   info sequence
2649
 */
2650
void
2651
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2652
0
{
2653
0
    if (seq == NULL)
2654
0
        return;
2655
0
    if (seq->buffer != NULL)
2656
0
        xmlFree(seq->buffer);
2657
0
    xmlInitNodeInfoSeq(seq);
2658
0
}
2659
2660
/**
2661
 * xmlParserFindNodeInfoIndex:
2662
 * @seq:  a node info sequence pointer
2663
 * @node:  an XML node pointer
2664
 *
2665
 * DEPRECATED: Don't use.
2666
 *
2667
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2668
 *   the given node is or should be at in a sorted sequence
2669
 *
2670
 * Returns a long indicating the position of the record
2671
 */
2672
unsigned long
2673
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
2674
                           xmlNodePtr node)
2675
0
{
2676
0
    unsigned long upper, lower, middle;
2677
0
    int found = 0;
2678
2679
0
    if ((seq == NULL) || (node == NULL))
2680
0
        return ((unsigned long) -1);
2681
2682
    /* Do a binary search for the key */
2683
0
    lower = 1;
2684
0
    upper = seq->length;
2685
0
    middle = 0;
2686
0
    while (lower <= upper && !found) {
2687
0
        middle = lower + (upper - lower) / 2;
2688
0
        if (node == seq->buffer[middle - 1].node)
2689
0
            found = 1;
2690
0
        else if (node < seq->buffer[middle - 1].node)
2691
0
            upper = middle - 1;
2692
0
        else
2693
0
            lower = middle + 1;
2694
0
    }
2695
2696
    /* Return position */
2697
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2698
0
        return middle;
2699
0
    else
2700
0
        return middle - 1;
2701
0
}
2702
2703
2704
/**
2705
 * xmlParserAddNodeInfo:
2706
 * @ctxt:  an XML parser context
2707
 * @info:  a node info sequence pointer
2708
 *
2709
 * DEPRECATED: Don't use.
2710
 *
2711
 * Insert node info record into the sorted sequence
2712
 */
2713
void
2714
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2715
                     xmlParserNodeInfoPtr info)
2716
0
{
2717
0
    unsigned long pos;
2718
2719
0
    if ((ctxt == NULL) || (info == NULL)) return;
2720
2721
    /* Find pos and check to see if node is already in the sequence */
2722
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2723
0
                                     info->node);
2724
2725
0
    if ((pos < ctxt->node_seq.length) &&
2726
0
        (ctxt->node_seq.buffer != NULL) &&
2727
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2728
0
        ctxt->node_seq.buffer[pos] = *info;
2729
0
    }
2730
2731
    /* Otherwise, we need to add new node to buffer */
2732
0
    else {
2733
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2734
0
      (ctxt->node_seq.buffer == NULL)) {
2735
0
            xmlParserNodeInfo *tmp_buffer;
2736
0
            unsigned int byte_size;
2737
2738
0
            if (ctxt->node_seq.maximum == 0)
2739
0
                ctxt->node_seq.maximum = 2;
2740
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2741
0
      (2 * ctxt->node_seq.maximum));
2742
2743
0
            if (ctxt->node_seq.buffer == NULL)
2744
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2745
0
            else
2746
0
                tmp_buffer =
2747
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2748
0
                                                     byte_size);
2749
2750
0
            if (tmp_buffer == NULL) {
2751
0
    xmlCtxtErrMemory(ctxt);
2752
0
                return;
2753
0
            }
2754
0
            ctxt->node_seq.buffer = tmp_buffer;
2755
0
            ctxt->node_seq.maximum *= 2;
2756
0
        }
2757
2758
        /* If position is not at end, move elements out of the way */
2759
0
        if (pos != ctxt->node_seq.length) {
2760
0
            unsigned long i;
2761
2762
0
            for (i = ctxt->node_seq.length; i > pos; i--)
2763
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2764
0
        }
2765
2766
        /* Copy element and increase length */
2767
0
        ctxt->node_seq.buffer[pos] = *info;
2768
0
        ctxt->node_seq.length++;
2769
0
    }
2770
0
}
2771
2772
/************************************************************************
2773
 *                  *
2774
 *    Defaults settings         *
2775
 *                  *
2776
 ************************************************************************/
2777
/**
2778
 * xmlPedanticParserDefault:
2779
 * @val:  int 0 or 1
2780
 *
2781
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2782
 *
2783
 * Set and return the previous value for enabling pedantic warnings.
2784
 *
2785
 * Returns the last value for 0 for no substitution, 1 for substitution.
2786
 */
2787
2788
int
2789
0
xmlPedanticParserDefault(int val) {
2790
0
    int old = xmlPedanticParserDefaultValue;
2791
2792
0
    xmlPedanticParserDefaultValue = val;
2793
0
    return(old);
2794
0
}
2795
2796
/**
2797
 * xmlLineNumbersDefault:
2798
 * @val:  int 0 or 1
2799
 *
2800
 * DEPRECATED: The modern options API always enables line numbers.
2801
 *
2802
 * Set and return the previous value for enabling line numbers in elements
2803
 * contents. This may break on old application and is turned off by default.
2804
 *
2805
 * Returns the last value for 0 for no substitution, 1 for substitution.
2806
 */
2807
2808
int
2809
0
xmlLineNumbersDefault(int val) {
2810
0
    int old = xmlLineNumbersDefaultValue;
2811
2812
0
    xmlLineNumbersDefaultValue = val;
2813
0
    return(old);
2814
0
}
2815
2816
/**
2817
 * xmlSubstituteEntitiesDefault:
2818
 * @val:  int 0 or 1
2819
 *
2820
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2821
 *
2822
 * Set and return the previous value for default entity support.
2823
 * Initially the parser always keep entity references instead of substituting
2824
 * entity values in the output. This function has to be used to change the
2825
 * default parser behavior
2826
 * SAX::substituteEntities() has to be used for changing that on a file by
2827
 * file basis.
2828
 *
2829
 * Returns the last value for 0 for no substitution, 1 for substitution.
2830
 */
2831
2832
int
2833
0
xmlSubstituteEntitiesDefault(int val) {
2834
0
    int old = xmlSubstituteEntitiesDefaultValue;
2835
2836
0
    xmlSubstituteEntitiesDefaultValue = val;
2837
0
    return(old);
2838
0
}
2839
2840
/**
2841
 * xmlKeepBlanksDefault:
2842
 * @val:  int 0 or 1
2843
 *
2844
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2845
 *
2846
 * Set and return the previous value for default blanks text nodes support.
2847
 * The 1.x version of the parser used an heuristic to try to detect
2848
 * ignorable white spaces. As a result the SAX callback was generating
2849
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2850
 * using the DOM output text nodes containing those blanks were not generated.
2851
 * The 2.x and later version will switch to the XML standard way and
2852
 * ignorableWhitespace() are only generated when running the parser in
2853
 * validating mode and when the current element doesn't allow CDATA or
2854
 * mixed content.
2855
 * This function is provided as a way to force the standard behavior
2856
 * on 1.X libs and to switch back to the old mode for compatibility when
2857
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2858
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2859
 * nodes generated.
2860
 * This value also affect autogeneration of indentation when saving code
2861
 * if blanks sections are kept, indentation is not generated.
2862
 *
2863
 * Returns the last value for 0 for no substitution, 1 for substitution.
2864
 */
2865
2866
int
2867
0
xmlKeepBlanksDefault(int val) {
2868
0
    int old = xmlKeepBlanksDefaultValue;
2869
2870
0
    xmlKeepBlanksDefaultValue = val;
2871
0
#ifdef LIBXML_OUTPUT_ENABLED
2872
0
    if (!val)
2873
0
        xmlIndentTreeOutput = 1;
2874
0
#endif
2875
0
    return(old);
2876
0
}
2877