Coverage Report

Created: 2025-08-26 06:41

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * Author: Daniel Veillard
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
39
#define CUR(ctxt) ctxt->input->cur
40
#define END(ctxt) ctxt->input->end
41
42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/globals.h"
46
#include "private/io.h"
47
#include "private/memory.h"
48
#include "private/parser.h"
49
50
#ifndef SIZE_MAX
51
  #define SIZE_MAX ((size_t) -1)
52
#endif
53
54
326k
#define XML_MAX_ERRORS 100
55
56
/*
57
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
58
 * factor of serialized output after entity expansion.
59
 */
60
0
#define XML_MAX_AMPLIFICATION_DEFAULT 5
61
62
/*
63
 * Various global defaults for parsing
64
 */
65
66
/**
67
 * check the compiled lib version against the include one.
68
 *
69
 * @param version  the include version number
70
 */
71
void
72
0
xmlCheckVersion(int version) {
73
0
    int myversion = LIBXML_VERSION;
74
75
0
    xmlInitParser();
76
77
0
    if ((myversion / 10000) != (version / 10000)) {
78
0
  xmlPrintErrorMessage(
79
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
80
0
    (version / 10000), (myversion / 10000));
81
0
    } else if ((myversion / 100) < (version / 100)) {
82
0
  xmlPrintErrorMessage(
83
0
    "Warning: program compiled against libxml %d using older %d\n",
84
0
    (version / 100), (myversion / 100));
85
0
    }
86
0
}
87
88
89
/************************************************************************
90
 *                  *
91
 *    Some factorized error routines        *
92
 *                  *
93
 ************************************************************************/
94
95
96
/**
97
 * Register a callback function that will be called on errors and
98
 * warnings. If handler is NULL, the error handler will be deactivated.
99
 *
100
 * If you only want to disable parser errors being printed to
101
 * stderr, use xmlParserOption XML_PARSE_NOERROR.
102
 *
103
 * This is the recommended way to collect errors from the parser and
104
 * takes precedence over all other error reporting mechanisms.
105
 * These are (in order of precedence):
106
 *
107
 * - per-context structured handler (#xmlCtxtSetErrorHandler)
108
 * - per-context structured "serror" SAX handler
109
 * - global structured handler (#xmlSetStructuredErrorFunc)
110
 * - per-context generic "error" and "warning" SAX handlers
111
 * - global generic handler (#xmlSetGenericErrorFunc)
112
 * - print to stderr
113
 *
114
 * @since 2.13.0
115
 * @param ctxt  an XML parser context
116
 * @param handler  error handler
117
 * @param data  data for error handler
118
 */
119
void
120
xmlCtxtSetErrorHandler(xmlParserCtxt *ctxt, xmlStructuredErrorFunc handler,
121
                       void *data)
122
19.2k
{
123
19.2k
    if (ctxt == NULL)
124
0
        return;
125
19.2k
    ctxt->errorHandler = handler;
126
19.2k
    ctxt->errorCtxt = data;
127
19.2k
}
128
129
/**
130
 * Get the last error raised.
131
 *
132
 * Note that the XML parser typically doesn't stop after
133
 * encountering an error and will often report multiple errors.
134
 * Most of the time, the last error isn't useful. Future
135
 * versions might return the first parser error instead.
136
 *
137
 * @param ctx  an XML parser context
138
 * @returns NULL if no error occurred or a pointer to the error
139
 */
140
const xmlError *
141
xmlCtxtGetLastError(void *ctx)
142
0
{
143
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
144
145
0
    if (ctxt == NULL)
146
0
        return (NULL);
147
0
    if (ctxt->lastError.code == XML_ERR_OK)
148
0
        return (NULL);
149
0
    return (&ctxt->lastError);
150
0
}
151
152
/**
153
 * Reset the last parser error to success. This does not change
154
 * the well-formedness status.
155
 *
156
 * @param ctx  an XML parser context
157
 */
158
void
159
xmlCtxtResetLastError(void *ctx)
160
0
{
161
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
162
163
0
    if (ctxt == NULL)
164
0
        return;
165
0
    ctxt->errNo = XML_ERR_OK;
166
0
    if (ctxt->lastError.code == XML_ERR_OK)
167
0
        return;
168
0
    xmlResetError(&ctxt->lastError);
169
0
}
170
171
/**
172
 * Handle an out-of-memory error.
173
 *
174
 * @since 2.13.0
175
 * @param ctxt  an XML parser context
176
 */
177
void
178
xmlCtxtErrMemory(xmlParserCtxt *ctxt)
179
3.31k
{
180
3.31k
    xmlStructuredErrorFunc schannel = NULL;
181
3.31k
    xmlGenericErrorFunc channel = NULL;
182
3.31k
    void *data;
183
184
3.31k
    if (ctxt == NULL) {
185
0
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
186
0
        return;
187
0
    }
188
189
3.31k
    ctxt->errNo = XML_ERR_NO_MEMORY;
190
3.31k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
191
3.31k
    ctxt->wellFormed = 0;
192
3.31k
    ctxt->disableSAX = 2;
193
194
3.31k
    if (ctxt->errorHandler) {
195
3.31k
        schannel = ctxt->errorHandler;
196
3.31k
        data = ctxt->errorCtxt;
197
3.31k
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
198
0
        (ctxt->sax->serror != NULL)) {
199
0
        schannel = ctxt->sax->serror;
200
0
        data = ctxt->userData;
201
0
    } else {
202
0
        channel = ctxt->sax->error;
203
0
        data = ctxt->userData;
204
0
    }
205
206
3.31k
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
207
3.31k
                        &ctxt->lastError);
208
3.31k
}
209
210
/**
211
 * If filename is empty, use the one from context input if available.
212
 *
213
 * Report an IO error to the parser context.
214
 *
215
 * @param ctxt  parser context
216
 * @param code  xmlParserErrors code
217
 * @param uri  filename or URI (optional)
218
 */
219
void
220
xmlCtxtErrIO(xmlParserCtxt *ctxt, int code, const char *uri)
221
580
{
222
580
    const char *errstr, *msg, *str1, *str2;
223
580
    xmlErrorLevel level;
224
225
580
    if (ctxt == NULL)
226
0
        return;
227
228
580
    if (((code == XML_IO_ENOENT) ||
229
580
         (code == XML_IO_UNKNOWN))) {
230
        /*
231
         * Only report a warning if a file could not be found. This should
232
         * only be done for external entities, but the external entity loader
233
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
234
         * raise an error and aborts parsing.
235
         */
236
0
        if (ctxt->validate == 0)
237
0
            level = XML_ERR_WARNING;
238
0
        else
239
0
            level = XML_ERR_ERROR;
240
580
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
241
0
        level = XML_ERR_ERROR;
242
580
    } else {
243
580
        level = XML_ERR_FATAL;
244
580
    }
245
246
580
    errstr = xmlErrString(code);
247
248
580
    if (uri == NULL) {
249
580
        msg = "%s\n";
250
580
        str1 = errstr;
251
580
        str2 = NULL;
252
580
    } else {
253
0
        msg = "failed to load \"%s\": %s\n";
254
0
        str1 = uri;
255
0
        str2 = errstr;
256
0
    }
257
258
580
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
259
580
               (const xmlChar *) uri, NULL, NULL, 0,
260
580
               msg, str1, str2);
261
580
}
262
263
/**
264
 * @param ctxt  parser context
265
 * @returns true if the last error is catastrophic.
266
 */
267
int
268
165k
xmlCtxtIsCatastrophicError(xmlParserCtxt *ctxt) {
269
165k
    if (ctxt == NULL)
270
0
        return(1);
271
272
165k
    return(xmlIsCatastrophicError(ctxt->lastError.level,
273
165k
                                  ctxt->lastError.code));
274
165k
}
275
276
/**
277
 * Raise a parser error.
278
 *
279
 * @param ctxt  a parser context
280
 * @param node  the current node or NULL
281
 * @param domain  the domain for the error
282
 * @param code  the code for the error
283
 * @param level  the xmlErrorLevel for the error
284
 * @param str1  extra string info
285
 * @param str2  extra string info
286
 * @param str3  extra string info
287
 * @param int1  extra int info
288
 * @param msg  the message to display/transmit
289
 * @param ap  extra parameters for the message display
290
 */
291
void
292
xmlCtxtVErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
293
            xmlParserErrors code, xmlErrorLevel level,
294
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
295
            int int1, const char *msg, va_list ap)
296
163k
{
297
163k
    xmlStructuredErrorFunc schannel = NULL;
298
163k
    xmlGenericErrorFunc channel = NULL;
299
163k
    void *data = NULL;
300
163k
    const char *file = NULL;
301
163k
    int line = 0;
302
163k
    int col = 0;
303
163k
    int res;
304
305
163k
    if (code == XML_ERR_NO_MEMORY) {
306
304
        xmlCtxtErrMemory(ctxt);
307
304
        return;
308
304
    }
309
310
163k
    if (ctxt == NULL) {
311
0
        res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
312
0
                             level, NULL, 0, (const char *) str1,
313
0
                             (const char *) str2, (const char *) str3,
314
0
                             int1, 0, msg, ap);
315
0
        if (res < 0)
316
0
            xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
317
318
0
        return;
319
0
    }
320
321
163k
    if (PARSER_STOPPED(ctxt))
322
227
  return;
323
324
    /* Don't overwrite catastrophic errors */
325
163k
    if (xmlCtxtIsCatastrophicError(ctxt))
326
0
        return;
327
328
163k
    if (level == XML_ERR_WARNING) {
329
0
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330
0
            return;
331
0
        ctxt->nbWarnings += 1;
332
163k
    } else {
333
        /*
334
         * By long-standing design, the parser isn't completely
335
         * stopped on well-formedness errors. Only SAX callbacks
336
         * are disabled.
337
         *
338
         * In some situations, we really want to abort as fast
339
         * as possible.
340
         */
341
163k
        if (xmlIsCatastrophicError(level, code) ||
342
163k
            code == XML_ERR_RESOURCE_LIMIT ||
343
163k
            code == XML_ERR_ENTITY_LOOP) {
344
104
            ctxt->disableSAX = 2; /* really stop parser */
345
163k
        } else {
346
            /* Report at least one fatal error. */
347
163k
            if (ctxt->nbErrors >= XML_MAX_ERRORS &&
348
163k
                (level < XML_ERR_FATAL || ctxt->wellFormed == 0))
349
125k
                return;
350
351
37.5k
            if (level == XML_ERR_FATAL && ctxt->recovery == 0)
352
0
                ctxt->disableSAX = 1;
353
37.5k
        }
354
355
37.6k
        if (level == XML_ERR_FATAL)
356
428
            ctxt->wellFormed = 0;
357
37.6k
        ctxt->errNo = code;
358
37.6k
        ctxt->nbErrors += 1;
359
37.6k
    }
360
361
37.6k
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
362
37.6k
        ((level != XML_ERR_WARNING) ||
363
25.5k
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
364
25.5k
        if (ctxt->errorHandler) {
365
25.5k
            schannel = ctxt->errorHandler;
366
25.5k
            data = ctxt->errorCtxt;
367
25.5k
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
368
0
            (ctxt->sax->serror != NULL)) {
369
0
            schannel = ctxt->sax->serror;
370
0
            data = ctxt->userData;
371
0
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
372
0
            if (level == XML_ERR_WARNING)
373
0
                channel = ctxt->vctxt.warning;
374
0
            else
375
0
                channel = ctxt->vctxt.error;
376
0
            data = ctxt->vctxt.userData;
377
0
        } else {
378
0
            if (level == XML_ERR_WARNING)
379
0
                channel = ctxt->sax->warning;
380
0
            else
381
0
                channel = ctxt->sax->error;
382
0
            data = ctxt->userData;
383
0
        }
384
25.5k
    }
385
386
37.6k
    if (ctxt->input != NULL) {
387
37.6k
        xmlParserInputPtr input = ctxt->input;
388
389
37.6k
        if ((input->filename == NULL) &&
390
37.6k
            (ctxt->inputNr > 1)) {
391
0
            input = ctxt->inputTab[ctxt->inputNr - 2];
392
0
        }
393
37.6k
        file = input->filename;
394
37.6k
        line = input->line;
395
37.6k
        col = input->col;
396
37.6k
    }
397
398
37.6k
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
399
37.6k
                         level, file, line, (const char *) str1,
400
37.6k
                         (const char *) str2, (const char *) str3, int1, col,
401
37.6k
                         msg, ap);
402
403
37.6k
    if (res < 0) {
404
259
        xmlCtxtErrMemory(ctxt);
405
259
        return;
406
259
    }
407
37.6k
}
408
409
/**
410
 * Raise a parser error.
411
 *
412
 * @param ctxt  a parser context
413
 * @param node  the current node or NULL
414
 * @param domain  the domain for the error
415
 * @param code  the code for the error
416
 * @param level  the xmlErrorLevel for the error
417
 * @param str1  extra string info
418
 * @param str2  extra string info
419
 * @param str3  extra string info
420
 * @param int1  extra int info
421
 * @param msg  the message to display/transmit
422
 * @param ...  extra parameters for the message display
423
 */
424
void
425
xmlCtxtErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
426
           xmlParserErrors code, xmlErrorLevel level,
427
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
428
           int int1, const char *msg, ...)
429
72.5k
{
430
72.5k
    va_list ap;
431
432
72.5k
    va_start(ap, msg);
433
72.5k
    xmlCtxtVErr(ctxt, node, domain, code, level,
434
72.5k
                str1, str2, str3, int1, msg, ap);
435
72.5k
    va_end(ap);
436
72.5k
}
437
438
/**
439
 * Get well-formedness and validation status after parsing. Also
440
 * reports catastrophic errors which are not related to parsing
441
 * like out-of-memory, I/O or other errors.
442
 *
443
 * @since 2.14.0
444
 *
445
 * @param ctxt  an XML parser context
446
 * @returns a bitmask of XML_STATUS_* flags ORed together.
447
 */
448
xmlParserStatus
449
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
450
0
    xmlParserStatus bits = 0;
451
452
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
453
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
454
0
                XML_STATUS_NOT_WELL_FORMED |
455
0
                XML_STATUS_NOT_NS_WELL_FORMED;
456
0
        if ((ctxt != NULL) && (ctxt->validate))
457
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
458
459
0
        return(bits);
460
0
    }
461
462
0
    if (!ctxt->wellFormed)
463
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
464
0
    if (!ctxt->nsWellFormed)
465
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
466
0
    if ((ctxt->validate) && (!ctxt->valid))
467
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
468
469
0
    return(bits);
470
0
}
471
472
/**
473
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
474
 *
475
 * @param ctxt  an XML parser context
476
 * @param code  the error number
477
 * @param info  extra information string
478
 */
479
void
480
xmlFatalErr(xmlParserCtxt *ctxt, xmlParserErrors code, const char *info)
481
3.48k
{
482
3.48k
    const char *errmsg;
483
3.48k
    xmlErrorDomain domain = XML_FROM_PARSER;
484
3.48k
    xmlErrorLevel level = XML_ERR_FATAL;
485
486
3.48k
    errmsg = xmlErrString(code);
487
488
3.48k
    if ((ctxt != NULL) && (ctxt->html)) {
489
3.48k
        domain = XML_FROM_HTML;
490
491
        /* Continue if encoding is unsupported */
492
3.48k
        if (code == XML_ERR_UNSUPPORTED_ENCODING)
493
3.39k
            level = XML_ERR_ERROR;
494
3.48k
    }
495
496
3.48k
    if (info == NULL) {
497
0
        xmlCtxtErr(ctxt, NULL, domain, code, level,
498
0
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
499
3.48k
    } else {
500
3.48k
        xmlCtxtErr(ctxt, NULL, domain, code, level,
501
3.48k
                   (const xmlChar *) info, NULL, NULL, 0,
502
3.48k
                   "%s: %s\n", errmsg, info);
503
3.48k
    }
504
3.48k
}
505
506
/**
507
 * Return window into current parser data.
508
 *
509
 * @param input  parser input
510
 * @param startOut  start of window (output)
511
 * @param sizeInOut  maximum size of window (in)
512
 *                   actual size of window (out)
513
 * @param offsetOut  offset of current position inside
514
 *                   window (out)
515
 */
516
void
517
xmlParserInputGetWindow(xmlParserInput *input, const xmlChar **startOut,
518
0
                        int *sizeInOut, int *offsetOut) {
519
0
    const xmlChar *cur, *base, *start;
520
0
    int n, col;
521
0
    int size = *sizeInOut;
522
523
0
    cur = input->cur;
524
0
    base = input->base;
525
    /* skip backwards over any end-of-lines */
526
0
    while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) {
527
0
  cur--;
528
0
    }
529
0
    n = 0;
530
    /* search backwards for beginning-of-line (to max buff size) */
531
0
    while ((n < size) && (cur > base) &&
532
0
     (*cur != '\n') && (*cur != '\r')) {
533
0
        cur--;
534
0
        n++;
535
0
    }
536
0
    if ((n > 0) && ((*cur == '\n') || (*cur == '\r'))) {
537
0
        cur++;
538
0
    } else {
539
        /* skip over continuation bytes */
540
0
        while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
541
0
            cur++;
542
0
    }
543
    /* calculate the error position in terms of the current position */
544
0
    col = input->cur - cur;
545
    /* search forward for end-of-line (to max buff size) */
546
0
    n = 0;
547
0
    start = cur;
548
    /* copy selected text to our buffer */
549
0
    while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
550
0
        int len = input->end - cur;
551
0
        int c = xmlGetUTF8Char(cur, &len);
552
553
0
        if ((c < 0) || (n + len > size))
554
0
            break;
555
0
        cur += len;
556
0
  n += len;
557
0
    }
558
559
    /*
560
     * col can only point to the end of the buffer if
561
     * there's space for a marker.
562
     */
563
0
    if (col >= n)
564
0
        col = n < size ? n : size - 1;
565
566
0
    *startOut = start;
567
0
    *sizeInOut = n;
568
0
    *offsetOut = col;
569
0
}
570
571
/**
 * Check whether the character is allowed by the production
 *
 * @deprecated Internal function, don't use.
 *
 * ```
 * [84] Letter ::= BaseChar | Ideographic
 * ```
 *
 * @param c  an unicode character (int)
 * @returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
587
588
/************************************************************************
589
 *                  *
590
 *    Input handling functions for progressive parsing  *
591
 *                  *
592
 ************************************************************************/
593
594
/* we need to keep enough input to show errors in context */
595
59.1k
#define LINE_LEN        80
596
597
/**
598
 * @deprecated This function was internal and is deprecated.
599
 *
600
 * @param in  an XML parser input
601
 * @param len  an indicative size for the lookahead
602
 * @returns -1 as this is an error to use it.
603
 */
604
int
605
0
xmlParserInputRead(xmlParserInput *in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
606
0
    return(-1);
607
0
}
608
609
/**
610
 * Grow the input buffer.
611
 *
612
 * @param ctxt  an XML parser context
613
 * @returns the number of bytes read or -1 in case of error.
614
 */
615
int
616
229k
xmlParserGrow(xmlParserCtxt *ctxt) {
617
229k
    xmlParserInputPtr in = ctxt->input;
618
229k
    xmlParserInputBufferPtr buf = in->buf;
619
229k
    size_t curEnd = in->end - in->cur;
620
229k
    size_t curBase = in->cur - in->base;
621
229k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
622
85.7k
                       XML_MAX_HUGE_LENGTH :
623
229k
                       XML_MAX_LOOKUP_LIMIT;
624
229k
    int ret;
625
626
229k
    if (buf == NULL)
627
0
        return(0);
628
    /* Don't grow push parser buffer. */
629
229k
    if (PARSER_PROGRESSIVE(ctxt))
630
9.59k
        return(0);
631
    /* Don't grow memory buffers. */
632
219k
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
633
0
        return(0);
634
219k
    if (buf->error != 0)
635
1.14k
        return(-1);
636
637
218k
    if (curBase > maxLength) {
638
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
639
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
640
0
  return(-1);
641
0
    }
642
643
218k
    if (curEnd >= INPUT_CHUNK)
644
0
        return(0);
645
646
218k
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
647
218k
    xmlBufUpdateInput(buf->buffer, in, curBase);
648
649
218k
    if (ret < 0) {
650
40
        xmlCtxtErrIO(ctxt, buf->error, NULL);
651
40
    }
652
653
218k
    return(ret);
654
218k
}
655
656
/**
657
 * Raises an error with `code` if the input wasn't consumed
658
 * completely.
659
 *
660
 * @param ctxt  parser ctxt
661
 * @param code  error code
662
 */
663
void
664
18.5k
xmlParserCheckEOF(xmlParserCtxt *ctxt, xmlParserErrors code) {
665
18.5k
    xmlParserInputPtr in = ctxt->input;
666
18.5k
    xmlParserInputBufferPtr buf;
667
668
18.5k
    if (ctxt->errNo != XML_ERR_OK)
669
5.61k
        return;
670
671
12.9k
    if (in->cur < in->end) {
672
0
        xmlFatalErr(ctxt, code, NULL);
673
0
        return;
674
0
    }
675
676
12.9k
    buf = in->buf;
677
12.9k
    if ((buf != NULL) && (buf->encoder != NULL)) {
678
5.88k
        size_t curBase = in->cur - in->base;
679
5.88k
        size_t sizeOut = 64;
680
5.88k
        xmlCharEncError ret;
681
682
        /*
683
         * Check for truncated multi-byte sequence
684
         */
685
5.88k
        ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
686
5.88k
        xmlBufUpdateInput(buf->buffer, in, curBase);
687
5.88k
        if (ret != XML_ENC_ERR_SUCCESS) {
688
182
            xmlCtxtErrIO(ctxt, buf->error, NULL);
689
182
            return;
690
182
        }
691
692
        /* Shouldn't happen */
693
5.70k
        if (in->cur < in->end)
694
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
695
5.70k
    }
696
12.9k
}
697
698
/**
699
 * This function increase the input for the parser. It tries to
700
 * preserve pointers to the input buffer, and keep already read data
701
 *
702
 * @deprecated Don't use.
703
 *
704
 * @param in  an XML parser input
705
 * @param len  an indicative size for the lookahead
706
 * @returns the amount of char read, or -1 in case of error, 0 indicate the
707
 * end of this entity
708
 */
709
int
710
0
xmlParserInputGrow(xmlParserInput *in, int len) {
711
0
    int ret;
712
0
    size_t indx;
713
714
0
    if ((in == NULL) || (len < 0)) return(-1);
715
0
    if (in->buf == NULL) return(-1);
716
0
    if (in->base == NULL) return(-1);
717
0
    if (in->cur == NULL) return(-1);
718
0
    if (in->buf->buffer == NULL) return(-1);
719
720
    /* Don't grow memory buffers. */
721
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
722
0
        return(0);
723
724
0
    indx = in->cur - in->base;
725
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
726
0
        return(0);
727
0
    }
728
0
    ret = xmlParserInputBufferGrow(in->buf, len);
729
730
0
    in->base = xmlBufContent(in->buf->buffer);
731
0
    if (in->base == NULL) {
732
0
        in->base = BAD_CAST "";
733
0
        in->cur = in->base;
734
0
        in->end = in->base;
735
0
        return(-1);
736
0
    }
737
0
    in->cur = in->base + indx;
738
0
    in->end = xmlBufEnd(in->buf->buffer);
739
740
0
    return(ret);
741
0
}
742
743
/**
744
 * Shrink the input buffer.
745
 *
746
 * @param ctxt  an XML parser context
747
 */
748
void
749
29.5k
xmlParserShrink(xmlParserCtxt *ctxt) {
750
29.5k
    xmlParserInputPtr in = ctxt->input;
751
29.5k
    xmlParserInputBufferPtr buf = in->buf;
752
29.5k
    size_t used, res;
753
754
29.5k
    if (buf == NULL)
755
0
        return;
756
757
29.5k
    used = in->cur - in->base;
758
759
29.5k
    if (used > LINE_LEN) {
760
29.5k
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
761
762
29.5k
        if (res > 0) {
763
29.5k
            used -= res;
764
29.5k
            xmlSaturatedAddSizeT(&in->consumed, res);
765
29.5k
        }
766
767
29.5k
        xmlBufUpdateInput(buf->buffer, in, used);
768
29.5k
    }
769
29.5k
}
770
771
/**
772
 * This function removes used input for the parser.
773
 *
774
 * @deprecated Don't use.
775
 *
776
 * @param in  an XML parser input
777
 */
778
void
779
0
xmlParserInputShrink(xmlParserInput *in) {
780
0
    size_t used;
781
0
    size_t ret;
782
783
0
    if (in == NULL) return;
784
0
    if (in->buf == NULL) return;
785
0
    if (in->base == NULL) return;
786
0
    if (in->cur == NULL) return;
787
0
    if (in->buf->buffer == NULL) return;
788
789
0
    used = in->cur - in->base;
790
791
0
    if (used > LINE_LEN) {
792
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
793
0
  if (ret > 0) {
794
0
            used -= ret;
795
0
            xmlSaturatedAddSizeT(&in->consumed, ret);
796
0
  }
797
798
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
799
0
    }
800
0
}
801
802
/************************************************************************
803
 *                  *
804
 *    UTF8 character input and related functions    *
805
 *                  *
806
 ************************************************************************/
807
808
/**
809
 * Skip to the next char input char.
810
 *
811
 * @deprecated Internal function, do not use.
812
 *
813
 * @param ctxt  the XML parser context
814
 */
815
816
void
817
xmlNextChar(xmlParserCtxt *ctxt)
818
0
{
819
0
    const unsigned char *cur;
820
0
    size_t avail;
821
0
    int c;
822
823
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
824
0
        return;
825
826
0
    avail = ctxt->input->end - ctxt->input->cur;
827
828
0
    if (avail < INPUT_CHUNK) {
829
0
        xmlParserGrow(ctxt);
830
0
        if (ctxt->input->cur >= ctxt->input->end)
831
0
            return;
832
0
        avail = ctxt->input->end - ctxt->input->cur;
833
0
    }
834
835
0
    cur = ctxt->input->cur;
836
0
    c = *cur;
837
838
0
    if (c < 0x80) {
839
0
        if (c == '\n') {
840
0
            ctxt->input->cur++;
841
0
            ctxt->input->line++;
842
0
            ctxt->input->col = 1;
843
0
        } else if (c == '\r') {
844
            /*
845
             *   2.11 End-of-Line Handling
846
             *   the literal two-character sequence "#xD#xA" or a standalone
847
             *   literal #xD, an XML processor must pass to the application
848
             *   the single character #xA.
849
             */
850
0
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
851
0
            ctxt->input->line++;
852
0
            ctxt->input->col = 1;
853
0
            return;
854
0
        } else {
855
0
            ctxt->input->cur++;
856
0
            ctxt->input->col++;
857
0
        }
858
0
    } else {
859
0
        ctxt->input->col++;
860
861
0
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
862
0
            goto encoding_error;
863
864
0
        if (c < 0xe0) {
865
            /* 2-byte code */
866
0
            if (c < 0xc2)
867
0
                goto encoding_error;
868
0
            ctxt->input->cur += 2;
869
0
        } else {
870
0
            unsigned int val = (c << 8) | cur[1];
871
872
0
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
873
0
                goto encoding_error;
874
875
0
            if (c < 0xf0) {
876
                /* 3-byte code */
877
0
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
878
0
                    goto encoding_error;
879
0
                ctxt->input->cur += 3;
880
0
            } else {
881
0
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
882
0
                    goto encoding_error;
883
884
                /* 4-byte code */
885
0
                if ((val < 0xf090) || (val >= 0xf490))
886
0
                    goto encoding_error;
887
0
                ctxt->input->cur += 4;
888
0
            }
889
0
        }
890
0
    }
891
892
0
    return;
893
894
0
encoding_error:
895
    /* Only report the first error */
896
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
897
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
898
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
899
0
    }
900
0
    ctxt->input->cur++;
901
0
}
902
903
/**
904
 * The current char value, if using UTF-8 this may actually span multiple
905
 * bytes in the input buffer. Implement the end of line normalization:
906
 *
907
 * @deprecated Internal function, do not use.
908
 *
909
 * 2.11 End-of-Line Handling
910
 *
911
 * Wherever an external parsed entity or the literal entity value
912
 * of an internal parsed entity contains either the literal two-character
913
 * sequence "#xD#xA" or a standalone literal \#xD, an XML processor
914
 * must pass to the application the single character \#xA.
915
 * This behavior can conveniently be produced by normalizing all
916
 * line breaks to \#xA on input, before parsing.)
917
 *
918
 * @param ctxt  the XML parser context
919
 * @param len  pointer to the length of the char read
920
 * @returns the current char value and its length
921
 */
922
923
int
924
0
xmlCurrentChar(xmlParserCtxt *ctxt, int *len) {
925
0
    const unsigned char *cur;
926
0
    size_t avail;
927
0
    int c;
928
929
0
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
930
931
0
    avail = ctxt->input->end - ctxt->input->cur;
932
933
0
    if (avail < INPUT_CHUNK) {
934
0
        xmlParserGrow(ctxt);
935
0
        avail = ctxt->input->end - ctxt->input->cur;
936
0
    }
937
938
0
    cur = ctxt->input->cur;
939
0
    c = *cur;
940
941
0
    if (c < 0x80) {
942
  /* 1-byte code */
943
0
        if (c < 0x20) {
944
            /*
945
             *   2.11 End-of-Line Handling
946
             *   the literal two-character sequence "#xD#xA" or a standalone
947
             *   literal #xD, an XML processor must pass to the application
948
             *   the single character #xA.
949
             */
950
0
            if (c == '\r') {
951
                /*
952
                 * TODO: This function shouldn't change the 'cur' pointer
953
                 * as side effect, but the NEXTL macro in parser.c relies
954
                 * on this behavior when incrementing line numbers.
955
                 */
956
0
                if (cur[1] == '\n')
957
0
                    ctxt->input->cur++;
958
0
                *len = 1;
959
0
                c = '\n';
960
0
            } else if (c == 0) {
961
0
                if (ctxt->input->cur >= ctxt->input->end) {
962
0
                    *len = 0;
963
0
                } else {
964
0
                    *len = 1;
965
                    /*
966
                     * TODO: Null bytes should be handled by callers,
967
                     * but this can be tricky.
968
                     */
969
0
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
970
0
                            "Char 0x0 out of allowed range\n");
971
0
                }
972
0
            } else {
973
0
                *len = 1;
974
0
            }
975
0
        } else {
976
0
            *len = 1;
977
0
        }
978
979
0
        return(c);
980
0
    } else {
981
0
        int val;
982
983
0
        if (avail < 2)
984
0
            goto incomplete_sequence;
985
0
        if ((cur[1] & 0xc0) != 0x80)
986
0
            goto encoding_error;
987
988
0
        if (c < 0xe0) {
989
            /* 2-byte code */
990
0
            if (c < 0xc2)
991
0
                goto encoding_error;
992
0
            val = (c & 0x1f) << 6;
993
0
            val |= cur[1] & 0x3f;
994
0
            *len = 2;
995
0
        } else {
996
0
            if (avail < 3)
997
0
                goto incomplete_sequence;
998
0
            if ((cur[2] & 0xc0) != 0x80)
999
0
                goto encoding_error;
1000
1001
0
            if (c < 0xf0) {
1002
                /* 3-byte code */
1003
0
                val = (c & 0xf) << 12;
1004
0
                val |= (cur[1] & 0x3f) << 6;
1005
0
                val |= cur[2] & 0x3f;
1006
0
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
1007
0
                    goto encoding_error;
1008
0
                *len = 3;
1009
0
            } else {
1010
0
                if (avail < 4)
1011
0
                    goto incomplete_sequence;
1012
0
                if ((cur[3] & 0xc0) != 0x80)
1013
0
                    goto encoding_error;
1014
1015
                /* 4-byte code */
1016
0
                val = (c & 0x0f) << 18;
1017
0
                val |= (cur[1] & 0x3f) << 12;
1018
0
                val |= (cur[2] & 0x3f) << 6;
1019
0
                val |= cur[3] & 0x3f;
1020
0
                if ((val < 0x10000) || (val >= 0x110000))
1021
0
                    goto encoding_error;
1022
0
                *len = 4;
1023
0
            }
1024
0
        }
1025
1026
0
        return(val);
1027
0
    }
1028
1029
0
encoding_error:
1030
    /* Only report the first error */
1031
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
1032
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
1033
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
1034
0
    }
1035
0
    *len = 1;
1036
0
    return(XML_INVALID_CHAR);
1037
1038
0
incomplete_sequence:
1039
    /*
1040
     * An encoding problem may arise from a truncated input buffer
1041
     * splitting a character in the middle. In that case do not raise
1042
     * an error but return 0. This should only happen when push parsing
1043
     * char data.
1044
     */
1045
0
    *len = 0;
1046
0
    return(0);
1047
0
}
1048
1049
/**
1050
 * The current char value, if using UTF-8 this may actually span multiple
1051
 * bytes in the input buffer.
1052
 *
1053
 * @deprecated Internal function, do not use.
1054
 *
1055
 * @param ctxt  the XML parser context
1056
 * @param cur  pointer to the beginning of the char
1057
 * @param len  pointer to the length of the char read
1058
 * @returns the current char value and its length
1059
 */
1060
1061
int
1062
xmlStringCurrentChar(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
1063
0
                     const xmlChar *cur, int *len) {
1064
0
    int c;
1065
1066
0
    if ((cur == NULL) || (len == NULL))
1067
0
        return(0);
1068
1069
    /* cur is zero-terminated, so we can lie about its length. */
1070
0
    *len = 4;
1071
0
    c = xmlGetUTF8Char(cur, len);
1072
1073
0
    return((c < 0) ? 0 : c);
1074
0
}
1075
1076
/**
1077
 * append the char value in the array
1078
 *
1079
 * @deprecated Internal function, don't use.
1080
 *
1081
 * @param out  pointer to an array of xmlChar
1082
 * @param val  the char value
1083
 * @returns the number of xmlChar written
1084
 */
1085
int
1086
0
xmlCopyCharMultiByte(xmlChar *out, int val) {
1087
0
    if ((out == NULL) || (val < 0)) return(0);
1088
    /*
1089
     * We are supposed to handle UTF8, check it's valid
1090
     * From rfc2044: encoding of the Unicode values on UTF-8:
1091
     *
1092
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1093
     * 0000 0000-0000 007F   0xxxxxxx
1094
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1095
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1096
     */
1097
0
    if  (val >= 0x80) {
1098
0
  xmlChar *savedout = out;
1099
0
  int bits;
1100
0
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1101
0
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1102
0
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1103
0
  else {
1104
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1105
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1106
0
#endif
1107
0
      return(0);
1108
0
  }
1109
0
  for ( ; bits >= 0; bits-= 6)
1110
0
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1111
0
  return (out - savedout);
1112
0
    }
1113
0
    *out = val;
1114
0
    return 1;
1115
0
}
1116
1117
/**
1118
 * append the char value in the array
1119
 *
1120
 * @deprecated Don't use.
1121
 *
1122
 * @param len  Ignored, compatibility
1123
 * @param out  pointer to an array of xmlChar
1124
 * @param val  the char value
1125
 * @returns the number of xmlChar written
1126
 */
1127
1128
int
1129
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1130
0
    if ((out == NULL) || (val < 0)) return(0);
1131
    /* the len parameter is ignored */
1132
0
    if  (val >= 0x80) {
1133
0
  return(xmlCopyCharMultiByte (out, val));
1134
0
    }
1135
0
    *out = val;
1136
0
    return 1;
1137
0
}
1138
1139
/************************************************************************
1140
 *                  *
1141
 *    Commodity functions to switch encodings     *
1142
 *                  *
1143
 ************************************************************************/
1144
1145
/**
1146
 * Installs a custom implementation to convert between character
1147
 * encodings.
1148
 *
1149
 * This bypasses legacy feature like global encoding handlers or
1150
 * encoding aliases.
1151
 *
1152
 * @since 2.14.0
1153
 * @param ctxt  parser context
1154
 * @param impl  callback
1155
 * @param vctxt  user data
1156
 */
1157
void
1158
xmlCtxtSetCharEncConvImpl(xmlParserCtxt *ctxt, xmlCharEncConvImpl impl,
1159
0
                          void *vctxt) {
1160
0
    if (ctxt == NULL)
1161
0
        return;
1162
1163
0
    ctxt->convImpl = impl;
1164
0
    ctxt->convCtxt = vctxt;
1165
0
}
1166
1167
static xmlParserErrors
1168
0
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1169
0
    xmlChar out[200];
1170
0
    xmlParserInputPtr input = ctxt->input;
1171
0
    xmlCharEncodingHandlerPtr handler;
1172
0
    int inlen, outlen, i;
1173
0
    xmlParserErrors code;
1174
0
    xmlCharEncError res;
1175
1176
0
    *hout = NULL;
1177
1178
    /*
1179
     * To detect the EBCDIC code page, we convert the first 200 bytes
1180
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1181
     */
1182
0
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1183
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1184
0
    if (code != XML_ERR_OK)
1185
0
        return(code);
1186
0
    outlen = sizeof(out) - 1;
1187
0
    inlen = input->end - input->cur;
1188
0
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
1189
0
                           /* flush */ 0);
1190
    /*
1191
     * Return the EBCDIC handler if decoding failed. The error will
1192
     * be reported later.
1193
     */
1194
0
    if (res < 0)
1195
0
        goto done;
1196
0
    out[outlen] = 0;
1197
1198
0
    for (i = 0; i < outlen; i++) {
1199
0
        if (out[i] == '>')
1200
0
            break;
1201
0
        if ((out[i] == 'e') &&
1202
0
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1203
0
            int start, cur, quote;
1204
1205
0
            i += 8;
1206
0
            while (IS_BLANK_CH(out[i]))
1207
0
                i += 1;
1208
0
            if (out[i++] != '=')
1209
0
                break;
1210
0
            while (IS_BLANK_CH(out[i]))
1211
0
                i += 1;
1212
0
            quote = out[i++];
1213
0
            if ((quote != '\'') && (quote != '"'))
1214
0
                break;
1215
0
            start = i;
1216
0
            cur = out[i];
1217
0
            while (((cur >= 'a') && (cur <= 'z')) ||
1218
0
                   ((cur >= 'A') && (cur <= 'Z')) ||
1219
0
                   ((cur >= '0') && (cur <= '9')) ||
1220
0
                   (cur == '.') || (cur == '_') ||
1221
0
                   (cur == '-'))
1222
0
                cur = out[++i];
1223
0
            if (cur != quote)
1224
0
                break;
1225
0
            out[i] = 0;
1226
0
            xmlCharEncCloseFunc(handler);
1227
0
            code = xmlCreateCharEncodingHandler((char *) out + start,
1228
0
                    XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
1229
0
                    &handler);
1230
0
            if (code != XML_ERR_OK)
1231
0
                return(code);
1232
0
            *hout = handler;
1233
0
            return(XML_ERR_OK);
1234
0
        }
1235
0
    }
1236
1237
0
done:
1238
    /*
1239
     * Encoding handlers are stateful, so we have to recreate them.
1240
     */
1241
0
    xmlCharEncCloseFunc(handler);
1242
0
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1243
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1244
0
    if (code != XML_ERR_OK)
1245
0
        return(code);
1246
0
    *hout = handler;
1247
0
    return(XML_ERR_OK);
1248
0
}
1249
1250
/**
1251
 * Use encoding specified by enum to decode input data. This overrides
1252
 * the encoding found in the XML declaration.
1253
 *
1254
 * This function can also be used to override the encoding of chunks
1255
 * passed to #xmlParseChunk.
1256
 *
1257
 * @param ctxt  the parser context
1258
 * @param enc  the encoding value (number)
1259
 * @returns 0 in case of success, -1 otherwise
1260
 */
1261
int
1262
xmlSwitchEncoding(xmlParserCtxt *ctxt, xmlCharEncoding enc)
1263
8.13k
{
1264
8.13k
    xmlCharEncodingHandlerPtr handler = NULL;
1265
8.13k
    int ret;
1266
8.13k
    xmlParserErrors code;
1267
1268
8.13k
    if ((ctxt == NULL) || (ctxt->input == NULL))
1269
0
        return(-1);
1270
1271
8.13k
    code = xmlLookupCharEncodingHandler(enc, &handler);
1272
8.13k
    if (code != 0) {
1273
0
        xmlFatalErr(ctxt, code, NULL);
1274
0
        return(-1);
1275
0
    }
1276
1277
8.13k
    ret = xmlSwitchToEncoding(ctxt, handler);
1278
1279
8.13k
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1280
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1281
0
    }
1282
1283
8.13k
    return(ret);
1284
8.13k
}
1285
1286
/**
1287
 * @param ctxt  the parser context
1288
 * @param input  the input strea,
1289
 * @param encoding  the encoding name
1290
 * @returns 0 in case of success, -1 otherwise
1291
 */
1292
static int
1293
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1294
0
                           const char *encoding) {
1295
0
    xmlCharEncodingHandlerPtr handler;
1296
0
    xmlParserErrors res;
1297
1298
0
    if (encoding == NULL)
1299
0
        return(-1);
1300
1301
0
    res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
1302
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1303
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1304
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1305
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1306
0
        return(-1);
1307
0
    } else if (res != XML_ERR_OK) {
1308
0
        xmlFatalErr(ctxt, res, encoding);
1309
0
        return(-1);
1310
0
    }
1311
1312
0
    res  = xmlInputSetEncodingHandler(input, handler);
1313
0
    if (res != XML_ERR_OK) {
1314
0
        xmlCtxtErrIO(ctxt, res, NULL);
1315
0
        return(-1);
1316
0
    }
1317
1318
0
    return(0);
1319
0
}
1320
1321
/**
1322
 * Use specified encoding to decode input data. This overrides the
1323
 * encoding found in the XML declaration.
1324
 *
1325
 * This function can also be used to override the encoding of chunks
1326
 * passed to #xmlParseChunk.
1327
 *
1328
 * @since 2.13.0
1329
 *
1330
 * @param ctxt  the parser context
1331
 * @param encoding  the encoding name
1332
 * @returns 0 in case of success, -1 otherwise
1333
 */
1334
int
1335
0
xmlSwitchEncodingName(xmlParserCtxt *ctxt, const char *encoding) {
1336
0
    if (ctxt == NULL)
1337
0
        return(-1);
1338
1339
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1340
0
}
1341
1342
/**
1343
 * Use encoding handler to decode input data.
1344
 *
1345
 * Closes the handler on error.
1346
 *
1347
 * @param input  the input stream
1348
 * @param handler  the encoding handler
1349
 * @returns an xmlParserErrors code.
1350
 */
1351
xmlParserErrors
1352
xmlInputSetEncodingHandler(xmlParserInput *input,
1353
8.51k
                           xmlCharEncodingHandler *handler) {
1354
8.51k
    xmlParserInputBufferPtr in;
1355
8.51k
    xmlBufPtr buf;
1356
8.51k
    xmlParserErrors code = XML_ERR_OK;
1357
1358
8.51k
    if ((input == NULL) || (input->buf == NULL)) {
1359
0
        xmlCharEncCloseFunc(handler);
1360
0
  return(XML_ERR_ARGUMENT);
1361
0
    }
1362
8.51k
    in = input->buf;
1363
1364
8.51k
    input->flags |= XML_INPUT_HAS_ENCODING;
1365
1366
    /*
1367
     * UTF-8 requires no encoding handler.
1368
     */
1369
8.51k
    if ((handler != NULL) &&
1370
8.51k
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1371
0
        xmlCharEncCloseFunc(handler);
1372
0
        handler = NULL;
1373
0
    }
1374
1375
8.51k
    if (in->encoder == handler)
1376
291
        return(XML_ERR_OK);
1377
1378
8.22k
    if (in->encoder != NULL) {
1379
        /*
1380
         * Switching encodings during parsing is a really bad idea,
1381
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1382
         * separate calls to xmlParseChunk.
1383
         *
1384
         * TODO: We should check whether the "raw" input buffer is empty and
1385
         * convert the old content using the old encoder.
1386
         */
1387
1388
0
        xmlCharEncCloseFunc(in->encoder);
1389
0
        in->encoder = handler;
1390
0
        return(XML_ERR_OK);
1391
0
    }
1392
1393
8.22k
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1394
8.22k
    if (buf == NULL) {
1395
203
        xmlCharEncCloseFunc(handler);
1396
203
        return(XML_ERR_NO_MEMORY);
1397
203
    }
1398
1399
8.02k
    in->encoder = handler;
1400
8.02k
    in->raw = in->buffer;
1401
8.02k
    in->buffer = buf;
1402
1403
    /*
1404
     * Is there already some content down the pipe to convert ?
1405
     */
1406
8.02k
    if (input->end > input->base) {
1407
8.02k
        size_t processed;
1408
8.02k
        size_t nbchars;
1409
8.02k
        xmlCharEncError res;
1410
1411
        /*
1412
         * Shrink the current input buffer.
1413
         * Move it as the raw buffer and create a new input buffer
1414
         */
1415
8.02k
        processed = input->cur - input->base;
1416
8.02k
        xmlBufShrink(in->raw, processed);
1417
8.02k
        input->consumed += processed;
1418
8.02k
        in->rawconsumed = processed;
1419
1420
        /*
1421
         * If we're push-parsing, we must convert the whole buffer.
1422
         *
1423
         * If we're pull-parsing, we could be parsing from a huge
1424
         * memory buffer which we don't want to convert completely.
1425
         */
1426
8.02k
        if (input->flags & XML_INPUT_PROGRESSIVE)
1427
4.07k
            nbchars = SIZE_MAX;
1428
3.95k
        else
1429
3.95k
            nbchars = 4000 /* MINLEN */;
1430
8.02k
        res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
1431
8.02k
        if (res != XML_ENC_ERR_SUCCESS)
1432
155
            code = in->error;
1433
8.02k
    }
1434
1435
8.02k
    xmlBufResetInput(in->buffer, input);
1436
1437
8.02k
    return(code);
1438
8.22k
}
1439
1440
/**
1441
 * Use encoding handler to decode input data.
1442
 *
1443
 * @deprecated Internal function, don't use.
1444
 *
1445
 * @param ctxt  the parser context, only for error reporting
1446
 * @param input  the input stream
1447
 * @param handler  the encoding handler
1448
 * @returns 0 in case of success, -1 otherwise
1449
 */
1450
int
1451
xmlSwitchInputEncoding(xmlParserCtxt *ctxt, xmlParserInput *input,
1452
0
                       xmlCharEncodingHandler *handler) {
1453
0
    xmlParserErrors code = xmlInputSetEncodingHandler(input, handler);
1454
1455
0
    if (code != XML_ERR_OK) {
1456
0
        xmlCtxtErrIO(ctxt, code, NULL);
1457
0
        return(-1);
1458
0
    }
1459
1460
0
    return(0);
1461
0
}
1462
1463
/**
1464
 * Use encoding handler to decode input data.
1465
 *
1466
 * This function can be used to enforce the encoding of chunks passed
1467
 * to #xmlParseChunk.
1468
 *
1469
 * @param ctxt  the parser context
1470
 * @param handler  the encoding handler
1471
 * @returns 0 in case of success, -1 otherwise
1472
 */
1473
int
1474
xmlSwitchToEncoding(xmlParserCtxt *ctxt, xmlCharEncodingHandler *handler)
1475
8.13k
{
1476
8.13k
    xmlParserErrors code;
1477
1478
8.13k
    if (ctxt == NULL)
1479
0
        return(-1);
1480
1481
8.13k
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1482
8.13k
    if (code != XML_ERR_OK) {
1483
300
        xmlCtxtErrIO(ctxt, code, NULL);
1484
300
        return(-1);
1485
300
    }
1486
1487
7.83k
    return(0);
1488
8.13k
}
1489
1490
/**
1491
 * Handle optional BOM, detect and switch to encoding.
1492
 *
1493
 * Assumes that there are at least four bytes in the input buffer.
1494
 *
1495
 * @param ctxt  the parser context
1496
 */
1497
void
1498
19.1k
xmlDetectEncoding(xmlParserCtxt *ctxt) {
1499
19.1k
    const xmlChar *in;
1500
19.1k
    xmlCharEncoding enc;
1501
19.1k
    int bomSize;
1502
19.1k
    int autoFlag = 0;
1503
1504
19.1k
    if (xmlParserGrow(ctxt) < 0)
1505
0
        return;
1506
19.1k
    in = ctxt->input->cur;
1507
19.1k
    if (ctxt->input->end - in < 4)
1508
859
        return;
1509
1510
18.3k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1511
        /*
1512
         * If the encoding was already set, only skip the BOM which was
1513
         * possibly decoded to UTF-8.
1514
         */
1515
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1516
0
            ctxt->input->cur += 3;
1517
0
        }
1518
1519
0
        return;
1520
0
    }
1521
1522
18.3k
    enc = XML_CHAR_ENCODING_NONE;
1523
18.3k
    bomSize = 0;
1524
1525
    /*
1526
     * BOM sniffing and detection of initial bytes of an XML
1527
     * declaration.
1528
     *
1529
     * The HTML5 spec doesn't cover UTF-32 (UCS-4) or EBCDIC.
1530
     */
1531
18.3k
    switch (in[0]) {
1532
546
        case 0x00:
1533
546
            if ((!ctxt->html) &&
1534
546
                (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1535
0
                enc = XML_CHAR_ENCODING_UCS4BE;
1536
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1537
546
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1538
                /*
1539
                 * TODO: The HTML5 spec requires to check that the
1540
                 * next codepoint is an 'x'.
1541
                 */
1542
51
                enc = XML_CHAR_ENCODING_UTF16BE;
1543
51
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1544
51
            }
1545
546
            break;
1546
1547
10.8k
        case 0x3C:
1548
10.8k
            if (in[1] == 0x00) {
1549
82
                if ((!ctxt->html) &&
1550
82
                    (in[2] == 0x00) && (in[3] == 0x00)) {
1551
0
                    enc = XML_CHAR_ENCODING_UCS4LE;
1552
0
                    autoFlag = XML_INPUT_AUTO_OTHER;
1553
82
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1554
                    /*
1555
                     * TODO: The HTML5 spec requires to check that the
1556
                     * next codepoint is an 'x'.
1557
                     */
1558
53
                    enc = XML_CHAR_ENCODING_UTF16LE;
1559
53
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1560
53
                }
1561
82
            }
1562
10.8k
            break;
1563
1564
5
        case 0x4C:
1565
5
      if ((!ctxt->html) &&
1566
5
                (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1567
0
          enc = XML_CHAR_ENCODING_EBCDIC;
1568
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1569
0
            }
1570
5
            break;
1571
1572
198
        case 0xEF:
1573
198
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1574
122
                enc = XML_CHAR_ENCODING_UTF8;
1575
122
                autoFlag = XML_INPUT_AUTO_UTF8;
1576
122
                bomSize = 3;
1577
122
            }
1578
198
            break;
1579
1580
378
        case 0xFE:
1581
378
            if (in[1] == 0xFF) {
1582
333
                enc = XML_CHAR_ENCODING_UTF16BE;
1583
333
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1584
333
                bomSize = 2;
1585
333
            }
1586
378
            break;
1587
1588
512
        case 0xFF:
1589
512
            if (in[1] == 0xFE) {
1590
287
                enc = XML_CHAR_ENCODING_UTF16LE;
1591
287
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1592
287
                bomSize = 2;
1593
287
            }
1594
512
            break;
1595
18.3k
    }
1596
1597
18.3k
    if (bomSize > 0) {
1598
742
        ctxt->input->cur += bomSize;
1599
742
    }
1600
1601
18.3k
    if (enc != XML_CHAR_ENCODING_NONE) {
1602
846
        ctxt->input->flags |= autoFlag;
1603
1604
846
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1605
0
            xmlCharEncodingHandlerPtr handler;
1606
0
            xmlParserErrors res;
1607
1608
0
            res = xmlDetectEBCDIC(ctxt, &handler);
1609
0
            if (res != XML_ERR_OK) {
1610
0
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1611
0
            } else {
1612
0
                xmlSwitchToEncoding(ctxt, handler);
1613
0
            }
1614
846
        } else {
1615
846
            xmlSwitchEncoding(ctxt, enc);
1616
846
        }
1617
846
    }
1618
18.3k
}
1619
1620
/**
1621
 * Set the encoding from a declaration in the document.
1622
 *
1623
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1624
 * about encoding mismatches.
1625
 *
1626
 * Takes ownership of 'encoding'.
1627
 *
1628
 * @param ctxt  the parser context
1629
 * @param encoding  declared encoding
1630
 */
1631
void
1632
1.87k
xmlSetDeclaredEncoding(xmlParserCtxt *ctxt, xmlChar *encoding) {
1633
1.87k
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1634
1.87k
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1635
404
        xmlCharEncodingHandlerPtr handler;
1636
404
        xmlParserErrors res;
1637
404
        xmlCharEncFlags flags = XML_ENC_INPUT;
1638
1639
        /*
1640
         * xmlSwitchEncodingName treats unsupported encodings as
1641
         * warnings, but we want it to be an error in an encoding
1642
         * declaration.
1643
         */
1644
404
        if (ctxt->html)
1645
404
            flags |= XML_ENC_HTML;
1646
404
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1647
404
                flags, ctxt->convImpl, ctxt->convCtxt, &handler);
1648
404
        if (res != XML_ERR_OK) {
1649
24
            xmlFatalErr(ctxt, res, (const char *) encoding);
1650
24
            xmlFree(encoding);
1651
24
            return;
1652
24
        }
1653
1654
380
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1655
380
        if (res != XML_ERR_OK) {
1656
58
            xmlCtxtErrIO(ctxt, res, NULL);
1657
58
            xmlFree(encoding);
1658
58
            return;
1659
58
        }
1660
1661
322
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1662
1.47k
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1663
0
        static const char *allowedUTF8[] = {
1664
0
            "UTF-8", "UTF8", NULL
1665
0
        };
1666
0
        static const char *allowedUTF16LE[] = {
1667
0
            "UTF-16", "UTF-16LE", "UTF16", NULL
1668
0
        };
1669
0
        static const char *allowedUTF16BE[] = {
1670
0
            "UTF-16", "UTF-16BE", "UTF16", NULL
1671
0
        };
1672
0
        const char **allowed = NULL;
1673
0
        const char *autoEnc = NULL;
1674
1675
0
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1676
0
            case XML_INPUT_AUTO_UTF8:
1677
0
                allowed = allowedUTF8;
1678
0
                autoEnc = "UTF-8";
1679
0
                break;
1680
0
            case XML_INPUT_AUTO_UTF16LE:
1681
0
                allowed = allowedUTF16LE;
1682
0
                autoEnc = "UTF-16LE";
1683
0
                break;
1684
0
            case XML_INPUT_AUTO_UTF16BE:
1685
0
                allowed = allowedUTF16BE;
1686
0
                autoEnc = "UTF-16BE";
1687
0
                break;
1688
0
        }
1689
1690
0
        if (allowed != NULL) {
1691
0
            const char **p;
1692
0
            int match = 0;
1693
1694
0
            for (p = allowed; *p != NULL; p++) {
1695
0
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1696
0
                    match = 1;
1697
0
                    break;
1698
0
                }
1699
0
            }
1700
1701
0
            if (match == 0) {
1702
0
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1703
0
                              "Encoding '%s' doesn't match "
1704
0
                              "auto-detected '%s'\n",
1705
0
                              encoding, BAD_CAST autoEnc);
1706
0
                xmlFree(encoding);
1707
0
                encoding = xmlStrdup(BAD_CAST autoEnc);
1708
0
                if (encoding == NULL)
1709
0
                    xmlCtxtErrMemory(ctxt);
1710
0
            }
1711
0
        }
1712
0
    }
1713
1714
1.79k
    if (ctxt->encoding != NULL)
1715
1.33k
        xmlFree(ctxt->encoding);
1716
1.79k
    ctxt->encoding = encoding;
1717
1.79k
}
1718
1719
/**
1720
 * @since 2.14.0
1721
 *
1722
 * @param ctxt  parser context
1723
 * @returns the encoding from the encoding declaration. This can differ
1724
 * from the actual encoding.
1725
 */
1726
const xmlChar *
1727
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxt *ctxt) {
1728
0
    if (ctxt == NULL)
1729
0
        return(NULL);
1730
1731
0
    return(ctxt->encoding);
1732
0
}
1733
1734
/**
1735
 * @param ctxt  the parser context
1736
 * @returns the actual used to parse the document. This can differ from
1737
 * the declared encoding.
1738
 */
1739
const xmlChar *
1740
18.3k
xmlGetActualEncoding(xmlParserCtxt *ctxt) {
1741
18.3k
    const xmlChar *encoding = NULL;
1742
1743
18.3k
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1744
18.3k
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1745
        /* Preserve encoding exactly */
1746
979
        encoding = ctxt->encoding;
1747
17.3k
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1748
6.90k
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1749
10.4k
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1750
286
        encoding = BAD_CAST "UTF-8";
1751
286
    }
1752
1753
18.3k
    return(encoding);
1754
18.3k
}
1755
1756
/************************************************************************
1757
 *                  *
1758
 *  Commodity functions to handle entities processing   *
1759
 *                  *
1760
 ************************************************************************/
1761
1762
/**
1763
 * Free up an input stream.
1764
 *
1765
 * @param input  an xmlParserInput
1766
 */
1767
void
1768
19.1k
xmlFreeInputStream(xmlParserInput *input) {
1769
19.1k
    if (input == NULL) return;
1770
1771
19.1k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1772
19.1k
    if (input->version != NULL) xmlFree((char *) input->version);
1773
19.1k
    if ((input->free != NULL) && (input->base != NULL))
1774
0
        input->free((xmlChar *) input->base);
1775
19.1k
    if (input->buf != NULL)
1776
19.1k
        xmlFreeParserInputBuffer(input->buf);
1777
19.1k
    xmlFree(input);
1778
19.1k
}
1779
1780
/**
1781
 * Create a new input stream structure.
1782
 *
1783
 * @deprecated Use #xmlNewInputFromUrl or similar functions.
1784
 *
1785
 * @param ctxt  an XML parser context
1786
 * @returns the new input stream or NULL
1787
 */
1788
xmlParserInput *
1789
0
xmlNewInputStream(xmlParserCtxt *ctxt) {
1790
0
    xmlParserInputPtr input;
1791
1792
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1793
0
    if (input == NULL) {
1794
0
        xmlCtxtErrMemory(ctxt);
1795
0
  return(NULL);
1796
0
    }
1797
0
    memset(input, 0, sizeof(xmlParserInput));
1798
0
    input->line = 1;
1799
0
    input->col = 1;
1800
1801
0
    return(input);
1802
0
}
1803
1804
/**
1805
 * Creates a new parser input from the filesystem, the network or
1806
 * a user-defined resource loader.
1807
 *
1808
 * @param ctxt  parser context
1809
 * @param url  filename or URL
1810
 * @param publicId  publid ID from doctype (optional)
1811
 * @param encoding  character encoding (optional)
1812
 * @param flags  unused, pass 0
1813
 * @returns a new parser input.
1814
 */
1815
xmlParserInput *
1816
xmlCtxtNewInputFromUrl(xmlParserCtxt *ctxt, const char *url,
1817
                       const char *publicId, const char *encoding,
1818
0
                       xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
1819
0
    xmlParserInputPtr input;
1820
1821
0
    if ((ctxt == NULL) || (url == NULL))
1822
0
  return(NULL);
1823
1824
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1825
0
    if (input == NULL)
1826
0
        return(NULL);
1827
1828
0
    if (encoding != NULL)
1829
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1830
1831
0
    return(input);
1832
0
}
1833
1834
/**
1835
 * Internal helper function.
1836
 *
1837
 * @param buf  parser input buffer
1838
 * @param filename  filename or URL
1839
 * @returns a new parser input.
1840
 */
1841
static xmlParserInputPtr
1842
19.1k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1843
19.1k
    xmlParserInputPtr input;
1844
1845
19.1k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1846
19.1k
    if (input == NULL) {
1847
26
  xmlFreeParserInputBuffer(buf);
1848
26
  return(NULL);
1849
26
    }
1850
19.1k
    memset(input, 0, sizeof(xmlParserInput));
1851
19.1k
    input->line = 1;
1852
19.1k
    input->col = 1;
1853
1854
19.1k
    input->buf = buf;
1855
19.1k
    xmlBufResetInput(input->buf->buffer, input);
1856
1857
19.1k
    if (filename != NULL) {
1858
0
        input->filename = xmlMemStrdup(filename);
1859
0
        if (input->filename == NULL) {
1860
0
            xmlFreeInputStream(input);
1861
0
            return(NULL);
1862
0
        }
1863
0
    }
1864
1865
19.1k
    return(input);
1866
19.1k
}
1867
1868
/**
1869
 * Creates a new parser input to read from a memory area.
1870
 *
1871
 * `url` is used as base to resolve external entities and for
1872
 * error reporting.
1873
 *
1874
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1875
 * stay unchanged until parsing has finished. This can avoid
1876
 * temporary copies.
1877
 *
1878
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1879
 * area must contain a zero byte after the buffer at position `size`.
1880
 * This can avoid temporary copies.
1881
 *
1882
 * @since 2.14.0
1883
 *
1884
 * @param url  base URL (optional)
1885
 * @param mem  pointer to char array
1886
 * @param size  size of array
1887
 * @param flags  optimization hints
1888
 * @returns a new parser input or NULL if a memory allocation failed.
1889
 */
1890
xmlParserInput *
1891
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1892
9.60k
                      xmlParserInputFlags flags) {
1893
9.60k
    xmlParserInputBufferPtr buf;
1894
1895
9.60k
    if (mem == NULL)
1896
0
  return(NULL);
1897
1898
9.60k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1899
9.60k
    if (buf == NULL)
1900
5
        return(NULL);
1901
1902
9.59k
    return(xmlNewInputInternal(buf, url));
1903
9.60k
}
1904
1905
/**
1906
 * @param ctxt  parser context
1907
 * @param url  base URL (optional)
1908
 * @param mem  pointer to char array
1909
 * @param size  size of array
1910
 * @param encoding  character encoding (optional)
1911
 * @param flags  optimization hints
1912
 * @returns a new parser input or NULL in case of error.
1913
 */
1914
xmlParserInput *
1915
xmlCtxtNewInputFromMemory(xmlParserCtxt *ctxt, const char *url,
1916
                          const void *mem, size_t size,
1917
9.60k
                          const char *encoding, xmlParserInputFlags flags) {
1918
9.60k
    xmlParserInputPtr input;
1919
1920
9.60k
    if ((ctxt == NULL) || (mem == NULL))
1921
0
  return(NULL);
1922
1923
9.60k
    input = xmlNewInputFromMemory(url, mem, size, flags);
1924
9.60k
    if (input == NULL) {
1925
30
        xmlCtxtErrMemory(ctxt);
1926
30
        return(NULL);
1927
30
    }
1928
1929
9.57k
    if (encoding != NULL)
1930
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1931
1932
9.57k
    return(input);
1933
9.60k
}
1934
1935
/**
1936
 * Creates a new parser input to read from a zero-terminated string.
1937
 *
1938
 * `url` is used as base to resolve external entities and for
1939
 * error reporting.
1940
 *
1941
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1942
 * stay unchanged until parsing has finished. This can avoid
1943
 * temporary copies.
1944
 *
1945
 * @since 2.14.0
1946
 *
1947
 * @param url  base URL (optional)
1948
 * @param str  zero-terminated string
1949
 * @param flags  optimization hints
1950
 * @returns a new parser input or NULL if a memory allocation failed.
1951
 */
1952
xmlParserInput *
1953
xmlNewInputFromString(const char *url, const char *str,
1954
0
                      xmlParserInputFlags flags) {
1955
0
    xmlParserInputBufferPtr buf;
1956
1957
0
    if (str == NULL)
1958
0
  return(NULL);
1959
1960
0
    buf = xmlNewInputBufferString(str, flags);
1961
0
    if (buf == NULL)
1962
0
        return(NULL);
1963
1964
0
    return(xmlNewInputInternal(buf, url));
1965
0
}
1966
1967
/**
1968
 * @param ctxt  parser context
1969
 * @param url  base URL (optional)
1970
 * @param str  zero-terminated string
1971
 * @param encoding  character encoding (optional)
1972
 * @param flags  optimization hints
1973
 * @returns a new parser input.
1974
 */
1975
xmlParserInput *
1976
xmlCtxtNewInputFromString(xmlParserCtxt *ctxt, const char *url,
1977
                          const char *str, const char *encoding,
1978
0
                          xmlParserInputFlags flags) {
1979
0
    xmlParserInputPtr input;
1980
1981
0
    if ((ctxt == NULL) || (str == NULL))
1982
0
  return(NULL);
1983
1984
0
    input = xmlNewInputFromString(url, str, flags);
1985
0
    if (input == NULL) {
1986
0
        xmlCtxtErrMemory(ctxt);
1987
0
        return(NULL);
1988
0
    }
1989
1990
0
    if (encoding != NULL)
1991
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1992
1993
0
    return(input);
1994
0
}
1995
1996
/**
1997
 * Creates a new parser input to read from a file descriptor.
1998
 *
1999
 * `url` is used as base to resolve external entities and for
2000
 * error reporting.
2001
 *
2002
 * `fd` is closed after parsing has finished.
2003
 *
2004
 * Supported `flags` are XML_INPUT_UNZIP to decompress data
2005
 * automatically. This feature is deprecated and will be removed
2006
 * in a future release.
2007
 *
2008
 * @since 2.14.0
2009
 *
2010
 * @param url  base URL (optional)
2011
 * @param fd  file descriptor
2012
 * @param flags  input flags
2013
 * @returns a new parser input or NULL if a memory allocation failed.
2014
 */
2015
xmlParserInput *
2016
0
xmlNewInputFromFd(const char *url, int fd, xmlParserInputFlags flags) {
2017
0
    xmlParserInputBufferPtr buf;
2018
2019
0
    if (fd < 0)
2020
0
  return(NULL);
2021
2022
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2023
0
    if (buf == NULL)
2024
0
        return(NULL);
2025
2026
0
    if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) {
2027
0
        xmlFreeParserInputBuffer(buf);
2028
0
        return(NULL);
2029
0
    }
2030
2031
0
    return(xmlNewInputInternal(buf, url));
2032
0
}
2033
2034
/**
2035
 * @param ctxt  parser context
2036
 * @param url  base URL (optional)
2037
 * @param fd  file descriptor
2038
 * @param encoding  character encoding (optional)
2039
 * @param flags  input flags, forwarded to #xmlNewInputFromFd
2040
 * @returns a new parser input.
2041
 */
2042
xmlParserInput *
2043
xmlCtxtNewInputFromFd(xmlParserCtxt *ctxt, const char *url,
2044
                      int fd, const char *encoding,
2045
0
                      xmlParserInputFlags flags) {
2046
0
    xmlParserInputPtr input;
2047
2048
0
    if ((ctxt == NULL) || (fd < 0))
2049
0
  return(NULL);
2050
2051
0
    if (ctxt->options & XML_PARSE_UNZIP)
2052
0
        flags |= XML_INPUT_UNZIP;
2053
2054
0
    input = xmlNewInputFromFd(url, fd, flags);
2055
0
    if (input == NULL) {
2056
0
  xmlCtxtErrMemory(ctxt);
2057
0
        return(NULL);
2058
0
    }
2059
2060
0
    if (encoding != NULL)
2061
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2062
2063
0
    return(input);
2064
0
}
2065
2066
/**
2067
 * Creates a new parser input to read from input callbacks and
2068
 * context.
2069
 *
2070
 * `url` is used as base to resolve external entities and for
2071
 * error reporting.
2072
 *
2073
 * `ioRead` is called to read new data into a provided buffer.
2074
 * It must return the number of bytes written into the buffer
2075
 * or a negative xmlParserErrors code on failure.
2076
 *
2077
 * `ioClose` is called after parsing has finished.
2078
 *
2079
 * `ioCtxt` is an opaque pointer passed to the callbacks.
2080
 *
2081
 * @since 2.14.0
2082
 *
2083
 * @param url  base URL (optional)
2084
 * @param ioRead  read callback
2085
 * @param ioClose  close callback (optional)
2086
 * @param ioCtxt  IO context
2087
 * @param flags  unused, pass 0
2088
 * @returns a new parser input or NULL if a memory allocation failed.
2089
 */
2090
xmlParserInput *
2091
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2092
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2093
0
                  xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
2094
0
    xmlParserInputBufferPtr buf;
2095
2096
0
    if (ioRead == NULL)
2097
0
  return(NULL);
2098
2099
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2100
0
    if (buf == NULL) {
2101
0
        if (ioClose != NULL)
2102
0
            ioClose(ioCtxt);
2103
0
        return(NULL);
2104
0
    }
2105
2106
0
    buf->context = ioCtxt;
2107
0
    buf->readcallback = ioRead;
2108
0
    buf->closecallback = ioClose;
2109
2110
0
    return(xmlNewInputInternal(buf, url));
2111
0
}
2112
2113
/**
2114
 * @param ctxt  parser context
2115
 * @param url  base URL (optional)
2116
 * @param ioRead  read callback
2117
 * @param ioClose  close callback (optional)
2118
 * @param ioCtxt  IO context
2119
 * @param encoding  character encoding (optional)
2120
 * @param flags  unused, pass 0
2121
 * @returns a new parser input.
2122
 */
2123
xmlParserInput *
2124
xmlCtxtNewInputFromIO(xmlParserCtxt *ctxt, const char *url,
2125
                      xmlInputReadCallback ioRead,
2126
                      xmlInputCloseCallback ioClose,
2127
                      void *ioCtxt, const char *encoding,
2128
0
                      xmlParserInputFlags flags) {
2129
0
    xmlParserInputPtr input;
2130
2131
0
    if ((ctxt == NULL) || (ioRead == NULL))
2132
0
  return(NULL);
2133
2134
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2135
0
    if (input == NULL) {
2136
0
        xmlCtxtErrMemory(ctxt);
2137
0
        return(NULL);
2138
0
    }
2139
2140
0
    if (encoding != NULL)
2141
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2142
2143
0
    return(input);
2144
0
}
2145
2146
/**
2147
 * Creates a new parser input for a push parser.
2148
 *
2149
 * @param url  base URL (optional)
2150
 * @param chunk  pointer to char array
2151
 * @param size  size of array
2152
 * @returns a new parser input or NULL if a memory allocation failed.
2153
 */
2154
xmlParserInput *
2155
9.60k
xmlNewPushInput(const char *url, const char *chunk, int size) {
2156
9.60k
    xmlParserInputBufferPtr buf;
2157
9.60k
    xmlParserInputPtr input;
2158
2159
9.60k
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2160
9.60k
    if (buf == NULL)
2161
4
        return(NULL);
2162
2163
9.59k
    input = xmlNewInputInternal(buf, url);
2164
9.59k
    if (input == NULL)
2165
1
  return(NULL);
2166
2167
9.59k
    input->flags |= XML_INPUT_PROGRESSIVE;
2168
2169
9.59k
    if ((size > 0) && (chunk != NULL)) {
2170
0
        int res;
2171
2172
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2173
0
        xmlBufResetInput(input->buf->buffer, input);
2174
0
        if (res < 0) {
2175
0
            xmlFreeInputStream(input);
2176
0
            return(NULL);
2177
0
        }
2178
0
    }
2179
2180
9.59k
    return(input);
2181
9.59k
}
2182
2183
/**
2184
 * Create a new input stream structure encapsulating the `input` into
2185
 * a stream suitable for the parser.
2186
 *
2187
 * @param ctxt  an XML parser context
2188
 * @param buf  an input buffer
2189
 * @param enc  the charset encoding if known
2190
 * @returns the new input stream or NULL
2191
 */
2192
xmlParserInput *
2193
xmlNewIOInputStream(xmlParserCtxt *ctxt, xmlParserInputBuffer *buf,
2194
0
              xmlCharEncoding enc) {
2195
0
    xmlParserInputPtr input;
2196
0
    const char *encoding;
2197
2198
0
    if ((ctxt == NULL) || (buf == NULL))
2199
0
        return(NULL);
2200
2201
0
    input = xmlNewInputInternal(buf, NULL);
2202
0
    if (input == NULL) {
2203
0
        xmlCtxtErrMemory(ctxt);
2204
0
  return(NULL);
2205
0
    }
2206
2207
0
    encoding = xmlGetCharEncodingName(enc);
2208
0
    if (encoding != NULL)
2209
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2210
2211
0
    return(input);
2212
0
}
2213
2214
/**
2215
 * Create a new input stream based on an xmlEntity
2216
 *
2217
 * @deprecated Internal function, do not use.
2218
 *
2219
 * @param ctxt  an XML parser context
2220
 * @param ent  an Entity pointer
2221
 * @returns the new input stream or NULL
2222
 */
2223
xmlParserInput *
2224
0
xmlNewEntityInputStream(xmlParserCtxt *ctxt, xmlEntity *ent) {
2225
0
    xmlParserInputPtr input;
2226
2227
0
    if ((ctxt == NULL) || (ent == NULL))
2228
0
  return(NULL);
2229
2230
0
    if (ent->content != NULL) {
2231
0
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2232
0
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2233
0
    } else if (ent->URI != NULL) {
2234
0
        xmlResourceType rtype;
2235
2236
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2237
0
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2238
0
        else
2239
0
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2240
2241
0
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2242
0
                                (char *) ent->ExternalID, rtype);
2243
0
    } else {
2244
0
        return(NULL);
2245
0
    }
2246
2247
0
    if (input == NULL)
2248
0
        return(NULL);
2249
2250
0
    input->entity = ent;
2251
2252
0
    return(input);
2253
0
}
2254
2255
/**
2256
 * Create a new input stream based on a memory buffer.
2257
 *
2258
 * @deprecated Use #xmlNewInputFromString.
2259
 *
2260
 * @param ctxt  an XML parser context
2261
 * @param buffer  a memory buffer
2262
 * @returns the new input stream
2263
 */
2264
xmlParserInput *
2265
0
xmlNewStringInputStream(xmlParserCtxt *ctxt, const xmlChar *buffer) {
2266
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2267
0
                                     NULL, 0));
2268
0
}
2269
2270
2271
/****************************************************************
2272
 *                *
2273
 *    External entities loading     *
2274
 *                *
2275
 ****************************************************************/
2276
2277
#ifdef LIBXML_CATALOG_ENABLED
2278
2279
/**
2280
 * Resolves an external ID or URL against the appropriate catalog.
2281
 *
2282
 * @param url  the URL or system ID for the entity to load
2283
 * @param publicId  the public ID for the entity to load (optional)
2284
 * @param localCatalogs  local catalogs (optional)
2285
 * @param allowGlobal  allow global system catalog
2286
 * @param out  resulting resource or NULL
2287
 * @returns an xmlParserErrors code
2288
 */
2289
static xmlParserErrors
2290
xmlResolveFromCatalog(const char *url, const char *publicId,
2291
0
                      void *localCatalogs, int allowGlobal, char **out) {
2292
0
    xmlError oldError;
2293
0
    xmlError *lastError;
2294
0
    char *resource = NULL;
2295
0
    xmlParserErrors code;
2296
2297
0
    if (out == NULL)
2298
0
        return(XML_ERR_ARGUMENT);
2299
0
    *out = NULL;
2300
0
    if ((localCatalogs == NULL) && (!allowGlobal))
2301
0
        return(XML_ERR_OK);
2302
2303
    /*
2304
     * Don't try to resolve if local file exists.
2305
     *
2306
     * TODO: This is somewhat non-deterministic.
2307
     */
2308
0
    if (xmlNoNetExists(url))
2309
0
        return(XML_ERR_OK);
2310
2311
    /* Backup and reset last error */
2312
0
    lastError = xmlGetLastErrorInternal();
2313
0
    oldError = *lastError;
2314
0
    lastError->code = XML_ERR_OK;
2315
2316
    /*
2317
     * Do a local lookup
2318
     */
2319
0
    if (localCatalogs != NULL) {
2320
0
        resource = (char *) xmlCatalogLocalResolve(localCatalogs,
2321
0
                                                   BAD_CAST publicId,
2322
0
                                                   BAD_CAST url);
2323
0
    }
2324
    /*
2325
     * Try a global lookup
2326
     */
2327
0
    if ((resource == NULL) && (allowGlobal)) {
2328
0
        resource = (char *) xmlCatalogResolve(BAD_CAST publicId,
2329
0
                                              BAD_CAST url);
2330
0
    }
2331
2332
    /*
2333
     * Try to resolve url using URI rules.
2334
     *
2335
     * TODO: We should consider using only a single resolution
2336
     * mechanism depending on resource type. Either by external ID
2337
     * or by URI.
2338
     */
2339
0
    if ((resource == NULL) && (url != NULL)) {
2340
0
        if (localCatalogs != NULL) {
2341
0
            resource = (char *) xmlCatalogLocalResolveURI(localCatalogs,
2342
0
                                                          BAD_CAST url);
2343
0
        }
2344
0
        if ((resource == NULL) && (allowGlobal)) {
2345
0
            resource = (char *) xmlCatalogResolveURI(BAD_CAST url);
2346
0
        }
2347
0
    }
2348
2349
0
    code = lastError->code;
2350
0
    if (code == XML_ERR_OK) {
2351
0
        *out = resource;
2352
0
    } else {
2353
0
        xmlFree(resource);
2354
0
    }
2355
2356
0
    *lastError = oldError;
2357
2358
0
    return(code);
2359
0
}
2360
2361
static char *
2362
xmlCtxtResolveFromCatalog(xmlParserCtxtPtr ctxt, const char *url,
2363
0
                          const char *publicId) {
2364
0
    char *resource;
2365
0
    void *localCatalogs = NULL;
2366
0
    int allowGlobal = 1;
2367
0
    xmlParserErrors code;
2368
2369
0
    if (ctxt != NULL) {
2370
        /*
2371
         * Loading of HTML documents shouldn't use XML catalogs.
2372
         */
2373
0
        if (ctxt->html)
2374
0
            return(NULL);
2375
2376
0
        localCatalogs = ctxt->catalogs;
2377
2378
0
        if (ctxt->options & XML_PARSE_NO_SYS_CATALOG)
2379
0
            allowGlobal = 0;
2380
0
    }
2381
2382
0
    switch (xmlCatalogGetDefaults()) {
2383
0
        case XML_CATA_ALLOW_NONE:
2384
0
            return(NULL);
2385
0
        case XML_CATA_ALLOW_DOCUMENT:
2386
0
            allowGlobal = 0;
2387
0
            break;
2388
0
        case XML_CATA_ALLOW_GLOBAL:
2389
0
            localCatalogs = NULL;
2390
0
            break;
2391
0
        case XML_CATA_ALLOW_ALL:
2392
0
            break;
2393
0
    }
2394
2395
0
    code = xmlResolveFromCatalog(url, publicId, localCatalogs,
2396
0
                                 allowGlobal, &resource);
2397
0
    if (code != XML_ERR_OK)
2398
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_CATALOG, code, XML_ERR_ERROR,
2399
0
                   BAD_CAST url, BAD_CAST publicId, NULL, 0,
2400
0
                   "%s\n", xmlErrString(code), NULL);
2401
2402
0
    return(resource);
2403
0
}
2404
2405
#endif
2406
2407
/**
2408
 * @deprecated Internal function, don't use.
2409
 *
2410
 * @param ctxt  an XML parser context
2411
 * @param ret  an XML parser input
2412
 * @returns NULL.
2413
 */
2414
xmlParserInput *
2415
xmlCheckHTTPInput(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
2416
0
                  xmlParserInput *ret ATTRIBUTE_UNUSED) {
2417
0
    return(NULL);
2418
0
}
2419
2420
/**
2421
 * Create a new input stream based on a file or a URL.
2422
 *
2423
 * The flag XML_INPUT_UNZIP allows decompression.
2424
 *
2425
 * The flag XML_INPUT_NETWORK allows network access.
2426
 *
2427
 * The following resource loaders will be called if they were
2428
 * registered (in order of precedence):
2429
 *
2430
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2431
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2432
 * - the default loader which will return
2433
 *   - the result from a matching global input callback set with
2434
 *     #xmlRegisterInputCallbacks (deprecated)
2435
 *   - a file opened from the filesystem, with automatic detection
2436
 *     of compressed files if support is compiled in.
2437
 *
2438
 * @since 2.14.0
2439
 *
2440
 * @param url  the filename to use as entity
2441
 * @param flags  XML_INPUT flags
2442
 * @param out  pointer to new parser input
2443
 * @returns an xmlParserErrors code.
2444
 */
2445
xmlParserErrors
2446
xmlNewInputFromUrl(const char *url, xmlParserInputFlags flags,
2447
0
                   xmlParserInput **out) {
2448
0
    char *resource = NULL;
2449
0
    xmlParserInputBufferPtr buf;
2450
0
    xmlParserInputPtr input;
2451
0
    xmlParserErrors code = XML_ERR_OK;
2452
2453
0
    if (out == NULL)
2454
0
        return(XML_ERR_ARGUMENT);
2455
0
    *out = NULL;
2456
0
    if (url == NULL)
2457
0
        return(XML_ERR_ARGUMENT);
2458
2459
0
#ifdef LIBXML_CATALOG_ENABLED
2460
0
    if (flags & XML_INPUT_USE_SYS_CATALOG) {
2461
0
        code = xmlResolveFromCatalog(url, NULL, NULL, 1, &resource);
2462
0
        if (code != XML_ERR_OK)
2463
0
            return(code);
2464
0
        if (resource != NULL)
2465
0
            url = resource;
2466
0
    }
2467
0
#endif
2468
2469
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2470
0
        buf = xmlParserInputBufferCreateFilenameValue(url,
2471
0
                XML_CHAR_ENCODING_NONE);
2472
0
        if (buf == NULL)
2473
0
            code = XML_IO_ENOENT;
2474
0
    } else {
2475
0
        code = xmlParserInputBufferCreateUrl(url, XML_CHAR_ENCODING_NONE,
2476
0
                                             flags, &buf);
2477
0
    }
2478
2479
0
    if (code == XML_ERR_OK) {
2480
0
        input = xmlNewInputInternal(buf, url);
2481
0
        if (input == NULL)
2482
0
            code = XML_ERR_NO_MEMORY;
2483
2484
0
        *out = input;
2485
0
    }
2486
2487
0
    if (resource != NULL)
2488
0
        xmlFree(resource);
2489
0
    return(code);
2490
0
}
2491
2492
/**
2493
 * Create a new input stream based on a file or an URL.
2494
 *
2495
 * Unlike the default external entity loader, this function
2496
 * doesn't use XML catalogs.
2497
 *
2498
 * @deprecated Use #xmlNewInputFromUrl.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param filename  the filename to use as entity
2502
 * @returns the new input stream or NULL in case of error
2503
 */
2504
xmlParserInput *
2505
0
xmlNewInputFromFile(xmlParserCtxt *ctxt, const char *filename) {
2506
0
    xmlParserInputPtr input;
2507
0
    xmlParserInputFlags flags = 0;
2508
0
    xmlParserErrors code;
2509
2510
0
    if ((ctxt == NULL) || (filename == NULL))
2511
0
        return(NULL);
2512
2513
0
    if (ctxt->options & XML_PARSE_UNZIP)
2514
0
        flags |= XML_INPUT_UNZIP;
2515
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2516
0
        flags |= XML_INPUT_NETWORK;
2517
2518
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2519
0
    if (code != XML_ERR_OK) {
2520
0
        xmlCtxtErrIO(ctxt, code, filename);
2521
0
        return(NULL);
2522
0
    }
2523
2524
0
    return(input);
2525
0
}
2526
2527
/**
2528
 * Default external entity loader: resolves `url` through the XML
 * catalogs (if enabled) and loads the result with #xmlNewInputFromFile.
2529
 *
2530
 * @param url  the URL or system ID for the entity to load
2531
 * @param publicId  the public ID for the entity to load (optional)
2532
 * @param ctxt  the context in which the entity is called or NULL
2533
 * @returns a new allocated xmlParserInput, or NULL.
2534
 */
2535
static xmlParserInputPtr
2536
xmlDefaultExternalEntityLoader(const char *url, const char *publicId,
2537
                               xmlParserCtxtPtr ctxt)
2538
0
{
2539
0
    xmlParserInputPtr input = NULL;
2540
0
    char *resource = NULL;
2541
2542
0
    (void) publicId;
2543
2544
0
    if (url == NULL)
2545
0
        return(NULL);
2546
2547
0
#ifdef LIBXML_CATALOG_ENABLED
2548
0
    resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2549
0
    if (resource != NULL)
2550
0
  url = resource;
2551
0
#endif
2552
2553
    /*
2554
     * Several downstream test suites expect this error whenever
2555
     * an http URI is passed and NONET is set.
2556
     */
2557
0
    if ((ctxt != NULL) &&
2558
0
        (ctxt->options & XML_PARSE_NONET) &&
2559
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2560
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2561
0
    } else {
2562
0
        input = xmlNewInputFromFile(ctxt, url);
2563
0
    }
2564
2565
0
    if (resource != NULL)
2566
0
  xmlFree(resource);
2567
0
    return(input);
2568
0
}
2569
2570
/**
2571
 * A specific entity loader disabling network accesses, though still
2572
 * allowing local catalog accesses for resolution.
2573
 *
2574
 * @deprecated Use XML_PARSE_NONET.
2575
 *
2576
 * @param URL  the URL or system ID for the entity to load
2577
 * @param publicId  the public ID for the entity to load
2578
 * @param ctxt  the context in which the entity is called or NULL
2579
 * @returns a new allocated xmlParserInput, or NULL.
2580
 */
2581
xmlParserInput *
2582
xmlNoNetExternalEntityLoader(const char *URL, const char *publicId,
2583
0
                             xmlParserCtxt *ctxt) {
2584
0
    int oldOptions = 0;
2585
0
    xmlParserInputPtr input;
2586
2587
0
    if (ctxt != NULL) {
2588
0
        oldOptions = ctxt->options;
2589
0
        ctxt->options |= XML_PARSE_NONET;
2590
0
    }
2591
2592
0
    input = xmlDefaultExternalEntityLoader(URL, publicId, ctxt);
2593
2594
0
    if (ctxt != NULL)
2595
0
        ctxt->options = oldOptions;
2596
2597
0
    return(input);
2598
0
}
2599
2600
/*
2601
 * This global has to die eventually
2602
 */
2603
static xmlExternalEntityLoader
2604
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2605
2606
/**
2607
 * Changes the default external entity resolver function for the
2608
 * application.
2609
 *
2610
 * @deprecated This is a global setting and not thread-safe. Use
2611
 * #xmlCtxtSetResourceLoader or similar functions.
2612
 *
2613
 * @param f  the new entity resolver function
2614
 */
2615
void
2616
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2617
0
    xmlCurrentExternalEntityLoader = f;
2618
0
}
2619
2620
/**
2621
 * Get the default external entity resolver function for the application
2622
 *
2623
 * @deprecated See #xmlSetExternalEntityLoader.
2624
 *
2625
 * @returns the #xmlExternalEntityLoader function pointer
2626
 */
2627
xmlExternalEntityLoader
2628
0
xmlGetExternalEntityLoader(void) {
2629
0
    return(xmlCurrentExternalEntityLoader);
2630
0
}
2631
2632
/**
2633
 * Installs a custom callback to load documents, DTDs or external
2634
 * entities.
2635
 *
2636
 * If `vctxt` is NULL, the parser context will be passed.
2637
 *
2638
 * @since 2.14.0
2639
 * @param ctxt  parser context
2640
 * @param loader  callback
2641
 * @param vctxt  user data (optional)
2642
 */
2643
void
2644
xmlCtxtSetResourceLoader(xmlParserCtxt *ctxt, xmlResourceLoader loader,
2645
0
                         void *vctxt) {
2646
0
    if (ctxt == NULL)
2647
0
        return;
2648
2649
0
    ctxt->resourceLoader = loader;
2650
0
    ctxt->resourceCtxt = vctxt;
2651
0
}
2652
2653
/**
2654
 * @param ctxt  parser context
2655
 * @param url  the URL or system ID for the entity to load
2656
 * @param publicId  the public ID for the entity to load (optional)
2657
 * @param type  resource type
2658
 * @returns the xmlParserInput or NULL in case of error.
2659
 */
2660
xmlParserInput *
2661
xmlLoadResource(xmlParserCtxt *ctxt, const char *url, const char *publicId,
2662
0
                xmlResourceType type) {
2663
0
    char *canonicFilename;
2664
0
    xmlParserInputPtr ret;
2665
2666
0
    if (url == NULL)
2667
0
        return(NULL);
2668
2669
0
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2670
0
        char *resource = NULL;
2671
0
        void *userData;
2672
0
        xmlParserInputFlags flags = 0;
2673
0
        int code;
2674
2675
0
#ifdef LIBXML_CATALOG_ENABLED
2676
0
        resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2677
0
        if (resource != NULL)
2678
0
            url = resource;
2679
0
#endif
2680
2681
0
        if (ctxt->options & XML_PARSE_UNZIP)
2682
0
            flags |= XML_INPUT_UNZIP;
2683
0
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2684
0
            flags |= XML_INPUT_NETWORK;
2685
2686
0
        userData = ctxt->resourceCtxt;
2687
0
        if (userData == NULL)
2688
0
            userData = ctxt;
2689
2690
0
        code = ctxt->resourceLoader(userData, url, publicId, type,
2691
0
                                    flags, &ret);
2692
0
        if (code != XML_ERR_OK) {
2693
0
            xmlCtxtErrIO(ctxt, code, url);
2694
0
            ret = NULL;
2695
0
        }
2696
0
        if (resource != NULL)
2697
0
            xmlFree(resource);
2698
0
        return(ret);
2699
0
    }
2700
2701
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2702
0
    if (canonicFilename == NULL) {
2703
0
        xmlCtxtErrMemory(ctxt);
2704
0
        return(NULL);
2705
0
    }
2706
2707
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2708
0
    xmlFree(canonicFilename);
2709
0
    return(ret);
2710
0
}
2711
2712
/**
2713
 * `URL` is a filename or URL. If it contains the substring "://",
2714
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2715
 * treated as a filesystem path.
2716
 *
2717
 * `publicId` is an optional XML public ID, typically from a doctype
2718
 * declaration. It is used for catalog lookups.
2719
 *
2720
 * If catalog lookup is enabled (default is yes) and URL or ID are
2721
 * found in system or local XML catalogs, URL is replaced with the
2722
 * result. Then the following resource loaders will be called if
2723
 * they were registered (in order of precedence):
2724
 *
2725
 * - the resource loader set with #xmlCtxtSetResourceLoader
2726
 * - the global external entity loader set with
2727
 *   #xmlSetExternalEntityLoader (without catalog resolution,
2728
 *   deprecated)
2729
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2730
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2731
 * - the default loader which will return
2732
 *   - the result from a matching global input callback set with
2733
 *     #xmlRegisterInputCallbacks (deprecated)
2734
 *   - a file opened from the filesystem, with automatic detection
2735
 *     of compressed files if support is compiled in.
2736
 *
2737
 * @param URL  the URL or system ID for the entity to load
2738
 * @param publicId  the public ID for the entity to load (optional)
2739
 * @param ctxt  the context in which the entity is called or NULL
2740
 * @returns the xmlParserInput or NULL
2741
 */
2742
xmlParserInput *
2743
xmlLoadExternalEntity(const char *URL, const char *publicId,
2744
0
                      xmlParserCtxt *ctxt) {
2745
0
    return(xmlLoadResource(ctxt, URL, publicId, XML_RESOURCE_UNKNOWN));
2746
0
}
2747
2748
/************************************************************************
2749
 *                  *
2750
 *    Commodity functions to handle parser contexts   *
2751
 *                  *
2752
 ************************************************************************/
2753
2754
/**
2755
 * Initialize a SAX parser context
2756
 *
2757
 * @param ctxt  XML parser context
2758
 * @param sax  SAX handler
2759
 * @param userData  user data
2760
 * @returns 0 in case of success and -1 in case of error
2761
 */
2762
2763
static int
2764
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2765
                     void *userData)
2766
0
{
2767
0
    xmlParserInputPtr input;
2768
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2769
0
    size_t initialNodeTabSize = 1;
2770
#else
2771
    size_t initialNodeTabSize = 10;
2772
#endif
2773
2774
0
    if (ctxt == NULL)
2775
0
        return(-1);
2776
2777
0
    if (ctxt->dict == NULL)
2778
0
  ctxt->dict = xmlDictCreate();
2779
0
    if (ctxt->dict == NULL)
2780
0
  return(-1);
2781
2782
0
    if (ctxt->sax == NULL)
2783
0
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2784
0
    if (ctxt->sax == NULL)
2785
0
  return(-1);
2786
0
    if (sax == NULL) {
2787
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2788
0
        xmlSAXVersion(ctxt->sax, 2);
2789
0
        ctxt->userData = ctxt;
2790
0
    } else {
2791
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2792
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2793
0
        } else {
2794
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2795
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2796
0
        }
2797
0
        ctxt->userData = userData ? userData : ctxt;
2798
0
    }
2799
2800
0
    ctxt->maxatts = 0;
2801
0
    ctxt->atts = NULL;
2802
    /* Allocate the Input stack */
2803
0
    if (ctxt->inputTab == NULL) {
2804
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2805
0
        size_t initialSize = 1;
2806
#else
2807
        size_t initialSize = 5;
2808
#endif
2809
2810
0
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2811
0
  ctxt->inputMax = initialSize;
2812
0
    }
2813
0
    if (ctxt->inputTab == NULL)
2814
0
  return(-1);
2815
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2816
0
        xmlFreeInputStream(input);
2817
0
    }
2818
0
    ctxt->inputNr = 0;
2819
0
    ctxt->input = NULL;
2820
2821
0
    ctxt->version = NULL;
2822
0
    ctxt->encoding = NULL;
2823
0
    ctxt->standalone = -1;
2824
0
    ctxt->hasExternalSubset = 0;
2825
0
    ctxt->hasPErefs = 0;
2826
0
    ctxt->html = 0;
2827
0
    ctxt->instate = XML_PARSER_START;
2828
2829
    /* Allocate the Node stack */
2830
0
    if (ctxt->nodeTab == NULL) {
2831
0
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2832
0
  ctxt->nodeMax = initialNodeTabSize;
2833
0
    }
2834
0
    if (ctxt->nodeTab == NULL)
2835
0
  return(-1);
2836
0
    ctxt->nodeNr = 0;
2837
0
    ctxt->node = NULL;
2838
2839
    /* Allocate the Name stack */
2840
0
    if (ctxt->nameTab == NULL) {
2841
0
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2842
0
  ctxt->nameMax = initialNodeTabSize;
2843
0
    }
2844
0
    if (ctxt->nameTab == NULL)
2845
0
  return(-1);
2846
0
    ctxt->nameNr = 0;
2847
0
    ctxt->name = NULL;
2848
2849
    /* Allocate the space stack */
2850
0
    if (ctxt->spaceTab == NULL) {
2851
0
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2852
0
  ctxt->spaceMax = initialNodeTabSize;
2853
0
    }
2854
0
    if (ctxt->spaceTab == NULL)
2855
0
  return(-1);
2856
0
    ctxt->spaceNr = 1;
2857
0
    ctxt->spaceTab[0] = -1;
2858
0
    ctxt->space = &ctxt->spaceTab[0];
2859
0
    ctxt->myDoc = NULL;
2860
0
    ctxt->wellFormed = 1;
2861
0
    ctxt->nsWellFormed = 1;
2862
0
    ctxt->valid = 1;
2863
2864
0
    ctxt->options = XML_PARSE_NODICT;
2865
2866
    /*
2867
     * Initialize some parser options from deprecated global variables.
2868
     * Note that the "modern" API taking options arguments or
2869
     * xmlCtxtSetOptions will ignore these defaults. They're only
2870
     * relevant if old API functions like xmlParseFile are used.
2871
     */
2872
0
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2873
0
    if (ctxt->loadsubset) {
2874
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2875
0
    }
2876
0
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2877
0
    if (ctxt->validate) {
2878
0
        ctxt->options |= XML_PARSE_DTDVALID;
2879
0
    }
2880
0
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2881
0
    if (ctxt->pedantic) {
2882
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2883
0
    }
2884
0
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2885
0
    if (ctxt->keepBlanks == 0) {
2886
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2887
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2888
0
    }
2889
0
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2890
0
    if (ctxt->replaceEntities) {
2891
0
        ctxt->options |= XML_PARSE_NOENT;
2892
0
    }
2893
0
    if (xmlGetWarningsDefaultValue == 0)
2894
0
        ctxt->options |= XML_PARSE_NOWARNING;
2895
2896
0
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2897
0
    ctxt->vctxt.userData = ctxt;
2898
0
    ctxt->vctxt.error = xmlParserValidityError;
2899
0
    ctxt->vctxt.warning = xmlParserValidityWarning;
2900
2901
0
    ctxt->record_info = 0;
2902
0
    ctxt->checkIndex = 0;
2903
0
    ctxt->inSubset = 0;
2904
0
    ctxt->errNo = XML_ERR_OK;
2905
0
    ctxt->depth = 0;
2906
0
    ctxt->catalogs = NULL;
2907
0
    ctxt->sizeentities = 0;
2908
0
    ctxt->sizeentcopy = 0;
2909
0
    ctxt->input_id = 1;
2910
0
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2911
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2912
2913
0
    if (ctxt->nsdb == NULL) {
2914
0
        ctxt->nsdb = xmlParserNsCreate();
2915
0
        if (ctxt->nsdb == NULL)
2916
0
            return(-1);
2917
0
    }
2918
2919
0
    return(0);
2920
0
}
2921
2922
/**
2923
 * Initialize a parser context
2924
 *
2925
 * @deprecated Internal function which will be made private in a future
2926
 * version.
2927
 *
2928
 * @param ctxt  an XML parser context
2929
 * @returns 0 in case of success and -1 in case of error
2930
 */
2931
2932
int
2933
xmlInitParserCtxt(xmlParserCtxt *ctxt)
2934
0
{
2935
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2936
0
}
2937
2938
/**
2939
 * Free all the memory used by a parser context. However the parsed
2940
 * document in ctxt->myDoc is not freed.
2941
 *
2942
 * @param ctxt  an XML parser context
2943
 */
2944
2945
void
2946
xmlFreeParserCtxt(xmlParserCtxt *ctxt)
2947
19.2k
{
2948
19.2k
    xmlParserInputPtr input;
2949
2950
19.2k
    if (ctxt == NULL) return;
2951
2952
28.8k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2953
9.59k
        xmlFreeInputStream(input);
2954
9.59k
    }
2955
19.2k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2956
19.2k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2957
19.2k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2958
19.2k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2959
19.2k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2960
19.2k
    if (ctxt->version != NULL) xmlFree(ctxt->version);
2961
19.2k
    if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
2962
19.2k
    if (ctxt->extSubURI != NULL) xmlFree(ctxt->extSubURI);
2963
19.2k
    if (ctxt->extSubSystem != NULL) xmlFree(ctxt->extSubSystem);
2964
19.2k
#ifdef LIBXML_SAX1_ENABLED
2965
19.2k
    if ((ctxt->sax != NULL) &&
2966
19.2k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2967
#else
2968
    if (ctxt->sax != NULL)
2969
#endif /* LIBXML_SAX1_ENABLED */
2970
19.2k
        xmlFree(ctxt->sax);
2971
19.2k
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2972
19.2k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2973
19.2k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2974
19.2k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2975
19.2k
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2976
19.2k
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2977
19.2k
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2978
19.2k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2979
19.2k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2980
19.2k
    if (ctxt->attsDefault != NULL)
2981
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2982
19.2k
    if (ctxt->attsSpecial != NULL)
2983
0
        xmlHashFree(ctxt->attsSpecial, NULL);
2984
19.2k
    if (ctxt->freeElems != NULL) {
2985
0
        xmlNodePtr cur, next;
2986
2987
0
  cur = ctxt->freeElems;
2988
0
  while (cur != NULL) {
2989
0
      next = cur->next;
2990
0
      xmlFree(cur);
2991
0
      cur = next;
2992
0
  }
2993
0
    }
2994
19.2k
    if (ctxt->freeAttrs != NULL) {
2995
0
        xmlAttrPtr cur, next;
2996
2997
0
  cur = ctxt->freeAttrs;
2998
0
  while (cur != NULL) {
2999
0
      next = cur->next;
3000
0
      xmlFree(cur);
3001
0
      cur = next;
3002
0
  }
3003
0
    }
3004
    /*
3005
     * cleanup the error strings
3006
     */
3007
19.2k
    if (ctxt->lastError.message != NULL)
3008
3.68k
        xmlFree(ctxt->lastError.message);
3009
19.2k
    if (ctxt->lastError.file != NULL)
3010
0
        xmlFree(ctxt->lastError.file);
3011
19.2k
    if (ctxt->lastError.str1 != NULL)
3012
1.89k
        xmlFree(ctxt->lastError.str1);
3013
19.2k
    if (ctxt->lastError.str2 != NULL)
3014
115
        xmlFree(ctxt->lastError.str2);
3015
19.2k
    if (ctxt->lastError.str3 != NULL)
3016
0
        xmlFree(ctxt->lastError.str3);
3017
3018
19.2k
#ifdef LIBXML_CATALOG_ENABLED
3019
19.2k
    if (ctxt->catalogs != NULL)
3020
0
  xmlCatalogFreeLocal(ctxt->catalogs);
3021
19.2k
#endif
3022
19.2k
    xmlFree(ctxt);
3023
19.2k
}
3024
3025
/**
3026
 * Allocate and initialize a new parser context.
3027
 *
3028
 * @returns the xmlParserCtxt or NULL
3029
 */
3030
3031
xmlParserCtxt *
3032
xmlNewParserCtxt(void)
3033
0
{
3034
0
    return(xmlNewSAXParserCtxt(NULL, NULL));
3035
0
}
3036
3037
/**
3038
 * Allocate and initialize a new SAX parser context. If userData is NULL,
3039
 * the parser context will be passed as user data.
3040
 *
3041
 * @since 2.11.0
3042
 *
3043
 * If you want support older versions,
3044
 * it's best to invoke #xmlNewParserCtxt and set ctxt->sax with
3045
 * struct assignment.
3046
 *
3047
 * @param sax  SAX handler
3048
 * @param userData  user data
3049
 * @returns the xmlParserCtxt or NULL if memory allocation failed.
3050
 */
3051
3052
xmlParserCtxt *
3053
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3054
0
{
3055
0
    xmlParserCtxtPtr ctxt;
3056
3057
0
    xmlInitParser();
3058
3059
0
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3060
0
    if (ctxt == NULL)
3061
0
  return(NULL);
3062
0
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3063
0
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3064
0
        xmlFreeParserCtxt(ctxt);
3065
0
  return(NULL);
3066
0
    }
3067
0
    return(ctxt);
3068
0
}
3069
3070
/**
3071
 * @since 2.14.0
3072
 *
3073
 * @param ctxt  parser context
3074
 * @returns the private application data.
3075
 */
3076
void *
3077
0
xmlCtxtGetPrivate(xmlParserCtxt *ctxt) {
3078
0
    if (ctxt == NULL)
3079
0
        return(NULL);
3080
3081
0
    return(ctxt->_private);
3082
0
}
3083
3084
/**
3085
 * Set the private application data.
3086
 *
3087
 * @since 2.14.0
3088
 *
3089
 * @param ctxt  parser context
3090
 * @param priv  private application data
3091
 */
3092
void
3093
0
xmlCtxtSetPrivate(xmlParserCtxt *ctxt, void *priv) {
3094
0
    if (ctxt == NULL)
3095
0
        return;
3096
3097
0
    ctxt->_private = priv;
3098
0
}
3099
3100
/**
3101
 * @since 2.14.0
3102
 *
3103
 * @param ctxt  parser context
3104
 * @returns the local catalogs.
3105
 */
3106
void *
3107
0
xmlCtxtGetCatalogs(xmlParserCtxt *ctxt) {
3108
0
    if (ctxt == NULL)
3109
0
        return(NULL);
3110
3111
0
    return(ctxt->catalogs);
3112
0
}
3113
3114
/**
3115
 * Set the local catalogs.
3116
 *
3117
 * @since 2.14.0
3118
 *
3119
 * @param ctxt  parser context
3120
 * @param catalogs  catalogs pointer
3121
 */
3122
void
3123
0
xmlCtxtSetCatalogs(xmlParserCtxt *ctxt, void *catalogs) {
3124
0
    if (ctxt == NULL)
3125
0
        return;
3126
3127
0
    ctxt->catalogs = catalogs;
3128
0
}
3129
3130
/**
3131
 * @since 2.14.0
3132
 *
3133
 * @param ctxt  parser context
3134
 * @returns the dictionary.
3135
 */
3136
xmlDict *
3137
0
xmlCtxtGetDict(xmlParserCtxt *ctxt) {
3138
0
    if (ctxt == NULL)
3139
0
        return(NULL);
3140
3141
0
    return(ctxt->dict);
3142
0
}
3143
3144
/**
3145
 * Set the dictionary. This should only be done immediately after
3146
 * creating a parser context.
3147
 *
3148
 * @since 2.14.0
3149
 *
3150
 * @param ctxt  parser context
3151
 * @param dict  dictionary
3152
 */
3153
void
3154
0
xmlCtxtSetDict(xmlParserCtxt *ctxt, xmlDict *dict) {
3155
0
    if (ctxt == NULL)
3156
0
        return;
3157
3158
0
    if (ctxt->dict != NULL)
3159
0
        xmlDictFree(ctxt->dict);
3160
3161
0
    xmlDictReference(dict);
3162
0
    ctxt->dict = dict;
3163
0
}
3164
3165
/**
3166
 * @since 2.14.0
3167
 *
3168
 * @param ctxt  parser context
3169
 * @returns the SAX handler struct. This is not a copy and must not
3170
 * be freed. Handlers can be updated.
3171
 */
3172
xmlSAXHandler *
3173
0
xmlCtxtGetSaxHandler(xmlParserCtxt *ctxt) {
3174
0
    if (ctxt == NULL)
3175
0
        return(NULL);
3176
3177
0
    return(ctxt->sax);
3178
0
}
3179
3180
/**
3181
 * Set the SAX handler struct to a copy of `sax`.
3182
 *
3183
 * @since 2.14.0
3184
 *
3185
 * @param ctxt  parser context
3186
 * @param sax  SAX handler
3187
 * @returns 0 on success or -1 if arguments are invalid or a memory
3188
 * allocation failed.
3189
 */
3190
int
3191
0
xmlCtxtSetSaxHandler(xmlParserCtxt *ctxt, const xmlSAXHandler *sax) {
3192
0
    xmlSAXHandler *copy;
3193
3194
0
    if ((ctxt == NULL) || (sax == NULL))
3195
0
        return(-1);
3196
3197
0
    copy = xmlMalloc(sizeof(*copy));
3198
0
    if (copy == NULL)
3199
0
        return(-1);
3200
3201
0
    memcpy(copy, sax, sizeof(*copy));
3202
0
    ctxt->sax = copy;
3203
3204
0
    return(0);
3205
0
}
3206
3207
/**
3208
 * @since 2.14.0
3209
 *
3210
 * @param ctxt  parser context
3211
 * @returns the parsed document or NULL if a fatal error occurred when
3212
 * parsing. The document must be freed by the caller. Resets the
3213
 * context's document to NULL.
3214
 */
3215
xmlDoc *
3216
9.57k
xmlCtxtGetDocument(xmlParserCtxt *ctxt) {
3217
9.57k
    xmlDocPtr doc;
3218
3219
9.57k
    if (ctxt == NULL)
3220
0
        return(NULL);
3221
3222
9.57k
    if ((ctxt->wellFormed) ||
3223
9.57k
        (((ctxt->recovery) || (ctxt->html)) &&
3224
7.93k
         (!xmlCtxtIsCatastrophicError(ctxt)))) {
3225
7.93k
        doc = ctxt->myDoc;
3226
7.93k
    } else {
3227
1.63k
        if (ctxt->errNo == XML_ERR_OK)
3228
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error");
3229
1.63k
        doc = NULL;
3230
1.63k
        xmlFreeDoc(ctxt->myDoc);
3231
1.63k
    }
3232
9.57k
    ctxt->myDoc = NULL;
3233
3234
9.57k
    return(doc);
3235
9.57k
}
3236
3237
/**
3238
 * @since 2.14.0
3239
 *
3240
 * @param ctxt  parser context
3241
 * @returns 1 if this is a HTML parser context, 0 otherwise.
3242
 */
3243
int
3244
0
xmlCtxtIsHtml(xmlParserCtxt *ctxt) {
3245
0
    if (ctxt == NULL)
3246
0
        return(0);
3247
3248
0
    return(ctxt->html ? 1 : 0);
3249
0
}
3250
3251
/**
3252
 * Check whether the parser is stopped.
3253
 *
3254
 * The parser is stopped on fatal (non-wellformedness) errors or
3255
 * on user request with #xmlStopParser.
3256
 *
3257
 * @since 2.14.0
3258
 *
3259
 * @param ctxt  parser context
3260
 * @returns 1 if the parser is stopped, 0 otherwise.
3261
 */
3262
int
3263
0
xmlCtxtIsStopped(xmlParserCtxt *ctxt) {
3264
0
    if (ctxt == NULL)
3265
0
        return(0);
3266
3267
0
    return(ctxt->disableSAX != 0);
3268
0
}
3269
3270
/**
3271
 * Check whether a DTD subset is being parsed.
3272
 *
3273
 * Should only be used by SAX callbacks.
3274
 *
3275
 * Return values are
3276
 *
3277
 * - 0: not in DTD
3278
 * - 1: in internal DTD subset
3279
 * - 2: in external DTD subset
3280
 *
3281
 * @since 2.15.0
3282
 *
3283
 * @param ctxt  parser context
3284
 * @returns the subset status
3285
 */
3286
int
3287
0
xmlCtxtIsInSubset(xmlParserCtxt *ctxt) {
3288
0
    if (ctxt == NULL)
3289
0
        return(0);
3290
3291
0
    return(ctxt->inSubset);
3292
0
}
3293
3294
#ifdef LIBXML_VALID_ENABLED
/**
 * Get the validation context embedded in the parser context.
 *
 * @since 2.14.0
 *
 * @param ctxt  parser context
 * @returns the validation context, or NULL if `ctxt` is NULL.
 */
xmlValidCtxt *
xmlCtxtGetValidCtxt(xmlParserCtxt *ctxt) {
    return((ctxt != NULL) ? &ctxt->vctxt : NULL);
}
#endif
3309
3310
/**
3311
 * Return user data.
3312
 *
3313
 * Return user data of a custom SAX parser or the parser context
3314
 * itself if unset.
3315
 *
3316
 * @since 2.15.0
3317
 *
3318
 * @param ctxt  parser context
3319
 * @returns the user data.
3320
 */
3321
void *
3322
0
xmlCtxtGetUserData(xmlParserCtxt *ctxt) {
3323
0
    if (ctxt == NULL)
3324
0
        return NULL;
3325
3326
0
    return ctxt->userData;
3327
0
}
3328
3329
/**
3330
 * Return the current node being parsed.
3331
 *
3332
 * This is only useful if the default SAX callbacks which build
3333
 * a document tree are intercepted. This mode of operation is
3334
 * fragile and discouraged.
3335
 *
3336
 * Returns the current element node, or the document node if no
3337
 * element was parsed yet.
3338
 *
3339
 * @since 2.15.0
3340
 *
3341
 * @param ctxt  parser context
3342
 * @returns the current node.
3343
 */
3344
xmlNode *
3345
0
xmlCtxtGetNode(xmlParserCtxt *ctxt) {
3346
0
    if (ctxt == NULL)
3347
0
        return NULL;
3348
3349
0
    if (ctxt->node != NULL)
3350
0
        return ctxt->node;
3351
0
    return (xmlNode *) ctxt->myDoc;
3352
0
}
3353
3354
/**
3355
 * Return data from the doctype declaration.
3356
 *
3357
 * Should only be used by SAX callbacks.
3358
 *
3359
 * @since 2.15.0
3360
 *
3361
 * @param ctxt  parser context
3362
 * @param name  name of the root element (output)
3363
 * @param systemId  system ID (URI) of the external subset (output)
3364
 * @param publicId  public ID of the external subset (output)
3365
 * @returns 0 on success, -1 if argument is invalid
3366
 */
3367
int
3368
xmlCtxtGetDocTypeDecl(xmlParserCtxt *ctxt,
3369
                      const xmlChar **name,
3370
                      const xmlChar **systemId,
3371
0
                      const xmlChar **publicId) {
3372
0
    if (ctxt == NULL)
3373
0
        return -1;
3374
3375
0
    if (name != NULL)
3376
0
        *name = ctxt->intSubName;
3377
0
    if (systemId != NULL)
3378
0
        *systemId = ctxt->extSubURI;
3379
0
    if (publicId != NULL)
3380
0
        *publicId = ctxt->extSubSystem; /* The member is misnamed */
3381
3382
0
    return 0;
3383
0
}
3384
3385
/**
3386
 * Return input position.
3387
 *
3388
 * Should only be used by error handlers or SAX callbacks.
3389
 *
3390
 * Because of entities, there can be multiple inputs. Non-negative
3391
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3392
 * from the outermost input. Negative values (-1, -2, ...) select
3393
 * inputs starting from the innermost input.
3394
 *
3395
 * The byte position is counted in possibly decoded UTF-8 bytes,
3396
 * so it won't match the position in the raw input data.
3397
 *
3398
 * @since 2.15.0
3399
 *
3400
 * @param ctxt  parser context
3401
 * @param inputIndex  input index
3402
 * @param filename  filename (output)
3403
 * @param line  line number (output)
3404
 * @param col  column number (output)
3405
 * @param utf8BytePos  byte position (output)
3406
 * @returns 0 on success, -1 if arguments are invalid
3407
 */
3408
int
3409
xmlCtxtGetInputPosition(xmlParserCtxt *ctxt, int inputIndex,
3410
                        const char **filename, int *line, int *col,
3411
0
                        unsigned long *utf8BytePos) {
3412
0
    xmlParserInput *input;
3413
3414
0
    if (ctxt == NULL)
3415
0
        return -1;
3416
3417
0
    if (inputIndex < 0) {
3418
0
        inputIndex += ctxt->inputNr;
3419
0
        if (inputIndex < 0)
3420
0
            return -1;
3421
0
    }
3422
0
    if (inputIndex >= ctxt->inputNr)
3423
0
        return -1;
3424
3425
0
    input = ctxt->inputTab[inputIndex];
3426
3427
0
    if (filename != NULL)
3428
0
        *filename = input->filename;
3429
0
    if (line != NULL)
3430
0
        *line = input->line;
3431
0
    if (col != NULL)
3432
0
        *col = input->col;
3433
3434
0
    if (utf8BytePos != NULL) {
3435
0
        unsigned long consumed;
3436
3437
0
        consumed = input->consumed;
3438
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
3439
0
        *utf8BytePos = consumed;
3440
0
    }
3441
3442
0
    return 0;
3443
0
}
3444
3445
/**
3446
 * Return window into input data.
3447
 *
3448
 * Should only be used by error handlers or SAX callbacks.
3449
 * The returned pointer is only valid until the callback returns.
3450
 *
3451
 * Because of entities, there can be multiple inputs. Non-negative
3452
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3453
 * from the outermost input. Negative values (-1, -2, ...) select
3454
 * inputs starting from the innermost input.
3455
 *
3456
 * @since 2.15.0
3457
 *
3458
 * @param ctxt  parser context
3459
 * @param inputIndex  input index
3460
 * @param startOut  start of window (output)
3461
 * @param sizeInOut  maximum size of window (in)
3462
 *                   actual size of window (out)
3463
 * @param offsetOut  offset of current position inside
3464
 *                   window (out)
3465
 * @returns 0 on success, -1 if arguments are invalid
3466
 */
3467
int
3468
xmlCtxtGetInputWindow(xmlParserCtxt *ctxt, int inputIndex,
3469
                      const xmlChar **startOut,
3470
0
                      int *sizeInOut, int *offsetOut) {
3471
0
    xmlParserInput *input;
3472
3473
0
    if (ctxt == NULL || startOut == NULL || sizeInOut == NULL ||
3474
0
        offsetOut == NULL)
3475
0
        return -1;
3476
3477
0
    if (inputIndex < 0) {
3478
0
        inputIndex += ctxt->inputNr;
3479
0
        if (inputIndex < 0)
3480
0
            return -1;
3481
0
    }
3482
0
    if (inputIndex >= ctxt->inputNr)
3483
0
        return -1;
3484
3485
0
    input = ctxt->inputTab[inputIndex];
3486
3487
0
    xmlParserInputGetWindow(input, startOut, sizeInOut, offsetOut);
3488
3489
0
    return 0;
3490
0
}
3491
3492
/************************************************************************
3493
 *                  *
3494
 *    Handling of node information        *
3495
 *                  *
3496
 ************************************************************************/
3497
3498
/**
3499
 * Same as #xmlCtxtReset
3500
 *
3501
 * @deprecated Use #xmlCtxtReset
3502
 *
3503
 * @param ctxt  an XML parser context
3504
 */
3505
void
3506
xmlClearParserCtxt(xmlParserCtxt *ctxt)
3507
0
{
3508
0
    xmlCtxtReset(ctxt);
3509
0
}
3510
3511
3512
/**
3513
 * Find the parser node info struct for a given node
3514
 *
3515
 * @deprecated Don't use.
3516
 *
3517
 * @param ctx  an XML parser context
3518
 * @param node  an XML node within the tree
3519
 * @returns an xmlParserNodeInfo block pointer or NULL
3520
 */
3521
const xmlParserNodeInfo *
3522
xmlParserFindNodeInfo(xmlParserCtxt *ctx, xmlNode *node)
3523
0
{
3524
0
    unsigned long pos;
3525
3526
0
    if ((ctx == NULL) || (node == NULL))
3527
0
        return (NULL);
3528
    /* Find position where node should be at */
3529
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3530
0
    if (pos < ctx->node_seq.length
3531
0
        && ctx->node_seq.buffer[pos].node == node)
3532
0
        return &ctx->node_seq.buffer[pos];
3533
0
    else
3534
0
        return NULL;
3535
0
}
3536
3537
3538
/**
3539
 * Initialize (set to initial state) node info sequence
3540
 *
3541
 * @deprecated Don't use.
3542
 *
3543
 * @param seq  a node info sequence pointer
3544
 */
3545
void
3546
xmlInitNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3547
28.8k
{
3548
28.8k
    if (seq == NULL)
3549
0
        return;
3550
28.8k
    seq->length = 0;
3551
28.8k
    seq->maximum = 0;
3552
28.8k
    seq->buffer = NULL;
3553
28.8k
}
3554
3555
/**
3556
 * Clear (release memory and reinitialize) node info sequence
3557
 *
3558
 * @deprecated Don't use.
3559
 *
3560
 * @param seq  a node info sequence pointer
3561
 */
3562
void
3563
xmlClearNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3564
0
{
3565
0
    if (seq == NULL)
3566
0
        return;
3567
0
    if (seq->buffer != NULL)
3568
0
        xmlFree(seq->buffer);
3569
0
    xmlInitNodeInfoSeq(seq);
3570
0
}
3571
3572
/**
3573
 * Find the index that the info record for the given node is or
3574
 * should be at in a sorted sequence.
3575
 *
3576
 * @deprecated Don't use.
3577
 *
3578
 * @param seq  a node info sequence pointer
3579
 * @param node  an XML node pointer
3580
 * @returns a long indicating the position of the record
3581
 */
3582
unsigned long
3583
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeq *seq,
3584
                           xmlNode *node)
3585
0
{
3586
0
    unsigned long upper, lower, middle;
3587
0
    int found = 0;
3588
3589
0
    if ((seq == NULL) || (node == NULL))
3590
0
        return ((unsigned long) -1);
3591
3592
    /* Do a binary search for the key */
3593
0
    lower = 1;
3594
0
    upper = seq->length;
3595
0
    middle = 0;
3596
0
    while (lower <= upper && !found) {
3597
0
        middle = lower + (upper - lower) / 2;
3598
0
        if (node == seq->buffer[middle - 1].node)
3599
0
            found = 1;
3600
0
        else if (node < seq->buffer[middle - 1].node)
3601
0
            upper = middle - 1;
3602
0
        else
3603
0
            lower = middle + 1;
3604
0
    }
3605
3606
    /* Return position */
3607
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3608
0
        return middle;
3609
0
    else
3610
0
        return middle - 1;
3611
0
}
3612
3613
3614
/**
3615
 * Insert node info record into the sorted sequence
3616
 *
3617
 * @deprecated Don't use.
3618
 *
3619
 * @param ctxt  an XML parser context
3620
 * @param info  a node info sequence pointer
3621
 */
3622
void
3623
xmlParserAddNodeInfo(xmlParserCtxt *ctxt,
3624
                     xmlParserNodeInfo *info)
3625
0
{
3626
0
    unsigned long pos;
3627
3628
0
    if ((ctxt == NULL) || (info == NULL)) return;
3629
3630
    /* Find pos and check to see if node is already in the sequence */
3631
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3632
0
                                     info->node);
3633
3634
0
    if ((pos < ctxt->node_seq.length) &&
3635
0
        (ctxt->node_seq.buffer != NULL) &&
3636
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3637
0
        ctxt->node_seq.buffer[pos] = *info;
3638
0
    }
3639
3640
    /* Otherwise, we need to add new node to buffer */
3641
0
    else {
3642
0
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
3643
0
            xmlParserNodeInfo *tmp;
3644
0
            int newSize;
3645
3646
0
            newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
3647
0
                                      4, XML_MAX_ITEMS);
3648
0
            if (newSize < 0) {
3649
0
    xmlCtxtErrMemory(ctxt);
3650
0
                return;
3651
0
            }
3652
0
            tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
3653
0
            if (tmp == NULL) {
3654
0
    xmlCtxtErrMemory(ctxt);
3655
0
                return;
3656
0
            }
3657
0
            ctxt->node_seq.buffer = tmp;
3658
0
            ctxt->node_seq.maximum = newSize;
3659
0
        }
3660
3661
        /* If position is not at end, move elements out of the way */
3662
0
        if (pos != ctxt->node_seq.length) {
3663
0
            unsigned long i;
3664
3665
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3666
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3667
0
        }
3668
3669
        /* Copy element and increase length */
3670
0
        ctxt->node_seq.buffer[pos] = *info;
3671
0
        ctxt->node_seq.length++;
3672
0
    }
3673
0
}
3674
3675
/************************************************************************
3676
 *                  *
3677
 *    Defaults settings         *
3678
 *                  *
3679
 ************************************************************************/
3680
/**
3681
 * Set and return the previous value for enabling pedantic warnings.
3682
 *
3683
 * @deprecated Use the modern options API with XML_PARSE_PEDANTIC.
3684
 *
3685
 * @param val  int 0 or 1
3686
 * @returns the last value for 0 for no substitution, 1 for substitution.
3687
 */
3688
3689
int
3690
0
xmlPedanticParserDefault(int val) {
3691
0
    int old = xmlPedanticParserDefaultValue;
3692
3693
0
    xmlPedanticParserDefaultValue = val;
3694
0
    return(old);
3695
0
}
3696
3697
/**
3698
 * Has no effect.
3699
 *
3700
 * @deprecated Line numbers are always enabled.
3701
 *
3702
 * @param val  int 0 or 1
3703
 * @returns 1
3704
 */
3705
3706
int
3707
0
xmlLineNumbersDefault(int val ATTRIBUTE_UNUSED) {
3708
0
    return(1);
3709
0
}
3710
3711
/**
3712
 * Set and return the previous value for default entity support.
3713
 *
3714
 * @deprecated Use the modern options API with XML_PARSE_NOENT.
3715
 *
3716
 * @param val  int 0 or 1
3717
 * @returns the last value for 0 for no substitution, 1 for substitution.
3718
 */
3719
3720
int
3721
0
xmlSubstituteEntitiesDefault(int val) {
3722
0
    int old = xmlSubstituteEntitiesDefaultValue;
3723
3724
0
    xmlSubstituteEntitiesDefaultValue = val;
3725
0
    return(old);
3726
0
}
3727
3728
/**
3729
 * Set and return the previous value for default blanks text nodes support.
3730
 *
3731
 * @deprecated Use the modern options API with XML_PARSE_NOBLANKS.
3732
 *
3733
 * @param val  int 0 or 1
3734
 * @returns the last value for 0 for no substitution, 1 for substitution.
3735
 */
3736
3737
int
3738
0
xmlKeepBlanksDefault(int val) {
3739
0
    int old = xmlKeepBlanksDefaultValue;
3740
3741
0
    xmlKeepBlanksDefaultValue = val;
3742
0
#ifdef LIBXML_OUTPUT_ENABLED
3743
0
    if (!val)
3744
0
        xmlIndentTreeOutput = 1;
3745
0
#endif
3746
0
    return(old);
3747
0
}
3748