Coverage Report

Created: 2025-08-03 06:56

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * Author: Daniel Veillard
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
39
#define CUR(ctxt) ctxt->input->cur
40
#define END(ctxt) ctxt->input->end
41
42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/globals.h"
46
#include "private/io.h"
47
#include "private/memory.h"
48
#include "private/parser.h"
49
50
#ifndef SIZE_MAX
51
  #define SIZE_MAX ((size_t) -1)
52
#endif
53
54
21.5M
#define XML_MAX_ERRORS 100
55
56
/*
57
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
58
 * factor of serialized output after entity expansion.
59
 */
60
525
#define XML_MAX_AMPLIFICATION_DEFAULT 5
61
62
/*
63
 * Various global defaults for parsing
64
 */
65
66
/**
67
 * check the compiled lib version against the include one.
68
 *
69
 * @param version  the include version number
70
 */
71
void
72
0
xmlCheckVersion(int version) {
73
0
    int myversion = LIBXML_VERSION;
74
75
0
    xmlInitParser();
76
77
0
    if ((myversion / 10000) != (version / 10000)) {
78
0
  xmlPrintErrorMessage(
79
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
80
0
    (version / 10000), (myversion / 10000));
81
0
    } else if ((myversion / 100) < (version / 100)) {
82
0
  xmlPrintErrorMessage(
83
0
    "Warning: program compiled against libxml %d using older %d\n",
84
0
    (version / 100), (myversion / 100));
85
0
    }
86
0
}
87
88
89
/************************************************************************
90
 *                  *
91
 *    Some factorized error routines        *
92
 *                  *
93
 ************************************************************************/
94
95
96
/**
97
 * Register a callback function that will be called on errors and
98
 * warnings. If handler is NULL, the error handler will be deactivated.
99
 *
100
 * If you only want to disable parser errors being printed to
101
 * stderr, use xmlParserOption XML_PARSE_NOERROR.
102
 *
103
 * This is the recommended way to collect errors from the parser and
104
 * takes precedence over all other error reporting mechanisms.
105
 * These are (in order of precedence):
106
 *
107
 * - per-context structured handler (#xmlCtxtSetErrorHandler)
108
 * - per-context structured "serror" SAX handler
109
 * - global structured handler (#xmlSetStructuredErrorFunc)
110
 * - per-context generic "error" and "warning" SAX handlers
111
 * - global generic handler (#xmlSetGenericErrorFunc)
112
 * - print to stderr
113
 *
114
 * @since 2.13.0
115
 * @param ctxt  an XML parser context
116
 * @param handler  error handler
117
 * @param data  data for error handler
118
 */
119
void
120
xmlCtxtSetErrorHandler(xmlParserCtxt *ctxt, xmlStructuredErrorFunc handler,
121
                       void *data)
122
525
{
123
525
    if (ctxt == NULL)
124
0
        return;
125
525
    ctxt->errorHandler = handler;
126
525
    ctxt->errorCtxt = data;
127
525
}
128
129
/**
130
 * Get the last error raised.
131
 *
132
 * Note that the XML parser typically doesn't stop after
133
 * encountering an error and will often report multiple errors.
134
 * Most of the time, the last error isn't useful. Future
135
 * versions might return the first parser error instead.
136
 *
137
 * @param ctx  an XML parser context
138
 * @returns NULL if no error occurred or a pointer to the error
139
 */
140
const xmlError *
141
xmlCtxtGetLastError(void *ctx)
142
0
{
143
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
144
145
0
    if (ctxt == NULL)
146
0
        return (NULL);
147
0
    if (ctxt->lastError.code == XML_ERR_OK)
148
0
        return (NULL);
149
0
    return (&ctxt->lastError);
150
0
}
151
152
/**
153
 * Reset the last parser error to success. This does not change
154
 * the well-formedness status.
155
 *
156
 * @param ctx  an XML parser context
157
 */
158
void
159
xmlCtxtResetLastError(void *ctx)
160
0
{
161
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
162
163
0
    if (ctxt == NULL)
164
0
        return;
165
0
    ctxt->errNo = XML_ERR_OK;
166
0
    if (ctxt->lastError.code == XML_ERR_OK)
167
0
        return;
168
0
    xmlResetError(&ctxt->lastError);
169
0
}
170
171
/**
172
 * Handle an out-of-memory error.
173
 *
174
 * @since 2.13.0
175
 * @param ctxt  an XML parser context
176
 */
177
void
178
xmlCtxtErrMemory(xmlParserCtxt *ctxt)
179
11
{
180
11
    xmlStructuredErrorFunc schannel = NULL;
181
11
    xmlGenericErrorFunc channel = NULL;
182
11
    void *data;
183
184
11
    if (ctxt == NULL) {
185
0
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
186
0
        return;
187
0
    }
188
189
11
    ctxt->errNo = XML_ERR_NO_MEMORY;
190
11
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
191
11
    ctxt->wellFormed = 0;
192
11
    ctxt->disableSAX = 2;
193
194
11
    if (ctxt->errorHandler) {
195
11
        schannel = ctxt->errorHandler;
196
11
        data = ctxt->errorCtxt;
197
11
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
198
0
        (ctxt->sax->serror != NULL)) {
199
0
        schannel = ctxt->sax->serror;
200
0
        data = ctxt->userData;
201
0
    } else {
202
0
        channel = ctxt->sax->error;
203
0
        data = ctxt->userData;
204
0
    }
205
206
11
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
207
11
                        &ctxt->lastError);
208
11
}
209
210
/**
211
 * If filename is empty, use the one from context input if available.
212
 *
213
 * Report an IO error to the parser context.
214
 *
215
 * @param ctxt  parser context
216
 * @param code  xmlParserErrors code
217
 * @param uri  filename or URI (optional)
218
 */
219
void
220
xmlCtxtErrIO(xmlParserCtxt *ctxt, int code, const char *uri)
221
1.22k
{
222
1.22k
    const char *errstr, *msg, *str1, *str2;
223
1.22k
    xmlErrorLevel level;
224
225
1.22k
    if (ctxt == NULL)
226
0
        return;
227
228
1.22k
    if (((code == XML_IO_ENOENT) ||
229
1.22k
         (code == XML_IO_UNKNOWN))) {
230
        /*
231
         * Only report a warning if a file could not be found. This should
232
         * only be done for external entities, but the external entity loader
233
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
234
         * raise an error and aborts parsing.
235
         */
236
5
        if (ctxt->validate == 0)
237
5
            level = XML_ERR_WARNING;
238
0
        else
239
0
            level = XML_ERR_ERROR;
240
1.22k
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
241
0
        level = XML_ERR_ERROR;
242
1.22k
    } else {
243
1.22k
        level = XML_ERR_FATAL;
244
1.22k
    }
245
246
1.22k
    errstr = xmlErrString(code);
247
248
1.22k
    if (uri == NULL) {
249
1.22k
        msg = "%s\n";
250
1.22k
        str1 = errstr;
251
1.22k
        str2 = NULL;
252
1.22k
    } else {
253
5
        msg = "failed to load \"%s\": %s\n";
254
5
        str1 = uri;
255
5
        str2 = errstr;
256
5
    }
257
258
1.22k
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
259
1.22k
               (const xmlChar *) uri, NULL, NULL, 0,
260
1.22k
               msg, str1, str2);
261
1.22k
}
262
263
/**
264
 * @param ctxt  parser context
265
 * @returns true if the last error is catastrophic.
266
 */
267
int
268
21.5M
xmlCtxtIsCatastrophicError(xmlParserCtxt *ctxt) {
269
21.5M
    if (ctxt == NULL)
270
0
        return(1);
271
272
21.5M
    return(xmlIsCatastrophicError(ctxt->lastError.level,
273
21.5M
                                  ctxt->lastError.code));
274
21.5M
}
275
276
/**
277
 * Raise a parser error.
278
 *
279
 * @param ctxt  a parser context
280
 * @param node  the current node or NULL
281
 * @param domain  the domain for the error
282
 * @param code  the code for the error
283
 * @param level  the xmlErrorLevel for the error
284
 * @param str1  extra string info
285
 * @param str2  extra string info
286
 * @param str3  extra string info
287
 * @param int1  extra int info
288
 * @param msg  the message to display/transmit
289
 * @param ap  extra parameters for the message display
290
 */
291
void
292
xmlCtxtVErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
293
            xmlParserErrors code, xmlErrorLevel level,
294
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
295
            int int1, const char *msg, va_list ap)
296
21.5M
{
297
21.5M
    xmlStructuredErrorFunc schannel = NULL;
298
21.5M
    xmlGenericErrorFunc channel = NULL;
299
21.5M
    void *data = NULL;
300
21.5M
    const char *file = NULL;
301
21.5M
    int line = 0;
302
21.5M
    int col = 0;
303
21.5M
    int res;
304
305
21.5M
    if (code == XML_ERR_NO_MEMORY) {
306
0
        xmlCtxtErrMemory(ctxt);
307
0
        return;
308
0
    }
309
310
21.5M
    if (ctxt == NULL) {
311
0
        res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
312
0
                             level, NULL, 0, (const char *) str1,
313
0
                             (const char *) str2, (const char *) str3,
314
0
                             int1, 0, msg, ap);
315
0
        if (res < 0)
316
0
            xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
317
318
0
        return;
319
0
    }
320
321
21.5M
    if (PARSER_STOPPED(ctxt))
322
57
  return;
323
324
    /* Don't overwrite catastrophic errors */
325
21.5M
    if (xmlCtxtIsCatastrophicError(ctxt))
326
0
        return;
327
328
21.5M
    if (level == XML_ERR_WARNING) {
329
12.5k
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330
10.7k
            return;
331
1.86k
        ctxt->nbWarnings += 1;
332
21.5M
    } else {
333
        /* Report at least one fatal error. */
334
21.5M
        if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
335
21.5M
            ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)) &&
336
21.5M
            (!xmlIsCatastrophicError(level, code)))
337
21.4M
            return;
338
36.6k
        ctxt->nbErrors += 1;
339
36.6k
    }
340
341
38.5k
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
342
38.5k
        ((level != XML_ERR_WARNING) ||
343
14.5k
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
344
14.5k
        if (ctxt->errorHandler) {
345
14.5k
            schannel = ctxt->errorHandler;
346
14.5k
            data = ctxt->errorCtxt;
347
14.5k
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
348
0
            (ctxt->sax->serror != NULL)) {
349
0
            schannel = ctxt->sax->serror;
350
0
            data = ctxt->userData;
351
0
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
352
0
            if (level == XML_ERR_WARNING)
353
0
                channel = ctxt->vctxt.warning;
354
0
            else
355
0
                channel = ctxt->vctxt.error;
356
0
            data = ctxt->vctxt.userData;
357
0
        } else {
358
0
            if (level == XML_ERR_WARNING)
359
0
                channel = ctxt->sax->warning;
360
0
            else
361
0
                channel = ctxt->sax->error;
362
0
            data = ctxt->userData;
363
0
        }
364
14.5k
    }
365
366
38.5k
    if (ctxt->input != NULL) {
367
37.8k
        xmlParserInputPtr input = ctxt->input;
368
369
37.8k
        if ((input->filename == NULL) &&
370
37.8k
            (ctxt->inputNr > 1)) {
371
466
            input = ctxt->inputTab[ctxt->inputNr - 2];
372
466
        }
373
37.8k
        file = input->filename;
374
37.8k
        line = input->line;
375
37.8k
        col = input->col;
376
37.8k
    }
377
378
38.5k
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
379
38.5k
                         level, file, line, (const char *) str1,
380
38.5k
                         (const char *) str2, (const char *) str3, int1, col,
381
38.5k
                         msg, ap);
382
383
38.5k
    if (res < 0) {
384
0
        xmlCtxtErrMemory(ctxt);
385
0
        return;
386
0
    }
387
388
38.5k
    if (level >= XML_ERR_ERROR)
389
36.6k
        ctxt->errNo = code;
390
38.5k
    if (level == XML_ERR_FATAL) {
391
35.2k
        ctxt->wellFormed = 0;
392
393
        /*
394
         * By long-standing design, the parser isn't completely
395
         * stopped on well-formedness errors. Only SAX callbacks
396
         * are disabled.
397
         *
398
         * In some situations, we really want to abort as fast
399
         * as possible.
400
         */
401
35.2k
        if (xmlCtxtIsCatastrophicError(ctxt) ||
402
35.2k
            code == XML_ERR_RESOURCE_LIMIT ||
403
35.2k
            code == XML_ERR_ENTITY_LOOP)
404
64
            ctxt->disableSAX = 2; /* really stop parser */
405
35.1k
        else if (ctxt->recovery == 0)
406
6.84k
            ctxt->disableSAX = 1;
407
35.2k
    }
408
38.5k
}
409
410
/**
411
 * Raise a parser error.
412
 *
413
 * @param ctxt  a parser context
414
 * @param node  the current node or NULL
415
 * @param domain  the domain for the error
416
 * @param code  the code for the error
417
 * @param level  the xmlErrorLevel for the error
418
 * @param str1  extra string info
419
 * @param str2  extra string info
420
 * @param str3  extra string info
421
 * @param int1  extra int info
422
 * @param msg  the message to display/transmit
423
 * @param ...  extra parameters for the message display
424
 */
425
void
426
xmlCtxtErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
427
           xmlParserErrors code, xmlErrorLevel level,
428
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
429
           int int1, const char *msg, ...)
430
21.5M
{
431
21.5M
    va_list ap;
432
433
21.5M
    va_start(ap, msg);
434
21.5M
    xmlCtxtVErr(ctxt, node, domain, code, level,
435
21.5M
                str1, str2, str3, int1, msg, ap);
436
21.5M
    va_end(ap);
437
21.5M
}
438
439
/**
440
 * Get well-formedness and validation status after parsing. Also
441
 * reports catastrophic errors which are not related to parsing
442
 * like out-of-memory, I/O or other errors.
443
 *
444
 * @since 2.14.0
445
 *
446
 * @param ctxt  an XML parser context
447
 * @returns a bitmask of XML_STATUS_* flags ORed together.
448
 */
449
xmlParserStatus
450
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
451
0
    xmlParserStatus bits = 0;
452
453
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
454
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
455
0
                XML_STATUS_NOT_WELL_FORMED |
456
0
                XML_STATUS_NOT_NS_WELL_FORMED;
457
0
        if ((ctxt != NULL) && (ctxt->validate))
458
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
459
460
0
        return(bits);
461
0
    }
462
463
0
    if (!ctxt->wellFormed)
464
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
465
0
    if (!ctxt->nsWellFormed)
466
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
467
0
    if ((ctxt->validate) && (!ctxt->valid))
468
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
469
470
0
    return(bits);
471
0
}
472
473
/**
474
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
475
 *
476
 * @param ctxt  an XML parser context
477
 * @param code  the error number
478
 * @param info  extra information string
479
 */
480
void
481
xmlFatalErr(xmlParserCtxt *ctxt, xmlParserErrors code, const char *info)
482
682k
{
483
682k
    const char *errmsg;
484
682k
    xmlErrorDomain domain = XML_FROM_PARSER;
485
682k
    xmlErrorLevel level = XML_ERR_FATAL;
486
487
682k
    errmsg = xmlErrString(code);
488
489
682k
    if ((ctxt != NULL) && (ctxt->html)) {
490
0
        domain = XML_FROM_HTML;
491
492
        /* Continue if encoding is unsupported */
493
0
        if (code == XML_ERR_UNSUPPORTED_ENCODING)
494
0
            level = XML_ERR_ERROR;
495
0
    }
496
497
682k
    if (info == NULL) {
498
285k
        xmlCtxtErr(ctxt, NULL, domain, code, level,
499
285k
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
500
396k
    } else {
501
396k
        xmlCtxtErr(ctxt, NULL, domain, code, level,
502
396k
                   (const xmlChar *) info, NULL, NULL, 0,
503
396k
                   "%s: %s\n", errmsg, info);
504
396k
    }
505
682k
}
506
507
/**
508
 * Return window into current parser data.
509
 *
510
 * @param input  parser input
511
 * @param startOut  start of window (output)
512
 * @param sizeInOut  maximum size of window (in)
513
 *                   actual size of window (out)
514
 * @param offsetOut  offset of current position inside
515
 *                   window (out)
516
 */
517
void
518
xmlParserInputGetWindow(xmlParserInput *input, const xmlChar **startOut,
519
0
                        int *sizeInOut, int *offsetOut) {
520
0
    const xmlChar *cur, *base, *start;
521
0
    int n, col;
522
0
    int size = *sizeInOut;
523
524
0
    cur = input->cur;
525
0
    base = input->base;
526
    /* skip backwards over any end-of-lines */
527
0
    while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) {
528
0
  cur--;
529
0
    }
530
0
    n = 0;
531
    /* search backwards for beginning-of-line (to max buff size) */
532
0
    while ((n < size) && (cur > base) &&
533
0
     (*cur != '\n') && (*cur != '\r')) {
534
0
        cur--;
535
0
        n++;
536
0
    }
537
0
    if ((n > 0) && ((*cur == '\n') || (*cur == '\r'))) {
538
0
        cur++;
539
0
    } else {
540
        /* skip over continuation bytes */
541
0
        while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
542
0
            cur++;
543
0
    }
544
    /* calculate the error position in terms of the current position */
545
0
    col = input->cur - cur;
546
    /* search forward for end-of-line (to max buff size) */
547
0
    n = 0;
548
0
    start = cur;
549
    /* copy selected text to our buffer */
550
0
    while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
551
0
        int len = input->end - cur;
552
0
        int c = xmlGetUTF8Char(cur, &len);
553
554
0
        if ((c < 0) || (n + len > size))
555
0
            break;
556
0
        cur += len;
557
0
  n += len;
558
0
    }
559
560
    /*
561
     * col can only point to the end of the buffer if
562
     * there's space for a marker.
563
     */
564
0
    if (col >= n)
565
0
        col = n < size ? n : size - 1;
566
567
0
    *startOut = start;
568
0
    *sizeInOut = n;
569
0
    *offsetOut = col;
570
0
}
571
572
/**
 * Check whether the character is allowed by the production
 *
 * @deprecated Internal function, don't use.
 *
 * ```
 * [84] Letter ::= BaseChar | Ideographic
 * ```
 *
 * @param c  an unicode character (int)
 * @returns 1 if the character is a BaseChar or Ideographic,
 * 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
588
589
/************************************************************************
590
 *                  *
591
 *    Input handling functions for progressive parsing  *
592
 *                  *
593
 ************************************************************************/
594
595
/* we need to keep enough input to show errors in context */
596
7.16M
#define LINE_LEN        80
597
598
/**
599
 * @deprecated This function was internal and is deprecated.
600
 *
601
 * @param in  an XML parser input
602
 * @param len  an indicative size for the lookahead
603
 * @returns -1 as this is an error to use it.
604
 */
605
int
606
0
xmlParserInputRead(xmlParserInput *in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
607
0
    return(-1);
608
0
}
609
610
/**
611
 * Grow the input buffer.
612
 *
613
 * @param ctxt  an XML parser context
614
 * @returns the number of bytes read or -1 in case of error.
615
 */
616
int
617
296k
xmlParserGrow(xmlParserCtxt *ctxt) {
618
296k
    xmlParserInputPtr in = ctxt->input;
619
296k
    xmlParserInputBufferPtr buf = in->buf;
620
296k
    size_t curEnd = in->end - in->cur;
621
296k
    size_t curBase = in->cur - in->base;
622
296k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
623
164k
                       XML_MAX_HUGE_LENGTH :
624
296k
                       XML_MAX_LOOKUP_LIMIT;
625
296k
    int ret;
626
627
296k
    if (buf == NULL)
628
0
        return(0);
629
    /* Don't grow push parser buffer. */
630
296k
    if (PARSER_PROGRESSIVE(ctxt))
631
31.5k
        return(0);
632
    /* Don't grow memory buffers. */
633
264k
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
634
98.5k
        return(0);
635
166k
    if (buf->error != 0)
636
20.8k
        return(-1);
637
638
145k
    if (curBase > maxLength) {
639
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
640
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
641
0
  return(-1);
642
0
    }
643
644
145k
    if (curEnd >= INPUT_CHUNK)
645
560
        return(0);
646
647
144k
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
648
144k
    xmlBufUpdateInput(buf->buffer, in, curBase);
649
650
144k
    if (ret < 0) {
651
84
        xmlCtxtErrIO(ctxt, buf->error, NULL);
652
84
    }
653
654
144k
    return(ret);
655
145k
}
656
657
/**
658
 * Raises an error with `code` if the input wasn't consumed
659
 * completely.
660
 *
661
 * @param ctxt  parser ctxt
662
 * @param code  error code
663
 */
664
void
665
386
xmlParserCheckEOF(xmlParserCtxt *ctxt, xmlParserErrors code) {
666
386
    xmlParserInputPtr in = ctxt->input;
667
386
    xmlParserInputBufferPtr buf;
668
669
386
    if (ctxt->errNo != XML_ERR_OK)
670
372
        return;
671
672
14
    if (in->cur < in->end) {
673
0
        xmlFatalErr(ctxt, code, NULL);
674
0
        return;
675
0
    }
676
677
14
    buf = in->buf;
678
14
    if ((buf != NULL) && (buf->encoder != NULL)) {
679
0
        size_t curBase = in->cur - in->base;
680
0
        size_t sizeOut = 64;
681
0
        xmlCharEncError ret;
682
683
        /*
684
         * Check for truncated multi-byte sequence
685
         */
686
0
        ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
687
0
        xmlBufUpdateInput(buf->buffer, in, curBase);
688
0
        if (ret != XML_ENC_ERR_SUCCESS) {
689
0
            xmlCtxtErrIO(ctxt, buf->error, NULL);
690
0
            return;
691
0
        }
692
693
        /* Shouldn't happen */
694
0
        if (in->cur < in->end)
695
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
696
0
    }
697
14
}
698
699
/**
700
 * This function increase the input for the parser. It tries to
701
 * preserve pointers to the input buffer, and keep already read data
702
 *
703
 * @deprecated Don't use.
704
 *
705
 * @param in  an XML parser input
706
 * @param len  an indicative size for the lookahead
707
 * @returns the amount of char read, or -1 in case of error, 0 indicate the
708
 * end of this entity
709
 */
710
int
711
0
xmlParserInputGrow(xmlParserInput *in, int len) {
712
0
    int ret;
713
0
    size_t indx;
714
715
0
    if ((in == NULL) || (len < 0)) return(-1);
716
0
    if (in->buf == NULL) return(-1);
717
0
    if (in->base == NULL) return(-1);
718
0
    if (in->cur == NULL) return(-1);
719
0
    if (in->buf->buffer == NULL) return(-1);
720
721
    /* Don't grow memory buffers. */
722
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
723
0
        return(0);
724
725
0
    indx = in->cur - in->base;
726
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
727
0
        return(0);
728
0
    }
729
0
    ret = xmlParserInputBufferGrow(in->buf, len);
730
731
0
    in->base = xmlBufContent(in->buf->buffer);
732
0
    if (in->base == NULL) {
733
0
        in->base = BAD_CAST "";
734
0
        in->cur = in->base;
735
0
        in->end = in->base;
736
0
        return(-1);
737
0
    }
738
0
    in->cur = in->base + indx;
739
0
    in->end = xmlBufEnd(in->buf->buffer);
740
741
0
    return(ret);
742
0
}
743
744
/**
745
 * Shrink the input buffer.
746
 *
747
 * @param ctxt  an XML parser context
748
 */
749
void
750
3.95M
xmlParserShrink(xmlParserCtxt *ctxt) {
751
3.95M
    xmlParserInputPtr in = ctxt->input;
752
3.95M
    xmlParserInputBufferPtr buf = in->buf;
753
3.95M
    size_t used, res;
754
755
3.95M
    if (buf == NULL)
756
0
        return;
757
758
3.95M
    used = in->cur - in->base;
759
760
3.95M
    if (used > LINE_LEN) {
761
3.20M
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
762
763
3.20M
        if (res > 0) {
764
3.20M
            used -= res;
765
3.20M
            xmlSaturatedAddSizeT(&in->consumed, res);
766
3.20M
        }
767
768
3.20M
        xmlBufUpdateInput(buf->buffer, in, used);
769
3.20M
    }
770
3.95M
}
771
772
/**
773
 * This function removes used input for the parser.
774
 *
775
 * @deprecated Don't use.
776
 *
777
 * @param in  an XML parser input
778
 */
779
void
780
0
xmlParserInputShrink(xmlParserInput *in) {
781
0
    size_t used;
782
0
    size_t ret;
783
784
0
    if (in == NULL) return;
785
0
    if (in->buf == NULL) return;
786
0
    if (in->base == NULL) return;
787
0
    if (in->cur == NULL) return;
788
0
    if (in->buf->buffer == NULL) return;
789
790
0
    used = in->cur - in->base;
791
792
0
    if (used > LINE_LEN) {
793
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
794
0
  if (ret > 0) {
795
0
            used -= ret;
796
0
            xmlSaturatedAddSizeT(&in->consumed, ret);
797
0
  }
798
799
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
800
0
    }
801
0
}
802
803
/************************************************************************
804
 *                  *
805
 *    UTF8 character input and related functions    *
806
 *                  *
807
 ************************************************************************/
808
809
/**
810
 * Skip to the next char input char.
811
 *
812
 * @deprecated Internal function, do not use.
813
 *
814
 * @param ctxt  the XML parser context
815
 */
816
817
void
818
xmlNextChar(xmlParserCtxt *ctxt)
819
284M
{
820
284M
    const unsigned char *cur;
821
284M
    size_t avail;
822
284M
    int c;
823
824
284M
    if ((ctxt == NULL) || (ctxt->input == NULL))
825
0
        return;
826
827
284M
    avail = ctxt->input->end - ctxt->input->cur;
828
829
284M
    if (avail < INPUT_CHUNK) {
830
110k
        xmlParserGrow(ctxt);
831
110k
        if (ctxt->input->cur >= ctxt->input->end)
832
0
            return;
833
110k
        avail = ctxt->input->end - ctxt->input->cur;
834
110k
    }
835
836
284M
    cur = ctxt->input->cur;
837
284M
    c = *cur;
838
839
284M
    if (c < 0x80) {
840
283M
        if (c == '\n') {
841
0
            ctxt->input->cur++;
842
0
            ctxt->input->line++;
843
0
            ctxt->input->col = 1;
844
283M
        } else if (c == '\r') {
845
            /*
846
             *   2.11 End-of-Line Handling
847
             *   the literal two-character sequence "#xD#xA" or a standalone
848
             *   literal #xD, an XML processor must pass to the application
849
             *   the single character #xA.
850
             */
851
263
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
852
263
            ctxt->input->line++;
853
263
            ctxt->input->col = 1;
854
263
            return;
855
283M
        } else {
856
283M
            ctxt->input->cur++;
857
283M
            ctxt->input->col++;
858
283M
        }
859
283M
    } else {
860
1.10M
        ctxt->input->col++;
861
862
1.10M
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
863
16.2k
            goto encoding_error;
864
865
1.08M
        if (c < 0xe0) {
866
            /* 2-byte code */
867
678k
            if (c < 0xc2)
868
678k
                goto encoding_error;
869
150
            ctxt->input->cur += 2;
870
407k
        } else {
871
407k
            unsigned int val = (c << 8) | cur[1];
872
873
407k
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
874
0
                goto encoding_error;
875
876
407k
            if (c < 0xf0) {
877
                /* 3-byte code */
878
407k
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
879
0
                    goto encoding_error;
880
407k
                ctxt->input->cur += 3;
881
407k
            } else {
882
0
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
883
0
                    goto encoding_error;
884
885
                /* 4-byte code */
886
0
                if ((val < 0xf090) || (val >= 0xf490))
887
0
                    goto encoding_error;
888
0
                ctxt->input->cur += 4;
889
0
            }
890
407k
        }
891
1.08M
    }
892
893
284M
    return;
894
895
284M
encoding_error:
896
    /* Only report the first error */
897
694k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
898
453
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
899
453
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
900
453
    }
901
694k
    ctxt->input->cur++;
902
694k
}
903
904
/**
905
 * The current char value, if using UTF-8 this may actually span multiple
906
 * bytes in the input buffer. Implement the end of line normalization:
907
 *
908
 * @deprecated Internal function, do not use.
909
 *
910
 * 2.11 End-of-Line Handling
911
 *
912
 * Wherever an external parsed entity or the literal entity value
913
 * of an internal parsed entity contains either the literal two-character
914
 * sequence "#xD#xA" or a standalone literal \#xD, an XML processor
915
 * must pass to the application the single character \#xA.
916
 * This behavior can conveniently be produced by normalizing all
917
 * line breaks to \#xA on input, before parsing.)
918
 *
919
 * @param ctxt  the XML parser context
920
 * @param len  pointer to the length of the char read
921
 * @returns the current char value and its length
922
 */
923
924
int
925
115M
xmlCurrentChar(xmlParserCtxt *ctxt, int *len) {
926
115M
    const unsigned char *cur;
927
115M
    size_t avail;
928
115M
    int c;
929
930
115M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
931
932
115M
    avail = ctxt->input->end - ctxt->input->cur;
933
934
115M
    if (avail < INPUT_CHUNK) {
935
126k
        xmlParserGrow(ctxt);
936
126k
        avail = ctxt->input->end - ctxt->input->cur;
937
126k
    }
938
939
115M
    cur = ctxt->input->cur;
940
115M
    c = *cur;
941
942
115M
    if (c < 0x80) {
943
  /* 1-byte code */
944
25.4M
        if (c < 0x20) {
945
            /*
946
             *   2.11 End-of-Line Handling
947
             *   the literal two-character sequence "#xD#xA" or a standalone
948
             *   literal #xD, an XML processor must pass to the application
949
             *   the single character #xA.
950
             */
951
823k
            if (c == '\r') {
952
                /*
953
                 * TODO: This function shouldn't change the 'cur' pointer
954
                 * as side effect, but the NEXTL macro in parser.c relies
955
                 * on this behavior when incrementing line numbers.
956
                 */
957
12.8k
                if (cur[1] == '\n')
958
224
                    ctxt->input->cur++;
959
12.8k
                *len = 1;
960
12.8k
                c = '\n';
961
810k
            } else if (c == 0) {
962
396k
                if (ctxt->input->cur >= ctxt->input->end) {
963
698
                    *len = 0;
964
396k
                } else {
965
396k
                    *len = 1;
966
                    /*
967
                     * TODO: Null bytes should be handled by callers,
968
                     * but this can be tricky.
969
                     */
970
396k
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
971
396k
                            "Char 0x0 out of allowed range\n");
972
396k
                }
973
413k
            } else {
974
413k
                *len = 1;
975
413k
            }
976
24.6M
        } else {
977
24.6M
            *len = 1;
978
24.6M
        }
979
980
25.4M
        return(c);
981
89.8M
    } else {
982
89.8M
        int val;
983
984
89.8M
        if (avail < 2)
985
8
            goto incomplete_sequence;
986
89.8M
        if ((cur[1] & 0xc0) != 0x80)
987
7.25M
            goto encoding_error;
988
989
82.5M
        if (c < 0xe0) {
990
            /* 2-byte code */
991
2.28M
            if (c < 0xc2)
992
557k
                goto encoding_error;
993
1.72M
            val = (c & 0x1f) << 6;
994
1.72M
            val |= cur[1] & 0x3f;
995
1.72M
            *len = 2;
996
80.2M
        } else {
997
80.2M
            if (avail < 3)
998
0
                goto incomplete_sequence;
999
80.2M
            if ((cur[2] & 0xc0) != 0x80)
1000
1.76k
                goto encoding_error;
1001
1002
80.2M
            if (c < 0xf0) {
1003
                /* 3-byte code */
1004
80.2M
                val = (c & 0xf) << 12;
1005
80.2M
                val |= (cur[1] & 0x3f) << 6;
1006
80.2M
                val |= cur[2] & 0x3f;
1007
80.2M
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
1008
12
                    goto encoding_error;
1009
80.2M
                *len = 3;
1010
80.2M
            } else {
1011
1.12k
                if (avail < 4)
1012
0
                    goto incomplete_sequence;
1013
1.12k
                if ((cur[3] & 0xc0) != 0x80)
1014
152
                    goto encoding_error;
1015
1016
                /* 4-byte code */
1017
976
                val = (c & 0x0f) << 18;
1018
976
                val |= (cur[1] & 0x3f) << 12;
1019
976
                val |= (cur[2] & 0x3f) << 6;
1020
976
                val |= cur[3] & 0x3f;
1021
976
                if ((val < 0x10000) || (val >= 0x110000))
1022
882
                    goto encoding_error;
1023
94
                *len = 4;
1024
94
            }
1025
80.2M
        }
1026
1027
82.0M
        return(val);
1028
82.5M
    }
1029
1030
7.81M
encoding_error:
1031
    /* Only report the first error */
1032
7.81M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
1033
87
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
1034
87
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
1035
87
    }
1036
7.81M
    *len = 1;
1037
7.81M
    return(XML_INVALID_CHAR);
1038
1039
8
incomplete_sequence:
1040
    /*
1041
     * An encoding problem may arise from a truncated input buffer
1042
     * splitting a character in the middle. In that case do not raise
1043
     * an error but return 0. This should only happen when push parsing
1044
     * char data.
1045
     */
1046
8
    *len = 0;
1047
8
    return(0);
1048
115M
}
1049
1050
/**
1051
 * The current char value, if using UTF-8 this may actually span multiple
1052
 * bytes in the input buffer.
1053
 *
1054
 * @deprecated Internal function, do not use.
1055
 *
1056
 * @param ctxt  the XML parser context
1057
 * @param cur  pointer to the beginning of the char
1058
 * @param len  pointer to the length of the char read
1059
 * @returns the current char value and its length
1060
 */
1061
1062
int
1063
xmlStringCurrentChar(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
1064
0
                     const xmlChar *cur, int *len) {
1065
0
    int c;
1066
1067
0
    if ((cur == NULL) || (len == NULL))
1068
0
        return(0);
1069
1070
    /* cur is zero-terminated, so we can lie about its length. */
1071
0
    *len = 4;
1072
0
    c = xmlGetUTF8Char(cur, len);
1073
1074
0
    return((c < 0) ? 0 : c);
1075
0
}
1076
1077
/**
1078
 * append the char value in the array
1079
 *
1080
 * @deprecated Internal function, don't use.
1081
 *
1082
 * @param out  pointer to an array of xmlChar
1083
 * @param val  the char value
1084
 * @returns the number of xmlChar written
1085
 */
1086
int
1087
55.3M
xmlCopyCharMultiByte(xmlChar *out, int val) {
1088
55.3M
    if ((out == NULL) || (val < 0)) return(0);
1089
    /*
1090
     * We are supposed to handle UTF8, check it's valid
1091
     * From rfc2044: encoding of the Unicode values on UTF-8:
1092
     *
1093
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1094
     * 0000 0000-0000 007F   0xxxxxxx
1095
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1096
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1097
     */
1098
55.3M
    if  (val >= 0x80) {
1099
55.3M
  xmlChar *savedout = out;
1100
55.3M
  int bits;
1101
55.3M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1102
53.6M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1103
59
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1104
0
  else {
1105
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1106
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1107
0
#endif
1108
0
      return(0);
1109
0
  }
1110
164M
  for ( ; bits >= 0; bits-= 6)
1111
108M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1112
55.3M
  return (out - savedout);
1113
55.3M
    }
1114
392
    *out = val;
1115
392
    return 1;
1116
55.3M
}
1117
1118
/**
1119
 * append the char value in the array
1120
 *
1121
 * @deprecated Don't use.
1122
 *
1123
 * @param len  Ignored, compatibility
1124
 * @param out  pointer to an array of xmlChar
1125
 * @param val  the char value
1126
 * @returns the number of xmlChar written
1127
 */
1128
1129
int
1130
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1131
0
    if ((out == NULL) || (val < 0)) return(0);
1132
    /* the len parameter is ignored */
1133
0
    if  (val >= 0x80) {
1134
0
  return(xmlCopyCharMultiByte (out, val));
1135
0
    }
1136
0
    *out = val;
1137
0
    return 1;
1138
0
}
1139
1140
/************************************************************************
1141
 *                  *
1142
 *    Commodity functions to switch encodings     *
1143
 *                  *
1144
 ************************************************************************/
1145
1146
/**
1147
 * Installs a custom implementation to convert between character
1148
 * encodings.
1149
 *
1150
 * This bypasses legacy feature like global encoding handlers or
1151
 * encoding aliases.
1152
 *
1153
 * @since 2.14.0
1154
 * @param ctxt  parser context
1155
 * @param impl  callback
1156
 * @param vctxt  user data
1157
 */
1158
void
1159
xmlCtxtSetCharEncConvImpl(xmlParserCtxt *ctxt, xmlCharEncConvImpl impl,
1160
0
                          void *vctxt) {
1161
0
    if (ctxt == NULL)
1162
0
        return;
1163
1164
0
    ctxt->convImpl = impl;
1165
0
    ctxt->convCtxt = vctxt;
1166
0
}
1167
1168
static xmlParserErrors
1169
0
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1170
0
    xmlChar out[200];
1171
0
    xmlParserInputPtr input = ctxt->input;
1172
0
    xmlCharEncodingHandlerPtr handler;
1173
0
    int inlen, outlen, i;
1174
0
    xmlParserErrors code;
1175
0
    xmlCharEncError res;
1176
1177
0
    *hout = NULL;
1178
1179
    /*
1180
     * To detect the EBCDIC code page, we convert the first 200 bytes
1181
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1182
     */
1183
0
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1184
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1185
0
    if (code != XML_ERR_OK)
1186
0
        return(code);
1187
0
    outlen = sizeof(out) - 1;
1188
0
    inlen = input->end - input->cur;
1189
0
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
1190
0
                           /* flush */ 0);
1191
    /*
1192
     * Return the EBCDIC handler if decoding failed. The error will
1193
     * be reported later.
1194
     */
1195
0
    if (res < 0)
1196
0
        goto done;
1197
0
    out[outlen] = 0;
1198
1199
0
    for (i = 0; i < outlen; i++) {
1200
0
        if (out[i] == '>')
1201
0
            break;
1202
0
        if ((out[i] == 'e') &&
1203
0
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1204
0
            int start, cur, quote;
1205
1206
0
            i += 8;
1207
0
            while (IS_BLANK_CH(out[i]))
1208
0
                i += 1;
1209
0
            if (out[i++] != '=')
1210
0
                break;
1211
0
            while (IS_BLANK_CH(out[i]))
1212
0
                i += 1;
1213
0
            quote = out[i++];
1214
0
            if ((quote != '\'') && (quote != '"'))
1215
0
                break;
1216
0
            start = i;
1217
0
            cur = out[i];
1218
0
            while (((cur >= 'a') && (cur <= 'z')) ||
1219
0
                   ((cur >= 'A') && (cur <= 'Z')) ||
1220
0
                   ((cur >= '0') && (cur <= '9')) ||
1221
0
                   (cur == '.') || (cur == '_') ||
1222
0
                   (cur == '-'))
1223
0
                cur = out[++i];
1224
0
            if (cur != quote)
1225
0
                break;
1226
0
            out[i] = 0;
1227
0
            xmlCharEncCloseFunc(handler);
1228
0
            code = xmlCreateCharEncodingHandler((char *) out + start,
1229
0
                    XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
1230
0
                    &handler);
1231
0
            if (code != XML_ERR_OK)
1232
0
                return(code);
1233
0
            *hout = handler;
1234
0
            return(XML_ERR_OK);
1235
0
        }
1236
0
    }
1237
1238
0
done:
1239
    /*
1240
     * Encoding handlers are stateful, so we have to recreate them.
1241
     */
1242
0
    xmlCharEncCloseFunc(handler);
1243
0
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1244
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1245
0
    if (code != XML_ERR_OK)
1246
0
        return(code);
1247
0
    *hout = handler;
1248
0
    return(XML_ERR_OK);
1249
0
}
1250
1251
/**
1252
 * Use encoding specified by enum to decode input data. This overrides
1253
 * the encoding found in the XML declaration.
1254
 *
1255
 * This function can also be used to override the encoding of chunks
1256
 * passed to #xmlParseChunk.
1257
 *
1258
 * @param ctxt  the parser context
1259
 * @param enc  the encoding value (number)
1260
 * @returns 0 in case of success, -1 otherwise
1261
 */
1262
int
1263
xmlSwitchEncoding(xmlParserCtxt *ctxt, xmlCharEncoding enc)
1264
16
{
1265
16
    xmlCharEncodingHandlerPtr handler = NULL;
1266
16
    int ret;
1267
16
    xmlParserErrors code;
1268
1269
16
    if ((ctxt == NULL) || (ctxt->input == NULL))
1270
0
        return(-1);
1271
1272
16
    code = xmlLookupCharEncodingHandler(enc, &handler);
1273
16
    if (code != 0) {
1274
0
        xmlFatalErr(ctxt, code, NULL);
1275
0
        return(-1);
1276
0
    }
1277
1278
16
    ret = xmlSwitchToEncoding(ctxt, handler);
1279
1280
16
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1281
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1282
0
    }
1283
1284
16
    return(ret);
1285
16
}
1286
1287
/**
1288
 * @param ctxt  the parser context
1289
 * @param input  the input strea,
1290
 * @param encoding  the encoding name
1291
 * @returns 0 in case of success, -1 otherwise
1292
 */
1293
static int
1294
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1295
0
                           const char *encoding) {
1296
0
    xmlCharEncodingHandlerPtr handler;
1297
0
    xmlParserErrors res;
1298
1299
0
    if (encoding == NULL)
1300
0
        return(-1);
1301
1302
0
    res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
1303
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1304
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1305
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1306
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1307
0
        return(-1);
1308
0
    } else if (res != XML_ERR_OK) {
1309
0
        xmlFatalErr(ctxt, res, encoding);
1310
0
        return(-1);
1311
0
    }
1312
1313
0
    res  = xmlInputSetEncodingHandler(input, handler);
1314
0
    if (res != XML_ERR_OK) {
1315
0
        xmlCtxtErrIO(ctxt, res, NULL);
1316
0
        return(-1);
1317
0
    }
1318
1319
0
    return(0);
1320
0
}
1321
1322
/**
1323
 * Use specified encoding to decode input data. This overrides the
1324
 * encoding found in the XML declaration.
1325
 *
1326
 * This function can also be used to override the encoding of chunks
1327
 * passed to #xmlParseChunk.
1328
 *
1329
 * @since 2.13.0
1330
 *
1331
 * @param ctxt  the parser context
1332
 * @param encoding  the encoding name
1333
 * @returns 0 in case of success, -1 otherwise
1334
 */
1335
int
1336
0
xmlSwitchEncodingName(xmlParserCtxt *ctxt, const char *encoding) {
1337
0
    if (ctxt == NULL)
1338
0
        return(-1);
1339
1340
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1341
0
}
1342
1343
/**
1344
 * Use encoding handler to decode input data.
1345
 *
1346
 * Closes the handler on error.
1347
 *
1348
 * @param input  the input stream
1349
 * @param handler  the encoding handler
1350
 * @returns an xmlParserErrors code.
1351
 */
1352
xmlParserErrors
1353
xmlInputSetEncodingHandler(xmlParserInput *input,
1354
321
                           xmlCharEncodingHandler *handler) {
1355
321
    xmlParserInputBufferPtr in;
1356
321
    xmlBufPtr buf;
1357
321
    xmlParserErrors code = XML_ERR_OK;
1358
1359
321
    if ((input == NULL) || (input->buf == NULL)) {
1360
0
        xmlCharEncCloseFunc(handler);
1361
0
  return(XML_ERR_ARGUMENT);
1362
0
    }
1363
321
    in = input->buf;
1364
1365
321
    input->flags |= XML_INPUT_HAS_ENCODING;
1366
1367
    /*
1368
     * UTF-8 requires no encoding handler.
1369
     */
1370
321
    if ((handler != NULL) &&
1371
321
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1372
0
        xmlCharEncCloseFunc(handler);
1373
0
        handler = NULL;
1374
0
    }
1375
1376
321
    if (in->encoder == handler)
1377
0
        return(XML_ERR_OK);
1378
1379
321
    if (in->encoder != NULL) {
1380
        /*
1381
         * Switching encodings during parsing is a really bad idea,
1382
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1383
         * separate calls to xmlParseChunk.
1384
         *
1385
         * TODO: We should check whether the "raw" input buffer is empty and
1386
         * convert the old content using the old encoder.
1387
         */
1388
1389
0
        xmlCharEncCloseFunc(in->encoder);
1390
0
        in->encoder = handler;
1391
0
        return(XML_ERR_OK);
1392
0
    }
1393
1394
321
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1395
321
    if (buf == NULL) {
1396
0
        xmlCharEncCloseFunc(handler);
1397
0
        return(XML_ERR_NO_MEMORY);
1398
0
    }
1399
1400
321
    in->encoder = handler;
1401
321
    in->raw = in->buffer;
1402
321
    in->buffer = buf;
1403
1404
    /*
1405
     * Is there already some content down the pipe to convert ?
1406
     */
1407
321
    if (input->end > input->base) {
1408
321
        size_t processed;
1409
321
        size_t nbchars;
1410
321
        xmlCharEncError res;
1411
1412
        /*
1413
         * Shrink the current input buffer.
1414
         * Move it as the raw buffer and create a new input buffer
1415
         */
1416
321
        processed = input->cur - input->base;
1417
321
        xmlBufShrink(in->raw, processed);
1418
321
        input->consumed += processed;
1419
321
        in->rawconsumed = processed;
1420
1421
        /*
1422
         * If we're push-parsing, we must convert the whole buffer.
1423
         *
1424
         * If we're pull-parsing, we could be parsing from a huge
1425
         * memory buffer which we don't want to convert completely.
1426
         */
1427
321
        if (input->flags & XML_INPUT_PROGRESSIVE)
1428
35
            nbchars = SIZE_MAX;
1429
286
        else
1430
286
            nbchars = 4000 /* MINLEN */;
1431
321
        res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
1432
321
        if (res != XML_ENC_ERR_SUCCESS)
1433
0
            code = in->error;
1434
321
    }
1435
1436
321
    xmlBufResetInput(in->buffer, input);
1437
1438
321
    return(code);
1439
321
}
1440
1441
/**
1442
 * Use encoding handler to decode input data.
1443
 *
1444
 * @deprecated Internal function, don't use.
1445
 *
1446
 * @param ctxt  the parser context, only for error reporting
1447
 * @param input  the input stream
1448
 * @param handler  the encoding handler
1449
 * @returns 0 in case of success, -1 otherwise
1450
 */
1451
int
1452
xmlSwitchInputEncoding(xmlParserCtxt *ctxt, xmlParserInput *input,
1453
0
                       xmlCharEncodingHandler *handler) {
1454
0
    xmlParserErrors code = xmlInputSetEncodingHandler(input, handler);
1455
1456
0
    if (code != XML_ERR_OK) {
1457
0
        xmlCtxtErrIO(ctxt, code, NULL);
1458
0
        return(-1);
1459
0
    }
1460
1461
0
    return(0);
1462
0
}
1463
1464
/**
1465
 * Use encoding handler to decode input data.
1466
 *
1467
 * This function can be used to enforce the encoding of chunks passed
1468
 * to #xmlParseChunk.
1469
 *
1470
 * @param ctxt  the parser context
1471
 * @param handler  the encoding handler
1472
 * @returns 0 in case of success, -1 otherwise
1473
 */
1474
int
1475
xmlSwitchToEncoding(xmlParserCtxt *ctxt, xmlCharEncodingHandler *handler)
1476
16
{
1477
16
    xmlParserErrors code;
1478
1479
16
    if (ctxt == NULL)
1480
0
        return(-1);
1481
1482
16
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1483
16
    if (code != XML_ERR_OK) {
1484
0
        xmlCtxtErrIO(ctxt, code, NULL);
1485
0
        return(-1);
1486
0
    }
1487
1488
16
    return(0);
1489
16
}
1490
1491
/**
1492
 * Handle optional BOM, detect and switch to encoding.
1493
 *
1494
 * Assumes that there are at least four bytes in the input buffer.
1495
 *
1496
 * @param ctxt  the parser context
1497
 */
1498
void
1499
1.50k
xmlDetectEncoding(xmlParserCtxt *ctxt) {
1500
1.50k
    const xmlChar *in;
1501
1.50k
    xmlCharEncoding enc;
1502
1.50k
    int bomSize;
1503
1.50k
    int autoFlag = 0;
1504
1505
1.50k
    if (xmlParserGrow(ctxt) < 0)
1506
0
        return;
1507
1.50k
    in = ctxt->input->cur;
1508
1.50k
    if (ctxt->input->end - in < 4)
1509
0
        return;
1510
1511
1.50k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1512
        /*
1513
         * If the encoding was already set, only skip the BOM which was
1514
         * possibly decoded to UTF-8.
1515
         */
1516
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1517
0
            ctxt->input->cur += 3;
1518
0
        }
1519
1520
0
        return;
1521
0
    }
1522
1523
1.50k
    enc = XML_CHAR_ENCODING_NONE;
1524
1.50k
    bomSize = 0;
1525
1526
    /*
1527
     * BOM sniffing and detection of initial bytes of an XML
1528
     * declaration.
1529
     *
1530
     * The HTML5 spec doesn't cover UTF-32 (UCS-4) or EBCDIC.
1531
     */
1532
1.50k
    switch (in[0]) {
1533
0
        case 0x00:
1534
0
            if ((!ctxt->html) &&
1535
0
                (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1536
0
                enc = XML_CHAR_ENCODING_UCS4BE;
1537
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1538
0
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1539
                /*
1540
                 * TODO: The HTML5 spec requires to check that the
1541
                 * next codepoint is an 'x'.
1542
                 */
1543
0
                enc = XML_CHAR_ENCODING_UTF16BE;
1544
0
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1545
0
            }
1546
0
            break;
1547
1548
1.48k
        case 0x3C:
1549
1.48k
            if (in[1] == 0x00) {
1550
0
                if ((!ctxt->html) &&
1551
0
                    (in[2] == 0x00) && (in[3] == 0x00)) {
1552
0
                    enc = XML_CHAR_ENCODING_UCS4LE;
1553
0
                    autoFlag = XML_INPUT_AUTO_OTHER;
1554
0
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1555
                    /*
1556
                     * TODO: The HTML5 spec requires to check that the
1557
                     * next codepoint is an 'x'.
1558
                     */
1559
0
                    enc = XML_CHAR_ENCODING_UTF16LE;
1560
0
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1561
0
                }
1562
0
            }
1563
1.48k
            break;
1564
1565
0
        case 0x4C:
1566
0
      if ((!ctxt->html) &&
1567
0
                (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1568
0
          enc = XML_CHAR_ENCODING_EBCDIC;
1569
0
                autoFlag = XML_INPUT_AUTO_OTHER;
1570
0
            }
1571
0
            break;
1572
1573
0
        case 0xEF:
1574
0
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1575
0
                enc = XML_CHAR_ENCODING_UTF8;
1576
0
                autoFlag = XML_INPUT_AUTO_UTF8;
1577
0
                bomSize = 3;
1578
0
            }
1579
0
            break;
1580
1581
16
        case 0xFE:
1582
16
            if (in[1] == 0xFF) {
1583
16
                enc = XML_CHAR_ENCODING_UTF16BE;
1584
16
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1585
16
                bomSize = 2;
1586
16
            }
1587
16
            break;
1588
1589
0
        case 0xFF:
1590
0
            if (in[1] == 0xFE) {
1591
0
                enc = XML_CHAR_ENCODING_UTF16LE;
1592
0
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1593
0
                bomSize = 2;
1594
0
            }
1595
0
            break;
1596
1.50k
    }
1597
1598
1.50k
    if (bomSize > 0) {
1599
16
        ctxt->input->cur += bomSize;
1600
16
    }
1601
1602
1.50k
    if (enc != XML_CHAR_ENCODING_NONE) {
1603
16
        ctxt->input->flags |= autoFlag;
1604
1605
16
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1606
0
            xmlCharEncodingHandlerPtr handler;
1607
0
            xmlParserErrors res;
1608
1609
0
            res = xmlDetectEBCDIC(ctxt, &handler);
1610
0
            if (res != XML_ERR_OK) {
1611
0
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1612
0
            } else {
1613
0
                xmlSwitchToEncoding(ctxt, handler);
1614
0
            }
1615
16
        } else {
1616
16
            xmlSwitchEncoding(ctxt, enc);
1617
16
        }
1618
16
    }
1619
1.50k
}
1620
1621
/**
1622
 * Set the encoding from a declaration in the document.
1623
 *
1624
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1625
 * about encoding mismatches.
1626
 *
1627
 * Takes ownership of 'encoding'.
1628
 *
1629
 * @param ctxt  the parser context
1630
 * @param encoding  declared encoding
1631
 */
1632
void
1633
758
xmlSetDeclaredEncoding(xmlParserCtxt *ctxt, xmlChar *encoding) {
1634
758
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1635
758
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1636
305
        xmlCharEncodingHandlerPtr handler;
1637
305
        xmlParserErrors res;
1638
305
        xmlCharEncFlags flags = XML_ENC_INPUT;
1639
1640
        /*
1641
         * xmlSwitchEncodingName treats unsupported encodings as
1642
         * warnings, but we want it to be an error in an encoding
1643
         * declaration.
1644
         */
1645
305
        if (ctxt->html)
1646
0
            flags |= XML_ENC_HTML;
1647
305
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1648
305
                flags, ctxt->convImpl, ctxt->convCtxt, &handler);
1649
305
        if (res != XML_ERR_OK) {
1650
0
            xmlFatalErr(ctxt, res, (const char *) encoding);
1651
0
            xmlFree(encoding);
1652
0
            return;
1653
0
        }
1654
1655
305
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1656
305
        if (res != XML_ERR_OK) {
1657
0
            xmlCtxtErrIO(ctxt, res, NULL);
1658
0
            xmlFree(encoding);
1659
0
            return;
1660
0
        }
1661
1662
305
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1663
453
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1664
0
        static const char *allowedUTF8[] = {
1665
0
            "UTF-8", "UTF8", NULL
1666
0
        };
1667
0
        static const char *allowedUTF16LE[] = {
1668
0
            "UTF-16", "UTF-16LE", "UTF16", NULL
1669
0
        };
1670
0
        static const char *allowedUTF16BE[] = {
1671
0
            "UTF-16", "UTF-16BE", "UTF16", NULL
1672
0
        };
1673
0
        const char **allowed = NULL;
1674
0
        const char *autoEnc = NULL;
1675
1676
0
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1677
0
            case XML_INPUT_AUTO_UTF8:
1678
0
                allowed = allowedUTF8;
1679
0
                autoEnc = "UTF-8";
1680
0
                break;
1681
0
            case XML_INPUT_AUTO_UTF16LE:
1682
0
                allowed = allowedUTF16LE;
1683
0
                autoEnc = "UTF-16LE";
1684
0
                break;
1685
0
            case XML_INPUT_AUTO_UTF16BE:
1686
0
                allowed = allowedUTF16BE;
1687
0
                autoEnc = "UTF-16BE";
1688
0
                break;
1689
0
        }
1690
1691
0
        if (allowed != NULL) {
1692
0
            const char **p;
1693
0
            int match = 0;
1694
1695
0
            for (p = allowed; *p != NULL; p++) {
1696
0
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1697
0
                    match = 1;
1698
0
                    break;
1699
0
                }
1700
0
            }
1701
1702
0
            if (match == 0) {
1703
0
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1704
0
                              "Encoding '%s' doesn't match "
1705
0
                              "auto-detected '%s'\n",
1706
0
                              encoding, BAD_CAST autoEnc);
1707
0
                xmlFree(encoding);
1708
0
                encoding = xmlStrdup(BAD_CAST autoEnc);
1709
0
                if (encoding == NULL)
1710
0
                    xmlCtxtErrMemory(ctxt);
1711
0
            }
1712
0
        }
1713
0
    }
1714
1715
758
    if (ctxt->encoding != NULL)
1716
58
        xmlFree(ctxt->encoding);
1717
758
    ctxt->encoding = encoding;
1718
758
}
1719
1720
/**
1721
 * @since 2.14.0
1722
 *
1723
 * @param ctxt  parser context
1724
 * @returns the encoding from the encoding declaration. This can differ
1725
 * from the actual encoding.
1726
 */
1727
const xmlChar *
1728
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxt *ctxt) {
1729
0
    if (ctxt == NULL)
1730
0
        return(NULL);
1731
1732
0
    return(ctxt->encoding);
1733
0
}
1734
1735
/**
1736
 * @param ctxt  the parser context
1737
 * @returns the actual used to parse the document. This can differ from
1738
 * the declared encoding.
1739
 */
1740
const xmlChar *
1741
466
xmlGetActualEncoding(xmlParserCtxt *ctxt) {
1742
466
    const xmlChar *encoding = NULL;
1743
1744
466
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1745
466
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1746
        /* Preserve encoding exactly */
1747
101
        encoding = ctxt->encoding;
1748
365
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1749
0
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1750
365
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1751
0
        encoding = BAD_CAST "UTF-8";
1752
0
    }
1753
1754
466
    return(encoding);
1755
466
}
1756
1757
/************************************************************************
1758
 *                  *
1759
 *  Commodity functions to handle entities processing   *
1760
 *                  *
1761
 ************************************************************************/
1762
1763
/**
1764
 * Free up an input stream.
1765
 *
1766
 * @param input  an xmlParserInput
1767
 */
1768
void
1769
1.59k
xmlFreeInputStream(xmlParserInput *input) {
1770
1.59k
    if (input == NULL) return;
1771
1772
1.59k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1773
1.59k
    if (input->version != NULL) xmlFree((char *) input->version);
1774
1.59k
    if ((input->free != NULL) && (input->base != NULL))
1775
0
        input->free((xmlChar *) input->base);
1776
1.59k
    if (input->buf != NULL)
1777
1.59k
        xmlFreeParserInputBuffer(input->buf);
1778
1.59k
    xmlFree(input);
1779
1.59k
}
1780
1781
/**
1782
 * Create a new input stream structure.
1783
 *
1784
 * @deprecated Use #xmlNewInputFromUrl or similar functions.
1785
 *
1786
 * @param ctxt  an XML parser context
1787
 * @returns the new input stream or NULL
1788
 */
1789
xmlParserInput *
1790
0
xmlNewInputStream(xmlParserCtxt *ctxt) {
1791
0
    xmlParserInputPtr input;
1792
1793
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1794
0
    if (input == NULL) {
1795
0
        xmlCtxtErrMemory(ctxt);
1796
0
  return(NULL);
1797
0
    }
1798
0
    memset(input, 0, sizeof(xmlParserInput));
1799
0
    input->line = 1;
1800
0
    input->col = 1;
1801
1802
0
    return(input);
1803
0
}
1804
1805
/**
1806
 * Creates a new parser input from the filesystem, the network or
1807
 * a user-defined resource loader.
1808
 *
1809
 * @param ctxt  parser context
1810
 * @param url  filename or URL
1811
 * @param publicId  publid ID from doctype (optional)
1812
 * @param encoding  character encoding (optional)
1813
 * @param flags  unused, pass 0
1814
 * @returns a new parser input.
1815
 */
1816
xmlParserInput *
1817
xmlCtxtNewInputFromUrl(xmlParserCtxt *ctxt, const char *url,
1818
                       const char *publicId, const char *encoding,
1819
0
                       xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
1820
0
    xmlParserInputPtr input;
1821
1822
0
    if ((ctxt == NULL) || (url == NULL))
1823
0
  return(NULL);
1824
1825
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1826
0
    if (input == NULL)
1827
0
        return(NULL);
1828
1829
0
    if (encoding != NULL)
1830
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1831
1832
0
    return(input);
1833
0
}
1834
1835
/**
1836
 * Internal helper function.
1837
 *
1838
 * @param buf  parser input buffer
1839
 * @param filename  filename or URL
1840
 * @returns a new parser input.
1841
 */
1842
static xmlParserInputPtr
1843
1.59k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1844
1.59k
    xmlParserInputPtr input;
1845
1846
1.59k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1847
1.59k
    if (input == NULL) {
1848
0
  xmlFreeParserInputBuffer(buf);
1849
0
  return(NULL);
1850
0
    }
1851
1.59k
    memset(input, 0, sizeof(xmlParserInput));
1852
1.59k
    input->line = 1;
1853
1.59k
    input->col = 1;
1854
1855
1.59k
    input->buf = buf;
1856
1.59k
    xmlBufResetInput(input->buf->buffer, input);
1857
1858
1.59k
    if (filename != NULL) {
1859
1.50k
        input->filename = xmlMemStrdup(filename);
1860
1.50k
        if (input->filename == NULL) {
1861
0
            xmlFreeInputStream(input);
1862
0
            return(NULL);
1863
0
        }
1864
1.50k
    }
1865
1866
1.59k
    return(input);
1867
1.59k
}
1868
1869
/**
1870
 * Creates a new parser input to read from a memory area.
1871
 *
1872
 * `url` is used as base to resolve external entities and for
1873
 * error reporting.
1874
 *
1875
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1876
 * stay unchanged until parsing has finished. This can avoid
1877
 * temporary copies.
1878
 *
1879
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1880
 * area must contain a zero byte after the buffer at position `size`.
1881
 * This can avoid temporary copies.
1882
 *
1883
 * @since 2.14.0
1884
 *
1885
 * @param url  base URL (optional)
1886
 * @param mem  pointer to char array
1887
 * @param size  size of array
1888
 * @param flags  optimization hints
1889
 * @returns a new parser input or NULL if a memory allocation failed.
1890
 */
1891
xmlParserInput *
1892
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1893
1.32k
                      xmlParserInputFlags flags) {
1894
1.32k
    xmlParserInputBufferPtr buf;
1895
1896
1.32k
    if (mem == NULL)
1897
0
  return(NULL);
1898
1899
1.32k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1900
1.32k
    if (buf == NULL)
1901
0
        return(NULL);
1902
1903
1.32k
    return(xmlNewInputInternal(buf, url));
1904
1.32k
}
1905
1906
/**
1907
 * @param ctxt  parser context
1908
 * @param url  base URL (optional)
1909
 * @param mem  pointer to char array
1910
 * @param size  size of array
1911
 * @param encoding  character encoding (optional)
1912
 * @param flags  optimization hints
1913
 * @returns a new parser input or NULL in case of error.
1914
 */
1915
xmlParserInput *
1916
xmlCtxtNewInputFromMemory(xmlParserCtxt *ctxt, const char *url,
1917
                          const void *mem, size_t size,
1918
350
                          const char *encoding, xmlParserInputFlags flags) {
1919
350
    xmlParserInputPtr input;
1920
1921
350
    if ((ctxt == NULL) || (mem == NULL))
1922
0
  return(NULL);
1923
1924
350
    input = xmlNewInputFromMemory(url, mem, size, flags);
1925
350
    if (input == NULL) {
1926
0
        xmlCtxtErrMemory(ctxt);
1927
0
        return(NULL);
1928
0
    }
1929
1930
350
    if (encoding != NULL)
1931
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1932
1933
350
    return(input);
1934
350
}
1935
1936
/**
1937
 * Creates a new parser input to read from a zero-terminated string.
1938
 *
1939
 * `url` is used as base to resolve external entities and for
1940
 * error reporting.
1941
 *
1942
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1943
 * stay unchanged until parsing has finished. This can avoid
1944
 * temporary copies.
1945
 *
1946
 * @since 2.14.0
1947
 *
1948
 * @param url  base URL (optional)
1949
 * @param str  zero-terminated string
1950
 * @param flags  optimization hints
1951
 * @returns a new parser input or NULL if a memory allocation failed.
1952
 */
1953
xmlParserInput *
1954
xmlNewInputFromString(const char *url, const char *str,
1955
88
                      xmlParserInputFlags flags) {
1956
88
    xmlParserInputBufferPtr buf;
1957
1958
88
    if (str == NULL)
1959
0
  return(NULL);
1960
1961
88
    buf = xmlNewInputBufferString(str, flags);
1962
88
    if (buf == NULL)
1963
0
        return(NULL);
1964
1965
88
    return(xmlNewInputInternal(buf, url));
1966
88
}
1967
1968
/**
1969
 * @param ctxt  parser context
1970
 * @param url  base URL (optional)
1971
 * @param str  zero-terminated string
1972
 * @param encoding  character encoding (optional)
1973
 * @param flags  optimization hints
1974
 * @returns a new parser input.
1975
 */
1976
xmlParserInput *
1977
xmlCtxtNewInputFromString(xmlParserCtxt *ctxt, const char *url,
1978
                          const char *str, const char *encoding,
1979
88
                          xmlParserInputFlags flags) {
1980
88
    xmlParserInputPtr input;
1981
1982
88
    if ((ctxt == NULL) || (str == NULL))
1983
0
  return(NULL);
1984
1985
88
    input = xmlNewInputFromString(url, str, flags);
1986
88
    if (input == NULL) {
1987
0
        xmlCtxtErrMemory(ctxt);
1988
0
        return(NULL);
1989
0
    }
1990
1991
88
    if (encoding != NULL)
1992
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1993
1994
88
    return(input);
1995
88
}
1996
1997
/**
1998
 * Creates a new parser input to read from a file descriptor.
1999
 *
2000
 * `url` is used as base to resolve external entities and for
2001
 * error reporting.
2002
 *
2003
 * `fd` is closed after parsing has finished.
2004
 *
2005
 * Supported `flags` are XML_INPUT_UNZIP to decompress data
2006
 * automatically. This feature is deprecated and will be removed
2007
 * in a future release.
2008
 *
2009
 * @since 2.14.0
2010
 *
2011
 * @param url  base URL (optional)
2012
 * @param fd  file descriptor
2013
 * @param flags  input flags
2014
 * @returns a new parser input or NULL if a memory allocation failed.
2015
 */
2016
xmlParserInput *
2017
0
xmlNewInputFromFd(const char *url, int fd, xmlParserInputFlags flags) {
2018
0
    xmlParserInputBufferPtr buf;
2019
2020
0
    if (fd < 0)
2021
0
  return(NULL);
2022
2023
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2024
0
    if (buf == NULL)
2025
0
        return(NULL);
2026
2027
0
    if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) {
2028
0
        xmlFreeParserInputBuffer(buf);
2029
0
        return(NULL);
2030
0
    }
2031
2032
0
    return(xmlNewInputInternal(buf, url));
2033
0
}
2034
2035
/**
2036
 * @param ctxt  parser context
2037
 * @param url  base URL (optional)
2038
 * @param fd  file descriptor
2039
 * @param encoding  character encoding (optional)
2040
 * @param flags  unused, pass 0
2041
 * @returns a new parser input.
2042
 */
2043
xmlParserInput *
2044
xmlCtxtNewInputFromFd(xmlParserCtxt *ctxt, const char *url,
2045
                      int fd, const char *encoding,
2046
0
                      xmlParserInputFlags flags) {
2047
0
    xmlParserInputPtr input;
2048
2049
0
    if ((ctxt == NULL) || (fd < 0))
2050
0
  return(NULL);
2051
2052
0
    if (ctxt->options & XML_PARSE_UNZIP)
2053
0
        flags |= XML_INPUT_UNZIP;
2054
2055
0
    input = xmlNewInputFromFd(url, fd, flags);
2056
0
    if (input == NULL) {
2057
0
  xmlCtxtErrMemory(ctxt);
2058
0
        return(NULL);
2059
0
    }
2060
2061
0
    if (encoding != NULL)
2062
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2063
2064
0
    return(input);
2065
0
}
2066
2067
/**
2068
 * Creates a new parser input to read from input callbacks and
2069
 * context.
2070
 *
2071
 * `url` is used as base to resolve external entities and for
2072
 * error reporting.
2073
 *
2074
 * `ioRead` is called to read new data into a provided buffer.
2075
 * It must return the number of bytes written into the buffer
2076
 * or a negative xmlParserErrors code on failure.
2077
 *
2078
 * `ioClose` is called after parsing has finished.
2079
 *
2080
 * `ioCtxt` is an opaque pointer passed to the callbacks.
2081
 *
2082
 * @since 2.14.0
2083
 *
2084
 * @param url  base URL (optional)
2085
 * @param ioRead  read callback
2086
 * @param ioClose  close callback (optional)
2087
 * @param ioCtxt  IO context
2088
 * @param flags  unused, pass 0
2089
 * @returns a new parser input or NULL if a memory allocation failed.
2090
 */
2091
xmlParserInput *
2092
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2093
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2094
0
                  xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
2095
0
    xmlParserInputBufferPtr buf;
2096
2097
0
    if (ioRead == NULL)
2098
0
  return(NULL);
2099
2100
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2101
0
    if (buf == NULL) {
2102
0
        if (ioClose != NULL)
2103
0
            ioClose(ioCtxt);
2104
0
        return(NULL);
2105
0
    }
2106
2107
0
    buf->context = ioCtxt;
2108
0
    buf->readcallback = ioRead;
2109
0
    buf->closecallback = ioClose;
2110
2111
0
    return(xmlNewInputInternal(buf, url));
2112
0
}
2113
2114
/**
2115
 * @param ctxt  parser context
2116
 * @param url  base URL (optional)
2117
 * @param ioRead  read callback
2118
 * @param ioClose  close callback (optional)
2119
 * @param ioCtxt  IO context
2120
 * @param encoding  character encoding (optional)
2121
 * @param flags  unused, pass 0
2122
 * @returns a new parser input.
2123
 */
2124
xmlParserInput *
2125
xmlCtxtNewInputFromIO(xmlParserCtxt *ctxt, const char *url,
2126
                      xmlInputReadCallback ioRead,
2127
                      xmlInputCloseCallback ioClose,
2128
                      void *ioCtxt, const char *encoding,
2129
0
                      xmlParserInputFlags flags) {
2130
0
    xmlParserInputPtr input;
2131
2132
0
    if ((ctxt == NULL) || (ioRead == NULL))
2133
0
  return(NULL);
2134
2135
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2136
0
    if (input == NULL) {
2137
0
        xmlCtxtErrMemory(ctxt);
2138
0
        return(NULL);
2139
0
    }
2140
2141
0
    if (encoding != NULL)
2142
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2143
2144
0
    return(input);
2145
0
}
2146
2147
/**
2148
 * Creates a new parser input for a push parser.
2149
 *
2150
 * @param url  base URL (optional)
2151
 * @param chunk  pointer to char array
2152
 * @param size  size of array
2153
 * @returns a new parser input or NULL if a memory allocation failed.
2154
 */
2155
xmlParserInput *
2156
175
xmlNewPushInput(const char *url, const char *chunk, int size) {
2157
175
    xmlParserInputBufferPtr buf;
2158
175
    xmlParserInputPtr input;
2159
2160
175
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2161
175
    if (buf == NULL)
2162
0
        return(NULL);
2163
2164
175
    input = xmlNewInputInternal(buf, url);
2165
175
    if (input == NULL)
2166
0
  return(NULL);
2167
2168
175
    input->flags |= XML_INPUT_PROGRESSIVE;
2169
2170
175
    if ((size > 0) && (chunk != NULL)) {
2171
0
        int res;
2172
2173
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2174
0
        xmlBufResetInput(input->buf->buffer, input);
2175
0
        if (res < 0) {
2176
0
            xmlFreeInputStream(input);
2177
0
            return(NULL);
2178
0
        }
2179
0
    }
2180
2181
175
    return(input);
2182
175
}
2183
2184
/**
2185
 * Create a new input stream structure encapsulating the `input` into
2186
 * a stream suitable for the parser.
2187
 *
2188
 * @param ctxt  an XML parser context
2189
 * @param buf  an input buffer
2190
 * @param enc  the charset encoding if known
2191
 * @returns the new input stream or NULL
2192
 */
2193
xmlParserInput *
2194
xmlNewIOInputStream(xmlParserCtxt *ctxt, xmlParserInputBuffer *buf,
2195
0
              xmlCharEncoding enc) {
2196
0
    xmlParserInputPtr input;
2197
0
    const char *encoding;
2198
2199
0
    if ((ctxt == NULL) || (buf == NULL))
2200
0
        return(NULL);
2201
2202
0
    input = xmlNewInputInternal(buf, NULL);
2203
0
    if (input == NULL) {
2204
0
        xmlCtxtErrMemory(ctxt);
2205
0
  return(NULL);
2206
0
    }
2207
2208
0
    encoding = xmlGetCharEncodingName(enc);
2209
0
    if (encoding != NULL)
2210
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2211
2212
0
    return(input);
2213
0
}
2214
2215
/**
2216
 * Create a new input stream based on an xmlEntity
2217
 *
2218
 * @deprecated Internal function, do not use.
2219
 *
2220
 * @param ctxt  an XML parser context
2221
 * @param ent  an Entity pointer
2222
 * @returns the new input stream or NULL
2223
 */
2224
xmlParserInput *
2225
469
xmlNewEntityInputStream(xmlParserCtxt *ctxt, xmlEntity *ent) {
2226
469
    xmlParserInputPtr input;
2227
2228
469
    if ((ctxt == NULL) || (ent == NULL))
2229
0
  return(NULL);
2230
2231
469
    if (ent->content != NULL) {
2232
88
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2233
88
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2234
381
    } else if (ent->URI != NULL) {
2235
381
        xmlResourceType rtype;
2236
2237
381
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2238
292
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2239
89
        else
2240
89
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2241
2242
381
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2243
381
                                (char *) ent->ExternalID, rtype);
2244
381
    } else {
2245
0
        return(NULL);
2246
0
    }
2247
2248
469
    if (input == NULL)
2249
0
        return(NULL);
2250
2251
469
    input->entity = ent;
2252
2253
469
    return(input);
2254
469
}
2255
2256
/**
2257
 * Create a new input stream based on a memory buffer.
2258
 *
2259
 * @deprecated Use #xmlNewInputFromString.
2260
 *
2261
 * @param ctxt  an XML parser context
2262
 * @param buffer  a memory buffer
2263
 * @returns the new input stream
2264
 */
2265
xmlParserInput *
2266
0
xmlNewStringInputStream(xmlParserCtxt *ctxt, const xmlChar *buffer) {
2267
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2268
0
                                     NULL, 0));
2269
0
}
2270
2271
2272
/****************************************************************
2273
 *                *
2274
 *    External entities loading     *
2275
 *                *
2276
 ****************************************************************/
2277
2278
#ifdef LIBXML_CATALOG_ENABLED
2279
2280
/**
2281
 * Resolves an external ID or URL against the appropriate catalog.
2282
 *
2283
 * @param url  the URL or system ID for the entity to load
2284
 * @param publicId  the public ID for the entity to load (optional)
2285
 * @param localCatalogs  local catalogs (optional)
2286
 * @param allowGlobal  allow global system catalog
2287
 * @param out  resulting resource or NULL
2288
 * @returns an xmlParserErrors code
2289
 */
2290
static xmlParserErrors
2291
xmlResolveFromCatalog(const char *url, const char *publicId,
2292
0
                      void *localCatalogs, int allowGlobal, char **out) {
2293
0
    xmlError oldError;
2294
0
    xmlError *lastError;
2295
0
    char *resource = NULL;
2296
0
    xmlParserErrors code;
2297
2298
0
    if (out == NULL)
2299
0
        return(XML_ERR_ARGUMENT);
2300
0
    *out = NULL;
2301
0
    if ((localCatalogs == NULL) && (!allowGlobal))
2302
0
        return(XML_ERR_OK);
2303
2304
    /*
2305
     * Don't try to resolve if local file exists.
2306
     *
2307
     * TODO: This is somewhat non-deterministic.
2308
     */
2309
0
    if (xmlNoNetExists(url))
2310
0
        return(XML_ERR_OK);
2311
2312
    /* Backup and reset last error */
2313
0
    lastError = xmlGetLastErrorInternal();
2314
0
    oldError = *lastError;
2315
0
    lastError->code = XML_ERR_OK;
2316
2317
    /*
2318
     * Do a local lookup
2319
     */
2320
0
    if (localCatalogs != NULL) {
2321
0
        resource = (char *) xmlCatalogLocalResolve(localCatalogs,
2322
0
                                                   BAD_CAST publicId,
2323
0
                                                   BAD_CAST url);
2324
0
    }
2325
    /*
2326
     * Try a global lookup
2327
     */
2328
0
    if ((resource == NULL) && (allowGlobal)) {
2329
0
        resource = (char *) xmlCatalogResolve(BAD_CAST publicId,
2330
0
                                              BAD_CAST url);
2331
0
    }
2332
2333
    /*
2334
     * Try to resolve url using URI rules.
2335
     *
2336
     * TODO: We should consider using only a single resolution
2337
     * mechanism depending on resource type. Either by external ID
2338
     * or by URI.
2339
     */
2340
0
    if ((resource == NULL) && (url != NULL)) {
2341
0
        if (localCatalogs != NULL) {
2342
0
            resource = (char *) xmlCatalogLocalResolveURI(localCatalogs,
2343
0
                                                          BAD_CAST url);
2344
0
        }
2345
0
        if ((resource == NULL) && (allowGlobal)) {
2346
0
            resource = (char *) xmlCatalogResolveURI(BAD_CAST url);
2347
0
        }
2348
0
    }
2349
2350
0
    code = lastError->code;
2351
0
    if (code == XML_ERR_OK) {
2352
0
        *out = resource;
2353
0
    } else {
2354
0
        xmlFree(resource);
2355
0
    }
2356
2357
0
    *lastError = oldError;
2358
2359
0
    return(code);
2360
0
}
2361
2362
static char *
2363
xmlCtxtResolveFromCatalog(xmlParserCtxtPtr ctxt, const char *url,
2364
984
                          const char *publicId) {
2365
984
    char *resource;
2366
984
    void *localCatalogs = NULL;
2367
984
    int allowGlobal = 1;
2368
984
    xmlParserErrors code;
2369
2370
984
    if (ctxt != NULL) {
2371
        /*
2372
         * Loading of HTML documents shouldn't use XML catalogs.
2373
         */
2374
984
        if (ctxt->html)
2375
0
            return(NULL);
2376
2377
984
        localCatalogs = ctxt->catalogs;
2378
2379
984
        if (ctxt->options & XML_PARSE_NO_SYS_CATALOG)
2380
19
            allowGlobal = 0;
2381
984
    }
2382
2383
984
    switch (xmlCatalogGetDefaults()) {
2384
984
        case XML_CATA_ALLOW_NONE:
2385
984
            return(NULL);
2386
0
        case XML_CATA_ALLOW_DOCUMENT:
2387
0
            allowGlobal = 0;
2388
0
            break;
2389
0
        case XML_CATA_ALLOW_GLOBAL:
2390
0
            localCatalogs = NULL;
2391
0
            break;
2392
0
        case XML_CATA_ALLOW_ALL:
2393
0
            break;
2394
984
    }
2395
2396
0
    code = xmlResolveFromCatalog(url, publicId, localCatalogs,
2397
0
                                 allowGlobal, &resource);
2398
0
    if (code != XML_ERR_OK)
2399
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_CATALOG, code, XML_ERR_ERROR,
2400
0
                   BAD_CAST url, BAD_CAST publicId, NULL, 0,
2401
0
                   "%s\n", xmlErrString(code), NULL);
2402
2403
0
    return(resource);
2404
984
}
2405
2406
#endif
2407
2408
/**
2409
 * @deprecated Internal function, don't use.
2410
 *
2411
 * @param ctxt  an XML parser context
2412
 * @param ret  an XML parser input
2413
 * @returns NULL.
2414
 */
2415
xmlParserInput *
2416
xmlCheckHTTPInput(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
2417
0
                  xmlParserInput *ret ATTRIBUTE_UNUSED) {
2418
0
    return(NULL);
2419
0
}
2420
2421
/**
2422
 * Create a new input stream based on a file or a URL.
2423
 *
2424
 * The flag XML_INPUT_UNZIP allows decompression.
2425
 *
2426
 * The flag XML_INPUT_NETWORK allows network access.
2427
 *
2428
 * The following resource loaders will be called if they were
2429
 * registered (in order of precedence):
2430
 *
2431
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2432
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2433
 * - the default loader which will return
2434
 *   - the result from a matching global input callback set with
2435
 *     #xmlRegisterInputCallbacks (deprecated)
2436
 *   - a file opened from the filesystem, with automatic detection
2437
 *     of compressed files if support is compiled in.
2438
 *
2439
 * @since 2.14.0
2440
 *
2441
 * @param url  the filename to use as entity
2442
 * @param flags  XML_INPUT flags
2443
 * @param out  pointer to new parser input
2444
 * @returns an xmlParserErrors code.
2445
 */
2446
xmlParserErrors
2447
xmlNewInputFromUrl(const char *url, xmlParserInputFlags flags,
2448
0
                   xmlParserInput **out) {
2449
0
    char *resource = NULL;
2450
0
    xmlParserInputBufferPtr buf;
2451
0
    xmlParserInputPtr input;
2452
0
    xmlParserErrors code = XML_ERR_OK;
2453
2454
0
    if (out == NULL)
2455
0
        return(XML_ERR_ARGUMENT);
2456
0
    *out = NULL;
2457
0
    if (url == NULL)
2458
0
        return(XML_ERR_ARGUMENT);
2459
2460
0
#ifdef LIBXML_CATALOG_ENABLED
2461
0
    if (flags & XML_INPUT_USE_SYS_CATALOG) {
2462
0
        code = xmlResolveFromCatalog(url, NULL, NULL, 1, &resource);
2463
0
        if (code != XML_ERR_OK)
2464
0
            return(code);
2465
0
        if (resource != NULL)
2466
0
            url = resource;
2467
0
    }
2468
0
#endif
2469
2470
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2471
0
        buf = xmlParserInputBufferCreateFilenameValue(url,
2472
0
                XML_CHAR_ENCODING_NONE);
2473
0
        if (buf == NULL)
2474
0
            code = XML_IO_ENOENT;
2475
0
    } else {
2476
0
        code = xmlParserInputBufferCreateUrl(url, XML_CHAR_ENCODING_NONE,
2477
0
                                             flags, &buf);
2478
0
    }
2479
2480
0
    if (code == XML_ERR_OK) {
2481
0
        input = xmlNewInputInternal(buf, url);
2482
0
        if (input == NULL)
2483
0
            code = XML_ERR_NO_MEMORY;
2484
2485
0
        *out = input;
2486
0
    }
2487
2488
0
    if (resource != NULL)
2489
0
        xmlFree(resource);
2490
0
    return(code);
2491
0
}
2492
2493
/**
2494
 * Create a new input stream based on a file or an URL.
2495
 *
2496
 * Unlike the default external entity loader, this function
2497
 * doesn't use XML catalogs.
2498
 *
2499
 * @deprecated Use #xmlNewInputFromUrl.
2500
 *
2501
 * @param ctxt  an XML parser context
2502
 * @param filename  the filename to use as entity
2503
 * @returns the new input stream or NULL in case of error
2504
 */
2505
xmlParserInput *
2506
0
xmlNewInputFromFile(xmlParserCtxt *ctxt, const char *filename) {
2507
0
    xmlParserInputPtr input;
2508
0
    xmlParserInputFlags flags = 0;
2509
0
    xmlParserErrors code;
2510
2511
0
    if ((ctxt == NULL) || (filename == NULL))
2512
0
        return(NULL);
2513
2514
0
    if (ctxt->options & XML_PARSE_UNZIP)
2515
0
        flags |= XML_INPUT_UNZIP;
2516
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2517
0
        flags |= XML_INPUT_NETWORK;
2518
2519
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2520
0
    if (code != XML_ERR_OK) {
2521
0
        xmlCtxtErrIO(ctxt, code, filename);
2522
0
        return(NULL);
2523
0
    }
2524
2525
0
    return(input);
2526
0
}
2527
2528
/**
2529
 * Default external entity loader: resolves the URL against XML catalogs
 * (when catalog support is compiled in) and then loads it as a file or URL.
2530
 *
2531
 * @param url  the URL or system ID for the entity to load
2532
 * @param publicId  the public ID for the entity to load (optional)
2533
 * @param ctxt  the context in which the entity is called or NULL
2534
 * @returns a new allocated xmlParserInput, or NULL.
2535
 */
2536
static xmlParserInputPtr
2537
xmlDefaultExternalEntityLoader(const char *url, const char *publicId,
2538
                               xmlParserCtxtPtr ctxt)
2539
0
{
2540
0
    xmlParserInputPtr input = NULL;
2541
0
    char *resource = NULL;
2542
2543
0
    (void) publicId;
2544
2545
0
    if (url == NULL)
2546
0
        return(NULL);
2547
2548
0
#ifdef LIBXML_CATALOG_ENABLED
2549
0
    resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2550
0
    if (resource != NULL)
2551
0
  url = resource;
2552
0
#endif
2553
2554
    /*
2555
     * Several downstream test suites expect this error whenever
2556
     * an http URI is passed and NONET is set.
2557
     */
2558
0
    if ((ctxt != NULL) &&
2559
0
        (ctxt->options & XML_PARSE_NONET) &&
2560
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2561
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2562
0
    } else {
2563
0
        input = xmlNewInputFromFile(ctxt, url);
2564
0
    }
2565
2566
0
    if (resource != NULL)
2567
0
  xmlFree(resource);
2568
0
    return(input);
2569
0
}
2570
2571
/**
2572
 * A specific entity loader disabling network accesses, though still
2573
 * allowing local catalog accesses for resolution.
2574
 *
2575
 * @deprecated Use XML_PARSE_NONET.
2576
 *
2577
 * @param URL  the URL or system ID for the entity to load
2578
 * @param publicId  the public ID for the entity to load
2579
 * @param ctxt  the context in which the entity is called or NULL
2580
 * @returns a new allocated xmlParserInput, or NULL.
2581
 */
2582
xmlParserInput *
2583
xmlNoNetExternalEntityLoader(const char *URL, const char *publicId,
2584
0
                             xmlParserCtxt *ctxt) {
2585
0
    int oldOptions = 0;
2586
0
    xmlParserInputPtr input;
2587
2588
0
    if (ctxt != NULL) {
2589
0
        oldOptions = ctxt->options;
2590
0
        ctxt->options |= XML_PARSE_NONET;
2591
0
    }
2592
2593
0
    input = xmlDefaultExternalEntityLoader(URL, publicId, ctxt);
2594
2595
0
    if (ctxt != NULL)
2596
0
        ctxt->options = oldOptions;
2597
2598
0
    return(input);
2599
0
}
2600
2601
/*
2602
 * This global has to die eventually
2603
 */
2604
/*
 * Loader called by xmlLoadResource when the parser context has no
 * resource loader installed. Starts out as the default loader and is
 * replaced by xmlSetExternalEntityLoader.
 */
static xmlExternalEntityLoader
2605
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2606
2607
/**
2608
 * Changes the default external entity resolver function for the
2609
 * application.
2610
 *
2611
 * @deprecated This is a global setting and not thread-safe. Use
2612
 * #xmlCtxtSetResourceLoader or similar functions.
2613
 *
2614
 * @param f  the new entity resolver function
2615
 */
2616
void
2617
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2618
0
    xmlCurrentExternalEntityLoader = f;
2619
0
}
2620
2621
/**
2622
 * Get the default external entity resolver function for the application
2623
 *
2624
 * @deprecated See #xmlSetExternalEntityLoader.
2625
 *
2626
 * @returns the #xmlExternalEntityLoader function pointer
2627
 */
2628
xmlExternalEntityLoader
2629
0
xmlGetExternalEntityLoader(void) {
2630
0
    return(xmlCurrentExternalEntityLoader);
2631
0
}
2632
2633
/**
2634
 * Installs a custom callback to load documents, DTDs or external
2635
 * entities.
2636
 *
2637
 * If `vctxt` is NULL, the parser context will be passed.
2638
 *
2639
 * @since 2.14.0
2640
 * @param ctxt  parser context
2641
 * @param loader  callback
2642
 * @param vctxt  user data (optional)
2643
 */
2644
void
2645
xmlCtxtSetResourceLoader(xmlParserCtxt *ctxt, xmlResourceLoader loader,
2646
525
                         void *vctxt) {
2647
525
    if (ctxt == NULL)
2648
0
        return;
2649
2650
525
    ctxt->resourceLoader = loader;
2651
525
    ctxt->resourceCtxt = vctxt;
2652
525
}
2653
2654
/**
2655
 * @param ctxt  parser context
2656
 * @param url  the URL or system ID for the entity to load
2657
 * @param publicId  the public ID for the entity to load (optional)
2658
 * @param type  resource type
2659
 * @returns the xmlParserInput or NULL in case of error.
2660
 */
2661
xmlParserInput *
2662
xmlLoadResource(xmlParserCtxt *ctxt, const char *url, const char *publicId,
2663
984
                xmlResourceType type) {
2664
984
    char *canonicFilename;
2665
984
    xmlParserInputPtr ret;
2666
2667
984
    if (url == NULL)
2668
0
        return(NULL);
2669
2670
984
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2671
984
        char *resource = NULL;
2672
984
        void *userData;
2673
984
        xmlParserInputFlags flags = 0;
2674
984
        int code;
2675
2676
984
#ifdef LIBXML_CATALOG_ENABLED
2677
984
        resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2678
984
        if (resource != NULL)
2679
0
            url = resource;
2680
984
#endif
2681
2682
984
        if (ctxt->options & XML_PARSE_UNZIP)
2683
645
            flags |= XML_INPUT_UNZIP;
2684
984
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2685
175
            flags |= XML_INPUT_NETWORK;
2686
2687
984
        userData = ctxt->resourceCtxt;
2688
984
        if (userData == NULL)
2689
984
            userData = ctxt;
2690
2691
984
        code = ctxt->resourceLoader(userData, url, publicId, type,
2692
984
                                    flags, &ret);
2693
984
        if (code != XML_ERR_OK) {
2694
5
            xmlCtxtErrIO(ctxt, code, url);
2695
5
            ret = NULL;
2696
5
        }
2697
984
        if (resource != NULL)
2698
0
            xmlFree(resource);
2699
984
        return(ret);
2700
984
    }
2701
2702
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2703
0
    if (canonicFilename == NULL) {
2704
0
        xmlCtxtErrMemory(ctxt);
2705
0
        return(NULL);
2706
0
    }
2707
2708
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2709
0
    xmlFree(canonicFilename);
2710
0
    return(ret);
2711
0
}
2712
2713
/**
2714
 * `URL` is a filename or URL. If it contains the substring "://",
2715
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2716
 * treated as a filesystem path.
2717
 *
2718
 * `publicId` is an optional XML public ID, typically from a doctype
2719
 * declaration. It is used for catalog lookups.
2720
 *
2721
 * If catalog lookup is enabled (default is yes) and URL or ID are
2722
 * found in system or local XML catalogs, URL is replaced with the
2723
 * result. Then the following resource loaders will be called if
2724
 * they were registered (in order of precedence):
2725
 *
2726
 * - the resource loader set with #xmlCtxtSetResourceLoader
2727
 * - the global external entity loader set with
2728
 *   #xmlSetExternalEntityLoader (without catalog resolution,
2729
 *   deprecated)
2730
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2731
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2732
 * - the default loader which will return
2733
 *   - the result from a matching global input callback set with
2734
 *     #xmlRegisterInputCallbacks (deprecated)
2735
 *   - a file opened from the filesystem, with automatic detection
2736
 *     of compressed files if support is compiled in.
2737
 *
2738
 * @param URL  the URL or system ID for the entity to load
2739
 * @param publicId  the public ID for the entity to load (optional)
2740
 * @param ctxt  the context in which the entity is called or NULL
2741
 * @returns the xmlParserInput or NULL
2742
 */
2743
xmlParserInput *
2744
xmlLoadExternalEntity(const char *URL, const char *publicId,
2745
0
                      xmlParserCtxt *ctxt) {
2746
0
    return(xmlLoadResource(ctxt, URL, publicId, XML_RESOURCE_UNKNOWN));
2747
0
}
2748
2749
/************************************************************************
2750
 *                  *
2751
 *    Commodity functions to handle parser contexts   *
2752
 *                  *
2753
 ************************************************************************/
2754
2755
/**
2756
 * Initialize a SAX parser context
2757
 *
2758
 * @param ctxt  XML parser context
2759
 * @param sax  SAX handler
2760
 * @param userData  user data
2761
 * @returns 0 in case of success and -1 in case of error
2762
 */
2763
2764
static int
2765
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2766
                     void *userData)
2767
525
{
2768
525
    xmlParserInputPtr input;
2769
525
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2770
525
    size_t initialNodeTabSize = 1;
2771
#else
2772
    size_t initialNodeTabSize = 10;
2773
#endif
2774
2775
525
    if (ctxt == NULL)
2776
0
        return(-1);
2777
2778
525
    if (ctxt->dict == NULL)
2779
525
  ctxt->dict = xmlDictCreate();
2780
525
    if (ctxt->dict == NULL)
2781
0
  return(-1);
2782
2783
525
    if (ctxt->sax == NULL)
2784
525
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2785
525
    if (ctxt->sax == NULL)
2786
0
  return(-1);
2787
525
    if (sax == NULL) {
2788
525
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2789
525
        xmlSAXVersion(ctxt->sax, 2);
2790
525
        ctxt->userData = ctxt;
2791
525
    } else {
2792
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2793
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2794
0
        } else {
2795
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2796
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2797
0
        }
2798
0
        ctxt->userData = userData ? userData : ctxt;
2799
0
    }
2800
2801
525
    ctxt->maxatts = 0;
2802
525
    ctxt->atts = NULL;
2803
    /* Allocate the Input stack */
2804
525
    if (ctxt->inputTab == NULL) {
2805
525
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2806
525
        size_t initialSize = 1;
2807
#else
2808
        size_t initialSize = 5;
2809
#endif
2810
2811
525
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2812
525
  ctxt->inputMax = initialSize;
2813
525
    }
2814
525
    if (ctxt->inputTab == NULL)
2815
0
  return(-1);
2816
525
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2817
0
        xmlFreeInputStream(input);
2818
0
    }
2819
525
    ctxt->inputNr = 0;
2820
525
    ctxt->input = NULL;
2821
2822
525
    ctxt->version = NULL;
2823
525
    ctxt->encoding = NULL;
2824
525
    ctxt->standalone = -1;
2825
525
    ctxt->hasExternalSubset = 0;
2826
525
    ctxt->hasPErefs = 0;
2827
525
    ctxt->html = 0;
2828
525
    ctxt->instate = XML_PARSER_START;
2829
2830
    /* Allocate the Node stack */
2831
525
    if (ctxt->nodeTab == NULL) {
2832
525
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2833
525
  ctxt->nodeMax = initialNodeTabSize;
2834
525
    }
2835
525
    if (ctxt->nodeTab == NULL)
2836
0
  return(-1);
2837
525
    ctxt->nodeNr = 0;
2838
525
    ctxt->node = NULL;
2839
2840
    /* Allocate the Name stack */
2841
525
    if (ctxt->nameTab == NULL) {
2842
525
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2843
525
  ctxt->nameMax = initialNodeTabSize;
2844
525
    }
2845
525
    if (ctxt->nameTab == NULL)
2846
0
  return(-1);
2847
525
    ctxt->nameNr = 0;
2848
525
    ctxt->name = NULL;
2849
2850
    /* Allocate the space stack */
2851
525
    if (ctxt->spaceTab == NULL) {
2852
525
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2853
525
  ctxt->spaceMax = initialNodeTabSize;
2854
525
    }
2855
525
    if (ctxt->spaceTab == NULL)
2856
0
  return(-1);
2857
525
    ctxt->spaceNr = 1;
2858
525
    ctxt->spaceTab[0] = -1;
2859
525
    ctxt->space = &ctxt->spaceTab[0];
2860
525
    ctxt->myDoc = NULL;
2861
525
    ctxt->wellFormed = 1;
2862
525
    ctxt->nsWellFormed = 1;
2863
525
    ctxt->valid = 1;
2864
2865
525
    ctxt->options = XML_PARSE_NODICT;
2866
2867
    /*
2868
     * Initialize some parser options from deprecated global variables.
2869
     * Note that the "modern" API taking options arguments or
2870
     * xmlCtxtSetOptions will ignore these defaults. They're only
2871
     * relevant if old API functions like xmlParseFile are used.
2872
     */
2873
525
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2874
525
    if (ctxt->loadsubset) {
2875
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2876
0
    }
2877
525
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2878
525
    if (ctxt->validate) {
2879
0
        ctxt->options |= XML_PARSE_DTDVALID;
2880
0
    }
2881
525
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2882
525
    if (ctxt->pedantic) {
2883
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2884
0
    }
2885
525
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2886
525
    if (ctxt->keepBlanks == 0) {
2887
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2888
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2889
0
    }
2890
525
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2891
525
    if (ctxt->replaceEntities) {
2892
0
        ctxt->options |= XML_PARSE_NOENT;
2893
0
    }
2894
525
    if (xmlGetWarningsDefaultValue == 0)
2895
0
        ctxt->options |= XML_PARSE_NOWARNING;
2896
2897
525
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2898
525
    ctxt->vctxt.userData = ctxt;
2899
525
    ctxt->vctxt.error = xmlParserValidityError;
2900
525
    ctxt->vctxt.warning = xmlParserValidityWarning;
2901
2902
525
    ctxt->record_info = 0;
2903
525
    ctxt->checkIndex = 0;
2904
525
    ctxt->inSubset = 0;
2905
525
    ctxt->errNo = XML_ERR_OK;
2906
525
    ctxt->depth = 0;
2907
525
    ctxt->catalogs = NULL;
2908
525
    ctxt->sizeentities = 0;
2909
525
    ctxt->sizeentcopy = 0;
2910
525
    ctxt->input_id = 1;
2911
525
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2912
525
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2913
2914
525
    if (ctxt->nsdb == NULL) {
2915
525
        ctxt->nsdb = xmlParserNsCreate();
2916
525
        if (ctxt->nsdb == NULL)
2917
0
            return(-1);
2918
525
    }
2919
2920
525
    return(0);
2921
525
}
2922
2923
/**
2924
 * Initialize a parser context
2925
 *
2926
 * @deprecated Internal function which will be made private in a future
2927
 * version.
2928
 *
2929
 * @param ctxt  an XML parser context
2930
 * @returns 0 in case of success and -1 in case of error
2931
 */
2932
2933
int
2934
xmlInitParserCtxt(xmlParserCtxt *ctxt)
2935
0
{
2936
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2937
0
}
2938
2939
/**
2940
 * Free all the memory used by a parser context. However the parsed
2941
 * document in ctxt->myDoc is not freed.
2942
 *
2943
 * @param ctxt  an XML parser context
2944
 */
2945
2946
void
2947
xmlFreeParserCtxt(xmlParserCtxt *ctxt)
2948
525
{
2949
525
    xmlParserInputPtr input;
2950
2951
525
    if (ctxt == NULL) return;
2952
2953
700
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2954
175
        xmlFreeInputStream(input);
2955
175
    }
2956
525
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2957
525
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2958
525
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2959
525
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2960
525
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2961
525
    if (ctxt->version != NULL) xmlFree(ctxt->version);
2962
525
    if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
2963
525
    if (ctxt->extSubURI != NULL) xmlFree(ctxt->extSubURI);
2964
525
    if (ctxt->extSubSystem != NULL) xmlFree(ctxt->extSubSystem);
2965
525
#ifdef LIBXML_SAX1_ENABLED
2966
525
    if ((ctxt->sax != NULL) &&
2967
525
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2968
#else
2969
    if (ctxt->sax != NULL)
2970
#endif /* LIBXML_SAX1_ENABLED */
2971
525
        xmlFree(ctxt->sax);
2972
525
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2973
525
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2974
525
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2975
525
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2976
525
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2977
525
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2978
525
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2979
525
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2980
525
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2981
525
    if (ctxt->attsDefault != NULL)
2982
36
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2983
525
    if (ctxt->attsSpecial != NULL)
2984
105
        xmlHashFree(ctxt->attsSpecial, NULL);
2985
525
    if (ctxt->freeElems != NULL) {
2986
0
        xmlNodePtr cur, next;
2987
2988
0
  cur = ctxt->freeElems;
2989
0
  while (cur != NULL) {
2990
0
      next = cur->next;
2991
0
      xmlFree(cur);
2992
0
      cur = next;
2993
0
  }
2994
0
    }
2995
525
    if (ctxt->freeAttrs != NULL) {
2996
0
        xmlAttrPtr cur, next;
2997
2998
0
  cur = ctxt->freeAttrs;
2999
0
  while (cur != NULL) {
3000
0
      next = cur->next;
3001
0
      xmlFree(cur);
3002
0
      cur = next;
3003
0
  }
3004
0
    }
3005
    /*
3006
     * cleanup the error strings
3007
     */
3008
525
    if (ctxt->lastError.message != NULL)
3009
512
        xmlFree(ctxt->lastError.message);
3010
525
    if (ctxt->lastError.file != NULL)
3011
481
        xmlFree(ctxt->lastError.file);
3012
525
    if (ctxt->lastError.str1 != NULL)
3013
196
        xmlFree(ctxt->lastError.str1);
3014
525
    if (ctxt->lastError.str2 != NULL)
3015
100
        xmlFree(ctxt->lastError.str2);
3016
525
    if (ctxt->lastError.str3 != NULL)
3017
0
        xmlFree(ctxt->lastError.str3);
3018
3019
525
#ifdef LIBXML_CATALOG_ENABLED
3020
525
    if (ctxt->catalogs != NULL)
3021
0
  xmlCatalogFreeLocal(ctxt->catalogs);
3022
525
#endif
3023
525
    xmlFree(ctxt);
3024
525
}
3025
3026
/**
3027
 * Allocate and initialize a new parser context.
3028
 *
3029
 * @returns the xmlParserCtxt or NULL
3030
 */
3031
3032
xmlParserCtxt *
3033
xmlNewParserCtxt(void)
3034
350
{
3035
350
    return(xmlNewSAXParserCtxt(NULL, NULL));
3036
350
}
3037
3038
/**
3039
 * Allocate and initialize a new SAX parser context. If userData is NULL,
3040
 * the parser context will be passed as user data.
3041
 *
3042
 * @since 2.11.0
3043
 *
3044
 * If you want support older versions,
3045
 * it's best to invoke #xmlNewParserCtxt and set ctxt->sax with
3046
 * struct assignment.
3047
 *
3048
 * @param sax  SAX handler
3049
 * @param userData  user data
3050
 * @returns the xmlParserCtxt or NULL if memory allocation failed.
3051
 */
3052
3053
xmlParserCtxt *
3054
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3055
525
{
3056
525
    xmlParserCtxtPtr ctxt;
3057
3058
525
    xmlInitParser();
3059
3060
525
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3061
525
    if (ctxt == NULL)
3062
0
  return(NULL);
3063
525
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3064
525
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3065
0
        xmlFreeParserCtxt(ctxt);
3066
0
  return(NULL);
3067
0
    }
3068
525
    return(ctxt);
3069
525
}
3070
3071
/**
3072
 * @since 2.14.0
3073
 *
3074
 * @param ctxt  parser context
3075
 * @returns the private application data.
3076
 */
3077
void *
3078
0
xmlCtxtGetPrivate(xmlParserCtxt *ctxt) {
3079
0
    if (ctxt == NULL)
3080
0
        return(NULL);
3081
3082
0
    return(ctxt->_private);
3083
0
}
3084
3085
/**
3086
 * Set the private application data.
3087
 *
3088
 * @since 2.14.0
3089
 *
3090
 * @param ctxt  parser context
3091
 * @param priv  private application data
3092
 */
3093
void
3094
0
xmlCtxtSetPrivate(xmlParserCtxt *ctxt, void *priv) {
3095
0
    if (ctxt == NULL)
3096
0
        return;
3097
3098
0
    ctxt->_private = priv;
3099
0
}
3100
3101
/**
3102
 * @since 2.14.0
3103
 *
3104
 * @param ctxt  parser context
3105
 * @returns the local catalogs.
3106
 */
3107
void *
3108
0
xmlCtxtGetCatalogs(xmlParserCtxt *ctxt) {
3109
0
    if (ctxt == NULL)
3110
0
        return(NULL);
3111
3112
0
    return(ctxt->catalogs);
3113
0
}
3114
3115
/**
3116
 * Set the local catalogs.
3117
 *
3118
 * @since 2.14.0
3119
 *
3120
 * @param ctxt  parser context
3121
 * @param catalogs  catalogs pointer
3122
 */
3123
void
3124
0
xmlCtxtSetCatalogs(xmlParserCtxt *ctxt, void *catalogs) {
3125
0
    if (ctxt == NULL)
3126
0
        return;
3127
3128
0
    ctxt->catalogs = catalogs;
3129
0
}
3130
3131
/**
3132
 * @since 2.14.0
3133
 *
3134
 * @param ctxt  parser context
3135
 * @returns the dictionary.
3136
 */
3137
xmlDict *
3138
0
xmlCtxtGetDict(xmlParserCtxt *ctxt) {
3139
0
    if (ctxt == NULL)
3140
0
        return(NULL);
3141
3142
0
    return(ctxt->dict);
3143
0
}
3144
3145
/**
3146
 * Set the dictionary. This should only be done immediately after
3147
 * creating a parser context.
3148
 *
3149
 * @since 2.14.0
3150
 *
3151
 * @param ctxt  parser context
3152
 * @param dict  dictionary
3153
 */
3154
void
3155
0
xmlCtxtSetDict(xmlParserCtxt *ctxt, xmlDict *dict) {
3156
0
    if (ctxt == NULL)
3157
0
        return;
3158
3159
0
    if (ctxt->dict != NULL)
3160
0
        xmlDictFree(ctxt->dict);
3161
3162
0
    xmlDictReference(dict);
3163
0
    ctxt->dict = dict;
3164
0
}
3165
3166
/**
3167
 * @since 2.14.0
3168
 *
3169
 * @param ctxt  parser context
3170
 * @returns the SAX handler struct. This is not a copy and must not
3171
 * be freed. Handlers can be updated.
3172
 */
3173
xmlSAXHandler *
3174
0
xmlCtxtGetSaxHandler(xmlParserCtxt *ctxt) {
3175
0
    if (ctxt == NULL)
3176
0
        return(NULL);
3177
3178
0
    return(ctxt->sax);
3179
0
}
3180
3181
/**
3182
 * Set the SAX handler struct to a copy of `sax`.
3183
 *
3184
 * @since 2.14.0
3185
 *
3186
 * @param ctxt  parser context
3187
 * @param sax  SAX handler
3188
 * @returns 0 on success or -1 if arguments are invalid or a memory
3189
 * allocation failed.
3190
 */
3191
int
3192
0
xmlCtxtSetSaxHandler(xmlParserCtxt *ctxt, const xmlSAXHandler *sax) {
3193
0
    xmlSAXHandler *copy;
3194
3195
0
    if ((ctxt == NULL) || (sax == NULL))
3196
0
        return(-1);
3197
3198
0
    copy = xmlMalloc(sizeof(*copy));
3199
0
    if (copy == NULL)
3200
0
        return(-1);
3201
3202
0
    memcpy(copy, sax, sizeof(*copy));
3203
0
    ctxt->sax = copy;
3204
3205
0
    return(0);
3206
0
}
3207
3208
/**
3209
 * @since 2.14.0
3210
 *
3211
 * @param ctxt  parser context
3212
 * @returns the parsed document or NULL if a fatal error occurred when
3213
 * parsing. The document must be freed by the caller. Resets the
3214
 * context's document to NULL.
3215
 */
3216
xmlDoc *
3217
350
xmlCtxtGetDocument(xmlParserCtxt *ctxt) {
3218
350
    xmlDocPtr doc;
3219
3220
350
    if (ctxt == NULL)
3221
0
        return(NULL);
3222
3223
350
    if ((ctxt->wellFormed) ||
3224
350
        (((ctxt->recovery) || (ctxt->html)) &&
3225
350
         (!xmlCtxtIsCatastrophicError(ctxt)))) {
3226
261
        doc = ctxt->myDoc;
3227
261
    } else {
3228
89
        if (ctxt->errNo == XML_ERR_OK)
3229
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error");
3230
89
        doc = NULL;
3231
89
        xmlFreeDoc(ctxt->myDoc);
3232
89
    }
3233
350
    ctxt->myDoc = NULL;
3234
3235
350
    return(doc);
3236
350
}
3237
3238
/**
3239
 * @since 2.14.0
3240
 *
3241
 * @param ctxt  parser context
3242
 * @returns 1 if this is a HTML parser context, 0 otherwise.
3243
 */
3244
int
3245
0
xmlCtxtIsHtml(xmlParserCtxt *ctxt) {
3246
0
    if (ctxt == NULL)
3247
0
        return(0);
3248
3249
0
    return(ctxt->html ? 1 : 0);
3250
0
}
3251
3252
/**
3253
 * Check whether the parser is stopped.
3254
 *
3255
 * The parser is stopped on fatal (non-wellformedness) errors or
3256
 * on user request with #xmlStopParser.
3257
 *
3258
 * @since 2.14.0
3259
 *
3260
 * @param ctxt  parser context
3261
 * @returns 1 if the parser is stopped, 0 otherwise.
3262
 */
3263
int
3264
0
xmlCtxtIsStopped(xmlParserCtxt *ctxt) {
3265
0
    if (ctxt == NULL)
3266
0
        return(0);
3267
3268
0
    return(ctxt->disableSAX != 0);
3269
0
}
3270
3271
/**
3272
 * Check whether a DTD subset is being parsed.
3273
 *
3274
 * Should only be used by SAX callbacks.
3275
 *
3276
 * Return values are
3277
 *
3278
 * - 0: not in DTD
3279
 * - 1: in internal DTD subset
3280
 * - 2: in external DTD subset
3281
 *
3282
 * @since 2.15.0
3283
 *
3284
 * @param ctxt  parser context
3285
 * @returns the subset status
3286
 */
3287
int
3288
0
xmlCtxtIsInSubset(xmlParserCtxt *ctxt) {
3289
0
    if (ctxt == NULL)
3290
0
        return(0);
3291
3292
0
    return(ctxt->inSubset);
3293
0
}
3294
3295
#ifdef LIBXML_VALID_ENABLED
/**
 * Access the validation context embedded in the parser context.
 *
 * @since 2.14.0
 *
 * @param ctxt  parser context
 * @returns the validation context, or NULL if `ctxt` is NULL.
 */
xmlValidCtxt *
xmlCtxtGetValidCtxt(xmlParserCtxt *ctxt) {
    return((ctxt != NULL) ? &ctxt->vctxt : NULL);
}
#endif
3310
3311
/**
3312
 * Return user data.
3313
 *
3314
 * Return user data of a custom SAX parser or the parser context
3315
 * itself if unset.
3316
 *
3317
 * @since 2.15.0
3318
 *
3319
 * @param ctxt  parser context
3320
 * @returns the user data.
3321
 */
3322
void *
3323
0
xmlCtxtGetUserData(xmlParserCtxt *ctxt) {
3324
0
    if (ctxt == NULL)
3325
0
        return NULL;
3326
3327
0
    return ctxt->userData;
3328
0
}
3329
3330
/**
3331
 * Return the current node being parsed.
3332
 *
3333
 * This is only useful if the default SAX callbacks which build
3334
 * a document tree are intercepted. This mode of operation is
3335
 * fragile and discouraged.
3336
 *
3337
 * Returns the current element node, or the document node if no
3338
 * element was parsed yet.
3339
 *
3340
 * @since 2.15.0
3341
 *
3342
 * @param ctxt  parser context
3343
 * @returns the current node.
3344
 */
3345
xmlNode *
3346
0
xmlCtxtGetNode(xmlParserCtxt *ctxt) {
3347
0
    if (ctxt == NULL)
3348
0
        return NULL;
3349
3350
0
    if (ctxt->node != NULL)
3351
0
        return ctxt->node;
3352
0
    return (xmlNode *) ctxt->myDoc;
3353
0
}
3354
3355
/**
3356
 * Return data from the doctype declaration.
3357
 *
3358
 * Should only be used by SAX callbacks.
3359
 *
3360
 * @since 2.15.0
3361
 *
3362
 * @param ctxt  parser context
3363
 * @param name  name of the root element (output)
3364
 * @param systemId  system ID (URI) of the external subset (output)
3365
 * @param publicId  public ID of the external subset (output)
3366
 * @returns 0 on success, -1 if argument is invalid
3367
 */
3368
int
3369
xmlCtxtGetDocTypeDecl(xmlParserCtxt *ctxt,
3370
                      const xmlChar **name,
3371
                      const xmlChar **systemId,
3372
0
                      const xmlChar **publicId) {
3373
0
    if (ctxt == NULL)
3374
0
        return -1;
3375
3376
0
    if (name != NULL)
3377
0
        *name = ctxt->intSubName;
3378
0
    if (systemId != NULL)
3379
0
        *systemId = ctxt->extSubURI;
3380
0
    if (publicId != NULL)
3381
0
        *publicId = ctxt->extSubSystem; /* The member is misnamed */
3382
3383
0
    return 0;
3384
0
}
3385
3386
/**
3387
 * Return input position.
3388
 *
3389
 * Should only be used by error handlers or SAX callbacks.
3390
 *
3391
 * Because of entities, there can be multiple inputs. Non-negative
3392
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3393
 * from the outermost input. Negative values (-1, -2, ...) select
3394
 * inputs starting from the innermost input.
3395
 *
3396
 * The byte position is counted in possibly decoded UTF-8 bytes,
3397
 * so it won't match the position in the raw input data.
3398
 *
3399
 * @since 2.15.0
3400
 *
3401
 * @param ctxt  parser context
3402
 * @param inputIndex  input index
3403
 * @param filename  filename (output)
3404
 * @param line  line number (output)
3405
 * @param col  column number (output)
3406
 * @param utf8BytePos  byte position (output)
3407
 * @returns 0 on success, -1 if arguments are invalid
3408
 */
3409
int
3410
xmlCtxtGetInputPosition(xmlParserCtxt *ctxt, int inputIndex,
3411
                        const char **filename, int *line, int *col,
3412
0
                        unsigned long *utf8BytePos) {
3413
0
    xmlParserInput *input;
3414
3415
0
    if (ctxt == NULL)
3416
0
        return -1;
3417
3418
0
    if (inputIndex < 0) {
3419
0
        inputIndex += ctxt->inputNr;
3420
0
        if (inputIndex < 0)
3421
0
            return -1;
3422
0
    }
3423
0
    if (inputIndex >= ctxt->inputNr)
3424
0
        return -1;
3425
3426
0
    input = ctxt->inputTab[inputIndex];
3427
3428
0
    if (filename != NULL)
3429
0
        *filename = input->filename;
3430
0
    if (line != NULL)
3431
0
        *line = input->line;
3432
0
    if (col != NULL)
3433
0
        *col = input->col;
3434
3435
0
    if (utf8BytePos != NULL) {
3436
0
        unsigned long consumed;
3437
3438
0
        consumed = input->consumed;
3439
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
3440
0
        *utf8BytePos = consumed;
3441
0
    }
3442
3443
0
    return 0;
3444
0
}
3445
3446
/**
3447
 * Return window into input data.
3448
 *
3449
 * Should only be used by error handlers or SAX callbacks.
3450
 * The returned pointer is only valid until the callback returns.
3451
 *
3452
 * Because of entities, there can be multiple inputs. Non-negative
3453
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3454
 * from the outermost input. Negative values (-1, -2, ...) select
3455
 * inputs starting from the innermost input.
3456
 *
3457
 * @since 2.15.0
3458
 *
3459
 * @param ctxt  parser context
3460
 * @param inputIndex  input index
3461
 * @param startOut  start of window (output)
3462
 * @param sizeInOut  maximum size of window (in)
3463
 *                   actual size of window (out)
3464
 * @param offsetOut  offset of current position inside
3465
 *                   window (out)
3466
 * @returns 0 on success, -1 if arguments are invalid
3467
 */
3468
int
3469
xmlCtxtGetInputWindow(xmlParserCtxt *ctxt, int inputIndex,
3470
                      const xmlChar **startOut,
3471
0
                      int *sizeInOut, int *offsetOut) {
3472
0
    xmlParserInput *input;
3473
3474
0
    if (ctxt == NULL || startOut == NULL || sizeInOut == NULL ||
3475
0
        offsetOut == NULL)
3476
0
        return -1;
3477
3478
0
    if (inputIndex < 0) {
3479
0
        inputIndex += ctxt->inputNr;
3480
0
        if (inputIndex < 0)
3481
0
            return -1;
3482
0
    }
3483
0
    if (inputIndex >= ctxt->inputNr)
3484
0
        return -1;
3485
3486
0
    input = ctxt->inputTab[inputIndex];
3487
3488
0
    xmlParserInputGetWindow(input, startOut, sizeInOut, offsetOut);
3489
3490
0
    return 0;
3491
0
}
3492
3493
/************************************************************************
3494
 *                  *
3495
 *    Handling of node information        *
3496
 *                  *
3497
 ************************************************************************/
3498
3499
/**
3500
 * Same as #xmlCtxtReset
3501
 *
3502
 * @deprecated Use #xmlCtxtReset
3503
 *
3504
 * @param ctxt  an XML parser context
3505
 */
3506
void
3507
xmlClearParserCtxt(xmlParserCtxt *ctxt)
3508
0
{
3509
0
    xmlCtxtReset(ctxt);
3510
0
}
3511
3512
3513
/**
3514
 * Find the parser node info struct for a given node
3515
 *
3516
 * @deprecated Don't use.
3517
 *
3518
 * @param ctx  an XML parser context
3519
 * @param node  an XML node within the tree
3520
 * @returns an xmlParserNodeInfo block pointer or NULL
3521
 */
3522
const xmlParserNodeInfo *
3523
xmlParserFindNodeInfo(xmlParserCtxt *ctx, xmlNode *node)
3524
0
{
3525
0
    unsigned long pos;
3526
3527
0
    if ((ctx == NULL) || (node == NULL))
3528
0
        return (NULL);
3529
    /* Find position where node should be at */
3530
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3531
0
    if (pos < ctx->node_seq.length
3532
0
        && ctx->node_seq.buffer[pos].node == node)
3533
0
        return &ctx->node_seq.buffer[pos];
3534
0
    else
3535
0
        return NULL;
3536
0
}
3537
3538
3539
/**
3540
 * Initialize (set to initial state) node info sequence
3541
 *
3542
 * @deprecated Don't use.
3543
 *
3544
 * @param seq  a node info sequence pointer
3545
 */
3546
void
3547
xmlInitNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3548
1.00k
{
3549
1.00k
    if (seq == NULL)
3550
0
        return;
3551
1.00k
    seq->length = 0;
3552
1.00k
    seq->maximum = 0;
3553
1.00k
    seq->buffer = NULL;
3554
1.00k
}
3555
3556
/**
3557
 * Clear (release memory and reinitialize) node info sequence
3558
 *
3559
 * @deprecated Don't use.
3560
 *
3561
 * @param seq  a node info sequence pointer
3562
 */
3563
void
3564
xmlClearNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3565
0
{
3566
0
    if (seq == NULL)
3567
0
        return;
3568
0
    if (seq->buffer != NULL)
3569
0
        xmlFree(seq->buffer);
3570
0
    xmlInitNodeInfoSeq(seq);
3571
0
}
3572
3573
/**
3574
 * Find the index that the info record for the given node is or
3575
 * should be at in a sorted sequence.
3576
 *
3577
 * @deprecated Don't use.
3578
 *
3579
 * @param seq  a node info sequence pointer
3580
 * @param node  an XML node pointer
3581
 * @returns a long indicating the position of the record
3582
 */
3583
unsigned long
3584
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeq *seq,
3585
                           xmlNode *node)
3586
0
{
3587
0
    unsigned long upper, lower, middle;
3588
0
    int found = 0;
3589
3590
0
    if ((seq == NULL) || (node == NULL))
3591
0
        return ((unsigned long) -1);
3592
3593
    /* Do a binary search for the key */
3594
0
    lower = 1;
3595
0
    upper = seq->length;
3596
0
    middle = 0;
3597
0
    while (lower <= upper && !found) {
3598
0
        middle = lower + (upper - lower) / 2;
3599
0
        if (node == seq->buffer[middle - 1].node)
3600
0
            found = 1;
3601
0
        else if (node < seq->buffer[middle - 1].node)
3602
0
            upper = middle - 1;
3603
0
        else
3604
0
            lower = middle + 1;
3605
0
    }
3606
3607
    /* Return position */
3608
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3609
0
        return middle;
3610
0
    else
3611
0
        return middle - 1;
3612
0
}
3613
3614
3615
/**
3616
 * Insert node info record into the sorted sequence
3617
 *
3618
 * @deprecated Don't use.
3619
 *
3620
 * @param ctxt  an XML parser context
3621
 * @param info  a node info sequence pointer
3622
 */
3623
void
3624
xmlParserAddNodeInfo(xmlParserCtxt *ctxt,
3625
                     xmlParserNodeInfo *info)
3626
0
{
3627
0
    unsigned long pos;
3628
3629
0
    if ((ctxt == NULL) || (info == NULL)) return;
3630
3631
    /* Find pos and check to see if node is already in the sequence */
3632
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3633
0
                                     info->node);
3634
3635
0
    if ((pos < ctxt->node_seq.length) &&
3636
0
        (ctxt->node_seq.buffer != NULL) &&
3637
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3638
0
        ctxt->node_seq.buffer[pos] = *info;
3639
0
    }
3640
3641
    /* Otherwise, we need to add new node to buffer */
3642
0
    else {
3643
0
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
3644
0
            xmlParserNodeInfo *tmp;
3645
0
            int newSize;
3646
3647
0
            newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
3648
0
                                      4, XML_MAX_ITEMS);
3649
0
            if (newSize < 0) {
3650
0
    xmlCtxtErrMemory(ctxt);
3651
0
                return;
3652
0
            }
3653
0
            tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
3654
0
            if (tmp == NULL) {
3655
0
    xmlCtxtErrMemory(ctxt);
3656
0
                return;
3657
0
            }
3658
0
            ctxt->node_seq.buffer = tmp;
3659
0
            ctxt->node_seq.maximum = newSize;
3660
0
        }
3661
3662
        /* If position is not at end, move elements out of the way */
3663
0
        if (pos != ctxt->node_seq.length) {
3664
0
            unsigned long i;
3665
3666
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3667
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3668
0
        }
3669
3670
        /* Copy element and increase length */
3671
0
        ctxt->node_seq.buffer[pos] = *info;
3672
0
        ctxt->node_seq.length++;
3673
0
    }
3674
0
}
3675
3676
/************************************************************************
3677
 *                  *
3678
 *    Default settings          *
3679
 *                  *
3680
 ************************************************************************/
3681
/**
3682
 * Set and return the previous value for enabling pedantic warnings.
3683
 *
3684
 * @deprecated Use the modern options API with XML_PARSE_PEDANTIC.
3685
 *
3686
 * @param val  int 0 or 1
3687
 * @returns the last value for 0 for no substitution, 1 for substitution.
3688
 */
3689
3690
int
3691
0
xmlPedanticParserDefault(int val) {
3692
0
    int old = xmlPedanticParserDefaultValue;
3693
3694
0
    xmlPedanticParserDefaultValue = val;
3695
0
    return(old);
3696
0
}
3697
3698
/**
3699
 * Has no effect.
3700
 *
3701
 * @deprecated Line numbers are always enabled.
3702
 *
3703
 * @param val  int 0 or 1
3704
 * @returns 1
3705
 */
3706
3707
int
3708
0
xmlLineNumbersDefault(int val ATTRIBUTE_UNUSED) {
3709
0
    return(1);
3710
0
}
3711
3712
/**
3713
 * Set and return the previous value for default entity support.
3714
 *
3715
 * @deprecated Use the modern options API with XML_PARSE_NOENT.
3716
 *
3717
 * @param val  int 0 or 1
3718
 * @returns the last value for 0 for no substitution, 1 for substitution.
3719
 */
3720
3721
int
3722
0
xmlSubstituteEntitiesDefault(int val) {
3723
0
    int old = xmlSubstituteEntitiesDefaultValue;
3724
3725
0
    xmlSubstituteEntitiesDefaultValue = val;
3726
0
    return(old);
3727
0
}
3728
3729
/**
3730
 * Set and return the previous value for default blanks text nodes support.
3731
 *
3732
 * @deprecated Use the modern options API with XML_PARSE_NOBLANKS.
3733
 *
3734
 * @param val  int 0 or 1
3735
 * @returns the last value for 0 for no substitution, 1 for substitution.
3736
 */
3737
3738
int
3739
0
xmlKeepBlanksDefault(int val) {
3740
0
    int old = xmlKeepBlanksDefaultValue;
3741
3742
0
    xmlKeepBlanksDefaultValue = val;
3743
0
#ifdef LIBXML_OUTPUT_ENABLED
3744
0
    if (!val)
3745
0
        xmlIndentTreeOutput = 1;
3746
0
#endif
3747
0
    return(old);
3748
0
}
3749