Coverage Report

Created: 2025-06-22 06:55

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * Author: Daniel Veillard
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
39
#define CUR(ctxt) ctxt->input->cur
40
#define END(ctxt) ctxt->input->end
41
42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/globals.h"
46
#include "private/io.h"
47
#include "private/memory.h"
48
#include "private/parser.h"
49
50
#ifndef SIZE_MAX
51
  #define SIZE_MAX ((size_t) -1)
52
#endif
53
54
15.0M
#define XML_MAX_ERRORS 100
55
56
/*
57
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
58
 * factor of serialized output after entity expansion.
59
 */
60
15.1k
#define XML_MAX_AMPLIFICATION_DEFAULT 5
61
62
/*
63
 * Various global defaults for parsing
64
 */
65
66
/**
67
 * check the compiled lib version against the include one.
68
 *
69
 * @param version  the include version number
70
 */
71
void
72
0
xmlCheckVersion(int version) {
73
0
    int myversion = LIBXML_VERSION;
74
75
0
    xmlInitParser();
76
77
0
    if ((myversion / 10000) != (version / 10000)) {
78
0
  xmlPrintErrorMessage(
79
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
80
0
    (version / 10000), (myversion / 10000));
81
0
    } else if ((myversion / 100) < (version / 100)) {
82
0
  xmlPrintErrorMessage(
83
0
    "Warning: program compiled against libxml %d using older %d\n",
84
0
    (version / 100), (myversion / 100));
85
0
    }
86
0
}
87
88
89
/************************************************************************
90
 *                  *
91
 *    Some factorized error routines        *
92
 *                  *
93
 ************************************************************************/
94
95
96
/**
97
 * Register a callback function that will be called on errors and
98
 * warnings. If handler is NULL, the error handler will be deactivated.
99
 *
100
 * This is the recommended way to collect errors from the parser and
101
 * takes precedence over all other error reporting mechanisms.
102
 * These are (in order of precedence):
103
 *
104
 * - per-context structured handler (#xmlCtxtSetErrorHandler)
105
 * - per-context structured "serror" SAX handler
106
 * - global structured handler (#xmlSetStructuredErrorFunc)
107
 * - per-context generic "error" and "warning" SAX handlers
108
 * - global generic handler (#xmlSetGenericErrorFunc)
109
 * - print to stderr
110
 *
111
 * @since 2.13.0
112
 * @param ctxt  an XML parser context
113
 * @param handler  error handler
114
 * @param data  data for error handler
115
 */
116
void
117
xmlCtxtSetErrorHandler(xmlParserCtxt *ctxt, xmlStructuredErrorFunc handler,
118
                       void *data)
119
0
{
120
0
    if (ctxt == NULL)
121
0
        return;
122
0
    ctxt->errorHandler = handler;
123
0
    ctxt->errorCtxt = data;
124
0
}
125
126
/**
127
 * Get the last error raised.
128
 *
129
 * Note that the XML parser typically doesn't stop after
130
 * encountering an error and will often report multiple errors.
131
 * Most of the time, the last error isn't useful. Future
132
 * versions might return the first parser error instead.
133
 *
134
 * @param ctx  an XML parser context
135
 * @returns NULL if no error occurred or a pointer to the error
136
 */
137
const xmlError *
138
xmlCtxtGetLastError(void *ctx)
139
15.0k
{
140
15.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
141
142
15.0k
    if (ctxt == NULL)
143
0
        return (NULL);
144
15.0k
    if (ctxt->lastError.code == XML_ERR_OK)
145
0
        return (NULL);
146
15.0k
    return (&ctxt->lastError);
147
15.0k
}
148
149
/**
150
 * Reset the last parser error to success. This does not change
151
 * the well-formedness status.
152
 *
153
 * @param ctx  an XML parser context
154
 */
155
void
156
xmlCtxtResetLastError(void *ctx)
157
0
{
158
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
159
160
0
    if (ctxt == NULL)
161
0
        return;
162
0
    ctxt->errNo = XML_ERR_OK;
163
0
    if (ctxt->lastError.code == XML_ERR_OK)
164
0
        return;
165
0
    xmlResetError(&ctxt->lastError);
166
0
}
167
168
/**
169
 * Handle an out-of-memory error.
170
 *
171
 * @since 2.13.0
172
 * @param ctxt  an XML parser context
173
 */
174
void
175
xmlCtxtErrMemory(xmlParserCtxt *ctxt)
176
0
{
177
0
    xmlStructuredErrorFunc schannel = NULL;
178
0
    xmlGenericErrorFunc channel = NULL;
179
0
    void *data;
180
181
0
    if (ctxt == NULL) {
182
0
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
183
0
        return;
184
0
    }
185
186
0
    ctxt->errNo = XML_ERR_NO_MEMORY;
187
0
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
188
0
    ctxt->wellFormed = 0;
189
0
    ctxt->disableSAX = 2;
190
191
0
    if (ctxt->errorHandler) {
192
0
        schannel = ctxt->errorHandler;
193
0
        data = ctxt->errorCtxt;
194
0
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
195
0
        (ctxt->sax->serror != NULL)) {
196
0
        schannel = ctxt->sax->serror;
197
0
        data = ctxt->userData;
198
0
    } else {
199
0
        channel = ctxt->sax->error;
200
0
        data = ctxt->userData;
201
0
    }
202
203
0
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
204
0
                        &ctxt->lastError);
205
0
}
206
207
/**
208
 * If filename is empty, use the one from context input if available.
209
 *
210
 * Report an IO error to the parser context.
211
 *
212
 * @param ctxt  parser context
213
 * @param code  xmlParserErrors code
214
 * @param uri  filename or URI (optional)
215
 */
216
void
217
xmlCtxtErrIO(xmlParserCtxt *ctxt, int code, const char *uri)
218
4.27k
{
219
4.27k
    const char *errstr, *msg, *str1, *str2;
220
4.27k
    xmlErrorLevel level;
221
222
4.27k
    if (ctxt == NULL)
223
0
        return;
224
225
4.27k
    if (((code == XML_IO_ENOENT) ||
226
4.27k
         (code == XML_IO_UNKNOWN))) {
227
        /*
228
         * Only report a warning if a file could not be found. This should
229
         * only be done for external entities, but the external entity loader
230
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
231
         * raise an error and aborts parsing.
232
         */
233
0
        if (ctxt->validate == 0)
234
0
            level = XML_ERR_WARNING;
235
0
        else
236
0
            level = XML_ERR_ERROR;
237
4.27k
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
238
0
        level = XML_ERR_ERROR;
239
4.27k
    } else {
240
4.27k
        level = XML_ERR_FATAL;
241
4.27k
    }
242
243
4.27k
    errstr = xmlErrString(code);
244
245
4.27k
    if (uri == NULL) {
246
4.27k
        msg = "%s\n";
247
4.27k
        str1 = errstr;
248
4.27k
        str2 = NULL;
249
4.27k
    } else {
250
0
        msg = "failed to load \"%s\": %s\n";
251
0
        str1 = uri;
252
0
        str2 = errstr;
253
0
    }
254
255
4.27k
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
256
4.27k
               (const xmlChar *) uri, NULL, NULL, 0,
257
4.27k
               msg, str1, str2);
258
4.27k
}
259
260
/**
261
 * @param ctxt  parser context
262
 * @returns true if the last error is catastrophic.
263
 */
264
int
265
15.2M
xmlCtxtIsCatastrophicError(xmlParserCtxt *ctxt) {
266
15.2M
    if (ctxt == NULL)
267
0
        return(1);
268
269
15.2M
    return(xmlIsCatastrophicError(ctxt->lastError.level,
270
15.2M
                                  ctxt->lastError.code));
271
15.2M
}
272
273
/**
274
 * Raise a parser error.
275
 *
276
 * @param ctxt  a parser context
277
 * @param node  the current node or NULL
278
 * @param domain  the domain for the error
279
 * @param code  the code for the error
280
 * @param level  the xmlErrorLevel for the error
281
 * @param str1  extra string info
282
 * @param str2  extra string info
283
 * @param str3  extra string info
284
 * @param int1  extra int info
285
 * @param msg  the message to display/transmit
286
 * @param ap  extra parameters for the message display
287
 */
288
void
289
xmlCtxtVErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
290
            xmlParserErrors code, xmlErrorLevel level,
291
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
292
            int int1, const char *msg, va_list ap)
293
15.0M
{
294
15.0M
    xmlStructuredErrorFunc schannel = NULL;
295
15.0M
    xmlGenericErrorFunc channel = NULL;
296
15.0M
    void *data = NULL;
297
15.0M
    const char *file = NULL;
298
15.0M
    int line = 0;
299
15.0M
    int col = 0;
300
15.0M
    int res;
301
302
15.0M
    if (code == XML_ERR_NO_MEMORY) {
303
0
        xmlCtxtErrMemory(ctxt);
304
0
        return;
305
0
    }
306
307
15.0M
    if (ctxt == NULL) {
308
0
        res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
309
0
                             level, NULL, 0, (const char *) str1,
310
0
                             (const char *) str2, (const char *) str3,
311
0
                             int1, 0, msg, ap);
312
0
        if (res < 0)
313
0
            xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
314
315
0
        return;
316
0
    }
317
318
15.0M
    if (PARSER_STOPPED(ctxt))
319
875
  return;
320
321
    /* Don't overwrite catastrophic errors */
322
15.0M
    if (xmlCtxtIsCatastrophicError(ctxt))
323
0
        return;
324
325
15.0M
    if (level == XML_ERR_WARNING) {
326
35.9k
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
327
19.3k
            return;
328
16.6k
        ctxt->nbWarnings += 1;
329
15.0M
    } else {
330
        /* Report at least one fatal error. */
331
15.0M
        if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
332
15.0M
            ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)) &&
333
15.0M
            (!xmlIsCatastrophicError(level, code)))
334
14.7M
            return;
335
297k
        ctxt->nbErrors += 1;
336
297k
    }
337
338
314k
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
339
314k
        ((level != XML_ERR_WARNING) ||
340
314k
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
341
314k
        if (ctxt->errorHandler) {
342
0
            schannel = ctxt->errorHandler;
343
0
            data = ctxt->errorCtxt;
344
314k
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
345
314k
            (ctxt->sax->serror != NULL)) {
346
0
            schannel = ctxt->sax->serror;
347
0
            data = ctxt->userData;
348
314k
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
349
4.20k
            if (level == XML_ERR_WARNING)
350
0
                channel = ctxt->vctxt.warning;
351
4.20k
            else
352
4.20k
                channel = ctxt->vctxt.error;
353
4.20k
            data = ctxt->vctxt.userData;
354
310k
        } else {
355
310k
            if (level == XML_ERR_WARNING)
356
16.6k
                channel = ctxt->sax->warning;
357
293k
            else
358
293k
                channel = ctxt->sax->error;
359
310k
            data = ctxt->userData;
360
310k
        }
361
314k
    }
362
363
314k
    if (ctxt->input != NULL) {
364
314k
        xmlParserInputPtr input = ctxt->input;
365
366
314k
        if ((input->filename == NULL) &&
367
314k
            (ctxt->inputNr > 1)) {
368
43.2k
            input = ctxt->inputTab[ctxt->inputNr - 2];
369
43.2k
        }
370
314k
        file = input->filename;
371
314k
        line = input->line;
372
314k
        col = input->col;
373
314k
    }
374
375
314k
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
376
314k
                         level, file, line, (const char *) str1,
377
314k
                         (const char *) str2, (const char *) str3, int1, col,
378
314k
                         msg, ap);
379
380
314k
    if (res < 0) {
381
0
        xmlCtxtErrMemory(ctxt);
382
0
        return;
383
0
    }
384
385
314k
    if (level >= XML_ERR_ERROR)
386
297k
        ctxt->errNo = code;
387
314k
    if (level == XML_ERR_FATAL) {
388
254k
        ctxt->wellFormed = 0;
389
390
254k
        if (xmlCtxtIsCatastrophicError(ctxt))
391
0
            ctxt->disableSAX = 2; /* stop parser */
392
254k
        else if (ctxt->recovery == 0)
393
254k
            ctxt->disableSAX = 1;
394
254k
    }
395
314k
}
396
397
/**
398
 * Raise a parser error.
399
 *
400
 * @param ctxt  a parser context
401
 * @param node  the current node or NULL
402
 * @param domain  the domain for the error
403
 * @param code  the code for the error
404
 * @param level  the xmlErrorLevel for the error
405
 * @param str1  extra string info
406
 * @param str2  extra string info
407
 * @param str3  extra string info
408
 * @param int1  extra int info
409
 * @param msg  the message to display/transmit
410
 * @param ...  extra parameters for the message display
411
 */
412
void
413
xmlCtxtErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
414
           xmlParserErrors code, xmlErrorLevel level,
415
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
416
           int int1, const char *msg, ...)
417
15.0M
{
418
15.0M
    va_list ap;
419
420
15.0M
    va_start(ap, msg);
421
15.0M
    xmlCtxtVErr(ctxt, node, domain, code, level,
422
15.0M
                str1, str2, str3, int1, msg, ap);
423
15.0M
    va_end(ap);
424
15.0M
}
425
426
/**
427
 * Get well-formedness and validation status after parsing. Also
428
 * reports catastrophic errors which are not related to parsing
429
 * like out-of-memory, I/O or other errors.
430
 *
431
 * @since 2.14.0
432
 *
433
 * @param ctxt  an XML parser context
434
 * @returns a bitmask of XML_STATUS_* flags ORed together.
435
 */
436
xmlParserStatus
437
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
438
0
    xmlParserStatus bits = 0;
439
440
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
441
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
442
0
                XML_STATUS_NOT_WELL_FORMED |
443
0
                XML_STATUS_NOT_NS_WELL_FORMED;
444
0
        if ((ctxt != NULL) && (ctxt->validate))
445
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
446
447
0
        return(bits);
448
0
    }
449
450
0
    if (!ctxt->wellFormed)
451
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
452
0
    if (!ctxt->nsWellFormed)
453
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
454
0
    if ((ctxt->validate) && (!ctxt->valid))
455
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
456
457
0
    return(bits);
458
0
}
459
460
/**
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 *
463
 * @param ctxt  an XML parser context
464
 * @param code  the error number
465
 * @param info  extra information string
466
 */
467
void
468
xmlFatalErr(xmlParserCtxt *ctxt, xmlParserErrors code, const char *info)
469
3.39M
{
470
3.39M
    const char *errmsg;
471
3.39M
    xmlErrorDomain domain = XML_FROM_PARSER;
472
3.39M
    xmlErrorLevel level = XML_ERR_FATAL;
473
474
3.39M
    errmsg = xmlErrString(code);
475
476
3.39M
    if ((ctxt != NULL) && (ctxt->html)) {
477
0
        domain = XML_FROM_HTML;
478
479
        /* Continue if encoding is unsupported */
480
0
        if (code == XML_ERR_UNSUPPORTED_ENCODING)
481
0
            level = XML_ERR_ERROR;
482
0
    }
483
484
3.39M
    if (info == NULL) {
485
550k
        xmlCtxtErr(ctxt, NULL, domain, code, level,
486
550k
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
487
2.84M
    } else {
488
2.84M
        xmlCtxtErr(ctxt, NULL, domain, code, level,
489
2.84M
                   (const xmlChar *) info, NULL, NULL, 0,
490
2.84M
                   "%s: %s\n", errmsg, info);
491
2.84M
    }
492
3.39M
}
493
494
/**
 * Check whether the character is allowed by the production
 *
 * @deprecated Internal function, don't use.
 *
 * ```
 * [84] Letter ::= BaseChar | Ideographic
 * ```
 *
 * @param c  an unicode character (int)
 * @returns 1 if the character is a BaseChar or an Ideographic,
 *          0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
510
511
/************************************************************************
512
 *                  *
513
 *    Input handling functions for progressive parsing  *
514
 *                  *
515
 ************************************************************************/
516
517
/* we need to keep enough input to show errors in context */
518
16.1M
#define LINE_LEN        80
519
520
/**
521
 * Blocks further parser processing don't override error
522
 * for internal use
523
 *
524
 * @param ctxt  an XML parser context
525
 */
526
void
527
1.21k
xmlHaltParser(xmlParserCtxt *ctxt) {
528
1.21k
    if (ctxt == NULL)
529
0
        return;
530
1.21k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
531
1.21k
    ctxt->disableSAX = 2;
532
1.21k
}
533
534
/**
535
 * @deprecated This function was internal and is deprecated.
536
 *
537
 * @param in  an XML parser input
538
 * @param len  an indicative size for the lookahead
539
 * @returns -1 as this is an error to use it.
540
 */
541
int
542
0
xmlParserInputRead(xmlParserInput *in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
543
0
    return(-1);
544
0
}
545
546
/**
547
 * Grow the input buffer.
548
 *
549
 * @param ctxt  an XML parser context
550
 * @returns the number of bytes read or -1 in case of error.
551
 */
552
int
553
2.94M
xmlParserGrow(xmlParserCtxt *ctxt) {
554
2.94M
    xmlParserInputPtr in = ctxt->input;
555
2.94M
    xmlParserInputBufferPtr buf = in->buf;
556
2.94M
    size_t curEnd = in->end - in->cur;
557
2.94M
    size_t curBase = in->cur - in->base;
558
2.94M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
559
2.94M
                       XML_MAX_HUGE_LENGTH :
560
2.94M
                       XML_MAX_LOOKUP_LIMIT;
561
2.94M
    int ret;
562
563
2.94M
    if (buf == NULL)
564
0
        return(0);
565
    /* Don't grow push parser buffer. */
566
2.94M
    if (PARSER_PROGRESSIVE(ctxt))
567
0
        return(0);
568
    /* Don't grow memory buffers. */
569
2.94M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
570
2.78M
        return(0);
571
155k
    if (buf->error != 0)
572
4.94k
        return(-1);
573
574
150k
    if (curBase > maxLength) {
575
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
576
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
577
0
        xmlHaltParser(ctxt);
578
0
  return(-1);
579
0
    }
580
581
150k
    if (curEnd >= INPUT_CHUNK)
582
2.60k
        return(0);
583
584
147k
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
585
147k
    xmlBufUpdateInput(buf->buffer, in, curBase);
586
587
147k
    if (ret < 0) {
588
30
        xmlCtxtErrIO(ctxt, buf->error, NULL);
589
30
    }
590
591
147k
    return(ret);
592
150k
}
593
594
/**
595
 * Raises an error with `code` if the input wasn't consumed
596
 * completely.
597
 *
598
 * @param ctxt  parser ctxt
599
 * @param code  error code
600
 */
601
void
602
9.74k
xmlParserCheckEOF(xmlParserCtxt *ctxt, xmlParserErrors code) {
603
9.74k
    xmlParserInputPtr in = ctxt->input;
604
9.74k
    xmlParserInputBufferPtr buf;
605
606
9.74k
    if (ctxt->errNo != XML_ERR_OK)
607
9.71k
        return;
608
609
25
    if (in->cur < in->end) {
610
6
        xmlFatalErr(ctxt, code, NULL);
611
6
        return;
612
6
    }
613
614
19
    buf = in->buf;
615
19
    if ((buf != NULL) && (buf->encoder != NULL)) {
616
4
        size_t curBase = in->cur - in->base;
617
4
        size_t sizeOut = 64;
618
4
        xmlCharEncError ret;
619
620
        /*
621
         * Check for truncated multi-byte sequence
622
         */
623
4
        ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
624
4
        xmlBufUpdateInput(buf->buffer, in, curBase);
625
4
        if (ret != XML_ENC_ERR_SUCCESS) {
626
1
            xmlCtxtErrIO(ctxt, buf->error, NULL);
627
1
            return;
628
1
        }
629
630
        /* Shouldn't happen */
631
3
        if (in->cur < in->end)
632
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
633
3
    }
634
19
}
635
636
/**
637
 * This function increase the input for the parser. It tries to
638
 * preserve pointers to the input buffer, and keep already read data
639
 *
640
 * @deprecated Don't use.
641
 *
642
 * @param in  an XML parser input
643
 * @param len  an indicative size for the lookahead
644
 * @returns the amount of char read, or -1 in case of error, 0 indicate the
645
 * end of this entity
646
 */
647
int
648
0
xmlParserInputGrow(xmlParserInput *in, int len) {
649
0
    int ret;
650
0
    size_t indx;
651
652
0
    if ((in == NULL) || (len < 0)) return(-1);
653
0
    if (in->buf == NULL) return(-1);
654
0
    if (in->base == NULL) return(-1);
655
0
    if (in->cur == NULL) return(-1);
656
0
    if (in->buf->buffer == NULL) return(-1);
657
658
    /* Don't grow memory buffers. */
659
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
660
0
        return(0);
661
662
0
    indx = in->cur - in->base;
663
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
664
0
        return(0);
665
0
    }
666
0
    ret = xmlParserInputBufferGrow(in->buf, len);
667
668
0
    in->base = xmlBufContent(in->buf->buffer);
669
0
    if (in->base == NULL) {
670
0
        in->base = BAD_CAST "";
671
0
        in->cur = in->base;
672
0
        in->end = in->base;
673
0
        return(-1);
674
0
    }
675
0
    in->cur = in->base + indx;
676
0
    in->end = xmlBufEnd(in->buf->buffer);
677
678
0
    return(ret);
679
0
}
680
681
/**
682
 * Shrink the input buffer.
683
 *
684
 * @param ctxt  an XML parser context
685
 */
686
void
687
9.73M
xmlParserShrink(xmlParserCtxt *ctxt) {
688
9.73M
    xmlParserInputPtr in = ctxt->input;
689
9.73M
    xmlParserInputBufferPtr buf = in->buf;
690
9.73M
    size_t used, res;
691
692
9.73M
    if (buf == NULL)
693
0
        return;
694
695
9.73M
    used = in->cur - in->base;
696
697
9.73M
    if (used > LINE_LEN) {
698
6.39M
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
699
700
6.39M
        if (res > 0) {
701
6.39M
            used -= res;
702
6.39M
            if ((res > ULONG_MAX) ||
703
6.39M
                (in->consumed > ULONG_MAX - (unsigned long)res))
704
0
                in->consumed = ULONG_MAX;
705
6.39M
            else
706
6.39M
                in->consumed += res;
707
6.39M
        }
708
709
6.39M
        xmlBufUpdateInput(buf->buffer, in, used);
710
6.39M
    }
711
9.73M
}
712
713
/**
714
 * This function removes used input for the parser.
715
 *
716
 * @deprecated Don't use.
717
 *
718
 * @param in  an XML parser input
719
 */
720
void
721
0
xmlParserInputShrink(xmlParserInput *in) {
722
0
    size_t used;
723
0
    size_t ret;
724
725
0
    if (in == NULL) return;
726
0
    if (in->buf == NULL) return;
727
0
    if (in->base == NULL) return;
728
0
    if (in->cur == NULL) return;
729
0
    if (in->buf->buffer == NULL) return;
730
731
0
    used = in->cur - in->base;
732
733
0
    if (used > LINE_LEN) {
734
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
735
0
  if (ret > 0) {
736
0
            used -= ret;
737
0
            if ((ret > ULONG_MAX) ||
738
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
739
0
                in->consumed = ULONG_MAX;
740
0
            else
741
0
                in->consumed += ret;
742
0
  }
743
744
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
745
0
    }
746
0
}
747
748
/************************************************************************
749
 *                  *
750
 *    UTF8 character input and related functions    *
751
 *                  *
752
 ************************************************************************/
753
754
/**
755
 * Skip to the next char input char.
756
 *
757
 * @deprecated Internal function, do not use.
758
 *
759
 * @param ctxt  the XML parser context
760
 */
761
762
void
763
xmlNextChar(xmlParserCtxt *ctxt)
764
20.8M
{
765
20.8M
    const unsigned char *cur;
766
20.8M
    size_t avail;
767
20.8M
    int c;
768
769
20.8M
    if ((ctxt == NULL) || (ctxt->input == NULL))
770
0
        return;
771
772
20.8M
    avail = ctxt->input->end - ctxt->input->cur;
773
774
20.8M
    if (avail < INPUT_CHUNK) {
775
746k
        xmlParserGrow(ctxt);
776
746k
        if (ctxt->input->cur >= ctxt->input->end)
777
706
            return;
778
745k
        avail = ctxt->input->end - ctxt->input->cur;
779
745k
    }
780
781
20.8M
    cur = ctxt->input->cur;
782
20.8M
    c = *cur;
783
784
20.8M
    if (c < 0x80) {
785
20.2M
        if (c == '\n') {
786
12.8M
            ctxt->input->cur++;
787
12.8M
            ctxt->input->line++;
788
12.8M
            ctxt->input->col = 1;
789
12.8M
        } else if (c == '\r') {
790
            /*
791
             *   2.11 End-of-Line Handling
792
             *   the literal two-character sequence "#xD#xA" or a standalone
793
             *   literal #xD, an XML processor must pass to the application
794
             *   the single character #xA.
795
             */
796
5.15k
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
797
5.15k
            ctxt->input->line++;
798
5.15k
            ctxt->input->col = 1;
799
5.15k
            return;
800
7.43M
        } else {
801
7.43M
            ctxt->input->cur++;
802
7.43M
            ctxt->input->col++;
803
7.43M
        }
804
20.2M
    } else {
805
589k
        ctxt->input->col++;
806
807
589k
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
808
28.8k
            goto encoding_error;
809
810
560k
        if (c < 0xe0) {
811
            /* 2-byte code */
812
282k
            if (c < 0xc2)
813
42.3k
                goto encoding_error;
814
239k
            ctxt->input->cur += 2;
815
278k
        } else {
816
278k
            unsigned int val = (c << 8) | cur[1];
817
818
278k
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
819
114
                goto encoding_error;
820
821
278k
            if (c < 0xf0) {
822
                /* 3-byte code */
823
251k
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
824
137
                    goto encoding_error;
825
251k
                ctxt->input->cur += 3;
826
251k
            } else {
827
26.8k
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
828
10.6k
                    goto encoding_error;
829
830
                /* 4-byte code */
831
16.1k
                if ((val < 0xf090) || (val >= 0xf490))
832
14.6k
                    goto encoding_error;
833
1.50k
                ctxt->input->cur += 4;
834
1.50k
            }
835
278k
        }
836
560k
    }
837
838
20.7M
    return;
839
840
20.7M
encoding_error:
841
    /* Only report the first error */
842
96.8k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
843
403
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
844
403
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
845
403
    }
846
96.8k
    ctxt->input->cur++;
847
96.8k
}
848
849
/**
850
 * The current char value, if using UTF-8 this may actually span multiple
851
 * bytes in the input buffer. Implement the end of line normalization:
852
 *
853
 * @deprecated Internal function, do not use.
854
 *
855
 * 2.11 End-of-Line Handling
856
 *
857
 * Wherever an external parsed entity or the literal entity value
858
 * of an internal parsed entity contains either the literal two-character
859
 * sequence "#xD#xA" or a standalone literal \#xD, an XML processor
860
 * must pass to the application the single character \#xA.
861
 * This behavior can conveniently be produced by normalizing all
862
 * line breaks to \#xA on input, before parsing.)
863
 *
864
 * @param ctxt  the XML parser context
865
 * @param len  pointer to the length of the char read
866
 * @returns the current char value and its length
867
 */
868
869
int
870
56.5M
xmlCurrentChar(xmlParserCtxt *ctxt, int *len) {
871
56.5M
    const unsigned char *cur;
872
56.5M
    size_t avail;
873
56.5M
    int c;
874
875
56.5M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
876
877
56.5M
    avail = ctxt->input->end - ctxt->input->cur;
878
879
56.5M
    if (avail < INPUT_CHUNK) {
880
448k
        xmlParserGrow(ctxt);
881
448k
        avail = ctxt->input->end - ctxt->input->cur;
882
448k
    }
883
884
56.5M
    cur = ctxt->input->cur;
885
56.5M
    c = *cur;
886
887
56.5M
    if (c < 0x80) {
888
  /* 1-byte code */
889
22.6M
        if (c < 0x20) {
890
            /*
891
             *   2.11 End-of-Line Handling
892
             *   the literal two-character sequence "#xD#xA" or a standalone
893
             *   literal #xD, an XML processor must pass to the application
894
             *   the single character #xA.
895
             */
896
10.4M
            if (c == '\r') {
897
                /*
898
                 * TODO: This function shouldn't change the 'cur' pointer
899
                 * as side effect, but the NEXTL macro in parser.c relies
900
                 * on this behavior when incrementing line numbers.
901
                 */
902
284k
                if (cur[1] == '\n')
903
7.01k
                    ctxt->input->cur++;
904
284k
                *len = 1;
905
284k
                c = '\n';
906
10.2M
            } else if (c == 0) {
907
2.85M
                if (ctxt->input->cur >= ctxt->input->end) {
908
15.1k
                    *len = 0;
909
2.84M
                } else {
910
2.84M
                    *len = 1;
911
                    /*
912
                     * TODO: Null bytes should be handled by callers,
913
                     * but this can be tricky.
914
                     */
915
2.84M
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
916
2.84M
                            "Char 0x0 out of allowed range\n");
917
2.84M
                }
918
7.34M
            } else {
919
7.34M
                *len = 1;
920
7.34M
            }
921
12.1M
        } else {
922
12.1M
            *len = 1;
923
12.1M
        }
924
925
22.6M
        return(c);
926
33.9M
    } else {
927
33.9M
        int val;
928
929
33.9M
        if (avail < 2)
930
669
            goto incomplete_sequence;
931
33.9M
        if ((cur[1] & 0xc0) != 0x80)
932
1.76M
            goto encoding_error;
933
934
32.1M
        if (c < 0xe0) {
935
            /* 2-byte code */
936
22.9M
            if (c < 0xc2)
937
9.74M
                goto encoding_error;
938
13.1M
            val = (c & 0x1f) << 6;
939
13.1M
            val |= cur[1] & 0x3f;
940
13.1M
            *len = 2;
941
13.1M
        } else {
942
9.27M
            if (avail < 3)
943
37
                goto incomplete_sequence;
944
9.27M
            if ((cur[2] & 0xc0) != 0x80)
945
55.6k
                goto encoding_error;
946
947
9.21M
            if (c < 0xf0) {
948
                /* 3-byte code */
949
9.16M
                val = (c & 0xf) << 12;
950
9.16M
                val |= (cur[1] & 0x3f) << 6;
951
9.16M
                val |= cur[2] & 0x3f;
952
9.16M
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
953
1.49k
                    goto encoding_error;
954
9.16M
                *len = 3;
955
9.16M
            } else {
956
53.2k
                if (avail < 4)
957
25
                    goto incomplete_sequence;
958
53.2k
                if ((cur[3] & 0xc0) != 0x80)
959
7.30k
                    goto encoding_error;
960
961
                /* 4-byte code */
962
45.9k
                val = (c & 0x0f) << 18;
963
45.9k
                val |= (cur[1] & 0x3f) << 12;
964
45.9k
                val |= (cur[2] & 0x3f) << 6;
965
45.9k
                val |= cur[3] & 0x3f;
966
45.9k
                if ((val < 0x10000) || (val >= 0x110000))
967
15.2k
                    goto encoding_error;
968
30.6k
                *len = 4;
969
30.6k
            }
970
9.21M
        }
971
972
22.3M
        return(val);
973
32.1M
    }
974
975
11.5M
encoding_error:
976
    /* Only report the first error */
977
11.5M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
978
2.04k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
979
2.04k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
980
2.04k
    }
981
11.5M
    *len = 1;
982
11.5M
    return(XML_INVALID_CHAR);
983
984
731
incomplete_sequence:
985
    /*
986
     * An encoding problem may arise from a truncated input buffer
987
     * splitting a character in the middle. In that case do not raise
988
     * an error but return 0. This should only happen when push parsing
989
     * char data.
990
     */
991
731
    *len = 0;
992
731
    return(0);
993
56.5M
}
994
995
/**
996
 * The current char value, if using UTF-8 this may actually span multiple
997
 * bytes in the input buffer.
998
 *
999
 * @deprecated Internal function, do not use.
1000
 *
1001
 * @param ctxt  the XML parser context
1002
 * @param cur  pointer to the beginning of the char
1003
 * @param len  pointer to the length of the char read
1004
 * @returns the current char value and its length
1005
 */
1006
1007
int
1008
xmlStringCurrentChar(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
1009
0
                     const xmlChar *cur, int *len) {
1010
0
    int c;
1011
1012
0
    if ((cur == NULL) || (len == NULL))
1013
0
        return(0);
1014
1015
    /* cur is zero-terminated, so we can lie about its length. */
1016
0
    *len = 4;
1017
0
    c = xmlGetUTF8Char(cur, len);
1018
1019
0
    return((c < 0) ? 0 : c);
1020
0
}
1021
1022
/**
1023
 * append the char value in the array
1024
 *
1025
 * @deprecated Internal function, don't use.
1026
 *
1027
 * @param out  pointer to an array of xmlChar
1028
 * @param val  the char value
1029
 * @returns the number of xmlChar written
1030
 */
1031
int
1032
31.9M
xmlCopyCharMultiByte(xmlChar *out, int val) {
1033
31.9M
    if ((out == NULL) || (val < 0)) return(0);
1034
    /*
1035
     * We are supposed to handle UTF8, check it's valid
1036
     * From rfc2044: encoding of the Unicode values on UTF-8:
1037
     *
1038
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1039
     * 0000 0000-0000 007F   0xxxxxxx
1040
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1041
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1042
     */
1043
31.9M
    if  (val >= 0x80) {
1044
31.9M
  xmlChar *savedout = out;
1045
31.9M
  int bits;
1046
31.9M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1047
19.2M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1048
43.8k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1049
0
  else {
1050
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1051
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1052
0
#endif
1053
0
      return(0);
1054
0
  }
1055
83.1M
  for ( ; bits >= 0; bits-= 6)
1056
51.2M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1057
31.9M
  return (out - savedout);
1058
31.9M
    }
1059
0
    *out = val;
1060
0
    return 1;
1061
31.9M
}
1062
1063
/**
1064
 * append the char value in the array
1065
 *
1066
 * @deprecated Don't use.
1067
 *
1068
 * @param len  Ignored, compatibility
1069
 * @param out  pointer to an array of xmlChar
1070
 * @param val  the char value
1071
 * @returns the number of xmlChar written
1072
 */
1073
1074
int
1075
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1076
0
    if ((out == NULL) || (val < 0)) return(0);
1077
    /* the len parameter is ignored */
1078
0
    if  (val >= 0x80) {
1079
0
  return(xmlCopyCharMultiByte (out, val));
1080
0
    }
1081
0
    *out = val;
1082
0
    return 1;
1083
0
}
1084
1085
/************************************************************************
1086
 *                  *
1087
 *    Commodity functions to switch encodings     *
1088
 *                  *
1089
 ************************************************************************/
1090
1091
/**
1092
 * Installs a custom implementation to convert between character
1093
 * encodings.
1094
 *
1095
 * This bypasses legacy feature like global encoding handlers or
1096
 * encoding aliases.
1097
 *
1098
 * @since 2.14.0
1099
 * @param ctxt  parser context
1100
 * @param impl  callback
1101
 * @param vctxt  user data
1102
 */
1103
void
1104
xmlCtxtSetCharEncConvImpl(xmlParserCtxt *ctxt, xmlCharEncConvImpl impl,
1105
0
                          void *vctxt) {
1106
0
    if (ctxt == NULL)
1107
0
        return;
1108
1109
0
    ctxt->convImpl = impl;
1110
0
    ctxt->convCtxt = vctxt;
1111
0
}
1112
1113
static xmlParserErrors
1114
279
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1115
279
    xmlChar out[200];
1116
279
    xmlParserInputPtr input = ctxt->input;
1117
279
    xmlCharEncodingHandlerPtr handler;
1118
279
    int inlen, outlen, i;
1119
279
    xmlParserErrors code;
1120
279
    xmlCharEncError res;
1121
1122
279
    *hout = NULL;
1123
1124
    /*
1125
     * To detect the EBCDIC code page, we convert the first 200 bytes
1126
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1127
     */
1128
279
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1129
279
            ctxt->convImpl, ctxt->convCtxt, &handler);
1130
279
    if (code != XML_ERR_OK)
1131
0
        return(code);
1132
279
    outlen = sizeof(out) - 1;
1133
279
    inlen = input->end - input->cur;
1134
279
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
1135
279
                           /* flush */ 0);
1136
    /*
1137
     * Return the EBCDIC handler if decoding failed. The error will
1138
     * be reported later.
1139
     */
1140
279
    if (res < 0)
1141
9
        goto done;
1142
270
    out[outlen] = 0;
1143
1144
4.01k
    for (i = 0; i < outlen; i++) {
1145
3.88k
        if (out[i] == '>')
1146
1
            break;
1147
3.87k
        if ((out[i] == 'e') &&
1148
3.87k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1149
131
            int start, cur, quote;
1150
1151
131
            i += 8;
1152
131
            while (IS_BLANK_CH(out[i]))
1153
630
                i += 1;
1154
131
            if (out[i++] != '=')
1155
29
                break;
1156
102
            while (IS_BLANK_CH(out[i]))
1157
600
                i += 1;
1158
102
            quote = out[i++];
1159
102
            if ((quote != '\'') && (quote != '"'))
1160
32
                break;
1161
70
            start = i;
1162
70
            cur = out[i];
1163
1.44k
            while (((cur >= 'a') && (cur <= 'z')) ||
1164
1.44k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1165
1.44k
                   ((cur >= '0') && (cur <= '9')) ||
1166
1.44k
                   (cur == '.') || (cur == '_') ||
1167
1.44k
                   (cur == '-'))
1168
1.37k
                cur = out[++i];
1169
70
            if (cur != quote)
1170
51
                break;
1171
19
            out[i] = 0;
1172
19
            xmlCharEncCloseFunc(handler);
1173
19
            code = xmlCreateCharEncodingHandler((char *) out + start,
1174
19
                    XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
1175
19
                    &handler);
1176
19
            if (code != XML_ERR_OK)
1177
16
                return(code);
1178
3
            *hout = handler;
1179
3
            return(XML_ERR_OK);
1180
19
        }
1181
3.87k
    }
1182
1183
260
done:
1184
    /*
1185
     * Encoding handlers are stateful, so we have to recreate them.
1186
     */
1187
260
    xmlCharEncCloseFunc(handler);
1188
260
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1189
260
            ctxt->convImpl, ctxt->convCtxt, &handler);
1190
260
    if (code != XML_ERR_OK)
1191
0
        return(code);
1192
260
    *hout = handler;
1193
260
    return(XML_ERR_OK);
1194
260
}
1195
1196
/**
1197
 * Use encoding specified by enum to decode input data. This overrides
1198
 * the encoding found in the XML declaration.
1199
 *
1200
 * This function can also be used to override the encoding of chunks
1201
 * passed to #xmlParseChunk.
1202
 *
1203
 * @param ctxt  the parser context
1204
 * @param enc  the encoding value (number)
1205
 * @returns 0 in case of success, -1 otherwise
1206
 */
1207
int
1208
xmlSwitchEncoding(xmlParserCtxt *ctxt, xmlCharEncoding enc)
1209
430
{
1210
430
    xmlCharEncodingHandlerPtr handler = NULL;
1211
430
    int ret;
1212
430
    xmlParserErrors code;
1213
1214
430
    if ((ctxt == NULL) || (ctxt->input == NULL))
1215
0
        return(-1);
1216
1217
430
    code = xmlLookupCharEncodingHandler(enc, &handler);
1218
430
    if (code != 0) {
1219
0
        xmlFatalErr(ctxt, code, NULL);
1220
0
        return(-1);
1221
0
    }
1222
1223
430
    ret = xmlSwitchToEncoding(ctxt, handler);
1224
1225
430
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1226
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1227
0
    }
1228
1229
430
    return(ret);
1230
430
}
1231
1232
/**
1233
 * @param ctxt  the parser context
1234
 * @param input  the input strea,
1235
 * @param encoding  the encoding name
1236
 * @returns 0 in case of success, -1 otherwise
1237
 */
1238
static int
1239
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1240
0
                           const char *encoding) {
1241
0
    xmlCharEncodingHandlerPtr handler;
1242
0
    xmlParserErrors res;
1243
1244
0
    if (encoding == NULL)
1245
0
        return(-1);
1246
1247
0
    res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
1248
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1249
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1250
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1251
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1252
0
        return(-1);
1253
0
    } else if (res != XML_ERR_OK) {
1254
0
        xmlFatalErr(ctxt, res, encoding);
1255
0
        return(-1);
1256
0
    }
1257
1258
0
    res  = xmlInputSetEncodingHandler(input, handler);
1259
0
    if (res != XML_ERR_OK) {
1260
0
        xmlCtxtErrIO(ctxt, res, NULL);
1261
0
        return(-1);
1262
0
    }
1263
1264
0
    return(0);
1265
0
}
1266
1267
/**
1268
 * Use specified encoding to decode input data. This overrides the
1269
 * encoding found in the XML declaration.
1270
 *
1271
 * This function can also be used to override the encoding of chunks
1272
 * passed to #xmlParseChunk.
1273
 *
1274
 * @since 2.13.0
1275
 *
1276
 * @param ctxt  the parser context
1277
 * @param encoding  the encoding name
1278
 * @returns 0 in case of success, -1 otherwise
1279
 */
1280
int
1281
0
xmlSwitchEncodingName(xmlParserCtxt *ctxt, const char *encoding) {
1282
0
    if (ctxt == NULL)
1283
0
        return(-1);
1284
1285
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1286
0
}
1287
1288
/**
1289
 * Use encoding handler to decode input data.
1290
 *
1291
 * Closes the handler on error.
1292
 *
1293
 * @param input  the input stream
1294
 * @param handler  the encoding handler
1295
 * @returns an xmlParserErrors code.
1296
 */
1297
xmlParserErrors
1298
xmlInputSetEncodingHandler(xmlParserInput *input,
1299
1.64k
                           xmlCharEncodingHandler *handler) {
1300
1.64k
    xmlParserInputBufferPtr in;
1301
1.64k
    xmlBufPtr buf;
1302
1.64k
    xmlParserErrors code = XML_ERR_OK;
1303
1304
1.64k
    if ((input == NULL) || (input->buf == NULL)) {
1305
0
        xmlCharEncCloseFunc(handler);
1306
0
  return(XML_ERR_ARGUMENT);
1307
0
    }
1308
1.64k
    in = input->buf;
1309
1310
1.64k
    input->flags |= XML_INPUT_HAS_ENCODING;
1311
1312
    /*
1313
     * UTF-8 requires no encoding handler.
1314
     */
1315
1.64k
    if ((handler != NULL) &&
1316
1.64k
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1317
0
        xmlCharEncCloseFunc(handler);
1318
0
        handler = NULL;
1319
0
    }
1320
1321
1.64k
    if (in->encoder == handler)
1322
19
        return(XML_ERR_OK);
1323
1324
1.62k
    if (in->encoder != NULL) {
1325
        /*
1326
         * Switching encodings during parsing is a really bad idea,
1327
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1328
         * separate calls to xmlParseChunk.
1329
         *
1330
         * TODO: We should check whether the "raw" input buffer is empty and
1331
         * convert the old content using the old encoder.
1332
         */
1333
1334
0
        xmlCharEncCloseFunc(in->encoder);
1335
0
        in->encoder = handler;
1336
0
        return(XML_ERR_OK);
1337
0
    }
1338
1339
1.62k
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1340
1.62k
    if (buf == NULL) {
1341
0
        xmlCharEncCloseFunc(handler);
1342
0
        return(XML_ERR_NO_MEMORY);
1343
0
    }
1344
1345
1.62k
    in->encoder = handler;
1346
1.62k
    in->raw = in->buffer;
1347
1.62k
    in->buffer = buf;
1348
1349
    /*
1350
     * Is there already some content down the pipe to convert ?
1351
     */
1352
1.62k
    if (input->end > input->base) {
1353
1.62k
        size_t processed;
1354
1.62k
        size_t nbchars;
1355
1.62k
        xmlCharEncError res;
1356
1357
        /*
1358
         * Shrink the current input buffer.
1359
         * Move it as the raw buffer and create a new input buffer
1360
         */
1361
1.62k
        processed = input->cur - input->base;
1362
1.62k
        xmlBufShrink(in->raw, processed);
1363
1.62k
        input->consumed += processed;
1364
1.62k
        in->rawconsumed = processed;
1365
1366
        /*
1367
         * If we're push-parsing, we must convert the whole buffer.
1368
         *
1369
         * If we're pull-parsing, we could be parsing from a huge
1370
         * memory buffer which we don't want to convert completely.
1371
         */
1372
1.62k
        if (input->flags & XML_INPUT_PROGRESSIVE)
1373
0
            nbchars = SIZE_MAX;
1374
1.62k
        else
1375
1.62k
            nbchars = 4000 /* MINLEN */;
1376
1.62k
        res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
1377
1.62k
        if (res != XML_ENC_ERR_SUCCESS)
1378
57
            code = in->error;
1379
1.62k
    }
1380
1381
1.62k
    xmlBufResetInput(in->buffer, input);
1382
1383
1.62k
    return(code);
1384
1.62k
}
1385
1386
/**
1387
 * Use encoding handler to decode input data.
1388
 *
1389
 * @deprecated Internal function, don't use.
1390
 *
1391
 * @param ctxt  the parser context, only for error reporting
1392
 * @param input  the input stream
1393
 * @param handler  the encoding handler
1394
 * @returns 0 in case of success, -1 otherwise
1395
 */
1396
int
1397
xmlSwitchInputEncoding(xmlParserCtxt *ctxt, xmlParserInput *input,
1398
0
                       xmlCharEncodingHandler *handler) {
1399
0
    xmlParserErrors code = xmlInputSetEncodingHandler(input, handler);
1400
1401
0
    if (code != XML_ERR_OK) {
1402
0
        xmlCtxtErrIO(ctxt, code, NULL);
1403
0
        return(-1);
1404
0
    }
1405
1406
0
    return(0);
1407
0
}
1408
1409
/**
1410
 * Use encoding handler to decode input data.
1411
 *
1412
 * This function can be used to enforce the encoding of chunks passed
1413
 * to #xmlParseChunk.
1414
 *
1415
 * @param ctxt  the parser context
1416
 * @param handler  the encoding handler
1417
 * @returns 0 in case of success, -1 otherwise
1418
 */
1419
int
1420
xmlSwitchToEncoding(xmlParserCtxt *ctxt, xmlCharEncodingHandler *handler)
1421
693
{
1422
693
    xmlParserErrors code;
1423
1424
693
    if (ctxt == NULL)
1425
0
        return(-1);
1426
1427
693
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1428
693
    if (code != XML_ERR_OK) {
1429
28
        xmlCtxtErrIO(ctxt, code, NULL);
1430
28
        return(-1);
1431
28
    }
1432
1433
665
    return(0);
1434
693
}
1435
1436
/**
1437
 * Handle optional BOM, detect and switch to encoding.
1438
 *
1439
 * Assumes that there are at least four bytes in the input buffer.
1440
 *
1441
 * @param ctxt  the parser context
1442
 */
1443
void
1444
15.1k
xmlDetectEncoding(xmlParserCtxt *ctxt) {
1445
15.1k
    const xmlChar *in;
1446
15.1k
    xmlCharEncoding enc;
1447
15.1k
    int bomSize;
1448
15.1k
    int autoFlag = 0;
1449
1450
15.1k
    if (xmlParserGrow(ctxt) < 0)
1451
0
        return;
1452
15.1k
    in = ctxt->input->cur;
1453
15.1k
    if (ctxt->input->end - in < 4)
1454
343
        return;
1455
1456
14.7k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1457
        /*
1458
         * If the encoding was already set, only skip the BOM which was
1459
         * possibly decoded to UTF-8.
1460
         */
1461
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1462
0
            ctxt->input->cur += 3;
1463
0
        }
1464
1465
0
        return;
1466
0
    }
1467
1468
14.7k
    enc = XML_CHAR_ENCODING_NONE;
1469
14.7k
    bomSize = 0;
1470
1471
    /*
1472
     * BOM sniffing and detection of initial bytes of an XML
1473
     * declaration.
1474
     *
1475
     * The HTML5 spec doesn't cover UTF-32 (UCS-4) or EBCDIC.
1476
     */
1477
14.7k
    switch (in[0]) {
1478
86
        case 0x00:
1479
86
            if ((!ctxt->html) &&
1480
86
                (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1481
1
                enc = XML_CHAR_ENCODING_UCS4BE;
1482
1
                autoFlag = XML_INPUT_AUTO_OTHER;
1483
85
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1484
                /*
1485
                 * TODO: The HTML5 spec requires to check that the
1486
                 * next codepoint is an 'x'.
1487
                 */
1488
63
                enc = XML_CHAR_ENCODING_UTF16BE;
1489
63
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1490
63
            }
1491
86
            break;
1492
1493
13.9k
        case 0x3C:
1494
13.9k
            if (in[1] == 0x00) {
1495
113
                if ((!ctxt->html) &&
1496
113
                    (in[2] == 0x00) && (in[3] == 0x00)) {
1497
30
                    enc = XML_CHAR_ENCODING_UCS4LE;
1498
30
                    autoFlag = XML_INPUT_AUTO_OTHER;
1499
83
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1500
                    /*
1501
                     * TODO: The HTML5 spec requires to check that the
1502
                     * next codepoint is an 'x'.
1503
                     */
1504
74
                    enc = XML_CHAR_ENCODING_UTF16LE;
1505
74
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1506
74
                }
1507
113
            }
1508
13.9k
            break;
1509
1510
306
        case 0x4C:
1511
306
      if ((!ctxt->html) &&
1512
306
                (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1513
279
          enc = XML_CHAR_ENCODING_EBCDIC;
1514
279
                autoFlag = XML_INPUT_AUTO_OTHER;
1515
279
            }
1516
306
            break;
1517
1518
29
        case 0xEF:
1519
29
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1520
17
                enc = XML_CHAR_ENCODING_UTF8;
1521
17
                autoFlag = XML_INPUT_AUTO_UTF8;
1522
17
                bomSize = 3;
1523
17
            }
1524
29
            break;
1525
1526
157
        case 0xFE:
1527
157
            if (in[1] == 0xFF) {
1528
145
                enc = XML_CHAR_ENCODING_UTF16BE;
1529
145
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1530
145
                bomSize = 2;
1531
145
            }
1532
157
            break;
1533
1534
104
        case 0xFF:
1535
104
            if (in[1] == 0xFE) {
1536
100
                enc = XML_CHAR_ENCODING_UTF16LE;
1537
100
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1538
100
                bomSize = 2;
1539
100
            }
1540
104
            break;
1541
14.7k
    }
1542
1543
14.7k
    if (bomSize > 0) {
1544
262
        ctxt->input->cur += bomSize;
1545
262
    }
1546
1547
14.7k
    if (enc != XML_CHAR_ENCODING_NONE) {
1548
709
        ctxt->input->flags |= autoFlag;
1549
1550
709
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1551
279
            xmlCharEncodingHandlerPtr handler;
1552
279
            xmlParserErrors res;
1553
1554
279
            res = xmlDetectEBCDIC(ctxt, &handler);
1555
279
            if (res != XML_ERR_OK) {
1556
16
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1557
263
            } else {
1558
263
                xmlSwitchToEncoding(ctxt, handler);
1559
263
            }
1560
430
        } else {
1561
430
            xmlSwitchEncoding(ctxt, enc);
1562
430
        }
1563
709
    }
1564
14.7k
}
1565
1566
/**
1567
 * Set the encoding from a declaration in the document.
1568
 *
1569
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1570
 * about encoding mismatches.
1571
 *
1572
 * Takes ownership of 'encoding'.
1573
 *
1574
 * @param ctxt  the parser context
1575
 * @param encoding  declared encoding
1576
 */
1577
void
1578
1.01k
xmlSetDeclaredEncoding(xmlParserCtxt *ctxt, xmlChar *encoding) {
1579
1.01k
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1580
1.01k
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1581
1.00k
        xmlCharEncodingHandlerPtr handler;
1582
1.00k
        xmlParserErrors res;
1583
1.00k
        xmlCharEncFlags flags = XML_ENC_INPUT;
1584
1585
        /*
1586
         * xmlSwitchEncodingName treats unsupported encodings as
1587
         * warnings, but we want it to be an error in an encoding
1588
         * declaration.
1589
         */
1590
1.00k
        if (ctxt->html)
1591
0
            flags |= XML_ENC_HTML;
1592
1.00k
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1593
1.00k
                flags, ctxt->convImpl, ctxt->convCtxt, &handler);
1594
1.00k
        if (res != XML_ERR_OK) {
1595
53
            xmlFatalErr(ctxt, res, (const char *) encoding);
1596
53
            xmlFree(encoding);
1597
53
            return;
1598
53
        }
1599
1600
952
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1601
952
        if (res != XML_ERR_OK) {
1602
29
            xmlCtxtErrIO(ctxt, res, NULL);
1603
29
            xmlFree(encoding);
1604
29
            return;
1605
29
        }
1606
1607
923
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1608
923
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1609
9
        static const char *allowedUTF8[] = {
1610
9
            "UTF-8", "UTF8", NULL
1611
9
        };
1612
9
        static const char *allowedUTF16LE[] = {
1613
9
            "UTF-16", "UTF-16LE", "UTF16", NULL
1614
9
        };
1615
9
        static const char *allowedUTF16BE[] = {
1616
9
            "UTF-16", "UTF-16BE", "UTF16", NULL
1617
9
        };
1618
9
        const char **allowed = NULL;
1619
9
        const char *autoEnc = NULL;
1620
1621
9
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1622
6
            case XML_INPUT_AUTO_UTF8:
1623
6
                allowed = allowedUTF8;
1624
6
                autoEnc = "UTF-8";
1625
6
                break;
1626
1
            case XML_INPUT_AUTO_UTF16LE:
1627
1
                allowed = allowedUTF16LE;
1628
1
                autoEnc = "UTF-16LE";
1629
1
                break;
1630
1
            case XML_INPUT_AUTO_UTF16BE:
1631
1
                allowed = allowedUTF16BE;
1632
1
                autoEnc = "UTF-16BE";
1633
1
                break;
1634
9
        }
1635
1636
9
        if (allowed != NULL) {
1637
8
            const char **p;
1638
8
            int match = 0;
1639
1640
25
            for (p = allowed; *p != NULL; p++) {
1641
18
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1642
1
                    match = 1;
1643
1
                    break;
1644
1
                }
1645
18
            }
1646
1647
8
            if (match == 0) {
1648
7
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1649
7
                              "Encoding '%s' doesn't match "
1650
7
                              "auto-detected '%s'\n",
1651
7
                              encoding, BAD_CAST autoEnc);
1652
7
                xmlFree(encoding);
1653
7
                encoding = xmlStrdup(BAD_CAST autoEnc);
1654
7
                if (encoding == NULL)
1655
0
                    xmlCtxtErrMemory(ctxt);
1656
7
            }
1657
8
        }
1658
9
    }
1659
1660
932
    if (ctxt->encoding != NULL)
1661
0
        xmlFree((xmlChar *) ctxt->encoding);
1662
932
    ctxt->encoding = encoding;
1663
932
}
1664
1665
/**
1666
 * @since 2.14.0
1667
 *
1668
 * @param ctxt  parser context
1669
 * @returns the encoding from the encoding declaration. This can differ
1670
 * from the actual encoding.
1671
 */
1672
const xmlChar *
1673
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxt *ctxt) {
1674
0
    if (ctxt == NULL)
1675
0
        return(NULL);
1676
1677
0
    return(ctxt->encoding);
1678
0
}
1679
1680
/**
1681
 * @param ctxt  the parser context
1682
 * @returns the actual used to parse the document. This can differ from
1683
 * the declared encoding.
1684
 */
1685
const xmlChar *
1686
13.9k
xmlGetActualEncoding(xmlParserCtxt *ctxt) {
1687
13.9k
    const xmlChar *encoding = NULL;
1688
1689
13.9k
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1690
13.9k
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1691
        /* Preserve encoding exactly */
1692
1.01k
        encoding = ctxt->encoding;
1693
12.9k
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1694
1
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1695
12.9k
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1696
0
        encoding = BAD_CAST "UTF-8";
1697
0
    }
1698
1699
13.9k
    return(encoding);
1700
13.9k
}
1701
1702
/************************************************************************
1703
 *                  *
1704
 *  Commodity functions to handle entities processing   *
1705
 *                  *
1706
 ************************************************************************/
1707
1708
/**
1709
 * Free up an input stream.
1710
 *
1711
 * @param input  an xmlParserInput
1712
 */
1713
void
1714
48.2k
xmlFreeInputStream(xmlParserInput *input) {
1715
48.2k
    if (input == NULL) return;
1716
1717
48.2k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1718
48.2k
    if (input->version != NULL) xmlFree((char *) input->version);
1719
48.2k
    if ((input->free != NULL) && (input->base != NULL))
1720
0
        input->free((xmlChar *) input->base);
1721
48.2k
    if (input->buf != NULL)
1722
48.2k
        xmlFreeParserInputBuffer(input->buf);
1723
48.2k
    xmlFree(input);
1724
48.2k
}
1725
1726
/**
1727
 * Create a new input stream structure.
1728
 *
1729
 * @deprecated Use #xmlNewInputFromUrl or similar functions.
1730
 *
1731
 * @param ctxt  an XML parser context
1732
 * @returns the new input stream or NULL
1733
 */
1734
xmlParserInput *
1735
0
xmlNewInputStream(xmlParserCtxt *ctxt) {
1736
0
    xmlParserInputPtr input;
1737
1738
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1739
0
    if (input == NULL) {
1740
0
        xmlCtxtErrMemory(ctxt);
1741
0
  return(NULL);
1742
0
    }
1743
0
    memset(input, 0, sizeof(xmlParserInput));
1744
0
    input->line = 1;
1745
0
    input->col = 1;
1746
1747
0
    return(input);
1748
0
}
1749
1750
/**
1751
 * Creates a new parser input from the filesystem, the network or
1752
 * a user-defined resource loader.
1753
 *
1754
 * @param ctxt  parser context
1755
 * @param url  filename or URL
1756
 * @param publicId  publid ID from doctype (optional)
1757
 * @param encoding  character encoding (optional)
1758
 * @param flags  unused, pass 0
1759
 * @returns a new parser input.
1760
 */
1761
xmlParserInput *
1762
xmlCtxtNewInputFromUrl(xmlParserCtxt *ctxt, const char *url,
1763
                       const char *publicId, const char *encoding,
1764
0
                       xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
1765
0
    xmlParserInputPtr input;
1766
1767
0
    if ((ctxt == NULL) || (url == NULL))
1768
0
  return(NULL);
1769
1770
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1771
0
    if (input == NULL)
1772
0
        return(NULL);
1773
1774
0
    if (encoding != NULL)
1775
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1776
1777
0
    return(input);
1778
0
}
1779
1780
/**
1781
 * Internal helper function.
1782
 *
1783
 * @param buf  parser input buffer
1784
 * @param filename  filename or URL
1785
 * @returns a new parser input.
1786
 */
1787
static xmlParserInputPtr
1788
48.2k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1789
48.2k
    xmlParserInputPtr input;
1790
1791
48.2k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1792
48.2k
    if (input == NULL) {
1793
0
  xmlFreeParserInputBuffer(buf);
1794
0
  return(NULL);
1795
0
    }
1796
48.2k
    memset(input, 0, sizeof(xmlParserInput));
1797
48.2k
    input->line = 1;
1798
48.2k
    input->col = 1;
1799
1800
48.2k
    input->buf = buf;
1801
48.2k
    xmlBufResetInput(input->buf->buffer, input);
1802
1803
48.2k
    if (filename != NULL) {
1804
0
        input->filename = xmlMemStrdup(filename);
1805
0
        if (input->filename == NULL) {
1806
0
            xmlFreeInputStream(input);
1807
0
            return(NULL);
1808
0
        }
1809
0
    }
1810
1811
48.2k
    return(input);
1812
48.2k
}
1813
1814
/**
1815
 * Creates a new parser input to read from a memory area.
1816
 *
1817
 * `url` is used as base to resolve external entities and for
1818
 * error reporting.
1819
 *
1820
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1821
 * stay unchanged until parsing has finished. This can avoid
1822
 * temporary copies.
1823
 *
1824
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1825
 * area must contain a zero byte after the buffer at position `size`.
1826
 * This can avoid temporary copies.
1827
 *
1828
 * @since 2.14.0
1829
 *
1830
 * @param url  base URL (optional)
1831
 * @param mem  pointer to char array
1832
 * @param size  size of array
1833
 * @param flags  optimization hints
1834
 * @returns a new parser input or NULL if a memory allocation failed.
1835
 */
1836
xmlParserInput *
1837
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1838
15.1k
                      xmlParserInputFlags flags) {
1839
15.1k
    xmlParserInputBufferPtr buf;
1840
1841
15.1k
    if (mem == NULL)
1842
0
  return(NULL);
1843
1844
15.1k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1845
15.1k
    if (buf == NULL)
1846
0
        return(NULL);
1847
1848
15.1k
    return(xmlNewInputInternal(buf, url));
1849
15.1k
}
1850
1851
/**
1852
 * @param ctxt  parser context
1853
 * @param url  base URL (optional)
1854
 * @param mem  pointer to char array
1855
 * @param size  size of array
1856
 * @param encoding  character encoding (optional)
1857
 * @param flags  optimization hints
1858
 * @returns a new parser input or NULL in case of error.
1859
 */
1860
xmlParserInput *
1861
xmlCtxtNewInputFromMemory(xmlParserCtxt *ctxt, const char *url,
1862
                          const void *mem, size_t size,
1863
15.1k
                          const char *encoding, xmlParserInputFlags flags) {
1864
15.1k
    xmlParserInputPtr input;
1865
1866
15.1k
    if ((ctxt == NULL) || (mem == NULL))
1867
0
  return(NULL);
1868
1869
15.1k
    input = xmlNewInputFromMemory(url, mem, size, flags);
1870
15.1k
    if (input == NULL) {
1871
0
        xmlCtxtErrMemory(ctxt);
1872
0
        return(NULL);
1873
0
    }
1874
1875
15.1k
    if (encoding != NULL)
1876
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1877
1878
15.1k
    return(input);
1879
15.1k
}
1880
1881
/**
1882
 * Creates a new parser input to read from a zero-terminated string.
1883
 *
1884
 * `url` is used as base to resolve external entities and for
1885
 * error reporting.
1886
 *
1887
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1888
 * stay unchanged until parsing has finished. This can avoid
1889
 * temporary copies.
1890
 *
1891
 * @since 2.14.0
1892
 *
1893
 * @param url  base URL (optional)
1894
 * @param str  zero-terminated string
1895
 * @param flags  optimization hints
1896
 * @returns a new parser input or NULL if a memory allocation failed.
1897
 */
1898
xmlParserInput *
1899
xmlNewInputFromString(const char *url, const char *str,
1900
33.1k
                      xmlParserInputFlags flags) {
1901
33.1k
    xmlParserInputBufferPtr buf;
1902
1903
33.1k
    if (str == NULL)
1904
0
  return(NULL);
1905
1906
33.1k
    buf = xmlNewInputBufferString(str, flags);
1907
33.1k
    if (buf == NULL)
1908
0
        return(NULL);
1909
1910
33.1k
    return(xmlNewInputInternal(buf, url));
1911
33.1k
}
1912
1913
/**
1914
 * @param ctxt  parser context
1915
 * @param url  base URL (optional)
1916
 * @param str  zero-terminated string
1917
 * @param encoding  character encoding (optional)
1918
 * @param flags  optimization hints
1919
 * @returns a new parser input.
1920
 */
1921
xmlParserInput *
1922
xmlCtxtNewInputFromString(xmlParserCtxt *ctxt, const char *url,
1923
                          const char *str, const char *encoding,
1924
33.1k
                          xmlParserInputFlags flags) {
1925
33.1k
    xmlParserInputPtr input;
1926
1927
33.1k
    if ((ctxt == NULL) || (str == NULL))
1928
0
  return(NULL);
1929
1930
33.1k
    input = xmlNewInputFromString(url, str, flags);
1931
33.1k
    if (input == NULL) {
1932
0
        xmlCtxtErrMemory(ctxt);
1933
0
        return(NULL);
1934
0
    }
1935
1936
33.1k
    if (encoding != NULL)
1937
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1938
1939
33.1k
    return(input);
1940
33.1k
}
1941
1942
/**
1943
 * Creates a new parser input to read from a file descriptor.
1944
 *
1945
 * `url` is used as base to resolve external entities and for
1946
 * error reporting.
1947
 *
1948
 * `fd` is closed after parsing has finished.
1949
 *
1950
 * Supported `flags` are XML_INPUT_UNZIP to decompress data
1951
 * automatically. This feature is deprecated and will be removed
1952
 * in a future release.
1953
 *
1954
 * @since 2.14.0
1955
 *
1956
 * @param url  base URL (optional)
1957
 * @param fd  file descriptor
1958
 * @param flags  input flags
1959
 * @returns a new parser input or NULL in case of error.
1960
 */
1961
xmlParserInput *
1962
0
xmlNewInputFromFd(const char *url, int fd, xmlParserInputFlags flags) {
1963
0
    xmlParserInputBufferPtr buf;
1964
1965
0
    if (fd < 0)
1966
0
  return(NULL);
1967
1968
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1969
0
    if (buf == NULL)
1970
0
        return(NULL);
1971
1972
0
    if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) {
1973
0
        xmlFreeParserInputBuffer(buf);
1974
0
        return(NULL);
1975
0
    }
1976
1977
0
    return(xmlNewInputInternal(buf, url));
1978
0
}
1979
1980
/**
1981
 * @param ctxt  parser context
1982
 * @param url  base URL (optional)
1983
 * @param fd  file descriptor
1984
 * @param encoding  character encoding (optional)
1985
 * @param flags  input flags
1986
 * @returns a new parser input.
1987
 */
1988
xmlParserInput *
1989
xmlCtxtNewInputFromFd(xmlParserCtxt *ctxt, const char *url,
1990
                      int fd, const char *encoding,
1991
0
                      xmlParserInputFlags flags) {
1992
0
    xmlParserInputPtr input;
1993
1994
0
    if ((ctxt == NULL) || (fd < 0))
1995
0
  return(NULL);
1996
1997
0
    if (ctxt->options & XML_PARSE_UNZIP)
1998
0
        flags |= XML_INPUT_UNZIP;
1999
2000
0
    input = xmlNewInputFromFd(url, fd, flags);
2001
0
    if (input == NULL) {
2002
0
  xmlCtxtErrMemory(ctxt);
2003
0
        return(NULL);
2004
0
    }
2005
2006
0
    if (encoding != NULL)
2007
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2008
2009
0
    return(input);
2010
0
}
2011
2012
/**
2013
 * Creates a new parser input to read from input callbacks and
2014
 * context.
2015
 *
2016
 * `url` is used as base to resolve external entities and for
2017
 * error reporting.
2018
 *
2019
 * `ioRead` is called to read new data into a provided buffer.
2020
 * It must return the number of bytes written into the buffer
2021
 * or a negative xmlParserErrors code on failure.
2022
 *
2023
 * `ioClose` is called after parsing has finished.
2024
 *
2025
 * `ioCtxt` is an opaque pointer passed to the callbacks.
2026
 *
2027
 * @since 2.14.0
2028
 *
2029
 * @param url  base URL (optional)
2030
 * @param ioRead  read callback
2031
 * @param ioClose  close callback (optional)
2032
 * @param ioCtxt  IO context
2033
 * @param flags  unused, pass 0
2034
 * @returns a new parser input or NULL if a memory allocation failed.
2035
 */
2036
xmlParserInput *
2037
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2038
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2039
0
                  xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
2040
0
    xmlParserInputBufferPtr buf;
2041
2042
0
    if (ioRead == NULL)
2043
0
  return(NULL);
2044
2045
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2046
0
    if (buf == NULL) {
2047
0
        if (ioClose != NULL)
2048
0
            ioClose(ioCtxt);
2049
0
        return(NULL);
2050
0
    }
2051
2052
0
    buf->context = ioCtxt;
2053
0
    buf->readcallback = ioRead;
2054
0
    buf->closecallback = ioClose;
2055
2056
0
    return(xmlNewInputInternal(buf, url));
2057
0
}
2058
2059
/**
2060
 * @param ctxt  parser context
2061
 * @param url  base URL (optional)
2062
 * @param ioRead  read callback
2063
 * @param ioClose  close callback (optional)
2064
 * @param ioCtxt  IO context
2065
 * @param encoding  character encoding (optional)
2066
 * @param flags  unused, pass 0
2067
 * @returns a new parser input.
2068
 */
2069
xmlParserInput *
2070
xmlCtxtNewInputFromIO(xmlParserCtxt *ctxt, const char *url,
2071
                      xmlInputReadCallback ioRead,
2072
                      xmlInputCloseCallback ioClose,
2073
                      void *ioCtxt, const char *encoding,
2074
0
                      xmlParserInputFlags flags) {
2075
0
    xmlParserInputPtr input;
2076
2077
0
    if ((ctxt == NULL) || (ioRead == NULL))
2078
0
  return(NULL);
2079
2080
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2081
0
    if (input == NULL) {
2082
0
        xmlCtxtErrMemory(ctxt);
2083
0
        return(NULL);
2084
0
    }
2085
2086
0
    if (encoding != NULL)
2087
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2088
2089
0
    return(input);
2090
0
}
2091
2092
/**
2093
 * Creates a new parser input for a push parser.
2094
 *
2095
 * @param url  base URL (optional)
2096
 * @param chunk  pointer to char array
2097
 * @param size  size of array
2098
 * @returns a new parser input or NULL if a memory allocation failed.
2099
 */
2100
xmlParserInput *
2101
0
xmlNewPushInput(const char *url, const char *chunk, int size) {
2102
0
    xmlParserInputBufferPtr buf;
2103
0
    xmlParserInputPtr input;
2104
2105
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2106
0
    if (buf == NULL)
2107
0
        return(NULL);
2108
2109
0
    input = xmlNewInputInternal(buf, url);
2110
0
    if (input == NULL)
2111
0
  return(NULL);
2112
2113
0
    input->flags |= XML_INPUT_PROGRESSIVE;
2114
2115
0
    if ((size > 0) && (chunk != NULL)) {
2116
0
        int res;
2117
2118
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2119
0
        xmlBufResetInput(input->buf->buffer, input);
2120
0
        if (res < 0) {
2121
0
            xmlFreeInputStream(input);
2122
0
            return(NULL);
2123
0
        }
2124
0
    }
2125
2126
0
    return(input);
2127
0
}
2128
2129
/**
2130
 * Create a new input stream structure encapsulating the `buf` into
2131
 * a stream suitable for the parser.
2132
 *
2133
 * @param ctxt  an XML parser context
2134
 * @param buf  an input buffer
2135
 * @param enc  the charset encoding if known
2136
 * @returns the new input stream or NULL
2137
 */
2138
xmlParserInput *
2139
xmlNewIOInputStream(xmlParserCtxt *ctxt, xmlParserInputBuffer *buf,
2140
0
              xmlCharEncoding enc) {
2141
0
    xmlParserInputPtr input;
2142
0
    const char *encoding;
2143
2144
0
    if ((ctxt == NULL) || (buf == NULL))
2145
0
        return(NULL);
2146
2147
0
    input = xmlNewInputInternal(buf, NULL);
2148
0
    if (input == NULL) {
2149
0
        xmlCtxtErrMemory(ctxt);
2150
0
  return(NULL);
2151
0
    }
2152
2153
0
    encoding = xmlGetCharEncodingName(enc);
2154
0
    if (encoding != NULL)
2155
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2156
2157
0
    return(input);
2158
0
}
2159
2160
/**
2161
 * Create a new input stream based on an xmlEntity
2162
 *
2163
 * @deprecated Internal function, do not use.
2164
 *
2165
 * @param ctxt  an XML parser context
2166
 * @param ent  an Entity pointer
2167
 * @returns the new input stream or NULL
2168
 */
2169
xmlParserInput *
2170
33.1k
xmlNewEntityInputStream(xmlParserCtxt *ctxt, xmlEntity *ent) {
2171
33.1k
    xmlParserInputPtr input;
2172
2173
33.1k
    if ((ctxt == NULL) || (ent == NULL))
2174
0
  return(NULL);
2175
2176
33.1k
    if (ent->content != NULL) {
2177
33.1k
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2178
33.1k
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2179
33.1k
    } else if (ent->URI != NULL) {
2180
0
        xmlResourceType rtype;
2181
2182
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2183
0
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2184
0
        else
2185
0
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2186
2187
0
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2188
0
                                (char *) ent->ExternalID, rtype);
2189
0
    } else {
2190
0
        return(NULL);
2191
0
    }
2192
2193
33.1k
    if (input == NULL)
2194
0
        return(NULL);
2195
2196
33.1k
    input->entity = ent;
2197
2198
33.1k
    return(input);
2199
33.1k
}
2200
2201
/**
2202
 * Create a new input stream based on a memory buffer.
2203
 *
2204
 * @deprecated Use #xmlNewInputFromString.
2205
 *
2206
 * @param ctxt  an XML parser context
2207
 * @param buffer  a memory buffer
2208
 * @returns the new input stream
2209
 */
2210
xmlParserInput *
2211
0
xmlNewStringInputStream(xmlParserCtxt *ctxt, const xmlChar *buffer) {
2212
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2213
0
                                     NULL, 0));
2214
0
}
2215
2216
2217
/****************************************************************
2218
 *                *
2219
 *    External entities loading     *
2220
 *                *
2221
 ****************************************************************/
2222
2223
#ifdef LIBXML_CATALOG_ENABLED
2224
2225
/**
2226
 * Resolves an external ID or URL against the appropriate catalog.
2227
 *
2228
 * @param url  the URL or system ID for the entity to load
2229
 * @param publicId  the public ID for the entity to load (optional)
2230
 * @param localCatalogs  local catalogs (optional)
2231
 * @param allowGlobal  allow global system catalog
2232
 * @param out  resulting resource or NULL
2233
 * @returns an xmlParserErrors code
2234
 */
2235
static xmlParserErrors
2236
xmlResolveFromCatalog(const char *url, const char *publicId,
2237
0
                      void *localCatalogs, int allowGlobal, char **out) {
2238
0
    xmlError oldError;
2239
0
    xmlError *lastError;
2240
0
    char *resource = NULL;
2241
0
    xmlParserErrors code;
2242
2243
0
    if (out == NULL)
2244
0
        return(XML_ERR_ARGUMENT);
2245
0
    *out = NULL;
2246
0
    if ((localCatalogs == NULL) && (!allowGlobal))
2247
0
        return(XML_ERR_OK);
2248
2249
    /*
2250
     * Don't try to resolve if local file exists.
2251
     *
2252
     * TODO: This is somewhat non-deterministic.
2253
     */
2254
0
    if (xmlNoNetExists(url))
2255
0
        return(XML_ERR_OK);
2256
2257
    /* Backup and reset last error */
2258
0
    lastError = xmlGetLastErrorInternal();
2259
0
    oldError = *lastError;
2260
0
    lastError->code = XML_ERR_OK;
2261
2262
    /*
2263
     * Do a local lookup
2264
     */
2265
0
    if (localCatalogs != NULL) {
2266
0
        resource = (char *) xmlCatalogLocalResolve(localCatalogs,
2267
0
                                                   BAD_CAST publicId,
2268
0
                                                   BAD_CAST url);
2269
0
    }
2270
    /*
2271
     * Try a global lookup
2272
     */
2273
0
    if ((resource == NULL) && (allowGlobal)) {
2274
0
        resource = (char *) xmlCatalogResolve(BAD_CAST publicId,
2275
0
                                              BAD_CAST url);
2276
0
    }
2277
2278
    /*
2279
     * Try to resolve url using URI rules.
2280
     *
2281
     * TODO: We should consider using only a single resolution
2282
     * mechanism depending on resource type. Either by external ID
2283
     * or by URI.
2284
     */
2285
0
    if ((resource == NULL) && (url != NULL)) {
2286
0
        if (localCatalogs != NULL) {
2287
0
            resource = (char *) xmlCatalogLocalResolveURI(localCatalogs,
2288
0
                                                          BAD_CAST url);
2289
0
        }
2290
0
        if ((resource == NULL) && (allowGlobal)) {
2291
0
            resource = (char *) xmlCatalogResolveURI(BAD_CAST url);
2292
0
        }
2293
0
    }
2294
2295
0
    code = lastError->code;
2296
0
    if (code == XML_ERR_OK) {
2297
0
        *out = resource;
2298
0
    } else {
2299
0
        xmlFree(resource);
2300
0
    }
2301
2302
0
    *lastError = oldError;
2303
2304
0
    return(code);
2305
0
}
2306
2307
static char *
2308
xmlCtxtResolveFromCatalog(xmlParserCtxtPtr ctxt, const char *url,
2309
0
                          const char *publicId) {
2310
0
    char *resource;
2311
0
    void *localCatalogs = NULL;
2312
0
    int allowGlobal = 1;
2313
0
    xmlParserErrors code;
2314
2315
0
    if (ctxt != NULL) {
2316
        /*
2317
         * Loading of HTML documents shouldn't use XML catalogs.
2318
         */
2319
0
        if (ctxt->html)
2320
0
            return(NULL);
2321
2322
0
        localCatalogs = ctxt->catalogs;
2323
2324
0
        if (ctxt->options & XML_PARSE_NO_SYS_CATALOG)
2325
0
            allowGlobal = 0;
2326
0
    }
2327
2328
0
    switch (xmlCatalogGetDefaults()) {
2329
0
        case XML_CATA_ALLOW_NONE:
2330
0
            return(NULL);
2331
0
        case XML_CATA_ALLOW_DOCUMENT:
2332
0
            allowGlobal = 0;
2333
0
            break;
2334
0
        case XML_CATA_ALLOW_GLOBAL:
2335
0
            localCatalogs = NULL;
2336
0
            break;
2337
0
        case XML_CATA_ALLOW_ALL:
2338
0
            break;
2339
0
    }
2340
2341
0
    code = xmlResolveFromCatalog(url, publicId, localCatalogs,
2342
0
                                 allowGlobal, &resource);
2343
0
    if (code != XML_ERR_OK)
2344
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_CATALOG, code, XML_ERR_ERROR,
2345
0
                   BAD_CAST url, BAD_CAST publicId, NULL, 0,
2346
0
                   "%s\n", xmlErrString(code), NULL);
2347
2348
0
    return(resource);
2349
0
}
2350
2351
#endif
2352
2353
/**
2354
 * @deprecated Internal function, don't use.
2355
 *
2356
 * @param ctxt  an XML parser context
2357
 * @param ret  an XML parser input
2358
 * @returns NULL.
2359
 */
2360
xmlParserInput *
2361
xmlCheckHTTPInput(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
2362
0
                  xmlParserInput *ret ATTRIBUTE_UNUSED) {
2363
0
    return(NULL);
2364
0
}
2365
2366
/**
2367
 * Create a new input stream based on a file or a URL.
2368
 *
2369
 * The flag XML_INPUT_UNZIP allows decompression.
2370
 *
2371
 * The flag XML_INPUT_NETWORK allows network access.
2372
 *
2373
 * The following resource loaders will be called if they were
2374
 * registered (in order of precedence):
2375
 *
2376
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2377
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2378
 * - the default loader which will return
2379
 *   - the result from a matching global input callback set with
2380
 *     #xmlRegisterInputCallbacks (deprecated)
2381
 *   - a file opened from the filesystem, with automatic detection
2382
 *     of compressed files if support is compiled in.
2383
 *
2384
 * @since 2.14.0
2385
 *
2386
 * @param url  the filename to use as entity
2387
 * @param flags  XML_INPUT flags
2388
 * @param out  pointer to new parser input
2389
 * @returns an xmlParserErrors code.
2390
 */
2391
xmlParserErrors
2392
xmlNewInputFromUrl(const char *url, xmlParserInputFlags flags,
2393
0
                   xmlParserInput **out) {
2394
0
    char *resource = NULL;
2395
0
    xmlParserInputBufferPtr buf;
2396
0
    xmlParserInputPtr input;
2397
0
    xmlParserErrors code = XML_ERR_OK;
2398
2399
0
    if (out == NULL)
2400
0
        return(XML_ERR_ARGUMENT);
2401
0
    *out = NULL;
2402
0
    if (url == NULL)
2403
0
        return(XML_ERR_ARGUMENT);
2404
2405
0
#ifdef LIBXML_CATALOG_ENABLED
2406
0
    if (flags & XML_INPUT_USE_SYS_CATALOG) {
2407
0
        code = xmlResolveFromCatalog(url, NULL, NULL, 1, &resource);
2408
0
        if (code != XML_ERR_OK)
2409
0
            return(code);
2410
0
        if (resource != NULL)
2411
0
            url = resource;
2412
0
    }
2413
0
#endif
2414
2415
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2416
0
        buf = xmlParserInputBufferCreateFilenameValue(url,
2417
0
                XML_CHAR_ENCODING_NONE);
2418
0
        if (buf == NULL)
2419
0
            code = XML_IO_ENOENT;
2420
0
    } else {
2421
0
        code = xmlParserInputBufferCreateUrl(url, XML_CHAR_ENCODING_NONE,
2422
0
                                             flags, &buf);
2423
0
    }
2424
2425
0
    if (code == XML_ERR_OK) {
2426
0
        input = xmlNewInputInternal(buf, url);
2427
0
        if (input == NULL)
2428
0
            code = XML_ERR_NO_MEMORY;
2429
2430
0
        *out = input;
2431
0
    }
2432
2433
0
    if (resource != NULL)
2434
0
        xmlFree(resource);
2435
0
    return(code);
2436
0
}
2437
2438
/**
2439
 * Create a new input stream based on a file or a URL.
2440
 *
2441
 * Unlike the default external entity loader, this function
2442
 * doesn't use XML catalogs.
2443
 *
2444
 * @deprecated Use #xmlNewInputFromUrl.
2445
 *
2446
 * @param ctxt  an XML parser context
2447
 * @param filename  the filename to use as entity
2448
 * @returns the new input stream or NULL in case of error
2449
 */
2450
xmlParserInput *
2451
0
xmlNewInputFromFile(xmlParserCtxt *ctxt, const char *filename) {
2452
0
    xmlParserInputPtr input;
2453
0
    xmlParserInputFlags flags = 0;
2454
0
    xmlParserErrors code;
2455
2456
0
    if ((ctxt == NULL) || (filename == NULL))
2457
0
        return(NULL);
2458
2459
0
    if (ctxt->options & XML_PARSE_UNZIP)
2460
0
        flags |= XML_INPUT_UNZIP;
2461
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2462
0
        flags |= XML_INPUT_NETWORK;
2463
2464
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2465
0
    if (code != XML_ERR_OK) {
2466
0
        xmlCtxtErrIO(ctxt, code, filename);
2467
0
        return(NULL);
2468
0
    }
2469
2470
0
    return(input);
2471
0
}
2472
2473
/**
2474
 * Default external entity loader: resolves `url` via catalogs (if enabled) and loads it.
2475
 *
2476
 * @param url  the URL or system ID for the entity to load
2477
 * @param publicId  the public ID for the entity to load (optional)
2478
 * @param ctxt  the context in which the entity is called or NULL
2479
 * @returns a new allocated xmlParserInput, or NULL.
2480
 */
2481
static xmlParserInputPtr
2482
xmlDefaultExternalEntityLoader(const char *url, const char *publicId,
2483
                               xmlParserCtxtPtr ctxt)
2484
0
{
2485
0
    xmlParserInputPtr input = NULL;
2486
0
    char *resource = NULL;
2487
2488
0
    (void) publicId;
2489
2490
0
    if (url == NULL)
2491
0
        return(NULL);
2492
2493
0
#ifdef LIBXML_CATALOG_ENABLED
2494
0
    resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2495
0
    if (resource != NULL)
2496
0
  url = resource;
2497
0
#endif
2498
2499
    /*
2500
     * Several downstream test suites expect this error whenever
2501
     * an http URI is passed and NONET is set.
2502
     */
2503
0
    if ((ctxt != NULL) &&
2504
0
        (ctxt->options & XML_PARSE_NONET) &&
2505
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2506
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2507
0
    } else {
2508
0
        input = xmlNewInputFromFile(ctxt, url);
2509
0
    }
2510
2511
0
    if (resource != NULL)
2512
0
  xmlFree(resource);
2513
0
    return(input);
2514
0
}
2515
2516
/**
2517
 * A specific entity loader disabling network accesses, though still
2518
 * allowing local catalog accesses for resolution.
2519
 *
2520
 * @deprecated Use XML_PARSE_NONET.
2521
 *
2522
 * @param URL  the URL or system ID for the entity to load
2523
 * @param publicId  the public ID for the entity to load
2524
 * @param ctxt  the context in which the entity is called or NULL
2525
 * @returns a new allocated xmlParserInput, or NULL.
2526
 */
2527
xmlParserInput *
2528
xmlNoNetExternalEntityLoader(const char *URL, const char *publicId,
2529
0
                             xmlParserCtxt *ctxt) {
2530
0
    int oldOptions = 0;
2531
0
    xmlParserInputPtr input;
2532
2533
0
    if (ctxt != NULL) {
2534
0
        oldOptions = ctxt->options;
2535
0
        ctxt->options |= XML_PARSE_NONET;
2536
0
    }
2537
2538
0
    input = xmlDefaultExternalEntityLoader(URL, publicId, ctxt);
2539
2540
0
    if (ctxt != NULL)
2541
0
        ctxt->options = oldOptions;
2542
2543
0
    return(input);
2544
0
}
2545
2546
/*
2547
 * This global has to die eventually
2548
 */
2549
static xmlExternalEntityLoader
2550
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2551
2552
/**
2553
 * Changes the default external entity resolver function for the
2554
 * application.
2555
 *
2556
 * @deprecated This is a global setting and not thread-safe. Use
2557
 * #xmlCtxtSetResourceLoader or similar functions.
2558
 *
2559
 * @param f  the new entity resolver function
2560
 */
2561
void
2562
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2563
0
    xmlCurrentExternalEntityLoader = f;
2564
0
}
2565
2566
/**
2567
 * Get the default external entity resolver function for the application
2568
 *
2569
 * @deprecated See #xmlSetExternalEntityLoader.
2570
 *
2571
 * @returns the #xmlExternalEntityLoader function pointer
2572
 */
2573
xmlExternalEntityLoader
2574
0
xmlGetExternalEntityLoader(void) {
2575
0
    return(xmlCurrentExternalEntityLoader);
2576
0
}
2577
2578
/**
2579
 * Installs a custom callback to load documents, DTDs or external
2580
 * entities.
2581
 *
2582
 * If `vctxt` is NULL, the parser context will be passed.
2583
 *
2584
 * @since 2.14.0
2585
 * @param ctxt  parser context
2586
 * @param loader  callback
2587
 * @param vctxt  user data (optional)
2588
 */
2589
void
2590
xmlCtxtSetResourceLoader(xmlParserCtxt *ctxt, xmlResourceLoader loader,
2591
0
                         void *vctxt) {
2592
0
    if (ctxt == NULL)
2593
0
        return;
2594
2595
0
    ctxt->resourceLoader = loader;
2596
0
    ctxt->resourceCtxt = vctxt;
2597
0
}
2598
2599
/**
2600
 * @param ctxt  parser context
2601
 * @param url  the URL or system ID for the entity to load
2602
 * @param publicId  the public ID for the entity to load (optional)
2603
 * @param type  resource type
2604
 * @returns the xmlParserInput or NULL in case of error.
2605
 */
2606
xmlParserInput *
2607
xmlLoadResource(xmlParserCtxt *ctxt, const char *url, const char *publicId,
2608
0
                xmlResourceType type) {
2609
0
    char *canonicFilename;
2610
0
    xmlParserInputPtr ret;
2611
2612
0
    if (url == NULL)
2613
0
        return(NULL);
2614
2615
0
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2616
0
        char *resource = NULL;
2617
0
        void *userData;
2618
0
        xmlParserInputFlags flags = 0;
2619
0
        int code;
2620
2621
0
#ifdef LIBXML_CATALOG_ENABLED
2622
0
        resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2623
0
        if (resource != NULL)
2624
0
            url = resource;
2625
0
#endif
2626
2627
0
        if (ctxt->options & XML_PARSE_UNZIP)
2628
0
            flags |= XML_INPUT_UNZIP;
2629
0
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2630
0
            flags |= XML_INPUT_NETWORK;
2631
2632
0
        userData = ctxt->resourceCtxt;
2633
0
        if (userData == NULL)
2634
0
            userData = ctxt;
2635
2636
0
        code = ctxt->resourceLoader(userData, url, publicId, type,
2637
0
                                    flags, &ret);
2638
0
        if (code != XML_ERR_OK) {
2639
0
            xmlCtxtErrIO(ctxt, code, url);
2640
0
            ret = NULL;
2641
0
        }
2642
0
        if (resource != NULL)
2643
0
            xmlFree(resource);
2644
0
        return(ret);
2645
0
    }
2646
2647
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2648
0
    if (canonicFilename == NULL) {
2649
0
        xmlCtxtErrMemory(ctxt);
2650
0
        return(NULL);
2651
0
    }
2652
2653
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2654
0
    xmlFree(canonicFilename);
2655
0
    return(ret);
2656
0
}
2657
2658
/**
2659
 * `URL` is a filename or URL. If it contains the substring "://",
2660
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2661
 * treated as a filesystem path.
2662
 *
2663
 * `publicId` is an optional XML public ID, typically from a doctype
2664
 * declaration. It is used for catalog lookups.
2665
 *
2666
 * If catalog lookup is enabled (default is yes) and URL or ID are
2667
 * found in system or local XML catalogs, URL is replaced with the
2668
 * result. Then the following resource loaders will be called if
2669
 * they were registered (in order of precedence):
2670
 *
2671
 * - the resource loader set with #xmlCtxtSetResourceLoader
2672
 * - the global external entity loader set with
2673
 *   #xmlSetExternalEntityLoader (without catalog resolution,
2674
 *   deprecated)
2675
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2676
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2677
 * - the default loader which will return
2678
 *   - the result from a matching global input callback set with
2679
 *     #xmlRegisterInputCallbacks (deprecated)
2680
 *   - a file opened from the filesystem, with automatic detection
2681
 *     of compressed files if support is compiled in.
2682
 *
2683
 * @param URL  the URL or system ID for the entity to load
2684
 * @param publicId  the public ID for the entity to load (optional)
2685
 * @param ctxt  the context in which the entity is called or NULL
2686
 * @returns the xmlParserInput or NULL
2687
 */
2688
xmlParserInput *
2689
xmlLoadExternalEntity(const char *URL, const char *publicId,
2690
0
                      xmlParserCtxt *ctxt) {
2691
0
    return(xmlLoadResource(ctxt, URL, publicId, XML_RESOURCE_UNKNOWN));
2692
0
}
2693
2694
/************************************************************************
2695
 *                  *
2696
 *    Commodity functions to handle parser contexts   *
2697
 *                  *
2698
 ************************************************************************/
2699
2700
/**
2701
 * Initialize a SAX parser context
2702
 *
2703
 * @param ctxt  XML parser context
2704
 * @param sax  SAX handler
2705
 * @param userData  user data
2706
 * @returns 0 in case of success and -1 in case of error
2707
 */
2708
2709
static int
2710
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2711
                     void *userData)
2712
15.1k
{
2713
15.1k
    xmlParserInputPtr input;
2714
15.1k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2715
15.1k
    size_t initialNodeTabSize = 1;
2716
#else
2717
    size_t initialNodeTabSize = 10;
2718
#endif
2719
2720
15.1k
    if (ctxt == NULL)
2721
0
        return(-1);
2722
2723
15.1k
    if (ctxt->dict == NULL)
2724
15.1k
  ctxt->dict = xmlDictCreate();
2725
15.1k
    if (ctxt->dict == NULL)
2726
0
  return(-1);
2727
2728
15.1k
    if (ctxt->sax == NULL)
2729
15.1k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2730
15.1k
    if (ctxt->sax == NULL)
2731
0
  return(-1);
2732
15.1k
    if (sax == NULL) {
2733
15.1k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2734
15.1k
        xmlSAXVersion(ctxt->sax, 2);
2735
15.1k
        ctxt->userData = ctxt;
2736
15.1k
    } else {
2737
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2738
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2739
0
        } else {
2740
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2741
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2742
0
        }
2743
0
        ctxt->userData = userData ? userData : ctxt;
2744
0
    }
2745
2746
15.1k
    ctxt->maxatts = 0;
2747
15.1k
    ctxt->atts = NULL;
2748
    /* Allocate the Input stack */
2749
15.1k
    if (ctxt->inputTab == NULL) {
2750
15.1k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2751
15.1k
        size_t initialSize = 1;
2752
#else
2753
        size_t initialSize = 5;
2754
#endif
2755
2756
15.1k
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2757
15.1k
  ctxt->inputMax = initialSize;
2758
15.1k
    }
2759
15.1k
    if (ctxt->inputTab == NULL)
2760
0
  return(-1);
2761
15.1k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2762
0
        xmlFreeInputStream(input);
2763
0
    }
2764
15.1k
    ctxt->inputNr = 0;
2765
15.1k
    ctxt->input = NULL;
2766
2767
15.1k
    ctxt->version = NULL;
2768
15.1k
    ctxt->encoding = NULL;
2769
15.1k
    ctxt->standalone = -1;
2770
15.1k
    ctxt->hasExternalSubset = 0;
2771
15.1k
    ctxt->hasPErefs = 0;
2772
15.1k
    ctxt->html = 0;
2773
15.1k
    ctxt->instate = XML_PARSER_START;
2774
2775
    /* Allocate the Node stack */
2776
15.1k
    if (ctxt->nodeTab == NULL) {
2777
15.1k
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2778
15.1k
  ctxt->nodeMax = initialNodeTabSize;
2779
15.1k
    }
2780
15.1k
    if (ctxt->nodeTab == NULL)
2781
0
  return(-1);
2782
15.1k
    ctxt->nodeNr = 0;
2783
15.1k
    ctxt->node = NULL;
2784
2785
    /* Allocate the Name stack */
2786
15.1k
    if (ctxt->nameTab == NULL) {
2787
15.1k
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2788
15.1k
  ctxt->nameMax = initialNodeTabSize;
2789
15.1k
    }
2790
15.1k
    if (ctxt->nameTab == NULL)
2791
0
  return(-1);
2792
15.1k
    ctxt->nameNr = 0;
2793
15.1k
    ctxt->name = NULL;
2794
2795
    /* Allocate the space stack */
2796
15.1k
    if (ctxt->spaceTab == NULL) {
2797
15.1k
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2798
15.1k
  ctxt->spaceMax = initialNodeTabSize;
2799
15.1k
    }
2800
15.1k
    if (ctxt->spaceTab == NULL)
2801
0
  return(-1);
2802
15.1k
    ctxt->spaceNr = 1;
2803
15.1k
    ctxt->spaceTab[0] = -1;
2804
15.1k
    ctxt->space = &ctxt->spaceTab[0];
2805
15.1k
    ctxt->myDoc = NULL;
2806
15.1k
    ctxt->wellFormed = 1;
2807
15.1k
    ctxt->nsWellFormed = 1;
2808
15.1k
    ctxt->valid = 1;
2809
2810
15.1k
    ctxt->options = XML_PARSE_NODICT;
2811
2812
    /*
2813
     * Initialize some parser options from deprecated global variables.
2814
     * Note that the "modern" API taking options arguments or
2815
     * xmlCtxtSetOptions will ignore these defaults. They're only
2816
     * relevant if old API functions like xmlParseFile are used.
2817
     */
2818
15.1k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2819
15.1k
    if (ctxt->loadsubset) {
2820
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2821
0
    }
2822
15.1k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2823
15.1k
    if (ctxt->validate) {
2824
0
        ctxt->options |= XML_PARSE_DTDVALID;
2825
0
    }
2826
15.1k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2827
15.1k
    if (ctxt->pedantic) {
2828
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2829
0
    }
2830
15.1k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2831
15.1k
    if (ctxt->keepBlanks == 0) {
2832
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2833
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2834
0
    }
2835
15.1k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2836
15.1k
    if (ctxt->replaceEntities) {
2837
0
        ctxt->options |= XML_PARSE_NOENT;
2838
0
    }
2839
15.1k
    if (xmlGetWarningsDefaultValue == 0)
2840
0
        ctxt->options |= XML_PARSE_NOWARNING;
2841
2842
15.1k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2843
15.1k
    ctxt->vctxt.userData = ctxt;
2844
15.1k
    ctxt->vctxt.error = xmlParserValidityError;
2845
15.1k
    ctxt->vctxt.warning = xmlParserValidityWarning;
2846
2847
15.1k
    ctxt->record_info = 0;
2848
15.1k
    ctxt->checkIndex = 0;
2849
15.1k
    ctxt->inSubset = 0;
2850
15.1k
    ctxt->errNo = XML_ERR_OK;
2851
15.1k
    ctxt->depth = 0;
2852
15.1k
    ctxt->catalogs = NULL;
2853
15.1k
    ctxt->sizeentities = 0;
2854
15.1k
    ctxt->sizeentcopy = 0;
2855
15.1k
    ctxt->input_id = 1;
2856
15.1k
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2857
15.1k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2858
2859
15.1k
    if (ctxt->nsdb == NULL) {
2860
15.1k
        ctxt->nsdb = xmlParserNsCreate();
2861
15.1k
        if (ctxt->nsdb == NULL)
2862
0
            return(-1);
2863
15.1k
    }
2864
2865
15.1k
    return(0);
2866
15.1k
}
2867
2868
/**
2869
 * Initialize a parser context
2870
 *
2871
 * @deprecated Internal function which will be made private in a future
2872
 * version.
2873
 *
2874
 * @param ctxt  an XML parser context
2875
 * @returns 0 in case of success and -1 in case of error
2876
 */
2877
2878
int
2879
xmlInitParserCtxt(xmlParserCtxt *ctxt)
2880
0
{
2881
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2882
0
}
2883
2884
/**
2885
 * Free all the memory used by a parser context. However the parsed
2886
 * document in ctxt->myDoc is not freed.
2887
 *
2888
 * @param ctxt  an XML parser context
2889
 */
2890
2891
void
2892
xmlFreeParserCtxt(xmlParserCtxt *ctxt)
2893
15.1k
{
2894
15.1k
    xmlParserInputPtr input;
2895
2896
15.1k
    if (ctxt == NULL) return;
2897
2898
30.2k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2899
15.1k
        xmlFreeInputStream(input);
2900
15.1k
    }
2901
15.1k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2902
15.1k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2903
15.1k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2904
15.1k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2905
15.1k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2906
15.1k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2907
15.1k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2908
15.1k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2909
15.1k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2910
15.1k
#ifdef LIBXML_SAX1_ENABLED
2911
15.1k
    if ((ctxt->sax != NULL) &&
2912
15.1k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2913
#else
2914
    if (ctxt->sax != NULL)
2915
#endif /* LIBXML_SAX1_ENABLED */
2916
15.1k
        xmlFree(ctxt->sax);
2917
15.1k
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2918
15.1k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2919
15.1k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2920
15.1k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2921
15.1k
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2922
15.1k
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2923
15.1k
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2924
15.1k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2925
15.1k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2926
15.1k
    if (ctxt->attsDefault != NULL)
2927
1.61k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2928
15.1k
    if (ctxt->attsSpecial != NULL)
2929
1.80k
        xmlHashFree(ctxt->attsSpecial, NULL);
2930
15.1k
    if (ctxt->freeElems != NULL) {
2931
0
        xmlNodePtr cur, next;
2932
2933
0
  cur = ctxt->freeElems;
2934
0
  while (cur != NULL) {
2935
0
      next = cur->next;
2936
0
      xmlFree(cur);
2937
0
      cur = next;
2938
0
  }
2939
0
    }
2940
15.1k
    if (ctxt->freeAttrs != NULL) {
2941
0
        xmlAttrPtr cur, next;
2942
2943
0
  cur = ctxt->freeAttrs;
2944
0
  while (cur != NULL) {
2945
0
      next = cur->next;
2946
0
      xmlFree(cur);
2947
0
      cur = next;
2948
0
  }
2949
0
    }
2950
    /*
2951
     * cleanup the error strings
2952
     */
2953
15.1k
    if (ctxt->lastError.message != NULL)
2954
15.0k
        xmlFree(ctxt->lastError.message);
2955
15.1k
    if (ctxt->lastError.file != NULL)
2956
0
        xmlFree(ctxt->lastError.file);
2957
15.1k
    if (ctxt->lastError.str1 != NULL)
2958
8.46k
        xmlFree(ctxt->lastError.str1);
2959
15.1k
    if (ctxt->lastError.str2 != NULL)
2960
311
        xmlFree(ctxt->lastError.str2);
2961
15.1k
    if (ctxt->lastError.str3 != NULL)
2962
87
        xmlFree(ctxt->lastError.str3);
2963
2964
15.1k
#ifdef LIBXML_CATALOG_ENABLED
2965
15.1k
    if (ctxt->catalogs != NULL)
2966
0
  xmlCatalogFreeLocal(ctxt->catalogs);
2967
15.1k
#endif
2968
15.1k
    xmlFree(ctxt);
2969
15.1k
}
2970
2971
/**
2972
 * Allocate and initialize a new parser context.
2973
 *
2974
 * @returns the xmlParserCtxt or NULL
2975
 */
2976
2977
xmlParserCtxt *
2978
xmlNewParserCtxt(void)
2979
15.1k
{
2980
15.1k
    return(xmlNewSAXParserCtxt(NULL, NULL));
2981
15.1k
}
2982
2983
/**
2984
 * Allocate and initialize a new SAX parser context. If userData is NULL,
2985
 * the parser context will be passed as user data.
2986
 *
2987
 * @since 2.11.0
2988
 *
2989
 * If you want support older versions,
2990
 * it's best to invoke #xmlNewParserCtxt and set ctxt->sax with
2991
 * struct assignment.
2992
 *
2993
 * @param sax  SAX handler
2994
 * @param userData  user data
2995
 * @returns the xmlParserCtxt or NULL if memory allocation failed.
2996
 */
2997
2998
xmlParserCtxt *
2999
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3000
15.1k
{
3001
15.1k
    xmlParserCtxtPtr ctxt;
3002
3003
15.1k
    xmlInitParser();
3004
3005
15.1k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3006
15.1k
    if (ctxt == NULL)
3007
0
  return(NULL);
3008
15.1k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3009
15.1k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3010
0
        xmlFreeParserCtxt(ctxt);
3011
0
  return(NULL);
3012
0
    }
3013
15.1k
    return(ctxt);
3014
15.1k
}
3015
3016
/**
3017
 * @since 2.14.0
3018
 *
3019
 * @param ctxt  parser context
3020
 * @returns the private application data.
3021
 */
3022
void *
3023
0
xmlCtxtGetPrivate(xmlParserCtxt *ctxt) {
3024
0
    if (ctxt == NULL)
3025
0
        return(NULL);
3026
3027
0
    return(ctxt->_private);
3028
0
}
3029
3030
/**
3031
 * Set the private application data.
3032
 *
3033
 * @since 2.14.0
3034
 *
3035
 * @param ctxt  parser context
3036
 * @param priv  private application data
3037
 */
3038
void
3039
0
xmlCtxtSetPrivate(xmlParserCtxt *ctxt, void *priv) {
3040
0
    if (ctxt == NULL)
3041
0
        return;
3042
3043
0
    ctxt->_private = priv;
3044
0
}
3045
3046
/**
3047
 * @since 2.14.0
3048
 *
3049
 * @param ctxt  parser context
3050
 * @returns the local catalogs.
3051
 */
3052
void *
3053
0
xmlCtxtGetCatalogs(xmlParserCtxt *ctxt) {
3054
0
    if (ctxt == NULL)
3055
0
        return(NULL);
3056
3057
0
    return(ctxt->catalogs);
3058
0
}
3059
3060
/**
3061
 * Set the local catalogs.
3062
 *
3063
 * @since 2.14.0
3064
 *
3065
 * @param ctxt  parser context
3066
 * @param catalogs  catalogs pointer
3067
 */
3068
void
3069
0
xmlCtxtSetCatalogs(xmlParserCtxt *ctxt, void *catalogs) {
3070
0
    if (ctxt == NULL)
3071
0
        return;
3072
3073
0
    ctxt->catalogs = catalogs;
3074
0
}
3075
3076
/**
3077
 * @since 2.14.0
3078
 *
3079
 * @param ctxt  parser context
3080
 * @returns the dictionary.
3081
 */
3082
xmlDict *
3083
0
xmlCtxtGetDict(xmlParserCtxt *ctxt) {
3084
0
    if (ctxt == NULL)
3085
0
        return(NULL);
3086
3087
0
    return(ctxt->dict);
3088
0
}
3089
3090
/**
3091
 * Set the dictionary. This should only be done immediately after
3092
 * creating a parser context.
3093
 *
3094
 * @since 2.14.0
3095
 *
3096
 * @param ctxt  parser context
3097
 * @param dict  dictionary
3098
 */
3099
void
3100
0
xmlCtxtSetDict(xmlParserCtxt *ctxt, xmlDict *dict) {
3101
0
    if (ctxt == NULL)
3102
0
        return;
3103
3104
0
    if (ctxt->dict != NULL)
3105
0
        xmlDictFree(ctxt->dict);
3106
3107
0
    xmlDictReference(dict);
3108
0
    ctxt->dict = dict;
3109
0
}
3110
3111
/**
3112
 * @since 2.14.0
3113
 *
3114
 * @param ctxt  parser context
3115
 * @returns the SAX handler struct. This is not a copy and must not
3116
 * be freed. Handlers can be updated.
3117
 */
3118
xmlSAXHandler *
3119
0
xmlCtxtGetSaxHandler(xmlParserCtxt *ctxt) {
3120
0
    if (ctxt == NULL)
3121
0
        return(NULL);
3122
3123
0
    return(ctxt->sax);
3124
0
}
3125
3126
/**
3127
 * Set the SAX handler struct to a copy of `sax`.
3128
 *
3129
 * @since 2.14.0
3130
 *
3131
 * @param ctxt  parser context
3132
 * @param sax  SAX handler
3133
 * @returns 0 on success or -1 if arguments are invalid or a memory
3134
 * allocation failed.
3135
 */
3136
int
3137
0
xmlCtxtSetSaxHandler(xmlParserCtxt *ctxt, const xmlSAXHandler *sax) {
3138
0
    xmlSAXHandler *copy;
3139
3140
0
    if ((ctxt == NULL) || (sax == NULL))
3141
0
        return(-1);
3142
3143
0
    copy = xmlMalloc(sizeof(*copy));
3144
0
    if (copy == NULL)
3145
0
        return(-1);
3146
3147
0
    memcpy(copy, sax, sizeof(*copy));
3148
0
    ctxt->sax = copy;
3149
3150
0
    return(0);
3151
0
}
3152
3153
/**
3154
 * @since 2.14.0
3155
 *
3156
 * @param ctxt  parser context
3157
 * @returns the parsed document or NULL if a fatal error occurred when
3158
 * parsing. The document must be freed by the caller. Resets the
3159
 * context's document to NULL.
3160
 */
3161
xmlDoc *
3162
0
xmlCtxtGetDocument(xmlParserCtxt *ctxt) {
3163
0
    xmlDocPtr doc;
3164
3165
0
    if (ctxt == NULL)
3166
0
        return(NULL);
3167
3168
0
    if ((ctxt->wellFormed) ||
3169
0
        (((ctxt->recovery) || (ctxt->html)) &&
3170
0
         (!xmlCtxtIsCatastrophicError(ctxt)))) {
3171
0
        doc = ctxt->myDoc;
3172
0
    } else {
3173
0
        if (ctxt->errNo == XML_ERR_OK)
3174
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error");
3175
0
        doc = NULL;
3176
0
        xmlFreeDoc(ctxt->myDoc);
3177
0
    }
3178
0
    ctxt->myDoc = NULL;
3179
3180
0
    return(doc);
3181
0
}
3182
3183
/**
3184
 * @since 2.14.0
3185
 *
3186
 * @param ctxt  parser context
3187
 * @returns 1 if this is a HTML parser context, 0 otherwise.
3188
 */
3189
int
3190
0
xmlCtxtIsHtml(xmlParserCtxt *ctxt) {
3191
0
    if (ctxt == NULL)
3192
0
        return(0);
3193
3194
0
    return(ctxt->html ? 1 : 0);
3195
0
}
3196
3197
/**
3198
 * Check whether the parser is stopped.
3199
 *
3200
 * The parser is stopped on fatal (non-wellformedness) errors or
3201
 * on user request with #xmlStopParser.
3202
 *
3203
 * @since 2.14.0
3204
 *
3205
 * @param ctxt  parser context
3206
 * @returns 1 if the parser is stopped, 0 otherwise.
3207
 */
3208
int
3209
0
xmlCtxtIsStopped(xmlParserCtxt *ctxt) {
3210
0
    if (ctxt == NULL)
3211
0
        return(0);
3212
3213
0
    return(ctxt->disableSAX != 0);
3214
0
}
3215
3216
#ifdef LIBXML_VALID_ENABLED
3217
/**
3218
 * @since 2.14.0
3219
 *
3220
 * @param ctxt  parser context
3221
 * @returns the validation context.
3222
 */
3223
xmlValidCtxt *
3224
0
xmlCtxtGetValidCtxt(xmlParserCtxt *ctxt) {
3225
0
    if (ctxt == NULL)
3226
0
        return(NULL);
3227
3228
0
    return(&ctxt->vctxt);
3229
0
}
3230
#endif
3231
3232
/************************************************************************
3233
 *                  *
3234
 *    Handling of node information        *
3235
 *                  *
3236
 ************************************************************************/
3237
3238
/**
3239
 * Same as #xmlCtxtReset
3240
 *
3241
 * @deprecated Use #xmlCtxtReset
3242
 *
3243
 * @param ctxt  an XML parser context
3244
 */
3245
void
3246
xmlClearParserCtxt(xmlParserCtxt *ctxt)
3247
0
{
3248
0
    xmlCtxtReset(ctxt);
3249
0
}
3250
3251
3252
/**
3253
 * Find the parser node info struct for a given node
3254
 *
3255
 * @deprecated Don't use.
3256
 *
3257
 * @param ctx  an XML parser context
3258
 * @param node  an XML node within the tree
3259
 * @returns an xmlParserNodeInfo block pointer or NULL
3260
 */
3261
const xmlParserNodeInfo *
3262
xmlParserFindNodeInfo(xmlParserCtxt *ctx, xmlNode *node)
3263
0
{
3264
0
    unsigned long pos;
3265
3266
0
    if ((ctx == NULL) || (node == NULL))
3267
0
        return (NULL);
3268
    /* Find position where node should be at */
3269
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3270
0
    if (pos < ctx->node_seq.length
3271
0
        && ctx->node_seq.buffer[pos].node == node)
3272
0
        return &ctx->node_seq.buffer[pos];
3273
0
    else
3274
0
        return NULL;
3275
0
}
3276
3277
3278
/**
3279
 * Initialize (set to initial state) node info sequence
3280
 *
3281
 * @deprecated Don't use.
3282
 *
3283
 * @param seq  a node info sequence pointer
3284
 */
3285
void
3286
xmlInitNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3287
15.1k
{
3288
15.1k
    if (seq == NULL)
3289
0
        return;
3290
15.1k
    seq->length = 0;
3291
15.1k
    seq->maximum = 0;
3292
15.1k
    seq->buffer = NULL;
3293
15.1k
}
3294
3295
/**
3296
 * Clear (release memory and reinitialize) node info sequence
3297
 *
3298
 * @deprecated Don't use.
3299
 *
3300
 * @param seq  a node info sequence pointer
3301
 */
3302
void
3303
xmlClearNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3304
0
{
3305
0
    if (seq == NULL)
3306
0
        return;
3307
0
    if (seq->buffer != NULL)
3308
0
        xmlFree(seq->buffer);
3309
0
    xmlInitNodeInfoSeq(seq);
3310
0
}
3311
3312
/**
3313
 * Find the index that the info record for the given node is or
3314
 * should be at in a sorted sequence.
3315
 *
3316
 * @deprecated Don't use.
3317
 *
3318
 * @param seq  a node info sequence pointer
3319
 * @param node  an XML node pointer
3320
 * @returns a long indicating the position of the record
3321
 */
3322
unsigned long
3323
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeq *seq,
3324
                           xmlNode *node)
3325
0
{
3326
0
    unsigned long upper, lower, middle;
3327
0
    int found = 0;
3328
3329
0
    if ((seq == NULL) || (node == NULL))
3330
0
        return ((unsigned long) -1);
3331
3332
    /* Do a binary search for the key */
3333
0
    lower = 1;
3334
0
    upper = seq->length;
3335
0
    middle = 0;
3336
0
    while (lower <= upper && !found) {
3337
0
        middle = lower + (upper - lower) / 2;
3338
0
        if (node == seq->buffer[middle - 1].node)
3339
0
            found = 1;
3340
0
        else if (node < seq->buffer[middle - 1].node)
3341
0
            upper = middle - 1;
3342
0
        else
3343
0
            lower = middle + 1;
3344
0
    }
3345
3346
    /* Return position */
3347
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3348
0
        return middle;
3349
0
    else
3350
0
        return middle - 1;
3351
0
}
3352
3353
3354
/**
3355
 * Insert node info record into the sorted sequence
3356
 *
3357
 * @deprecated Don't use.
3358
 *
3359
 * @param ctxt  an XML parser context
3360
 * @param info  a node info sequence pointer
3361
 */
3362
void
3363
xmlParserAddNodeInfo(xmlParserCtxt *ctxt,
3364
                     xmlParserNodeInfo *info)
3365
0
{
3366
0
    unsigned long pos;
3367
3368
0
    if ((ctxt == NULL) || (info == NULL)) return;
3369
3370
    /* Find pos and check to see if node is already in the sequence */
3371
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3372
0
                                     info->node);
3373
3374
0
    if ((pos < ctxt->node_seq.length) &&
3375
0
        (ctxt->node_seq.buffer != NULL) &&
3376
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3377
0
        ctxt->node_seq.buffer[pos] = *info;
3378
0
    }
3379
3380
    /* Otherwise, we need to add new node to buffer */
3381
0
    else {
3382
0
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
3383
0
            xmlParserNodeInfo *tmp;
3384
0
            int newSize;
3385
3386
0
            newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
3387
0
                                      4, XML_MAX_ITEMS);
3388
0
            if (newSize < 0) {
3389
0
    xmlCtxtErrMemory(ctxt);
3390
0
                return;
3391
0
            }
3392
0
            tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
3393
0
            if (tmp == NULL) {
3394
0
    xmlCtxtErrMemory(ctxt);
3395
0
                return;
3396
0
            }
3397
0
            ctxt->node_seq.buffer = tmp;
3398
0
            ctxt->node_seq.maximum = newSize;
3399
0
        }
3400
3401
        /* If position is not at end, move elements out of the way */
3402
0
        if (pos != ctxt->node_seq.length) {
3403
0
            unsigned long i;
3404
3405
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3406
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3407
0
        }
3408
3409
        /* Copy element and increase length */
3410
0
        ctxt->node_seq.buffer[pos] = *info;
3411
0
        ctxt->node_seq.length++;
3412
0
    }
3413
0
}
3414
3415
/************************************************************************
3416
 *                  *
3417
 *    Defaults settings         *
3418
 *                  *
3419
 ************************************************************************/
3420
/**
3421
 * Set and return the previous value for enabling pedantic warnings.
3422
 *
3423
 * @deprecated Use the modern options API with XML_PARSE_PEDANTIC.
3424
 *
3425
 * @param val  int 0 or 1
3426
 * @returns the last value for 0 for no substitution, 1 for substitution.
3427
 */
3428
3429
int
3430
0
xmlPedanticParserDefault(int val) {
3431
0
    int old = xmlPedanticParserDefaultValue;
3432
3433
0
    xmlPedanticParserDefaultValue = val;
3434
0
    return(old);
3435
0
}
3436
3437
/**
3438
 * Has no effect.
3439
 *
3440
 * @deprecated Line numbers are always enabled.
3441
 *
3442
 * @param val  int 0 or 1
3443
 * @returns 1
3444
 */
3445
3446
int
3447
0
xmlLineNumbersDefault(int val ATTRIBUTE_UNUSED) {
3448
0
    return(1);
3449
0
}
3450
3451
/**
3452
 * Set and return the previous value for default entity support.
3453
 *
3454
 * @deprecated Use the modern options API with XML_PARSE_NOENT.
3455
 *
3456
 * @param val  int 0 or 1
3457
 * @returns the last value for 0 for no substitution, 1 for substitution.
3458
 */
3459
3460
int
3461
0
xmlSubstituteEntitiesDefault(int val) {
3462
0
    int old = xmlSubstituteEntitiesDefaultValue;
3463
3464
0
    xmlSubstituteEntitiesDefaultValue = val;
3465
0
    return(old);
3466
0
}
3467
3468
/**
3469
 * Set and return the previous value for default blanks text nodes support.
3470
 *
3471
 * @deprecated Use the modern options API with XML_PARSE_NOBLANKS.
3472
 *
3473
 * @param val  int 0 or 1
3474
 * @returns the last value for 0 for no substitution, 1 for substitution.
3475
 */
3476
3477
int
3478
0
xmlKeepBlanksDefault(int val) {
3479
0
    int old = xmlKeepBlanksDefaultValue;
3480
3481
0
    xmlKeepBlanksDefaultValue = val;
3482
0
#ifdef LIBXML_OUTPUT_ENABLED
3483
0
    if (!val)
3484
0
        xmlIndentTreeOutput = 1;
3485
0
#endif
3486
0
    return(old);
3487
0
}
3488