Coverage Report

Created: 2025-08-11 06:23

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * Author: Daniel Veillard
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
39
#define CUR(ctxt) ctxt->input->cur
40
#define END(ctxt) ctxt->input->end
41
42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/globals.h"
46
#include "private/io.h"
47
#include "private/memory.h"
48
#include "private/parser.h"
49
50
#ifndef SIZE_MAX
51
  #define SIZE_MAX ((size_t) -1)
52
#endif
53
54
33.2M
/*
 * Cap used by xmlCtxtVErr, applied separately to the per-context warning
 * counter (nbWarnings) and error counter (nbErrors). Once the warning
 * counter reaches it, further warnings are dropped; once the error
 * counter reaches it, further errors are dropped unless they are fatal
 * and the document is still marked well-formed.
 */
#define XML_MAX_ERRORS 100
55
56
/*
57
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
58
 * factor of serialized output after entity expansion.
59
 */
60
43.5k
#define XML_MAX_AMPLIFICATION_DEFAULT 5
61
62
/*
63
 * Various global defaults for parsing
64
 */
65
66
/**
67
 * check the compiled lib version against the include one.
68
 *
69
 * @param version  the include version number
70
 */
71
void
72
0
xmlCheckVersion(int version) {
73
0
    int myversion = LIBXML_VERSION;
74
75
0
    xmlInitParser();
76
77
0
    if ((myversion / 10000) != (version / 10000)) {
78
0
  xmlPrintErrorMessage(
79
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
80
0
    (version / 10000), (myversion / 10000));
81
0
    } else if ((myversion / 100) < (version / 100)) {
82
0
  xmlPrintErrorMessage(
83
0
    "Warning: program compiled against libxml %d using older %d\n",
84
0
    (version / 100), (myversion / 100));
85
0
    }
86
0
}
87
88
89
/************************************************************************
90
 *                  *
91
 *    Some factorized error routines        *
92
 *                  *
93
 ************************************************************************/
94
95
96
/**
97
 * Register a callback function that will be called on errors and
98
 * warnings. If handler is NULL, the error handler will be deactivated.
99
 *
100
 * If you only want to disable parser errors being printed to
101
 * stderr, use xmlParserOption XML_PARSE_NOERROR.
102
 *
103
 * This is the recommended way to collect errors from the parser and
104
 * takes precedence over all other error reporting mechanisms.
105
 * These are (in order of precedence):
106
 *
107
 * - per-context structured handler (#xmlCtxtSetErrorHandler)
108
 * - per-context structured "serror" SAX handler
109
 * - global structured handler (#xmlSetStructuredErrorFunc)
110
 * - per-context generic "error" and "warning" SAX handlers
111
 * - global generic handler (#xmlSetGenericErrorFunc)
112
 * - print to stderr
113
 *
114
 * @since 2.13.0
115
 * @param ctxt  an XML parser context
116
 * @param handler  error handler
117
 * @param data  data for error handler
118
 */
119
void
120
xmlCtxtSetErrorHandler(xmlParserCtxt *ctxt, xmlStructuredErrorFunc handler,
121
                       void *data)
122
43.5k
{
123
43.5k
    if (ctxt == NULL)
124
0
        return;
125
43.5k
    ctxt->errorHandler = handler;
126
43.5k
    ctxt->errorCtxt = data;
127
43.5k
}
128
129
/**
130
 * Get the last error raised.
131
 *
132
 * Note that the XML parser typically doesn't stop after
133
 * encountering an error and will often report multiple errors.
134
 * Most of the time, the last error isn't useful. Future
135
 * versions might return the first parser error instead.
136
 *
137
 * @param ctx  an XML parser context
138
 * @returns NULL if no error occurred or a pointer to the error
139
 */
140
const xmlError *
141
xmlCtxtGetLastError(void *ctx)
142
0
{
143
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
144
145
0
    if (ctxt == NULL)
146
0
        return (NULL);
147
0
    if (ctxt->lastError.code == XML_ERR_OK)
148
0
        return (NULL);
149
0
    return (&ctxt->lastError);
150
0
}
151
152
/**
153
 * Reset the last parser error to success. This does not change
154
 * the well-formedness status.
155
 *
156
 * @param ctx  an XML parser context
157
 */
158
void
159
xmlCtxtResetLastError(void *ctx)
160
0
{
161
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
162
163
0
    if (ctxt == NULL)
164
0
        return;
165
0
    ctxt->errNo = XML_ERR_OK;
166
0
    if (ctxt->lastError.code == XML_ERR_OK)
167
0
        return;
168
0
    xmlResetError(&ctxt->lastError);
169
0
}
170
171
/**
172
 * Handle an out-of-memory error.
173
 *
174
 * @since 2.13.0
175
 * @param ctxt  an XML parser context
176
 */
177
void
178
xmlCtxtErrMemory(xmlParserCtxt *ctxt)
179
11.3k
{
180
11.3k
    xmlStructuredErrorFunc schannel = NULL;
181
11.3k
    xmlGenericErrorFunc channel = NULL;
182
11.3k
    void *data;
183
184
11.3k
    if (ctxt == NULL) {
185
0
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
186
0
        return;
187
0
    }
188
189
11.3k
    ctxt->errNo = XML_ERR_NO_MEMORY;
190
11.3k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
191
11.3k
    ctxt->wellFormed = 0;
192
11.3k
    ctxt->disableSAX = 2;
193
194
11.3k
    if (ctxt->errorHandler) {
195
11.3k
        schannel = ctxt->errorHandler;
196
11.3k
        data = ctxt->errorCtxt;
197
11.3k
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
198
0
        (ctxt->sax->serror != NULL)) {
199
0
        schannel = ctxt->sax->serror;
200
0
        data = ctxt->userData;
201
0
    } else {
202
0
        channel = ctxt->sax->error;
203
0
        data = ctxt->userData;
204
0
    }
205
206
11.3k
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
207
11.3k
                        &ctxt->lastError);
208
11.3k
}
209
210
/**
211
 * If filename is empty, use the one from context input if available.
212
 *
213
 * Report an IO error to the parser context.
214
 *
215
 * @param ctxt  parser context
216
 * @param code  xmlParserErrors code
217
 * @param uri  filename or URI (optional)
218
 */
219
void
220
xmlCtxtErrIO(xmlParserCtxt *ctxt, int code, const char *uri)
221
139k
{
222
139k
    const char *errstr, *msg, *str1, *str2;
223
139k
    xmlErrorLevel level;
224
225
139k
    if (ctxt == NULL)
226
0
        return;
227
228
139k
    if (((code == XML_IO_ENOENT) ||
229
139k
         (code == XML_IO_UNKNOWN))) {
230
        /*
231
         * Only report a warning if a file could not be found. This should
232
         * only be done for external entities, but the external entity loader
233
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
234
         * raise an error and aborts parsing.
235
         */
236
8.18k
        if (ctxt->validate == 0)
237
4.95k
            level = XML_ERR_WARNING;
238
3.23k
        else
239
3.23k
            level = XML_ERR_ERROR;
240
131k
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
241
0
        level = XML_ERR_ERROR;
242
131k
    } else {
243
131k
        level = XML_ERR_FATAL;
244
131k
    }
245
246
139k
    errstr = xmlErrString(code);
247
248
139k
    if (uri == NULL) {
249
131k
        msg = "%s\n";
250
131k
        str1 = errstr;
251
131k
        str2 = NULL;
252
131k
    } else {
253
8.26k
        msg = "failed to load \"%s\": %s\n";
254
8.26k
        str1 = uri;
255
8.26k
        str2 = errstr;
256
8.26k
    }
257
258
139k
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
259
139k
               (const xmlChar *) uri, NULL, NULL, 0,
260
139k
               msg, str1, str2);
261
139k
}
262
263
/**
264
 * @param ctxt  parser context
265
 * @returns true if the last error is catastrophic.
266
 */
267
int
268
16.6M
xmlCtxtIsCatastrophicError(xmlParserCtxt *ctxt) {
269
16.6M
    if (ctxt == NULL)
270
0
        return(1);
271
272
16.6M
    return(xmlIsCatastrophicError(ctxt->lastError.level,
273
16.6M
                                  ctxt->lastError.code));
274
16.6M
}
275
276
/**
277
 * Raise a parser error.
278
 *
279
 * @param ctxt  a parser context
280
 * @param node  the current node or NULL
281
 * @param domain  the domain for the error
282
 * @param code  the code for the error
283
 * @param level  the xmlErrorLevel for the error
284
 * @param str1  extra string info
285
 * @param str2  extra string info
286
 * @param str3  extra string info
287
 * @param int1  extra int info
288
 * @param msg  the message to display/transmit
289
 * @param ap  extra parameters for the message display
290
 */
291
void
292
xmlCtxtVErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
293
            xmlParserErrors code, xmlErrorLevel level,
294
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
295
            int int1, const char *msg, va_list ap)
296
17.4M
{
297
17.4M
    xmlStructuredErrorFunc schannel = NULL;
298
17.4M
    xmlGenericErrorFunc channel = NULL;
299
17.4M
    void *data = NULL;
300
17.4M
    const char *file = NULL;
301
17.4M
    int line = 0;
302
17.4M
    int col = 0;
303
17.4M
    int res;
304
305
17.4M
    if (code == XML_ERR_NO_MEMORY) {
306
2.41k
        xmlCtxtErrMemory(ctxt);
307
2.41k
        return;
308
2.41k
    }
309
310
17.4M
    if (ctxt == NULL) {
311
0
        res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
312
0
                             level, NULL, 0, (const char *) str1,
313
0
                             (const char *) str2, (const char *) str3,
314
0
                             int1, 0, msg, ap);
315
0
        if (res < 0)
316
0
            xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
317
318
0
        return;
319
0
    }
320
321
17.4M
    if (PARSER_STOPPED(ctxt))
322
775k
  return;
323
324
    /* Don't overwrite catastrophic errors */
325
16.6M
    if (xmlCtxtIsCatastrophicError(ctxt))
326
0
        return;
327
328
16.6M
    if (level == XML_ERR_WARNING) {
329
127k
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330
83.6k
            return;
331
43.6k
        ctxt->nbWarnings += 1;
332
16.5M
    } else {
333
        /*
334
         * By long-standing design, the parser isn't completely
335
         * stopped on well-formedness errors. Only SAX callbacks
336
         * are disabled.
337
         *
338
         * In some situations, we really want to abort as fast
339
         * as possible.
340
         */
341
16.5M
        if (xmlIsCatastrophicError(level, code) ||
342
16.5M
            code == XML_ERR_RESOURCE_LIMIT ||
343
16.5M
            code == XML_ERR_ENTITY_LOOP) {
344
344
            ctxt->disableSAX = 2; /* really stop parser */
345
16.5M
        } else {
346
            /* Report at least one fatal error. */
347
16.5M
            if (ctxt->nbErrors >= XML_MAX_ERRORS &&
348
16.5M
                (level < XML_ERR_FATAL || ctxt->wellFormed == 0))
349
15.9M
                return;
350
351
566k
            if (level == XML_ERR_FATAL && ctxt->recovery == 0)
352
118k
                ctxt->disableSAX = 1;
353
566k
        }
354
355
566k
        if (level == XML_ERR_FATAL)
356
427k
            ctxt->wellFormed = 0;
357
566k
        ctxt->errNo = code;
358
566k
        ctxt->nbErrors += 1;
359
566k
    }
360
361
610k
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
362
610k
        ((level != XML_ERR_WARNING) ||
363
240k
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
364
231k
        if (ctxt->errorHandler) {
365
231k
            schannel = ctxt->errorHandler;
366
231k
            data = ctxt->errorCtxt;
367
231k
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
368
0
            (ctxt->sax->serror != NULL)) {
369
0
            schannel = ctxt->sax->serror;
370
0
            data = ctxt->userData;
371
0
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
372
0
            if (level == XML_ERR_WARNING)
373
0
                channel = ctxt->vctxt.warning;
374
0
            else
375
0
                channel = ctxt->vctxt.error;
376
0
            data = ctxt->vctxt.userData;
377
0
        } else {
378
0
            if (level == XML_ERR_WARNING)
379
0
                channel = ctxt->sax->warning;
380
0
            else
381
0
                channel = ctxt->sax->error;
382
0
            data = ctxt->userData;
383
0
        }
384
231k
    }
385
386
610k
    if (ctxt->input != NULL) {
387
608k
        xmlParserInputPtr input = ctxt->input;
388
389
608k
        if ((input->filename == NULL) &&
390
608k
            (ctxt->inputNr > 1)) {
391
18.6k
            input = ctxt->inputTab[ctxt->inputNr - 2];
392
18.6k
        }
393
608k
        file = input->filename;
394
608k
        line = input->line;
395
608k
        col = input->col;
396
608k
    }
397
398
610k
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
399
610k
                         level, file, line, (const char *) str1,
400
610k
                         (const char *) str2, (const char *) str3, int1, col,
401
610k
                         msg, ap);
402
403
610k
    if (res < 0) {
404
2.51k
        xmlCtxtErrMemory(ctxt);
405
2.51k
        return;
406
2.51k
    }
407
610k
}
408
409
/**
410
 * Raise a parser error.
411
 *
412
 * @param ctxt  a parser context
413
 * @param node  the current node or NULL
414
 * @param domain  the domain for the error
415
 * @param code  the code for the error
416
 * @param level  the xmlErrorLevel for the error
417
 * @param str1  extra string info
418
 * @param str2  extra string info
419
 * @param str3  extra string info
420
 * @param int1  extra int info
421
 * @param msg  the message to display/transmit
422
 * @param ...  extra parameters for the message display
423
 */
424
void
425
xmlCtxtErr(xmlParserCtxt *ctxt, xmlNode *node, xmlErrorDomain domain,
426
           xmlParserErrors code, xmlErrorLevel level,
427
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
428
           int int1, const char *msg, ...)
429
17.1M
{
430
17.1M
    va_list ap;
431
432
17.1M
    va_start(ap, msg);
433
17.1M
    xmlCtxtVErr(ctxt, node, domain, code, level,
434
17.1M
                str1, str2, str3, int1, msg, ap);
435
17.1M
    va_end(ap);
436
17.1M
}
437
438
/**
439
 * Get well-formedness and validation status after parsing. Also
440
 * reports catastrophic errors which are not related to parsing
441
 * like out-of-memory, I/O or other errors.
442
 *
443
 * @since 2.14.0
444
 *
445
 * @param ctxt  an XML parser context
446
 * @returns a bitmask of XML_STATUS_* flags ORed together.
447
 */
448
xmlParserStatus
449
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
450
0
    xmlParserStatus bits = 0;
451
452
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
453
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
454
0
                XML_STATUS_NOT_WELL_FORMED |
455
0
                XML_STATUS_NOT_NS_WELL_FORMED;
456
0
        if ((ctxt != NULL) && (ctxt->validate))
457
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
458
459
0
        return(bits);
460
0
    }
461
462
0
    if (!ctxt->wellFormed)
463
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
464
0
    if (!ctxt->nsWellFormed)
465
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
466
0
    if ((ctxt->validate) && (!ctxt->valid))
467
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
468
469
0
    return(bits);
470
0
}
471
472
/**
473
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
474
 *
475
 * @param ctxt  an XML parser context
476
 * @param code  the error number
477
 * @param info  extra information string
478
 */
479
void
480
xmlFatalErr(xmlParserCtxt *ctxt, xmlParserErrors code, const char *info)
481
3.39M
{
482
3.39M
    const char *errmsg;
483
3.39M
    xmlErrorDomain domain = XML_FROM_PARSER;
484
3.39M
    xmlErrorLevel level = XML_ERR_FATAL;
485
486
3.39M
    errmsg = xmlErrString(code);
487
488
3.39M
    if ((ctxt != NULL) && (ctxt->html)) {
489
0
        domain = XML_FROM_HTML;
490
491
        /* Continue if encoding is unsupported */
492
0
        if (code == XML_ERR_UNSUPPORTED_ENCODING)
493
0
            level = XML_ERR_ERROR;
494
0
    }
495
496
3.39M
    if (info == NULL) {
497
2.91M
        xmlCtxtErr(ctxt, NULL, domain, code, level,
498
2.91M
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
499
2.91M
    } else {
500
473k
        xmlCtxtErr(ctxt, NULL, domain, code, level,
501
473k
                   (const xmlChar *) info, NULL, NULL, 0,
502
473k
                   "%s: %s\n", errmsg, info);
503
473k
    }
504
3.39M
}
505
506
/**
507
 * Return window into current parser data.
508
 *
509
 * @param input  parser input
510
 * @param startOut  start of window (output)
511
 * @param sizeInOut  maximum size of window (in)
512
 *                   actual size of window (out)
513
 * @param offsetOut  offset of current position inside
514
 *                   window (out)
515
 */
516
void
517
xmlParserInputGetWindow(xmlParserInput *input, const xmlChar **startOut,
518
0
                        int *sizeInOut, int *offsetOut) {
519
0
    const xmlChar *cur, *base, *start;
520
0
    int n, col;
521
0
    int size = *sizeInOut;
522
523
0
    cur = input->cur;
524
0
    base = input->base;
525
    /* skip backwards over any end-of-lines */
526
0
    while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) {
527
0
  cur--;
528
0
    }
529
0
    n = 0;
530
    /* search backwards for beginning-of-line (to max buff size) */
531
0
    while ((n < size) && (cur > base) &&
532
0
     (*cur != '\n') && (*cur != '\r')) {
533
0
        cur--;
534
0
        n++;
535
0
    }
536
0
    if ((n > 0) && ((*cur == '\n') || (*cur == '\r'))) {
537
0
        cur++;
538
0
    } else {
539
        /* skip over continuation bytes */
540
0
        while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
541
0
            cur++;
542
0
    }
543
    /* calculate the error position in terms of the current position */
544
0
    col = input->cur - cur;
545
    /* search forward for end-of-line (to max buff size) */
546
0
    n = 0;
547
0
    start = cur;
548
    /* copy selected text to our buffer */
549
0
    while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
550
0
        int len = input->end - cur;
551
0
        int c = xmlGetUTF8Char(cur, &len);
552
553
0
        if ((c < 0) || (n + len > size))
554
0
            break;
555
0
        cur += len;
556
0
  n += len;
557
0
    }
558
559
    /*
560
     * col can only point to the end of the buffer if
561
     * there's space for a marker.
562
     */
563
0
    if (col >= n)
564
0
        col = n < size ? n : size - 1;
565
566
0
    *startOut = start;
567
0
    *sizeInOut = n;
568
0
    *offsetOut = col;
569
0
}
570
571
/**
 * Check whether the character is allowed by the production
 *
 * @deprecated Internal function, don't use.
 *
 * ```
 * [84] Letter ::= BaseChar | Ideographic
 * ```
 *
 * @param c  an unicode character (int)
 * @returns 1 if the character is a BaseChar or an Ideographic,
 *          0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
587
588
/************************************************************************
589
 *                  *
590
 *    Input handling functions for progressive parsing  *
591
 *                  *
592
 ************************************************************************/
593
594
/* we need to keep enough input to show errors in context */
595
6.42M
/*
 * Number of bytes before the current position that xmlParserShrink and
 * xmlParserInputShrink leave in the buffer when discarding consumed input.
 */
#define LINE_LEN        80
596
597
/**
598
 * @deprecated This function was internal and is deprecated.
599
 *
600
 * @param in  an XML parser input
601
 * @param len  an indicative size for the lookahead
602
 * @returns -1 as this is an error to use it.
603
 */
604
int
605
0
xmlParserInputRead(xmlParserInput *in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
606
0
    return(-1);
607
0
}
608
609
/**
610
 * Grow the input buffer.
611
 *
612
 * @param ctxt  an XML parser context
613
 * @returns the number of bytes read or -1 in case of error.
614
 */
615
int
616
7.41M
xmlParserGrow(xmlParserCtxt *ctxt) {
617
7.41M
    xmlParserInputPtr in = ctxt->input;
618
7.41M
    xmlParserInputBufferPtr buf = in->buf;
619
7.41M
    size_t curEnd = in->end - in->cur;
620
7.41M
    size_t curBase = in->cur - in->base;
621
7.41M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
622
3.70M
                       XML_MAX_HUGE_LENGTH :
623
7.41M
                       XML_MAX_LOOKUP_LIMIT;
624
7.41M
    int ret;
625
626
7.41M
    if (buf == NULL)
627
0
        return(0);
628
    /* Don't grow push parser buffer. */
629
7.41M
    if (PARSER_PROGRESSIVE(ctxt))
630
1.79M
        return(0);
631
    /* Don't grow memory buffers. */
632
5.62M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
633
5.26M
        return(0);
634
355k
    if (buf->error != 0)
635
90.1k
        return(-1);
636
637
265k
    if (curBase > maxLength) {
638
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
639
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
640
0
  return(-1);
641
0
    }
642
643
265k
    if (curEnd >= INPUT_CHUNK)
644
230
        return(0);
645
646
264k
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
647
264k
    xmlBufUpdateInput(buf->buffer, in, curBase);
648
649
264k
    if (ret < 0) {
650
299
        xmlCtxtErrIO(ctxt, buf->error, NULL);
651
299
    }
652
653
264k
    return(ret);
654
265k
}
655
656
/**
657
 * Raises an error with `code` if the input wasn't consumed
658
 * completely.
659
 *
660
 * @param ctxt  parser ctxt
661
 * @param code  error code
662
 */
663
void
664
20.2k
xmlParserCheckEOF(xmlParserCtxt *ctxt, xmlParserErrors code) {
665
20.2k
    xmlParserInputPtr in = ctxt->input;
666
20.2k
    xmlParserInputBufferPtr buf;
667
668
20.2k
    if (ctxt->errNo != XML_ERR_OK)
669
17.0k
        return;
670
671
3.17k
    if (in->cur < in->end) {
672
2
        xmlFatalErr(ctxt, code, NULL);
673
2
        return;
674
2
    }
675
676
3.17k
    buf = in->buf;
677
3.17k
    if ((buf != NULL) && (buf->encoder != NULL)) {
678
14
        size_t curBase = in->cur - in->base;
679
14
        size_t sizeOut = 64;
680
14
        xmlCharEncError ret;
681
682
        /*
683
         * Check for truncated multi-byte sequence
684
         */
685
14
        ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
686
14
        xmlBufUpdateInput(buf->buffer, in, curBase);
687
14
        if (ret != XML_ENC_ERR_SUCCESS) {
688
1
            xmlCtxtErrIO(ctxt, buf->error, NULL);
689
1
            return;
690
1
        }
691
692
        /* Shouldn't happen */
693
13
        if (in->cur < in->end)
694
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
695
13
    }
696
3.17k
}
697
698
/**
699
 * This function increase the input for the parser. It tries to
700
 * preserve pointers to the input buffer, and keep already read data
701
 *
702
 * @deprecated Don't use.
703
 *
704
 * @param in  an XML parser input
705
 * @param len  an indicative size for the lookahead
706
 * @returns the amount of char read, or -1 in case of error, 0 indicate the
707
 * end of this entity
708
 */
709
int
710
0
xmlParserInputGrow(xmlParserInput *in, int len) {
711
0
    int ret;
712
0
    size_t indx;
713
714
0
    if ((in == NULL) || (len < 0)) return(-1);
715
0
    if (in->buf == NULL) return(-1);
716
0
    if (in->base == NULL) return(-1);
717
0
    if (in->cur == NULL) return(-1);
718
0
    if (in->buf->buffer == NULL) return(-1);
719
720
    /* Don't grow memory buffers. */
721
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
722
0
        return(0);
723
724
0
    indx = in->cur - in->base;
725
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
726
0
        return(0);
727
0
    }
728
0
    ret = xmlParserInputBufferGrow(in->buf, len);
729
730
0
    in->base = xmlBufContent(in->buf->buffer);
731
0
    if (in->base == NULL) {
732
0
        in->base = BAD_CAST "";
733
0
        in->cur = in->base;
734
0
        in->end = in->base;
735
0
        return(-1);
736
0
    }
737
0
    in->cur = in->base + indx;
738
0
    in->end = xmlBufEnd(in->buf->buffer);
739
740
0
    return(ret);
741
0
}
742
743
/**
744
 * Shrink the input buffer.
745
 *
746
 * @param ctxt  an XML parser context
747
 */
748
void
749
3.63M
xmlParserShrink(xmlParserCtxt *ctxt) {
750
3.63M
    xmlParserInputPtr in = ctxt->input;
751
3.63M
    xmlParserInputBufferPtr buf = in->buf;
752
3.63M
    size_t used, res;
753
754
3.63M
    if (buf == NULL)
755
0
        return;
756
757
3.63M
    used = in->cur - in->base;
758
759
3.63M
    if (used > LINE_LEN) {
760
2.78M
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
761
762
2.78M
        if (res > 0) {
763
2.78M
            used -= res;
764
2.78M
            xmlSaturatedAddSizeT(&in->consumed, res);
765
2.78M
        }
766
767
2.78M
        xmlBufUpdateInput(buf->buffer, in, used);
768
2.78M
    }
769
3.63M
}
770
771
/**
772
 * This function removes used input for the parser.
773
 *
774
 * @deprecated Don't use.
775
 *
776
 * @param in  an XML parser input
777
 */
778
void
779
0
xmlParserInputShrink(xmlParserInput *in) {
780
0
    size_t used;
781
0
    size_t ret;
782
783
0
    if (in == NULL) return;
784
0
    if (in->buf == NULL) return;
785
0
    if (in->base == NULL) return;
786
0
    if (in->cur == NULL) return;
787
0
    if (in->buf->buffer == NULL) return;
788
789
0
    used = in->cur - in->base;
790
791
0
    if (used > LINE_LEN) {
792
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
793
0
  if (ret > 0) {
794
0
            used -= ret;
795
0
            xmlSaturatedAddSizeT(&in->consumed, ret);
796
0
  }
797
798
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
799
0
    }
800
0
}
801
802
/************************************************************************
803
 *                  *
804
 *    UTF8 character input and related functions    *
805
 *                  *
806
 ************************************************************************/
807
808
/**
809
 * Skip to the next char input char.
810
 *
811
 * @deprecated Internal function, do not use.
812
 *
813
 * @param ctxt  the XML parser context
814
 */
815
816
void
817
xmlNextChar(xmlParserCtxt *ctxt)
818
316M
{
819
316M
    const unsigned char *cur;
820
316M
    size_t avail;
821
316M
    int c;
822
823
316M
    if ((ctxt == NULL) || (ctxt->input == NULL))
824
0
        return;
825
826
316M
    avail = ctxt->input->end - ctxt->input->cur;
827
828
316M
    if (avail < INPUT_CHUNK) {
829
2.14M
        xmlParserGrow(ctxt);
830
2.14M
        if (ctxt->input->cur >= ctxt->input->end)
831
544
            return;
832
2.14M
        avail = ctxt->input->end - ctxt->input->cur;
833
2.14M
    }
834
835
316M
    cur = ctxt->input->cur;
836
316M
    c = *cur;
837
838
316M
    if (c < 0x80) {
839
307M
        if (c == '\n') {
840
2.16M
            ctxt->input->cur++;
841
2.16M
            ctxt->input->line++;
842
2.16M
            ctxt->input->col = 1;
843
305M
        } else if (c == '\r') {
844
            /*
845
             *   2.11 End-of-Line Handling
846
             *   the literal two-character sequence "#xD#xA" or a standalone
847
             *   literal #xD, an XML processor must pass to the application
848
             *   the single character #xA.
849
             */
850
41.0k
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
851
41.0k
            ctxt->input->line++;
852
41.0k
            ctxt->input->col = 1;
853
41.0k
            return;
854
305M
        } else {
855
305M
            ctxt->input->cur++;
856
305M
            ctxt->input->col++;
857
305M
        }
858
307M
    } else {
859
8.99M
        ctxt->input->col++;
860
861
8.99M
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
862
8.67M
            goto encoding_error;
863
864
327k
        if (c < 0xe0) {
865
            /* 2-byte code */
866
280k
            if (c < 0xc2)
867
229k
                goto encoding_error;
868
51.2k
            ctxt->input->cur += 2;
869
51.2k
        } else {
870
46.9k
            unsigned int val = (c << 8) | cur[1];
871
872
46.9k
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
873
13.1k
                goto encoding_error;
874
875
33.8k
            if (c < 0xf0) {
876
                /* 3-byte code */
877
30.4k
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
878
442
                    goto encoding_error;
879
30.0k
                ctxt->input->cur += 3;
880
30.0k
            } else {
881
3.34k
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
882
780
                    goto encoding_error;
883
884
                /* 4-byte code */
885
2.56k
                if ((val < 0xf090) || (val >= 0xf490))
886
2.02k
                    goto encoding_error;
887
537
                ctxt->input->cur += 4;
888
537
            }
889
33.8k
        }
890
327k
    }
891
892
307M
    return;
893
894
307M
encoding_error:
895
    /* Only report the first error */
896
8.91M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
897
87.5k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
898
87.5k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
899
87.5k
    }
900
8.91M
    ctxt->input->cur++;
901
8.91M
}
902
903
/**
904
 * The current char value, if using UTF-8 this may actually span multiple
905
 * bytes in the input buffer. Implement the end of line normalization:
906
 *
907
 * @deprecated Internal function, do not use.
908
 *
909
 * 2.11 End-of-Line Handling
910
 *
911
 * Wherever an external parsed entity or the literal entity value
912
 * of an internal parsed entity contains either the literal two-character
913
 * sequence "#xD#xA" or a standalone literal \#xD, an XML processor
914
 * must pass to the application the single character \#xA.
915
 * This behavior can conveniently be produced by normalizing all
916
 * line breaks to \#xA on input, before parsing.)
917
 *
918
 * @param ctxt  the XML parser context
919
 * @param len  pointer to the length of the char read
920
 * @returns the current char value and its length
921
 */
922
923
int
924
53.9M
xmlCurrentChar(xmlParserCtxt *ctxt, int *len) {
925
53.9M
    const unsigned char *cur;
926
53.9M
    size_t avail;
927
53.9M
    int c;
928
929
53.9M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
930
931
53.9M
    avail = ctxt->input->end - ctxt->input->cur;
932
933
53.9M
    if (avail < INPUT_CHUNK) {
934
2.41M
        xmlParserGrow(ctxt);
935
2.41M
        avail = ctxt->input->end - ctxt->input->cur;
936
2.41M
    }
937
938
53.9M
    cur = ctxt->input->cur;
939
53.9M
    c = *cur;
940
941
53.9M
    if (c < 0x80) {
942
  /* 1-byte code */
943
20.5M
        if (c < 0x20) {
944
            /*
945
             *   2.11 End-of-Line Handling
946
             *   the literal two-character sequence "#xD#xA" or a standalone
947
             *   literal #xD, an XML processor must pass to the application
948
             *   the single character #xA.
949
             */
950
5.64M
            if (c == '\r') {
951
                /*
952
                 * TODO: This function shouldn't change the 'cur' pointer
953
                 * as side effect, but the NEXTL macro in parser.c relies
954
                 * on this behavior when incrementing line numbers.
955
                 */
956
522k
                if (cur[1] == '\n')
957
61.3k
                    ctxt->input->cur++;
958
522k
                *len = 1;
959
522k
                c = '\n';
960
5.12M
            } else if (c == 0) {
961
512k
                if (ctxt->input->cur >= ctxt->input->end) {
962
39.5k
                    *len = 0;
963
472k
                } else {
964
472k
                    *len = 1;
965
                    /*
966
                     * TODO: Null bytes should be handled by callers,
967
                     * but this can be tricky.
968
                     */
969
472k
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
970
472k
                            "Char 0x0 out of allowed range\n");
971
472k
                }
972
4.61M
            } else {
973
4.61M
                *len = 1;
974
4.61M
            }
975
14.9M
        } else {
976
14.9M
            *len = 1;
977
14.9M
        }
978
979
20.5M
        return(c);
980
33.3M
    } else {
981
33.3M
        int val;
982
983
33.3M
        if (avail < 2)
984
1.33k
            goto incomplete_sequence;
985
33.3M
        if ((cur[1] & 0xc0) != 0x80)
986
5.11M
            goto encoding_error;
987
988
28.2M
        if (c < 0xe0) {
989
            /* 2-byte code */
990
17.0M
            if (c < 0xc2)
991
584k
                goto encoding_error;
992
16.4M
            val = (c & 0x1f) << 6;
993
16.4M
            val |= cur[1] & 0x3f;
994
16.4M
            *len = 2;
995
16.4M
        } else {
996
11.2M
            if (avail < 3)
997
98
                goto incomplete_sequence;
998
11.2M
            if ((cur[2] & 0xc0) != 0x80)
999
11.1k
                goto encoding_error;
1000
1001
11.1M
            if (c < 0xf0) {
1002
                /* 3-byte code */
1003
11.1M
                val = (c & 0xf) << 12;
1004
11.1M
                val |= (cur[1] & 0x3f) << 6;
1005
11.1M
                val |= cur[2] & 0x3f;
1006
11.1M
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
1007
1.37k
                    goto encoding_error;
1008
11.1M
                *len = 3;
1009
11.1M
            } else {
1010
24.0k
                if (avail < 4)
1011
84
                    goto incomplete_sequence;
1012
23.9k
                if ((cur[3] & 0xc0) != 0x80)
1013
6.82k
                    goto encoding_error;
1014
1015
                /* 4-byte code */
1016
17.1k
                val = (c & 0x0f) << 18;
1017
17.1k
                val |= (cur[1] & 0x3f) << 12;
1018
17.1k
                val |= (cur[2] & 0x3f) << 6;
1019
17.1k
                val |= cur[3] & 0x3f;
1020
17.1k
                if ((val < 0x10000) || (val >= 0x110000))
1021
6.14k
                    goto encoding_error;
1022
11.0k
                *len = 4;
1023
11.0k
            }
1024
11.1M
        }
1025
1026
27.6M
        return(val);
1027
28.2M
    }
1028
1029
5.72M
encoding_error:
1030
    /* Only report the first error */
1031
5.72M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
1032
18.4k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
1033
18.4k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
1034
18.4k
    }
1035
5.72M
    *len = 1;
1036
5.72M
    return(XML_INVALID_CHAR);
1037
1038
1.51k
incomplete_sequence:
1039
    /*
1040
     * An encoding problem may arise from a truncated input buffer
1041
     * splitting a character in the middle. In that case do not raise
1042
     * an error but return 0. This should only happen when push parsing
1043
     * char data.
1044
     */
1045
1.51k
    *len = 0;
1046
1.51k
    return(0);
1047
53.9M
}
1048
1049
/**
1050
 * The current char value, if using UTF-8 this may actually span multiple
1051
 * bytes in the input buffer.
1052
 *
1053
 * @deprecated Internal function, do not use.
1054
 *
1055
 * @param ctxt  the XML parser context
1056
 * @param cur  pointer to the beginning of the char
1057
 * @param len  pointer to the length of the char read
1058
 * @returns the current char value and its length
1059
 */
1060
1061
int
1062
xmlStringCurrentChar(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
1063
0
                     const xmlChar *cur, int *len) {
1064
0
    int c;
1065
1066
0
    if ((cur == NULL) || (len == NULL))
1067
0
        return(0);
1068
1069
    /* cur is zero-terminated, so we can lie about its length. */
1070
0
    *len = 4;
1071
0
    c = xmlGetUTF8Char(cur, len);
1072
1073
0
    return((c < 0) ? 0 : c);
1074
0
}
1075
1076
/**
1077
 * append the char value in the array
1078
 *
1079
 * @deprecated Internal function, don't use.
1080
 *
1081
 * @param out  pointer to an array of xmlChar
1082
 * @param val  the char value
1083
 * @returns the number of xmlChar written
1084
 */
1085
int
1086
23.8M
xmlCopyCharMultiByte(xmlChar *out, int val) {
1087
23.8M
    if ((out == NULL) || (val < 0)) return(0);
1088
    /*
1089
     * We are supposed to handle UTF8, check it's valid
1090
     * From rfc2044: encoding of the Unicode values on UTF-8:
1091
     *
1092
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1093
     * 0000 0000-0000 007F   0xxxxxxx
1094
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1095
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1096
     */
1097
23.8M
    if  (val >= 0x80) {
1098
23.8M
  xmlChar *savedout = out;
1099
23.8M
  int bits;
1100
23.8M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1101
13.9M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1102
8.43k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1103
0
  else {
1104
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1105
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1106
0
#endif
1107
0
      return(0);
1108
0
  }
1109
61.6M
  for ( ; bits >= 0; bits-= 6)
1110
37.8M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1111
23.8M
  return (out - savedout);
1112
23.8M
    }
1113
7.25k
    *out = val;
1114
7.25k
    return 1;
1115
23.8M
}
1116
1117
/**
1118
 * append the char value in the array
1119
 *
1120
 * @deprecated Don't use.
1121
 *
1122
 * @param len  Ignored, compatibility
1123
 * @param out  pointer to an array of xmlChar
1124
 * @param val  the char value
1125
 * @returns the number of xmlChar written
1126
 */
1127
1128
int
1129
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1130
0
    if ((out == NULL) || (val < 0)) return(0);
1131
    /* the len parameter is ignored */
1132
0
    if  (val >= 0x80) {
1133
0
  return(xmlCopyCharMultiByte (out, val));
1134
0
    }
1135
0
    *out = val;
1136
0
    return 1;
1137
0
}
1138
1139
/************************************************************************
1140
 *                  *
1141
 *    Commodity functions to switch encodings     *
1142
 *                  *
1143
 ************************************************************************/
1144
1145
/**
1146
 * Installs a custom implementation to convert between character
1147
 * encodings.
1148
 *
1149
 * This bypasses legacy feature like global encoding handlers or
1150
 * encoding aliases.
1151
 *
1152
 * @since 2.14.0
1153
 * @param ctxt  parser context
1154
 * @param impl  callback
1155
 * @param vctxt  user data
1156
 */
1157
void
1158
xmlCtxtSetCharEncConvImpl(xmlParserCtxt *ctxt, xmlCharEncConvImpl impl,
1159
0
                          void *vctxt) {
1160
0
    if (ctxt == NULL)
1161
0
        return;
1162
1163
0
    ctxt->convImpl = impl;
1164
0
    ctxt->convCtxt = vctxt;
1165
0
}
1166
1167
static xmlParserErrors
1168
2.37k
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1169
2.37k
    xmlChar out[200];
1170
2.37k
    xmlParserInputPtr input = ctxt->input;
1171
2.37k
    xmlCharEncodingHandlerPtr handler;
1172
2.37k
    int inlen, outlen, i;
1173
2.37k
    xmlParserErrors code;
1174
2.37k
    xmlCharEncError res;
1175
1176
2.37k
    *hout = NULL;
1177
1178
    /*
1179
     * To detect the EBCDIC code page, we convert the first 200 bytes
1180
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1181
     */
1182
2.37k
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1183
2.37k
            ctxt->convImpl, ctxt->convCtxt, &handler);
1184
2.37k
    if (code != XML_ERR_OK)
1185
2
        return(code);
1186
2.37k
    outlen = sizeof(out) - 1;
1187
2.37k
    inlen = input->end - input->cur;
1188
2.37k
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
1189
2.37k
                           /* flush */ 0);
1190
    /*
1191
     * Return the EBCDIC handler if decoding failed. The error will
1192
     * be reported later.
1193
     */
1194
2.37k
    if (res < 0)
1195
390
        goto done;
1196
1.98k
    out[outlen] = 0;
1197
1198
44.3k
    for (i = 0; i < outlen; i++) {
1199
43.8k
        if (out[i] == '>')
1200
326
            break;
1201
43.5k
        if ((out[i] == 'e') &&
1202
43.5k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1203
1.21k
            int start, cur, quote;
1204
1205
1.21k
            i += 8;
1206
1.21k
            while (IS_BLANK_CH(out[i]))
1207
836
                i += 1;
1208
1.21k
            if (out[i++] != '=')
1209
422
                break;
1210
792
            while (IS_BLANK_CH(out[i]))
1211
1.25k
                i += 1;
1212
792
            quote = out[i++];
1213
792
            if ((quote != '\'') && (quote != '"'))
1214
553
                break;
1215
239
            start = i;
1216
239
            cur = out[i];
1217
2.70k
            while (((cur >= 'a') && (cur <= 'z')) ||
1218
2.70k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1219
2.70k
                   ((cur >= '0') && (cur <= '9')) ||
1220
2.70k
                   (cur == '.') || (cur == '_') ||
1221
2.70k
                   (cur == '-'))
1222
2.46k
                cur = out[++i];
1223
239
            if (cur != quote)
1224
197
                break;
1225
42
            out[i] = 0;
1226
42
            xmlCharEncCloseFunc(handler);
1227
42
            code = xmlCreateCharEncodingHandler((char *) out + start,
1228
42
                    XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
1229
42
                    &handler);
1230
42
            if (code != XML_ERR_OK)
1231
34
                return(code);
1232
8
            *hout = handler;
1233
8
            return(XML_ERR_OK);
1234
42
        }
1235
43.5k
    }
1236
1237
2.32k
done:
1238
    /*
1239
     * Encoding handlers are stateful, so we have to recreate them.
1240
     */
1241
2.32k
    xmlCharEncCloseFunc(handler);
1242
2.32k
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1243
2.32k
            ctxt->convImpl, ctxt->convCtxt, &handler);
1244
2.32k
    if (code != XML_ERR_OK)
1245
5
        return(code);
1246
2.32k
    *hout = handler;
1247
2.32k
    return(XML_ERR_OK);
1248
2.32k
}
1249
1250
/**
1251
 * Use encoding specified by enum to decode input data. This overrides
1252
 * the encoding found in the XML declaration.
1253
 *
1254
 * This function can also be used to override the encoding of chunks
1255
 * passed to #xmlParseChunk.
1256
 *
1257
 * @param ctxt  the parser context
1258
 * @param enc  the encoding value (number)
1259
 * @returns 0 in case of success, -1 otherwise
1260
 */
1261
int
1262
xmlSwitchEncoding(xmlParserCtxt *ctxt, xmlCharEncoding enc)
1263
6.08k
{
1264
6.08k
    xmlCharEncodingHandlerPtr handler = NULL;
1265
6.08k
    int ret;
1266
6.08k
    xmlParserErrors code;
1267
1268
6.08k
    if ((ctxt == NULL) || (ctxt->input == NULL))
1269
0
        return(-1);
1270
1271
6.08k
    code = xmlLookupCharEncodingHandler(enc, &handler);
1272
6.08k
    if (code != 0) {
1273
4
        xmlFatalErr(ctxt, code, NULL);
1274
4
        return(-1);
1275
4
    }
1276
1277
6.07k
    ret = xmlSwitchToEncoding(ctxt, handler);
1278
1279
6.07k
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1280
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1281
0
    }
1282
1283
6.07k
    return(ret);
1284
6.08k
}
1285
1286
/**
1287
 * @param ctxt  the parser context
1288
 * @param input  the input strea,
1289
 * @param encoding  the encoding name
1290
 * @returns 0 in case of success, -1 otherwise
1291
 */
1292
static int
1293
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1294
0
                           const char *encoding) {
1295
0
    xmlCharEncodingHandlerPtr handler;
1296
0
    xmlParserErrors res;
1297
1298
0
    if (encoding == NULL)
1299
0
        return(-1);
1300
1301
0
    res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
1302
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1303
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1304
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1305
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1306
0
        return(-1);
1307
0
    } else if (res != XML_ERR_OK) {
1308
0
        xmlFatalErr(ctxt, res, encoding);
1309
0
        return(-1);
1310
0
    }
1311
1312
0
    res  = xmlInputSetEncodingHandler(input, handler);
1313
0
    if (res != XML_ERR_OK) {
1314
0
        xmlCtxtErrIO(ctxt, res, NULL);
1315
0
        return(-1);
1316
0
    }
1317
1318
0
    return(0);
1319
0
}
1320
1321
/**
1322
 * Use specified encoding to decode input data. This overrides the
1323
 * encoding found in the XML declaration.
1324
 *
1325
 * This function can also be used to override the encoding of chunks
1326
 * passed to #xmlParseChunk.
1327
 *
1328
 * @since 2.13.0
1329
 *
1330
 * @param ctxt  the parser context
1331
 * @param encoding  the encoding name
1332
 * @returns 0 in case of success, -1 otherwise
1333
 */
1334
int
1335
0
xmlSwitchEncodingName(xmlParserCtxt *ctxt, const char *encoding) {
1336
0
    if (ctxt == NULL)
1337
0
        return(-1);
1338
1339
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1340
0
}
1341
1342
/**
1343
 * Use encoding handler to decode input data.
1344
 *
1345
 * Closes the handler on error.
1346
 *
1347
 * @param input  the input stream
1348
 * @param handler  the encoding handler
1349
 * @returns an xmlParserErrors code.
1350
 */
1351
xmlParserErrors
1352
xmlInputSetEncodingHandler(xmlParserInput *input,
1353
11.9k
                           xmlCharEncodingHandler *handler) {
1354
11.9k
    xmlParserInputBufferPtr in;
1355
11.9k
    xmlBufPtr buf;
1356
11.9k
    xmlParserErrors code = XML_ERR_OK;
1357
1358
11.9k
    if ((input == NULL) || (input->buf == NULL)) {
1359
0
        xmlCharEncCloseFunc(handler);
1360
0
  return(XML_ERR_ARGUMENT);
1361
0
    }
1362
11.9k
    in = input->buf;
1363
1364
11.9k
    input->flags |= XML_INPUT_HAS_ENCODING;
1365
1366
    /*
1367
     * UTF-8 requires no encoding handler.
1368
     */
1369
11.9k
    if ((handler != NULL) &&
1370
11.9k
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1371
0
        xmlCharEncCloseFunc(handler);
1372
0
        handler = NULL;
1373
0
    }
1374
1375
11.9k
    if (in->encoder == handler)
1376
304
        return(XML_ERR_OK);
1377
1378
11.6k
    if (in->encoder != NULL) {
1379
        /*
1380
         * Switching encodings during parsing is a really bad idea,
1381
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1382
         * separate calls to xmlParseChunk.
1383
         *
1384
         * TODO: We should check whether the "raw" input buffer is empty and
1385
         * convert the old content using the old encoder.
1386
         */
1387
1388
0
        xmlCharEncCloseFunc(in->encoder);
1389
0
        in->encoder = handler;
1390
0
        return(XML_ERR_OK);
1391
0
    }
1392
1393
11.6k
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1394
11.6k
    if (buf == NULL) {
1395
25
        xmlCharEncCloseFunc(handler);
1396
25
        return(XML_ERR_NO_MEMORY);
1397
25
    }
1398
1399
11.6k
    in->encoder = handler;
1400
11.6k
    in->raw = in->buffer;
1401
11.6k
    in->buffer = buf;
1402
1403
    /*
1404
     * Is there already some content down the pipe to convert ?
1405
     */
1406
11.6k
    if (input->end > input->base) {
1407
11.6k
        size_t processed;
1408
11.6k
        size_t nbchars;
1409
11.6k
        xmlCharEncError res;
1410
1411
        /*
1412
         * Shrink the current input buffer.
1413
         * Move it as the raw buffer and create a new input buffer
1414
         */
1415
11.6k
        processed = input->cur - input->base;
1416
11.6k
        xmlBufShrink(in->raw, processed);
1417
11.6k
        input->consumed += processed;
1418
11.6k
        in->rawconsumed = processed;
1419
1420
        /*
1421
         * If we're push-parsing, we must convert the whole buffer.
1422
         *
1423
         * If we're pull-parsing, we could be parsing from a huge
1424
         * memory buffer which we don't want to convert completely.
1425
         */
1426
11.6k
        if (input->flags & XML_INPUT_PROGRESSIVE)
1427
1.29k
            nbchars = SIZE_MAX;
1428
10.3k
        else
1429
10.3k
            nbchars = 4000 /* MINLEN */;
1430
11.6k
        res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
1431
11.6k
        if (res != XML_ENC_ERR_SUCCESS)
1432
3.11k
            code = in->error;
1433
11.6k
    }
1434
1435
11.6k
    xmlBufResetInput(in->buffer, input);
1436
1437
11.6k
    return(code);
1438
11.6k
}
1439
1440
/**
1441
 * Use encoding handler to decode input data.
1442
 *
1443
 * @deprecated Internal function, don't use.
1444
 *
1445
 * @param ctxt  the parser context, only for error reporting
1446
 * @param input  the input stream
1447
 * @param handler  the encoding handler
1448
 * @returns 0 in case of success, -1 otherwise
1449
 */
1450
int
1451
xmlSwitchInputEncoding(xmlParserCtxt *ctxt, xmlParserInput *input,
1452
0
                       xmlCharEncodingHandler *handler) {
1453
0
    xmlParserErrors code = xmlInputSetEncodingHandler(input, handler);
1454
1455
0
    if (code != XML_ERR_OK) {
1456
0
        xmlCtxtErrIO(ctxt, code, NULL);
1457
0
        return(-1);
1458
0
    }
1459
1460
0
    return(0);
1461
0
}
1462
1463
/**
1464
 * Use encoding handler to decode input data.
1465
 *
1466
 * This function can be used to enforce the encoding of chunks passed
1467
 * to #xmlParseChunk.
1468
 *
1469
 * @param ctxt  the parser context
1470
 * @param handler  the encoding handler
1471
 * @returns 0 in case of success, -1 otherwise
1472
 */
1473
int
1474
xmlSwitchToEncoding(xmlParserCtxt *ctxt, xmlCharEncodingHandler *handler)
1475
8.41k
{
1476
8.41k
    xmlParserErrors code;
1477
1478
8.41k
    if (ctxt == NULL)
1479
0
        return(-1);
1480
1481
8.41k
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1482
8.41k
    if (code != XML_ERR_OK) {
1483
1.71k
        xmlCtxtErrIO(ctxt, code, NULL);
1484
1.71k
        return(-1);
1485
1.71k
    }
1486
1487
6.69k
    return(0);
1488
8.41k
}
1489
1490
/**
1491
 * Handle optional BOM, detect and switch to encoding.
1492
 *
1493
 * Assumes that there are at least four bytes in the input buffer.
1494
 *
1495
 * @param ctxt  the parser context
1496
 */
1497
void
1498
214k
xmlDetectEncoding(xmlParserCtxt *ctxt) {
1499
214k
    const xmlChar *in;
1500
214k
    xmlCharEncoding enc;
1501
214k
    int bomSize;
1502
214k
    int autoFlag = 0;
1503
1504
214k
    if (xmlParserGrow(ctxt) < 0)
1505
0
        return;
1506
214k
    in = ctxt->input->cur;
1507
214k
    if (ctxt->input->end - in < 4)
1508
1.29k
        return;
1509
1510
213k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1511
        /*
1512
         * If the encoding was already set, only skip the BOM which was
1513
         * possibly decoded to UTF-8.
1514
         */
1515
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1516
0
            ctxt->input->cur += 3;
1517
0
        }
1518
1519
0
        return;
1520
0
    }
1521
1522
213k
    enc = XML_CHAR_ENCODING_NONE;
1523
213k
    bomSize = 0;
1524
1525
    /*
1526
     * BOM sniffing and detection of initial bytes of an XML
1527
     * declaration.
1528
     *
1529
     * The HTML5 spec doesn't cover UTF-32 (UCS-4) or EBCDIC.
1530
     */
1531
213k
    switch (in[0]) {
1532
1.45k
        case 0x00:
1533
1.45k
            if ((!ctxt->html) &&
1534
1.45k
                (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1535
83
                enc = XML_CHAR_ENCODING_UCS4BE;
1536
83
                autoFlag = XML_INPUT_AUTO_OTHER;
1537
1.37k
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1538
                /*
1539
                 * TODO: The HTML5 spec requires to check that the
1540
                 * next codepoint is an 'x'.
1541
                 */
1542
319
                enc = XML_CHAR_ENCODING_UTF16BE;
1543
319
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1544
319
            }
1545
1.45k
            break;
1546
1547
188k
        case 0x3C:
1548
188k
            if (in[1] == 0x00) {
1549
4.11k
                if ((!ctxt->html) &&
1550
4.11k
                    (in[2] == 0x00) && (in[3] == 0x00)) {
1551
510
                    enc = XML_CHAR_ENCODING_UCS4LE;
1552
510
                    autoFlag = XML_INPUT_AUTO_OTHER;
1553
3.60k
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1554
                    /*
1555
                     * TODO: The HTML5 spec requires to check that the
1556
                     * next codepoint is an 'x'.
1557
                     */
1558
3.13k
                    enc = XML_CHAR_ENCODING_UTF16LE;
1559
3.13k
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1560
3.13k
                }
1561
4.11k
            }
1562
188k
            break;
1563
1564
3.09k
        case 0x4C:
1565
3.09k
      if ((!ctxt->html) &&
1566
3.09k
                (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1567
2.37k
          enc = XML_CHAR_ENCODING_EBCDIC;
1568
2.37k
                autoFlag = XML_INPUT_AUTO_OTHER;
1569
2.37k
            }
1570
3.09k
            break;
1571
1572
137
        case 0xEF:
1573
137
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1574
29
                enc = XML_CHAR_ENCODING_UTF8;
1575
29
                autoFlag = XML_INPUT_AUTO_UTF8;
1576
29
                bomSize = 3;
1577
29
            }
1578
137
            break;
1579
1580
2.12k
        case 0xFE:
1581
2.12k
            if (in[1] == 0xFF) {
1582
1.86k
                enc = XML_CHAR_ENCODING_UTF16BE;
1583
1.86k
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1584
1.86k
                bomSize = 2;
1585
1.86k
            }
1586
2.12k
            break;
1587
1588
354
        case 0xFF:
1589
354
            if (in[1] == 0xFE) {
1590
149
                enc = XML_CHAR_ENCODING_UTF16LE;
1591
149
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1592
149
                bomSize = 2;
1593
149
            }
1594
354
            break;
1595
213k
    }
1596
1597
213k
    if (bomSize > 0) {
1598
2.03k
        ctxt->input->cur += bomSize;
1599
2.03k
    }
1600
1601
213k
    if (enc != XML_CHAR_ENCODING_NONE) {
1602
8.45k
        ctxt->input->flags |= autoFlag;
1603
1604
8.45k
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1605
2.37k
            xmlCharEncodingHandlerPtr handler;
1606
2.37k
            xmlParserErrors res;
1607
1608
2.37k
            res = xmlDetectEBCDIC(ctxt, &handler);
1609
2.37k
            if (res != XML_ERR_OK) {
1610
41
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1611
2.33k
            } else {
1612
2.33k
                xmlSwitchToEncoding(ctxt, handler);
1613
2.33k
            }
1614
6.08k
        } else {
1615
6.08k
            xmlSwitchEncoding(ctxt, enc);
1616
6.08k
        }
1617
8.45k
    }
1618
213k
}
1619
1620
/**
1621
 * Set the encoding from a declaration in the document.
1622
 *
1623
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1624
 * about encoding mismatches.
1625
 *
1626
 * Takes ownership of 'encoding'.
1627
 *
1628
 * @param ctxt  the parser context
1629
 * @param encoding  declared encoding
1630
 */
1631
void
1632
5.05k
xmlSetDeclaredEncoding(xmlParserCtxt *ctxt, xmlChar *encoding) {
1633
5.05k
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1634
5.05k
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1635
4.27k
        xmlCharEncodingHandlerPtr handler;
1636
4.27k
        xmlParserErrors res;
1637
4.27k
        xmlCharEncFlags flags = XML_ENC_INPUT;
1638
1639
        /*
1640
         * xmlSwitchEncodingName treats unsupported encodings as
1641
         * warnings, but we want it to be an error in an encoding
1642
         * declaration.
1643
         */
1644
4.27k
        if (ctxt->html)
1645
0
            flags |= XML_ENC_HTML;
1646
4.27k
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1647
4.27k
                flags, ctxt->convImpl, ctxt->convCtxt, &handler);
1648
4.27k
        if (res != XML_ERR_OK) {
1649
741
            xmlFatalErr(ctxt, res, (const char *) encoding);
1650
741
            xmlFree(encoding);
1651
741
            return;
1652
741
        }
1653
1654
3.53k
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1655
3.53k
        if (res != XML_ERR_OK) {
1656
1.42k
            xmlCtxtErrIO(ctxt, res, NULL);
1657
1.42k
            xmlFree(encoding);
1658
1.42k
            return;
1659
1.42k
        }
1660
1661
2.10k
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1662
2.10k
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1663
10
        static const char *allowedUTF8[] = {
1664
10
            "UTF-8", "UTF8", NULL
1665
10
        };
1666
10
        static const char *allowedUTF16LE[] = {
1667
10
            "UTF-16", "UTF-16LE", "UTF16", NULL
1668
10
        };
1669
10
        static const char *allowedUTF16BE[] = {
1670
10
            "UTF-16", "UTF-16BE", "UTF16", NULL
1671
10
        };
1672
10
        const char **allowed = NULL;
1673
10
        const char *autoEnc = NULL;
1674
1675
10
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1676
8
            case XML_INPUT_AUTO_UTF8:
1677
8
                allowed = allowedUTF8;
1678
8
                autoEnc = "UTF-8";
1679
8
                break;
1680
0
            case XML_INPUT_AUTO_UTF16LE:
1681
0
                allowed = allowedUTF16LE;
1682
0
                autoEnc = "UTF-16LE";
1683
0
                break;
1684
0
            case XML_INPUT_AUTO_UTF16BE:
1685
0
                allowed = allowedUTF16BE;
1686
0
                autoEnc = "UTF-16BE";
1687
0
                break;
1688
10
        }
1689
1690
10
        if (allowed != NULL) {
1691
8
            const char **p;
1692
8
            int match = 0;
1693
1694
21
            for (p = allowed; *p != NULL; p++) {
1695
15
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1696
2
                    match = 1;
1697
2
                    break;
1698
2
                }
1699
15
            }
1700
1701
8
            if (match == 0) {
1702
6
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1703
6
                              "Encoding '%s' doesn't match "
1704
6
                              "auto-detected '%s'\n",
1705
6
                              encoding, BAD_CAST autoEnc);
1706
6
                xmlFree(encoding);
1707
6
                encoding = xmlStrdup(BAD_CAST autoEnc);
1708
6
                if (encoding == NULL)
1709
1
                    xmlCtxtErrMemory(ctxt);
1710
6
            }
1711
8
        }
1712
10
    }
1713
1714
2.88k
    if (ctxt->encoding != NULL)
1715
1.12k
        xmlFree(ctxt->encoding);
1716
2.88k
    ctxt->encoding = encoding;
1717
2.88k
}
1718
1719
/**
1720
 * @since 2.14.0
1721
 *
1722
 * @param ctxt  parser context
1723
 * @returns the encoding from the encoding declaration. This can differ
1724
 * from the actual encoding.
1725
 */
1726
const xmlChar *
1727
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxt *ctxt) {
1728
0
    if (ctxt == NULL)
1729
0
        return(NULL);
1730
1731
0
    return(ctxt->encoding);
1732
0
}
1733
1734
/**
1735
 * @param ctxt  the parser context
1736
 * @returns the actual used to parse the document. This can differ from
1737
 * the declared encoding.
1738
 */
1739
const xmlChar *
1740
24.9k
xmlGetActualEncoding(xmlParserCtxt *ctxt) {
1741
24.9k
    const xmlChar *encoding = NULL;
1742
1743
24.9k
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1744
24.9k
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1745
        /* Preserve encoding exactly */
1746
1.09k
        encoding = ctxt->encoding;
1747
23.8k
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1748
204
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1749
23.6k
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1750
207
        encoding = BAD_CAST "UTF-8";
1751
207
    }
1752
1753
24.9k
    return(encoding);
1754
24.9k
}
1755
1756
/************************************************************************
1757
 *                  *
1758
 *  Commodity functions to handle entities processing   *
1759
 *                  *
1760
 ************************************************************************/
1761
1762
/**
1763
 * Free up an input stream.
1764
 *
1765
 * @param input  an xmlParserInput
1766
 */
1767
void
1768
229k
xmlFreeInputStream(xmlParserInput *input) {
1769
229k
    if (input == NULL) return;
1770
1771
224k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1772
224k
    if (input->version != NULL) xmlFree((char *) input->version);
1773
224k
    if ((input->free != NULL) && (input->base != NULL))
1774
0
        input->free((xmlChar *) input->base);
1775
224k
    if (input->buf != NULL)
1776
224k
        xmlFreeParserInputBuffer(input->buf);
1777
224k
    xmlFree(input);
1778
224k
}
1779
1780
/**
1781
 * Create a new input stream structure.
1782
 *
1783
 * @deprecated Use #xmlNewInputFromUrl or similar functions.
1784
 *
1785
 * @param ctxt  an XML parser context
1786
 * @returns the new input stream or NULL
1787
 */
1788
xmlParserInput *
1789
0
xmlNewInputStream(xmlParserCtxt *ctxt) {
1790
0
    xmlParserInputPtr input;
1791
1792
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1793
0
    if (input == NULL) {
1794
0
        xmlCtxtErrMemory(ctxt);
1795
0
  return(NULL);
1796
0
    }
1797
0
    memset(input, 0, sizeof(xmlParserInput));
1798
0
    input->line = 1;
1799
0
    input->col = 1;
1800
1801
0
    return(input);
1802
0
}
1803
1804
/**
1805
 * Creates a new parser input from the filesystem, the network or
1806
 * a user-defined resource loader.
1807
 *
1808
 * @param ctxt  parser context
1809
 * @param url  filename or URL
1810
 * @param publicId  publid ID from doctype (optional)
1811
 * @param encoding  character encoding (optional)
1812
 * @param flags  unused, pass 0
1813
 * @returns a new parser input.
1814
 */
1815
xmlParserInput *
1816
xmlCtxtNewInputFromUrl(xmlParserCtxt *ctxt, const char *url,
1817
                       const char *publicId, const char *encoding,
1818
0
                       xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
1819
0
    xmlParserInputPtr input;
1820
1821
0
    if ((ctxt == NULL) || (url == NULL))
1822
0
  return(NULL);
1823
1824
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1825
0
    if (input == NULL)
1826
0
        return(NULL);
1827
1828
0
    if (encoding != NULL)
1829
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1830
1831
0
    return(input);
1832
0
}
1833
1834
/**
1835
 * Internal helper function.
1836
 *
1837
 * @param buf  parser input buffer
1838
 * @param filename  filename or URL
1839
 * @returns a new parser input.
1840
 */
1841
static xmlParserInputPtr
1842
224k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1843
224k
    xmlParserInputPtr input;
1844
1845
224k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1846
224k
    if (input == NULL) {
1847
26
  xmlFreeParserInputBuffer(buf);
1848
26
  return(NULL);
1849
26
    }
1850
224k
    memset(input, 0, sizeof(xmlParserInput));
1851
224k
    input->line = 1;
1852
224k
    input->col = 1;
1853
1854
224k
    input->buf = buf;
1855
224k
    xmlBufResetInput(input->buf->buffer, input);
1856
1857
224k
    if (filename != NULL) {
1858
175k
        input->filename = xmlMemStrdup(filename);
1859
175k
        if (input->filename == NULL) {
1860
13
            xmlFreeInputStream(input);
1861
13
            return(NULL);
1862
13
        }
1863
175k
    }
1864
1865
224k
    return(input);
1866
224k
}
1867
1868
/**
1869
 * Creates a new parser input to read from a memory area.
1870
 *
1871
 * `url` is used as base to resolve external entities and for
1872
 * error reporting.
1873
 *
1874
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1875
 * stay unchanged until parsing has finished. This can avoid
1876
 * temporary copies.
1877
 *
1878
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1879
 * area must contain a zero byte after the buffer at position `size`.
1880
 * This can avoid temporary copies.
1881
 *
1882
 * @since 2.14.0
1883
 *
1884
 * @param url  base URL (optional)
1885
 * @param mem  pointer to char array
1886
 * @param size  size of array
1887
 * @param flags  optimization hints
1888
 * @returns a new parser input or NULL if a memory allocation failed.
1889
 */
1890
xmlParserInput *
1891
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1892
175k
                      xmlParserInputFlags flags) {
1893
175k
    xmlParserInputBufferPtr buf;
1894
1895
175k
    if (mem == NULL)
1896
0
  return(NULL);
1897
1898
175k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1899
175k
    if (buf == NULL)
1900
30
        return(NULL);
1901
1902
175k
    return(xmlNewInputInternal(buf, url));
1903
175k
}
1904
1905
/**
1906
 * @param ctxt  parser context
1907
 * @param url  base URL (optional)
1908
 * @param mem  pointer to char array
1909
 * @param size  size of array
1910
 * @param encoding  character encoding (optional)
1911
 * @param flags  optimization hints
1912
 * @returns a new parser input or NULL in case of error.
1913
 */
1914
xmlParserInput *
1915
xmlCtxtNewInputFromMemory(xmlParserCtxt *ctxt, const char *url,
1916
                          const void *mem, size_t size,
1917
0
                          const char *encoding, xmlParserInputFlags flags) {
1918
0
    xmlParserInputPtr input;
1919
1920
0
    if ((ctxt == NULL) || (mem == NULL))
1921
0
  return(NULL);
1922
1923
0
    input = xmlNewInputFromMemory(url, mem, size, flags);
1924
0
    if (input == NULL) {
1925
0
        xmlCtxtErrMemory(ctxt);
1926
0
        return(NULL);
1927
0
    }
1928
1929
0
    if (encoding != NULL)
1930
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1931
1932
0
    return(input);
1933
0
}
1934
1935
/**
1936
 * Creates a new parser input to read from a zero-terminated string.
1937
 *
1938
 * `url` is used as base to resolve external entities and for
1939
 * error reporting.
1940
 *
1941
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1942
 * stay unchanged until parsing has finished. This can avoid
1943
 * temporary copies.
1944
 *
1945
 * @since 2.14.0
1946
 *
1947
 * @param url  base URL (optional)
1948
 * @param str  zero-terminated string
1949
 * @param flags  optimization hints
1950
 * @returns a new parser input or NULL if a memory allocation failed.
1951
 */
1952
xmlParserInput *
1953
xmlNewInputFromString(const char *url, const char *str,
1954
12.4k
                      xmlParserInputFlags flags) {
1955
12.4k
    xmlParserInputBufferPtr buf;
1956
1957
12.4k
    if (str == NULL)
1958
0
  return(NULL);
1959
1960
12.4k
    buf = xmlNewInputBufferString(str, flags);
1961
12.4k
    if (buf == NULL)
1962
10
        return(NULL);
1963
1964
12.4k
    return(xmlNewInputInternal(buf, url));
1965
12.4k
}
1966
1967
/**
1968
 * @param ctxt  parser context
1969
 * @param url  base URL (optional)
1970
 * @param str  zero-terminated string
1971
 * @param encoding  character encoding (optional)
1972
 * @param flags  optimization hints
1973
 * @returns a new parser input.
1974
 */
1975
xmlParserInput *
1976
xmlCtxtNewInputFromString(xmlParserCtxt *ctxt, const char *url,
1977
                          const char *str, const char *encoding,
1978
12.4k
                          xmlParserInputFlags flags) {
1979
12.4k
    xmlParserInputPtr input;
1980
1981
12.4k
    if ((ctxt == NULL) || (str == NULL))
1982
0
  return(NULL);
1983
1984
12.4k
    input = xmlNewInputFromString(url, str, flags);
1985
12.4k
    if (input == NULL) {
1986
16
        xmlCtxtErrMemory(ctxt);
1987
16
        return(NULL);
1988
16
    }
1989
1990
12.4k
    if (encoding != NULL)
1991
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1992
1993
12.4k
    return(input);
1994
12.4k
}
1995
1996
/**
1997
 * Creates a new parser input to read from a file descriptor.
1998
 *
1999
 * `url` is used as base to resolve external entities and for
2000
 * error reporting.
2001
 *
2002
 * `fd` is closed after parsing has finished.
2003
 *
2004
 * Supported `flags` are XML_INPUT_UNZIP to decompress data
2005
 * automatically. This feature is deprecated and will be removed
2006
 * in a future release.
2007
 *
2008
 * @since 2.14.0
2009
 *
2010
 * @param url  base URL (optional)
2011
 * @param fd  file descriptor
2012
 * @param flags  input flags
2013
 * @returns a new parser input or NULL if a memory allocation failed.
2014
 */
2015
xmlParserInput *
2016
0
xmlNewInputFromFd(const char *url, int fd, xmlParserInputFlags flags) {
2017
0
    xmlParserInputBufferPtr buf;
2018
2019
0
    if (fd < 0)
2020
0
  return(NULL);
2021
2022
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2023
0
    if (buf == NULL)
2024
0
        return(NULL);
2025
2026
0
    if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) {
2027
0
        xmlFreeParserInputBuffer(buf);
2028
0
        return(NULL);
2029
0
    }
2030
2031
0
    return(xmlNewInputInternal(buf, url));
2032
0
}
2033
2034
/**
2035
 * @param ctxt  parser context
2036
 * @param url  base URL (optional)
2037
 * @param fd  file descriptor
2038
 * @param encoding  character encoding (optional)
2039
 * @param flags  input flags (see #xmlNewInputFromFd)
2040
 * @returns a new parser input.
2041
 */
2042
xmlParserInput *
2043
xmlCtxtNewInputFromFd(xmlParserCtxt *ctxt, const char *url,
2044
                      int fd, const char *encoding,
2045
0
                      xmlParserInputFlags flags) {
2046
0
    xmlParserInputPtr input;
2047
2048
0
    if ((ctxt == NULL) || (fd < 0))
2049
0
  return(NULL);
2050
2051
0
    if (ctxt->options & XML_PARSE_UNZIP)
2052
0
        flags |= XML_INPUT_UNZIP;
2053
2054
0
    input = xmlNewInputFromFd(url, fd, flags);
2055
0
    if (input == NULL) {
2056
0
  xmlCtxtErrMemory(ctxt);
2057
0
        return(NULL);
2058
0
    }
2059
2060
0
    if (encoding != NULL)
2061
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2062
2063
0
    return(input);
2064
0
}
2065
2066
/**
2067
 * Creates a new parser input to read from input callbacks and
2068
 * context.
2069
 *
2070
 * `url` is used as base to resolve external entities and for
2071
 * error reporting.
2072
 *
2073
 * `ioRead` is called to read new data into a provided buffer.
2074
 * It must return the number of bytes written into the buffer
2075
 * or a negative xmlParserErrors code on failure.
2076
 *
2077
 * `ioClose` is called after parsing has finished.
2078
 *
2079
 * `ioCtxt` is an opaque pointer passed to the callbacks.
2080
 *
2081
 * @since 2.14.0
2082
 *
2083
 * @param url  base URL (optional)
2084
 * @param ioRead  read callback
2085
 * @param ioClose  close callback (optional)
2086
 * @param ioCtxt  IO context
2087
 * @param flags  unused, pass 0
2088
 * @returns a new parser input or NULL if a memory allocation failed.
2089
 */
2090
xmlParserInput *
2091
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2092
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2093
0
                  xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
2094
0
    xmlParserInputBufferPtr buf;
2095
2096
0
    if (ioRead == NULL)
2097
0
  return(NULL);
2098
2099
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2100
0
    if (buf == NULL) {
2101
0
        if (ioClose != NULL)
2102
0
            ioClose(ioCtxt);
2103
0
        return(NULL);
2104
0
    }
2105
2106
0
    buf->context = ioCtxt;
2107
0
    buf->readcallback = ioRead;
2108
0
    buf->closecallback = ioClose;
2109
2110
0
    return(xmlNewInputInternal(buf, url));
2111
0
}
2112
2113
/**
2114
 * @param ctxt  parser context
2115
 * @param url  base URL (optional)
2116
 * @param ioRead  read callback
2117
 * @param ioClose  close callback (optional)
2118
 * @param ioCtxt  IO context
2119
 * @param encoding  character encoding (optional)
2120
 * @param flags  unused, pass 0
2121
 * @returns a new parser input.
2122
 */
2123
xmlParserInput *
2124
xmlCtxtNewInputFromIO(xmlParserCtxt *ctxt, const char *url,
2125
                      xmlInputReadCallback ioRead,
2126
                      xmlInputCloseCallback ioClose,
2127
                      void *ioCtxt, const char *encoding,
2128
0
                      xmlParserInputFlags flags) {
2129
0
    xmlParserInputPtr input;
2130
2131
0
    if ((ctxt == NULL) || (ioRead == NULL))
2132
0
  return(NULL);
2133
2134
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2135
0
    if (input == NULL) {
2136
0
        xmlCtxtErrMemory(ctxt);
2137
0
        return(NULL);
2138
0
    }
2139
2140
0
    if (encoding != NULL)
2141
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2142
2143
0
    return(input);
2144
0
}
2145
2146
/**
2147
 * Creates a new parser input for a push parser.
2148
 *
2149
 * @param url  base URL (optional)
2150
 * @param chunk  pointer to char array
2151
 * @param size  size of array
2152
 * @returns a new parser input or NULL if a memory allocation failed.
2153
 */
2154
xmlParserInput *
2155
36.0k
xmlNewPushInput(const char *url, const char *chunk, int size) {
2156
36.0k
    xmlParserInputBufferPtr buf;
2157
36.0k
    xmlParserInputPtr input;
2158
2159
36.0k
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2160
36.0k
    if (buf == NULL)
2161
8
        return(NULL);
2162
2163
36.0k
    input = xmlNewInputInternal(buf, url);
2164
36.0k
    if (input == NULL)
2165
3
  return(NULL);
2166
2167
36.0k
    input->flags |= XML_INPUT_PROGRESSIVE;
2168
2169
36.0k
    if ((size > 0) && (chunk != NULL)) {
2170
35.0k
        int res;
2171
2172
35.0k
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2173
35.0k
        xmlBufResetInput(input->buf->buffer, input);
2174
35.0k
        if (res < 0) {
2175
0
            xmlFreeInputStream(input);
2176
0
            return(NULL);
2177
0
        }
2178
35.0k
    }
2179
2180
36.0k
    return(input);
2181
36.0k
}
2182
2183
/**
2184
 * Create a new input stream structure encapsulating the `input` into
2185
 * a stream suitable for the parser.
2186
 *
2187
 * @param ctxt  an XML parser context
2188
 * @param buf  an input buffer
2189
 * @param enc  the charset encoding if known
2190
 * @returns the new input stream or NULL
2191
 */
2192
xmlParserInput *
2193
xmlNewIOInputStream(xmlParserCtxt *ctxt, xmlParserInputBuffer *buf,
2194
0
              xmlCharEncoding enc) {
2195
0
    xmlParserInputPtr input;
2196
0
    const char *encoding;
2197
2198
0
    if ((ctxt == NULL) || (buf == NULL))
2199
0
        return(NULL);
2200
2201
0
    input = xmlNewInputInternal(buf, NULL);
2202
0
    if (input == NULL) {
2203
0
        xmlCtxtErrMemory(ctxt);
2204
0
  return(NULL);
2205
0
    }
2206
2207
0
    encoding = xmlGetCharEncodingName(enc);
2208
0
    if (encoding != NULL)
2209
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2210
2211
0
    return(input);
2212
0
}
2213
2214
/**
2215
 * Create a new input stream based on an xmlEntity
2216
 *
2217
 * @deprecated Internal function, do not use.
2218
 *
2219
 * @param ctxt  an XML parser context
2220
 * @param ent  an Entity pointer
2221
 * @returns the new input stream or NULL
2222
 */
2223
xmlParserInput *
2224
115k
xmlNewEntityInputStream(xmlParserCtxt *ctxt, xmlEntity *ent) {
2225
115k
    xmlParserInputPtr input;
2226
2227
115k
    if ((ctxt == NULL) || (ent == NULL))
2228
0
  return(NULL);
2229
2230
115k
    if (ent->content != NULL) {
2231
12.4k
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2232
12.4k
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2233
103k
    } else if (ent->URI != NULL) {
2234
103k
        xmlResourceType rtype;
2235
2236
103k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2237
99.7k
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2238
3.30k
        else
2239
3.30k
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2240
2241
103k
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2242
103k
                                (char *) ent->ExternalID, rtype);
2243
103k
    } else {
2244
421
        return(NULL);
2245
421
    }
2246
2247
115k
    if (input == NULL)
2248
6.47k
        return(NULL);
2249
2250
109k
    input->entity = ent;
2251
2252
109k
    return(input);
2253
115k
}
2254
2255
/**
2256
 * Create a new input stream based on a memory buffer.
2257
 *
2258
 * @deprecated Use #xmlNewInputFromString.
2259
 *
2260
 * @param ctxt  an XML parser context
2261
 * @param buffer  a memory buffer
2262
 * @returns the new input stream
2263
 */
2264
xmlParserInput *
2265
0
xmlNewStringInputStream(xmlParserCtxt *ctxt, const xmlChar *buffer) {
2266
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2267
0
                                     NULL, 0));
2268
0
}
2269
2270
2271
/****************************************************************
2272
 *                *
2273
 *    External entities loading     *
2274
 *                *
2275
 ****************************************************************/
2276
2277
#ifdef LIBXML_CATALOG_ENABLED
2278
2279
/**
2280
 * Resolves an external ID or URL against the appropriate catalog.
2281
 *
2282
 * @param url  the URL or system ID for the entity to load
2283
 * @param publicId  the public ID for the entity to load (optional)
2284
 * @param localCatalogs  local catalogs (optional)
2285
 * @param allowGlobal  allow global system catalog
2286
 * @param out  resulting resource or NULL
2287
 * @returns an xmlParserErrors code
2288
 */
2289
static xmlParserErrors
2290
xmlResolveFromCatalog(const char *url, const char *publicId,
2291
0
                      void *localCatalogs, int allowGlobal, char **out) {
2292
0
    xmlError oldError;
2293
0
    xmlError *lastError;
2294
0
    char *resource = NULL;
2295
0
    xmlParserErrors code;
2296
2297
0
    if (out == NULL)
2298
0
        return(XML_ERR_ARGUMENT);
2299
0
    *out = NULL;
2300
0
    if ((localCatalogs == NULL) && (!allowGlobal))
2301
0
        return(XML_ERR_OK);
2302
2303
    /*
2304
     * Don't try to resolve if local file exists.
2305
     *
2306
     * TODO: This is somewhat non-deterministic.
2307
     */
2308
0
    if (xmlNoNetExists(url))
2309
0
        return(XML_ERR_OK);
2310
2311
    /* Backup and reset last error */
2312
0
    lastError = xmlGetLastErrorInternal();
2313
0
    oldError = *lastError;
2314
0
    lastError->code = XML_ERR_OK;
2315
2316
    /*
2317
     * Do a local lookup
2318
     */
2319
0
    if (localCatalogs != NULL) {
2320
0
        resource = (char *) xmlCatalogLocalResolve(localCatalogs,
2321
0
                                                   BAD_CAST publicId,
2322
0
                                                   BAD_CAST url);
2323
0
    }
2324
    /*
2325
     * Try a global lookup
2326
     */
2327
0
    if ((resource == NULL) && (allowGlobal)) {
2328
0
        resource = (char *) xmlCatalogResolve(BAD_CAST publicId,
2329
0
                                              BAD_CAST url);
2330
0
    }
2331
2332
    /*
2333
     * Try to resolve url using URI rules.
2334
     *
2335
     * TODO: We should consider using only a single resolution
2336
     * mechanism depending on resource type. Either by external ID
2337
     * or by URI.
2338
     */
2339
0
    if ((resource == NULL) && (url != NULL)) {
2340
0
        if (localCatalogs != NULL) {
2341
0
            resource = (char *) xmlCatalogLocalResolveURI(localCatalogs,
2342
0
                                                          BAD_CAST url);
2343
0
        }
2344
0
        if ((resource == NULL) && (allowGlobal)) {
2345
0
            resource = (char *) xmlCatalogResolveURI(BAD_CAST url);
2346
0
        }
2347
0
    }
2348
2349
0
    code = lastError->code;
2350
0
    if (code == XML_ERR_OK) {
2351
0
        *out = resource;
2352
0
    } else {
2353
0
        xmlFree(resource);
2354
0
    }
2355
2356
0
    *lastError = oldError;
2357
2358
0
    return(code);
2359
0
}
2360
2361
static char *
2362
xmlCtxtResolveFromCatalog(xmlParserCtxtPtr ctxt, const char *url,
2363
183k
                          const char *publicId) {
2364
183k
    char *resource;
2365
183k
    void *localCatalogs = NULL;
2366
183k
    int allowGlobal = 1;
2367
183k
    xmlParserErrors code;
2368
2369
183k
    if (ctxt != NULL) {
2370
        /*
2371
         * Loading of HTML documents shouldn't use XML catalogs.
2372
         */
2373
183k
        if (ctxt->html)
2374
0
            return(NULL);
2375
2376
183k
        localCatalogs = ctxt->catalogs;
2377
2378
183k
        if (ctxt->options & XML_PARSE_NO_SYS_CATALOG)
2379
40.0k
            allowGlobal = 0;
2380
183k
    }
2381
2382
183k
    switch (xmlCatalogGetDefaults()) {
2383
183k
        case XML_CATA_ALLOW_NONE:
2384
183k
            return(NULL);
2385
0
        case XML_CATA_ALLOW_DOCUMENT:
2386
0
            allowGlobal = 0;
2387
0
            break;
2388
0
        case XML_CATA_ALLOW_GLOBAL:
2389
0
            localCatalogs = NULL;
2390
0
            break;
2391
0
        case XML_CATA_ALLOW_ALL:
2392
0
            break;
2393
183k
    }
2394
2395
0
    code = xmlResolveFromCatalog(url, publicId, localCatalogs,
2396
0
                                 allowGlobal, &resource);
2397
0
    if (code != XML_ERR_OK)
2398
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_CATALOG, code, XML_ERR_ERROR,
2399
0
                   BAD_CAST url, BAD_CAST publicId, NULL, 0,
2400
0
                   "%s\n", xmlErrString(code), NULL);
2401
2402
0
    return(resource);
2403
183k
}
2404
2405
#endif
2406
2407
/**
2408
 * @deprecated Internal function, don't use.
2409
 *
2410
 * @param ctxt  an XML parser context
2411
 * @param ret  an XML parser input
2412
 * @returns NULL.
2413
 */
2414
xmlParserInput *
2415
xmlCheckHTTPInput(xmlParserCtxt *ctxt ATTRIBUTE_UNUSED,
2416
0
                  xmlParserInput *ret ATTRIBUTE_UNUSED) {
2417
0
    return(NULL);
2418
0
}
2419
2420
/**
2421
 * Create a new input stream based on a file or a URL.
2422
 *
2423
 * The flag XML_INPUT_UNZIP allows decompression.
2424
 *
2425
 * The flag XML_INPUT_NETWORK allows network access.
2426
 *
2427
 * The following resource loaders will be called if they were
2428
 * registered (in order of precedence):
2429
 *
2430
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2431
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2432
 * - the default loader which will return
2433
 *   - the result from a matching global input callback set with
2434
 *     #xmlRegisterInputCallbacks (deprecated)
2435
 *   - a file opened from the filesystem, with automatic detection
2436
 *     of compressed files if support is compiled in.
2437
 *
2438
 * @since 2.14.0
2439
 *
2440
 * @param url  the filename to use as entity
2441
 * @param flags  XML_INPUT flags
2442
 * @param out  pointer to new parser input
2443
 * @returns an xmlParserErrors code.
2444
 */
2445
xmlParserErrors
2446
xmlNewInputFromUrl(const char *url, xmlParserInputFlags flags,
2447
0
                   xmlParserInput **out) {
2448
0
    char *resource = NULL;
2449
0
    xmlParserInputBufferPtr buf;
2450
0
    xmlParserInputPtr input;
2451
0
    xmlParserErrors code = XML_ERR_OK;
2452
2453
0
    if (out == NULL)
2454
0
        return(XML_ERR_ARGUMENT);
2455
0
    *out = NULL;
2456
0
    if (url == NULL)
2457
0
        return(XML_ERR_ARGUMENT);
2458
2459
0
#ifdef LIBXML_CATALOG_ENABLED
2460
0
    if (flags & XML_INPUT_USE_SYS_CATALOG) {
2461
0
        code = xmlResolveFromCatalog(url, NULL, NULL, 1, &resource);
2462
0
        if (code != XML_ERR_OK)
2463
0
            return(code);
2464
0
        if (resource != NULL)
2465
0
            url = resource;
2466
0
    }
2467
0
#endif
2468
2469
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2470
0
        buf = xmlParserInputBufferCreateFilenameValue(url,
2471
0
                XML_CHAR_ENCODING_NONE);
2472
0
        if (buf == NULL)
2473
0
            code = XML_IO_ENOENT;
2474
0
    } else {
2475
0
        code = xmlParserInputBufferCreateUrl(url, XML_CHAR_ENCODING_NONE,
2476
0
                                             flags, &buf);
2477
0
    }
2478
2479
0
    if (code == XML_ERR_OK) {
2480
0
        input = xmlNewInputInternal(buf, url);
2481
0
        if (input == NULL)
2482
0
            code = XML_ERR_NO_MEMORY;
2483
2484
0
        *out = input;
2485
0
    }
2486
2487
0
    if (resource != NULL)
2488
0
        xmlFree(resource);
2489
0
    return(code);
2490
0
}
2491
2492
/**
2493
 * Create a new input stream based on a file or an URL.
2494
 *
2495
 * Unlike the default external entity loader, this function
2496
 * doesn't use XML catalogs.
2497
 *
2498
 * @deprecated Use #xmlNewInputFromUrl.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param filename  the filename to use as entity
2502
 * @returns the new input stream or NULL in case of error
2503
 */
2504
xmlParserInput *
2505
0
xmlNewInputFromFile(xmlParserCtxt *ctxt, const char *filename) {
2506
0
    xmlParserInputPtr input;
2507
0
    xmlParserInputFlags flags = 0;
2508
0
    xmlParserErrors code;
2509
2510
0
    if ((ctxt == NULL) || (filename == NULL))
2511
0
        return(NULL);
2512
2513
0
    if (ctxt->options & XML_PARSE_UNZIP)
2514
0
        flags |= XML_INPUT_UNZIP;
2515
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2516
0
        flags |= XML_INPUT_NETWORK;
2517
2518
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2519
0
    if (code != XML_ERR_OK) {
2520
0
        xmlCtxtErrIO(ctxt, code, filename);
2521
0
        return(NULL);
2522
0
    }
2523
2524
0
    return(input);
2525
0
}
2526
2527
/**
2528
 * Default external entity loader. Resolves the URL through XML
 * catalogs (if compiled in) and then loads it as a file or URL.
2529
 *
2530
 * @param url  the URL or system ID for the entity to load
2531
 * @param publicId  the public ID for the entity to load (optional)
2532
 * @param ctxt  the context in which the entity is called or NULL
2533
 * @returns a new allocated xmlParserInput, or NULL.
2534
 */
2535
static xmlParserInputPtr
2536
xmlDefaultExternalEntityLoader(const char *url, const char *publicId,
2537
                               xmlParserCtxtPtr ctxt)
2538
0
{
2539
0
    xmlParserInputPtr input = NULL;
2540
0
    char *resource = NULL;
2541
2542
0
    (void) publicId;
2543
2544
0
    if (url == NULL)
2545
0
        return(NULL);
2546
2547
0
#ifdef LIBXML_CATALOG_ENABLED
2548
0
    resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2549
0
    if (resource != NULL)
2550
0
  url = resource;
2551
0
#endif
2552
2553
    /*
2554
     * Several downstream test suites expect this error whenever
2555
     * an http URI is passed and NONET is set.
2556
     */
2557
0
    if ((ctxt != NULL) &&
2558
0
        (ctxt->options & XML_PARSE_NONET) &&
2559
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2560
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2561
0
    } else {
2562
0
        input = xmlNewInputFromFile(ctxt, url);
2563
0
    }
2564
2565
0
    if (resource != NULL)
2566
0
  xmlFree(resource);
2567
0
    return(input);
2568
0
}
2569
2570
/**
2571
 * A specific entity loader disabling network accesses, though still
2572
 * allowing local catalog accesses for resolution.
2573
 *
2574
 * @deprecated Use XML_PARSE_NONET.
2575
 *
2576
 * @param URL  the URL or system ID for the entity to load
2577
 * @param publicId  the public ID for the entity to load
2578
 * @param ctxt  the context in which the entity is called or NULL
2579
 * @returns a new allocated xmlParserInput, or NULL.
2580
 */
2581
xmlParserInput *
2582
xmlNoNetExternalEntityLoader(const char *URL, const char *publicId,
2583
0
                             xmlParserCtxt *ctxt) {
2584
0
    int oldOptions = 0;
2585
0
    xmlParserInputPtr input;
2586
2587
0
    if (ctxt != NULL) {
2588
0
        oldOptions = ctxt->options;
2589
0
        ctxt->options |= XML_PARSE_NONET;
2590
0
    }
2591
2592
0
    input = xmlDefaultExternalEntityLoader(URL, publicId, ctxt);
2593
2594
0
    if (ctxt != NULL)
2595
0
        ctxt->options = oldOptions;
2596
2597
0
    return(input);
2598
0
}
2599
2600
/*
2601
 * This global has to die eventually
2602
 */
2603
static xmlExternalEntityLoader
2604
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2605
2606
/**
2607
 * Changes the default external entity resolver function for the
2608
 * application.
2609
 *
2610
 * @deprecated This is a global setting and not thread-safe. Use
2611
 * #xmlCtxtSetResourceLoader or similar functions.
2612
 *
2613
 * @param f  the new entity resolver function
2614
 */
2615
void
2616
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2617
0
    xmlCurrentExternalEntityLoader = f;
2618
0
}
2619
2620
/**
2621
 * Get the default external entity resolver function for the application
2622
 *
2623
 * @deprecated See #xmlSetExternalEntityLoader.
2624
 *
2625
 * @returns the #xmlExternalEntityLoader function pointer
2626
 */
2627
xmlExternalEntityLoader
2628
0
xmlGetExternalEntityLoader(void) {
2629
0
    return(xmlCurrentExternalEntityLoader);
2630
0
}
2631
2632
/**
2633
 * Installs a custom callback to load documents, DTDs or external
2634
 * entities.
2635
 *
2636
 * If `vctxt` is NULL, the parser context will be passed.
2637
 *
2638
 * @since 2.14.0
2639
 * @param ctxt  parser context
2640
 * @param loader  callback
2641
 * @param vctxt  user data (optional)
2642
 */
2643
void
2644
xmlCtxtSetResourceLoader(xmlParserCtxt *ctxt, xmlResourceLoader loader,
2645
43.5k
                         void *vctxt) {
2646
43.5k
    if (ctxt == NULL)
2647
0
        return;
2648
2649
43.5k
    ctxt->resourceLoader = loader;
2650
43.5k
    ctxt->resourceCtxt = vctxt;
2651
43.5k
}
2652
2653
/**
2654
 * @param ctxt  parser context
2655
 * @param url  the URL or system ID for the entity to load
2656
 * @param publicId  the public ID for the entity to load (optional)
2657
 * @param type  resource type
2658
 * @returns the xmlParserInput or NULL in case of error.
2659
 */
2660
xmlParserInput *
2661
xmlLoadResource(xmlParserCtxt *ctxt, const char *url, const char *publicId,
2662
184k
                xmlResourceType type) {
2663
184k
    char *canonicFilename;
2664
184k
    xmlParserInputPtr ret;
2665
2666
184k
    if (url == NULL)
2667
289
        return(NULL);
2668
2669
183k
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2670
183k
        char *resource = NULL;
2671
183k
        void *userData;
2672
183k
        xmlParserInputFlags flags = 0;
2673
183k
        int code;
2674
2675
183k
#ifdef LIBXML_CATALOG_ENABLED
2676
183k
        resource = xmlCtxtResolveFromCatalog(ctxt, url, publicId);
2677
183k
        if (resource != NULL)
2678
0
            url = resource;
2679
183k
#endif
2680
2681
183k
        if (ctxt->options & XML_PARSE_UNZIP)
2682
39.7k
            flags |= XML_INPUT_UNZIP;
2683
183k
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2684
122k
            flags |= XML_INPUT_NETWORK;
2685
2686
183k
        userData = ctxt->resourceCtxt;
2687
183k
        if (userData == NULL)
2688
183k
            userData = ctxt;
2689
2690
183k
        code = ctxt->resourceLoader(userData, url, publicId, type,
2691
183k
                                    flags, &ret);
2692
183k
        if (code != XML_ERR_OK) {
2693
8.26k
            xmlCtxtErrIO(ctxt, code, url);
2694
8.26k
            ret = NULL;
2695
8.26k
        }
2696
183k
        if (resource != NULL)
2697
0
            xmlFree(resource);
2698
183k
        return(ret);
2699
183k
    }
2700
2701
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2702
0
    if (canonicFilename == NULL) {
2703
0
        xmlCtxtErrMemory(ctxt);
2704
0
        return(NULL);
2705
0
    }
2706
2707
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2708
0
    xmlFree(canonicFilename);
2709
0
    return(ret);
2710
0
}
2711
2712
/**
2713
 * `URL` is a filename or URL. If it contains the substring "://",
2714
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2715
 * treated as a filesystem path.
2716
 *
2717
 * `publicId` is an optional XML public ID, typically from a doctype
2718
 * declaration. It is used for catalog lookups.
2719
 *
2720
 * If catalog lookup is enabled (default is yes) and URL or ID are
2721
 * found in system or local XML catalogs, URL is replaced with the
2722
 * result. Then the following resource loaders will be called if
2723
 * they were registered (in order of precedence):
2724
 *
2725
 * - the resource loader set with #xmlCtxtSetResourceLoader
2726
 * - the global external entity loader set with
2727
 *   #xmlSetExternalEntityLoader (without catalog resolution,
2728
 *   deprecated)
2729
 * - the per-thread #xmlParserInputBufferCreateFilenameFunc set with
2730
 *   #xmlParserInputBufferCreateFilenameDefault (deprecated)
2731
 * - the default loader which will return
2732
 *   - the result from a matching global input callback set with
2733
 *     #xmlRegisterInputCallbacks (deprecated)
2734
 *   - a file opened from the filesystem, with automatic detection
2735
 *     of compressed files if support is compiled in.
2736
 *
2737
 * @param URL  the URL or system ID for the entity to load
2738
 * @param publicId  the public ID for the entity to load (optional)
2739
 * @param ctxt  the context in which the entity is called or NULL
2740
 * @returns the xmlParserInput or NULL
2741
 */
2742
xmlParserInput *
2743
xmlLoadExternalEntity(const char *URL, const char *publicId,
2744
0
                      xmlParserCtxt *ctxt) {
2745
0
    return(xmlLoadResource(ctxt, URL, publicId, XML_RESOURCE_UNKNOWN));
2746
0
}
2747
2748
/************************************************************************
2749
 *                  *
2750
 *    Commodity functions to handle parser contexts   *
2751
 *                  *
2752
 ************************************************************************/
2753
2754
/**
2755
 * Initialize a SAX parser context
2756
 *
2757
 * @param ctxt  XML parser context
2758
 * @param sax  SAX handler
2759
 * @param userData  user data
2760
 * @returns 0 in case of success and -1 in case of error
2761
 */
2762
2763
static int
2764
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2765
                     void *userData)
2766
43.5k
{
2767
43.5k
    xmlParserInputPtr input;
2768
43.5k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2769
43.5k
    size_t initialNodeTabSize = 1;
2770
#else
2771
    size_t initialNodeTabSize = 10;
2772
#endif
2773
2774
43.5k
    if (ctxt == NULL)
2775
0
        return(-1);
2776
2777
43.5k
    if (ctxt->dict == NULL)
2778
43.5k
  ctxt->dict = xmlDictCreate();
2779
43.5k
    if (ctxt->dict == NULL)
2780
7
  return(-1);
2781
2782
43.5k
    if (ctxt->sax == NULL)
2783
43.5k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2784
43.5k
    if (ctxt->sax == NULL)
2785
5
  return(-1);
2786
43.5k
    if (sax == NULL) {
2787
7.43k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2788
7.43k
        xmlSAXVersion(ctxt->sax, 2);
2789
7.43k
        ctxt->userData = ctxt;
2790
36.1k
    } else {
2791
36.1k
  if (sax->initialized == XML_SAX2_MAGIC) {
2792
36.1k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2793
36.1k
        } else {
2794
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2795
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2796
0
        }
2797
36.1k
        ctxt->userData = userData ? userData : ctxt;
2798
36.1k
    }
2799
2800
43.5k
    ctxt->maxatts = 0;
2801
43.5k
    ctxt->atts = NULL;
2802
    /* Allocate the Input stack */
2803
43.5k
    if (ctxt->inputTab == NULL) {
2804
43.5k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2805
43.5k
        size_t initialSize = 1;
2806
#else
2807
        size_t initialSize = 5;
2808
#endif
2809
2810
43.5k
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2811
43.5k
  ctxt->inputMax = initialSize;
2812
43.5k
    }
2813
43.5k
    if (ctxt->inputTab == NULL)
2814
6
  return(-1);
2815
43.5k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2816
0
        xmlFreeInputStream(input);
2817
0
    }
2818
43.5k
    ctxt->inputNr = 0;
2819
43.5k
    ctxt->input = NULL;
2820
2821
43.5k
    ctxt->version = NULL;
2822
43.5k
    ctxt->encoding = NULL;
2823
43.5k
    ctxt->standalone = -1;
2824
43.5k
    ctxt->hasExternalSubset = 0;
2825
43.5k
    ctxt->hasPErefs = 0;
2826
43.5k
    ctxt->html = 0;
2827
43.5k
    ctxt->instate = XML_PARSER_START;
2828
2829
    /* Allocate the Node stack */
2830
43.5k
    if (ctxt->nodeTab == NULL) {
2831
43.5k
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2832
43.5k
  ctxt->nodeMax = initialNodeTabSize;
2833
43.5k
    }
2834
43.5k
    if (ctxt->nodeTab == NULL)
2835
2
  return(-1);
2836
43.5k
    ctxt->nodeNr = 0;
2837
43.5k
    ctxt->node = NULL;
2838
2839
    /* Allocate the Name stack */
2840
43.5k
    if (ctxt->nameTab == NULL) {
2841
43.5k
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2842
43.5k
  ctxt->nameMax = initialNodeTabSize;
2843
43.5k
    }
2844
43.5k
    if (ctxt->nameTab == NULL)
2845
6
  return(-1);
2846
43.5k
    ctxt->nameNr = 0;
2847
43.5k
    ctxt->name = NULL;
2848
2849
    /* Allocate the space stack */
2850
43.5k
    if (ctxt->spaceTab == NULL) {
2851
43.5k
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2852
43.5k
  ctxt->spaceMax = initialNodeTabSize;
2853
43.5k
    }
2854
43.5k
    if (ctxt->spaceTab == NULL)
2855
9
  return(-1);
2856
43.5k
    ctxt->spaceNr = 1;
2857
43.5k
    ctxt->spaceTab[0] = -1;
2858
43.5k
    ctxt->space = &ctxt->spaceTab[0];
2859
43.5k
    ctxt->myDoc = NULL;
2860
43.5k
    ctxt->wellFormed = 1;
2861
43.5k
    ctxt->nsWellFormed = 1;
2862
43.5k
    ctxt->valid = 1;
2863
2864
43.5k
    ctxt->options = XML_PARSE_NODICT;
2865
2866
    /*
2867
     * Initialize some parser options from deprecated global variables.
2868
     * Note that the "modern" API taking options arguments or
2869
     * xmlCtxtSetOptions will ignore these defaults. They're only
2870
     * relevant if old API functions like xmlParseFile are used.
2871
     */
2872
43.5k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2873
43.5k
    if (ctxt->loadsubset) {
2874
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2875
0
    }
2876
43.5k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2877
43.5k
    if (ctxt->validate) {
2878
0
        ctxt->options |= XML_PARSE_DTDVALID;
2879
0
    }
2880
43.5k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2881
43.5k
    if (ctxt->pedantic) {
2882
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2883
0
    }
2884
43.5k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2885
43.5k
    if (ctxt->keepBlanks == 0) {
2886
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2887
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2888
0
    }
2889
43.5k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2890
43.5k
    if (ctxt->replaceEntities) {
2891
0
        ctxt->options |= XML_PARSE_NOENT;
2892
0
    }
2893
43.5k
    if (xmlGetWarningsDefaultValue == 0)
2894
0
        ctxt->options |= XML_PARSE_NOWARNING;
2895
2896
43.5k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2897
43.5k
    ctxt->vctxt.userData = ctxt;
2898
43.5k
    ctxt->vctxt.error = xmlParserValidityError;
2899
43.5k
    ctxt->vctxt.warning = xmlParserValidityWarning;
2900
2901
43.5k
    ctxt->record_info = 0;
2902
43.5k
    ctxt->checkIndex = 0;
2903
43.5k
    ctxt->inSubset = 0;
2904
43.5k
    ctxt->errNo = XML_ERR_OK;
2905
43.5k
    ctxt->depth = 0;
2906
43.5k
    ctxt->catalogs = NULL;
2907
43.5k
    ctxt->sizeentities = 0;
2908
43.5k
    ctxt->sizeentcopy = 0;
2909
43.5k
    ctxt->input_id = 1;
2910
43.5k
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2911
43.5k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2912
2913
43.5k
    if (ctxt->nsdb == NULL) {
2914
43.5k
        ctxt->nsdb = xmlParserNsCreate();
2915
43.5k
        if (ctxt->nsdb == NULL)
2916
4
            return(-1);
2917
43.5k
    }
2918
2919
43.5k
    return(0);
2920
43.5k
}
2921
2922
/**
2923
 * Initialize a parser context
2924
 *
2925
 * @deprecated Internal function which will be made private in a future
2926
 * version.
2927
 *
2928
 * @param ctxt  an XML parser context
2929
 * @returns 0 in case of success and -1 in case of error
2930
 */
2931
2932
int
2933
xmlInitParserCtxt(xmlParserCtxt *ctxt)
2934
0
{
2935
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2936
0
}
2937
2938
/**
2939
 * Free all the memory used by a parser context. However the parsed
2940
 * document in ctxt->myDoc is not freed.
2941
 *
2942
 * @param ctxt  an XML parser context
2943
 */
2944
2945
void
2946
xmlFreeParserCtxt(xmlParserCtxt *ctxt)
2947
43.5k
{
2948
43.5k
    xmlParserInputPtr input;
2949
2950
43.5k
    if (ctxt == NULL) return;
2951
2952
86.7k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2953
43.2k
        xmlFreeInputStream(input);
2954
43.2k
    }
2955
43.5k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2956
43.5k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2957
43.5k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2958
43.5k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2959
43.5k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2960
43.5k
    if (ctxt->version != NULL) xmlFree(ctxt->version);
2961
43.5k
    if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
2962
43.5k
    if (ctxt->extSubURI != NULL) xmlFree(ctxt->extSubURI);
2963
43.5k
    if (ctxt->extSubSystem != NULL) xmlFree(ctxt->extSubSystem);
2964
43.5k
#ifdef LIBXML_SAX1_ENABLED
2965
43.5k
    if ((ctxt->sax != NULL) &&
2966
43.5k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2967
#else
2968
    if (ctxt->sax != NULL)
2969
#endif /* LIBXML_SAX1_ENABLED */
2970
43.5k
        xmlFree(ctxt->sax);
2971
43.5k
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2972
43.5k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2973
43.5k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2974
43.5k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2975
43.5k
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2976
43.5k
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2977
43.5k
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2978
43.5k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2979
43.5k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2980
43.5k
    if (ctxt->attsDefault != NULL)
2981
2.38k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2982
43.5k
    if (ctxt->attsSpecial != NULL)
2983
2.86k
        xmlHashFree(ctxt->attsSpecial, NULL);
2984
43.5k
    if (ctxt->freeElems != NULL) {
2985
23.2k
        xmlNodePtr cur, next;
2986
2987
23.2k
  cur = ctxt->freeElems;
2988
46.5k
  while (cur != NULL) {
2989
23.2k
      next = cur->next;
2990
23.2k
      xmlFree(cur);
2991
23.2k
      cur = next;
2992
23.2k
  }
2993
23.2k
    }
2994
43.5k
    if (ctxt->freeAttrs != NULL) {
2995
13.2k
        xmlAttrPtr cur, next;
2996
2997
13.2k
  cur = ctxt->freeAttrs;
2998
26.4k
  while (cur != NULL) {
2999
13.2k
      next = cur->next;
3000
13.2k
      xmlFree(cur);
3001
13.2k
      cur = next;
3002
13.2k
  }
3003
13.2k
    }
3004
    /*
3005
     * cleanup the error strings
3006
     */
3007
43.5k
    if (ctxt->lastError.message != NULL)
3008
31.1k
        xmlFree(ctxt->lastError.message);
3009
43.5k
    if (ctxt->lastError.file != NULL)
3010
7.21k
        xmlFree(ctxt->lastError.file);
3011
43.5k
    if (ctxt->lastError.str1 != NULL)
3012
17.9k
        xmlFree(ctxt->lastError.str1);
3013
43.5k
    if (ctxt->lastError.str2 != NULL)
3014
5.43k
        xmlFree(ctxt->lastError.str2);
3015
43.5k
    if (ctxt->lastError.str3 != NULL)
3016
426
        xmlFree(ctxt->lastError.str3);
3017
3018
43.5k
#ifdef LIBXML_CATALOG_ENABLED
3019
43.5k
    if (ctxt->catalogs != NULL)
3020
0
  xmlCatalogFreeLocal(ctxt->catalogs);
3021
43.5k
#endif
3022
43.5k
    xmlFree(ctxt);
3023
43.5k
}
3024
3025
/**
3026
 * Allocate and initialize a new parser context.
3027
 *
3028
 * @returns the xmlParserCtxt or NULL
3029
 */
3030
3031
xmlParserCtxt *
3032
xmlNewParserCtxt(void)
3033
7.44k
{
3034
7.44k
    return(xmlNewSAXParserCtxt(NULL, NULL));
3035
7.44k
}
3036
3037
/**
3038
 * Allocate and initialize a new SAX parser context. If userData is NULL,
3039
 * the parser context will be passed as user data.
3040
 *
3041
 * @since 2.11.0
3042
 *
3043
 * If you want support older versions,
3044
 * it's best to invoke #xmlNewParserCtxt and set ctxt->sax with
3045
 * struct assignment.
3046
 *
3047
 * @param sax  SAX handler
3048
 * @param userData  user data
3049
 * @returns the xmlParserCtxt or NULL if memory allocation failed.
3050
 */
3051
3052
xmlParserCtxt *
3053
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3054
43.5k
{
3055
43.5k
    xmlParserCtxtPtr ctxt;
3056
3057
43.5k
    xmlInitParser();
3058
3059
43.5k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3060
43.5k
    if (ctxt == NULL)
3061
7
  return(NULL);
3062
43.5k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3063
43.5k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3064
39
        xmlFreeParserCtxt(ctxt);
3065
39
  return(NULL);
3066
39
    }
3067
43.5k
    return(ctxt);
3068
43.5k
}
3069
3070
/**
3071
 * @since 2.14.0
3072
 *
3073
 * @param ctxt  parser context
3074
 * @returns the private application data.
3075
 */
3076
void *
3077
0
xmlCtxtGetPrivate(xmlParserCtxt *ctxt) {
3078
0
    if (ctxt == NULL)
3079
0
        return(NULL);
3080
3081
0
    return(ctxt->_private);
3082
0
}
3083
3084
/**
3085
 * Set the private application data.
3086
 *
3087
 * @since 2.14.0
3088
 *
3089
 * @param ctxt  parser context
3090
 * @param priv  private application data
3091
 */
3092
void
3093
0
xmlCtxtSetPrivate(xmlParserCtxt *ctxt, void *priv) {
3094
0
    if (ctxt == NULL)
3095
0
        return;
3096
3097
0
    ctxt->_private = priv;
3098
0
}
3099
3100
/**
3101
 * @since 2.14.0
3102
 *
3103
 * @param ctxt  parser context
3104
 * @returns the local catalogs.
3105
 */
3106
void *
3107
0
xmlCtxtGetCatalogs(xmlParserCtxt *ctxt) {
3108
0
    if (ctxt == NULL)
3109
0
        return(NULL);
3110
3111
0
    return(ctxt->catalogs);
3112
0
}
3113
3114
/**
3115
 * Set the local catalogs.
3116
 *
3117
 * @since 2.14.0
3118
 *
3119
 * @param ctxt  parser context
3120
 * @param catalogs  catalogs pointer
3121
 */
3122
void
3123
0
xmlCtxtSetCatalogs(xmlParserCtxt *ctxt, void *catalogs) {
3124
0
    if (ctxt == NULL)
3125
0
        return;
3126
3127
0
    ctxt->catalogs = catalogs;
3128
0
}
3129
3130
/**
3131
 * @since 2.14.0
3132
 *
3133
 * @param ctxt  parser context
3134
 * @returns the dictionary.
3135
 */
3136
xmlDict *
3137
0
xmlCtxtGetDict(xmlParserCtxt *ctxt) {
3138
0
    if (ctxt == NULL)
3139
0
        return(NULL);
3140
3141
0
    return(ctxt->dict);
3142
0
}
3143
3144
/**
3145
 * Set the dictionary. This should only be done immediately after
3146
 * creating a parser context.
3147
 *
3148
 * @since 2.14.0
3149
 *
3150
 * @param ctxt  parser context
3151
 * @param dict  dictionary
3152
 */
3153
void
3154
0
xmlCtxtSetDict(xmlParserCtxt *ctxt, xmlDict *dict) {
3155
0
    if (ctxt == NULL)
3156
0
        return;
3157
3158
0
    if (ctxt->dict != NULL)
3159
0
        xmlDictFree(ctxt->dict);
3160
3161
0
    xmlDictReference(dict);
3162
0
    ctxt->dict = dict;
3163
0
}
3164
3165
/**
3166
 * @since 2.14.0
3167
 *
3168
 * @param ctxt  parser context
3169
 * @returns the SAX handler struct. This is not a copy and must not
3170
 * be freed. Handlers can be updated.
3171
 */
3172
xmlSAXHandler *
3173
0
xmlCtxtGetSaxHandler(xmlParserCtxt *ctxt) {
3174
0
    if (ctxt == NULL)
3175
0
        return(NULL);
3176
3177
0
    return(ctxt->sax);
3178
0
}
3179
3180
/**
3181
 * Set the SAX handler struct to a copy of `sax`.
3182
 *
3183
 * @since 2.14.0
3184
 *
3185
 * @param ctxt  parser context
3186
 * @param sax  SAX handler
3187
 * @returns 0 on success or -1 if arguments are invalid or a memory
3188
 * allocation failed.
3189
 */
3190
int
3191
0
xmlCtxtSetSaxHandler(xmlParserCtxt *ctxt, const xmlSAXHandler *sax) {
3192
0
    xmlSAXHandler *copy;
3193
3194
0
    if ((ctxt == NULL) || (sax == NULL))
3195
0
        return(-1);
3196
3197
0
    copy = xmlMalloc(sizeof(*copy));
3198
0
    if (copy == NULL)
3199
0
        return(-1);
3200
3201
0
    memcpy(copy, sax, sizeof(*copy));
3202
0
    ctxt->sax = copy;
3203
3204
0
    return(0);
3205
0
}
3206
3207
/**
3208
 * @since 2.14.0
3209
 *
3210
 * @param ctxt  parser context
3211
 * @returns the parsed document or NULL if a fatal error occurred when
3212
 * parsing. The document must be freed by the caller. Resets the
3213
 * context's document to NULL.
3214
 */
3215
xmlDoc *
3216
0
xmlCtxtGetDocument(xmlParserCtxt *ctxt) {
3217
0
    xmlDocPtr doc;
3218
3219
0
    if (ctxt == NULL)
3220
0
        return(NULL);
3221
3222
0
    if ((ctxt->wellFormed) ||
3223
0
        (((ctxt->recovery) || (ctxt->html)) &&
3224
0
         (!xmlCtxtIsCatastrophicError(ctxt)))) {
3225
0
        doc = ctxt->myDoc;
3226
0
    } else {
3227
0
        if (ctxt->errNo == XML_ERR_OK)
3228
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error");
3229
0
        doc = NULL;
3230
0
        xmlFreeDoc(ctxt->myDoc);
3231
0
    }
3232
0
    ctxt->myDoc = NULL;
3233
3234
0
    return(doc);
3235
0
}
3236
3237
/**
3238
 * @since 2.14.0
3239
 *
3240
 * @param ctxt  parser context
3241
 * @returns 1 if this is a HTML parser context, 0 otherwise.
3242
 */
3243
int
3244
0
xmlCtxtIsHtml(xmlParserCtxt *ctxt) {
3245
0
    if (ctxt == NULL)
3246
0
        return(0);
3247
3248
0
    return(ctxt->html ? 1 : 0);
3249
0
}
3250
3251
/**
3252
 * Check whether the parser is stopped.
3253
 *
3254
 * The parser is stopped on fatal (non-wellformedness) errors or
3255
 * on user request with #xmlStopParser.
3256
 *
3257
 * @since 2.14.0
3258
 *
3259
 * @param ctxt  parser context
3260
 * @returns 1 if the parser is stopped, 0 otherwise.
3261
 */
3262
int
3263
0
xmlCtxtIsStopped(xmlParserCtxt *ctxt) {
3264
0
    if (ctxt == NULL)
3265
0
        return(0);
3266
3267
0
    return(ctxt->disableSAX != 0);
3268
0
}
3269
3270
/**
3271
 * Check whether a DTD subset is being parsed.
3272
 *
3273
 * Should only be used by SAX callbacks.
3274
 *
3275
 * Return values are
3276
 *
3277
 * - 0: not in DTD
3278
 * - 1: in internal DTD subset
3279
 * - 2: in external DTD subset
3280
 *
3281
 * @since 2.15.0
3282
 *
3283
 * @param ctxt  parser context
3284
 * @returns the subset status
3285
 */
3286
int
3287
0
xmlCtxtIsInSubset(xmlParserCtxt *ctxt) {
3288
0
    if (ctxt == NULL)
3289
0
        return(0);
3290
3291
0
    return(ctxt->inSubset);
3292
0
}
3293
3294
#ifdef LIBXML_VALID_ENABLED
/**
 * Access the validation context embedded in the parser context.
 *
 * @since 2.14.0
 *
 * @param ctxt  parser context
 * @returns the validation context, or NULL if `ctxt` is NULL.
 */
xmlValidCtxt *
xmlCtxtGetValidCtxt(xmlParserCtxt *ctxt) {
    xmlValidCtxt *vctxt = NULL;

    if (ctxt != NULL)
        vctxt = &ctxt->vctxt;

    return(vctxt);
}
#endif
3309
3310
/**
3311
 * Return user data.
3312
 *
3313
 * Return user data of a custom SAX parser or the parser context
3314
 * itself if unset.
3315
 *
3316
 * @since 2.15.0
3317
 *
3318
 * @param ctxt  parser context
3319
 * @returns the user data.
3320
 */
3321
void *
3322
0
xmlCtxtGetUserData(xmlParserCtxt *ctxt) {
3323
0
    if (ctxt == NULL)
3324
0
        return NULL;
3325
3326
0
    return ctxt->userData;
3327
0
}
3328
3329
/**
3330
 * Return the current node being parsed.
3331
 *
3332
 * This is only useful if the default SAX callbacks which build
3333
 * a document tree are intercepted. This mode of operation is
3334
 * fragile and discouraged.
3335
 *
3336
 * Returns the current element node, or the document node if no
3337
 * element was parsed yet.
3338
 *
3339
 * @since 2.15.0
3340
 *
3341
 * @param ctxt  parser context
3342
 * @returns the current node.
3343
 */
3344
xmlNode *
3345
0
xmlCtxtGetNode(xmlParserCtxt *ctxt) {
3346
0
    if (ctxt == NULL)
3347
0
        return NULL;
3348
3349
0
    if (ctxt->node != NULL)
3350
0
        return ctxt->node;
3351
0
    return (xmlNode *) ctxt->myDoc;
3352
0
}
3353
3354
/**
3355
 * Return data from the doctype declaration.
3356
 *
3357
 * Should only be used by SAX callbacks.
3358
 *
3359
 * @since 2.15.0
3360
 *
3361
 * @param ctxt  parser context
3362
 * @param name  name of the root element (output)
3363
 * @param systemId  system ID (URI) of the external subset (output)
3364
 * @param publicId  public ID of the external subset (output)
3365
 * @returns 0 on success, -1 if argument is invalid
3366
 */
3367
int
3368
xmlCtxtGetDocTypeDecl(xmlParserCtxt *ctxt,
3369
                      const xmlChar **name,
3370
                      const xmlChar **systemId,
3371
0
                      const xmlChar **publicId) {
3372
0
    if (ctxt == NULL)
3373
0
        return -1;
3374
3375
0
    if (name != NULL)
3376
0
        *name = ctxt->intSubName;
3377
0
    if (systemId != NULL)
3378
0
        *systemId = ctxt->extSubURI;
3379
0
    if (publicId != NULL)
3380
0
        *publicId = ctxt->extSubSystem; /* The member is misnamed */
3381
3382
0
    return 0;
3383
0
}
3384
3385
/**
3386
 * Return input position.
3387
 *
3388
 * Should only be used by error handlers or SAX callbacks.
3389
 *
3390
 * Because of entities, there can be multiple inputs. Non-negative
3391
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3392
 * from the outermost input. Negative values (-1, -2, ...) select
3393
 * inputs starting from the innermost input.
3394
 *
3395
 * The byte position is counted in possibly decoded UTF-8 bytes,
3396
 * so it won't match the position in the raw input data.
3397
 *
3398
 * @since 2.15.0
3399
 *
3400
 * @param ctxt  parser context
3401
 * @param inputIndex  input index
3402
 * @param filename  filename (output)
3403
 * @param line  line number (output)
3404
 * @param col  column number (output)
3405
 * @param utf8BytePos  byte position (output)
3406
 * @returns 0 on success, -1 if arguments are invalid
3407
 */
3408
int
3409
xmlCtxtGetInputPosition(xmlParserCtxt *ctxt, int inputIndex,
3410
                        const char **filename, int *line, int *col,
3411
0
                        unsigned long *utf8BytePos) {
3412
0
    xmlParserInput *input;
3413
3414
0
    if (ctxt == NULL)
3415
0
        return -1;
3416
3417
0
    if (inputIndex < 0) {
3418
0
        inputIndex += ctxt->inputNr;
3419
0
        if (inputIndex < 0)
3420
0
            return -1;
3421
0
    }
3422
0
    if (inputIndex >= ctxt->inputNr)
3423
0
        return -1;
3424
3425
0
    input = ctxt->inputTab[inputIndex];
3426
3427
0
    if (filename != NULL)
3428
0
        *filename = input->filename;
3429
0
    if (line != NULL)
3430
0
        *line = input->line;
3431
0
    if (col != NULL)
3432
0
        *col = input->col;
3433
3434
0
    if (utf8BytePos != NULL) {
3435
0
        unsigned long consumed;
3436
3437
0
        consumed = input->consumed;
3438
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
3439
0
        *utf8BytePos = consumed;
3440
0
    }
3441
3442
0
    return 0;
3443
0
}
3444
3445
/**
3446
 * Return window into input data.
3447
 *
3448
 * Should only be used by error handlers or SAX callbacks.
3449
 * The returned pointer is only valid until the callback returns.
3450
 *
3451
 * Because of entities, there can be multiple inputs. Non-negative
3452
 * values of `inputIndex` (0, 1, 2, ...)  select inputs starting
3453
 * from the outermost input. Negative values (-1, -2, ...) select
3454
 * inputs starting from the innermost input.
3455
 *
3456
 * @since 2.15.0
3457
 *
3458
 * @param ctxt  parser context
3459
 * @param inputIndex  input index
3460
 * @param startOut  start of window (output)
3461
 * @param sizeInOut  maximum size of window (in)
3462
 *                   actual size of window (out)
3463
 * @param offsetOut  offset of current position inside
3464
 *                   window (out)
3465
 * @returns 0 on success, -1 if arguments are invalid
3466
 */
3467
int
3468
xmlCtxtGetInputWindow(xmlParserCtxt *ctxt, int inputIndex,
3469
                      const xmlChar **startOut,
3470
0
                      int *sizeInOut, int *offsetOut) {
3471
0
    xmlParserInput *input;
3472
3473
0
    if (ctxt == NULL || startOut == NULL || sizeInOut == NULL ||
3474
0
        offsetOut == NULL)
3475
0
        return -1;
3476
3477
0
    if (inputIndex < 0) {
3478
0
        inputIndex += ctxt->inputNr;
3479
0
        if (inputIndex < 0)
3480
0
            return -1;
3481
0
    }
3482
0
    if (inputIndex >= ctxt->inputNr)
3483
0
        return -1;
3484
3485
0
    input = ctxt->inputTab[inputIndex];
3486
3487
0
    xmlParserInputGetWindow(input, startOut, sizeInOut, offsetOut);
3488
3489
0
    return 0;
3490
0
}
3491
3492
/************************************************************************
3493
 *                  *
3494
 *    Handling of node information        *
3495
 *                  *
3496
 ************************************************************************/
3497
3498
/**
3499
 * Same as #xmlCtxtReset
3500
 *
3501
 * @deprecated Use #xmlCtxtReset
3502
 *
3503
 * @param ctxt  an XML parser context
3504
 */
3505
void
3506
xmlClearParserCtxt(xmlParserCtxt *ctxt)
3507
0
{
3508
0
    xmlCtxtReset(ctxt);
3509
0
}
3510
3511
3512
/**
3513
 * Find the parser node info struct for a given node
3514
 *
3515
 * @deprecated Don't use.
3516
 *
3517
 * @param ctx  an XML parser context
3518
 * @param node  an XML node within the tree
3519
 * @returns an xmlParserNodeInfo block pointer or NULL
3520
 */
3521
const xmlParserNodeInfo *
3522
xmlParserFindNodeInfo(xmlParserCtxt *ctx, xmlNode *node)
3523
0
{
3524
0
    unsigned long pos;
3525
3526
0
    if ((ctx == NULL) || (node == NULL))
3527
0
        return (NULL);
3528
    /* Find position where node should be at */
3529
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3530
0
    if (pos < ctx->node_seq.length
3531
0
        && ctx->node_seq.buffer[pos].node == node)
3532
0
        return &ctx->node_seq.buffer[pos];
3533
0
    else
3534
0
        return NULL;
3535
0
}
3536
3537
3538
/**
3539
 * Initialize (set to initial state) node info sequence
3540
 *
3541
 * @deprecated Don't use.
3542
 *
3543
 * @param seq  a node info sequence pointer
3544
 */
3545
void
3546
xmlInitNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3547
43.5k
{
3548
43.5k
    if (seq == NULL)
3549
0
        return;
3550
43.5k
    seq->length = 0;
3551
43.5k
    seq->maximum = 0;
3552
43.5k
    seq->buffer = NULL;
3553
43.5k
}
3554
3555
/**
3556
 * Clear (release memory and reinitialize) node info sequence
3557
 *
3558
 * @deprecated Don't use.
3559
 *
3560
 * @param seq  a node info sequence pointer
3561
 */
3562
void
3563
xmlClearNodeInfoSeq(xmlParserNodeInfoSeq *seq)
3564
0
{
3565
0
    if (seq == NULL)
3566
0
        return;
3567
0
    if (seq->buffer != NULL)
3568
0
        xmlFree(seq->buffer);
3569
0
    xmlInitNodeInfoSeq(seq);
3570
0
}
3571
3572
/**
3573
 * Find the index that the info record for the given node is or
3574
 * should be at in a sorted sequence.
3575
 *
3576
 * @deprecated Don't use.
3577
 *
3578
 * @param seq  a node info sequence pointer
3579
 * @param node  an XML node pointer
3580
 * @returns a long indicating the position of the record
3581
 */
3582
unsigned long
3583
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeq *seq,
3584
                           xmlNode *node)
3585
0
{
3586
0
    unsigned long upper, lower, middle;
3587
0
    int found = 0;
3588
3589
0
    if ((seq == NULL) || (node == NULL))
3590
0
        return ((unsigned long) -1);
3591
3592
    /* Do a binary search for the key */
3593
0
    lower = 1;
3594
0
    upper = seq->length;
3595
0
    middle = 0;
3596
0
    while (lower <= upper && !found) {
3597
0
        middle = lower + (upper - lower) / 2;
3598
0
        if (node == seq->buffer[middle - 1].node)
3599
0
            found = 1;
3600
0
        else if (node < seq->buffer[middle - 1].node)
3601
0
            upper = middle - 1;
3602
0
        else
3603
0
            lower = middle + 1;
3604
0
    }
3605
3606
    /* Return position */
3607
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3608
0
        return middle;
3609
0
    else
3610
0
        return middle - 1;
3611
0
}
3612
3613
3614
/**
3615
 * Insert node info record into the sorted sequence
3616
 *
3617
 * @deprecated Don't use.
3618
 *
3619
 * @param ctxt  an XML parser context
3620
 * @param info  a node info sequence pointer
3621
 */
3622
void
3623
xmlParserAddNodeInfo(xmlParserCtxt *ctxt,
3624
                     xmlParserNodeInfo *info)
3625
0
{
3626
0
    unsigned long pos;
3627
3628
0
    if ((ctxt == NULL) || (info == NULL)) return;
3629
3630
    /* Find pos and check to see if node is already in the sequence */
3631
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3632
0
                                     info->node);
3633
3634
0
    if ((pos < ctxt->node_seq.length) &&
3635
0
        (ctxt->node_seq.buffer != NULL) &&
3636
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3637
0
        ctxt->node_seq.buffer[pos] = *info;
3638
0
    }
3639
3640
    /* Otherwise, we need to add new node to buffer */
3641
0
    else {
3642
0
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
3643
0
            xmlParserNodeInfo *tmp;
3644
0
            int newSize;
3645
3646
0
            newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
3647
0
                                      4, XML_MAX_ITEMS);
3648
0
            if (newSize < 0) {
3649
0
    xmlCtxtErrMemory(ctxt);
3650
0
                return;
3651
0
            }
3652
0
            tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
3653
0
            if (tmp == NULL) {
3654
0
    xmlCtxtErrMemory(ctxt);
3655
0
                return;
3656
0
            }
3657
0
            ctxt->node_seq.buffer = tmp;
3658
0
            ctxt->node_seq.maximum = newSize;
3659
0
        }
3660
3661
        /* If position is not at end, move elements out of the way */
3662
0
        if (pos != ctxt->node_seq.length) {
3663
0
            unsigned long i;
3664
3665
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3666
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3667
0
        }
3668
3669
        /* Copy element and increase length */
3670
0
        ctxt->node_seq.buffer[pos] = *info;
3671
0
        ctxt->node_seq.length++;
3672
0
    }
3673
0
}
3674
3675
/************************************************************************
3676
 *                  *
3677
 *    Defaults settings         *
3678
 *                  *
3679
 ************************************************************************/
3680
/**
3681
 * Set and return the previous value for enabling pedantic warnings.
3682
 *
3683
 * @deprecated Use the modern options API with XML_PARSE_PEDANTIC.
3684
 *
3685
 * @param val  int 0 or 1
3686
 * @returns the last value for 0 for no substitution, 1 for substitution.
3687
 */
3688
3689
int
3690
0
xmlPedanticParserDefault(int val) {
3691
0
    int old = xmlPedanticParserDefaultValue;
3692
3693
0
    xmlPedanticParserDefaultValue = val;
3694
0
    return(old);
3695
0
}
3696
3697
/**
3698
 * Has no effect.
3699
 *
3700
 * @deprecated Line numbers are always enabled.
3701
 *
3702
 * @param val  int 0 or 1
3703
 * @returns 1
3704
 */
3705
3706
int
3707
0
xmlLineNumbersDefault(int val ATTRIBUTE_UNUSED) {
3708
0
    return(1);
3709
0
}
3710
3711
/**
3712
 * Set and return the previous value for default entity support.
3713
 *
3714
 * @deprecated Use the modern options API with XML_PARSE_NOENT.
3715
 *
3716
 * @param val  int 0 or 1
3717
 * @returns the last value for 0 for no substitution, 1 for substitution.
3718
 */
3719
3720
int
3721
0
xmlSubstituteEntitiesDefault(int val) {
3722
0
    int old = xmlSubstituteEntitiesDefaultValue;
3723
3724
0
    xmlSubstituteEntitiesDefaultValue = val;
3725
0
    return(old);
3726
0
}
3727
3728
/**
3729
 * Set and return the previous value for default blanks text nodes support.
3730
 *
3731
 * @deprecated Use the modern options API with XML_PARSE_NOBLANKS.
3732
 *
3733
 * @param val  int 0 or 1
3734
 * @returns the last value for 0 for no substitution, 1 for substitution.
3735
 */
3736
3737
int
3738
0
xmlKeepBlanksDefault(int val) {
3739
0
    int old = xmlKeepBlanksDefaultValue;
3740
3741
0
    xmlKeepBlanksDefaultValue = val;
3742
0
#ifdef LIBXML_OUTPUT_ENABLED
3743
0
    if (!val)
3744
0
        xmlIndentTreeOutput = 1;
3745
0
#endif
3746
0
    return(old);
3747
0
}
3748