Coverage Report

Created: 2025-07-18 06:31

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
#include <libxml/nanohttp.h>
39
40
#define CUR(ctxt) ctxt->input->cur
41
#define END(ctxt) ctxt->input->end
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
#include "private/io.h"
47
#include "private/memory.h"
48
#include "private/parser.h"
49
50
#ifndef SIZE_MAX
51
  #define SIZE_MAX ((size_t) -1)
52
#endif
53
54
60.6M
#define XML_MAX_ERRORS 100
55
56
/*
57
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
58
 * factor of serialized output after entity expansion.
59
 */
60
1.02M
#define XML_MAX_AMPLIFICATION_DEFAULT 5
61
62
/*
63
 * Various global defaults for parsing
64
 */
65
66
/**
67
 * xmlCheckVersion:
68
 * @version: the include version number
69
 *
70
 * check the compiled lib version against the include one.
71
 */
72
void
73
0
xmlCheckVersion(int version) {
74
0
    int myversion = LIBXML_VERSION;
75
76
0
    xmlInitParser();
77
78
0
    if ((myversion / 10000) != (version / 10000)) {
79
0
  xmlPrintErrorMessage(
80
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
81
0
    (version / 10000), (myversion / 10000));
82
0
    } else if ((myversion / 100) < (version / 100)) {
83
0
  xmlPrintErrorMessage(
84
0
    "Warning: program compiled against libxml %d using older %d\n",
85
0
    (version / 100), (myversion / 100));
86
0
    }
87
0
}
88
89
90
/************************************************************************
91
 *                  *
92
 *    Some factorized error routines        *
93
 *                  *
94
 ************************************************************************/
95
96
97
/**
98
 * xmlCtxtSetErrorHandler:
99
 * @ctxt:  an XML parser context
100
 * @handler:  error handler
101
 * @data:  data for error handler
102
 *
103
 * Register a callback function that will be called on errors and
104
 * warnings. If handler is NULL, the error handler will be deactivated.
105
 *
106
 * This is the recommended way to collect errors from the parser and
107
 * takes precedence over all other error reporting mechanisms.
108
 * These are (in order of precedence):
109
 *
110
 * - per-context structured handler (xmlCtxtSetErrorHandler)
111
 * - per-context structured "serror" SAX handler
112
 * - global structured handler (xmlSetStructuredErrorFunc)
113
 * - per-context generic "error" and "warning" SAX handlers
114
 * - global generic handler (xmlSetGenericErrorFunc)
115
 * - print to stderr
116
 *
117
 * Available since 2.13.0.
118
 */
119
void
120
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
121
                       void *data)
122
0
{
123
0
    if (ctxt == NULL)
124
0
        return;
125
0
    ctxt->errorHandler = handler;
126
0
    ctxt->errorCtxt = data;
127
0
}
128
129
/**
130
 * xmlCtxtGetLastError:
131
 * @ctx:  an XML parser context
132
 *
133
 * Get the last parsing error registered.
134
 *
135
 * Returns NULL if no error occurred or a pointer to the error
136
 */
137
const xmlError *
138
xmlCtxtGetLastError(void *ctx)
139
0
{
140
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
141
142
0
    if (ctxt == NULL)
143
0
        return (NULL);
144
0
    if (ctxt->lastError.code == XML_ERR_OK)
145
0
        return (NULL);
146
0
    return (&ctxt->lastError);
147
0
}
148
149
/**
150
 * xmlCtxtResetLastError:
151
 * @ctx:  an XML parser context
152
 *
153
 * Cleanup the last global error registered. For parsing error
154
 * this does not change the well-formedness result.
155
 */
156
void
157
xmlCtxtResetLastError(void *ctx)
158
0
{
159
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
160
161
0
    if (ctxt == NULL)
162
0
        return;
163
0
    ctxt->errNo = XML_ERR_OK;
164
0
    if (ctxt->lastError.code == XML_ERR_OK)
165
0
        return;
166
0
    xmlResetError(&ctxt->lastError);
167
0
}
168
169
/**
170
 * xmlCtxtErrMemory:
171
 * @ctxt:  an XML parser context
172
 *
173
 * Handle an out-of-memory error.
174
 *
175
 * Available since 2.13.0.
176
 */
177
void
178
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
179
4.45k
{
180
4.45k
    xmlStructuredErrorFunc schannel = NULL;
181
4.45k
    xmlGenericErrorFunc channel = NULL;
182
4.45k
    void *data;
183
184
4.45k
    if (ctxt == NULL) {
185
0
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
186
0
        return;
187
0
    }
188
189
4.45k
    ctxt->errNo = XML_ERR_NO_MEMORY;
190
4.45k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
191
4.45k
    ctxt->wellFormed = 0;
192
4.45k
    ctxt->disableSAX = 2;
193
194
4.45k
    if (ctxt->errorHandler) {
195
0
        schannel = ctxt->errorHandler;
196
0
        data = ctxt->errorCtxt;
197
4.45k
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
198
4.45k
        (ctxt->sax->serror != NULL)) {
199
0
        schannel = ctxt->sax->serror;
200
0
        data = ctxt->userData;
201
4.45k
    } else {
202
4.45k
        channel = ctxt->sax->error;
203
4.45k
        data = ctxt->userData;
204
4.45k
    }
205
206
4.45k
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
207
4.45k
                        &ctxt->lastError);
208
4.45k
}
209
210
/**
211
 * xmlCtxtErrIO:
212
 * @ctxt:  parser context
213
 * @code:  xmlParserErrors code
214
 * @uri:  filename or URI (optional)
215
 *
216
 * If filename is empty, use the one from context input if available.
217
 *
218
 * Report an IO error to the parser context.
219
 */
220
void
221
xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
222
106k
{
223
106k
    const char *errstr, *msg, *str1, *str2;
224
106k
    xmlErrorLevel level;
225
226
106k
    if (ctxt == NULL)
227
0
        return;
228
229
106k
    if (((code == XML_IO_ENOENT) ||
230
106k
         (code == XML_IO_UNKNOWN))) {
231
        /*
232
         * Only report a warning if a file could not be found. This should
233
         * only be done for external entities, but the external entity loader
234
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
235
         * raise an error and aborts parsing.
236
         */
237
0
        if (ctxt->validate == 0)
238
0
            level = XML_ERR_WARNING;
239
0
        else
240
0
            level = XML_ERR_ERROR;
241
106k
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
242
0
        level = XML_ERR_ERROR;
243
106k
    } else {
244
106k
        level = XML_ERR_FATAL;
245
106k
    }
246
247
106k
    errstr = xmlErrString(code);
248
249
106k
    if (uri == NULL) {
250
106k
        msg = "%s\n";
251
106k
        str1 = errstr;
252
106k
        str2 = NULL;
253
106k
    } else {
254
0
        msg = "failed to load \"%s\": %s\n";
255
0
        str1 = uri;
256
0
        str2 = errstr;
257
0
    }
258
259
106k
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
260
106k
               (const xmlChar *) uri, NULL, NULL, 0,
261
106k
               msg, str1, str2);
262
106k
}
263
264
/**
265
 * xmlCtxtIsCatastrophicError:
266
 * @ctxt:  parser context
267
 *
268
 * Returns true if the last error is catastrophic.
269
 */
270
int
271
63.6M
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt) {
272
63.6M
    if (ctxt == NULL)
273
0
        return(1);
274
275
63.6M
    return(xmlIsCatastrophicError(ctxt->lastError.level,
276
63.6M
                                  ctxt->lastError.code));
277
63.6M
}
278
279
/**
280
 * xmlCtxtVErr:
281
 * @ctxt:  a parser context
282
 * @node: the current node or NULL
283
 * @domain: the domain for the error
284
 * @code: the code for the error
285
 * @level: the xmlErrorLevel for the error
286
 * @str1: extra string info
287
 * @str2: extra string info
288
 * @str3: extra string info
289
 * @int1: extra int info
290
 * @msg:  the message to display/transmit
291
 * @ap:  extra parameters for the message display
292
 *
293
 * Raise a parser error.
294
 */
295
void
296
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
297
            xmlParserErrors code, xmlErrorLevel level,
298
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
299
            int int1, const char *msg, va_list ap)
300
60.6M
{
301
60.6M
    xmlStructuredErrorFunc schannel = NULL;
302
60.6M
    xmlGenericErrorFunc channel = NULL;
303
60.6M
    void *data = NULL;
304
60.6M
    const char *file = NULL;
305
60.6M
    int line = 0;
306
60.6M
    int col = 0;
307
60.6M
    int res;
308
309
60.6M
    if (code == XML_ERR_NO_MEMORY) {
310
51
        xmlCtxtErrMemory(ctxt);
311
51
        return;
312
51
    }
313
314
60.6M
    if (ctxt == NULL) {
315
0
        res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
316
0
                             level, NULL, 0, (const char *) str1,
317
0
                             (const char *) str2, (const char *) str3,
318
0
                             int1, 0, msg, ap);
319
0
        if (res < 0)
320
0
            xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
321
322
0
        return;
323
0
    }
324
325
60.6M
    if (PARSER_STOPPED(ctxt))
326
16.8k
  return;
327
328
    /* Don't overwrite catastrophic errors */
329
60.6M
    if (xmlCtxtIsCatastrophicError(ctxt))
330
0
        return;
331
332
60.6M
    if (level == XML_ERR_WARNING) {
333
83.9k
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
334
307
            return;
335
83.6k
        ctxt->nbWarnings += 1;
336
60.5M
    } else {
337
        /* Report at least one fatal error. */
338
60.5M
        if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
339
60.5M
            ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)) &&
340
60.5M
            (!xmlIsCatastrophicError(level, code)))
341
57.0M
            return;
342
3.45M
        ctxt->nbErrors += 1;
343
3.45M
    }
344
345
3.53M
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
346
3.53M
        ((level != XML_ERR_WARNING) ||
347
3.53M
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
348
3.53M
        if (ctxt->errorHandler) {
349
0
            schannel = ctxt->errorHandler;
350
0
            data = ctxt->errorCtxt;
351
3.53M
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
352
3.53M
            (ctxt->sax->serror != NULL)) {
353
0
            schannel = ctxt->sax->serror;
354
0
            data = ctxt->userData;
355
3.53M
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
356
13.0k
            if (level == XML_ERR_WARNING)
357
0
                channel = ctxt->vctxt.warning;
358
13.0k
            else
359
13.0k
                channel = ctxt->vctxt.error;
360
13.0k
            data = ctxt->vctxt.userData;
361
3.52M
        } else {
362
3.52M
            if (level == XML_ERR_WARNING)
363
83.6k
                channel = ctxt->sax->warning;
364
3.43M
            else
365
3.43M
                channel = ctxt->sax->error;
366
3.52M
            data = ctxt->userData;
367
3.52M
        }
368
3.53M
    }
369
370
3.53M
    if (ctxt->input != NULL) {
371
3.53M
        xmlParserInputPtr input = ctxt->input;
372
373
3.53M
        if ((input->filename == NULL) &&
374
3.53M
            (ctxt->inputNr > 1)) {
375
274k
            input = ctxt->inputTab[ctxt->inputNr - 2];
376
274k
        }
377
3.53M
        file = input->filename;
378
3.53M
        line = input->line;
379
3.53M
        col = input->col;
380
3.53M
    }
381
382
3.53M
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
383
3.53M
                         level, file, line, (const char *) str1,
384
3.53M
                         (const char *) str2, (const char *) str3, int1, col,
385
3.53M
                         msg, ap);
386
387
3.53M
    if (res < 0) {
388
408
        xmlCtxtErrMemory(ctxt);
389
408
        return;
390
408
    }
391
392
3.53M
    if (level >= XML_ERR_ERROR)
393
3.45M
        ctxt->errNo = code;
394
3.53M
    if (level == XML_ERR_FATAL) {
395
3.00M
        ctxt->wellFormed = 0;
396
397
3.00M
        if (xmlCtxtIsCatastrophicError(ctxt))
398
0
            ctxt->disableSAX = 2; /* stop parser */
399
3.00M
        else if (ctxt->recovery == 0)
400
3.00M
            ctxt->disableSAX = 1;
401
3.00M
    }
402
3.53M
}
403
404
/**
405
 * xmlCtxtErr:
406
 * @ctxt:  a parser context
407
 * @node: the current node or NULL
408
 * @domain: the domain for the error
409
 * @code: the code for the error
410
 * @level: the xmlErrorLevel for the error
411
 * @str1: extra string info
412
 * @str2: extra string info
413
 * @str3: extra string info
414
 * @int1: extra int info
415
 * @msg:  the message to display/transmit
416
 * @...:  extra parameters for the message display
417
 *
418
 * Raise a parser error.
419
 */
420
void
421
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
422
           xmlParserErrors code, xmlErrorLevel level,
423
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
424
           int int1, const char *msg, ...)
425
60.6M
{
426
60.6M
    va_list ap;
427
428
60.6M
    va_start(ap, msg);
429
60.6M
    xmlCtxtVErr(ctxt, node, domain, code, level,
430
60.6M
                str1, str2, str3, int1, msg, ap);
431
60.6M
    va_end(ap);
432
60.6M
}
433
434
/**
435
 * xmlCtxtGetStatus:
436
 * @ctxt:  an XML parser context
437
 *
438
 * Get well-formedness and validation status after parsing. Also
439
 * reports catastrophic errors which are not related to parsing
440
 * like out-of-memory, I/O or other errors.
441
 *
442
 * Available since 2.14.0.
443
 *
444
 * Returns a bitmask of XML_STATUS_* flags ORed together.
445
 */
446
xmlParserStatus
447
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
448
0
    xmlParserStatus bits = 0;
449
450
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
451
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
452
0
                XML_STATUS_NOT_WELL_FORMED |
453
0
                XML_STATUS_NOT_NS_WELL_FORMED;
454
0
        if ((ctxt != NULL) && (ctxt->validate))
455
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
456
457
0
        return(bits);
458
0
    }
459
460
0
    if (!ctxt->wellFormed)
461
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
462
0
    if (!ctxt->nsWellFormed)
463
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
464
0
    if ((ctxt->validate) && (!ctxt->valid))
465
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
466
467
0
    return(bits);
468
0
}
469
470
/**
471
 * xmlFatalErr:
472
 * @ctxt:  an XML parser context
473
 * @code:  the error number
474
 * @info:  extra information string
475
 *
476
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
477
 */
478
void
479
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
480
6.23M
{
481
6.23M
    const char *errmsg;
482
483
6.23M
    errmsg = xmlErrString(code);
484
485
6.23M
    if (info == NULL) {
486
1.73M
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
487
1.73M
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
488
4.49M
    } else {
489
4.49M
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
490
4.49M
                   (const xmlChar *) info, NULL, NULL, 0,
491
4.49M
                   "%s: %s\n", errmsg, info);
492
4.49M
    }
493
6.23M
}
494
495
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
510
511
/************************************************************************
512
 *                  *
513
 *    Input handling functions for progressive parsing  *
514
 *                  *
515
 ************************************************************************/
516
517
/* we need to keep enough input to show errors in context */
518
39.3M
#define LINE_LEN        80
519
520
/**
521
 * xmlHaltParser:
522
 * @ctxt:  an XML parser context
523
 *
524
 * Blocks further parser processing don't override error
525
 * for internal use
526
 */
527
void
528
48.7k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
529
48.7k
    if (ctxt == NULL)
530
0
        return;
531
48.7k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
532
48.7k
    ctxt->disableSAX = 2;
533
48.7k
}
534
535
/**
536
 * xmlParserInputRead:
537
 * @in:  an XML parser input
538
 * @len:  an indicative size for the lookahead
539
 *
540
 * DEPRECATED: This function was internal and is deprecated.
541
 *
542
 * Returns -1 as this is an error to use it.
543
 */
544
int
545
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
546
0
    return(-1);
547
0
}
548
549
/**
550
 * xmlParserGrow:
551
 * @ctxt:  an XML parser context
552
 *
553
 * Grow the input buffer.
554
 *
555
 * Returns the number of bytes read or -1 in case of error.
556
 */
557
int
558
59.5M
xmlParserGrow(xmlParserCtxtPtr ctxt) {
559
59.5M
    xmlParserInputPtr in = ctxt->input;
560
59.5M
    xmlParserInputBufferPtr buf = in->buf;
561
59.5M
    size_t curEnd = in->end - in->cur;
562
59.5M
    size_t curBase = in->cur - in->base;
563
59.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
564
0
                       XML_MAX_HUGE_LENGTH :
565
59.5M
                       XML_MAX_LOOKUP_LIMIT;
566
59.5M
    int ret;
567
568
59.5M
    if (buf == NULL)
569
0
        return(0);
570
    /* Don't grow push parser buffer. */
571
59.5M
    if (PARSER_PROGRESSIVE(ctxt))
572
0
        return(0);
573
    /* Don't grow memory buffers. */
574
59.5M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
575
47.2M
        return(0);
576
12.3M
    if (buf->error != 0)
577
109k
        return(-1);
578
579
12.2M
    if (curBase > maxLength) {
580
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
581
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
582
0
        xmlHaltParser(ctxt);
583
0
  return(-1);
584
0
    }
585
586
12.2M
    if (curEnd >= INPUT_CHUNK)
587
31.1k
        return(0);
588
589
12.2M
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
590
12.2M
    xmlBufUpdateInput(buf->buffer, in, curBase);
591
592
12.2M
    if (ret < 0) {
593
409
        xmlCtxtErrIO(ctxt, buf->error, NULL);
594
409
    }
595
596
12.2M
    return(ret);
597
12.2M
}
598
599
/**
600
 * xmlParserCheckEOF:
601
 * @ctxt:  parser ctxt
602
 * @code:  error code
603
 *
604
 * Raises an error with @code if the input wasn't consumed
605
 * completely.
606
 */
607
void
608
714k
xmlParserCheckEOF(xmlParserCtxtPtr ctxt, xmlParserErrors code) {
609
714k
    xmlParserInputPtr in = ctxt->input;
610
714k
    xmlParserInputBufferPtr buf;
611
612
714k
    if (ctxt->errNo != XML_ERR_OK)
613
259k
        return;
614
615
454k
    if (in->cur < in->end) {
616
1.82k
        xmlFatalErr(ctxt, code, NULL);
617
1.82k
        return;
618
1.82k
    }
619
620
453k
    buf = in->buf;
621
453k
    if ((buf != NULL) && (buf->encoder != NULL)) {
622
37.3k
        size_t curBase = in->cur - in->base;
623
37.3k
        size_t sizeOut = 64;
624
37.3k
        xmlCharEncError ret;
625
626
        /*
627
         * Check for truncated multi-byte sequence
628
         */
629
37.3k
        ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
630
37.3k
        xmlBufUpdateInput(buf->buffer, in, curBase);
631
37.3k
        if (ret != XML_ENC_ERR_SUCCESS) {
632
2
            xmlCtxtErrIO(ctxt, buf->error, NULL);
633
2
            return;
634
2
        }
635
636
        /* Shouldn't happen */
637
37.3k
        if (in->cur < in->end)
638
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
639
37.3k
    }
640
453k
}
641
642
/**
643
 * xmlParserInputGrow:
644
 * @in:  an XML parser input
645
 * @len:  an indicative size for the lookahead
646
 *
647
 * DEPRECATED: Don't use.
648
 *
649
 * This function increase the input for the parser. It tries to
650
 * preserve pointers to the input buffer, and keep already read data
651
 *
652
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
653
 * end of this entity
654
 */
655
int
656
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
657
0
    int ret;
658
0
    size_t indx;
659
660
0
    if ((in == NULL) || (len < 0)) return(-1);
661
0
    if (in->buf == NULL) return(-1);
662
0
    if (in->base == NULL) return(-1);
663
0
    if (in->cur == NULL) return(-1);
664
0
    if (in->buf->buffer == NULL) return(-1);
665
666
    /* Don't grow memory buffers. */
667
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
668
0
        return(0);
669
670
0
    indx = in->cur - in->base;
671
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
672
0
        return(0);
673
0
    }
674
0
    ret = xmlParserInputBufferGrow(in->buf, len);
675
676
0
    in->base = xmlBufContent(in->buf->buffer);
677
0
    if (in->base == NULL) {
678
0
        in->base = BAD_CAST "";
679
0
        in->cur = in->base;
680
0
        in->end = in->base;
681
0
        return(-1);
682
0
    }
683
0
    in->cur = in->base + indx;
684
0
    in->end = xmlBufEnd(in->buf->buffer);
685
686
0
    return(ret);
687
0
}
688
689
/**
690
 * xmlParserShrink:
691
 * @ctxt:  an XML parser context
692
 *
693
 * Shrink the input buffer.
694
 */
695
void
696
22.4M
xmlParserShrink(xmlParserCtxtPtr ctxt) {
697
22.4M
    xmlParserInputPtr in = ctxt->input;
698
22.4M
    xmlParserInputBufferPtr buf = in->buf;
699
22.4M
    size_t used, res;
700
701
22.4M
    if (buf == NULL)
702
0
        return;
703
704
22.4M
    used = in->cur - in->base;
705
706
22.4M
    if (used > LINE_LEN) {
707
16.9M
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
708
709
16.9M
        if (res > 0) {
710
16.9M
            used -= res;
711
16.9M
            if ((res > ULONG_MAX) ||
712
16.9M
                (in->consumed > ULONG_MAX - (unsigned long)res))
713
0
                in->consumed = ULONG_MAX;
714
16.9M
            else
715
16.9M
                in->consumed += res;
716
16.9M
        }
717
718
16.9M
        xmlBufUpdateInput(buf->buffer, in, used);
719
16.9M
    }
720
22.4M
}
721
722
/**
723
 * xmlParserInputShrink:
724
 * @in:  an XML parser input
725
 *
726
 * DEPRECATED: Don't use.
727
 *
728
 * This function removes used input for the parser.
729
 */
730
void
731
0
xmlParserInputShrink(xmlParserInputPtr in) {
732
0
    size_t used;
733
0
    size_t ret;
734
735
0
    if (in == NULL) return;
736
0
    if (in->buf == NULL) return;
737
0
    if (in->base == NULL) return;
738
0
    if (in->cur == NULL) return;
739
0
    if (in->buf->buffer == NULL) return;
740
741
0
    used = in->cur - in->base;
742
743
0
    if (used > LINE_LEN) {
744
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
745
0
  if (ret > 0) {
746
0
            used -= ret;
747
0
            if ((ret > ULONG_MAX) ||
748
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
749
0
                in->consumed = ULONG_MAX;
750
0
            else
751
0
                in->consumed += ret;
752
0
  }
753
754
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
755
0
    }
756
0
}
757
758
/************************************************************************
759
 *                  *
760
 *    UTF8 character input and related functions    *
761
 *                  *
762
 ************************************************************************/
763
764
/**
765
 * xmlNextChar:
766
 * @ctxt:  the XML parser context
767
 *
768
 * DEPRECATED: Internal function, do not use.
769
 *
770
 * Skip to the next char input char.
771
 */
772
773
void
774
xmlNextChar(xmlParserCtxtPtr ctxt)
775
33.4M
{
776
33.4M
    const unsigned char *cur;
777
33.4M
    size_t avail;
778
33.4M
    int c;
779
780
33.4M
    if ((ctxt == NULL) || (ctxt->input == NULL))
781
0
        return;
782
783
33.4M
    avail = ctxt->input->end - ctxt->input->cur;
784
785
33.4M
    if (avail < INPUT_CHUNK) {
786
8.83M
        xmlParserGrow(ctxt);
787
8.83M
        if (ctxt->input->cur >= ctxt->input->end)
788
6.62k
            return;
789
8.83M
        avail = ctxt->input->end - ctxt->input->cur;
790
8.83M
    }
791
792
33.4M
    cur = ctxt->input->cur;
793
33.4M
    c = *cur;
794
795
33.4M
    if (c < 0x80) {
796
32.0M
        if (c == '\n') {
797
70.6k
            ctxt->input->cur++;
798
70.6k
            ctxt->input->line++;
799
70.6k
            ctxt->input->col = 1;
800
31.9M
        } else if (c == '\r') {
801
            /*
802
             *   2.11 End-of-Line Handling
803
             *   the literal two-character sequence "#xD#xA" or a standalone
804
             *   literal #xD, an XML processor must pass to the application
805
             *   the single character #xA.
806
             */
807
288k
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
808
288k
            ctxt->input->line++;
809
288k
            ctxt->input->col = 1;
810
288k
            return;
811
31.7M
        } else {
812
31.7M
            ctxt->input->cur++;
813
31.7M
            ctxt->input->col++;
814
31.7M
        }
815
32.0M
    } else {
816
1.39M
        ctxt->input->col++;
817
818
1.39M
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
819
217k
            goto encoding_error;
820
821
1.18M
        if (c < 0xe0) {
822
            /* 2-byte code */
823
858k
            if (c < 0xc2)
824
769k
                goto encoding_error;
825
89.0k
            ctxt->input->cur += 2;
826
323k
        } else {
827
323k
            unsigned int val = (c << 8) | cur[1];
828
829
323k
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
830
4.39k
                goto encoding_error;
831
832
318k
            if (c < 0xf0) {
833
                /* 3-byte code */
834
316k
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
835
525
                    goto encoding_error;
836
315k
                ctxt->input->cur += 3;
837
315k
            } else {
838
2.27k
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
839
653
                    goto encoding_error;
840
841
                /* 4-byte code */
842
1.62k
                if ((val < 0xf090) || (val >= 0xf490))
843
564
                    goto encoding_error;
844
1.05k
                ctxt->input->cur += 4;
845
1.05k
            }
846
318k
        }
847
1.18M
    }
848
849
32.1M
    return;
850
851
32.1M
encoding_error:
852
    /* Only report the first error */
853
993k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
854
24.3k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
855
24.3k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
856
24.3k
    }
857
993k
    ctxt->input->cur++;
858
993k
}
859
860
/**
861
 * xmlCurrentChar:
862
 * @ctxt:  the XML parser context
863
 * @len:  pointer to the length of the char read
864
 *
865
 * DEPRECATED: Internal function, do not use.
866
 *
867
 * The current char value, if using UTF-8 this may actually span multiple
868
 * bytes in the input buffer. Implement the end of line normalization:
869
 * 2.11 End-of-Line Handling
870
 * Wherever an external parsed entity or the literal entity value
871
 * of an internal parsed entity contains either the literal two-character
872
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
873
 * must pass to the application the single character #xA.
874
 * This behavior can conveniently be produced by normalizing all
875
 * line breaks to #xA on input, before parsing.)
876
 *
877
 * Returns the current char value and its length
878
 */
879
880
int
881
329M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
882
329M
    const unsigned char *cur;
883
329M
    size_t avail;
884
329M
    int c;
885
886
329M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
887
888
329M
    avail = ctxt->input->end - ctxt->input->cur;
889
890
329M
    if (avail < INPUT_CHUNK) {
891
12.4M
        xmlParserGrow(ctxt);
892
12.4M
        avail = ctxt->input->end - ctxt->input->cur;
893
12.4M
    }
894
895
329M
    cur = ctxt->input->cur;
896
329M
    c = *cur;
897
898
329M
    if (c < 0x80) {
899
  /* 1-byte code */
900
205M
        if (c < 0x20) {
901
            /*
902
             *   2.11 End-of-Line Handling
903
             *   the literal two-character sequence "#xD#xA" or a standalone
904
             *   literal #xD, an XML processor must pass to the application
905
             *   the single character #xA.
906
             */
907
66.0M
            if (c == '\r') {
908
                /*
909
                 * TODO: This function shouldn't change the 'cur' pointer
910
                 * as side effect, but the NEXTL macro in parser.c relies
911
                 * on this behavior when incrementing line numbers.
912
                 */
913
183k
                if (cur[1] == '\n')
914
67.0k
                    ctxt->input->cur++;
915
183k
                *len = 1;
916
183k
                c = '\n';
917
65.8M
            } else if (c == 0) {
918
4.53M
                if (ctxt->input->cur >= ctxt->input->end) {
919
47.9k
                    *len = 0;
920
4.48M
                } else {
921
4.48M
                    *len = 1;
922
                    /*
923
                     * TODO: Null bytes should be handled by callers,
924
                     * but this can be tricky.
925
                     */
926
4.48M
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
927
4.48M
                            "Char 0x0 out of allowed range\n");
928
4.48M
                }
929
61.3M
            } else {
930
61.3M
                *len = 1;
931
61.3M
            }
932
139M
        } else {
933
139M
            *len = 1;
934
139M
        }
935
936
205M
        return(c);
937
205M
    } else {
938
124M
        int val;
939
940
124M
        if (avail < 2)
941
9.19k
            goto incomplete_sequence;
942
124M
        if ((cur[1] & 0xc0) != 0x80)
943
8.52M
            goto encoding_error;
944
945
115M
        if (c < 0xe0) {
946
            /* 2-byte code */
947
75.4M
            if (c < 0xc2)
948
34.4M
                goto encoding_error;
949
40.9M
            val = (c & 0x1f) << 6;
950
40.9M
            val |= cur[1] & 0x3f;
951
40.9M
            *len = 2;
952
40.9M
        } else {
953
40.3M
            if (avail < 3)
954
476
                goto incomplete_sequence;
955
40.3M
            if ((cur[2] & 0xc0) != 0x80)
956
27.2k
                goto encoding_error;
957
958
40.3M
            if (c < 0xf0) {
959
                /* 3-byte code */
960
40.3M
                val = (c & 0xf) << 12;
961
40.3M
                val |= (cur[1] & 0x3f) << 6;
962
40.3M
                val |= cur[2] & 0x3f;
963
40.3M
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
964
1.76k
                    goto encoding_error;
965
40.3M
                *len = 3;
966
40.3M
            } else {
967
61.5k
                if (avail < 4)
968
256
                    goto incomplete_sequence;
969
61.3k
                if ((cur[3] & 0xc0) != 0x80)
970
5.60k
                    goto encoding_error;
971
972
                /* 4-byte code */
973
55.7k
                val = (c & 0x0f) << 18;
974
55.7k
                val |= (cur[1] & 0x3f) << 12;
975
55.7k
                val |= (cur[2] & 0x3f) << 6;
976
55.7k
                val |= cur[3] & 0x3f;
977
55.7k
                if ((val < 0x10000) || (val >= 0x110000))
978
9.67k
                    goto encoding_error;
979
46.0k
                *len = 4;
980
46.0k
            }
981
40.3M
        }
982
983
81.2M
        return(val);
984
115M
    }
985
986
43.0M
encoding_error:
987
    /* Only report the first error */
988
43.0M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
989
53.5k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
990
53.5k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
991
53.5k
    }
992
43.0M
    *len = 1;
993
43.0M
    return(XML_INVALID_CHAR);
994
995
9.92k
incomplete_sequence:
996
    /*
997
     * An encoding problem may arise from a truncated input buffer
998
     * splitting a character in the middle. In that case do not raise
999
     * an error but return 0. This should only happen when push parsing
1000
     * char data.
1001
     */
1002
9.92k
    *len = 0;
1003
9.92k
    return(0);
1004
329M
}
1005
1006
/**
1007
 * xmlStringCurrentChar:
1008
 * @ctxt:  the XML parser context
1009
 * @cur:  pointer to the beginning of the char
1010
 * @len:  pointer to the length of the char read
1011
 *
1012
 * DEPRECATED: Internal function, do not use.
1013
 *
1014
 * The current char value, if using UTF-8 this may actually span multiple
1015
 * bytes in the input buffer.
1016
 *
1017
 * Returns the current char value and its length
1018
 */
1019
1020
int
1021
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
1022
7.30M
                     const xmlChar *cur, int *len) {
1023
7.30M
    int c;
1024
1025
7.30M
    if ((cur == NULL) || (len == NULL))
1026
0
        return(0);
1027
1028
    /* cur is zero-terminated, so we can lie about its length. */
1029
7.30M
    *len = 4;
1030
7.30M
    c = xmlGetUTF8Char(cur, len);
1031
1032
7.30M
    return((c < 0) ? 0 : c);
1033
7.30M
}
1034
1035
/**
1036
 * xmlCopyCharMultiByte:
1037
 * @out:  pointer to an array of xmlChar
1038
 * @val:  the char value
1039
 *
1040
 * DEPRECATED: Internal function, don't use.
1041
 *
1042
 * append the char value in the array
1043
 *
1044
 * Returns the number of xmlChar written
1045
 */
1046
int
1047
109M
xmlCopyCharMultiByte(xmlChar *out, int val) {
1048
109M
    if ((out == NULL) || (val < 0)) return(0);
1049
    /*
1050
     * We are supposed to handle UTF8, check it's valid
1051
     * From rfc2044: encoding of the Unicode values on UTF-8:
1052
     *
1053
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1054
     * 0000 0000-0000 007F   0xxxxxxx
1055
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1056
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1057
     */
1058
109M
    if  (val >= 0x80) {
1059
109M
  xmlChar *savedout = out;
1060
109M
  int bits;
1061
109M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1062
86.8M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1063
21.8k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1064
0
  else {
1065
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1066
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1067
0
#endif
1068
0
      return(0);
1069
0
  }
1070
305M
  for ( ; bits >= 0; bits-= 6)
1071
196M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1072
109M
  return (out - savedout);
1073
109M
    }
1074
0
    *out = val;
1075
0
    return 1;
1076
109M
}
1077
1078
/**
1079
 * xmlCopyChar:
1080
 * @len:  Ignored, compatibility
1081
 * @out:  pointer to an array of xmlChar
1082
 * @val:  the char value
1083
 *
1084
 * DEPRECATED: Don't use.
1085
 *
1086
 * append the char value in the array
1087
 *
1088
 * Returns the number of xmlChar written
1089
 */
1090
1091
int
1092
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1093
0
    if ((out == NULL) || (val < 0)) return(0);
1094
    /* the len parameter is ignored */
1095
0
    if  (val >= 0x80) {
1096
0
  return(xmlCopyCharMultiByte (out, val));
1097
0
    }
1098
0
    *out = val;
1099
0
    return 1;
1100
0
}
1101
1102
/************************************************************************
1103
 *                  *
1104
 *    Commodity functions to switch encodings     *
1105
 *                  *
1106
 ************************************************************************/
1107
1108
/**
1109
 * xmlCtxtSetCharEncConvImpl:
1110
 * @ctxt:  parser context
1111
 * @impl:  callback
1112
 * @vctxt:  user data
1113
 *
1114
 * Installs a custom implementation to convert between character
1115
 * encodings.
1116
 *
1117
 * This bypasses legacy feature like global encoding handlers or
1118
 * encoding aliases.
1119
 *
1120
 * Available since 2.14.0.
1121
 */
1122
void
1123
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
1124
0
                          void *vctxt) {
1125
0
    if (ctxt == NULL)
1126
0
        return;
1127
1128
0
    ctxt->convImpl = impl;
1129
0
    ctxt->convCtxt = vctxt;
1130
0
}
1131
1132
static xmlParserErrors
1133
2.51k
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1134
2.51k
    xmlChar out[200];
1135
2.51k
    xmlParserInputPtr input = ctxt->input;
1136
2.51k
    xmlCharEncodingHandlerPtr handler;
1137
2.51k
    int inlen, outlen, i;
1138
2.51k
    xmlParserErrors code;
1139
2.51k
    xmlCharEncError res;
1140
1141
2.51k
    *hout = NULL;
1142
1143
    /*
1144
     * To detect the EBCDIC code page, we convert the first 200 bytes
1145
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1146
     */
1147
2.51k
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1148
2.51k
            ctxt->convImpl, ctxt->convCtxt, &handler);
1149
2.51k
    if (code != XML_ERR_OK)
1150
2
        return(code);
1151
2.51k
    outlen = sizeof(out) - 1;
1152
2.51k
    inlen = input->end - input->cur;
1153
2.51k
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
1154
2.51k
                           /* flush */ 0);
1155
    /*
1156
     * Return the EBCDIC handler if decoding failed. The error will
1157
     * be reported later.
1158
     */
1159
2.51k
    if (res < 0)
1160
232
        goto done;
1161
2.27k
    out[outlen] = 0;
1162
1163
55.8k
    for (i = 0; i < outlen; i++) {
1164
55.5k
        if (out[i] == '>')
1165
250
            break;
1166
55.2k
        if ((out[i] == 'e') &&
1167
55.2k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1168
1.66k
            int start, cur, quote;
1169
1170
1.66k
            i += 8;
1171
1.66k
            while (IS_BLANK_CH(out[i]))
1172
721
                i += 1;
1173
1.66k
            if (out[i++] != '=')
1174
327
                break;
1175
1.33k
            while (IS_BLANK_CH(out[i]))
1176
1.84k
                i += 1;
1177
1.33k
            quote = out[i++];
1178
1.33k
            if ((quote != '\'') && (quote != '"'))
1179
398
                break;
1180
938
            start = i;
1181
938
            cur = out[i];
1182
4.86k
            while (((cur >= 'a') && (cur <= 'z')) ||
1183
4.86k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1184
4.86k
                   ((cur >= '0') && (cur <= '9')) ||
1185
4.86k
                   (cur == '.') || (cur == '_') ||
1186
4.86k
                   (cur == '-'))
1187
3.92k
                cur = out[++i];
1188
938
            if (cur != quote)
1189
417
                break;
1190
521
            out[i] = 0;
1191
521
            xmlCharEncCloseFunc(handler);
1192
521
            code = xmlCreateCharEncodingHandler((char *) out + start,
1193
521
                    XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
1194
521
                    &handler);
1195
521
            if (code != XML_ERR_OK)
1196
225
                return(code);
1197
296
            *hout = handler;
1198
296
            return(XML_ERR_OK);
1199
521
        }
1200
55.2k
    }
1201
1202
1.99k
done:
1203
    /*
1204
     * Encoding handlers are stateful, so we have to recreate them.
1205
     */
1206
1.99k
    xmlCharEncCloseFunc(handler);
1207
1.99k
    code = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
1208
1.99k
            ctxt->convImpl, ctxt->convCtxt, &handler);
1209
1.99k
    if (code != XML_ERR_OK)
1210
1
        return(code);
1211
1.98k
    *hout = handler;
1212
1.98k
    return(XML_ERR_OK);
1213
1.99k
}
1214
1215
/**
1216
 * xmlSwitchEncoding:
1217
 * @ctxt:  the parser context
1218
 * @enc:  the encoding value (number)
1219
 *
1220
 * Use encoding specified by enum to decode input data. This overrides
1221
 * the encoding found in the XML declaration.
1222
 *
1223
 * This function can also be used to override the encoding of chunks
1224
 * passed to xmlParseChunk.
1225
 *
1226
 * Returns 0 in case of success, -1 otherwise
1227
 */
1228
int
1229
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1230
14.4k
{
1231
14.4k
    xmlCharEncodingHandlerPtr handler = NULL;
1232
14.4k
    int ret;
1233
14.4k
    xmlParserErrors code;
1234
1235
14.4k
    if ((ctxt == NULL) || (ctxt->input == NULL))
1236
0
        return(-1);
1237
1238
14.4k
    code = xmlLookupCharEncodingHandler(enc, &handler);
1239
14.4k
    if (code != 0) {
1240
4
        xmlFatalErr(ctxt, code, NULL);
1241
4
        return(-1);
1242
4
    }
1243
1244
14.4k
    ret = xmlSwitchToEncoding(ctxt, handler);
1245
1246
14.4k
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1247
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1248
0
    }
1249
1250
14.4k
    return(ret);
1251
14.4k
}
1252
1253
/**
1254
 * xmlSwitchInputEncodingName:
1255
 * @ctxt:  the parser context
1256
 * @input:  the input strea,
1257
 * @encoding:  the encoding name
1258
 *
1259
 * Returns 0 in case of success, -1 otherwise
1260
 */
1261
static int
1262
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1263
0
                           const char *encoding) {
1264
0
    xmlCharEncodingHandlerPtr handler;
1265
0
    xmlParserErrors res;
1266
1267
0
    if (encoding == NULL)
1268
0
        return(-1);
1269
1270
0
    res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
1271
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1272
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1273
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1274
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1275
0
        return(-1);
1276
0
    } else if (res != XML_ERR_OK) {
1277
0
        xmlFatalErr(ctxt, res, encoding);
1278
0
        return(-1);
1279
0
    }
1280
1281
0
    res  = xmlInputSetEncodingHandler(input, handler);
1282
0
    if (res != XML_ERR_OK) {
1283
0
        xmlCtxtErrIO(ctxt, res, NULL);
1284
0
        return(-1);
1285
0
    }
1286
1287
0
    return(0);
1288
0
}
1289
1290
/**
1291
 * xmlSwitchEncodingName:
1292
 * @ctxt:  the parser context
1293
 * @encoding:  the encoding name
1294
 *
1295
 * Use specified encoding to decode input data. This overrides the
1296
 * encoding found in the XML declaration.
1297
 *
1298
 * This function can also be used to override the encoding of chunks
1299
 * passed to xmlParseChunk.
1300
 *
1301
 * Available since 2.13.0.
1302
 *
1303
 * Returns 0 in case of success, -1 otherwise
1304
 */
1305
int
1306
0
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1307
0
    if (ctxt == NULL)
1308
0
        return(-1);
1309
1310
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1311
0
}
1312
1313
/**
1314
 * xmlInputSetEncodingHandler:
1315
 * @input:  the input stream
1316
 * @handler:  the encoding handler
1317
 *
1318
 * Use encoding handler to decode input data.
1319
 *
1320
 * Closes the handler on error.
1321
 *
1322
 * Returns an xmlParserErrors code.
1323
 */
1324
xmlParserErrors
1325
xmlInputSetEncodingHandler(xmlParserInputPtr input,
1326
134k
                           xmlCharEncodingHandlerPtr handler) {
1327
134k
    xmlParserInputBufferPtr in;
1328
134k
    xmlBufPtr buf;
1329
134k
    xmlParserErrors code = XML_ERR_OK;
1330
1331
134k
    if ((input == NULL) || (input->buf == NULL)) {
1332
0
        xmlCharEncCloseFunc(handler);
1333
0
  return(XML_ERR_ARGUMENT);
1334
0
    }
1335
134k
    in = input->buf;
1336
1337
134k
    input->flags |= XML_INPUT_HAS_ENCODING;
1338
1339
    /*
1340
     * UTF-8 requires no encoding handler.
1341
     */
1342
134k
    if ((handler != NULL) &&
1343
134k
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1344
0
        xmlCharEncCloseFunc(handler);
1345
0
        handler = NULL;
1346
0
    }
1347
1348
134k
    if (in->encoder == handler)
1349
15.7k
        return(XML_ERR_OK);
1350
1351
118k
    if (in->encoder != NULL) {
1352
        /*
1353
         * Switching encodings during parsing is a really bad idea,
1354
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1355
         * separate calls to xmlParseChunk.
1356
         *
1357
         * TODO: We should check whether the "raw" input buffer is empty and
1358
         * convert the old content using the old encoder.
1359
         */
1360
1361
0
        xmlCharEncCloseFunc(in->encoder);
1362
0
        in->encoder = handler;
1363
0
        return(XML_ERR_OK);
1364
0
    }
1365
1366
118k
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1367
118k
    if (buf == NULL) {
1368
15
        xmlCharEncCloseFunc(handler);
1369
15
        return(XML_ERR_NO_MEMORY);
1370
15
    }
1371
1372
118k
    in->encoder = handler;
1373
118k
    in->raw = in->buffer;
1374
118k
    in->buffer = buf;
1375
1376
    /*
1377
     * Is there already some content down the pipe to convert ?
1378
     */
1379
118k
    if (input->end > input->base) {
1380
118k
        size_t processed;
1381
118k
        size_t nbchars;
1382
118k
        xmlCharEncError res;
1383
1384
        /*
1385
         * Shrink the current input buffer.
1386
         * Move it as the raw buffer and create a new input buffer
1387
         */
1388
118k
        processed = input->cur - input->base;
1389
118k
        xmlBufShrink(in->raw, processed);
1390
118k
        input->consumed += processed;
1391
118k
        in->rawconsumed = processed;
1392
1393
        /*
1394
         * If we're push-parsing, we must convert the whole buffer.
1395
         *
1396
         * If we're pull-parsing, we could be parsing from a huge
1397
         * memory buffer which we don't want to convert completely.
1398
         */
1399
118k
        if (input->flags & XML_INPUT_PROGRESSIVE)
1400
0
            nbchars = SIZE_MAX;
1401
118k
        else
1402
118k
            nbchars = 4000 /* MINLEN */;
1403
118k
        res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
1404
118k
        if (res != XML_ENC_ERR_SUCCESS)
1405
2.61k
            code = in->error;
1406
118k
    }
1407
1408
118k
    xmlBufResetInput(in->buffer, input);
1409
1410
118k
    return(code);
1411
118k
}
1412
1413
/**
1414
 * xmlSwitchInputEncoding:
1415
 * @ctxt:  the parser context, only for error reporting
1416
 * @input:  the input stream
1417
 * @handler:  the encoding handler
1418
 *
1419
 * DEPRECATED: Internal function, don't use.
1420
 *
1421
 * Use encoding handler to decode input data.
1422
 *
1423
 * Returns 0 in case of success, -1 otherwise
1424
 */
1425
int
1426
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1427
0
                       xmlCharEncodingHandlerPtr handler) {
1428
0
    xmlParserErrors code = xmlInputSetEncodingHandler(input, handler);
1429
1430
0
    if (code != XML_ERR_OK) {
1431
0
        xmlCtxtErrIO(ctxt, code, NULL);
1432
0
        return(-1);
1433
0
    }
1434
1435
0
    return(0);
1436
0
}
1437
1438
/**
1439
 * xmlSwitchToEncoding:
1440
 * @ctxt:  the parser context
1441
 * @handler:  the encoding handler
1442
 *
1443
 * Use encoding handler to decode input data.
1444
 *
1445
 * This function can be used to enforce the encoding of chunks passed
1446
 * to xmlParseChunk.
1447
 *
1448
 * Returns 0 in case of success, -1 otherwise
1449
 */
1450
int
1451
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1452
16.7k
{
1453
16.7k
    xmlParserErrors code;
1454
1455
16.7k
    if (ctxt == NULL)
1456
0
        return(-1);
1457
1458
16.7k
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1459
16.7k
    if (code != XML_ERR_OK) {
1460
1.66k
        xmlCtxtErrIO(ctxt, code, NULL);
1461
1.66k
        return(-1);
1462
1.66k
    }
1463
1464
15.0k
    return(0);
1465
16.7k
}
1466
1467
/**
1468
 * xmlDetectEncoding:
1469
 * @ctxt:  the parser context
1470
 *
1471
 * Handle optional BOM, detect and switch to encoding.
1472
 *
1473
 * Assumes that there are at least four bytes in the input buffer.
1474
 */
1475
void
1476
916k
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1477
916k
    const xmlChar *in;
1478
916k
    xmlCharEncoding enc;
1479
916k
    int bomSize;
1480
916k
    int autoFlag = 0;
1481
1482
916k
    if (xmlParserGrow(ctxt) < 0)
1483
0
        return;
1484
916k
    in = ctxt->input->cur;
1485
916k
    if (ctxt->input->end - in < 4)
1486
1.26k
        return;
1487
1488
915k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1489
        /*
1490
         * If the encoding was already set, only skip the BOM which was
1491
         * possibly decoded to UTF-8.
1492
         */
1493
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1494
0
            ctxt->input->cur += 3;
1495
0
        }
1496
1497
0
        return;
1498
0
    }
1499
1500
915k
    enc = XML_CHAR_ENCODING_NONE;
1501
915k
    bomSize = 0;
1502
1503
    /*
1504
     * BOM sniffing and detection of initial bytes of an XML
1505
     * declaration.
1506
     *
1507
     * The HTML5 spec doesn't cover UTF-32 (UCS-4) or EBCDIC.
1508
     */
1509
915k
    switch (in[0]) {
1510
3.17k
        case 0x00:
1511
3.17k
            if ((!ctxt->html) &&
1512
3.17k
                (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1513
229
                enc = XML_CHAR_ENCODING_UCS4BE;
1514
229
                autoFlag = XML_INPUT_AUTO_OTHER;
1515
2.94k
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1516
                /*
1517
                 * TODO: The HTML5 spec requires to check that the
1518
                 * next codepoint is an 'x'.
1519
                 */
1520
1.05k
                enc = XML_CHAR_ENCODING_UTF16BE;
1521
1.05k
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1522
1.05k
            }
1523
3.17k
            break;
1524
1525
882k
        case 0x3C:
1526
882k
            if (in[1] == 0x00) {
1527
2.69k
                if ((!ctxt->html) &&
1528
2.69k
                    (in[2] == 0x00) && (in[3] == 0x00)) {
1529
535
                    enc = XML_CHAR_ENCODING_UCS4LE;
1530
535
                    autoFlag = XML_INPUT_AUTO_OTHER;
1531
2.15k
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1532
                    /*
1533
                     * TODO: The HTML5 spec requires to check that the
1534
                     * next codepoint is an 'x'.
1535
                     */
1536
1.32k
                    enc = XML_CHAR_ENCODING_UTF16LE;
1537
1.32k
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1538
1.32k
                }
1539
2.69k
            }
1540
882k
            break;
1541
1542
3.27k
        case 0x4C:
1543
3.27k
      if ((!ctxt->html) &&
1544
3.27k
                (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1545
2.51k
          enc = XML_CHAR_ENCODING_EBCDIC;
1546
2.51k
                autoFlag = XML_INPUT_AUTO_OTHER;
1547
2.51k
            }
1548
3.27k
            break;
1549
1550
7.34k
        case 0xEF:
1551
7.34k
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1552
6.84k
                enc = XML_CHAR_ENCODING_UTF8;
1553
6.84k
                autoFlag = XML_INPUT_AUTO_UTF8;
1554
6.84k
                bomSize = 3;
1555
6.84k
            }
1556
7.34k
            break;
1557
1558
4.18k
        case 0xFE:
1559
4.18k
            if (in[1] == 0xFF) {
1560
3.94k
                enc = XML_CHAR_ENCODING_UTF16BE;
1561
3.94k
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1562
3.94k
                bomSize = 2;
1563
3.94k
            }
1564
4.18k
            break;
1565
1566
775
        case 0xFF:
1567
775
            if (in[1] == 0xFE) {
1568
502
                enc = XML_CHAR_ENCODING_UTF16LE;
1569
502
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1570
502
                bomSize = 2;
1571
502
            }
1572
775
            break;
1573
915k
    }
1574
1575
915k
    if (bomSize > 0) {
1576
11.2k
        ctxt->input->cur += bomSize;
1577
11.2k
    }
1578
1579
915k
    if (enc != XML_CHAR_ENCODING_NONE) {
1580
16.9k
        ctxt->input->flags |= autoFlag;
1581
1582
16.9k
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1583
2.51k
            xmlCharEncodingHandlerPtr handler;
1584
2.51k
            xmlParserErrors res;
1585
1586
2.51k
            res = xmlDetectEBCDIC(ctxt, &handler);
1587
2.51k
            if (res != XML_ERR_OK) {
1588
228
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1589
2.28k
            } else {
1590
2.28k
                xmlSwitchToEncoding(ctxt, handler);
1591
2.28k
            }
1592
14.4k
        } else {
1593
14.4k
            xmlSwitchEncoding(ctxt, enc);
1594
14.4k
        }
1595
16.9k
    }
1596
915k
}
1597
1598
/**
1599
 * xmlSetDeclaredEncoding:
1600
 * @ctxt:  the parser context
1601
 * @encoding:  declared encoding
1602
 *
1603
 * Set the encoding from a declaration in the document.
1604
 *
1605
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1606
 * about encoding mismatches.
1607
 *
1608
 * Takes ownership of 'encoding'.
1609
 */
1610
void
1611
124k
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1612
124k
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1613
124k
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1614
123k
        xmlCharEncodingHandlerPtr handler;
1615
123k
        xmlParserErrors res;
1616
1617
        /*
1618
         * xmlSwitchEncodingName treats unsupported encodings as
1619
         * warnings, but we want it to be an error in an encoding
1620
         * declaration.
1621
         */
1622
123k
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1623
123k
                XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt, &handler);
1624
123k
        if (res != XML_ERR_OK) {
1625
5.94k
            xmlFatalErr(ctxt, res, (const char *) encoding);
1626
5.94k
            xmlFree(encoding);
1627
5.94k
            return;
1628
5.94k
        }
1629
1630
117k
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1631
117k
        if (res != XML_ERR_OK) {
1632
966
            xmlCtxtErrIO(ctxt, res, NULL);
1633
966
            xmlFree(encoding);
1634
966
            return;
1635
966
        }
1636
1637
116k
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1638
116k
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1639
302
        static const char *allowedUTF8[] = {
1640
302
            "UTF-8", "UTF8", NULL
1641
302
        };
1642
302
        static const char *allowedUTF16LE[] = {
1643
302
            "UTF-16", "UTF-16LE", "UTF16", NULL
1644
302
        };
1645
302
        static const char *allowedUTF16BE[] = {
1646
302
            "UTF-16", "UTF-16BE", "UTF16", NULL
1647
302
        };
1648
302
        const char **allowed = NULL;
1649
302
        const char *autoEnc = NULL;
1650
1651
302
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1652
302
            case XML_INPUT_AUTO_UTF8:
1653
302
                allowed = allowedUTF8;
1654
302
                autoEnc = "UTF-8";
1655
302
                break;
1656
0
            case XML_INPUT_AUTO_UTF16LE:
1657
0
                allowed = allowedUTF16LE;
1658
0
                autoEnc = "UTF-16LE";
1659
0
                break;
1660
0
            case XML_INPUT_AUTO_UTF16BE:
1661
0
                allowed = allowedUTF16BE;
1662
0
                autoEnc = "UTF-16BE";
1663
0
                break;
1664
302
        }
1665
1666
302
        if (allowed != NULL) {
1667
302
            const char **p;
1668
302
            int match = 0;
1669
1670
750
            for (p = allowed; *p != NULL; p++) {
1671
526
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1672
78
                    match = 1;
1673
78
                    break;
1674
78
                }
1675
526
            }
1676
1677
302
            if (match == 0) {
1678
224
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1679
224
                              "Encoding '%s' doesn't match "
1680
224
                              "auto-detected '%s'\n",
1681
224
                              encoding, BAD_CAST autoEnc);
1682
224
                xmlFree(encoding);
1683
224
                encoding = xmlStrdup(BAD_CAST autoEnc);
1684
224
                if (encoding == NULL)
1685
2
                    xmlCtxtErrMemory(ctxt);
1686
224
            }
1687
302
        }
1688
302
    }
1689
1690
117k
    if (ctxt->encoding != NULL)
1691
305
        xmlFree((xmlChar *) ctxt->encoding);
1692
117k
    ctxt->encoding = encoding;
1693
117k
}
1694
1695
/**
1696
 * xmlCtxtGetDeclaredEncoding:
1697
 * @ctxt:  parser context
1698
 *
1699
 * Available since 2.14.0.
1700
 *
1701
 * Returns the encoding from the encoding declaration. This can differ
1702
 * from the actual encoding.
1703
 */
1704
const xmlChar *
1705
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt) {
1706
0
    if (ctxt == NULL)
1707
0
        return(NULL);
1708
1709
0
    return(ctxt->encoding);
1710
0
}
1711
1712
/**
1713
 * xmlGetActualEncoding:
1714
 * @ctxt:  the parser context
1715
 *
1716
 * Returns the actual used to parse the document. This can differ from
1717
 * the declared encoding.
1718
 */
1719
const xmlChar *
1720
814k
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1721
814k
    const xmlChar *encoding = NULL;
1722
1723
814k
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1724
814k
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1725
        /* Preserve encoding exactly */
1726
116k
        encoding = ctxt->encoding;
1727
697k
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1728
249
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1729
697k
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1730
0
        encoding = BAD_CAST "UTF-8";
1731
0
    }
1732
1733
814k
    return(encoding);
1734
814k
}
1735
1736
/************************************************************************
1737
 *                  *
1738
 *  Commodity functions to handle entities processing   *
1739
 *                  *
1740
 ************************************************************************/
1741
1742
/**
1743
 * xmlFreeInputStream:
1744
 * @input:  an xmlParserInputPtr
1745
 *
1746
 * Free up an input stream.
1747
 */
1748
void
1749
933k
xmlFreeInputStream(xmlParserInputPtr input) {
1750
933k
    if (input == NULL) return;
1751
1752
924k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1753
924k
    if (input->version != NULL) xmlFree((char *) input->version);
1754
924k
    if ((input->free != NULL) && (input->base != NULL))
1755
0
        input->free((xmlChar *) input->base);
1756
924k
    if (input->buf != NULL)
1757
924k
        xmlFreeParserInputBuffer(input->buf);
1758
924k
    xmlFree(input);
1759
924k
}
1760
1761
/**
1762
 * xmlNewInputStream:
1763
 * @ctxt:  an XML parser context
1764
 *
1765
 * DEPRECATED: Use xmlNewInputFromUrl or similar functions.
1766
 *
1767
 * Create a new input stream structure.
1768
 *
1769
 * Returns the new input stream or NULL
1770
 */
1771
xmlParserInputPtr
1772
0
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1773
0
    xmlParserInputPtr input;
1774
1775
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1776
0
    if (input == NULL) {
1777
0
        xmlCtxtErrMemory(ctxt);
1778
0
  return(NULL);
1779
0
    }
1780
0
    memset(input, 0, sizeof(xmlParserInput));
1781
0
    input->line = 1;
1782
0
    input->col = 1;
1783
1784
0
    return(input);
1785
0
}
1786
1787
/**
1788
 * xmlCtxtNewInputFromUrl:
1789
 * @ctxt:  parser context
1790
 * @url:  filename or URL
1791
 * @publicId:  publid ID from doctype (optional)
1792
 * @encoding:  character encoding (optional)
1793
 * @flags:  unused, pass 0
1794
 *
1795
 * Creates a new parser input from the filesystem, the network or
1796
 * a user-defined resource loader.
1797
 *
1798
 * Returns a new parser input.
1799
 */
1800
xmlParserInputPtr
1801
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
1802
                       const char *publicId, const char *encoding,
1803
0
                       xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
1804
0
    xmlParserInputPtr input;
1805
1806
0
    if ((ctxt == NULL) || (url == NULL))
1807
0
  return(NULL);
1808
1809
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1810
0
    if (input == NULL)
1811
0
        return(NULL);
1812
1813
0
    if (encoding != NULL)
1814
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1815
1816
0
    return(input);
1817
0
}
1818
1819
/**
1820
 * xmlNewInputInternal:
1821
 * @buf:  parser input buffer
1822
 * @filename:  filename or URL
1823
 *
1824
 * Internal helper function.
1825
 *
1826
 * Returns a new parser input.
1827
 */
1828
static xmlParserInputPtr
1829
924k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1830
924k
    xmlParserInputPtr input;
1831
1832
924k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1833
924k
    if (input == NULL) {
1834
56
  xmlFreeParserInputBuffer(buf);
1835
56
  return(NULL);
1836
56
    }
1837
924k
    memset(input, 0, sizeof(xmlParserInput));
1838
924k
    input->line = 1;
1839
924k
    input->col = 1;
1840
1841
924k
    input->buf = buf;
1842
924k
    xmlBufResetInput(input->buf->buffer, input);
1843
1844
924k
    if (filename != NULL) {
1845
65.4k
        input->filename = xmlMemStrdup(filename);
1846
65.4k
        if (input->filename == NULL) {
1847
0
            xmlFreeInputStream(input);
1848
0
            return(NULL);
1849
0
        }
1850
65.4k
    }
1851
1852
924k
    return(input);
1853
924k
}
1854
1855
/**
1856
 * xmlNewInputFromMemory:
1857
 * @url:  base URL (optional)
1858
 * @mem:  pointer to char array
1859
 * @size:  size of array
1860
 * @flags:  optimization hints
1861
 *
1862
 * Creates a new parser input to read from a memory area.
1863
 *
1864
 * @url is used as base to resolve external entities and for
1865
 * error reporting.
1866
 *
1867
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1868
 * stay unchanged until parsing has finished. This can avoid
1869
 * temporary copies.
1870
 *
1871
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1872
 * area must contain a zero byte after the buffer at position @size.
1873
 * This can avoid temporary copies.
1874
 *
1875
 * Available since 2.14.0.
1876
 *
1877
 * Returns a new parser input or NULL if a memory allocation failed.
1878
 */
1879
xmlParserInputPtr
1880
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1881
65.4k
                      xmlParserInputFlags flags) {
1882
65.4k
    xmlParserInputBufferPtr buf;
1883
1884
65.4k
    if (mem == NULL)
1885
0
  return(NULL);
1886
1887
65.4k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1888
65.4k
    if (buf == NULL)
1889
0
        return(NULL);
1890
1891
65.4k
    return(xmlNewInputInternal(buf, url));
1892
65.4k
}
1893
1894
/**
1895
 * xmlCtxtNewInputFromMemory:
1896
 * @ctxt:  parser context
1897
 * @url:  base URL (optional)
1898
 * @mem:  pointer to char array
1899
 * @size:  size of array
1900
 * @encoding:  character encoding (optional)
1901
 * @flags:  optimization hints
1902
 *
1903
 * Returns a new parser input or NULL in case of error.
1904
 */
1905
xmlParserInputPtr
1906
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
1907
                          const void *mem, size_t size,
1908
65.4k
                          const char *encoding, xmlParserInputFlags flags) {
1909
65.4k
    xmlParserInputPtr input;
1910
1911
65.4k
    if ((ctxt == NULL) || (mem == NULL))
1912
0
  return(NULL);
1913
1914
65.4k
    input = xmlNewInputFromMemory(url, mem, size, flags);
1915
65.4k
    if (input == NULL) {
1916
0
        xmlCtxtErrMemory(ctxt);
1917
0
        return(NULL);
1918
0
    }
1919
1920
65.4k
    if (encoding != NULL)
1921
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1922
1923
65.4k
    return(input);
1924
65.4k
}
1925
1926
/**
1927
 * xmlNewInputFromString:
1928
 * @url:  base URL (optional)
1929
 * @str:  zero-terminated string
1930
 * @flags:  optimization hints
1931
 *
1932
 * Creates a new parser input to read from a zero-terminated string.
1933
 *
1934
 * @url is used as base to resolve external entities and for
1935
 * error reporting.
1936
 *
1937
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1938
 * stay unchanged until parsing has finished. This can avoid
1939
 * temporary copies.
1940
 *
1941
 * Available since 2.14.0.
1942
 *
1943
 * Returns a new parser input or NULL if a memory allocation failed.
1944
 */
1945
xmlParserInputPtr
1946
xmlNewInputFromString(const char *url, const char *str,
1947
8.33k
                      xmlParserInputFlags flags) {
1948
8.33k
    xmlParserInputBufferPtr buf;
1949
1950
8.33k
    if (str == NULL)
1951
0
  return(NULL);
1952
1953
8.33k
    buf = xmlNewInputBufferString(str, flags);
1954
8.33k
    if (buf == NULL)
1955
2
        return(NULL);
1956
1957
8.32k
    return(xmlNewInputInternal(buf, url));
1958
8.33k
}
1959
1960
/**
1961
 * xmlCtxtNewInputFromString:
1962
 * @ctxt:  parser context
1963
 * @url:  base URL (optional)
1964
 * @str:  zero-terminated string
1965
 * @encoding:  character encoding (optional)
1966
 * @flags:  optimization hints
1967
 *
1968
 * Returns a new parser input.
1969
 */
1970
xmlParserInputPtr
1971
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
1972
                          const char *str, const char *encoding,
1973
8.33k
                          xmlParserInputFlags flags) {
1974
8.33k
    xmlParserInputPtr input;
1975
1976
8.33k
    if ((ctxt == NULL) || (str == NULL))
1977
0
  return(NULL);
1978
1979
8.33k
    input = xmlNewInputFromString(url, str, flags);
1980
8.33k
    if (input == NULL) {
1981
3
        xmlCtxtErrMemory(ctxt);
1982
3
        return(NULL);
1983
3
    }
1984
1985
8.32k
    if (encoding != NULL)
1986
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1987
1988
8.32k
    return(input);
1989
8.33k
}
1990
1991
/**
1992
 * xmlNewInputFromFd:
1993
 * @url:  base URL (optional)
1994
 * @fd:  file descriptor
1995
 * @flags:  input flags
1996
 *
1997
 * Creates a new parser input to read from a file descriptor.
1998
 *
1999
 * @url is used as base to resolve external entities and for
2000
 * error reporting.
2001
 *
2002
 * @fd is closed after parsing has finished.
2003
 *
2004
 * Supported @flags are XML_INPUT_UNZIP to decompress data
2005
 * automatically. This feature is deprecated and will be removed
2006
 * in a future release.
2007
 *
2008
 * Available since 2.14.0.
2009
 *
2010
 * Returns a new parser input or NULL if a memory allocation failed.
2011
 */
2012
xmlParserInputPtr
2013
0
xmlNewInputFromFd(const char *url, int fd, xmlParserInputFlags flags) {
2014
0
    xmlParserInputBufferPtr buf;
2015
2016
0
    if (fd < 0)
2017
0
  return(NULL);
2018
2019
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2020
0
    if (buf == NULL)
2021
0
        return(NULL);
2022
2023
0
    if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) {
2024
0
        xmlFreeParserInputBuffer(buf);
2025
0
        return(NULL);
2026
0
    }
2027
2028
0
    return(xmlNewInputInternal(buf, url));
2029
0
}
2030
2031
/**
2032
 * xmlCtxtNewInputFromFd:
2033
 * @ctxt:  parser context
2034
 * @url:  base URL (optional)
2035
 * @fd:  file descriptor
2036
 * @encoding:  character encoding (optional)
2037
 * @flags:  input flags (forwarded to xmlNewInputFromFd)
2038
 *
2039
 * Returns a new parser input.
2040
 */
2041
xmlParserInputPtr
2042
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *url,
2043
                      int fd, const char *encoding,
2044
0
                      xmlParserInputFlags flags) {
2045
0
    xmlParserInputPtr input;
2046
2047
0
    if ((ctxt == NULL) || (fd < 0))
2048
0
  return(NULL);
2049
2050
0
    if (ctxt->options & XML_PARSE_UNZIP)
2051
0
        flags |= XML_INPUT_UNZIP;
2052
2053
0
    input = xmlNewInputFromFd(url, fd, flags);
2054
0
    if (input == NULL) {
2055
0
  xmlCtxtErrMemory(ctxt);
2056
0
        return(NULL);
2057
0
    }
2058
2059
0
    if (encoding != NULL)
2060
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2061
2062
0
    return(input);
2063
0
}
2064
2065
/**
2066
 * xmlNewInputFromIO:
2067
 * @url:  base URL (optional)
2068
 * @ioRead:  read callback
2069
 * @ioClose:  close callback (optional)
2070
 * @ioCtxt:  IO context
2071
 * @flags:  unused, pass 0
2072
 *
2073
 * Creates a new parser input to read from input callbacks and
2074
 * context.
2075
 *
2076
 * @url is used as base to resolve external entities and for
2077
 * error reporting.
2078
 *
2079
 * @ioRead is called to read new data into a provided buffer.
2080
 * It must return the number of bytes written into the buffer
2081
 * or a negative xmlParserErrors code on failure.
2082
 *
2083
 * @ioClose is called after parsing has finished.
2084
 *
2085
 * @ioCtxt is an opaque pointer passed to the callbacks.
2086
 *
2087
 * Available since 2.14.0.
2088
 *
2089
 * Returns a new parser input or NULL if a memory allocation failed.
2090
 */
2091
xmlParserInputPtr
2092
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2093
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2094
0
                  xmlParserInputFlags flags ATTRIBUTE_UNUSED) {
2095
0
    xmlParserInputBufferPtr buf;
2096
2097
0
    if (ioRead == NULL)
2098
0
  return(NULL);
2099
2100
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2101
0
    if (buf == NULL) {
2102
0
        if (ioClose != NULL)
2103
0
            ioClose(ioCtxt);
2104
0
        return(NULL);
2105
0
    }
2106
2107
0
    buf->context = ioCtxt;
2108
0
    buf->readcallback = ioRead;
2109
0
    buf->closecallback = ioClose;
2110
2111
0
    return(xmlNewInputInternal(buf, url));
2112
0
}
2113
2114
/**
2115
 * xmlCtxtNewInputFromIO:
2116
 * @ctxt:  parser context
2117
 * @url:  base URL (optional)
2118
 * @ioRead:  read callback
2119
 * @ioClose:  close callback (optional)
2120
 * @ioCtxt:  IO context
2121
 * @encoding:  character encoding (optional)
2122
 * @flags:  unused, pass 0
2123
 *
2124
 * Returns a new parser input.
2125
 */
2126
xmlParserInputPtr
2127
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
2128
                      xmlInputReadCallback ioRead,
2129
                      xmlInputCloseCallback ioClose,
2130
                      void *ioCtxt, const char *encoding,
2131
0
                      xmlParserInputFlags flags) {
2132
0
    xmlParserInputPtr input;
2133
2134
0
    if ((ctxt == NULL) || (ioRead == NULL))
2135
0
  return(NULL);
2136
2137
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2138
0
    if (input == NULL) {
2139
0
        xmlCtxtErrMemory(ctxt);
2140
0
        return(NULL);
2141
0
    }
2142
2143
0
    if (encoding != NULL)
2144
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2145
2146
0
    return(input);
2147
0
}
2148
2149
/**
2150
 * xmlNewPushInput:
2151
 * @url:  base URL (optional)
2152
 * @chunk:  pointer to char array
2153
 * @size:  size of array
2154
 *
2155
 * Creates a new parser input for a push parser.
2156
 *
2157
 * Returns a new parser input or NULL if a memory allocation failed.
2158
 */
2159
xmlParserInputPtr
2160
0
xmlNewPushInput(const char *url, const char *chunk, int size) {
2161
0
    xmlParserInputBufferPtr buf;
2162
0
    xmlParserInputPtr input;
2163
2164
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2165
0
    if (buf == NULL)
2166
0
        return(NULL);
2167
2168
0
    input = xmlNewInputInternal(buf, url);
2169
0
    if (input == NULL)
2170
0
  return(NULL);
2171
2172
0
    input->flags |= XML_INPUT_PROGRESSIVE;
2173
2174
0
    if ((size > 0) && (chunk != NULL)) {
2175
0
        int res;
2176
2177
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2178
0
        xmlBufResetInput(input->buf->buffer, input);
2179
0
        if (res < 0) {
2180
0
            xmlFreeInputStream(input);
2181
0
            return(NULL);
2182
0
        }
2183
0
    }
2184
2185
0
    return(input);
2186
0
}
2187
2188
/**
2189
 * xmlNewIOInputStream:
2190
 * @ctxt:  an XML parser context
2191
 * @buf:  an input buffer
2192
 * @enc:  the charset encoding if known
2193
 *
2194
 * Create a new input stream structure encapsulating the @buf into
2195
 * a stream suitable for the parser.
2196
 *
2197
 * Returns the new input stream or NULL
2198
 */
2199
xmlParserInputPtr
2200
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
2201
851k
              xmlCharEncoding enc) {
2202
851k
    xmlParserInputPtr input;
2203
851k
    const char *encoding;
2204
2205
851k
    if ((ctxt == NULL) || (buf == NULL))
2206
0
        return(NULL);
2207
2208
851k
    input = xmlNewInputInternal(buf, NULL);
2209
851k
    if (input == NULL) {
2210
55
        xmlCtxtErrMemory(ctxt);
2211
55
  return(NULL);
2212
55
    }
2213
2214
851k
    encoding = xmlGetCharEncodingName(enc);
2215
851k
    if (encoding != NULL)
2216
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2217
2218
851k
    return(input);
2219
851k
}
2220
2221
/**
2222
 * xmlNewEntityInputStream:
2223
 * @ctxt:  an XML parser context
2224
 * @ent:  an Entity pointer
2225
 *
2226
 * DEPRECATED: Internal function, do not use.
2227
 *
2228
 * Create a new input stream based on an xmlEntityPtr
2229
 *
2230
 * Returns the new input stream or NULL
2231
 */
2232
xmlParserInputPtr
2233
54.7k
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
2234
54.7k
    xmlParserInputPtr input;
2235
2236
54.7k
    if ((ctxt == NULL) || (ent == NULL))
2237
0
  return(NULL);
2238
2239
54.7k
    if (ent->content != NULL) {
2240
8.33k
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2241
8.33k
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2242
46.4k
    } else if (ent->URI != NULL) {
2243
45.9k
        xmlResourceType rtype;
2244
2245
45.9k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2246
36.1k
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2247
9.72k
        else
2248
9.72k
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2249
2250
45.9k
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2251
45.9k
                                (char *) ent->ExternalID, rtype);
2252
45.9k
    } else {
2253
517
        return(NULL);
2254
517
    }
2255
2256
54.2k
    if (input == NULL)
2257
10.1k
        return(NULL);
2258
2259
44.0k
    input->entity = ent;
2260
2261
44.0k
    return(input);
2262
54.2k
}
2263
2264
/**
2265
 * xmlNewStringInputStream:
2266
 * @ctxt:  an XML parser context
2267
 * @buffer:  a memory buffer
2268
 *
2269
 * DEPRECATED: Use xmlNewInputFromString.
2270
 *
2271
 * Create a new input stream based on a memory buffer.
2272
 *
2273
 * Returns the new input stream
2274
 */
2275
xmlParserInputPtr
2276
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2277
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2278
0
                                     NULL, 0));
2279
0
}
2280
2281
2282
/****************************************************************
2283
 *                *
2284
 *    External entities loading     *
2285
 *                *
2286
 ****************************************************************/
2287
2288
#ifdef LIBXML_CATALOG_ENABLED
2289
2290
/**
2291
 * xmlResolveResourceFromCatalog:
2292
 * @URL:  the URL for the entity to load
2293
 * @ID:  the System ID for the entity to load
2294
 * @ctxt:  the context in which the entity is called or NULL
2295
 *
2296
 * Resolves the URL and ID against the appropriate catalog.
2297
 * This function is used by xmlDefaultExternalEntityLoader and
2298
 * xmlNoNetExternalEntityLoader.
2299
 *
2300
 * Returns a new allocated URL, or NULL.
2301
 */
2302
static xmlChar *
2303
xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2304
0
                              xmlParserCtxtPtr ctxt) {
2305
0
    xmlChar *resource = NULL;
2306
0
    xmlCatalogAllow pref;
2307
0
    int allowLocal = 0;
2308
0
    int allowGlobal = 0;
2309
2310
    /*
2311
     * If the resource doesn't exists as a file,
2312
     * try to load it from the resource pointed in the catalogs
2313
     */
2314
0
    pref = xmlCatalogGetDefaults();
2315
2316
0
    if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2317
0
        ((pref == XML_CATA_ALLOW_ALL) ||
2318
0
         (pref == XML_CATA_ALLOW_DOCUMENT)))
2319
0
        allowLocal = 1;
2320
2321
0
    if (((ctxt == NULL) ||
2322
0
         ((ctxt->options & XML_PARSE_NO_SYS_CATALOG) == 0)) &&
2323
0
        ((pref == XML_CATA_ALLOW_ALL) ||
2324
0
         (pref == XML_CATA_ALLOW_GLOBAL)))
2325
0
        allowGlobal = 1;
2326
2327
0
    if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2328
  /*
2329
   * Do a local lookup
2330
   */
2331
0
        if (allowLocal) {
2332
0
      resource = xmlCatalogLocalResolve(ctxt->catalogs,
2333
0
                (const xmlChar *)ID,
2334
0
                (const xmlChar *)URL);
2335
0
        }
2336
  /*
2337
   * Try a global lookup
2338
   */
2339
0
  if ((resource == NULL) && (allowGlobal)) {
2340
0
      resource = xmlCatalogResolve((const xmlChar *)ID,
2341
0
           (const xmlChar *)URL);
2342
0
  }
2343
0
  if ((resource == NULL) && (URL != NULL))
2344
0
      resource = xmlStrdup((const xmlChar *) URL);
2345
2346
  /*
2347
   * TODO: do an URI lookup on the reference
2348
   */
2349
0
  if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2350
0
      xmlChar *tmp = NULL;
2351
2352
0
      if (allowLocal) {
2353
0
    tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2354
0
      }
2355
0
      if ((tmp == NULL) && (allowGlobal)) {
2356
0
    tmp = xmlCatalogResolveURI(resource);
2357
0
      }
2358
2359
0
      if (tmp != NULL) {
2360
0
    xmlFree(resource);
2361
0
    resource = tmp;
2362
0
      }
2363
0
  }
2364
0
    }
2365
2366
0
    return resource;
2367
0
}
2368
2369
#endif
2370
2371
#ifdef LIBXML_HTTP_ENABLED
2372
static xmlParserErrors
2373
xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
2374
    const char *encoding;
2375
    const char *redir;
2376
    const char *mime;
2377
    int code;
2378
2379
    if ((input == NULL) || (input->buf == NULL) ||
2380
        (input->buf->readcallback != xmlIOHTTPRead) ||
2381
        (input->buf->context == NULL))
2382
        return(XML_ERR_OK);
2383
2384
    code = xmlNanoHTTPReturnCode(input->buf->context);
2385
    if (code >= 400) {
2386
        /* fatal error */
2387
        return(XML_IO_LOAD_ERROR);
2388
    }
2389
2390
    mime = xmlNanoHTTPMimeType(input->buf->context);
2391
    if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2392
        (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2393
        encoding = xmlNanoHTTPEncoding(input->buf->context);
2394
        if (encoding != NULL) {
2395
            xmlCharEncodingHandlerPtr handler;
2396
            xmlParserErrors res;
2397
2398
            res = xmlOpenCharEncodingHandler(encoding, /* output */ 0,
2399
                                             &handler);
2400
            if (res == 0)
2401
                xmlInputSetEncodingHandler(input, handler);
2402
        }
2403
    }
2404
2405
    redir = xmlNanoHTTPRedir(input->buf->context);
2406
    if (redir != NULL) {
2407
        if (input->filename != NULL)
2408
            xmlFree((xmlChar *) input->filename);
2409
        input->filename = xmlMemStrdup(redir);
2410
        if (input->filename == NULL)
2411
            return(XML_ERR_NO_MEMORY);
2412
    }
2413
2414
    return(XML_ERR_OK);
2415
}
2416
#endif /* LIBXML_HTTP_ENABLED */
2417
2418
/**
2419
 * xmlCheckHTTPInput:
2420
 * @ctxt: an XML parser context
2421
 * @ret: an XML parser input
2422
 *
2423
 * DEPRECATED: Internal function, don't use.
2424
 *
2425
 * Check an input in case it was created from an HTTP stream, in that
2426
 * case it will handle encoding and update of the base URL in case of
2427
 * redirection. It also checks for HTTP errors in which case the input
2428
 * is cleanly freed up and an appropriate error is raised in context
2429
 *
2430
 * Returns the input or NULL in case of HTTP error.
2431
 */
2432
xmlParserInputPtr
2433
0
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2434
    /* Avoid unused variable warning if features are disabled. */
2435
0
    (void) ctxt;
2436
2437
#ifdef LIBXML_HTTP_ENABLED
2438
    {
2439
        int code = xmlCheckHTTPInputInternal(ret);
2440
2441
        if (code != XML_ERR_OK) {
2442
            if (ret->filename != NULL)
2443
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2444
            else
2445
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2446
            xmlFreeInputStream(ret);
2447
            return(NULL);
2448
        }
2449
    }
2450
#endif
2451
2452
0
    return(ret);
2453
0
}
2454
2455
/**
2456
 * xmlNewInputFromUrl:
2457
 * @filename:  the filename to use as entity
2458
 * @flags:  XML_INPUT flags
2459
 * @out:  pointer to new parser input
2460
 *
2461
 * Create a new input stream based on a file or a URL.
2462
 *
2463
 * The flag XML_INPUT_UNZIP allows decompression.
2464
 *
2465
 * The flag XML_INPUT_NETWORK allows network access.
2466
 *
2467
 * The following resource loaders will be called if they were
2468
 * registered (in order of precedence):
2469
 *
2470
 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2471
 *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2472
 * - the default loader which will return
2473
 *   - the result from a matching global input callback set with
2474
 *     xmlRegisterInputCallbacks (deprecated)
2475
 *   - a HTTP resource if support is compiled in.
2476
 *   - a file opened from the filesystem, with automatic detection
2477
 *     of compressed files if support is compiled in.
2478
 *
2479
 * Available since 2.14.0.
2480
 *
2481
 * Returns an xmlParserErrors code.
2482
 */
2483
xmlParserErrors
2484
xmlNewInputFromUrl(const char *filename, xmlParserInputFlags flags,
2485
0
                   xmlParserInputPtr *out) {
2486
0
    xmlParserInputBufferPtr buf;
2487
0
    xmlParserInputPtr input;
2488
0
    xmlParserErrors code = XML_ERR_OK;
2489
2490
0
    if (out == NULL)
2491
0
        return(XML_ERR_ARGUMENT);
2492
0
    *out = NULL;
2493
0
    if (filename == NULL)
2494
0
        return(XML_ERR_ARGUMENT);
2495
2496
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2497
0
        buf = xmlParserInputBufferCreateFilenameValue(filename,
2498
0
                XML_CHAR_ENCODING_NONE);
2499
0
        if (buf == NULL)
2500
0
            code = XML_IO_ENOENT;
2501
0
    } else {
2502
0
        code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2503
0
                                             flags, &buf);
2504
0
    }
2505
0
    if (code != XML_ERR_OK)
2506
0
  return(code);
2507
2508
0
    input = xmlNewInputInternal(buf, filename);
2509
0
    if (input == NULL)
2510
0
  return(XML_ERR_NO_MEMORY);
2511
2512
#ifdef LIBXML_HTTP_ENABLED
2513
    code = xmlCheckHTTPInputInternal(input);
2514
    if (code != XML_ERR_OK) {
2515
        xmlFreeInputStream(input);
2516
        return(code);
2517
    }
2518
#endif
2519
2520
0
    *out = input;
2521
0
    return(XML_ERR_OK);
2522
0
}
2523
2524
/**
2525
 * xmlNewInputFromFile:
2526
 * @ctxt:  an XML parser context
2527
 * @filename:  the filename to use as entity
2528
 *
2529
 * DEPRECATED: Use xmlNewInputFromUrl.
2530
 *
2531
 * Create a new input stream based on a file or an URL.
2532
 *
2533
 * Returns the new input stream or NULL in case of error
2534
 */
2535
xmlParserInputPtr
2536
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2537
0
    xmlParserInputPtr input;
2538
0
    xmlParserInputFlags flags = 0;
2539
0
    xmlParserErrors code;
2540
2541
0
    if ((ctxt == NULL) || (filename == NULL))
2542
0
        return(NULL);
2543
2544
0
    if (ctxt->options & XML_PARSE_UNZIP)
2545
0
        flags |= XML_INPUT_UNZIP;
2546
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2547
0
        flags |= XML_INPUT_NETWORK;
2548
2549
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2550
0
    if (code != XML_ERR_OK) {
2551
0
        xmlCtxtErrIO(ctxt, code, filename);
2552
0
        return(NULL);
2553
0
    }
2554
2555
0
    return(input);
2556
0
}
2557
2558
/**
2559
 * xmlDefaultExternalEntityLoader:
2560
 * @url:  the URL for the entity to load
2561
 * @ID:  the System ID for the entity to load
2562
 * @ctxt:  the context in which the entity is called or NULL
2563
 *
2564
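 * Default external entity loader. Resolves @url against the XML
 * catalogs if catalog support is compiled in, rejects http:// URLs
 * when XML_PARSE_NONET is set, and otherwise loads the resource with
 * xmlNewInputFromFile.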
2565
 *
2566
 * Returns a new allocated xmlParserInputPtr, or NULL.
2567
 */
2568
static xmlParserInputPtr
2569
xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2570
                               xmlParserCtxtPtr ctxt)
2571
0
{
2572
0
    xmlParserInputPtr input = NULL;
2573
0
    char *resource = NULL;
2574
2575
0
    (void) ID;
2576
2577
0
    if (url == NULL)
2578
0
        return(NULL);
2579
2580
0
#ifdef LIBXML_CATALOG_ENABLED
2581
0
    resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2582
0
    if (resource != NULL)
2583
0
  url = resource;
2584
0
#endif
2585
2586
0
    if ((ctxt != NULL) &&
2587
0
        (ctxt->options & XML_PARSE_NONET) &&
2588
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2589
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2590
0
    } else {
2591
0
        input = xmlNewInputFromFile(ctxt, url);
2592
0
    }
2593
2594
0
    if (resource != NULL)
2595
0
  xmlFree(resource);
2596
0
    return(input);
2597
0
}
2598
2599
/**
2600
 * xmlNoNetExternalEntityLoader:
2601
 * @URL:  the URL for the entity to load
2602
 * @ID:  the System ID for the entity to load
2603
 * @ctxt:  the context in which the entity is called or NULL
2604
 *
2605
 * DEPRECATED: Use XML_PARSE_NONET.
2606
 *
2607
 * A specific entity loader disabling network accesses, though still
2608
 * allowing local catalog accesses for resolution.
2609
 *
2610
 * Returns a new allocated xmlParserInputPtr, or NULL.
2611
 */
2612
xmlParserInputPtr
2613
xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2614
0
                             xmlParserCtxtPtr ctxt) {
2615
0
    int oldOptions = 0;
2616
0
    xmlParserInputPtr input;
2617
2618
0
    if (ctxt != NULL) {
2619
0
        oldOptions = ctxt->options;
2620
0
        ctxt->options |= XML_PARSE_NONET;
2621
0
    }
2622
2623
0
    input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2624
2625
0
    if (ctxt != NULL)
2626
0
        ctxt->options = oldOptions;
2627
2628
0
    return(input);
2629
0
}
2630
2631
/*
2632
 * This global has to die eventually
2633
 */
2634
/*
 * File-scope external entity loader. xmlLoadResource calls it when the
 * parser context has no resource loader installed. It starts out as
 * xmlDefaultExternalEntityLoader and is replaced through
 * xmlSetExternalEntityLoader.
 */
static xmlExternalEntityLoader
2635
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2636
2637
/**
2638
 * xmlSetExternalEntityLoader:
2639
 * @f:  the new entity resolver function
2640
 *
2641
 * DEPRECATED: This is a global setting and not thread-safe. Use
2642
 * xmlCtxtSetResourceLoader or similar functions.
2643
 *
2644
 * Changes the default external entity resolver function for the
2645
 * application.
2646
 */
2647
void
2648
2
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2649
2
    /* Plain store into the file-scope loader variable; @f is not checked. */
    xmlCurrentExternalEntityLoader = f;
2650
2
}
2651
2652
/**
2653
 * xmlGetExternalEntityLoader:
2654
 *
2655
 * DEPRECATED: See xmlSetExternalEntityLoader.
2656
 *
2657
 * Get the default external entity resolver function for the application
2658
 *
2659
 * Returns the xmlExternalEntityLoader function pointer
2660
 */
2661
xmlExternalEntityLoader
2662
0
xmlGetExternalEntityLoader(void) {
2663
0
    return(xmlCurrentExternalEntityLoader);
2664
0
}
2665
2666
/**
2667
 * xmlCtxtSetResourceLoader:
2668
 * @ctxt:  parser context
2669
 * @loader:  callback
2670
 * @vctxt:  user data
2671
 *
2672
 * Installs a custom callback to load documents, DTDs or external
2673
 * entities.
2674
 *
2675
 * If @vctxt is NULL, the parser context will be passed.
2676
 *
2677
 * Available since 2.14.0.
2678
 */
2679
void
2680
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2681
0
                         void *vctxt) {
2682
0
    if (ctxt == NULL)
2683
0
        return;
2684
2685
0
    ctxt->resourceLoader = loader;
2686
0
    ctxt->resourceCtxt = vctxt;
2687
0
}
2688
2689
/**
2690
 * xmlLoadResource:
2691
 * @ctxt:  parser context
2692
 * @url:  the URL for the entity to load
2693
 * @publicId:  the Public ID for the entity to load
2694
 * @type:  resource type
2695
 *
2696
 * Returns the xmlParserInputPtr or NULL in case of error.
2697
 */
2698
xmlParserInputPtr
2699
xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2700
1.03M
                xmlResourceType type) {
2701
1.03M
    char *canonicFilename;
2702
1.03M
    xmlParserInputPtr ret;
2703
2704
1.03M
    if (url == NULL)
2705
263
        return(NULL);
2706
2707
1.03M
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2708
0
        char *resource = NULL;
2709
0
        void *userData;
2710
0
        xmlParserInputFlags flags = 0;
2711
0
        int code;
2712
2713
0
#ifdef LIBXML_CATALOG_ENABLED
2714
0
        resource = (char *) xmlResolveResourceFromCatalog(url, publicId, ctxt);
2715
0
        if (resource != NULL)
2716
0
            url = resource;
2717
0
#endif
2718
2719
0
        if (ctxt->options & XML_PARSE_UNZIP)
2720
0
            flags |= XML_INPUT_UNZIP;
2721
0
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2722
0
            flags |= XML_INPUT_NETWORK;
2723
2724
0
        userData = ctxt->resourceCtxt;
2725
0
        if (userData == NULL)
2726
0
            userData = ctxt;
2727
2728
0
        code = ctxt->resourceLoader(userData, url, publicId, type,
2729
0
                                    flags, &ret);
2730
0
        if (code != XML_ERR_OK) {
2731
0
            xmlCtxtErrIO(ctxt, code, url);
2732
0
            ret = NULL;
2733
0
        }
2734
0
        if (resource != NULL)
2735
0
            xmlFree(resource);
2736
0
        return(ret);
2737
0
    }
2738
2739
1.03M
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2740
1.03M
    if (canonicFilename == NULL) {
2741
56
        xmlCtxtErrMemory(ctxt);
2742
56
        return(NULL);
2743
56
    }
2744
2745
1.03M
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2746
1.03M
    xmlFree(canonicFilename);
2747
1.03M
    return(ret);
2748
1.03M
}
2749
2750
/**
2751
 * xmlLoadExternalEntity:
2752
 * @URL:  the URL for the entity to load
2753
 * @ID:  the Public ID for the entity to load
2754
 * @ctxt:  the context in which the entity is called or NULL
2755
 *
2756
 * @URL is a filename or URL. If it contains the substring "://",
2757
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2758
 * treated as a filesystem path.
2759
 *
2760
 * @ID is an optional XML public ID, typically from a doctype
2761
 * declaration. It is used for catalog lookups.
2762
 *
2763
 * If catalog lookup is enabled (default is yes) and URL or ID are
2764
 * found in system or local XML catalogs, URL is replaced with the
2765
 * result. Then the following resource loaders will be called if
2766
 * they were registered (in order of precedence):
2767
 *
2768
 * - the resource loader set with xmlCtxtSetResourceLoader
2769
 * - the global external entity loader set with
2770
 *   xmlSetExternalEntityLoader (without catalog resolution,
2771
 *   deprecated)
2772
 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2773
 *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2774
 * - the default loader which will return
2775
 *   - the result from a matching global input callback set with
2776
 *     xmlRegisterInputCallbacks (deprecated)
2777
 *   - a HTTP resource if support is compiled in.
2778
 *   - a file opened from the filesystem, with automatic detection
2779
 *     of compressed files if support is compiled in.
2780
 *
2781
 * Returns the xmlParserInputPtr or NULL
2782
 */
2783
xmlParserInputPtr
2784
xmlLoadExternalEntity(const char *URL, const char *ID,
2785
962k
                      xmlParserCtxtPtr ctxt) {
2786
962k
    return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2787
962k
}
2788
2789
/************************************************************************
2790
 *                  *
2791
 *    Commodity functions to handle parser contexts   *
2792
 *                  *
2793
 ************************************************************************/
2794
2795
/**
2796
 * xmlInitSAXParserCtxt:
2797
 * @ctxt:  XML parser context
2798
 * @sax:  SAX handler
2799
 * @userData:  user data
2800
 *
2801
 * Initialize a SAX parser context
2802
 *
2803
 * Returns 0 in case of success and -1 in case of error
2804
 */
2805
2806
static int
2807
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2808
                     void *userData)
2809
1.02M
{
2810
1.02M
    xmlParserInputPtr input;
2811
1.02M
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2812
1.02M
    size_t initialNodeTabSize = 1;
2813
#else
2814
    size_t initialNodeTabSize = 10;
2815
#endif
2816
2817
1.02M
    if (ctxt == NULL)
2818
0
        return(-1);
2819
2820
1.02M
    if (ctxt->dict == NULL)
2821
1.02M
  ctxt->dict = xmlDictCreate();
2822
1.02M
    if (ctxt->dict == NULL)
2823
50
  return(-1);
2824
2825
1.02M
    if (ctxt->sax == NULL)
2826
1.02M
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2827
1.02M
    if (ctxt->sax == NULL)
2828
46
  return(-1);
2829
1.02M
    if (sax == NULL) {
2830
1.02M
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2831
1.02M
        xmlSAXVersion(ctxt->sax, 2);
2832
1.02M
        ctxt->userData = ctxt;
2833
1.02M
    } else {
2834
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2835
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2836
0
        } else {
2837
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2838
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2839
0
        }
2840
0
        ctxt->userData = userData ? userData : ctxt;
2841
0
    }
2842
2843
1.02M
    ctxt->maxatts = 0;
2844
1.02M
    ctxt->atts = NULL;
2845
    /* Allocate the Input stack */
2846
1.02M
    if (ctxt->inputTab == NULL) {
2847
1.02M
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2848
1.02M
        size_t initialSize = 1;
2849
#else
2850
        size_t initialSize = 5;
2851
#endif
2852
2853
1.02M
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2854
1.02M
  ctxt->inputMax = initialSize;
2855
1.02M
    }
2856
1.02M
    if (ctxt->inputTab == NULL)
2857
31
  return(-1);
2858
1.02M
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
2859
0
        xmlFreeInputStream(input);
2860
0
    }
2861
1.02M
    ctxt->inputNr = 0;
2862
1.02M
    ctxt->input = NULL;
2863
2864
1.02M
    ctxt->version = NULL;
2865
1.02M
    ctxt->encoding = NULL;
2866
1.02M
    ctxt->standalone = -1;
2867
1.02M
    ctxt->hasExternalSubset = 0;
2868
1.02M
    ctxt->hasPErefs = 0;
2869
1.02M
    ctxt->html = 0;
2870
1.02M
    ctxt->instate = XML_PARSER_START;
2871
2872
    /* Allocate the Node stack */
2873
1.02M
    if (ctxt->nodeTab == NULL) {
2874
1.02M
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2875
1.02M
  ctxt->nodeMax = initialNodeTabSize;
2876
1.02M
    }
2877
1.02M
    if (ctxt->nodeTab == NULL)
2878
32
  return(-1);
2879
1.02M
    ctxt->nodeNr = 0;
2880
1.02M
    ctxt->node = NULL;
2881
2882
    /* Allocate the Name stack */
2883
1.02M
    if (ctxt->nameTab == NULL) {
2884
1.02M
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2885
1.02M
  ctxt->nameMax = initialNodeTabSize;
2886
1.02M
    }
2887
1.02M
    if (ctxt->nameTab == NULL)
2888
38
  return(-1);
2889
1.02M
    ctxt->nameNr = 0;
2890
1.02M
    ctxt->name = NULL;
2891
2892
    /* Allocate the space stack */
2893
1.02M
    if (ctxt->spaceTab == NULL) {
2894
1.02M
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2895
1.02M
  ctxt->spaceMax = initialNodeTabSize;
2896
1.02M
    }
2897
1.02M
    if (ctxt->spaceTab == NULL)
2898
52
  return(-1);
2899
1.02M
    ctxt->spaceNr = 1;
2900
1.02M
    ctxt->spaceTab[0] = -1;
2901
1.02M
    ctxt->space = &ctxt->spaceTab[0];
2902
1.02M
    ctxt->myDoc = NULL;
2903
1.02M
    ctxt->wellFormed = 1;
2904
1.02M
    ctxt->nsWellFormed = 1;
2905
1.02M
    ctxt->valid = 1;
2906
2907
1.02M
    ctxt->options = XML_PARSE_NODICT;
2908
2909
    /*
2910
     * Initialize some parser options from deprecated global variables.
2911
     * Note that the "modern" API taking options arguments or
2912
     * xmlCtxtSetOptions will ignore these defaults. They're only
2913
     * relevant if old API functions like xmlParseFile are used.
2914
     */
2915
1.02M
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2916
1.02M
    if (ctxt->loadsubset) {
2917
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2918
0
    }
2919
1.02M
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2920
1.02M
    if (ctxt->validate) {
2921
0
        ctxt->options |= XML_PARSE_DTDVALID;
2922
0
    }
2923
1.02M
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2924
1.02M
    if (ctxt->pedantic) {
2925
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2926
0
    }
2927
1.02M
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
2928
1.02M
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2929
1.02M
    if (ctxt->keepBlanks == 0) {
2930
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2931
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2932
0
    }
2933
1.02M
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2934
1.02M
    if (ctxt->replaceEntities) {
2935
0
        ctxt->options |= XML_PARSE_NOENT;
2936
0
    }
2937
1.02M
    if (xmlGetWarningsDefaultValue == 0)
2938
0
        ctxt->options |= XML_PARSE_NOWARNING;
2939
2940
1.02M
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2941
1.02M
    ctxt->vctxt.userData = ctxt;
2942
1.02M
    ctxt->vctxt.error = xmlParserValidityError;
2943
1.02M
    ctxt->vctxt.warning = xmlParserValidityWarning;
2944
2945
1.02M
    ctxt->record_info = 0;
2946
1.02M
    ctxt->checkIndex = 0;
2947
1.02M
    ctxt->inSubset = 0;
2948
1.02M
    ctxt->errNo = XML_ERR_OK;
2949
1.02M
    ctxt->depth = 0;
2950
1.02M
    ctxt->catalogs = NULL;
2951
1.02M
    ctxt->sizeentities = 0;
2952
1.02M
    ctxt->sizeentcopy = 0;
2953
1.02M
    ctxt->input_id = 1;
2954
1.02M
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2955
1.02M
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2956
2957
1.02M
    if (ctxt->nsdb == NULL) {
2958
1.02M
        ctxt->nsdb = xmlParserNsCreate();
2959
1.02M
        if (ctxt->nsdb == NULL)
2960
50
            return(-1);
2961
1.02M
    }
2962
2963
1.02M
    return(0);
2964
1.02M
}
2965
2966
/**
2967
 * xmlInitParserCtxt:
2968
 * @ctxt:  an XML parser context
2969
 *
2970
 * DEPRECATED: Internal function which will be made private in a future
2971
 * version.
2972
 *
2973
 * Initialize a parser context
2974
 *
2975
 * Returns 0 in case of success and -1 in case of error
2976
 */
2977
2978
int
2979
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2980
0
{
2981
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2982
0
}
2983
2984
/**
2985
 * xmlFreeParserCtxt:
2986
 * @ctxt:  an XML parser context
2987
 *
2988
 * Free all the memory used by a parser context. However the parsed
2989
 * document in ctxt->myDoc is not freed.
2990
 */
2991
2992
void
2993
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2994
1.02M
{
2995
1.02M
    xmlParserInputPtr input;
2996
2997
1.02M
    if (ctxt == NULL) return;
2998
2999
1.02M
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
3000
0
        xmlFreeInputStream(input);
3001
0
    }
3002
1.02M
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
3003
1.02M
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
3004
1.02M
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
3005
1.02M
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
3006
1.02M
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
3007
1.02M
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
3008
1.02M
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
3009
1.02M
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
3010
1.02M
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
3011
#ifdef LIBXML_SAX1_ENABLED
3012
    if ((ctxt->sax != NULL) &&
3013
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
3014
#else
3015
1.02M
    if (ctxt->sax != NULL)
3016
1.02M
#endif /* LIBXML_SAX1_ENABLED */
3017
1.02M
        xmlFree(ctxt->sax);
3018
1.02M
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
3019
1.02M
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
3020
1.02M
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
3021
1.02M
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
3022
1.02M
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
3023
1.02M
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
3024
1.02M
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
3025
1.02M
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
3026
1.02M
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
3027
1.02M
    if (ctxt->attsDefault != NULL)
3028
16.2k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
3029
1.02M
    if (ctxt->attsSpecial != NULL)
3030
18.8k
        xmlHashFree(ctxt->attsSpecial, NULL);
3031
1.02M
    if (ctxt->freeElems != NULL) {
3032
0
        xmlNodePtr cur, next;
3033
3034
0
  cur = ctxt->freeElems;
3035
0
  while (cur != NULL) {
3036
0
      next = cur->next;
3037
0
      xmlFree(cur);
3038
0
      cur = next;
3039
0
  }
3040
0
    }
3041
1.02M
    if (ctxt->freeAttrs != NULL) {
3042
0
        xmlAttrPtr cur, next;
3043
3044
0
  cur = ctxt->freeAttrs;
3045
0
  while (cur != NULL) {
3046
0
      next = cur->next;
3047
0
      xmlFree(cur);
3048
0
      cur = next;
3049
0
  }
3050
0
    }
3051
    /*
3052
     * cleanup the error strings
3053
     */
3054
1.02M
    if (ctxt->lastError.message != NULL)
3055
439k
        xmlFree(ctxt->lastError.message);
3056
1.02M
    if (ctxt->lastError.file != NULL)
3057
40.8k
        xmlFree(ctxt->lastError.file);
3058
1.02M
    if (ctxt->lastError.str1 != NULL)
3059
273k
        xmlFree(ctxt->lastError.str1);
3060
1.02M
    if (ctxt->lastError.str2 != NULL)
3061
156k
        xmlFree(ctxt->lastError.str2);
3062
1.02M
    if (ctxt->lastError.str3 != NULL)
3063
12.3k
        xmlFree(ctxt->lastError.str3);
3064
3065
1.02M
#ifdef LIBXML_CATALOG_ENABLED
3066
1.02M
    if (ctxt->catalogs != NULL)
3067
0
  xmlCatalogFreeLocal(ctxt->catalogs);
3068
1.02M
#endif
3069
1.02M
    xmlFree(ctxt);
3070
1.02M
}
3071
3072
/**
3073
 * xmlNewParserCtxt:
3074
 *
3075
 * Allocate and initialize a new parser context.
3076
 *
3077
 * Returns the xmlParserCtxtPtr or NULL
3078
 */
3079
3080
xmlParserCtxtPtr
3081
xmlNewParserCtxt(void)
3082
1.02M
{
3083
1.02M
    return(xmlNewSAXParserCtxt(NULL, NULL));
3084
1.02M
}
3085
3086
/**
3087
 * xmlNewSAXParserCtxt:
3088
 * @sax:  SAX handler
3089
 * @userData:  user data
3090
 *
3091
 * Allocate and initialize a new SAX parser context. If userData is NULL,
3092
 * the parser context will be passed as user data.
3093
 *
3094
 * Available since 2.11.0. If you want support older versions,
3095
 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
3096
 * struct assignment.
3097
 *
3098
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
3099
 */
3100
3101
xmlParserCtxtPtr
3102
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3103
1.02M
{
3104
1.02M
    xmlParserCtxtPtr ctxt;
3105
3106
1.02M
    xmlInitParser();
3107
3108
1.02M
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3109
1.02M
    if (ctxt == NULL)
3110
43
  return(NULL);
3111
1.02M
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3112
1.02M
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3113
299
        xmlFreeParserCtxt(ctxt);
3114
299
  return(NULL);
3115
299
    }
3116
1.02M
    return(ctxt);
3117
1.02M
}
3118
3119
/**
3120
 * xmlCtxtGetPrivate:
3121
 * @ctxt:  parser context
3122
 *
3123
 * Available since 2.14.0.
3124
 *
3125
 * Returns the private application data.
3126
 */
3127
void *
3128
0
xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt) {
3129
0
    if (ctxt == NULL)
3130
0
        return(NULL);
3131
3132
0
    return(ctxt->_private);
3133
0
}
3134
3135
/**
3136
 * xmlCtxtSetPrivate:
3137
 * @ctxt:  parser context
3138
 * @priv:  private application data
3139
 *
3140
 * Available since 2.14.0.
3141
 *
3142
 * Set the private application data.
3143
 */
3144
void
3145
0
xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt, void *priv) {
3146
0
    if (ctxt == NULL)
3147
0
        return;
3148
3149
0
    ctxt->_private = priv;
3150
0
}
3151
3152
/**
3153
 * xmlCtxtGetCatalogs:
3154
 * @ctxt:  parser context
3155
 *
3156
 * Available since 2.14.0.
3157
 *
3158
 * Returns the local catalogs.
3159
 */
3160
void *
3161
0
xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt) {
3162
0
    if (ctxt == NULL)
3163
0
        return(NULL);
3164
3165
0
    return(ctxt->catalogs);
3166
0
}
3167
3168
/**
3169
 * xmlCtxtSetCatalogs:
3170
 * @ctxt:  parser context
3171
 * @catalogs:  catalogs pointer
3172
 *
3173
 * Available since 2.14.0.
3174
 *
3175
 * Set the local catalogs.
3176
 */
3177
void
3178
0
xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt, void *catalogs) {
3179
0
    if (ctxt == NULL)
3180
0
        return;
3181
3182
0
    ctxt->catalogs = catalogs;
3183
0
}
3184
3185
/**
3186
 * xmlCtxtGetDict:
3187
 * @ctxt:  parser context
3188
 *
3189
 * Available since 2.14.0.
3190
 *
3191
 * Returns the dictionary.
3192
 */
3193
xmlDictPtr
3194
0
xmlCtxtGetDict(xmlParserCtxtPtr ctxt) {
3195
0
    if (ctxt == NULL)
3196
0
        return(NULL);
3197
3198
0
    return(ctxt->dict);
3199
0
}
3200
3201
/**
3202
 * xmlCtxtSetDict:
3203
 * @ctxt:  parser context
3204
 * @dict:  dictionary
3205
 *
3206
 * Available since 2.14.0.
3207
 *
3208
 * Set the dictionary. This should only be done immediately after
3209
 * creating a parser context.
3210
 */
3211
void
3212
0
xmlCtxtSetDict(xmlParserCtxtPtr ctxt, xmlDictPtr dict) {
3213
0
    if (ctxt == NULL)
3214
0
        return;
3215
3216
0
    if (ctxt->dict != NULL)
3217
0
        xmlDictFree(ctxt->dict);
3218
3219
0
    xmlDictReference(dict);
3220
0
    ctxt->dict = dict;
3221
0
}
3222
3223
/**
3224
 * xmlCtxtGetSaxHandler:
3225
 * @ctxt:  parser context
3226
 *
3227
 * Available since 2.14.0.
3228
 *
3229
 * Returns the SAX handler struct. This is not a copy and must not
3230
 * be freed. Handlers can be updated.
3231
 */
3232
xmlSAXHandler *
3233
0
xmlCtxtGetSaxHandler(xmlParserCtxtPtr ctxt) {
3234
0
    if (ctxt == NULL)
3235
0
        return(NULL);
3236
3237
0
    return(ctxt->sax);
3238
0
}
3239
3240
/**
3241
 * xmlCtxtSetSaxHandler:
3242
 * @ctxt:  parser context
3243
 * @sax:  SAX handler
3244
 *
3245
 * Available since 2.14.0.
3246
 *
3247
 * Set the SAX handler struct to a copy of @sax.
3248
 *
3249
 * Returns 0 on success or -1 if arguments are invalid or a memory
3250
 * allocation failed.
3251
 */
3252
int
3253
0
xmlCtxtSetSaxHandler(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax) {
3254
0
    xmlSAXHandler *copy;
3255
3256
0
    if ((ctxt == NULL) || (sax == NULL))
3257
0
        return(-1);
3258
3259
0
    copy = xmlMalloc(sizeof(*copy));
3260
0
    if (copy == NULL)
3261
0
        return(-1);
3262
3263
0
    memcpy(copy, sax, sizeof(*copy));
3264
0
    ctxt->sax = copy;
3265
3266
0
    return(0);
3267
0
}
3268
3269
/**
3270
 * xmlCtxtGetDocument:
3271
 * @ctxt:  parser context
3272
 *
3273
 * Available since 2.14.0.
3274
 *
3275
 * Returns the parsed document or NULL if a fatal error occurred when
3276
 * parsing. The document must be freed by the caller. Resets the
3277
 * context's document to NULL.
3278
 */
3279
xmlDocPtr
3280
858k
xmlCtxtGetDocument(xmlParserCtxtPtr ctxt) {
3281
858k
    xmlDocPtr doc;
3282
3283
858k
    if (ctxt == NULL)
3284
0
        return(NULL);
3285
3286
858k
    if ((ctxt->wellFormed) ||
3287
858k
        (((ctxt->recovery) || (ctxt->html)) &&
3288
616k
         (!xmlCtxtIsCatastrophicError(ctxt)))) {
3289
616k
        doc = ctxt->myDoc;
3290
616k
    } else {
3291
242k
        if (ctxt->errNo == XML_ERR_OK)
3292
0
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error");
3293
242k
        doc = NULL;
3294
242k
        xmlFreeDoc(ctxt->myDoc);
3295
242k
    }
3296
858k
    ctxt->myDoc = NULL;
3297
3298
858k
    return(doc);
3299
858k
}
3300
3301
/**
3302
 * xmlCtxtIsHtml:
3303
 * @ctxt:  parser context
3304
 *
3305
 * Available since 2.14.0.
3306
 *
3307
 * Returns 1 if this is a HTML parser context, 0 otherwise.
3308
 */
3309
int
3310
0
xmlCtxtIsHtml(xmlParserCtxtPtr ctxt) {
3311
0
    if (ctxt == NULL)
3312
0
        return(0);
3313
3314
0
    return(ctxt->html ? 1 : 0);
3315
0
}
3316
3317
/**
3318
 * xmlCtxtIsStopped:
3319
 * @ctxt:  parser context
3320
 *
3321
 * Available since 2.14.0.
3322
 *
3323
 * Returns 1 if the parser is stopped, 0 otherwise.
3324
 */
3325
int
3326
0
xmlCtxtIsStopped(xmlParserCtxtPtr ctxt) {
3327
0
    if (ctxt == NULL)
3328
0
        return(0);
3329
3330
0
    return(PARSER_STOPPED(ctxt));
3331
0
}
3332
3333
#ifdef LIBXML_VALID_ENABLED
/**
 * xmlCtxtGetValidCtxt:
 * @ctxt:  parser context
 *
 * Available since 2.14.0.
 *
 * Returns a pointer to the validation context embedded in the parser
 * context, or NULL if @ctxt is NULL.
 */
xmlValidCtxtPtr
xmlCtxtGetValidCtxt(xmlParserCtxtPtr ctxt) {
    return (ctxt != NULL) ? &ctxt->vctxt : NULL;
}
#endif
3350
3351
/************************************************************************
3352
 *                  *
3353
 *    Handling of node information        *
3354
 *                  *
3355
 ************************************************************************/
3356
3357
/**
3358
 * xmlClearParserCtxt:
3359
 * @ctxt:  an XML parser context
3360
 *
3361
 * Clear (release owned resources) and reinitialize a parser context
3362
 */
3363
3364
void
3365
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
3366
0
{
3367
0
  if (ctxt==NULL)
3368
0
    return;
3369
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
3370
0
  xmlCtxtReset(ctxt);
3371
0
}
3372
3373
3374
/**
3375
 * xmlParserFindNodeInfo:
3376
 * @ctx:  an XML parser context
3377
 * @node:  an XML node within the tree
3378
 *
3379
 * DEPRECATED: Don't use.
3380
 *
3381
 * Find the parser node info struct for a given node
3382
 *
3383
 * Returns an xmlParserNodeInfo block pointer or NULL
3384
 */
3385
const xmlParserNodeInfo *
3386
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
3387
0
{
3388
0
    unsigned long pos;
3389
3390
0
    if ((ctx == NULL) || (node == NULL))
3391
0
        return (NULL);
3392
    /* Find position where node should be at */
3393
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3394
0
    if (pos < ctx->node_seq.length
3395
0
        && ctx->node_seq.buffer[pos].node == node)
3396
0
        return &ctx->node_seq.buffer[pos];
3397
0
    else
3398
0
        return NULL;
3399
0
}
3400
3401
3402
/**
3403
 * xmlInitNodeInfoSeq:
3404
 * @seq:  a node info sequence pointer
3405
 *
3406
 * DEPRECATED: Don't use.
3407
 *
3408
 * -- Initialize (set to initial state) node info sequence
3409
 */
3410
void
3411
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3412
1.02M
{
3413
1.02M
    if (seq == NULL)
3414
0
        return;
3415
1.02M
    seq->length = 0;
3416
1.02M
    seq->maximum = 0;
3417
1.02M
    seq->buffer = NULL;
3418
1.02M
}
3419
3420
/**
3421
 * xmlClearNodeInfoSeq:
3422
 * @seq:  a node info sequence pointer
3423
 *
3424
 * DEPRECATED: Don't use.
3425
 *
3426
 * -- Clear (release memory and reinitialize) node
3427
 *   info sequence
3428
 */
3429
void
3430
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3431
0
{
3432
0
    if (seq == NULL)
3433
0
        return;
3434
0
    if (seq->buffer != NULL)
3435
0
        xmlFree(seq->buffer);
3436
0
    xmlInitNodeInfoSeq(seq);
3437
0
}
3438
3439
/**
3440
 * xmlParserFindNodeInfoIndex:
3441
 * @seq:  a node info sequence pointer
3442
 * @node:  an XML node pointer
3443
 *
3444
 * DEPRECATED: Don't use.
3445
 *
3446
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
3447
 *   the given node is or should be at in a sorted sequence
3448
 *
3449
 * Returns a long indicating the position of the record
3450
 */
3451
unsigned long
3452
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
3453
                           xmlNodePtr node)
3454
0
{
3455
0
    unsigned long upper, lower, middle;
3456
0
    int found = 0;
3457
3458
0
    if ((seq == NULL) || (node == NULL))
3459
0
        return ((unsigned long) -1);
3460
3461
    /* Do a binary search for the key */
3462
0
    lower = 1;
3463
0
    upper = seq->length;
3464
0
    middle = 0;
3465
0
    while (lower <= upper && !found) {
3466
0
        middle = lower + (upper - lower) / 2;
3467
0
        if (node == seq->buffer[middle - 1].node)
3468
0
            found = 1;
3469
0
        else if (node < seq->buffer[middle - 1].node)
3470
0
            upper = middle - 1;
3471
0
        else
3472
0
            lower = middle + 1;
3473
0
    }
3474
3475
    /* Return position */
3476
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3477
0
        return middle;
3478
0
    else
3479
0
        return middle - 1;
3480
0
}
3481
3482
3483
/**
3484
 * xmlParserAddNodeInfo:
3485
 * @ctxt:  an XML parser context
3486
 * @info:  a node info sequence pointer
3487
 *
3488
 * DEPRECATED: Don't use.
3489
 *
3490
 * Insert node info record into the sorted sequence
3491
 */
3492
void
3493
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
3494
                     xmlParserNodeInfoPtr info)
3495
0
{
3496
0
    unsigned long pos;
3497
3498
0
    if ((ctxt == NULL) || (info == NULL)) return;
3499
3500
    /* Find pos and check to see if node is already in the sequence */
3501
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3502
0
                                     info->node);
3503
3504
0
    if ((pos < ctxt->node_seq.length) &&
3505
0
        (ctxt->node_seq.buffer != NULL) &&
3506
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3507
0
        ctxt->node_seq.buffer[pos] = *info;
3508
0
    }
3509
3510
    /* Otherwise, we need to add new node to buffer */
3511
0
    else {
3512
0
        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
3513
0
            xmlParserNodeInfo *tmp;
3514
0
            int newSize;
3515
3516
0
            newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
3517
0
                                      4, XML_MAX_ITEMS);
3518
0
            if (newSize < 0) {
3519
0
    xmlCtxtErrMemory(ctxt);
3520
0
                return;
3521
0
            }
3522
0
            tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
3523
0
            if (tmp == NULL) {
3524
0
    xmlCtxtErrMemory(ctxt);
3525
0
                return;
3526
0
            }
3527
0
            ctxt->node_seq.buffer = tmp;
3528
0
            ctxt->node_seq.maximum = newSize;
3529
0
        }
3530
3531
        /* If position is not at end, move elements out of the way */
3532
0
        if (pos != ctxt->node_seq.length) {
3533
0
            unsigned long i;
3534
3535
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3536
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3537
0
        }
3538
3539
        /* Copy element and increase length */
3540
0
        ctxt->node_seq.buffer[pos] = *info;
3541
0
        ctxt->node_seq.length++;
3542
0
    }
3543
0
}
3544
3545
/************************************************************************
3546
 *                  *
3547
 *    Defaults settings         *
3548
 *                  *
3549
 ************************************************************************/
3550
/**
3551
 * xmlPedanticParserDefault:
3552
 * @val:  int 0 or 1
3553
 *
3554
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3555
 *
3556
 * Set and return the previous value for enabling pedantic warnings.
3557
 *
3558
 * Returns the last value for 0 for no substitution, 1 for substitution.
3559
 */
3560
3561
int
3562
0
xmlPedanticParserDefault(int val) {
3563
0
    int old = xmlPedanticParserDefaultValue;
3564
3565
0
    xmlPedanticParserDefaultValue = val;
3566
0
    return(old);
3567
0
}
3568
3569
/**
3570
 * xmlLineNumbersDefault:
3571
 * @val:  int 0 or 1
3572
 *
3573
 * DEPRECATED: The modern options API always enables line numbers.
3574
 *
3575
 * Set and return the previous value for enabling line numbers in elements
3576
 * contents. This may break on old application and is turned off by default.
3577
 *
3578
 * Returns the last value for 0 for no substitution, 1 for substitution.
3579
 */
3580
3581
int
3582
0
xmlLineNumbersDefault(int val) {
3583
0
    int old = xmlLineNumbersDefaultValue;
3584
3585
0
    xmlLineNumbersDefaultValue = val;
3586
0
    return(old);
3587
0
}
3588
3589
/**
3590
 * xmlSubstituteEntitiesDefault:
3591
 * @val:  int 0 or 1
3592
 *
3593
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3594
 *
3595
 * Set and return the previous value for default entity support.
3596
 * Initially the parser always keep entity references instead of substituting
3597
 * entity values in the output. This function has to be used to change the
3598
 * default parser behavior
3599
 * SAX::substituteEntities() has to be used for changing that on a file by
3600
 * file basis.
3601
 *
3602
 * Returns the last value for 0 for no substitution, 1 for substitution.
3603
 */
3604
3605
int
3606
0
xmlSubstituteEntitiesDefault(int val) {
3607
0
    int old = xmlSubstituteEntitiesDefaultValue;
3608
3609
0
    xmlSubstituteEntitiesDefaultValue = val;
3610
0
    return(old);
3611
0
}
3612
3613
/**
3614
 * xmlKeepBlanksDefault:
3615
 * @val:  int 0 or 1
3616
 *
3617
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3618
 *
3619
 * Set and return the previous value for default blanks text nodes support.
3620
 * The 1.x version of the parser used an heuristic to try to detect
3621
 * ignorable white spaces. As a result the SAX callback was generating
3622
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3623
 * using the DOM output text nodes containing those blanks were not generated.
3624
 * The 2.x and later version will switch to the XML standard way and
3625
 * ignorableWhitespace() are only generated when running the parser in
3626
 * validating mode and when the current element doesn't allow CDATA or
3627
 * mixed content.
3628
 * This function is provided as a way to force the standard behavior
3629
 * on 1.X libs and to switch back to the old mode for compatibility when
3630
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3631
 * by using xmlIsBlankNode() commodity function to detect the "empty"
3632
 * nodes generated.
3633
 * This value also affect autogeneration of indentation when saving code
3634
 * if blanks sections are kept, indentation is not generated.
3635
 *
3636
 * Returns the last value for 0 for no substitution, 1 for substitution.
3637
 */
3638
3639
int
3640
0
xmlKeepBlanksDefault(int val) {
3641
0
    int old = xmlKeepBlanksDefaultValue;
3642
3643
0
    xmlKeepBlanksDefaultValue = val;
3644
0
#ifdef LIBXML_OUTPUT_ENABLED
3645
0
    if (!val)
3646
0
        xmlIndentTreeOutput = 1;
3647
0
#endif
3648
0
    return(old);
3649
0
}
3650