Coverage Report

Created: 2024-09-06 07:53

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
#include <libxml/nanohttp.h>
39
40
/*
 * Shorthands for the `cur` and `end` pointers of the input currently
 * attached to a parser context. Both the argument and the whole expansion
 * are parenthesized so that an arbitrary pointer expression (for example
 * `ctxts + 1`) expands correctly; the result is still usable as an lvalue.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
#include "private/io.h"
47
#include "private/parser.h"
48
49
20.8M
#define XML_MAX_ERRORS 100
50
51
/*
52
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53
 * factor of serialized output after entity expansion.
54
 */
55
20.2k
#define XML_MAX_AMPLIFICATION_DEFAULT 5
56
57
/*
58
 * Various global defaults for parsing
59
 */
60
61
/**
62
 * xmlCheckVersion:
63
 * @version: the include version number
64
 *
65
 * check the compiled lib version against the include one.
66
 */
67
void
68
20.2k
xmlCheckVersion(int version) {
69
20.2k
    int myversion = LIBXML_VERSION;
70
71
20.2k
    xmlInitParser();
72
73
20.2k
    if ((myversion / 10000) != (version / 10000)) {
74
0
  xmlPrintErrorMessage(
75
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
76
0
    (version / 10000), (myversion / 10000));
77
20.2k
    } else if ((myversion / 100) < (version / 100)) {
78
0
  xmlPrintErrorMessage(
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
20.2k
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlCtxtSetErrorHandler:
94
 * @ctxt:  an XML parser context
95
 * @handler:  error handler
96
 * @data:  data for error handler
97
 *
98
 * Register a callback function that will be called on errors and
99
 * warnings. If handler is NULL, the error handler will be deactivated.
100
 *
101
 * This is the recommended way to collect errors from the parser and
102
 * takes precedence over all other error reporting mechanisms.
103
 * These are (in order of precedence):
104
 *
105
 * - per-context structured handler (xmlCtxtSetErrorHandler)
106
 * - per-context structured "serror" SAX handler
107
 * - global structured handler (xmlSetStructuredErrorFunc)
108
 * - per-context generic "error" and "warning" SAX handlers
109
 * - global generic handler (xmlSetGenericErrorFunc)
110
 * - print to stderr
111
 *
112
 * Available since 2.13.0.
113
 */
114
void
115
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116
                       void *data)
117
0
{
118
0
    if (ctxt == NULL)
119
0
        return;
120
0
    ctxt->errorHandler = handler;
121
0
    ctxt->errorCtxt = data;
122
0
}
123
124
/**
125
 * xmlCtxtGetLastError:
126
 * @ctx:  an XML parser context
127
 *
128
 * Get the last parsing error registered.
129
 *
130
 * Returns NULL if no error occurred or a pointer to the error
131
 */
132
const xmlError *
133
xmlCtxtGetLastError(void *ctx)
134
0
{
135
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
136
137
0
    if (ctxt == NULL)
138
0
        return (NULL);
139
0
    if (ctxt->lastError.code == XML_ERR_OK)
140
0
        return (NULL);
141
0
    return (&ctxt->lastError);
142
0
}
143
144
/**
145
 * xmlCtxtResetLastError:
146
 * @ctx:  an XML parser context
147
 *
148
 * Cleanup the last global error registered. For parsing error
149
 * this does not change the well-formedness result.
150
 */
151
void
152
xmlCtxtResetLastError(void *ctx)
153
0
{
154
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
155
156
0
    if (ctxt == NULL)
157
0
        return;
158
0
    ctxt->errNo = XML_ERR_OK;
159
0
    if (ctxt->lastError.code == XML_ERR_OK)
160
0
        return;
161
0
    xmlResetError(&ctxt->lastError);
162
0
}
163
164
/**
165
 * xmlCtxtErrMemory:
166
 * @ctxt:  an XML parser context
167
 *
168
 * Handle an out-of-memory error.
169
 *
170
 * Available since 2.13.0.
171
 */
172
void
173
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
174
0
{
175
0
    xmlStructuredErrorFunc schannel = NULL;
176
0
    xmlGenericErrorFunc channel = NULL;
177
0
    void *data;
178
179
0
    if (ctxt == NULL)
180
0
        return;
181
182
0
    ctxt->errNo = XML_ERR_NO_MEMORY;
183
0
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
184
0
    ctxt->wellFormed = 0;
185
0
    ctxt->disableSAX = 2;
186
187
0
    if (ctxt->errorHandler) {
188
0
        schannel = ctxt->errorHandler;
189
0
        data = ctxt->errorCtxt;
190
0
    } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
191
0
        (ctxt->sax->serror != NULL)) {
192
0
        schannel = ctxt->sax->serror;
193
0
        data = ctxt->userData;
194
0
    } else {
195
0
        channel = ctxt->sax->error;
196
0
        data = ctxt->userData;
197
0
    }
198
199
0
    xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
200
0
                        &ctxt->lastError);
201
0
}
202
203
/**
204
 * xmlCtxtErrIO:
205
 * @ctxt:  parser context
206
 * @code:  xmlParserErrors code
207
 * @uri:  filename or URI (optional)
208
 *
209
 * If filename is empty, use the one from context input if available.
210
 *
211
 * Report an IO error to the parser context.
212
 */
213
void
214
xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
215
5.62k
{
216
5.62k
    const char *errstr, *msg, *str1, *str2;
217
5.62k
    xmlErrorLevel level;
218
219
5.62k
    if (ctxt == NULL)
220
0
        return;
221
222
5.62k
    if (((code == XML_IO_ENOENT) ||
223
5.62k
         (code == XML_IO_UNKNOWN))) {
224
        /*
225
         * Only report a warning if a file could not be found. This should
226
         * only be done for external entities, but the external entity loader
227
         * of xsltproc can try multiple paths and assumes that ENOENT doesn't
228
         * raise an error and aborts parsing.
229
         */
230
0
        if (ctxt->validate == 0)
231
0
            level = XML_ERR_WARNING;
232
0
        else
233
0
            level = XML_ERR_ERROR;
234
5.62k
    } else if (code == XML_IO_NETWORK_ATTEMPT) {
235
0
        level = XML_ERR_ERROR;
236
5.62k
    } else {
237
5.62k
        level = XML_ERR_FATAL;
238
5.62k
    }
239
240
5.62k
    errstr = xmlErrString(code);
241
242
5.62k
    if (uri == NULL) {
243
5.62k
        msg = "%s\n";
244
5.62k
        str1 = errstr;
245
5.62k
        str2 = NULL;
246
5.62k
    } else {
247
0
        msg = "failed to load \"%s\": %s\n";
248
0
        str1 = uri;
249
0
        str2 = errstr;
250
0
    }
251
252
5.62k
    xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
253
5.62k
               (const xmlChar *) uri, NULL, NULL, 0,
254
5.62k
               msg, str1, str2);
255
5.62k
}
256
257
static int
258
20.4M
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt) {
259
20.4M
    int fatal = 0;
260
20.4M
    int code;
261
262
20.4M
    if (ctxt == NULL)
263
0
        return(1);
264
265
20.4M
    if (ctxt->lastError.level != XML_ERR_FATAL)
266
2.90M
        return(0);
267
268
17.5M
    code = ctxt->lastError.code;
269
270
17.5M
    switch (code) {
271
0
        case XML_ERR_NO_MEMORY:
272
63
        case XML_ERR_RESOURCE_LIMIT:
273
63
        case XML_ERR_SYSTEM:
274
63
        case XML_ERR_ARGUMENT:
275
63
        case XML_ERR_INTERNAL_ERROR:
276
63
            fatal = 1;
277
63
            break;
278
17.5M
        default:
279
17.5M
            if ((code >= 1500) && (code <= 1599))
280
0
                fatal = 1;
281
17.5M
            break;
282
17.5M
    }
283
284
17.5M
    return(fatal);
285
17.5M
}
286
287
/**
288
 * xmlCtxtVErr:
289
 * @ctxt:  a parser context
290
 * @node: the current node or NULL
291
 * @domain: the domain for the error
292
 * @code: the code for the error
293
 * @level: the xmlErrorLevel for the error
294
 * @str1: extra string info
295
 * @str2: extra string info
296
 * @str3: extra string info
297
 * @int1: extra int info
298
 * @msg:  the message to display/transmit
299
 * @ap:  extra parameters for the message display
300
 *
301
 * Raise a parser error.
302
 */
303
void
304
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
305
            xmlParserErrors code, xmlErrorLevel level,
306
            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
307
            int int1, const char *msg, va_list ap)
308
20.8M
{
309
20.8M
    xmlStructuredErrorFunc schannel = NULL;
310
20.8M
    xmlGenericErrorFunc channel = NULL;
311
20.8M
    void *data = NULL;
312
20.8M
    const char *file = NULL;
313
20.8M
    int line = 0;
314
20.8M
    int col = 0;
315
20.8M
    int res;
316
317
20.8M
    if (code == XML_ERR_NO_MEMORY) {
318
0
        xmlCtxtErrMemory(ctxt);
319
0
        return;
320
0
    }
321
322
20.8M
    if (ctxt == NULL)
323
0
        return;
324
325
20.8M
    if (PARSER_STOPPED(ctxt))
326
1.12k
  return;
327
328
20.8M
    if (level == XML_ERR_WARNING) {
329
56.1k
        if (ctxt->nbWarnings >= XML_MAX_ERRORS)
330
20.0k
            goto done;
331
36.1k
        ctxt->nbWarnings += 1;
332
20.8M
    } else {
333
        /* Report at least one fatal error. */
334
20.8M
        if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
335
20.8M
            ((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)))
336
20.3M
            goto done;
337
419k
        ctxt->nbErrors += 1;
338
419k
    }
339
340
456k
    if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
341
456k
        ((level != XML_ERR_WARNING) ||
342
456k
         ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
343
456k
        if (ctxt->errorHandler) {
344
0
            schannel = ctxt->errorHandler;
345
0
            data = ctxt->errorCtxt;
346
456k
        } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
347
456k
            (ctxt->sax->serror != NULL)) {
348
0
            schannel = ctxt->sax->serror;
349
0
            data = ctxt->userData;
350
456k
        } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
351
62.6k
            if (level == XML_ERR_WARNING)
352
11.2k
                channel = ctxt->vctxt.warning;
353
51.3k
            else
354
51.3k
                channel = ctxt->vctxt.error;
355
62.6k
            data = ctxt->vctxt.userData;
356
393k
        } else {
357
393k
            if (level == XML_ERR_WARNING)
358
24.9k
                channel = ctxt->sax->warning;
359
368k
            else
360
368k
                channel = ctxt->sax->error;
361
393k
            data = ctxt->userData;
362
393k
        }
363
456k
    }
364
365
456k
    if (ctxt->input != NULL) {
366
456k
        xmlParserInputPtr input = ctxt->input;
367
368
456k
        if ((input->filename == NULL) &&
369
456k
            (ctxt->inputNr > 1)) {
370
36.6k
            input = ctxt->inputTab[ctxt->inputNr - 2];
371
36.6k
        }
372
456k
        file = input->filename;
373
456k
        line = input->line;
374
456k
        col = input->col;
375
456k
    }
376
377
456k
    res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
378
456k
                         level, file, line, (const char *) str1,
379
456k
                         (const char *) str2, (const char *) str3, int1, col,
380
456k
                         msg, ap);
381
382
456k
    if (res < 0) {
383
0
        xmlCtxtErrMemory(ctxt);
384
0
        return;
385
0
    }
386
387
20.8M
done:
388
20.8M
    if (level >= XML_ERR_ERROR)
389
20.8M
        ctxt->errNo = code;
390
20.8M
    if (level == XML_ERR_FATAL) {
391
20.4M
        ctxt->wellFormed = 0;
392
393
20.4M
        if (xmlCtxtIsCatastrophicError(ctxt))
394
63
            ctxt->disableSAX = 2; /* stop parser */
395
20.4M
        else if (ctxt->recovery == 0)
396
20.4M
            ctxt->disableSAX = 1;
397
20.4M
    }
398
20.8M
}
399
400
/**
401
 * xmlCtxtErr:
402
 * @ctxt:  a parser context
403
 * @node: the current node or NULL
404
 * @domain: the domain for the error
405
 * @code: the code for the error
406
 * @level: the xmlErrorLevel for the error
407
 * @str1: extra string info
408
 * @str2: extra string info
409
 * @str3: extra string info
410
 * @int1: extra int info
411
 * @msg:  the message to display/transmit
412
 * @...:  extra parameters for the message display
413
 *
414
 * Raise a parser error.
415
 */
416
void
417
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
418
           xmlParserErrors code, xmlErrorLevel level,
419
           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
420
           int int1, const char *msg, ...)
421
20.7M
{
422
20.7M
    va_list ap;
423
424
20.7M
    va_start(ap, msg);
425
20.7M
    xmlCtxtVErr(ctxt, node, domain, code, level,
426
20.7M
                str1, str2, str3, int1, msg, ap);
427
20.7M
    va_end(ap);
428
20.7M
}
429
430
/**
431
 * xmlCtxtGetStatus:
432
 * @ctxt:  an XML parser context
433
 *
434
 * Get well-formedness and validation status after parsing. Also
435
 * reports catastrophic errors which are not related to parsing
436
 * like out-of-memory, I/O or other errors.
437
 *
438
 * Available since 2.14.0.
439
 *
440
 * Returns a bitmask of XML_STATUS_* flags ORed together.
441
 */
442
int
443
0
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
444
0
    int bits = 0;
445
446
0
    if (xmlCtxtIsCatastrophicError(ctxt)) {
447
0
        bits |= XML_STATUS_CATASTROPHIC_ERROR |
448
0
                XML_STATUS_NOT_WELL_FORMED |
449
0
                XML_STATUS_NOT_NS_WELL_FORMED;
450
0
        if ((ctxt != NULL) && (ctxt->validate))
451
0
            bits |= XML_STATUS_DTD_VALIDATION_FAILED;
452
453
0
        return(bits);
454
0
    }
455
456
0
    if (!ctxt->wellFormed)
457
0
        bits |= XML_STATUS_NOT_WELL_FORMED;
458
0
    if (!ctxt->nsWellFormed)
459
0
        bits |= XML_STATUS_NOT_NS_WELL_FORMED;
460
0
    if ((ctxt->validate) && (!ctxt->valid))
461
0
        bits |= XML_STATUS_DTD_VALIDATION_FAILED;
462
463
0
    return(bits);
464
0
}
465
466
/**
467
 * xmlFatalErr:
468
 * @ctxt:  an XML parser context
469
 * @code:  the error number
470
 * @info:  extra information string
471
 *
472
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
473
 */
474
void
475
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
476
6.60M
{
477
6.60M
    const char *errmsg;
478
479
6.60M
    errmsg = xmlErrString(code);
480
481
6.60M
    if (info == NULL) {
482
481k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
483
481k
                   NULL, NULL, NULL, 0, "%s\n", errmsg);
484
6.12M
    } else {
485
6.12M
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
486
6.12M
                   (const xmlChar *) info, NULL, NULL, 0,
487
6.12M
                   "%s: %s\n", errmsg, info);
488
6.12M
    }
489
6.60M
}
490
491
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c matches IS_BASECHAR or IS_IDEOGRAPHIC, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
506
507
/************************************************************************
508
 *                  *
509
 *    Input handling functions for progressive parsing  *
510
 *                  *
511
 ************************************************************************/
512
513
/* we need to keep enough input to show errors in context */
514
50.8k
#define LINE_LEN        80
515
516
/**
517
 * xmlHaltParser:
518
 * @ctxt:  an XML parser context
519
 *
520
 * Blocks further parser processing don't override error
521
 * for internal use
522
 */
523
void
524
1.55k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
525
1.55k
    if (ctxt == NULL)
526
0
        return;
527
1.55k
    ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
528
1.55k
    ctxt->disableSAX = 2;
529
1.55k
}
530
531
/**
532
 * xmlParserInputRead:
533
 * @in:  an XML parser input
534
 * @len:  an indicative size for the lookahead
535
 *
536
 * DEPRECATED: This function was internal and is deprecated.
537
 *
538
 * Returns -1 as this is an error to use it.
539
 */
540
int
541
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
542
0
    return(-1);
543
0
}
544
545
/**
546
 * xmlParserGrow:
547
 * @ctxt:  an XML parser context
548
 *
549
 * Grow the input buffer.
550
 *
551
 * Returns the number of bytes read or -1 in case of error.
552
 */
553
int
554
2.27M
xmlParserGrow(xmlParserCtxtPtr ctxt) {
555
2.27M
    xmlParserInputPtr in = ctxt->input;
556
2.27M
    xmlParserInputBufferPtr buf = in->buf;
557
2.27M
    size_t curEnd = in->end - in->cur;
558
2.27M
    size_t curBase = in->cur - in->base;
559
2.27M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
560
0
                       XML_MAX_HUGE_LENGTH :
561
2.27M
                       XML_MAX_LOOKUP_LIMIT;
562
2.27M
    int ret;
563
564
2.27M
    if (buf == NULL)
565
0
        return(0);
566
    /* Don't grow push parser buffer. */
567
2.27M
    if (PARSER_PROGRESSIVE(ctxt))
568
0
        return(0);
569
    /* Don't grow memory buffers. */
570
2.27M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
571
845k
        return(0);
572
1.42M
    if (buf->error != 0)
573
26.0k
        return(-1);
574
575
1.40M
    if (curBase > maxLength) {
576
609
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
577
609
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
578
609
        xmlHaltParser(ctxt);
579
609
  return(-1);
580
609
    }
581
582
1.40M
    if (curEnd >= INPUT_CHUNK)
583
22.0k
        return(0);
584
585
1.37M
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
586
1.37M
    xmlBufUpdateInput(buf->buffer, in, curBase);
587
588
1.37M
    if (ret < 0) {
589
190
        xmlCtxtErrIO(ctxt, buf->error, NULL);
590
190
    }
591
592
1.37M
    return(ret);
593
1.40M
}
594
595
/**
596
 * xmlParserInputGrow:
597
 * @in:  an XML parser input
598
 * @len:  an indicative size for the lookahead
599
 *
600
 * DEPRECATED: Don't use.
601
 *
602
 * This function increase the input for the parser. It tries to
603
 * preserve pointers to the input buffer, and keep already read data
604
 *
605
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
606
 * end of this entity
607
 */
608
int
609
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
610
0
    int ret;
611
0
    size_t indx;
612
613
0
    if ((in == NULL) || (len < 0)) return(-1);
614
0
    if (in->buf == NULL) return(-1);
615
0
    if (in->base == NULL) return(-1);
616
0
    if (in->cur == NULL) return(-1);
617
0
    if (in->buf->buffer == NULL) return(-1);
618
619
    /* Don't grow memory buffers. */
620
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
621
0
        return(0);
622
623
0
    indx = in->cur - in->base;
624
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
625
0
        return(0);
626
0
    }
627
0
    ret = xmlParserInputBufferGrow(in->buf, len);
628
629
0
    in->base = xmlBufContent(in->buf->buffer);
630
0
    if (in->base == NULL) {
631
0
        in->base = BAD_CAST "";
632
0
        in->cur = in->base;
633
0
        in->end = in->base;
634
0
        return(-1);
635
0
    }
636
0
    in->cur = in->base + indx;
637
0
    in->end = xmlBufEnd(in->buf->buffer);
638
639
0
    return(ret);
640
0
}
641
642
/**
643
 * xmlParserShrink:
644
 * @ctxt:  an XML parser context
645
 *
646
 * Shrink the input buffer.
647
 */
648
void
649
26.2k
xmlParserShrink(xmlParserCtxtPtr ctxt) {
650
26.2k
    xmlParserInputPtr in = ctxt->input;
651
26.2k
    xmlParserInputBufferPtr buf = in->buf;
652
26.2k
    size_t used, res;
653
654
26.2k
    if (buf == NULL)
655
0
        return;
656
657
26.2k
    used = in->cur - in->base;
658
659
26.2k
    if (used > LINE_LEN) {
660
24.5k
        res = xmlBufShrink(buf->buffer, used - LINE_LEN);
661
662
24.5k
        if (res > 0) {
663
24.5k
            used -= res;
664
24.5k
            if ((res > ULONG_MAX) ||
665
24.5k
                (in->consumed > ULONG_MAX - (unsigned long)res))
666
0
                in->consumed = ULONG_MAX;
667
24.5k
            else
668
24.5k
                in->consumed += res;
669
24.5k
        }
670
671
24.5k
        xmlBufUpdateInput(buf->buffer, in, used);
672
24.5k
    }
673
26.2k
}
674
675
/**
676
 * xmlParserInputShrink:
677
 * @in:  an XML parser input
678
 *
679
 * DEPRECATED: Don't use.
680
 *
681
 * This function removes used input for the parser.
682
 */
683
void
684
0
xmlParserInputShrink(xmlParserInputPtr in) {
685
0
    size_t used;
686
0
    size_t ret;
687
688
0
    if (in == NULL) return;
689
0
    if (in->buf == NULL) return;
690
0
    if (in->base == NULL) return;
691
0
    if (in->cur == NULL) return;
692
0
    if (in->buf->buffer == NULL) return;
693
694
0
    used = in->cur - in->base;
695
696
0
    if (used > LINE_LEN) {
697
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
698
0
  if (ret > 0) {
699
0
            used -= ret;
700
0
            if ((ret > ULONG_MAX) ||
701
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
702
0
                in->consumed = ULONG_MAX;
703
0
            else
704
0
                in->consumed += ret;
705
0
  }
706
707
0
        xmlBufUpdateInput(in->buf->buffer, in, used);
708
0
    }
709
0
}
710
711
/************************************************************************
712
 *                  *
713
 *    UTF8 character input and related functions    *
714
 *                  *
715
 ************************************************************************/
716
717
/**
718
 * xmlNextChar:
719
 * @ctxt:  the XML parser context
720
 *
721
 * DEPRECATED: Internal function, do not use.
722
 *
723
 * Skip to the next char input char.
724
 */
725
726
void
727
xmlNextChar(xmlParserCtxtPtr ctxt)
728
5.26M
{
729
5.26M
    const unsigned char *cur;
730
5.26M
    size_t avail;
731
5.26M
    int c;
732
733
5.26M
    if ((ctxt == NULL) || (ctxt->input == NULL))
734
0
        return;
735
736
5.26M
    avail = ctxt->input->end - ctxt->input->cur;
737
738
5.26M
    if (avail < INPUT_CHUNK) {
739
356k
        xmlParserGrow(ctxt);
740
356k
        if (ctxt->input->cur >= ctxt->input->end)
741
1.95k
            return;
742
354k
        avail = ctxt->input->end - ctxt->input->cur;
743
354k
    }
744
745
5.26M
    cur = ctxt->input->cur;
746
5.26M
    c = *cur;
747
748
5.26M
    if (c < 0x80) {
749
4.87M
        if (c == '\n') {
750
242k
            ctxt->input->cur++;
751
242k
            ctxt->input->line++;
752
242k
            ctxt->input->col = 1;
753
4.63M
        } else if (c == '\r') {
754
            /*
755
             *   2.11 End-of-Line Handling
756
             *   the literal two-character sequence "#xD#xA" or a standalone
757
             *   literal #xD, an XML processor must pass to the application
758
             *   the single character #xA.
759
             */
760
22.5k
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
761
22.5k
            ctxt->input->line++;
762
22.5k
            ctxt->input->col = 1;
763
22.5k
            return;
764
4.61M
        } else {
765
4.61M
            ctxt->input->cur++;
766
4.61M
            ctxt->input->col++;
767
4.61M
        }
768
4.87M
    } else {
769
381k
        ctxt->input->col++;
770
771
381k
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
772
6.59k
            goto encoding_error;
773
774
374k
        if (c < 0xe0) {
775
            /* 2-byte code */
776
14.1k
            if (c < 0xc2)
777
13.1k
                goto encoding_error;
778
1.04k
            ctxt->input->cur += 2;
779
360k
        } else {
780
360k
            unsigned int val = (c << 8) | cur[1];
781
782
360k
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
783
504
                goto encoding_error;
784
785
360k
            if (c < 0xf0) {
786
                /* 3-byte code */
787
356k
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
788
367
                    goto encoding_error;
789
356k
                ctxt->input->cur += 3;
790
356k
            } else {
791
3.09k
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
792
162
                    goto encoding_error;
793
794
                /* 4-byte code */
795
2.92k
                if ((val < 0xf090) || (val >= 0xf490))
796
2.48k
                    goto encoding_error;
797
442
                ctxt->input->cur += 4;
798
442
            }
799
360k
        }
800
374k
    }
801
802
5.21M
    return;
803
804
5.21M
encoding_error:
805
    /* Only report the first error */
806
23.2k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
807
578
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
808
578
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
809
578
    }
810
23.2k
    ctxt->input->cur++;
811
23.2k
}
812
813
/**
814
 * xmlCurrentChar:
815
 * @ctxt:  the XML parser context
816
 * @len:  pointer to the length of the char read
817
 *
818
 * DEPRECATED: Internal function, do not use.
819
 *
820
 * The current char value, if using UTF-8 this may actually span multiple
821
 * bytes in the input buffer. Implement the end of line normalization:
822
 * 2.11 End-of-Line Handling
823
 * Wherever an external parsed entity or the literal entity value
824
 * of an internal parsed entity contains either the literal two-character
825
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
826
 * must pass to the application the single character #xA.
827
 * This behavior can conveniently be produced by normalizing all
828
 * line breaks to #xA on input, before parsing.)
829
 *
830
 * Returns the current char value and its length
831
 */
832
833
int
834
220M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
835
220M
    const unsigned char *cur;
836
220M
    size_t avail;
837
220M
    int c;
838
839
220M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
840
841
220M
    avail = ctxt->input->end - ctxt->input->cur;
842
843
220M
    if (avail < INPUT_CHUNK) {
844
768k
        xmlParserGrow(ctxt);
845
768k
        avail = ctxt->input->end - ctxt->input->cur;
846
768k
    }
847
848
220M
    cur = ctxt->input->cur;
849
220M
    c = *cur;
850
851
220M
    if (c < 0x80) {
852
  /* 1-byte code */
853
28.1M
        if (c < 0x20) {
854
            /*
855
             *   2.11 End-of-Line Handling
856
             *   the literal two-character sequence "#xD#xA" or a standalone
857
             *   literal #xD, an XML processor must pass to the application
858
             *   the single character #xA.
859
             */
860
15.0M
            if (c == '\r') {
861
                /*
862
                 * TODO: This function shouldn't change the 'cur' pointer
863
                 * as side effect, but the NEXTL macro in parser.c relies
864
                 * on this behavior when incrementing line numbers.
865
                 */
866
193k
                if (cur[1] == '\n')
867
12.6k
                    ctxt->input->cur++;
868
193k
                *len = 1;
869
193k
                c = '\n';
870
14.8M
            } else if (c == 0) {
871
6.13M
                if (ctxt->input->cur >= ctxt->input->end) {
872
10.7k
                    *len = 0;
873
6.12M
                } else {
874
6.12M
                    *len = 1;
875
                    /*
876
                     * TODO: Null bytes should be handled by callers,
877
                     * but this can be tricky.
878
                     */
879
6.12M
                    xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
880
6.12M
                            "Char 0x0 out of allowed range\n");
881
6.12M
                }
882
8.72M
            } else {
883
8.72M
                *len = 1;
884
8.72M
            }
885
15.0M
        } else {
886
13.1M
            *len = 1;
887
13.1M
        }
888
889
28.1M
        return(c);
890
191M
    } else {
891
191M
        int val;
892
893
191M
        if (avail < 2)
894
959
            goto incomplete_sequence;
895
191M
        if ((cur[1] & 0xc0) != 0x80)
896
4.95M
            goto encoding_error;
897
898
186M
        if (c < 0xe0) {
899
            /* 2-byte code */
900
7.16M
            if (c < 0xc2)
901
3.41M
                goto encoding_error;
902
3.74M
            val = (c & 0x1f) << 6;
903
3.74M
            val |= cur[1] & 0x3f;
904
3.74M
            *len = 2;
905
179M
        } else {
906
179M
            if (avail < 3)
907
81
                goto incomplete_sequence;
908
179M
            if ((cur[2] & 0xc0) != 0x80)
909
185k
                goto encoding_error;
910
911
179M
            if (c < 0xf0) {
912
                /* 3-byte code */
913
179M
                val = (c & 0xf) << 12;
914
179M
                val |= (cur[1] & 0x3f) << 6;
915
179M
                val |= cur[2] & 0x3f;
916
179M
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
917
15.3k
                    goto encoding_error;
918
179M
                *len = 3;
919
179M
            } else {
920
67.5k
                if (avail < 4)
921
52
                    goto incomplete_sequence;
922
67.5k
                if ((cur[3] & 0xc0) != 0x80)
923
22.1k
                    goto encoding_error;
924
925
                /* 4-byte code */
926
45.3k
                val = (c & 0x0f) << 18;
927
45.3k
                val |= (cur[1] & 0x3f) << 12;
928
45.3k
                val |= (cur[2] & 0x3f) << 6;
929
45.3k
                val |= cur[3] & 0x3f;
930
45.3k
                if ((val < 0x10000) || (val >= 0x110000))
931
14.4k
                    goto encoding_error;
932
30.9k
                *len = 4;
933
30.9k
            }
934
179M
        }
935
936
183M
        return(val);
937
186M
    }
938
939
8.61M
encoding_error:
940
    /* Only report the first error */
941
8.61M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
942
3.43k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
943
3.43k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
944
3.43k
    }
945
8.61M
    *len = 1;
946
8.61M
    return(XML_INVALID_CHAR);
947
948
1.09k
incomplete_sequence:
949
    /*
950
     * An encoding problem may arise from a truncated input buffer
951
     * splitting a character in the middle. In that case do not raise
952
     * an error but return 0. This should only happen when push parsing
953
     * char data.
954
     */
955
1.09k
    *len = 0;
956
1.09k
    return(0);
957
220M
}
958
959
/**
960
 * xmlStringCurrentChar:
961
 * @ctxt:  the XML parser context
962
 * @cur:  pointer to the beginning of the char
963
 * @len:  pointer to the length of the char read
964
 *
965
 * DEPRECATED: Internal function, do not use.
966
 *
967
 * The current char value, if using UTF-8 this may actually span multiple
968
 * bytes in the input buffer.
969
 *
970
 * Returns the current char value and its length
971
 */
972
973
int
974
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
975
7.26M
                     const xmlChar *cur, int *len) {
976
7.26M
    int c;
977
978
7.26M
    if ((cur == NULL) || (len == NULL))
979
0
        return(0);
980
981
    /* cur is zero-terminated, so we can lie about its length. */
982
7.26M
    *len = 4;
983
7.26M
    c = xmlGetUTF8Char(cur, len);
984
985
7.26M
    return((c < 0) ? 0 : c);
986
7.26M
}
987
988
/**
989
 * xmlCopyCharMultiByte:
990
 * @out:  pointer to an array of xmlChar
991
 * @val:  the char value
992
 *
993
 * append the char value in the array
994
 *
995
 * Returns the number of xmlChar written
996
 */
997
int
998
151M
xmlCopyCharMultiByte(xmlChar *out, int val) {
999
151M
    if ((out == NULL) || (val < 0)) return(0);
1000
    /*
1001
     * We are supposed to handle UTF8, check it's valid
1002
     * From rfc2044: encoding of the Unicode values on UTF-8:
1003
     *
1004
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1005
     * 0000 0000-0000 007F   0xxxxxxx
1006
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1007
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1008
     */
1009
151M
    if  (val >= 0x80) {
1010
151M
  xmlChar *savedout = out;
1011
151M
  int bits;
1012
151M
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1013
147M
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1014
44.5k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1015
0
  else {
1016
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1017
0
            xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
1018
0
#endif
1019
0
      return(0);
1020
0
  }
1021
450M
  for ( ; bits >= 0; bits-= 6)
1022
298M
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1023
151M
  return (out - savedout);
1024
151M
    }
1025
759
    *out = val;
1026
759
    return 1;
1027
151M
}
1028
1029
/**
1030
 * xmlCopyChar:
1031
 * @len:  Ignored, compatibility
1032
 * @out:  pointer to an array of xmlChar
1033
 * @val:  the char value
1034
 *
1035
 * DEPRECATED: Don't use.
1036
 *
1037
 * append the char value in the array
1038
 *
1039
 * Returns the number of xmlChar written
1040
 */
1041
1042
int
1043
0
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1044
0
    if ((out == NULL) || (val < 0)) return(0);
1045
    /* the len parameter is ignored */
1046
0
    if  (val >= 0x80) {
1047
0
  return(xmlCopyCharMultiByte (out, val));
1048
0
    }
1049
0
    *out = val;
1050
0
    return 1;
1051
0
}
1052
1053
/************************************************************************
1054
 *                  *
1055
 *    Commodity functions to switch encodings     *
1056
 *                  *
1057
 ************************************************************************/
1058
1059
/**
1060
 * xmlCtxtSetCharEncConvImpl:
1061
 * @ctxt:  parser context
1062
 * @impl:  callback
1063
 * @vctxt:  user data
1064
 *
1065
 * Installs a custom implementation to convert between character
1066
 * encodings.
1067
 *
1068
 * This bypasses legacy feature like global encoding handlers or
1069
 * encoding aliases.
1070
 *
1071
 * Available since 2.14.0.
1072
 */
1073
void
1074
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
1075
0
                          void *vctxt) {
1076
0
    if (ctxt == NULL)
1077
0
        return;
1078
1079
0
    ctxt->convImpl = impl;
1080
0
    ctxt->convCtxt = vctxt;
1081
0
}
1082
1083
static int
1084
337
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
1085
337
    xmlChar out[200];
1086
337
    xmlParserInputPtr input = ctxt->input;
1087
337
    xmlCharEncodingHandlerPtr handler;
1088
337
    int inlen, outlen, res, i;
1089
1090
337
    *hout = NULL;
1091
1092
    /*
1093
     * To detect the EBCDIC code page, we convert the first 200 bytes
1094
     * to IBM037 (EBCDIC-US) and try to find the encoding declaration.
1095
     */
1096
337
    res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1097
337
            ctxt->convImpl, ctxt->convCtxt, &handler);
1098
337
    if (res != 0)
1099
0
        return(res);
1100
337
    outlen = sizeof(out) - 1;
1101
337
    inlen = input->end - input->cur;
1102
337
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1103
    /*
1104
     * Return the EBCDIC handler if decoding failed. The error will
1105
     * be reported later.
1106
     */
1107
337
    if (res < 0)
1108
6
        goto done;
1109
331
    out[outlen] = 0;
1110
1111
4.72k
    for (i = 0; i < outlen; i++) {
1112
4.60k
        if (out[i] == '>')
1113
5
            break;
1114
4.59k
        if ((out[i] == 'e') &&
1115
4.59k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1116
210
            int start, cur, quote;
1117
1118
210
            i += 8;
1119
210
            while (IS_BLANK_CH(out[i]))
1120
1.20k
                i += 1;
1121
210
            if (out[i++] != '=')
1122
54
                break;
1123
156
            while (IS_BLANK_CH(out[i]))
1124
892
                i += 1;
1125
156
            quote = out[i++];
1126
156
            if ((quote != '\'') && (quote != '"'))
1127
55
                break;
1128
101
            start = i;
1129
101
            cur = out[i];
1130
2.41k
            while (((cur >= 'a') && (cur <= 'z')) ||
1131
2.41k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1132
2.41k
                   ((cur >= '0') && (cur <= '9')) ||
1133
2.41k
                   (cur == '.') || (cur == '_') ||
1134
2.41k
                   (cur == '-'))
1135
2.31k
                cur = out[++i];
1136
101
            if (cur != quote)
1137
96
                break;
1138
5
            out[i] = 0;
1139
5
            xmlCharEncCloseFunc(handler);
1140
5
            res = xmlCreateCharEncodingHandler((char *) out + start,
1141
5
                    /* output */ 0, ctxt->convImpl, ctxt->convCtxt,
1142
5
                    &handler);
1143
5
            if (res != 0)
1144
3
                return(res);
1145
2
            *hout = handler;
1146
2
            return(0);
1147
5
        }
1148
4.59k
    }
1149
1150
332
done:
1151
    /*
1152
     * Encoding handlers are stateful, so we have to recreate them.
1153
     */
1154
332
    xmlCharEncCloseFunc(handler);
1155
332
    res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
1156
332
            ctxt->convImpl, ctxt->convCtxt, &handler);
1157
332
    if (res != 0)
1158
0
        return(res);
1159
332
    *hout = handler;
1160
332
    return(0);
1161
332
}
1162
1163
/**
1164
 * xmlSwitchEncoding:
1165
 * @ctxt:  the parser context
1166
 * @enc:  the encoding value (number)
1167
 *
1168
 * Use encoding specified by enum to decode input data. This overrides
1169
 * the encoding found in the XML declaration.
1170
 *
1171
 * This function can also be used to override the encoding of chunks
1172
 * passed to xmlParseChunk.
1173
 *
1174
 * Returns 0 in case of success, -1 otherwise
1175
 */
1176
int
1177
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1178
1.67k
{
1179
1.67k
    xmlCharEncodingHandlerPtr handler = NULL;
1180
1.67k
    int ret;
1181
1.67k
    int res;
1182
1183
1.67k
    if ((ctxt == NULL) || (ctxt->input == NULL))
1184
0
        return(-1);
1185
1186
1.67k
    res = xmlLookupCharEncodingHandler(enc, &handler);
1187
1.67k
    if (res != 0) {
1188
0
        xmlFatalErr(ctxt, res, NULL);
1189
0
        return(-1);
1190
0
    }
1191
1192
1.67k
    ret = xmlSwitchToEncoding(ctxt, handler);
1193
1194
1.67k
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1195
0
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1196
0
    }
1197
1198
1.67k
    return(ret);
1199
1.67k
}
1200
1201
/**
1202
 * xmlSwitchInputEncodingName:
1203
 * @ctxt:  the parser context
1204
 * @input:  the input strea,
1205
 * @encoding:  the encoding name
1206
 *
1207
 * Returns 0 in case of success, -1 otherwise
1208
 */
1209
static int
1210
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1211
0
                           const char *encoding) {
1212
0
    xmlCharEncodingHandlerPtr handler;
1213
0
    int res;
1214
1215
0
    if (encoding == NULL)
1216
0
        return(-1);
1217
1218
0
    res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
1219
0
            ctxt->convImpl, ctxt->convCtxt, &handler);
1220
0
    if (res == XML_ERR_UNSUPPORTED_ENCODING) {
1221
0
        xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1222
0
                      "Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
1223
0
        return(-1);
1224
0
    } else if (res != XML_ERR_OK) {
1225
0
        xmlFatalErr(ctxt, res, encoding);
1226
0
        return(-1);
1227
0
    }
1228
1229
0
    res  = xmlInputSetEncodingHandler(input, handler);
1230
0
    if (res != XML_ERR_OK) {
1231
0
        xmlCtxtErrIO(ctxt, res, NULL);
1232
0
        return(-1);
1233
0
    }
1234
1235
0
    return(0);
1236
0
}
1237
1238
/**
1239
 * xmlSwitchEncodingName:
1240
 * @ctxt:  the parser context
1241
 * @encoding:  the encoding name
1242
 *
1243
 * Use specified encoding to decode input data. This overrides the
1244
 * encoding found in the XML declaration.
1245
 *
1246
 * This function can also be used to override the encoding of chunks
1247
 * passed to xmlParseChunk.
1248
 *
1249
 * Available since 2.13.0.
1250
 *
1251
 * Returns 0 in case of success, -1 otherwise
1252
 */
1253
int
1254
0
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1255
0
    if (ctxt == NULL)
1256
0
        return(-1);
1257
1258
0
    return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1259
0
}
1260
1261
/**
1262
 * xmlInputSetEncodingHandler:
1263
 * @input:  the input stream
1264
 * @handler:  the encoding handler
1265
 *
1266
 * Use encoding handler to decode input data.
1267
 *
1268
 * Closes the handler on error.
1269
 *
1270
 * Returns an xmlParserErrors code.
1271
 */
1272
int
1273
xmlInputSetEncodingHandler(xmlParserInputPtr input,
1274
4.19k
                           xmlCharEncodingHandlerPtr handler) {
1275
4.19k
    xmlParserInputBufferPtr in;
1276
4.19k
    xmlBufPtr buf;
1277
4.19k
    int code = XML_ERR_OK;
1278
1279
4.19k
    if ((input == NULL) || (input->buf == NULL)) {
1280
0
        xmlCharEncCloseFunc(handler);
1281
0
  return(XML_ERR_ARGUMENT);
1282
0
    }
1283
4.19k
    in = input->buf;
1284
1285
4.19k
    input->flags |= XML_INPUT_HAS_ENCODING;
1286
1287
    /*
1288
     * UTF-8 requires no encoding handler.
1289
     */
1290
4.19k
    if ((handler != NULL) &&
1291
4.19k
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1292
0
        xmlCharEncCloseFunc(handler);
1293
0
        handler = NULL;
1294
0
    }
1295
1296
4.19k
    if (in->encoder == handler)
1297
1.10k
        return(XML_ERR_OK);
1298
1299
3.08k
    if (in->encoder != NULL) {
1300
        /*
1301
         * Switching encodings during parsing is a really bad idea,
1302
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1303
         * separate calls to xmlParseChunk.
1304
         *
1305
         * TODO: We should check whether the "raw" input buffer is empty and
1306
         * convert the old content using the old encoder.
1307
         */
1308
1309
0
        xmlCharEncCloseFunc(in->encoder);
1310
0
        in->encoder = handler;
1311
0
        return(XML_ERR_OK);
1312
0
    }
1313
1314
3.08k
    buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
1315
3.08k
    if (buf == NULL) {
1316
0
        xmlCharEncCloseFunc(handler);
1317
0
        return(XML_ERR_NO_MEMORY);
1318
0
    }
1319
1320
3.08k
    in->encoder = handler;
1321
3.08k
    in->raw = in->buffer;
1322
3.08k
    in->buffer = buf;
1323
1324
    /*
1325
     * Is there already some content down the pipe to convert ?
1326
     */
1327
3.08k
    if (input->end > input->base) {
1328
3.08k
        size_t processed;
1329
3.08k
        size_t nbchars;
1330
3.08k
        int res;
1331
1332
        /*
1333
         * Shrink the current input buffer.
1334
         * Move it as the raw buffer and create a new input buffer
1335
         */
1336
3.08k
        processed = input->cur - input->base;
1337
3.08k
        xmlBufShrink(in->raw, processed);
1338
3.08k
        input->consumed += processed;
1339
3.08k
        in->rawconsumed = processed;
1340
1341
3.08k
        nbchars = 4000 /* MINLEN */;
1342
3.08k
        res = xmlCharEncInput(in, &nbchars);
1343
3.08k
        if (res < 0)
1344
79
            code = in->error;
1345
3.08k
    }
1346
1347
3.08k
    xmlBufResetInput(in->buffer, input);
1348
1349
3.08k
    return(code);
1350
3.08k
}
1351
1352
/**
1353
 * xmlSwitchInputEncoding:
1354
 * @ctxt:  the parser context, only for error reporting
1355
 * @input:  the input stream
1356
 * @handler:  the encoding handler
1357
 *
1358
 * DEPRECATED: Internal function, don't use.
1359
 *
1360
 * Use encoding handler to decode input data.
1361
 *
1362
 * Returns 0 in case of success, -1 otherwise
1363
 */
1364
int
1365
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1366
0
                       xmlCharEncodingHandlerPtr handler) {
1367
0
    int code = xmlInputSetEncodingHandler(input, handler);
1368
1369
0
    if (code != XML_ERR_OK) {
1370
0
        xmlCtxtErrIO(ctxt, code, NULL);
1371
0
        return(-1);
1372
0
    }
1373
1374
0
    return(0);
1375
0
}
1376
1377
/**
1378
 * xmlSwitchToEncoding:
1379
 * @ctxt:  the parser context
1380
 * @handler:  the encoding handler
1381
 *
1382
 * Use encoding handler to decode input data.
1383
 *
1384
 * This function can be used to enforce the encoding of chunks passed
1385
 * to xmlParseChunk.
1386
 *
1387
 * Returns 0 in case of success, -1 otherwise
1388
 */
1389
int
1390
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1391
2.01k
{
1392
2.01k
    int code;
1393
1394
2.01k
    if (ctxt == NULL)
1395
0
        return(-1);
1396
1397
2.01k
    code = xmlInputSetEncodingHandler(ctxt->input, handler);
1398
2.01k
    if (code != XML_ERR_OK) {
1399
28
        xmlCtxtErrIO(ctxt, code, NULL);
1400
28
        return(-1);
1401
28
    }
1402
1403
1.98k
    return(0);
1404
2.01k
}
1405
1406
/**
1407
 * xmlDetectEncoding:
1408
 * @ctxt:  the parser context
1409
 *
1410
 * Handle optional BOM, detect and switch to encoding.
1411
 *
1412
 * Assumes that there are at least four bytes in the input buffer.
1413
 */
1414
void
1415
20.2k
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1416
20.2k
    const xmlChar *in;
1417
20.2k
    xmlCharEncoding enc;
1418
20.2k
    int bomSize;
1419
20.2k
    int autoFlag = 0;
1420
1421
20.2k
    if (xmlParserGrow(ctxt) < 0)
1422
0
        return;
1423
20.2k
    in = ctxt->input->cur;
1424
20.2k
    if (ctxt->input->end - in < 4)
1425
3.52k
        return;
1426
1427
16.6k
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1428
        /*
1429
         * If the encoding was already set, only skip the BOM which was
1430
         * possibly decoded to UTF-8.
1431
         */
1432
0
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1433
0
            ctxt->input->cur += 3;
1434
0
        }
1435
1436
0
        return;
1437
0
    }
1438
1439
16.6k
    enc = XML_CHAR_ENCODING_NONE;
1440
16.6k
    bomSize = 0;
1441
1442
16.6k
    switch (in[0]) {
1443
155
        case 0x00:
1444
155
            if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1445
5
                enc = XML_CHAR_ENCODING_UCS4BE;
1446
5
                autoFlag = XML_INPUT_AUTO_OTHER;
1447
150
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1448
77
                enc = XML_CHAR_ENCODING_UTF16BE;
1449
77
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1450
77
            }
1451
155
            break;
1452
1453
14.3k
        case 0x3C:
1454
14.3k
            if (in[1] == 0x00) {
1455
229
                if ((in[2] == 0x00) && (in[3] == 0x00)) {
1456
14
                    enc = XML_CHAR_ENCODING_UCS4LE;
1457
14
                    autoFlag = XML_INPUT_AUTO_OTHER;
1458
215
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1459
207
                    enc = XML_CHAR_ENCODING_UTF16LE;
1460
207
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1461
207
                }
1462
229
            }
1463
14.3k
            break;
1464
1465
344
        case 0x4C:
1466
344
      if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1467
337
          enc = XML_CHAR_ENCODING_EBCDIC;
1468
337
                autoFlag = XML_INPUT_AUTO_OTHER;
1469
337
            }
1470
344
            break;
1471
1472
1.02k
        case 0xEF:
1473
1.02k
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1474
1.01k
                enc = XML_CHAR_ENCODING_UTF8;
1475
1.01k
                autoFlag = XML_INPUT_AUTO_UTF8;
1476
1.01k
                bomSize = 3;
1477
1.01k
            }
1478
1.02k
            break;
1479
1480
190
        case 0xFE:
1481
190
            if (in[1] == 0xFF) {
1482
183
                enc = XML_CHAR_ENCODING_UTF16BE;
1483
183
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1484
183
                bomSize = 2;
1485
183
            }
1486
190
            break;
1487
1488
201
        case 0xFF:
1489
201
            if (in[1] == 0xFE) {
1490
178
                enc = XML_CHAR_ENCODING_UTF16LE;
1491
178
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1492
178
                bomSize = 2;
1493
178
            }
1494
201
            break;
1495
16.6k
    }
1496
1497
16.6k
    if (bomSize > 0) {
1498
1.37k
        ctxt->input->cur += bomSize;
1499
1.37k
    }
1500
1501
16.6k
    if (enc != XML_CHAR_ENCODING_NONE) {
1502
2.01k
        ctxt->input->flags |= autoFlag;
1503
1504
2.01k
        if (enc == XML_CHAR_ENCODING_EBCDIC) {
1505
337
            xmlCharEncodingHandlerPtr handler;
1506
337
            int res;
1507
1508
337
            res = xmlDetectEBCDIC(ctxt, &handler);
1509
337
            if (res != XML_ERR_OK) {
1510
3
                xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
1511
334
            } else {
1512
334
                xmlSwitchToEncoding(ctxt, handler);
1513
334
            }
1514
1.67k
        } else {
1515
1.67k
            xmlSwitchEncoding(ctxt, enc);
1516
1.67k
        }
1517
2.01k
    }
1518
16.6k
}
1519
1520
/**
1521
 * xmlSetDeclaredEncoding:
1522
 * @ctxt:  the parser context
1523
 * @encoding:  declared encoding
1524
 *
1525
 * Set the encoding from a declaration in the document.
1526
 *
1527
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1528
 * about encoding mismatches.
1529
 *
1530
 * Takes ownership of 'encoding'.
1531
 */
1532
void
1533
2.24k
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1534
2.24k
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1535
2.24k
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1536
2.22k
        xmlCharEncodingHandlerPtr handler;
1537
2.22k
        int res;
1538
1539
        /*
1540
         * xmlSwitchEncodingName treats unsupported encodings as
1541
         * warnings, but we want it to be an error in an encoding
1542
         * declaration.
1543
         */
1544
2.22k
        res = xmlCreateCharEncodingHandler((const char *) encoding,
1545
2.22k
                /* output */ 0, ctxt->convImpl, ctxt->convCtxt, &handler);
1546
2.22k
        if (res != XML_ERR_OK) {
1547
46
            xmlFatalErr(ctxt, res, (const char *) encoding);
1548
46
            xmlFree(encoding);
1549
46
            return;
1550
46
        }
1551
1552
2.17k
        res  = xmlInputSetEncodingHandler(ctxt->input, handler);
1553
2.17k
        if (res != XML_ERR_OK) {
1554
51
            xmlCtxtErrIO(ctxt, res, NULL);
1555
51
            xmlFree(encoding);
1556
51
            return;
1557
51
        }
1558
1559
2.12k
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1560
2.12k
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1561
16
        static const char *allowedUTF8[] = {
1562
16
            "UTF-8", "UTF8", NULL
1563
16
        };
1564
16
        static const char *allowedUTF16LE[] = {
1565
16
            "UTF-16", "UTF-16LE", "UTF16", NULL
1566
16
        };
1567
16
        static const char *allowedUTF16BE[] = {
1568
16
            "UTF-16", "UTF-16BE", "UTF16", NULL
1569
16
        };
1570
16
        const char **allowed = NULL;
1571
16
        const char *autoEnc = NULL;
1572
1573
16
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1574
9
            case XML_INPUT_AUTO_UTF8:
1575
9
                allowed = allowedUTF8;
1576
9
                autoEnc = "UTF-8";
1577
9
                break;
1578
4
            case XML_INPUT_AUTO_UTF16LE:
1579
4
                allowed = allowedUTF16LE;
1580
4
                autoEnc = "UTF-16LE";
1581
4
                break;
1582
1
            case XML_INPUT_AUTO_UTF16BE:
1583
1
                allowed = allowedUTF16BE;
1584
1
                autoEnc = "UTF-16BE";
1585
1
                break;
1586
16
        }
1587
1588
16
        if (allowed != NULL) {
1589
14
            const char **p;
1590
14
            int match = 0;
1591
1592
42
            for (p = allowed; *p != NULL; p++) {
1593
30
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1594
2
                    match = 1;
1595
2
                    break;
1596
2
                }
1597
30
            }
1598
1599
14
            if (match == 0) {
1600
12
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1601
12
                              "Encoding '%s' doesn't match "
1602
12
                              "auto-detected '%s'\n",
1603
12
                              encoding, BAD_CAST autoEnc);
1604
12
                xmlFree(encoding);
1605
12
                encoding = xmlStrdup(BAD_CAST autoEnc);
1606
12
                if (encoding == NULL)
1607
0
                    xmlCtxtErrMemory(ctxt);
1608
12
            }
1609
14
        }
1610
16
    }
1611
1612
2.14k
    if (ctxt->encoding != NULL)
1613
0
        xmlFree((xmlChar *) ctxt->encoding);
1614
2.14k
    ctxt->encoding = encoding;
1615
2.14k
}
1616
1617
/**
1618
 * xmlCtxtGetDeclaredEncoding:
1619
 * ctxt:  parser context
1620
 *
1621
 * Available since 2.14.0.
1622
 *
1623
 * Returns the encoding from the encoding declaration. This can differ
1624
 * from the actual encoding.
1625
 */
1626
const xmlChar *
1627
0
xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt) {
1628
0
    if (ctxt == NULL)
1629
0
        return(NULL);
1630
1631
0
    return(ctxt->encoding);
1632
0
}
1633
1634
/**
1635
 * xmlGetActualEncoding:
1636
 * @ctxt:  the parser context
1637
 *
1638
 * Returns the actual used to parse the document. This can differ from
1639
 * the declared encoding.
1640
 */
1641
const xmlChar *
1642
15.5k
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1643
15.5k
    const xmlChar *encoding = NULL;
1644
1645
15.5k
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1646
15.5k
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1647
        /* Preserve encoding exactly */
1648
3.47k
        encoding = ctxt->encoding;
1649
12.0k
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1650
4
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1651
12.0k
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1652
0
        encoding = BAD_CAST "UTF-8";
1653
0
    }
1654
1655
15.5k
    return(encoding);
1656
15.5k
}
1657
1658
/************************************************************************
1659
 *                  *
1660
 *  Commodity functions to handle entities processing   *
1661
 *                  *
1662
 ************************************************************************/
1663
1664
/**
1665
 * xmlFreeInputStream:
1666
 * @input:  an xmlParserInputPtr
1667
 *
1668
 * Free up an input stream.
1669
 */
1670
void
1671
38.7k
xmlFreeInputStream(xmlParserInputPtr input) {
1672
38.7k
    if (input == NULL) return;
1673
1674
38.7k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1675
38.7k
    if (input->version != NULL) xmlFree((char *) input->version);
1676
38.7k
    if ((input->free != NULL) && (input->base != NULL))
1677
0
        input->free((xmlChar *) input->base);
1678
38.7k
    if (input->buf != NULL)
1679
38.7k
        xmlFreeParserInputBuffer(input->buf);
1680
38.7k
    xmlFree(input);
1681
38.7k
}
1682
1683
/**
1684
 * xmlNewInputStream:
1685
 * @ctxt:  an XML parser context
1686
 *
1687
 * DEPRECATED: Use xmlNewInputFromUrl or similar functions.
1688
 *
1689
 * Create a new input stream structure.
1690
 *
1691
 * Returns the new input stream or NULL
1692
 */
1693
xmlParserInputPtr
1694
0
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1695
0
    xmlParserInputPtr input;
1696
1697
0
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1698
0
    if (input == NULL) {
1699
0
        xmlCtxtErrMemory(ctxt);
1700
0
  return(NULL);
1701
0
    }
1702
0
    memset(input, 0, sizeof(xmlParserInput));
1703
0
    input->line = 1;
1704
0
    input->col = 1;
1705
1706
0
    return(input);
1707
0
}
1708
1709
/**
1710
 * xmlCtxtNewInputFromUrl:
1711
 * @ctxt:  parser context
1712
 * @url:  filename or URL
1713
 * @publicId:  publid ID from doctype (optional)
1714
 * @encoding:  character encoding (optional)
1715
 * @flags:  unused, pass 0
1716
 *
1717
 * Creates a new parser input from the filesystem, the network or
1718
 * a user-defined resource loader.
1719
 *
1720
 * Returns a new parser input.
1721
 */
1722
xmlParserInputPtr
1723
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
1724
                       const char *publicId, const char *encoding,
1725
0
                       int flags ATTRIBUTE_UNUSED) {
1726
0
    xmlParserInputPtr input;
1727
1728
0
    if ((ctxt == NULL) || (url == NULL))
1729
0
  return(NULL);
1730
1731
0
    input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1732
0
    if (input == NULL)
1733
0
        return(NULL);
1734
1735
0
    if (encoding != NULL)
1736
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1737
1738
0
    return(input);
1739
0
}
1740
1741
/**
1742
 * xmlNewInputInternal:
1743
 * @buf:  parser input buffer
1744
 * @filename:  filename or URL
1745
 *
1746
 * Internal helper function.
1747
 *
1748
 * Returns a new parser input.
1749
 */
1750
static xmlParserInputPtr
1751
38.7k
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1752
38.7k
    xmlParserInputPtr input;
1753
1754
38.7k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1755
38.7k
    if (input == NULL) {
1756
0
  xmlFreeParserInputBuffer(buf);
1757
0
  return(NULL);
1758
0
    }
1759
38.7k
    memset(input, 0, sizeof(xmlParserInput));
1760
38.7k
    input->line = 1;
1761
38.7k
    input->col = 1;
1762
1763
38.7k
    input->buf = buf;
1764
38.7k
    xmlBufResetInput(input->buf->buffer, input);
1765
1766
38.7k
    if (filename != NULL) {
1767
9.89k
        input->filename = xmlMemStrdup(filename);
1768
9.89k
        if (input->filename == NULL) {
1769
0
            xmlFreeInputStream(input);
1770
0
            return(NULL);
1771
0
        }
1772
9.89k
    }
1773
1774
38.7k
    return(input);
1775
38.7k
}
1776
1777
/**
1778
 * xmlNewInputFromMemory:
1779
 * @url:  base URL (optional)
1780
 * @mem:  pointer to char array
1781
 * @size:  size of array
1782
 * @flags:  optimization hints
1783
 *
1784
 * Creates a new parser input to read from a memory area.
1785
 *
1786
 * @url is used as base to resolve external entities and for
1787
 * error reporting.
1788
 *
1789
 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1790
 * stay unchanged until parsing has finished. This can avoid
1791
 * temporary copies.
1792
 *
1793
 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1794
 * area must contain a zero byte after the buffer at position @size.
1795
 * This can avoid temporary copies.
1796
 *
1797
 * Available since 2.14.0.
1798
 *
1799
 * Returns a new parser input or NULL if a memory allocation failed.
1800
 */
1801
xmlParserInputPtr
1802
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
1803
20.2k
                      int flags) {
1804
20.2k
    xmlParserInputBufferPtr buf;
1805
1806
20.2k
    if (mem == NULL)
1807
0
  return(NULL);
1808
1809
20.2k
    buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1810
20.2k
    if (buf == NULL)
1811
0
        return(NULL);
1812
1813
20.2k
    return(xmlNewInputInternal(buf, url));
1814
20.2k
}
1815
1816
/**
1817
 * xmlCtxtNewInputFromMemory:
1818
 * @ctxt:  parser context
1819
 * @url:  base URL (optional)
1820
 * @mem:  pointer to char array
1821
 * @size:  size of array
1822
 * @encoding:  character encoding (optional)
1823
 * @flags:  optimization hints
1824
 *
1825
 * Returns a new parser input or NULL in case of error.
1826
 */
1827
xmlParserInputPtr
1828
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
1829
                          const void *mem, size_t size,
1830
20.2k
                          const char *encoding, int flags) {
1831
20.2k
    xmlParserInputPtr input;
1832
1833
20.2k
    if ((ctxt == NULL) || (mem == NULL))
1834
0
  return(NULL);
1835
1836
20.2k
    input = xmlNewInputFromMemory(url, mem, size, flags);
1837
20.2k
    if (input == NULL) {
1838
0
        xmlCtxtErrMemory(ctxt);
1839
0
        return(NULL);
1840
0
    }
1841
1842
20.2k
    if (encoding != NULL)
1843
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1844
1845
20.2k
    return(input);
1846
20.2k
}
1847
1848
/**
1849
 * xmlNewInputFromString:
1850
 * @url:  base URL (optional)
1851
 * @str:  zero-terminated string
1852
 * @flags:  optimization hints
1853
 *
1854
 * Creates a new parser input to read from a zero-terminated string.
1855
 *
1856
 * @url is used as base to resolve external entities and for
1857
 * error reporting.
1858
 *
1859
 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1860
 * stay unchanged until parsing has finished. This can avoid
1861
 * temporary copies.
1862
 *
1863
 * Available since 2.14.0.
1864
 *
1865
 * Returns a new parser input or NULL if a memory allocation failed.
1866
 */
1867
xmlParserInputPtr
1868
18.5k
xmlNewInputFromString(const char *url, const char *str, int flags) {
1869
18.5k
    xmlParserInputBufferPtr buf;
1870
1871
18.5k
    if (str == NULL)
1872
0
  return(NULL);
1873
1874
18.5k
    buf = xmlNewInputBufferString(str, flags);
1875
18.5k
    if (buf == NULL)
1876
0
        return(NULL);
1877
1878
18.5k
    return(xmlNewInputInternal(buf, url));
1879
18.5k
}
1880
1881
/**
1882
 * xmlCtxtNewInputFromString:
1883
 * @ctxt:  parser context
1884
 * @url:  base URL (optional)
1885
 * @str:  zero-terminated string
1886
 * @encoding:  character encoding (optional)
1887
 * @flags:  optimization hints
1888
 *
1889
 * Returns a new parser input.
1890
 */
1891
xmlParserInputPtr
1892
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
1893
18.5k
                          const char *str, const char *encoding, int flags) {
1894
18.5k
    xmlParserInputPtr input;
1895
1896
18.5k
    if ((ctxt == NULL) || (str == NULL))
1897
0
  return(NULL);
1898
1899
18.5k
    input = xmlNewInputFromString(url, str, flags);
1900
18.5k
    if (input == NULL) {
1901
0
        xmlCtxtErrMemory(ctxt);
1902
0
        return(NULL);
1903
0
    }
1904
1905
18.5k
    if (encoding != NULL)
1906
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1907
1908
18.5k
    return(input);
1909
18.5k
}
1910
1911
/**
1912
 * xmlNewInputFromFd:
1913
 * @url:  base URL (optional)
1914
 * @fd:  file descriptor
1915
 * @flags:  unused, pass 0
1916
 *
1917
 * Creates a new parser input to read from a zero-terminated string.
1918
 *
1919
 * @url is used as base to resolve external entities and for
1920
 * error reporting.
1921
 *
1922
 * @fd is closed after parsing has finished.
1923
 *
1924
 * Available since 2.14.0.
1925
 *
1926
 * Returns a new parser input or NULL if a memory allocation failed.
1927
 */
1928
xmlParserInputPtr
1929
0
xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
1930
0
    xmlParserInputBufferPtr buf;
1931
1932
0
    if (fd < 0)
1933
0
  return(NULL);
1934
1935
0
    buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1936
0
    if (buf == NULL)
1937
0
        return(NULL);
1938
1939
0
    return(xmlNewInputInternal(buf, url));
1940
0
}
1941
1942
/**
1943
 * xmlCtxtNewInputFromFd:
1944
 * @ctxt:  parser context
1945
 * @url:  base URL (optional)
1946
 * @fd:  file descriptor
1947
 * @encoding:  character encoding (optional)
1948
 * @flags:  unused, pass 0
1949
 *
1950
 * Returns a new parser input.
1951
 */
1952
xmlParserInputPtr
1953
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *url,
1954
0
                      int fd, const char *encoding, int flags) {
1955
0
    xmlParserInputPtr input;
1956
1957
0
    if ((ctxt == NULL) || (fd < 0))
1958
0
  return(NULL);
1959
1960
0
    input = xmlNewInputFromFd(url, fd, flags);
1961
0
    if (input == NULL) {
1962
0
  xmlCtxtErrMemory(ctxt);
1963
0
        return(NULL);
1964
0
    }
1965
1966
0
    if (encoding != NULL)
1967
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
1968
1969
0
    return(input);
1970
0
}
1971
1972
/**
1973
 * xmlNewInputFromIO:
1974
 * @url:  base URL (optional)
1975
 * @ioRead:  read callback
1976
 * @ioClose:  close callback (optional)
1977
 * @ioCtxt:  IO context
1978
 * @flags:  unused, pass 0
1979
 *
1980
 * Creates a new parser input to read from input callbacks and
1981
 * context.
1982
 *
1983
 * @url is used as base to resolve external entities and for
1984
 * error reporting.
1985
 *
1986
 * @ioRead is called to read new data into a provided buffer.
1987
 * It must return the number of bytes written into the buffer
1988
 * or a negative xmlParserErrors code on failure.
1989
 *
1990
 * @ioClose is called after parsing has finished.
1991
 *
1992
 * @ioCtxt is an opaque pointer passed to the callbacks.
1993
 *
1994
 * Available since 2.14.0.
1995
 *
1996
 * Returns a new parser input or NULL if a memory allocation failed.
1997
 */
1998
xmlParserInputPtr
1999
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
2000
                  xmlInputCloseCallback ioClose, void *ioCtxt,
2001
0
                  int flags ATTRIBUTE_UNUSED) {
2002
0
    xmlParserInputBufferPtr buf;
2003
2004
0
    if (ioRead == NULL)
2005
0
  return(NULL);
2006
2007
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2008
0
    if (buf == NULL) {
2009
0
        if (ioClose != NULL)
2010
0
            ioClose(ioCtxt);
2011
0
        return(NULL);
2012
0
    }
2013
2014
0
    buf->context = ioCtxt;
2015
0
    buf->readcallback = ioRead;
2016
0
    buf->closecallback = ioClose;
2017
2018
0
    return(xmlNewInputInternal(buf, url));
2019
0
}
2020
2021
/**
2022
 * xmlCtxtNewInputFromIO:
2023
 * @ctxt:  parser context
2024
 * @url:  base URL (optional)
2025
 * @ioRead:  read callback
2026
 * @ioClose:  close callback (optional)
2027
 * @ioCtxt:  IO context
2028
 * @encoding:  character encoding (optional)
2029
 * @flags:  unused, pass 0
2030
 *
2031
 * Returns a new parser input.
2032
 */
2033
xmlParserInputPtr
2034
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
2035
                      xmlInputReadCallback ioRead,
2036
                      xmlInputCloseCallback ioClose,
2037
0
                      void *ioCtxt, const char *encoding, int flags) {
2038
0
    xmlParserInputPtr input;
2039
2040
0
    if ((ctxt == NULL) || (ioRead == NULL))
2041
0
  return(NULL);
2042
2043
0
    input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
2044
0
    if (input == NULL) {
2045
0
        xmlCtxtErrMemory(ctxt);
2046
0
        return(NULL);
2047
0
    }
2048
2049
0
    if (encoding != NULL)
2050
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2051
2052
0
    return(input);
2053
0
}
2054
2055
/**
2056
 * xmlNewPushInput:
2057
 * @url:  base URL (optional)
2058
 * @chunk:  pointer to char array
2059
 * @size:  size of array
2060
 *
2061
 * Creates a new parser input for a push parser.
2062
 *
2063
 * Returns a new parser input or NULL if a memory allocation failed.
2064
 */
2065
xmlParserInputPtr
2066
0
xmlNewPushInput(const char *url, const char *chunk, int size) {
2067
0
    xmlParserInputBufferPtr buf;
2068
0
    xmlParserInputPtr input;
2069
2070
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
2071
0
    if (buf == NULL)
2072
0
        return(NULL);
2073
2074
0
    input = xmlNewInputInternal(buf, url);
2075
0
    if (input == NULL)
2076
0
  return(NULL);
2077
2078
0
    input->flags |= XML_INPUT_PROGRESSIVE;
2079
2080
0
    if ((size > 0) && (chunk != NULL)) {
2081
0
        int res;
2082
2083
0
  res = xmlParserInputBufferPush(input->buf, size, chunk);
2084
0
        xmlBufResetInput(input->buf->buffer, input);
2085
0
        if (res < 0) {
2086
0
            xmlFreeInputStream(input);
2087
0
            return(NULL);
2088
0
        }
2089
0
    }
2090
2091
0
    return(input);
2092
0
}
2093
2094
/**
2095
 * xmlNewIOInputStream:
2096
 * @ctxt:  an XML parser context
2097
 * @buf:  an input buffer
2098
 * @enc:  the charset encoding if known
2099
 *
2100
 * Create a new input stream structure encapsulating the @buf into
2101
 * a stream suitable for the parser.
2102
 *
2103
 * Returns the new input stream or NULL
2104
 */
2105
xmlParserInputPtr
2106
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
2107
0
              xmlCharEncoding enc) {
2108
0
    xmlParserInputPtr input;
2109
0
    const char *encoding;
2110
2111
0
    if ((ctxt == NULL) || (buf == NULL))
2112
0
        return(NULL);
2113
2114
0
    input = xmlNewInputInternal(buf, NULL);
2115
0
    if (input == NULL) {
2116
0
        xmlCtxtErrMemory(ctxt);
2117
0
  return(NULL);
2118
0
    }
2119
2120
0
    encoding = xmlGetCharEncodingName(enc);
2121
0
    if (encoding != NULL)
2122
0
        xmlSwitchInputEncodingName(ctxt, input, encoding);
2123
2124
0
    return(input);
2125
0
}
2126
2127
/**
2128
 * xmlNewEntityInputStream:
2129
 * @ctxt:  an XML parser context
2130
 * @ent:  an Entity pointer
2131
 *
2132
 * DEPRECATED: Internal function, do not use.
2133
 *
2134
 * Create a new input stream based on an xmlEntityPtr
2135
 *
2136
 * Returns the new input stream or NULL
2137
 */
2138
xmlParserInputPtr
2139
18.5k
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
2140
18.5k
    xmlParserInputPtr input;
2141
2142
18.5k
    if ((ctxt == NULL) || (ent == NULL))
2143
0
  return(NULL);
2144
2145
18.5k
    if (ent->content != NULL) {
2146
18.5k
        input = xmlCtxtNewInputFromString(ctxt, NULL,
2147
18.5k
                (const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
2148
18.5k
    } else if (ent->URI != NULL) {
2149
0
        xmlResourceType rtype;
2150
2151
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
2152
0
            rtype = XML_RESOURCE_PARAMETER_ENTITY;
2153
0
        else
2154
0
            rtype = XML_RESOURCE_GENERAL_ENTITY;
2155
2156
0
        input = xmlLoadResource(ctxt, (char *) ent->URI,
2157
0
                                (char *) ent->ExternalID, rtype);
2158
0
    } else {
2159
0
        return(NULL);
2160
0
    }
2161
2162
18.5k
    if (input == NULL)
2163
0
        return(NULL);
2164
2165
18.5k
    input->entity = ent;
2166
2167
18.5k
    return(input);
2168
18.5k
}
2169
2170
/**
2171
 * xmlNewStringInputStream:
2172
 * @ctxt:  an XML parser context
2173
 * @buffer:  a memory buffer
2174
 *
2175
 * DEPRECATED: Use xmlNewInputFromString.
2176
 *
2177
 * Create a new input stream based on a memory buffer.
2178
 *
2179
 * Returns the new input stream
2180
 */
2181
xmlParserInputPtr
2182
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2183
0
    return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
2184
0
                                     NULL, 0));
2185
0
}
2186
2187
2188
/****************************************************************
2189
 *                *
2190
 *    External entities loading     *
2191
 *                *
2192
 ****************************************************************/
2193
2194
#ifdef LIBXML_CATALOG_ENABLED
2195
2196
/**
2197
 * xmlResolveResourceFromCatalog:
2198
 * @URL:  the URL for the entity to load
2199
 * @ID:  the Public ID for the entity to load
2200
 * @ctxt:  the context in which the entity is called or NULL
2201
 *
2202
 * Resolves the URL and ID against the appropriate catalog.
2203
 * This function is used by xmlDefaultExternalEntityLoader and
2204
 * xmlNoNetExternalEntityLoader.
2205
 *
2206
 * Returns a new allocated URL, or NULL.
2207
 */
2208
static xmlChar *
2209
xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2210
0
                              xmlParserCtxtPtr ctxt) {
2211
0
    xmlChar *resource = NULL;
2212
0
    xmlCatalogAllow pref;
2213
0
    int allowLocal = 0;
2214
0
    int allowGlobal = 0;
2215
2216
    /*
2217
     * If the resource doesn't exists as a file,
2218
     * try to load it from the resource pointed in the catalogs
2219
     */
2220
0
    pref = xmlCatalogGetDefaults();
2221
2222
0
    if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2223
0
        ((pref == XML_CATA_ALLOW_ALL) ||
2224
0
         (pref == XML_CATA_ALLOW_DOCUMENT)))
2225
0
        allowLocal = 1;
2226
2227
0
    if (((ctxt == NULL) ||
2228
0
         ((ctxt->options & XML_PARSE_NO_SYS_CATALOG) == 0)) &&
2229
0
        ((pref == XML_CATA_ALLOW_ALL) ||
2230
0
         (pref == XML_CATA_ALLOW_GLOBAL)))
2231
0
        allowGlobal = 1;
2232
2233
0
    if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2234
  /*
2235
   * Do a local lookup
2236
   */
2237
0
        if (allowLocal) {
2238
0
      resource = xmlCatalogLocalResolve(ctxt->catalogs,
2239
0
                (const xmlChar *)ID,
2240
0
                (const xmlChar *)URL);
2241
0
        }
2242
  /*
2243
   * Try a global lookup
2244
   */
2245
0
  if ((resource == NULL) && (allowGlobal)) {
2246
0
      resource = xmlCatalogResolve((const xmlChar *)ID,
2247
0
           (const xmlChar *)URL);
2248
0
  }
2249
0
  if ((resource == NULL) && (URL != NULL))
2250
0
      resource = xmlStrdup((const xmlChar *) URL);
2251
2252
  /*
2253
   * TODO: do an URI lookup on the reference
2254
   */
2255
0
  if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2256
0
      xmlChar *tmp = NULL;
2257
2258
0
      if (allowLocal) {
2259
0
    tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2260
0
      }
2261
0
      if ((tmp == NULL) && (allowGlobal)) {
2262
0
    tmp = xmlCatalogResolveURI(resource);
2263
0
      }
2264
2265
0
      if (tmp != NULL) {
2266
0
    xmlFree(resource);
2267
0
    resource = tmp;
2268
0
      }
2269
0
  }
2270
0
    }
2271
2272
0
    return resource;
2273
0
}
2274
2275
#endif
2276
2277
#ifdef LIBXML_HTTP_ENABLED
2278
static int
2279
xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
2280
    const char *encoding;
2281
    const char *redir;
2282
    const char *mime;
2283
    int code;
2284
2285
    if ((input == NULL) || (input->buf == NULL) ||
2286
        (input->buf->readcallback != xmlIOHTTPRead) ||
2287
        (input->buf->context == NULL))
2288
        return(XML_ERR_OK);
2289
2290
    code = xmlNanoHTTPReturnCode(input->buf->context);
2291
    if (code >= 400) {
2292
        /* fatal error */
2293
        return(XML_IO_LOAD_ERROR);
2294
    }
2295
2296
    mime = xmlNanoHTTPMimeType(input->buf->context);
2297
    if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2298
        (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2299
        encoding = xmlNanoHTTPEncoding(input->buf->context);
2300
        if (encoding != NULL) {
2301
            xmlCharEncodingHandlerPtr handler;
2302
            int res;
2303
2304
            res = xmlOpenCharEncodingHandler(encoding, /* output */ 0,
2305
                                             &handler);
2306
            if (res == 0)
2307
                xmlInputSetEncodingHandler(input, handler);
2308
        }
2309
    }
2310
2311
    redir = xmlNanoHTTPRedir(input->buf->context);
2312
    if (redir != NULL) {
2313
        if (input->filename != NULL)
2314
            xmlFree((xmlChar *) input->filename);
2315
        input->filename = xmlMemStrdup(redir);
2316
        if (input->filename == NULL)
2317
            return(XML_ERR_NO_MEMORY);
2318
    }
2319
2320
    return(XML_ERR_OK);
2321
}
2322
#endif /* LIBXML_HTTP_ENABLED */
2323
2324
/**
2325
 * xmlCheckHTTPInput:
2326
 * @ctxt: an XML parser context
2327
 * @ret: an XML parser input
2328
 *
2329
 * DEPRECATED: Internal function, don't use.
2330
 *
2331
 * Check an input in case it was created from an HTTP stream, in that
2332
 * case it will handle encoding and update of the base URL in case of
2333
 * redirection. It also checks for HTTP errors in which case the input
2334
 * is cleanly freed up and an appropriate error is raised in context
2335
 *
2336
 * Returns the input or NULL in case of HTTP error.
2337
 */
2338
xmlParserInputPtr
2339
0
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2340
    /* Avoid unused variable warning if features are disabled. */
2341
0
    (void) ctxt;
2342
2343
#ifdef LIBXML_HTTP_ENABLED
2344
    {
2345
        int code = xmlCheckHTTPInputInternal(ret);
2346
2347
        if (code != XML_ERR_OK) {
2348
            if (ret->filename != NULL)
2349
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2350
            else
2351
                xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2352
            xmlFreeInputStream(ret);
2353
            return(NULL);
2354
        }
2355
    }
2356
#endif
2357
2358
0
    return(ret);
2359
0
}
2360
2361
/**
2362
 * xmlNewInputFromUrl:
2363
 * @filename:  the filename to use as entity
2364
 * @flags:  XML_INPUT flags
2365
 * @out:  pointer to new parser input
2366
 *
2367
 * Create a new input stream based on a file or a URL.
2368
 *
2369
 * The flag XML_INPUT_UNZIP allows decompression.
2370
 *
2371
 * The flag XML_INPUT_NETWORK allows network access.
2372
 *
2373
 * The following resource loaders will be called if they were
2374
 * registered (in order of precedence):
2375
 *
2376
 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2377
 *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2378
 * - the default loader which will return
2379
 *   - the result from a matching global input callback set with
2380
 *     xmlRegisterInputCallbacks (deprecated)
2381
 *   - a HTTP resource if support is compiled in.
2382
 *   - a file opened from the filesystem, with automatic detection
2383
 *     of compressed files if support is compiled in.
2384
 *
2385
 * Available since 2.14.0.
2386
 *
2387
 * Returns an xmlParserErrors code.
2388
 */
2389
int
2390
0
xmlNewInputFromUrl(const char *filename, int flags, xmlParserInputPtr *out) {
2391
0
    xmlParserInputBufferPtr buf;
2392
0
    xmlParserInputPtr input;
2393
0
    int code = XML_ERR_OK;
2394
2395
0
    if (out == NULL)
2396
0
        return(XML_ERR_ARGUMENT);
2397
0
    *out = NULL;
2398
0
    if (filename == NULL)
2399
0
        return(XML_ERR_ARGUMENT);
2400
2401
0
    if (xmlParserInputBufferCreateFilenameValue != NULL) {
2402
0
        buf = xmlParserInputBufferCreateFilenameValue(filename,
2403
0
                XML_CHAR_ENCODING_NONE);
2404
0
        if (buf == NULL)
2405
0
            code = XML_IO_ENOENT;
2406
0
    } else {
2407
0
        code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2408
0
                                             flags, &buf);
2409
0
    }
2410
0
    if (code != XML_ERR_OK)
2411
0
  return(code);
2412
2413
0
    input = xmlNewInputInternal(buf, filename);
2414
0
    if (input == NULL)
2415
0
  return(XML_ERR_NO_MEMORY);
2416
2417
#ifdef LIBXML_HTTP_ENABLED
2418
    code = xmlCheckHTTPInputInternal(input);
2419
    if (code != XML_ERR_OK) {
2420
        xmlFreeInputStream(input);
2421
        return(code);
2422
    }
2423
#endif
2424
2425
0
    *out = input;
2426
0
    return(XML_ERR_OK);
2427
0
}
2428
2429
/**
2430
 * xmlNewInputFromFile:
2431
 * @ctxt:  an XML parser context
2432
 * @filename:  the filename to use as entity
2433
 *
2434
 * DEPRECATED: Use xmlNewInputFromUrl.
2435
 *
2436
 * Create a new input stream based on a file or an URL.
2437
 *
2438
 * Returns the new input stream or NULL in case of error
2439
 */
2440
xmlParserInputPtr
2441
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2442
0
    xmlParserInputPtr input;
2443
0
    int flags = 0;
2444
0
    int code;
2445
2446
0
    if ((ctxt == NULL) || (filename == NULL))
2447
0
        return(NULL);
2448
2449
0
    if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2450
0
        flags |= XML_INPUT_UNZIP;
2451
0
    if ((ctxt->options & XML_PARSE_NONET) == 0)
2452
0
        flags |= XML_INPUT_NETWORK;
2453
2454
0
    code = xmlNewInputFromUrl(filename, flags, &input);
2455
0
    if (code != XML_ERR_OK) {
2456
0
        xmlCtxtErrIO(ctxt, code, filename);
2457
0
        return(NULL);
2458
0
    }
2459
2460
0
    return(input);
2461
0
}
2462
2463
/**
2464
 * xmlDefaultExternalEntityLoader:
2465
 * @url:  the URL for the entity to load
2466
 * @ID:  the System ID for the entity to load
2467
 * @ctxt:  the context in which the entity is called or NULL
2468
 *
2469
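 * Default external entity loader: resolves @url through the catalogs
 * (if catalog support is compiled in) and opens the result as a file
 * or URL, refusing "http://" URLs when XML_PARSE_NONET is set.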
2470
 *
2471
 * Returns a new allocated xmlParserInputPtr, or NULL.
2472
 */
2473
static xmlParserInputPtr
2474
xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2475
                               xmlParserCtxtPtr ctxt)
2476
0
{
2477
0
    xmlParserInputPtr input = NULL;
2478
0
    char *resource = NULL;
2479
2480
0
    (void) ID;
2481
2482
0
    if (url == NULL)
2483
0
        return(NULL);
2484
2485
0
#ifdef LIBXML_CATALOG_ENABLED
2486
0
    resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2487
0
    if (resource != NULL)
2488
0
  url = resource;
2489
0
#endif
2490
2491
0
    if ((ctxt != NULL) &&
2492
0
        (ctxt->options & XML_PARSE_NONET) &&
2493
0
        (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2494
0
        xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2495
0
    } else {
2496
0
        input = xmlNewInputFromFile(ctxt, url);
2497
0
    }
2498
2499
0
    if (resource != NULL)
2500
0
  xmlFree(resource);
2501
0
    return(input);
2502
0
}
2503
2504
/**
2505
 * xmlNoNetExternalEntityLoader:
2506
 * @URL:  the URL for the entity to load
2507
 * @ID:  the System ID for the entity to load
2508
 * @ctxt:  the context in which the entity is called or NULL
2509
 *
2510
 * DEPRECATED: Use XML_PARSE_NONET.
2511
 *
2512
 * A specific entity loader disabling network accesses, though still
2513
 * allowing local catalog accesses for resolution.
2514
 *
2515
 * Returns a new allocated xmlParserInputPtr, or NULL.
2516
 */
2517
xmlParserInputPtr
2518
xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2519
0
                             xmlParserCtxtPtr ctxt) {
2520
0
    int oldOptions = 0;
2521
0
    xmlParserInputPtr input;
2522
2523
0
    if (ctxt != NULL) {
2524
0
        oldOptions = ctxt->options;
2525
0
        ctxt->options |= XML_PARSE_NONET;
2526
0
    }
2527
2528
0
    input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2529
2530
0
    if (ctxt != NULL)
2531
0
        ctxt->options = oldOptions;
2532
2533
0
    return(input);
2534
0
}
2535
2536
/*
2537
 * This global has to die eventually
2538
 */
2539
/*
 * Loader called by xmlLoadResource when the parser context has no
 * resource loader installed. Replaced with xmlSetExternalEntityLoader
 * and read back with xmlGetExternalEntityLoader.
 */
static xmlExternalEntityLoader
2540
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2541
2542
/**
2543
 * xmlSetExternalEntityLoader:
2544
 * @f:  the new entity resolver function
2545
 *
2546
 * DEPRECATED: This is a global setting and not thread-safe. Use
2547
 * xmlCtxtSetResourceLoader or similar functions.
2548
 *
2549
 * Changes the default external entity resolver function for the
2550
 * application.
2551
 */
2552
void
2553
0
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2554
0
    xmlCurrentExternalEntityLoader = f;
2555
0
}
2556
2557
/**
2558
 * xmlGetExternalEntityLoader:
2559
 *
2560
 * DEPRECATED: See xmlSetExternalEntityLoader.
2561
 *
2562
 * Get the default external entity resolver function for the application
2563
 *
2564
 * Returns the xmlExternalEntityLoader function pointer
2565
 */
2566
xmlExternalEntityLoader
2567
0
xmlGetExternalEntityLoader(void) {
2568
0
    return(xmlCurrentExternalEntityLoader);
2569
0
}
2570
2571
/**
2572
 * xmlCtxtSetResourceLoader:
2573
 * @ctxt:  parser context
2574
 * @loader:  callback
2575
 * @vctxt:  user data
2576
 *
2577
 * Installs a custom callback to load documents, DTDs or external
2578
 * entities.
2579
 *
2580
 * Available since 2.14.0.
2581
 */
2582
void
2583
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2584
0
                         void *vctxt) {
2585
0
    if (ctxt == NULL)
2586
0
        return;
2587
2588
0
    ctxt->resourceLoader = loader;
2589
0
    ctxt->resourceCtxt = vctxt;
2590
0
}
2591
2592
/**
2593
 * xmlLoadResource:
2594
 * @ctxt:  parser context
2595
 * @url:  the URL for the entity to load
2596
 * @publicId:  the Public ID for the entity to load
2597
 * @type:  resource type
2598
 *
2599
 * Returns the xmlParserInputPtr or NULL in case of error.
2600
 */
2601
xmlParserInputPtr
2602
xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2603
0
                xmlResourceType type) {
2604
0
    char *canonicFilename;
2605
0
    xmlParserInputPtr ret;
2606
2607
0
    if (url == NULL)
2608
0
        return(NULL);
2609
2610
0
    if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2611
0
        char *resource = NULL;
2612
0
        int flags = 0;
2613
0
        int code;
2614
2615
0
#ifdef LIBXML_CATALOG_ENABLED
2616
0
        resource = (char *) xmlResolveResourceFromCatalog(url, publicId, ctxt);
2617
0
        if (resource != NULL)
2618
0
            url = resource;
2619
0
#endif
2620
2621
0
        if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2622
0
            flags |= XML_INPUT_UNZIP;
2623
0
        if ((ctxt->options & XML_PARSE_NONET) == 0)
2624
0
            flags |= XML_INPUT_NETWORK;
2625
2626
0
        code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, type,
2627
0
                                    flags, &ret);
2628
0
        if (code != XML_ERR_OK) {
2629
0
            xmlCtxtErrIO(ctxt, code, url);
2630
0
            ret = NULL;
2631
0
        }
2632
0
        if (resource != NULL)
2633
0
            xmlFree(resource);
2634
0
        return(ret);
2635
0
    }
2636
2637
0
    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2638
0
    if (canonicFilename == NULL) {
2639
0
        xmlCtxtErrMemory(ctxt);
2640
0
        return(NULL);
2641
0
    }
2642
2643
0
    ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2644
0
    xmlFree(canonicFilename);
2645
0
    return(ret);
2646
0
}
2647
2648
/**
2649
 * xmlLoadExternalEntity:
2650
 * @URL:  the URL for the entity to load
2651
 * @ID:  the Public ID for the entity to load
2652
 * @ctxt:  the context in which the entity is called or NULL
2653
 *
2654
 * @URL is a filename or URL. If it contains the substring "://",
2655
 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2656
 * treated as a filesystem path.
2657
 *
2658
 * @ID is an optional XML public ID, typically from a doctype
2659
 * declaration. It is used for catalog lookups.
2660
 *
2661
 * If catalog lookup is enabled (default is yes) and URL or ID are
2662
 * found in system or local XML catalogs, URL is replaced with the
2663
 * result. Then the following resource loaders will be called if
2664
 * they were registered (in order of precedence):
2665
 *
2666
 * - the resource loader set with xmlCtxtSetResourceLoader
2667
 * - the global external entity loader set with
2668
 *   xmlSetExternalEntityLoader (without catalog resolution,
2669
 *   deprecated)
2670
 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2671
 *   xmlParserInputBufferCreateFilenameDefault (deprecated)
2672
 * - the default loader which will return
2673
 *   - the result from a matching global input callback set with
2674
 *     xmlRegisterInputCallbacks (deprecated)
2675
 *   - a HTTP resource if support is compiled in.
2676
 *   - a file opened from the filesystem, with automatic detection
2677
 *     of compressed files if support is compiled in.
2678
 *
2679
 * Returns the xmlParserInputPtr or NULL
2680
 */
2681
xmlParserInputPtr
2682
xmlLoadExternalEntity(const char *URL, const char *ID,
2683
0
                      xmlParserCtxtPtr ctxt) {
2684
0
    return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2685
0
}
2686
2687
/************************************************************************
2688
 *                  *
2689
 *    Commodity functions to handle parser contexts   *
2690
 *                  *
2691
 ************************************************************************/
2692
2693
/**
2694
 * xmlInitSAXParserCtxt:
2695
 * @ctxt:  XML parser context
2696
 * @sax:  SAX handler
2697
 * @userData:  user data
2698
 *
2699
 * Initialize a SAX parser context
2700
 *
2701
 * Returns 0 in case of success and -1 in case of error
2702
 */
2703
2704
static int
2705
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2706
                     void *userData)
2707
20.2k
{
2708
20.2k
    xmlParserInputPtr input;
2709
20.2k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2710
20.2k
    size_t initialNodeTabSize = 1;
2711
#else
2712
    size_t initialNodeTabSize = 10;
2713
#endif
2714
2715
20.2k
    if (ctxt == NULL)
2716
0
        return(-1);
2717
2718
20.2k
    if (ctxt->dict == NULL)
2719
20.2k
  ctxt->dict = xmlDictCreate();
2720
20.2k
    if (ctxt->dict == NULL)
2721
0
  return(-1);
2722
2723
20.2k
    if (ctxt->sax == NULL)
2724
20.2k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2725
20.2k
    if (ctxt->sax == NULL)
2726
0
  return(-1);
2727
20.2k
    if (sax == NULL) {
2728
20.2k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2729
20.2k
        xmlSAXVersion(ctxt->sax, 2);
2730
20.2k
        ctxt->userData = ctxt;
2731
20.2k
    } else {
2732
0
  if (sax->initialized == XML_SAX2_MAGIC) {
2733
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2734
0
        } else {
2735
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2736
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2737
0
        }
2738
0
        ctxt->userData = userData ? userData : ctxt;
2739
0
    }
2740
2741
20.2k
    ctxt->maxatts = 0;
2742
20.2k
    ctxt->atts = NULL;
2743
    /* Allocate the Input stack */
2744
20.2k
    if (ctxt->inputTab == NULL) {
2745
20.2k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2746
20.2k
        size_t initialSize = 1;
2747
#else
2748
        size_t initialSize = 5;
2749
#endif
2750
2751
20.2k
  ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
2752
20.2k
  ctxt->inputMax = initialSize;
2753
20.2k
    }
2754
20.2k
    if (ctxt->inputTab == NULL)
2755
0
  return(-1);
2756
20.2k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2757
0
        xmlFreeInputStream(input);
2758
0
    }
2759
20.2k
    ctxt->inputNr = 0;
2760
20.2k
    ctxt->input = NULL;
2761
2762
20.2k
    ctxt->version = NULL;
2763
20.2k
    ctxt->encoding = NULL;
2764
20.2k
    ctxt->standalone = -1;
2765
20.2k
    ctxt->hasExternalSubset = 0;
2766
20.2k
    ctxt->hasPErefs = 0;
2767
20.2k
    ctxt->html = 0;
2768
20.2k
    ctxt->instate = XML_PARSER_START;
2769
2770
    /* Allocate the Node stack */
2771
20.2k
    if (ctxt->nodeTab == NULL) {
2772
20.2k
  ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
2773
20.2k
  ctxt->nodeMax = initialNodeTabSize;
2774
20.2k
    }
2775
20.2k
    if (ctxt->nodeTab == NULL)
2776
0
  return(-1);
2777
20.2k
    ctxt->nodeNr = 0;
2778
20.2k
    ctxt->node = NULL;
2779
2780
    /* Allocate the Name stack */
2781
20.2k
    if (ctxt->nameTab == NULL) {
2782
20.2k
  ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
2783
20.2k
  ctxt->nameMax = initialNodeTabSize;
2784
20.2k
    }
2785
20.2k
    if (ctxt->nameTab == NULL)
2786
0
  return(-1);
2787
20.2k
    ctxt->nameNr = 0;
2788
20.2k
    ctxt->name = NULL;
2789
2790
    /* Allocate the space stack */
2791
20.2k
    if (ctxt->spaceTab == NULL) {
2792
20.2k
  ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
2793
20.2k
  ctxt->spaceMax = initialNodeTabSize;
2794
20.2k
    }
2795
20.2k
    if (ctxt->spaceTab == NULL)
2796
0
  return(-1);
2797
20.2k
    ctxt->spaceNr = 1;
2798
20.2k
    ctxt->spaceTab[0] = -1;
2799
20.2k
    ctxt->space = &ctxt->spaceTab[0];
2800
20.2k
    ctxt->myDoc = NULL;
2801
20.2k
    ctxt->wellFormed = 1;
2802
20.2k
    ctxt->nsWellFormed = 1;
2803
20.2k
    ctxt->valid = 1;
2804
2805
20.2k
    ctxt->options = XML_PARSE_NODICT;
2806
2807
    /*
2808
     * Initialize some parser options from deprecated global variables.
2809
     * Note that the "modern" API taking options arguments or
2810
     * xmlCtxtSetOptions will ignore these defaults. They're only
2811
     * relevant if old API functions like xmlParseFile are used.
2812
     */
2813
20.2k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2814
20.2k
    if (ctxt->loadsubset) {
2815
0
        ctxt->options |= XML_PARSE_DTDLOAD;
2816
0
    }
2817
20.2k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
2818
20.2k
    if (ctxt->validate) {
2819
0
        ctxt->options |= XML_PARSE_DTDVALID;
2820
0
    }
2821
20.2k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
2822
20.2k
    if (ctxt->pedantic) {
2823
0
        ctxt->options |= XML_PARSE_PEDANTIC;
2824
0
    }
2825
20.2k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
2826
20.2k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2827
20.2k
    if (ctxt->keepBlanks == 0) {
2828
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2829
0
  ctxt->options |= XML_PARSE_NOBLANKS;
2830
0
    }
2831
20.2k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2832
20.2k
    if (ctxt->replaceEntities) {
2833
0
        ctxt->options |= XML_PARSE_NOENT;
2834
0
    }
2835
20.2k
    if (xmlGetWarningsDefaultValue == 0)
2836
0
        ctxt->options |= XML_PARSE_NOWARNING;
2837
2838
20.2k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2839
20.2k
    ctxt->vctxt.userData = ctxt;
2840
20.2k
    ctxt->vctxt.error = xmlParserValidityError;
2841
20.2k
    ctxt->vctxt.warning = xmlParserValidityWarning;
2842
2843
20.2k
    ctxt->record_info = 0;
2844
20.2k
    ctxt->checkIndex = 0;
2845
20.2k
    ctxt->inSubset = 0;
2846
20.2k
    ctxt->errNo = XML_ERR_OK;
2847
20.2k
    ctxt->depth = 0;
2848
20.2k
    ctxt->catalogs = NULL;
2849
20.2k
    ctxt->sizeentities = 0;
2850
20.2k
    ctxt->sizeentcopy = 0;
2851
20.2k
    ctxt->input_id = 1;
2852
20.2k
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2853
20.2k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2854
2855
20.2k
    if (ctxt->nsdb == NULL) {
2856
20.2k
        ctxt->nsdb = xmlParserNsCreate();
2857
20.2k
        if (ctxt->nsdb == NULL)
2858
0
            return(-1);
2859
20.2k
    }
2860
2861
20.2k
    return(0);
2862
20.2k
}
2863
2864
/**
2865
 * xmlInitParserCtxt:
2866
 * @ctxt:  an XML parser context
2867
 *
2868
 * DEPRECATED: Internal function which will be made private in a future
2869
 * version.
2870
 *
2871
 * Initialize a parser context
2872
 *
2873
 * Returns 0 in case of success and -1 in case of error
2874
 */
2875
2876
int
2877
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2878
0
{
2879
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2880
0
}
2881
2882
/**
2883
 * xmlFreeParserCtxt:
2884
 * @ctxt:  an XML parser context
2885
 *
2886
 * Free all the memory used by a parser context. However the parsed
2887
 * document in ctxt->myDoc is not freed.
2888
 */
2889
2890
void
2891
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2892
20.2k
{
2893
20.2k
    xmlParserInputPtr input;
2894
2895
20.2k
    if (ctxt == NULL) return;
2896
2897
20.2k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2898
0
        xmlFreeInputStream(input);
2899
0
    }
2900
20.2k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2901
20.2k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2902
20.2k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2903
20.2k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2904
20.2k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2905
20.2k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2906
20.2k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2907
20.2k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2908
20.2k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2909
20.2k
#ifdef LIBXML_SAX1_ENABLED
2910
20.2k
    if ((ctxt->sax != NULL) &&
2911
20.2k
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2912
#else
2913
    if (ctxt->sax != NULL)
2914
#endif /* LIBXML_SAX1_ENABLED */
2915
20.2k
        xmlFree(ctxt->sax);
2916
20.2k
    if (ctxt->directory != NULL) xmlFree(ctxt->directory);
2917
20.2k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2918
20.2k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2919
20.2k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2920
20.2k
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2921
20.2k
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2922
20.2k
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2923
20.2k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2924
20.2k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2925
20.2k
    if (ctxt->attsDefault != NULL)
2926
2.06k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2927
20.2k
    if (ctxt->attsSpecial != NULL)
2928
2.16k
        xmlHashFree(ctxt->attsSpecial, NULL);
2929
20.2k
    if (ctxt->freeElems != NULL) {
2930
0
        xmlNodePtr cur, next;
2931
2932
0
  cur = ctxt->freeElems;
2933
0
  while (cur != NULL) {
2934
0
      next = cur->next;
2935
0
      xmlFree(cur);
2936
0
      cur = next;
2937
0
  }
2938
0
    }
2939
20.2k
    if (ctxt->freeAttrs != NULL) {
2940
0
        xmlAttrPtr cur, next;
2941
2942
0
  cur = ctxt->freeAttrs;
2943
0
  while (cur != NULL) {
2944
0
      next = cur->next;
2945
0
      xmlFree(cur);
2946
0
      cur = next;
2947
0
  }
2948
0
    }
2949
    /*
2950
     * cleanup the error strings
2951
     */
2952
20.2k
    if (ctxt->lastError.message != NULL)
2953
20.1k
        xmlFree(ctxt->lastError.message);
2954
20.2k
    if (ctxt->lastError.file != NULL)
2955
9.86k
        xmlFree(ctxt->lastError.file);
2956
20.2k
    if (ctxt->lastError.str1 != NULL)
2957
7.61k
        xmlFree(ctxt->lastError.str1);
2958
20.2k
    if (ctxt->lastError.str2 != NULL)
2959
229
        xmlFree(ctxt->lastError.str2);
2960
20.2k
    if (ctxt->lastError.str3 != NULL)
2961
59
        xmlFree(ctxt->lastError.str3);
2962
2963
20.2k
#ifdef LIBXML_CATALOG_ENABLED
2964
20.2k
    if (ctxt->catalogs != NULL)
2965
65
  xmlCatalogFreeLocal(ctxt->catalogs);
2966
20.2k
#endif
2967
20.2k
    xmlFree(ctxt);
2968
20.2k
}
2969
2970
/**
2971
 * xmlNewParserCtxt:
2972
 *
2973
 * Allocate and initialize a new parser context.
2974
 *
2975
 * Returns the xmlParserCtxtPtr or NULL
2976
 */
2977
2978
xmlParserCtxtPtr
2979
xmlNewParserCtxt(void)
2980
20.2k
{
2981
20.2k
    return(xmlNewSAXParserCtxt(NULL, NULL));
2982
20.2k
}
2983
2984
/**
2985
 * xmlNewSAXParserCtxt:
2986
 * @sax:  SAX handler
2987
 * @userData:  user data
2988
 *
2989
 * Allocate and initialize a new SAX parser context. If userData is NULL,
2990
 * the parser context will be passed as user data.
2991
 *
2992
 * Available since 2.11.0. If you want support older versions,
2993
 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2994
 * struct assignment.
2995
 *
2996
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2997
 */
2998
2999
xmlParserCtxtPtr
3000
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
3001
20.2k
{
3002
20.2k
    xmlParserCtxtPtr ctxt;
3003
3004
20.2k
    xmlInitParser();
3005
3006
20.2k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
3007
20.2k
    if (ctxt == NULL)
3008
0
  return(NULL);
3009
20.2k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
3010
20.2k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
3011
0
        xmlFreeParserCtxt(ctxt);
3012
0
  return(NULL);
3013
0
    }
3014
20.2k
    return(ctxt);
3015
20.2k
}
3016
3017
/**
3018
 * xmlCtxtGetPrivate:
3019
 * ctxt:  parser context
3020
 *
3021
 * Available since 2.14.0.
3022
 *
3023
 * Returns the private application data.
3024
 */
3025
void *
3026
0
xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt) {
3027
0
    if (ctxt == NULL)
3028
0
        return(NULL);
3029
3030
0
    return(ctxt->_private);
3031
0
}
3032
3033
/**
3034
 * xmlCtxtSetPrivate:
3035
 * ctxt:  parser context
3036
 * priv:  private application data
3037
 *
3038
 * Available since 2.14.0.
3039
 *
3040
 * Set the private application data.
3041
 */
3042
void
3043
0
xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt, void *priv) {
3044
0
    if (ctxt == NULL)
3045
0
        return;
3046
3047
0
    ctxt->_private = priv;
3048
0
}
3049
3050
/**
3051
 * xmlCtxtGetCatalogs:
3052
 * ctxt:  parser context
3053
 *
3054
 * Available since 2.14.0.
3055
 *
3056
 * Returns the local catalogs.
3057
 */
3058
void *
3059
0
xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt) {
3060
0
    if (ctxt == NULL)
3061
0
        return(NULL);
3062
3063
0
    return(ctxt->catalogs);
3064
0
}
3065
3066
/**
3067
 * xmlCtxtSetCatalogs:
3068
 * ctxt:  parser context
3069
 * catalogs:  catalogs pointer
3070
 *
3071
 * Available since 2.14.0.
3072
 *
3073
 * Set the local catalogs.
3074
 */
3075
void
3076
0
xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt, void *catalogs) {
3077
0
    if (ctxt == NULL)
3078
0
        return;
3079
3080
0
    ctxt->catalogs = catalogs;
3081
0
}
3082
3083
/**
3084
 * xmlCtxtGetDict:
3085
 * ctxt:  parser context
3086
 *
3087
 * Available since 2.14.0.
3088
 *
3089
 * Returns the dictionary.
3090
 */
3091
xmlDictPtr
3092
0
xmlCtxtGetDict(xmlParserCtxtPtr ctxt) {
3093
0
    if (ctxt == NULL)
3094
0
        return(NULL);
3095
3096
0
    return(ctxt->dict);
3097
0
}
3098
3099
/**
3100
 * xmlCtxtSetDict:
3101
 * ctxt:  parser context
3102
 * dict:  dictionary
3103
 *
3104
 * Available since 2.14.0.
3105
 *
3106
 * Set the dictionary. This should only be done immediately after
3107
 * creating a parser context.
3108
 */
3109
void
3110
0
xmlCtxtSetDict(xmlParserCtxtPtr ctxt, xmlDictPtr dict) {
3111
0
    if (ctxt == NULL)
3112
0
        return;
3113
3114
0
    if (ctxt->dict != NULL)
3115
0
        xmlDictFree(ctxt->dict);
3116
3117
0
    xmlDictReference(dict);
3118
0
    ctxt->dict = dict;
3119
0
}
3120
3121
/************************************************************************
3122
 *                  *
3123
 *    Handling of node information        *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/**
3128
 * xmlClearParserCtxt:
3129
 * @ctxt:  an XML parser context
3130
 *
3131
 * Clear (release owned resources) and reinitialize a parser context
3132
 */
3133
3134
void
3135
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
3136
0
{
3137
0
  if (ctxt==NULL)
3138
0
    return;
3139
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
3140
0
  xmlCtxtReset(ctxt);
3141
0
}
3142
3143
3144
/**
3145
 * xmlParserFindNodeInfo:
3146
 * @ctx:  an XML parser context
3147
 * @node:  an XML node within the tree
3148
 *
3149
 * DEPRECATED: Don't use.
3150
 *
3151
 * Find the parser node info struct for a given node
3152
 *
3153
 * Returns an xmlParserNodeInfo block pointer or NULL
3154
 */
3155
const xmlParserNodeInfo *
3156
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
3157
0
{
3158
0
    unsigned long pos;
3159
3160
0
    if ((ctx == NULL) || (node == NULL))
3161
0
        return (NULL);
3162
    /* Find position where node should be at */
3163
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3164
0
    if (pos < ctx->node_seq.length
3165
0
        && ctx->node_seq.buffer[pos].node == node)
3166
0
        return &ctx->node_seq.buffer[pos];
3167
0
    else
3168
0
        return NULL;
3169
0
}
3170
3171
3172
/**
3173
 * xmlInitNodeInfoSeq:
3174
 * @seq:  a node info sequence pointer
3175
 *
3176
 * DEPRECATED: Don't use.
3177
 *
3178
 * -- Initialize (set to initial state) node info sequence
3179
 */
3180
void
3181
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3182
20.2k
{
3183
20.2k
    if (seq == NULL)
3184
0
        return;
3185
20.2k
    seq->length = 0;
3186
20.2k
    seq->maximum = 0;
3187
20.2k
    seq->buffer = NULL;
3188
20.2k
}
3189
3190
/**
3191
 * xmlClearNodeInfoSeq:
3192
 * @seq:  a node info sequence pointer
3193
 *
3194
 * DEPRECATED: Don't use.
3195
 *
3196
 * -- Clear (release memory and reinitialize) node
3197
 *   info sequence
3198
 */
3199
void
3200
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3201
0
{
3202
0
    if (seq == NULL)
3203
0
        return;
3204
0
    if (seq->buffer != NULL)
3205
0
        xmlFree(seq->buffer);
3206
0
    xmlInitNodeInfoSeq(seq);
3207
0
}
3208
3209
/**
3210
 * xmlParserFindNodeInfoIndex:
3211
 * @seq:  a node info sequence pointer
3212
 * @node:  an XML node pointer
3213
 *
3214
 * DEPRECATED: Don't use.
3215
 *
3216
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
3217
 *   the given node is or should be at in a sorted sequence
3218
 *
3219
 * Returns a long indicating the position of the record
3220
 */
3221
unsigned long
3222
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
3223
                           xmlNodePtr node)
3224
0
{
3225
0
    unsigned long upper, lower, middle;
3226
0
    int found = 0;
3227
3228
0
    if ((seq == NULL) || (node == NULL))
3229
0
        return ((unsigned long) -1);
3230
3231
    /* Do a binary search for the key */
3232
0
    lower = 1;
3233
0
    upper = seq->length;
3234
0
    middle = 0;
3235
0
    while (lower <= upper && !found) {
3236
0
        middle = lower + (upper - lower) / 2;
3237
0
        if (node == seq->buffer[middle - 1].node)
3238
0
            found = 1;
3239
0
        else if (node < seq->buffer[middle - 1].node)
3240
0
            upper = middle - 1;
3241
0
        else
3242
0
            lower = middle + 1;
3243
0
    }
3244
3245
    /* Return position */
3246
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
3247
0
        return middle;
3248
0
    else
3249
0
        return middle - 1;
3250
0
}
3251
3252
3253
/**
3254
 * xmlParserAddNodeInfo:
3255
 * @ctxt:  an XML parser context
3256
 * @info:  a node info sequence pointer
3257
 *
3258
 * DEPRECATED: Don't use.
3259
 *
3260
 * Insert node info record into the sorted sequence
3261
 */
3262
void
3263
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
3264
                     xmlParserNodeInfoPtr info)
3265
0
{
3266
0
    unsigned long pos;
3267
3268
0
    if ((ctxt == NULL) || (info == NULL)) return;
3269
3270
    /* Find pos and check to see if node is already in the sequence */
3271
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
3272
0
                                     info->node);
3273
3274
0
    if ((pos < ctxt->node_seq.length) &&
3275
0
        (ctxt->node_seq.buffer != NULL) &&
3276
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
3277
0
        ctxt->node_seq.buffer[pos] = *info;
3278
0
    }
3279
3280
    /* Otherwise, we need to add new node to buffer */
3281
0
    else {
3282
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
3283
0
      (ctxt->node_seq.buffer == NULL)) {
3284
0
            xmlParserNodeInfo *tmp_buffer;
3285
0
            unsigned int byte_size;
3286
3287
0
            if (ctxt->node_seq.maximum == 0)
3288
0
                ctxt->node_seq.maximum = 2;
3289
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
3290
0
      (2 * ctxt->node_seq.maximum));
3291
3292
0
            if (ctxt->node_seq.buffer == NULL)
3293
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
3294
0
            else
3295
0
                tmp_buffer =
3296
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
3297
0
                                                     byte_size);
3298
3299
0
            if (tmp_buffer == NULL) {
3300
0
    xmlCtxtErrMemory(ctxt);
3301
0
                return;
3302
0
            }
3303
0
            ctxt->node_seq.buffer = tmp_buffer;
3304
0
            ctxt->node_seq.maximum *= 2;
3305
0
        }
3306
3307
        /* If position is not at end, move elements out of the way */
3308
0
        if (pos != ctxt->node_seq.length) {
3309
0
            unsigned long i;
3310
3311
0
            for (i = ctxt->node_seq.length; i > pos; i--)
3312
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
3313
0
        }
3314
3315
        /* Copy element and increase length */
3316
0
        ctxt->node_seq.buffer[pos] = *info;
3317
0
        ctxt->node_seq.length++;
3318
0
    }
3319
0
}
3320
3321
/************************************************************************
3322
 *                  *
3323
 *    Defaults settings         *
3324
 *                  *
3325
 ************************************************************************/
3326
/**
3327
 * xmlPedanticParserDefault:
3328
 * @val:  int 0 or 1
3329
 *
3330
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3331
 *
3332
 * Set and return the previous value for enabling pedantic warnings.
3333
 *
3334
 * Returns the last value for 0 for no substitution, 1 for substitution.
3335
 */
3336
3337
int
3338
0
xmlPedanticParserDefault(int val) {
3339
0
    int old = xmlPedanticParserDefaultValue;
3340
3341
0
    xmlPedanticParserDefaultValue = val;
3342
0
    return(old);
3343
0
}
3344
3345
/**
3346
 * xmlLineNumbersDefault:
3347
 * @val:  int 0 or 1
3348
 *
3349
 * DEPRECATED: The modern options API always enables line numbers.
3350
 *
3351
 * Set and return the previous value for enabling line numbers in elements
3352
 * contents. This may break on old application and is turned off by default.
3353
 *
3354
 * Returns the last value for 0 for no substitution, 1 for substitution.
3355
 */
3356
3357
int
3358
0
xmlLineNumbersDefault(int val) {
3359
0
    int old = xmlLineNumbersDefaultValue;
3360
3361
0
    xmlLineNumbersDefaultValue = val;
3362
0
    return(old);
3363
0
}
3364
3365
/**
3366
 * xmlSubstituteEntitiesDefault:
3367
 * @val:  int 0 or 1
3368
 *
3369
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3370
 *
3371
 * Set and return the previous value for default entity support.
3372
 * Initially the parser always keep entity references instead of substituting
3373
 * entity values in the output. This function has to be used to change the
3374
 * default parser behavior
3375
 * SAX::substituteEntities() has to be used for changing that on a file by
3376
 * file basis.
3377
 *
3378
 * Returns the last value for 0 for no substitution, 1 for substitution.
3379
 */
3380
3381
int
3382
0
xmlSubstituteEntitiesDefault(int val) {
3383
0
    int old = xmlSubstituteEntitiesDefaultValue;
3384
3385
0
    xmlSubstituteEntitiesDefaultValue = val;
3386
0
    return(old);
3387
0
}
3388
3389
/**
3390
 * xmlKeepBlanksDefault:
3391
 * @val:  int 0 or 1
3392
 *
3393
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3394
 *
3395
 * Set and return the previous value for default blanks text nodes support.
3396
 * The 1.x version of the parser used an heuristic to try to detect
3397
 * ignorable white spaces. As a result the SAX callback was generating
3398
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3399
 * using the DOM output text nodes containing those blanks were not generated.
3400
 * The 2.x and later version will switch to the XML standard way and
3401
 * ignorableWhitespace() are only generated when running the parser in
3402
 * validating mode and when the current element doesn't allow CDATA or
3403
 * mixed content.
3404
 * This function is provided as a way to force the standard behavior
3405
 * on 1.X libs and to switch back to the old mode for compatibility when
3406
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3407
 * by using xmlIsBlankNode() commodity function to detect the "empty"
3408
 * nodes generated.
3409
 * This value also affect autogeneration of indentation when saving code
3410
 * if blanks sections are kept, indentation is not generated.
3411
 *
3412
 * Returns the last value for 0 for no substitution, 1 for substitution.
3413
 */
3414
3415
int
3416
0
xmlKeepBlanksDefault(int val) {
3417
0
    int old = xmlKeepBlanksDefaultValue;
3418
3419
0
    xmlKeepBlanksDefaultValue = val;
3420
0
#ifdef LIBXML_OUTPUT_ENABLED
3421
0
    if (!val)
3422
0
        xmlIndentTreeOutput = 1;
3423
0
#endif
3424
0
    return(old);
3425
0
}
3426