Coverage Report

Created: 2023-06-07 06:14

/src/libxml2/parserInternals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * daniel@veillard.com
8
 */
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/valid.h>
28
#include <libxml/entities.h>
29
#include <libxml/xmlerror.h>
30
#include <libxml/encoding.h>
31
#include <libxml/valid.h>
32
#include <libxml/xmlIO.h>
33
#include <libxml/uri.h>
34
#include <libxml/dict.h>
35
#include <libxml/SAX.h>
36
#ifdef LIBXML_CATALOG_ENABLED
37
#include <libxml/catalog.h>
38
#endif
39
#include <libxml/globals.h>
40
#include <libxml/chvalid.h>
41
42
54.5M
#define CUR(ctxt) ctxt->input->cur
43
54.5M
#define END(ctxt) ctxt->input->end
44
54.5M
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45
46
#include "private/buf.h"
47
#include "private/enc.h"
48
#include "private/error.h"
49
#include "private/io.h"
50
#include "private/parser.h"
51
52
/*
53
 * Various global defaults for parsing
54
 */
55
56
/**
57
 * xmlCheckVersion:
58
 * @version: the include version number
59
 *
60
 * Check the version of the compiled library against the version of the headers.
61
 * A mismatch is reported as an error or warning message; nothing here terminates the application
62
 */
63
void
64
0
xmlCheckVersion(int version) {
65
0
    int myversion = LIBXML_VERSION;
66
67
0
    xmlInitParser();
68
69
0
    if ((myversion / 10000) != (version / 10000)) {
70
0
  xmlGenericError(xmlGenericErrorContext,
71
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
72
0
    (version / 10000), (myversion / 10000));
73
0
  fprintf(stderr,
74
0
    "Fatal: program compiled against libxml %d using libxml %d\n",
75
0
    (version / 10000), (myversion / 10000));
76
0
    }
77
0
    if ((myversion / 100) < (version / 100)) {
78
0
  xmlGenericError(xmlGenericErrorContext,
79
0
    "Warning: program compiled against libxml %d using older %d\n",
80
0
    (version / 100), (myversion / 100));
81
0
    }
82
0
}
83
84
85
/************************************************************************
86
 *                  *
87
 *    Some factorized error routines        *
88
 *                  *
89
 ************************************************************************/
90
91
92
/**
93
 * xmlErrMemory:
94
 * @ctxt:  an XML parser context
95
 * @extra:  extra information
96
 *
97
 * Handle a memory allocation failure
98
 */
99
void
100
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
101
3.36k
{
102
3.36k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103
3.36k
        (ctxt->instate == XML_PARSER_EOF))
104
689
  return;
105
2.67k
    if (ctxt != NULL) {
106
2.15k
        ctxt->errNo = XML_ERR_NO_MEMORY;
107
2.15k
        ctxt->instate = XML_PARSER_EOF;
108
2.15k
        ctxt->disableSAX = 1;
109
2.15k
    }
110
2.67k
    if (extra)
111
1.24k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112
1.24k
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113
1.24k
                        NULL, NULL, 0, 0,
114
1.24k
                        "Memory allocation failed : %s\n", extra);
115
1.43k
    else
116
1.43k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117
1.43k
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118
1.43k
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
119
2.67k
}
120
121
/**
122
 * __xmlErrEncoding:
123
 * @ctxt:  an XML parser context
124
 * @xmlerr:  the error number
125
 * @msg:  the error message
126
 * @str1:  a string info
127
 * @str2:  a string info
128
 *
129
 * Handle an encoding error
130
 */
131
void
132
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
134
48.6k
{
135
48.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136
48.6k
        (ctxt->instate == XML_PARSER_EOF))
137
0
  return;
138
48.6k
    if (ctxt != NULL)
139
48.6k
        ctxt->errNo = xmlerr;
140
48.6k
    __xmlRaiseError(NULL, NULL, NULL,
141
48.6k
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142
48.6k
                    NULL, 0, (const char *) str1, (const char *) str2,
143
48.6k
                    NULL, 0, 0, msg, str1, str2);
144
48.6k
    if (ctxt != NULL) {
145
48.6k
        ctxt->wellFormed = 0;
146
48.6k
        if (ctxt->recovery == 0)
147
48.4k
            ctxt->disableSAX = 1;
148
48.6k
    }
149
48.6k
}
150
151
/**
152
 * xmlErrInternal:
153
 * @ctxt:  an XML parser context
154
 * @msg:  the error message
155
 * @str:  error information
156
 *
157
 * Handle an internal error
158
 */
159
static void LIBXML_ATTR_FORMAT(2,0)
160
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
161
626
{
162
626
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163
626
        (ctxt->instate == XML_PARSER_EOF))
164
0
  return;
165
626
    if (ctxt != NULL)
166
626
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167
626
    __xmlRaiseError(NULL, NULL, NULL,
168
626
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169
626
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170
626
                    0, 0, msg, str);
171
626
    if (ctxt != NULL) {
172
626
        ctxt->wellFormed = 0;
173
626
        if (ctxt->recovery == 0)
174
626
            ctxt->disableSAX = 1;
175
626
    }
176
626
}
177
178
/**
179
 * xmlFatalErr:
180
 * @ctxt:  an XML parser context
181
 * @error:  the error number
182
 * @info:  extra information string
183
 *
184
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
185
 */
186
void
187
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
188
1.72M
{
189
1.72M
    const char *errmsg;
190
191
1.72M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192
1.72M
        (ctxt->instate == XML_PARSER_EOF))
193
18.0k
  return;
194
1.70M
    switch (error) {
195
13.5k
        case XML_ERR_INVALID_HEX_CHARREF:
196
13.5k
            errmsg = "CharRef: invalid hexadecimal value";
197
13.5k
            break;
198
12.4k
        case XML_ERR_INVALID_DEC_CHARREF:
199
12.4k
            errmsg = "CharRef: invalid decimal value";
200
12.4k
            break;
201
0
        case XML_ERR_INVALID_CHARREF:
202
0
            errmsg = "CharRef: invalid value";
203
0
            break;
204
1.38M
        case XML_ERR_INTERNAL_ERROR:
205
1.38M
            errmsg = "internal error";
206
1.38M
            break;
207
0
        case XML_ERR_PEREF_AT_EOF:
208
0
            errmsg = "PEReference at end of document";
209
0
            break;
210
0
        case XML_ERR_PEREF_IN_PROLOG:
211
0
            errmsg = "PEReference in prolog";
212
0
            break;
213
0
        case XML_ERR_PEREF_IN_EPILOG:
214
0
            errmsg = "PEReference in epilog";
215
0
            break;
216
0
        case XML_ERR_PEREF_NO_NAME:
217
0
            errmsg = "PEReference: no name";
218
0
            break;
219
21.0k
        case XML_ERR_PEREF_SEMICOL_MISSING:
220
21.0k
            errmsg = "PEReference: expecting ';'";
221
21.0k
            break;
222
797
        case XML_ERR_ENTITY_LOOP:
223
797
            errmsg = "Detected an entity reference loop";
224
797
            break;
225
0
        case XML_ERR_ENTITY_NOT_STARTED:
226
0
            errmsg = "EntityValue: \" or ' expected";
227
0
            break;
228
239
        case XML_ERR_ENTITY_PE_INTERNAL:
229
239
            errmsg = "PEReferences forbidden in internal subset";
230
239
            break;
231
1.14k
        case XML_ERR_ENTITY_NOT_FINISHED:
232
1.14k
            errmsg = "EntityValue: \" or ' expected";
233
1.14k
            break;
234
10.7k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
235
10.7k
            errmsg = "AttValue: \" or ' expected";
236
10.7k
            break;
237
25.1k
        case XML_ERR_LT_IN_ATTRIBUTE:
238
25.1k
            errmsg = "Unescaped '<' not allowed in attributes values";
239
25.1k
            break;
240
4.07k
        case XML_ERR_LITERAL_NOT_STARTED:
241
4.07k
            errmsg = "SystemLiteral \" or ' expected";
242
4.07k
            break;
243
4.40k
        case XML_ERR_LITERAL_NOT_FINISHED:
244
4.40k
            errmsg = "Unfinished System or Public ID \" or ' expected";
245
4.40k
            break;
246
7.58k
        case XML_ERR_MISPLACED_CDATA_END:
247
7.58k
            errmsg = "Sequence ']]>' not allowed in content";
248
7.58k
            break;
249
1.50k
        case XML_ERR_URI_REQUIRED:
250
1.50k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
251
1.50k
            break;
252
2.67k
        case XML_ERR_PUBID_REQUIRED:
253
2.67k
            errmsg = "PUBLIC, the Public Identifier is missing";
254
2.67k
            break;
255
14.8k
        case XML_ERR_HYPHEN_IN_COMMENT:
256
14.8k
            errmsg = "Comment must not contain '--' (double-hyphen)";
257
14.8k
            break;
258
2.13k
        case XML_ERR_PI_NOT_STARTED:
259
2.13k
            errmsg = "xmlParsePI : no target name";
260
2.13k
            break;
261
1.99k
        case XML_ERR_RESERVED_XML_NAME:
262
1.99k
            errmsg = "Invalid PI name";
263
1.99k
            break;
264
793
        case XML_ERR_NOTATION_NOT_STARTED:
265
793
            errmsg = "NOTATION: Name expected here";
266
793
            break;
267
4.02k
        case XML_ERR_NOTATION_NOT_FINISHED:
268
4.02k
            errmsg = "'>' required to close NOTATION declaration";
269
4.02k
            break;
270
2.68k
        case XML_ERR_VALUE_REQUIRED:
271
2.68k
            errmsg = "Entity value required";
272
2.68k
            break;
273
1.22k
        case XML_ERR_URI_FRAGMENT:
274
1.22k
            errmsg = "Fragment not allowed";
275
1.22k
            break;
276
9.29k
        case XML_ERR_ATTLIST_NOT_STARTED:
277
9.29k
            errmsg = "'(' required to start ATTLIST enumeration";
278
9.29k
            break;
279
846
        case XML_ERR_NMTOKEN_REQUIRED:
280
846
            errmsg = "NmToken expected in ATTLIST enumeration";
281
846
            break;
282
1.44k
        case XML_ERR_ATTLIST_NOT_FINISHED:
283
1.44k
            errmsg = "')' required to finish ATTLIST enumeration";
284
1.44k
            break;
285
1.69k
        case XML_ERR_MIXED_NOT_STARTED:
286
1.69k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
287
1.69k
            break;
288
0
        case XML_ERR_PCDATA_REQUIRED:
289
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
290
0
            break;
291
3.02k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
292
3.02k
            errmsg = "ContentDecl : Name or '(' expected";
293
3.02k
            break;
294
2.50k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
295
2.50k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
296
2.50k
            break;
297
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
298
0
            errmsg =
299
0
                "PEReference: forbidden within markup decl in internal subset";
300
0
            break;
301
18.6k
        case XML_ERR_GT_REQUIRED:
302
18.6k
            errmsg = "expected '>'";
303
18.6k
            break;
304
255
        case XML_ERR_CONDSEC_INVALID:
305
255
            errmsg = "XML conditional section '[' expected";
306
255
            break;
307
2.88k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
308
2.88k
            errmsg = "Content error in the external subset";
309
2.88k
            break;
310
2.04k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
311
2.04k
            errmsg =
312
2.04k
                "conditional section INCLUDE or IGNORE keyword expected";
313
2.04k
            break;
314
605
        case XML_ERR_CONDSEC_NOT_FINISHED:
315
605
            errmsg = "XML conditional section not closed";
316
605
            break;
317
461
        case XML_ERR_XMLDECL_NOT_STARTED:
318
461
            errmsg = "Text declaration '<?xml' required";
319
461
            break;
320
32.7k
        case XML_ERR_XMLDECL_NOT_FINISHED:
321
32.7k
            errmsg = "parsing XML declaration: '?>' expected";
322
32.7k
            break;
323
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
324
0
            errmsg = "external parsed entities cannot be standalone";
325
0
            break;
326
27.1k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
327
27.1k
            errmsg = "EntityRef: expecting ';'";
328
27.1k
            break;
329
6.93k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
330
6.93k
            errmsg = "DOCTYPE improperly terminated";
331
6.93k
            break;
332
0
        case XML_ERR_LTSLASH_REQUIRED:
333
0
            errmsg = "EndTag: '</' not found";
334
0
            break;
335
4.02k
        case XML_ERR_EQUAL_REQUIRED:
336
4.02k
            errmsg = "expected '='";
337
4.02k
            break;
338
10.0k
        case XML_ERR_STRING_NOT_CLOSED:
339
10.0k
            errmsg = "String not closed expecting \" or '";
340
10.0k
            break;
341
1.54k
        case XML_ERR_STRING_NOT_STARTED:
342
1.54k
            errmsg = "String not started expecting ' or \"";
343
1.54k
            break;
344
449
        case XML_ERR_ENCODING_NAME:
345
449
            errmsg = "Invalid XML encoding name";
346
449
            break;
347
1.99k
        case XML_ERR_STANDALONE_VALUE:
348
1.99k
            errmsg = "standalone accepts only 'yes' or 'no'";
349
1.99k
            break;
350
1.11k
        case XML_ERR_DOCUMENT_EMPTY:
351
1.11k
            errmsg = "Document is empty";
352
1.11k
            break;
353
15.2k
        case XML_ERR_DOCUMENT_END:
354
15.2k
            errmsg = "Extra content at the end of the document";
355
15.2k
            break;
356
3.00k
        case XML_ERR_NOT_WELL_BALANCED:
357
3.00k
            errmsg = "chunk is not well balanced";
358
3.00k
            break;
359
0
        case XML_ERR_EXTRA_CONTENT:
360
0
            errmsg = "extra content at the end of well balanced chunk";
361
0
            break;
362
13.5k
        case XML_ERR_VERSION_MISSING:
363
13.5k
            errmsg = "Malformed declaration expecting version";
364
13.5k
            break;
365
3.78k
        case XML_ERR_NAME_TOO_LONG:
366
3.78k
            errmsg = "Name too long";
367
3.78k
            break;
368
15.6k
        case XML_ERR_INVALID_ENCODING:
369
15.6k
            errmsg = "Invalid bytes in character encoding";
370
15.6k
            break;
371
0
        case XML_IO_UNKNOWN:
372
0
            errmsg = "I/O error";
373
0
            break;
374
#if 0
375
        case:
376
            errmsg = "";
377
            break;
378
#endif
379
3.41k
        default:
380
3.41k
            errmsg = "Unregistered error message";
381
1.70M
    }
382
1.70M
    if (ctxt != NULL)
383
1.70M
  ctxt->errNo = error;
384
1.70M
    if (info == NULL) {
385
313k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
386
313k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
387
313k
                        errmsg);
388
1.39M
    } else {
389
1.39M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
390
1.39M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
391
1.39M
                        errmsg, info);
392
1.39M
    }
393
1.70M
    if (ctxt != NULL) {
394
1.70M
  ctxt->wellFormed = 0;
395
1.70M
  if (ctxt->recovery == 0)
396
371k
      ctxt->disableSAX = 1;
397
1.70M
    }
398
1.70M
}
399
400
/**
401
 * xmlErrEncodingInt:
402
 * @ctxt:  an XML parser context
403
 * @error:  the error number
404
 * @msg:  the error message
405
 * @val:  an integer value
406
 *
407
 * Handle an encoding error that carries an integer value
408
 */
409
static void LIBXML_ATTR_FORMAT(3,0)
410
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
411
                  const char *msg, int val)
412
76.2k
{
413
76.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
414
76.2k
        (ctxt->instate == XML_PARSER_EOF))
415
0
  return;
416
76.2k
    if (ctxt != NULL)
417
76.2k
        ctxt->errNo = error;
418
76.2k
    __xmlRaiseError(NULL, NULL, NULL,
419
76.2k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
420
76.2k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
421
76.2k
    if (ctxt != NULL) {
422
76.2k
        ctxt->wellFormed = 0;
423
76.2k
        if (ctxt->recovery == 0)
424
34.4k
            ctxt->disableSAX = 1;
425
76.2k
    }
426
76.2k
}
427
428
/**
429
 * xmlIsLetter:
430
 * @c:  a Unicode character (int)
431
 *
432
 * Check whether the character is allowed by the production
433
 * [84] Letter ::= BaseChar | Ideographic
434
 *
435
 * Returns 0 if not, non-zero otherwise
436
 */
437
int
438
0
xmlIsLetter(int c) {
439
0
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
440
0
}
441
442
/************************************************************************
443
 *                  *
444
 *    Input handling functions for progressive parsing  *
445
 *                  *
446
 ************************************************************************/
447
448
/* #define DEBUG_INPUT */
449
/* #define DEBUG_STACK */
450
/* #define DEBUG_PUSH */
451
452
453
/* we need to keep enough input to show errors in context */
454
122k
#define LINE_LEN        80
455
456
#ifdef DEBUG_INPUT
457
#define CHECK_BUFFER(in) check_buffer(in)
458
459
static
460
void check_buffer(xmlParserInputPtr in) {
461
    if (in->base != xmlBufContent(in->buf->buffer)) {
462
        xmlGenericError(xmlGenericErrorContext,
463
    "xmlParserInput: base mismatch problem\n");
464
    }
465
    if (in->cur < in->base) {
466
        xmlGenericError(xmlGenericErrorContext,
467
    "xmlParserInput: cur < base problem\n");
468
    }
469
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
470
        xmlGenericError(xmlGenericErrorContext,
471
    "xmlParserInput: cur > base + use problem\n");
472
    }
473
    xmlGenericError(xmlGenericErrorContext,"buffer %p : content %x, cur %d, use %d\n",
474
            (void *) in, (int) xmlBufContent(in->buf->buffer),
475
            in->cur - in->base, xmlBufUse(in->buf->buffer));
476
}
477
478
#else
479
#define CHECK_BUFFER(in)
480
#endif
481
482
483
/**
484
 * xmlHaltParser:
485
 * @ctxt:  an XML parser context
486
 *
487
 * Blocks further parser processing without overriding the error;
488
 * for internal use
489
 */
490
void
491
68.5k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
492
68.5k
    if (ctxt == NULL)
493
0
        return;
494
68.5k
    ctxt->instate = XML_PARSER_EOF;
495
68.5k
    ctxt->disableSAX = 1;
496
72.2k
    while (ctxt->inputNr > 1)
497
3.77k
        xmlFreeInputStream(inputPop(ctxt));
498
68.5k
    if (ctxt->input != NULL) {
499
        /*
500
   * in case there was a specific allocation deallocate before
501
   * overriding base
502
   */
503
68.5k
        if (ctxt->input->free != NULL) {
504
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
505
0
      ctxt->input->free = NULL;
506
0
  }
507
68.5k
        if (ctxt->input->buf != NULL) {
508
68.0k
            xmlFreeParserInputBuffer(ctxt->input->buf);
509
68.0k
            ctxt->input->buf = NULL;
510
68.0k
        }
511
68.5k
  ctxt->input->cur = BAD_CAST"";
512
68.5k
        ctxt->input->length = 0;
513
68.5k
  ctxt->input->base = ctxt->input->cur;
514
68.5k
        ctxt->input->end = ctxt->input->cur;
515
68.5k
    }
516
68.5k
}
517
518
/**
519
 * xmlParserInputRead:
520
 * @in:  an XML parser input
521
 * @len:  an indicative size for the lookahead
522
 *
523
 * DEPRECATED: This function was internal and is deprecated.
524
 *
525
 * Returns -1, as it is an error to use this function.
526
 */
527
int
528
0
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
529
0
    return(-1);
530
0
}
531
532
/**
533
 * xmlParserGrow:
534
 * @ctxt:  an XML parser context
535
 */
536
int
537
24.0M
xmlParserGrow(xmlParserCtxtPtr ctxt) {
538
24.0M
    xmlParserInputPtr in = ctxt->input;
539
24.0M
    xmlParserInputBufferPtr buf = in->buf;
540
24.0M
    ptrdiff_t curEnd = in->end - in->cur;
541
24.0M
    ptrdiff_t curBase = in->cur - in->base;
542
24.0M
    int ret;
543
544
24.0M
    if (buf == NULL)
545
1.01M
        return(0);
546
    /* Don't grow push parser buffer. */
547
23.0M
    if (ctxt->progressive)
548
0
        return(0);
549
    /* Don't grow memory buffers. */
550
23.0M
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
551
10.8M
        return(0);
552
553
12.1M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
554
12.1M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
555
12.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
556
7
        xmlErrMemory(ctxt, "Huge input lookup");
557
7
        xmlHaltParser(ctxt);
558
7
  return(-1);
559
7
    }
560
561
12.1M
    if (curEnd >= INPUT_CHUNK)
562
68.6k
        return(0);
563
564
12.0M
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
565
12.0M
    xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
566
567
12.0M
    if (ret < 0) {
568
15.6k
        xmlFatalErr(ctxt, buf->error, NULL);
569
15.6k
        xmlHaltParser(ctxt);
570
15.6k
    }
571
572
12.0M
    return(ret);
573
12.1M
}
574
575
/**
576
 * xmlParserInputGrow:
577
 * @in:  an XML parser input
578
 * @len:  an indicative size for the lookahead
579
 *
580
 * DEPRECATED: Don't use.
581
 *
582
 * This function increases the input for the parser. It tries to
583
 * preserve pointers to the input buffer, and keep already read data
584
 *
585
 * Returns the number of chars read, or -1 in case of error; 0 indicates the
586
 * end of this entity
587
 */
588
int
589
0
xmlParserInputGrow(xmlParserInputPtr in, int len) {
590
0
    int ret;
591
0
    size_t indx;
592
593
0
    if ((in == NULL) || (len < 0)) return(-1);
594
#ifdef DEBUG_INPUT
595
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
596
#endif
597
0
    if (in->buf == NULL) return(-1);
598
0
    if (in->base == NULL) return(-1);
599
0
    if (in->cur == NULL) return(-1);
600
0
    if (in->buf->buffer == NULL) return(-1);
601
602
    /* Don't grow memory buffers. */
603
0
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
604
0
        return(0);
605
606
0
    CHECK_BUFFER(in);
607
608
0
    indx = in->cur - in->base;
609
0
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
610
611
0
  CHECK_BUFFER(in);
612
613
0
        return(0);
614
0
    }
615
0
    ret = xmlParserInputBufferGrow(in->buf, len);
616
617
0
    in->base = xmlBufContent(in->buf->buffer);
618
0
    if (in->base == NULL) {
619
0
        in->base = BAD_CAST "";
620
0
        in->cur = in->base;
621
0
        in->end = in->base;
622
0
        return(-1);
623
0
    }
624
0
    in->cur = in->base + indx;
625
0
    in->end = xmlBufEnd(in->buf->buffer);
626
627
0
    CHECK_BUFFER(in);
628
629
0
    return(ret);
630
0
}
631
632
/**
633
 * xmlParserShrink:
634
 * @ctxt:  an XML parser context
635
 */
636
void
637
579k
xmlParserShrink(xmlParserCtxtPtr ctxt) {
638
579k
    xmlParserInputPtr in = ctxt->input;
639
579k
    xmlParserInputBufferPtr buf = in->buf;
640
579k
    size_t used;
641
642
    /* Don't shrink pull parser memory buffers. */
643
579k
    if ((buf == NULL) ||
644
579k
        ((ctxt->progressive == 0) &&
645
576k
         (buf->encoder == NULL) && (buf->readcallback == NULL)))
646
457k
        return;
647
648
122k
    used = in->cur - in->base;
649
    /*
650
     * Do not shrink large buffers of which only a tiny fraction
651
     * was consumed
652
     */
653
122k
    if (used > INPUT_CHUNK) {
654
122k
  size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
655
656
122k
  if (res > 0) {
657
122k
            used -= res;
658
122k
            if ((res > ULONG_MAX) ||
659
122k
                (in->consumed > ULONG_MAX - (unsigned long)res))
660
0
                in->consumed = ULONG_MAX;
661
122k
            else
662
122k
                in->consumed += res;
663
122k
  }
664
122k
    }
665
666
122k
    xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
667
122k
}
668
669
/**
670
 * xmlParserInputShrink:
671
 * @in:  an XML parser input
672
 *
673
 * DEPRECATED: Don't use.
674
 *
675
 * This function removes used input for the parser.
676
 */
677
void
678
0
xmlParserInputShrink(xmlParserInputPtr in) {
679
0
    size_t used;
680
0
    size_t ret;
681
682
#ifdef DEBUG_INPUT
683
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
684
#endif
685
0
    if (in == NULL) return;
686
0
    if (in->buf == NULL) return;
687
0
    if (in->base == NULL) return;
688
0
    if (in->cur == NULL) return;
689
0
    if (in->buf->buffer == NULL) return;
690
691
0
    CHECK_BUFFER(in);
692
693
0
    used = in->cur - in->base;
694
    /*
695
     * Do not shrink large buffers of which only a tiny fraction
696
     * was consumed
697
     */
698
0
    if (used > INPUT_CHUNK) {
699
0
  ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
700
0
  if (ret > 0) {
701
0
            used -= ret;
702
0
            if ((ret > ULONG_MAX) ||
703
0
                (in->consumed > ULONG_MAX - (unsigned long)ret))
704
0
                in->consumed = ULONG_MAX;
705
0
            else
706
0
                in->consumed += ret;
707
0
  }
708
0
    }
709
710
0
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
711
0
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
712
0
    }
713
714
0
    in->base = xmlBufContent(in->buf->buffer);
715
0
    if (in->base == NULL) {
716
        /* TODO: raise error */
717
0
        in->base = BAD_CAST "";
718
0
        in->cur = in->base;
719
0
        in->end = in->base;
720
0
        return;
721
0
    }
722
0
    in->cur = in->base + used;
723
0
    in->end = xmlBufEnd(in->buf->buffer);
724
725
0
    CHECK_BUFFER(in);
726
0
}
727
728
/************************************************************************
729
 *                  *
730
 *    UTF8 character input and related functions    *
731
 *                  *
732
 ************************************************************************/
733
734
/**
735
 * xmlNextChar:
736
 * @ctxt:  the XML parser context
737
 *
738
 * DEPRECATED: Internal function, do not use.
739
 *
740
 * Skip to the next input char.
741
 */
742
743
void
744
xmlNextChar(xmlParserCtxtPtr ctxt)
745
54.5M
{
746
54.5M
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
747
54.5M
        (ctxt->input == NULL))
748
527
        return;
749
750
54.5M
    if (!(VALID_CTXT(ctxt))) {
751
0
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
752
0
  ctxt->errNo = XML_ERR_INTERNAL_ERROR;
753
0
        xmlStopParser(ctxt);
754
0
  return;
755
0
    }
756
757
54.5M
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
758
2.37M
        if (xmlParserGrow(ctxt) < 0)
759
2.87k
            return;
760
2.36M
        if (ctxt->input->cur >= ctxt->input->end)
761
399
            return;
762
2.36M
    }
763
764
54.5M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
765
51.9M
        const unsigned char *cur;
766
51.9M
        unsigned char c;
767
768
        /*
769
         *   2.11 End-of-Line Handling
770
         *   the literal two-character sequence "#xD#xA" or a standalone
771
         *   literal #xD, an XML processor must pass to the application
772
         *   the single character #xA.
773
         */
774
51.9M
        if (*(ctxt->input->cur) == '\n') {
775
1.41M
            ctxt->input->line++; ctxt->input->col = 1;
776
1.41M
        } else
777
50.5M
            ctxt->input->col++;
778
779
        /*
780
         * We are supposed to handle UTF8, check it's valid
781
         * From rfc2044: encoding of the Unicode values on UTF-8:
782
         *
783
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
784
         * 0000 0000-0000 007F   0xxxxxxx
785
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
786
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
787
         *
788
         * Check for the 0x110000 limit too
789
         */
790
51.9M
        cur = ctxt->input->cur;
791
792
51.9M
        c = *cur;
793
51.9M
        if (c & 0x80) {
794
37.1M
            size_t avail;
795
796
37.1M
            if (c == 0xC0)
797
775
          goto encoding_error;
798
799
37.1M
            avail = ctxt->input->end - ctxt->input->cur;
800
801
37.1M
            if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
802
6.51k
                goto encoding_error;
803
37.1M
            if ((c & 0xe0) == 0xe0) {
804
37.0M
                unsigned int val;
805
806
37.0M
                if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
807
483
                    goto encoding_error;
808
37.0M
                if ((c & 0xf0) == 0xf0) {
809
1.00k
                    if (((c & 0xf8) != 0xf0) ||
810
1.00k
                        (avail < 4) || ((cur[3] & 0xc0) != 0x80))
811
418
                        goto encoding_error;
812
                    /* 4-byte code */
813
587
                    ctxt->input->cur += 4;
814
587
                    val = (cur[0] & 0x7) << 18;
815
587
                    val |= (cur[1] & 0x3f) << 12;
816
587
                    val |= (cur[2] & 0x3f) << 6;
817
587
                    val |= cur[3] & 0x3f;
818
37.0M
                } else {
819
                    /* 3-byte code */
820
37.0M
                    ctxt->input->cur += 3;
821
37.0M
                    val = (cur[0] & 0xf) << 12;
822
37.0M
                    val |= (cur[1] & 0x3f) << 6;
823
37.0M
                    val |= cur[2] & 0x3f;
824
37.0M
                }
825
37.0M
                if (((val > 0xd7ff) && (val < 0xe000)) ||
826
37.0M
                    ((val > 0xfffd) && (val < 0x10000)) ||
827
37.0M
                    (val >= 0x110000)) {
828
1.60k
    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
829
1.60k
          "Char 0x%X out of allowed range\n",
830
1.60k
          val);
831
1.60k
                }
832
37.0M
            } else
833
                /* 2-byte code */
834
132k
                ctxt->input->cur += 2;
835
37.1M
        } else
836
            /* 1-byte code */
837
14.8M
            ctxt->input->cur++;
838
51.9M
    } else {
839
        /*
840
         * Assume it's a fixed length encoding (1) with
841
         * a compatible encoding for the ASCII set, since
842
         * XML constructs only use < 128 chars
843
         */
844
845
2.53M
        if (*(ctxt->input->cur) == '\n') {
846
120k
            ctxt->input->line++; ctxt->input->col = 1;
847
120k
        } else
848
2.41M
            ctxt->input->col++;
849
2.53M
        ctxt->input->cur++;
850
2.53M
    }
851
54.5M
    return;
852
54.5M
encoding_error:
853
    /*
854
     * If we detect an UTF8 error that probably mean that the
855
     * input encoding didn't get properly advertised in the
856
     * declaration header. Report the error and switch the encoding
857
     * to ISO-Latin-1 (if you don't like this policy, just declare the
858
     * encoding !)
859
     */
860
8.19k
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
861
8.19k
        (ctxt->input->end - ctxt->input->cur < 4)) {
862
609
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
863
609
         "Input is not proper UTF-8, indicate encoding !\n",
864
609
         NULL, NULL);
865
7.58k
    } else {
866
7.58k
        char buffer[150];
867
868
7.58k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
869
7.58k
      ctxt->input->cur[0], ctxt->input->cur[1],
870
7.58k
      ctxt->input->cur[2], ctxt->input->cur[3]);
871
7.58k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
872
7.58k
         "Input is not proper UTF-8, indicate encoding !\n%s",
873
7.58k
         BAD_CAST buffer, NULL);
874
7.58k
    }
875
8.19k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
876
8.19k
    ctxt->input->cur++;
877
8.19k
    return;
878
54.5M
}
879
880
/**
881
 * xmlCurrentChar:
882
 * @ctxt:  the XML parser context
883
 * @len:  pointer to the length of the char read
884
 *
885
 * DEPRECATED: Internal function, do not use.
886
 *
887
 * The current char value, if using UTF-8 this may actually span multiple
888
 * bytes in the input buffer. Implement the end of line normalization:
889
 * 2.11 End-of-Line Handling
890
 * Wherever an external parsed entity or the literal entity value
891
 * of an internal parsed entity contains either the literal two-character
892
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
893
 * must pass to the application the single character #xA.
894
 * This behavior can conveniently be produced by normalizing all
895
 * line breaks to #xA on input, before parsing.)
896
 *
897
 * Returns the current char value and its length
898
 */
899
900
int
901
995M
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
902
995M
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
903
995M
    if (ctxt->instate == XML_PARSER_EOF)
904
2.12k
  return(0);
905
906
995M
    if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
907
995M
        (xmlParserGrow(ctxt) < 0))
908
6.51k
        return(0);
909
910
995M
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
911
125M
      *len = 1;
912
125M
      return(*ctxt->input->cur);
913
125M
    }
914
869M
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
915
  /*
916
   * We are supposed to handle UTF8, check it's valid
917
   * From rfc2044: encoding of the Unicode values on UTF-8:
918
   *
919
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
920
   * 0000 0000-0000 007F   0xxxxxxx
921
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
922
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
923
   *
924
   * Check for the 0x110000 limit too
925
   */
926
820M
  const unsigned char *cur = ctxt->input->cur;
927
820M
  unsigned char c;
928
820M
  unsigned int val;
929
930
820M
  c = *cur;
931
820M
  if (c & 0x80) {
932
739M
            size_t avail;
933
934
739M
      if (((c & 0x40) == 0) || (c == 0xC0))
935
11.3k
    goto encoding_error;
936
937
739M
            avail = ctxt->input->end - ctxt->input->cur;
938
939
739M
            if (avail < 2)
940
629
                goto incomplete_sequence;
941
739M
      if ((cur[1] & 0xc0) != 0x80)
942
25.5k
    goto encoding_error;
943
739M
      if ((c & 0xe0) == 0xe0) {
944
686M
                if (avail < 3)
945
252
                    goto incomplete_sequence;
946
686M
    if ((cur[2] & 0xc0) != 0x80)
947
1.11k
        goto encoding_error;
948
686M
    if ((c & 0xf0) == 0xf0) {
949
32.1k
                    if (avail < 4)
950
100
                        goto incomplete_sequence;
951
32.0k
        if (((c & 0xf8) != 0xf0) ||
952
32.0k
      ((cur[3] & 0xc0) != 0x80))
953
744
      goto encoding_error;
954
        /* 4-byte code */
955
31.3k
        *len = 4;
956
31.3k
        val = (cur[0] & 0x7) << 18;
957
31.3k
        val |= (cur[1] & 0x3f) << 12;
958
31.3k
        val |= (cur[2] & 0x3f) << 6;
959
31.3k
        val |= cur[3] & 0x3f;
960
31.3k
        if (val < 0x10000)
961
213
      goto encoding_error;
962
686M
    } else {
963
      /* 3-byte code */
964
686M
        *len = 3;
965
686M
        val = (cur[0] & 0xf) << 12;
966
686M
        val |= (cur[1] & 0x3f) << 6;
967
686M
        val |= cur[2] & 0x3f;
968
686M
        if (val < 0x800)
969
259
      goto encoding_error;
970
686M
    }
971
686M
      } else {
972
        /* 2-byte code */
973
53.0M
    *len = 2;
974
53.0M
    val = (cur[0] & 0x1f) << 6;
975
53.0M
    val |= cur[1] & 0x3f;
976
53.0M
    if (val < 0x80)
977
558
        goto encoding_error;
978
53.0M
      }
979
739M
      if (!IS_CHAR(val)) {
980
61.9k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
981
61.9k
          "Char 0x%X out of allowed range\n", val);
982
61.9k
      }
983
739M
      return(val);
984
739M
  } else {
985
      /* 1-byte code */
986
80.9M
      *len = 1;
987
80.9M
      if ((*ctxt->input->cur == 0) &&
988
80.9M
          (ctxt->input->end > ctxt->input->cur)) {
989
12.7k
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
990
12.7k
          "Char 0x0 out of allowed range\n", 0);
991
12.7k
      }
992
80.9M
      if (*ctxt->input->cur == 0xD) {
993
341k
    if (ctxt->input->cur[1] == 0xA) {
994
16.4k
        ctxt->input->cur++;
995
16.4k
    }
996
341k
    return(0xA);
997
341k
      }
998
80.6M
      return(*ctxt->input->cur);
999
80.9M
  }
1000
820M
    }
1001
    /*
1002
     * Assume it's a fixed length encoding (1) with
1003
     * a compatible encoding for the ASCII set, since
1004
     * XML constructs only use < 128 chars
1005
     */
1006
49.1M
    *len = 1;
1007
49.1M
    if (*ctxt->input->cur == 0xD) {
1008
61.9k
  if (ctxt->input->cur[1] == 0xA) {
1009
12.8k
      ctxt->input->cur++;
1010
12.8k
  }
1011
61.9k
  return(0xA);
1012
61.9k
    }
1013
49.0M
    return(*ctxt->input->cur);
1014
1015
39.7k
encoding_error:
1016
    /*
1017
     * If we detect an UTF8 error that probably mean that the
1018
     * input encoding didn't get properly advertised in the
1019
     * declaration header. Report the error and switch the encoding
1020
     * to ISO-Latin-1 (if you don't like this policy, just declare the
1021
     * encoding !)
1022
     */
1023
39.7k
    if (ctxt->input->end - ctxt->input->cur < 4) {
1024
798
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
1025
798
         "Input is not proper UTF-8, indicate encoding !\n",
1026
798
         NULL, NULL);
1027
38.9k
    } else {
1028
38.9k
        char buffer[150];
1029
1030
38.9k
  snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1031
38.9k
      ctxt->input->cur[0], ctxt->input->cur[1],
1032
38.9k
      ctxt->input->cur[2], ctxt->input->cur[3]);
1033
38.9k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
1034
38.9k
         "Input is not proper UTF-8, indicate encoding !\n%s",
1035
38.9k
         BAD_CAST buffer, NULL);
1036
38.9k
    }
1037
39.7k
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
1038
39.7k
    *len = 1;
1039
39.7k
    return(*ctxt->input->cur);
1040
1041
981
incomplete_sequence:
1042
    /*
1043
     * An encoding problem may arise from a truncated input buffer
1044
     * splitting a character in the middle. In that case do not raise
1045
     * an error but return 0. This should only happen when push parsing
1046
     * char data.
1047
     */
1048
981
    *len = 0;
1049
981
    return(0);
1050
49.1M
}
1051
1052
/**
1053
 * xmlStringCurrentChar:
1054
 * @ctxt:  the XML parser context
1055
 * @cur:  pointer to the beginning of the char
1056
 * @len:  pointer to the length of the char read
1057
 *
1058
 * DEPRECATED: Internal function, do not use.
1059
 *
1060
 * The current char value, if using UTF-8 this may actually span multiple
1061
 * bytes in the input buffer.
1062
 *
1063
 * Returns the current char value and its length
1064
 */
1065
1066
int
1067
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1068
603M
{
1069
603M
    if ((len == NULL) || (cur == NULL)) return(0);
1070
603M
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
1071
        /*
1072
         * We are supposed to handle UTF8, check it's valid
1073
         * From rfc2044: encoding of the Unicode values on UTF-8:
1074
         *
1075
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1076
         * 0000 0000-0000 007F   0xxxxxxx
1077
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1078
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1079
         *
1080
         * Check for the 0x110000 limit too
1081
         */
1082
587M
        unsigned char c;
1083
587M
        unsigned int val;
1084
1085
587M
        c = *cur;
1086
587M
        if (c & 0x80) {
1087
548M
            if ((cur[1] & 0xc0) != 0x80)
1088
810
                goto encoding_error;
1089
548M
            if ((c & 0xe0) == 0xe0) {
1090
1091
546M
                if ((cur[2] & 0xc0) != 0x80)
1092
0
                    goto encoding_error;
1093
546M
                if ((c & 0xf0) == 0xf0) {
1094
5.35k
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1095
0
                        goto encoding_error;
1096
                    /* 4-byte code */
1097
5.35k
                    *len = 4;
1098
5.35k
                    val = (cur[0] & 0x7) << 18;
1099
5.35k
                    val |= (cur[1] & 0x3f) << 12;
1100
5.35k
                    val |= (cur[2] & 0x3f) << 6;
1101
5.35k
                    val |= cur[3] & 0x3f;
1102
546M
                } else {
1103
                    /* 3-byte code */
1104
546M
                    *len = 3;
1105
546M
                    val = (cur[0] & 0xf) << 12;
1106
546M
                    val |= (cur[1] & 0x3f) << 6;
1107
546M
                    val |= cur[2] & 0x3f;
1108
546M
                }
1109
546M
            } else {
1110
                /* 2-byte code */
1111
1.53M
                *len = 2;
1112
1.53M
                val = (cur[0] & 0x1f) << 6;
1113
1.53M
                val |= cur[1] & 0x3f;
1114
1.53M
            }
1115
548M
            if (!IS_CHAR(val)) {
1116
0
          xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
1117
0
          "Char 0x%X out of allowed range\n", val);
1118
0
            }
1119
548M
            return (val);
1120
548M
        } else {
1121
            /* 1-byte code */
1122
39.2M
            *len = 1;
1123
39.2M
            return (*cur);
1124
39.2M
        }
1125
587M
    }
1126
    /*
1127
     * Assume it's a fixed length encoding (1) with
1128
     * a compatible encoding for the ASCII set, since
1129
     * XML constructs only use < 128 chars
1130
     */
1131
16.0M
    *len = 1;
1132
16.0M
    return (*cur);
1133
810
encoding_error:
1134
1135
    /*
1136
     * An encoding problem may arise from a truncated input buffer
1137
     * splitting a character in the middle. In that case do not raise
1138
     * an error but return 0 to indicate an end of stream problem
1139
     */
1140
810
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
1141
810
        (ctxt->input->end - ctxt->input->cur < 4)) {
1142
810
  *len = 0;
1143
810
  return(0);
1144
810
    }
1145
    /*
1146
     * If we detect an UTF8 error that probably mean that the
1147
     * input encoding didn't get properly advertised in the
1148
     * declaration header. Report the error and switch the encoding
1149
     * to ISO-Latin-1 (if you don't like this policy, just declare the
1150
     * encoding !)
1151
     */
1152
0
    {
1153
0
        char buffer[150];
1154
1155
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1156
0
      ctxt->input->cur[0], ctxt->input->cur[1],
1157
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
1158
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
1159
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
1160
0
         BAD_CAST buffer, NULL);
1161
0
    }
1162
0
    *len = 1;
1163
0
    return (*cur);
1164
810
}
1165
1166
/**
1167
 * xmlCopyCharMultiByte:
1168
 * @out:  pointer to an array of xmlChar
1169
 * @val:  the char value
1170
 *
1171
 * append the char value in the array
1172
 *
1173
 * Returns the number of xmlChar written
1174
 */
1175
int
1176
1.22G
xmlCopyCharMultiByte(xmlChar *out, int val) {
1177
1.22G
    if ((out == NULL) || (val < 0)) return(0);
1178
    /*
1179
     * We are supposed to handle UTF8, check it's valid
1180
     * From rfc2044: encoding of the Unicode values on UTF-8:
1181
     *
1182
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1183
     * 0000 0000-0000 007F   0xxxxxxx
1184
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1185
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1186
     */
1187
1.22G
    if  (val >= 0x80) {
1188
1.22G
  xmlChar *savedout = out;
1189
1.22G
  int bits;
1190
1.22G
  if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1191
1.17G
  else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1192
20.3k
  else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1193
0
  else {
1194
0
      xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
1195
0
        "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1196
0
            val);
1197
0
      return(0);
1198
0
  }
1199
3.62G
  for ( ; bits >= 0; bits-= 6)
1200
2.39G
      *out++= ((val >> bits) & 0x3F) | 0x80 ;
1201
1.22G
  return (out - savedout);
1202
1.22G
    }
1203
129k
    *out = val;
1204
129k
    return 1;
1205
1.22G
}
1206
1207
/**
1208
 * xmlCopyChar:
1209
 * @len:  Ignored, compatibility
1210
 * @out:  pointer to an array of xmlChar
1211
 * @val:  the char value
1212
 *
1213
 * append the char value in the array
1214
 *
1215
 * Returns the number of xmlChar written
1216
 */
1217
1218
int
1219
2.43M
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1220
2.43M
    if ((out == NULL) || (val < 0)) return(0);
1221
    /* the len parameter is ignored */
1222
2.43M
    if  (val >= 0x80) {
1223
2.29M
  return(xmlCopyCharMultiByte (out, val));
1224
2.29M
    }
1225
140k
    *out = val;
1226
140k
    return 1;
1227
2.43M
}
1228
1229
/************************************************************************
1230
 *                  *
1231
 *    Commodity functions to switch encodings     *
1232
 *                  *
1233
 ************************************************************************/
1234
1235
static xmlCharEncodingHandlerPtr
1236
2.43k
xmlDetectEBCDIC(xmlParserInputPtr input) {
1237
2.43k
    xmlChar out[200];
1238
2.43k
    xmlCharEncodingHandlerPtr handler;
1239
2.43k
    int inlen, outlen, res, i;
1240
1241
    /*
1242
     * To detect the EBCDIC code page, we convert the first 200 bytes
1243
     * to EBCDIC-US and try to find the encoding declaration.
1244
     */
1245
2.43k
    handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1246
2.43k
    if (handler == NULL)
1247
1
        return(NULL);
1248
2.43k
    outlen = sizeof(out) - 1;
1249
2.43k
    inlen = input->end - input->cur;
1250
2.43k
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1251
2.43k
    if (res < 0)
1252
261
        return(handler);
1253
2.16k
    out[outlen] = 0;
1254
1255
60.6k
    for (i = 0; i < outlen; i++) {
1256
60.0k
        if (out[i] == '>')
1257
198
            break;
1258
59.8k
        if ((out[i] == 'e') &&
1259
59.8k
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1260
1.42k
            int start, cur, quote;
1261
1262
1.42k
            i += 8;
1263
1.42k
            while (IS_BLANK_CH(out[i]))
1264
1.50k
                i += 1;
1265
1.42k
            if (out[i++] != '=')
1266
304
                break;
1267
1.12k
            while (IS_BLANK_CH(out[i]))
1268
830
                i += 1;
1269
1.12k
            quote = out[i++];
1270
1.12k
            if ((quote != '\'') && (quote != '"'))
1271
383
                break;
1272
739
            start = i;
1273
739
            cur = out[i];
1274
2.55k
            while (((cur >= 'a') && (cur <= 'z')) ||
1275
2.55k
                   ((cur >= 'A') && (cur <= 'Z')) ||
1276
2.55k
                   ((cur >= '0') && (cur <= '9')) ||
1277
2.55k
                   (cur == '.') || (cur == '_') ||
1278
2.55k
                   (cur == '-'))
1279
1.82k
                cur = out[++i];
1280
739
            if (cur != quote)
1281
487
                break;
1282
252
            out[i] = 0;
1283
252
            xmlCharEncCloseFunc(handler);
1284
252
            handler = xmlFindCharEncodingHandler((char *) out + start);
1285
252
            break;
1286
739
        }
1287
59.8k
    }
1288
1289
2.16k
    return(handler);
1290
2.43k
}
1291
1292
/**
1293
 * xmlSwitchEncoding:
1294
 * @ctxt:  the parser context
1295
 * @enc:  the encoding value (number)
1296
 *
1297
 * change the input functions when discovering the character encoding
1298
 * of a given entity.
1299
 *
1300
 * Returns 0 in case of success, -1 otherwise
1301
 */
1302
int
1303
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1304
236k
{
1305
236k
    xmlCharEncodingHandlerPtr handler;
1306
236k
    int ret;
1307
1308
236k
    if (ctxt == NULL) return(-1);
1309
236k
    switch (enc) {
1310
0
  case XML_CHAR_ENCODING_ERROR:
1311
0
      __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1312
0
                     "encoding unknown\n", NULL, NULL);
1313
0
      return(-1);
1314
0
  case XML_CHAR_ENCODING_NONE:
1315
      /* let's assume it's UTF-8 without the XML decl */
1316
0
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1317
0
      return(0);
1318
229k
  case XML_CHAR_ENCODING_UTF8:
1319
      /* default encoding, no conversion should be needed */
1320
229k
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1321
1322
      /*
1323
       * Errata on XML-1.0 June 20 2001
1324
       * Specific handling of the Byte Order Mark for
1325
       * UTF-8
1326
       */
1327
229k
      if ((ctxt->input != NULL) &&
1328
229k
    (ctxt->input->cur[0] == 0xEF) &&
1329
229k
    (ctxt->input->cur[1] == 0xBB) &&
1330
229k
    (ctxt->input->cur[2] == 0xBF)) {
1331
12.2k
    ctxt->input->cur += 3;
1332
12.2k
      }
1333
229k
      return(0);
1334
2.43k
        case XML_CHAR_ENCODING_EBCDIC:
1335
2.43k
            handler = xmlDetectEBCDIC(ctxt->input);
1336
2.43k
            break;
1337
3.78k
        default:
1338
3.78k
            handler = xmlGetCharEncodingHandler(enc);
1339
3.78k
            break;
1340
236k
    }
1341
6.21k
    if (handler == NULL) {
1342
  /*
1343
   * Default handlers.
1344
   */
1345
677
  switch (enc) {
1346
0
      case XML_CHAR_ENCODING_ASCII:
1347
    /* default encoding, no conversion should be needed */
1348
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1349
0
    return(0);
1350
0
      case XML_CHAR_ENCODING_8859_1:
1351
0
    if ((ctxt->inputNr == 1) &&
1352
0
        (ctxt->encoding == NULL) &&
1353
0
        (ctxt->input != NULL) &&
1354
0
        (ctxt->input->encoding != NULL)) {
1355
0
        ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1356
0
    }
1357
0
    ctxt->charset = enc;
1358
0
    return(0);
1359
677
      default:
1360
677
    __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1361
677
                        "encoding not supported: %s\n",
1362
677
      BAD_CAST xmlGetCharEncodingName(enc), NULL);
1363
                /*
1364
                 * TODO: We could recover from errors in external entities
1365
                 * if we didn't stop the parser. But most callers of this
1366
                 * function don't check the return value.
1367
                 */
1368
677
                xmlStopParser(ctxt);
1369
677
                return(-1);
1370
677
        }
1371
677
    }
1372
5.53k
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1373
5.53k
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1374
        /*
1375
   * on encoding conversion errors, stop the parser
1376
   */
1377
216
        xmlStopParser(ctxt);
1378
216
  ctxt->errNo = XML_I18N_CONV_FAILED;
1379
216
    }
1380
5.53k
    return(ret);
1381
6.21k
}
1382
1383
/**
1384
 * xmlSwitchInputEncoding:
1385
 * @ctxt:  the parser context
1386
 * @input:  the input stream
1387
 * @handler:  the encoding handler
1388
 *
1389
 * change the input functions when discovering the character encoding
1390
 * of a given entity.
1391
 *
1392
 * Returns 0 in case of success, -1 otherwise
1393
 */
1394
int
1395
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1396
                       xmlCharEncodingHandlerPtr handler)
1397
181k
{
1398
181k
    int nbchars;
1399
181k
    xmlParserInputBufferPtr in;
1400
1401
181k
    if (handler == NULL)
1402
0
        return (-1);
1403
181k
    if (input == NULL)
1404
0
        return (-1);
1405
181k
    in = input->buf;
1406
181k
    if (in == NULL) {
1407
0
  xmlErrInternal(ctxt,
1408
0
                "static memory buffer doesn't support encoding\n", NULL);
1409
        /*
1410
         * Callers assume that the input buffer takes ownership of the
1411
         * encoding handler. xmlCharEncCloseFunc frees unregistered
1412
         * handlers and avoids a memory leak.
1413
         */
1414
0
        xmlCharEncCloseFunc(handler);
1415
0
  return (-1);
1416
0
    }
1417
1418
181k
    if (in->encoder != NULL) {
1419
0
        if (in->encoder == handler)
1420
0
            return (0);
1421
1422
        /*
1423
         * Switching encodings during parsing is a really bad idea,
1424
         * but WebKit/Chromium switches from ISO-8859-1 to UTF-16 as soon as
1425
         * it finds Unicode characters with code points larger than 255.
1426
         *
1427
         * TODO: We should check whether the "raw" input buffer is empty and
1428
         * convert the old content using the old encoder.
1429
         */
1430
1431
0
        xmlCharEncCloseFunc(in->encoder);
1432
0
        in->encoder = handler;
1433
0
        return (0);
1434
0
    }
1435
1436
181k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1437
181k
    in->encoder = handler;
1438
1439
    /*
1440
     * Is there already some content down the pipe to convert ?
1441
     */
1442
181k
    if (xmlBufIsEmpty(in->buffer) == 0) {
1443
181k
        size_t processed, use, consumed;
1444
1445
        /*
1446
         * Specific handling of the Byte Order Mark for
1447
         * UTF-16
1448
         */
1449
181k
        if ((handler->name != NULL) &&
1450
181k
            (!strcmp(handler->name, "UTF-16LE") ||
1451
181k
             !strcmp(handler->name, "UTF-16")) &&
1452
181k
            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1453
661
            input->cur += 2;
1454
661
        }
1455
181k
        if ((handler->name != NULL) &&
1456
181k
            (!strcmp(handler->name, "UTF-16BE")) &&
1457
181k
            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1458
375
            input->cur += 2;
1459
375
        }
1460
        /*
1461
         * Errata on XML-1.0 June 20 2001
1462
         * Specific handling of the Byte Order Mark for
1463
         * UTF-8
1464
         */
1465
181k
        if ((handler->name != NULL) &&
1466
181k
            (!strcmp(handler->name, "UTF-8")) &&
1467
181k
            (input->cur[0] == 0xEF) &&
1468
181k
            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1469
0
            input->cur += 3;
1470
0
        }
1471
1472
        /*
1473
         * Shrink the current input buffer.
1474
         * Move it as the raw buffer and create a new input buffer
1475
         */
1476
181k
        processed = input->cur - input->base;
1477
181k
        xmlBufShrink(in->buffer, processed);
1478
181k
        input->consumed += processed;
1479
181k
        in->raw = in->buffer;
1480
181k
        in->buffer = xmlBufCreate();
1481
181k
        in->rawconsumed = processed;
1482
181k
        use = xmlBufUse(in->raw);
1483
1484
        /*
1485
         * TODO: We must flush and decode the whole buffer to make functions
1486
         * like xmlReadMemory work with a user-provided encoding. If the
1487
         * encoding is specified directly, we should probably set
1488
         * XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1489
         * twice. Then we could set "flush" to false which should save
1490
         * a considerable amount of memory when parsing from memory.
1491
         * It's probably even possible to remove this whole if-block
1492
         * completely.
1493
         */
1494
181k
        nbchars = xmlCharEncInput(in, 1);
1495
181k
        xmlBufResetInput(in->buffer, input);
1496
181k
        if (nbchars < 0) {
1497
            /* TODO: This could be an out of memory or an encoding error. */
1498
536
            xmlErrInternal(ctxt,
1499
536
                           "switching encoding: encoder error\n",
1500
536
                           NULL);
1501
536
            xmlHaltParser(ctxt);
1502
536
            return (-1);
1503
536
        }
1504
181k
        consumed = use - xmlBufUse(in->raw);
1505
181k
        if ((consumed > ULONG_MAX) ||
1506
181k
            (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1507
0
            in->rawconsumed = ULONG_MAX;
1508
181k
        else
1509
181k
      in->rawconsumed += consumed;
1510
181k
    }
1511
181k
    return (0);
1512
181k
}
1513
1514
/**
1515
 * xmlSwitchToEncoding:
1516
 * @ctxt:  the parser context
1517
 * @handler:  the encoding handler
1518
 *
1519
 * change the input functions when discovering the character encoding
1520
 * of a given entity.
1521
 *
1522
 * Returns 0 in case of success, -1 otherwise
1523
 */
1524
int
1525
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1526
176k
{
1527
176k
    if (ctxt == NULL)
1528
0
        return(-1);
1529
176k
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1530
176k
}
1531
1532
/************************************************************************
1533
 *                  *
1534
 *  Commodity functions to handle entities processing   *
1535
 *                  *
1536
 ************************************************************************/
1537
1538
/**
1539
 * xmlFreeInputStream:
1540
 * @input:  an xmlParserInputPtr
1541
 *
1542
 * Free up an input stream.
1543
 */
1544
void
1545
390k
xmlFreeInputStream(xmlParserInputPtr input) {
1546
390k
    if (input == NULL) return;
1547
1548
383k
    if (input->filename != NULL) xmlFree((char *) input->filename);
1549
383k
    if (input->directory != NULL) xmlFree((char *) input->directory);
1550
383k
    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1551
383k
    if (input->version != NULL) xmlFree((char *) input->version);
1552
383k
    if ((input->free != NULL) && (input->base != NULL))
1553
0
        input->free((xmlChar *) input->base);
1554
383k
    if (input->buf != NULL)
1555
297k
        xmlFreeParserInputBuffer(input->buf);
1556
383k
    xmlFree(input);
1557
383k
}
1558
1559
/**
1560
 * xmlNewInputStream:
1561
 * @ctxt:  an XML parser context
1562
 *
1563
 * Create a new input stream structure.
1564
 *
1565
 * Returns the new input stream or NULL
1566
 */
1567
xmlParserInputPtr
1568
383k
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1569
383k
    xmlParserInputPtr input;
1570
1571
383k
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1572
383k
    if (input == NULL) {
1573
71
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1574
71
  return(NULL);
1575
71
    }
1576
383k
    memset(input, 0, sizeof(xmlParserInput));
1577
383k
    input->line = 1;
1578
383k
    input->col = 1;
1579
383k
    input->standalone = -1;
1580
1581
    /*
1582
     * If the context is NULL the id cannot be initialized, but that
1583
     * should not happen while parsing which is the situation where
1584
     * the id is actually needed.
1585
     */
1586
383k
    if (ctxt != NULL) {
1587
383k
        if (input->id >= INT_MAX) {
1588
0
            xmlErrMemory(ctxt, "Input ID overflow\n");
1589
0
            return(NULL);
1590
0
        }
1591
383k
        input->id = ctxt->input_id++;
1592
383k
    }
1593
1594
383k
    return(input);
1595
383k
}
1596
1597
/**
1598
 * xmlNewIOInputStream:
1599
 * @ctxt:  an XML parser context
1600
 * @input:  an I/O Input
1601
 * @enc:  the charset encoding if known
1602
 *
1603
 * Create a new input stream structure encapsulating the @input into
1604
 * a stream suitable for the parser.
1605
 *
1606
 * Returns the new input stream or NULL
1607
 */
1608
xmlParserInputPtr
1609
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1610
0
              xmlCharEncoding enc) {
1611
0
    xmlParserInputPtr inputStream;
1612
1613
0
    if (input == NULL) return(NULL);
1614
0
    if (xmlParserDebugEntities)
1615
0
  xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1616
0
    inputStream = xmlNewInputStream(ctxt);
1617
0
    if (inputStream == NULL) {
1618
0
  return(NULL);
1619
0
    }
1620
0
    inputStream->filename = NULL;
1621
0
    inputStream->buf = input;
1622
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1623
1624
0
    if (enc != XML_CHAR_ENCODING_NONE) {
1625
0
        xmlSwitchEncoding(ctxt, enc);
1626
0
    }
1627
1628
0
    return(inputStream);
1629
0
}
1630
1631
/**
1632
 * xmlNewEntityInputStream:
1633
 * @ctxt:  an XML parser context
1634
 * @entity:  an Entity pointer
1635
 *
1636
 * DEPRECATED: Internal function, do not use.
1637
 *
1638
 * Create a new input stream based on an xmlEntityPtr
1639
 *
1640
 * Returns the new input stream or NULL
1641
 */
1642
xmlParserInputPtr
1643
42.0k
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1644
42.0k
    xmlParserInputPtr input;
1645
1646
42.0k
    if (entity == NULL) {
1647
0
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1648
0
                 NULL);
1649
0
  return(NULL);
1650
0
    }
1651
42.0k
    if (xmlParserDebugEntities)
1652
0
  xmlGenericError(xmlGenericErrorContext,
1653
0
    "new input from entity: %s\n", entity->name);
1654
42.0k
    if (entity->content == NULL) {
1655
24.5k
  switch (entity->etype) {
1656
0
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1657
0
          xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1658
0
                   entity->name);
1659
0
                break;
1660
0
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1661
24.4k
            case XML_EXTERNAL_PARAMETER_ENTITY:
1662
24.4k
    input = xmlLoadExternalEntity((char *) entity->URI,
1663
24.4k
           (char *) entity->ExternalID, ctxt);
1664
24.4k
                if (input != NULL)
1665
17.4k
                    input->entity = entity;
1666
24.4k
                return(input);
1667
0
            case XML_INTERNAL_GENERAL_ENTITY:
1668
0
          xmlErrInternal(ctxt,
1669
0
          "Internal entity %s without content !\n",
1670
0
                   entity->name);
1671
0
                break;
1672
90
            case XML_INTERNAL_PARAMETER_ENTITY:
1673
90
          xmlErrInternal(ctxt,
1674
90
          "Internal parameter entity %s without content !\n",
1675
90
                   entity->name);
1676
90
                break;
1677
0
            case XML_INTERNAL_PREDEFINED_ENTITY:
1678
0
          xmlErrInternal(ctxt,
1679
0
          "Predefined entity %s without content !\n",
1680
0
                   entity->name);
1681
0
                break;
1682
24.5k
  }
1683
90
  return(NULL);
1684
24.5k
    }
1685
17.5k
    input = xmlNewInputStream(ctxt);
1686
17.5k
    if (input == NULL) {
1687
7
  return(NULL);
1688
7
    }
1689
17.5k
    if (entity->URI != NULL)
1690
52
  input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1691
17.5k
    input->base = entity->content;
1692
17.5k
    if (entity->length == 0)
1693
68
        entity->length = xmlStrlen(entity->content);
1694
17.5k
    input->cur = entity->content;
1695
17.5k
    input->length = entity->length;
1696
17.5k
    input->end = &entity->content[input->length];
1697
17.5k
    input->entity = entity;
1698
17.5k
    return(input);
1699
17.5k
}
1700
1701
/**
1702
 * xmlNewStringInputStream:
1703
 * @ctxt:  an XML parser context
1704
 * @buffer:  an memory buffer
1705
 *
1706
 * Create a new input stream based on a memory buffer.
1707
 * Returns the new input stream
1708
 */
1709
xmlParserInputPtr
1710
0
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1711
0
    xmlParserInputPtr input;
1712
0
    xmlParserInputBufferPtr buf;
1713
1714
0
    if (buffer == NULL) {
1715
0
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1716
0
                 NULL);
1717
0
  return(NULL);
1718
0
    }
1719
0
    if (xmlParserDebugEntities)
1720
0
  xmlGenericError(xmlGenericErrorContext,
1721
0
    "new fixed input: %.30s\n", buffer);
1722
0
    buf = xmlParserInputBufferCreateMem((const char *) buffer,
1723
0
                                        xmlStrlen(buffer),
1724
0
                                        XML_CHAR_ENCODING_NONE);
1725
0
    if (buf == NULL) {
1726
0
  xmlErrMemory(ctxt, NULL);
1727
0
        return(NULL);
1728
0
    }
1729
0
    input = xmlNewInputStream(ctxt);
1730
0
    if (input == NULL) {
1731
0
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1732
0
  xmlFreeParserInputBuffer(buf);
1733
0
  return(NULL);
1734
0
    }
1735
0
    input->buf = buf;
1736
0
    xmlBufResetInput(input->buf->buffer, input);
1737
0
    return(input);
1738
0
}
1739
1740
/**
1741
 * xmlNewInputFromFile:
1742
 * @ctxt:  an XML parser context
1743
 * @filename:  the filename to use as entity
1744
 *
1745
 * Create a new input stream based on a file or an URL.
1746
 *
1747
 * Returns the new input stream or NULL in case of error
1748
 */
1749
xmlParserInputPtr
1750
0
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1751
0
    xmlParserInputBufferPtr buf;
1752
0
    xmlParserInputPtr inputStream;
1753
0
    char *directory = NULL;
1754
0
    xmlChar *URI = NULL;
1755
1756
0
    if (xmlParserDebugEntities)
1757
0
  xmlGenericError(xmlGenericErrorContext,
1758
0
    "new input from file: %s\n", filename);
1759
0
    if (ctxt == NULL) return(NULL);
1760
0
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1761
0
    if (buf == NULL) {
1762
0
  if (filename == NULL)
1763
0
      __xmlLoaderErr(ctxt,
1764
0
                     "failed to load external entity: NULL filename \n",
1765
0
         NULL);
1766
0
  else
1767
0
      __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1768
0
         (const char *) filename);
1769
0
  return(NULL);
1770
0
    }
1771
1772
0
    inputStream = xmlNewInputStream(ctxt);
1773
0
    if (inputStream == NULL) {
1774
0
  xmlFreeParserInputBuffer(buf);
1775
0
  return(NULL);
1776
0
    }
1777
1778
0
    inputStream->buf = buf;
1779
0
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1780
0
    if (inputStream == NULL)
1781
0
        return(NULL);
1782
1783
0
    if (inputStream->filename == NULL)
1784
0
  URI = xmlStrdup((xmlChar *) filename);
1785
0
    else
1786
0
  URI = xmlStrdup((xmlChar *) inputStream->filename);
1787
0
    directory = xmlParserGetDirectory((const char *) URI);
1788
0
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1789
0
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1790
0
    if (URI != NULL) xmlFree((char *) URI);
1791
0
    inputStream->directory = directory;
1792
1793
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1794
0
    if ((ctxt->directory == NULL) && (directory != NULL))
1795
0
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1796
0
    return(inputStream);
1797
0
}
1798
1799
/************************************************************************
1800
 *                  *
1801
 *    Commodity functions to handle parser contexts   *
1802
 *                  *
1803
 ************************************************************************/
1804
1805
/**
1806
 * xmlInitSAXParserCtxt:
1807
 * @ctxt:  XML parser context
1808
 * @sax:  SAX handler
1809
 * @userData:  user data
1810
 *
1811
 * Initialize a SAX parser context
1812
 *
1813
 * Returns 0 in case of success and -1 in case of error
1814
 */
1815
1816
static int
1817
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1818
                     void *userData)
1819
421k
{
1820
421k
    xmlParserInputPtr input;
1821
1822
421k
    if(ctxt==NULL) {
1823
0
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1824
0
        return(-1);
1825
0
    }
1826
1827
421k
    xmlInitParser();
1828
1829
421k
    if (ctxt->dict == NULL)
1830
421k
  ctxt->dict = xmlDictCreate();
1831
421k
    if (ctxt->dict == NULL) {
1832
63
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1833
63
  return(-1);
1834
63
    }
1835
421k
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1836
1837
421k
    if (ctxt->sax == NULL)
1838
421k
  ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1839
421k
    if (ctxt->sax == NULL) {
1840
40
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1841
40
  return(-1);
1842
40
    }
1843
421k
    if (sax == NULL) {
1844
409k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1845
409k
        xmlSAXVersion(ctxt->sax, 2);
1846
409k
        ctxt->userData = ctxt;
1847
409k
    } else {
1848
11.2k
  if (sax->initialized == XML_SAX2_MAGIC) {
1849
11.2k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1850
11.2k
        } else {
1851
0
      memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1852
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1853
0
        }
1854
11.2k
        ctxt->userData = userData ? userData : ctxt;
1855
11.2k
    }
1856
1857
421k
    ctxt->maxatts = 0;
1858
421k
    ctxt->atts = NULL;
1859
    /* Allocate the Input stack */
1860
421k
    if (ctxt->inputTab == NULL) {
1861
421k
  ctxt->inputTab = (xmlParserInputPtr *)
1862
421k
        xmlMalloc(5 * sizeof(xmlParserInputPtr));
1863
421k
  ctxt->inputMax = 5;
1864
421k
    }
1865
421k
    if (ctxt->inputTab == NULL) {
1866
41
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1867
41
  ctxt->inputNr = 0;
1868
41
  ctxt->inputMax = 0;
1869
41
  ctxt->input = NULL;
1870
41
  return(-1);
1871
41
    }
1872
421k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1873
0
        xmlFreeInputStream(input);
1874
0
    }
1875
421k
    ctxt->inputNr = 0;
1876
421k
    ctxt->input = NULL;
1877
1878
421k
    ctxt->version = NULL;
1879
421k
    ctxt->encoding = NULL;
1880
421k
    ctxt->standalone = -1;
1881
421k
    ctxt->hasExternalSubset = 0;
1882
421k
    ctxt->hasPErefs = 0;
1883
421k
    ctxt->html = 0;
1884
421k
    ctxt->external = 0;
1885
421k
    ctxt->instate = XML_PARSER_START;
1886
421k
    ctxt->token = 0;
1887
421k
    ctxt->directory = NULL;
1888
1889
    /* Allocate the Node stack */
1890
421k
    if (ctxt->nodeTab == NULL) {
1891
421k
  ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1892
421k
  ctxt->nodeMax = 10;
1893
421k
    }
1894
421k
    if (ctxt->nodeTab == NULL) {
1895
49
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1896
49
  ctxt->nodeNr = 0;
1897
49
  ctxt->nodeMax = 0;
1898
49
  ctxt->node = NULL;
1899
49
  ctxt->inputNr = 0;
1900
49
  ctxt->inputMax = 0;
1901
49
  ctxt->input = NULL;
1902
49
  return(-1);
1903
49
    }
1904
421k
    ctxt->nodeNr = 0;
1905
421k
    ctxt->node = NULL;
1906
1907
    /* Allocate the Name stack */
1908
421k
    if (ctxt->nameTab == NULL) {
1909
421k
  ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1910
421k
  ctxt->nameMax = 10;
1911
421k
    }
1912
421k
    if (ctxt->nameTab == NULL) {
1913
35
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1914
35
  ctxt->nodeNr = 0;
1915
35
  ctxt->nodeMax = 0;
1916
35
  ctxt->node = NULL;
1917
35
  ctxt->inputNr = 0;
1918
35
  ctxt->inputMax = 0;
1919
35
  ctxt->input = NULL;
1920
35
  ctxt->nameNr = 0;
1921
35
  ctxt->nameMax = 0;
1922
35
  ctxt->name = NULL;
1923
35
  return(-1);
1924
35
    }
1925
421k
    ctxt->nameNr = 0;
1926
421k
    ctxt->name = NULL;
1927
1928
    /* Allocate the space stack */
1929
421k
    if (ctxt->spaceTab == NULL) {
1930
421k
  ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1931
421k
  ctxt->spaceMax = 10;
1932
421k
    }
1933
421k
    if (ctxt->spaceTab == NULL) {
1934
38
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1935
38
  ctxt->nodeNr = 0;
1936
38
  ctxt->nodeMax = 0;
1937
38
  ctxt->node = NULL;
1938
38
  ctxt->inputNr = 0;
1939
38
  ctxt->inputMax = 0;
1940
38
  ctxt->input = NULL;
1941
38
  ctxt->nameNr = 0;
1942
38
  ctxt->nameMax = 0;
1943
38
  ctxt->name = NULL;
1944
38
  ctxt->spaceNr = 0;
1945
38
  ctxt->spaceMax = 0;
1946
38
  ctxt->space = NULL;
1947
38
  return(-1);
1948
38
    }
1949
421k
    ctxt->spaceNr = 1;
1950
421k
    ctxt->spaceMax = 10;
1951
421k
    ctxt->spaceTab[0] = -1;
1952
421k
    ctxt->space = &ctxt->spaceTab[0];
1953
421k
    ctxt->myDoc = NULL;
1954
421k
    ctxt->wellFormed = 1;
1955
421k
    ctxt->nsWellFormed = 1;
1956
421k
    ctxt->valid = 1;
1957
421k
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1958
421k
    if (ctxt->loadsubset) {
1959
0
        ctxt->options |= XML_PARSE_DTDLOAD;
1960
0
    }
1961
421k
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1962
421k
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1963
421k
    if (ctxt->pedantic) {
1964
0
        ctxt->options |= XML_PARSE_PEDANTIC;
1965
0
    }
1966
421k
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1967
421k
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1968
421k
    if (ctxt->keepBlanks == 0) {
1969
0
  ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1970
0
  ctxt->options |= XML_PARSE_NOBLANKS;
1971
0
    }
1972
1973
421k
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1974
421k
    ctxt->vctxt.userData = ctxt;
1975
421k
    ctxt->vctxt.error = xmlParserValidityError;
1976
421k
    ctxt->vctxt.warning = xmlParserValidityWarning;
1977
421k
    if (ctxt->validate) {
1978
0
  if (xmlGetWarningsDefaultValue == 0)
1979
0
      ctxt->vctxt.warning = NULL;
1980
0
  else
1981
0
      ctxt->vctxt.warning = xmlParserValidityWarning;
1982
0
  ctxt->vctxt.nodeMax = 0;
1983
0
        ctxt->options |= XML_PARSE_DTDVALID;
1984
0
    }
1985
421k
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1986
421k
    if (ctxt->replaceEntities) {
1987
0
        ctxt->options |= XML_PARSE_NOENT;
1988
0
    }
1989
421k
    ctxt->record_info = 0;
1990
421k
    ctxt->checkIndex = 0;
1991
421k
    ctxt->inSubset = 0;
1992
421k
    ctxt->errNo = XML_ERR_OK;
1993
421k
    ctxt->depth = 0;
1994
421k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1995
421k
    ctxt->catalogs = NULL;
1996
421k
    ctxt->sizeentities = 0;
1997
421k
    ctxt->sizeentcopy = 0;
1998
421k
    ctxt->input_id = 1;
1999
421k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
2000
421k
    return(0);
2001
421k
}
2002
2003
/**
2004
 * xmlInitParserCtxt:
2005
 * @ctxt:  an XML parser context
2006
 *
2007
 * DEPRECATED: Internal function which will be made private in a future
2008
 * version.
2009
 *
2010
 * Initialize a parser context
2011
 *
2012
 * Returns 0 in case of success and -1 in case of error
2013
 */
2014
2015
int
2016
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2017
0
{
2018
0
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2019
0
}
2020
2021
/**
2022
 * xmlFreeParserCtxt:
2023
 * @ctxt:  an XML parser context
2024
 *
2025
 * Free all the memory used by a parser context. However the parsed
2026
 * document in ctxt->myDoc is not freed.
2027
 */
2028
2029
void
2030
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2031
421k
{
2032
421k
    xmlParserInputPtr input;
2033
2034
421k
    if (ctxt == NULL) return;
2035
2036
762k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2037
341k
        xmlFreeInputStream(input);
2038
341k
    }
2039
421k
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2040
421k
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2041
421k
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2042
421k
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2043
421k
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2044
421k
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2045
421k
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2046
421k
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2047
421k
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2048
#ifdef LIBXML_SAX1_ENABLED
2049
    if ((ctxt->sax != NULL) &&
2050
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2051
#else
2052
421k
    if (ctxt->sax != NULL)
2053
421k
#endif /* LIBXML_SAX1_ENABLED */
2054
421k
        xmlFree(ctxt->sax);
2055
421k
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2056
421k
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2057
421k
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2058
421k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2059
421k
    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
2060
421k
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2061
421k
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2062
421k
    if (ctxt->attsDefault != NULL)
2063
12.1k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2064
421k
    if (ctxt->attsSpecial != NULL)
2065
12.6k
        xmlHashFree(ctxt->attsSpecial, NULL);
2066
421k
    if (ctxt->freeElems != NULL) {
2067
0
        xmlNodePtr cur, next;
2068
2069
0
  cur = ctxt->freeElems;
2070
0
  while (cur != NULL) {
2071
0
      next = cur->next;
2072
0
      xmlFree(cur);
2073
0
      cur = next;
2074
0
  }
2075
0
    }
2076
421k
    if (ctxt->freeAttrs != NULL) {
2077
0
        xmlAttrPtr cur, next;
2078
2079
0
  cur = ctxt->freeAttrs;
2080
0
  while (cur != NULL) {
2081
0
      next = cur->next;
2082
0
      xmlFree(cur);
2083
0
      cur = next;
2084
0
  }
2085
0
    }
2086
    /*
2087
     * cleanup the error strings
2088
     */
2089
421k
    if (ctxt->lastError.message != NULL)
2090
295k
        xmlFree(ctxt->lastError.message);
2091
421k
    if (ctxt->lastError.file != NULL)
2092
292k
        xmlFree(ctxt->lastError.file);
2093
421k
    if (ctxt->lastError.str1 != NULL)
2094
228k
        xmlFree(ctxt->lastError.str1);
2095
421k
    if (ctxt->lastError.str2 != NULL)
2096
76.0k
        xmlFree(ctxt->lastError.str2);
2097
421k
    if (ctxt->lastError.str3 != NULL)
2098
51.3k
        xmlFree(ctxt->lastError.str3);
2099
2100
421k
#ifdef LIBXML_CATALOG_ENABLED
2101
421k
    if (ctxt->catalogs != NULL)
2102
655
  xmlCatalogFreeLocal(ctxt->catalogs);
2103
421k
#endif
2104
421k
    xmlFree(ctxt);
2105
421k
}
2106
2107
/**
2108
 * xmlNewParserCtxt:
2109
 *
2110
 * Allocate and initialize a new parser context.
2111
 *
2112
 * Returns the xmlParserCtxtPtr or NULL
2113
 */
2114
2115
xmlParserCtxtPtr
2116
xmlNewParserCtxt(void)
2117
410k
{
2118
410k
    return(xmlNewSAXParserCtxt(NULL, NULL));
2119
410k
}
2120
2121
/**
2122
 * xmlNewSAXParserCtxt:
2123
 * @sax:  SAX handler
2124
 * @userData:  user data
2125
 *
2126
 * Allocate and initialize a new SAX parser context. If userData is NULL,
2127
 * the parser context will be passed as user data.
2128
 *
2129
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2130
 */
2131
2132
xmlParserCtxtPtr
2133
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2134
421k
{
2135
421k
    xmlParserCtxtPtr ctxt;
2136
2137
421k
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2138
421k
    if (ctxt == NULL) {
2139
256
  xmlErrMemory(NULL, "cannot allocate parser context\n");
2140
256
  return(NULL);
2141
256
    }
2142
421k
    memset(ctxt, 0, sizeof(xmlParserCtxt));
2143
421k
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2144
266
        xmlFreeParserCtxt(ctxt);
2145
266
  return(NULL);
2146
266
    }
2147
421k
    return(ctxt);
2148
421k
}
2149
2150
/************************************************************************
2151
 *                  *
2152
 *    Handling of node information        *
2153
 *                  *
2154
 ************************************************************************/
2155
2156
/**
2157
 * xmlClearParserCtxt:
2158
 * @ctxt:  an XML parser context
2159
 *
2160
 * Clear (release owned resources) and reinitialize a parser context
2161
 */
2162
2163
void
2164
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2165
0
{
2166
0
  if (ctxt==NULL)
2167
0
    return;
2168
0
  xmlClearNodeInfoSeq(&ctxt->node_seq);
2169
0
  xmlCtxtReset(ctxt);
2170
0
}
2171
2172
2173
/**
2174
 * xmlParserFindNodeInfo:
2175
 * @ctx:  an XML parser context
2176
 * @node:  an XML node within the tree
2177
 *
2178
 * DEPRECATED: Don't use.
2179
 *
2180
 * Find the parser node info struct for a given node
2181
 *
2182
 * Returns an xmlParserNodeInfo block pointer or NULL
2183
 */
2184
const xmlParserNodeInfo *
2185
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
2186
0
{
2187
0
    unsigned long pos;
2188
2189
0
    if ((ctx == NULL) || (node == NULL))
2190
0
        return (NULL);
2191
    /* Find position where node should be at */
2192
0
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2193
0
    if (pos < ctx->node_seq.length
2194
0
        && ctx->node_seq.buffer[pos].node == node)
2195
0
        return &ctx->node_seq.buffer[pos];
2196
0
    else
2197
0
        return NULL;
2198
0
}
2199
2200
2201
/**
2202
 * xmlInitNodeInfoSeq:
2203
 * @seq:  a node info sequence pointer
2204
 *
2205
 * DEPRECATED: Don't use.
2206
 *
2207
 * -- Initialize (set to initial state) node info sequence
2208
 */
2209
void
2210
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2211
421k
{
2212
421k
    if (seq == NULL)
2213
0
        return;
2214
421k
    seq->length = 0;
2215
421k
    seq->maximum = 0;
2216
421k
    seq->buffer = NULL;
2217
421k
}
2218
2219
/**
2220
 * xmlClearNodeInfoSeq:
2221
 * @seq:  a node info sequence pointer
2222
 *
2223
 * DEPRECATED: Don't use.
2224
 *
2225
 * -- Clear (release memory and reinitialize) node
2226
 *   info sequence
2227
 */
2228
void
2229
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2230
0
{
2231
0
    if (seq == NULL)
2232
0
        return;
2233
0
    if (seq->buffer != NULL)
2234
0
        xmlFree(seq->buffer);
2235
0
    xmlInitNodeInfoSeq(seq);
2236
0
}
2237
2238
/**
2239
 * xmlParserFindNodeInfoIndex:
2240
 * @seq:  a node info sequence pointer
2241
 * @node:  an XML node pointer
2242
 *
2243
 * DEPRECATED: Don't use.
2244
 *
2245
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2246
 *   the given node is or should be at in a sorted sequence
2247
 *
2248
 * Returns a long indicating the position of the record
2249
 */
2250
unsigned long
2251
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2252
                           const xmlNodePtr node)
2253
0
{
2254
0
    unsigned long upper, lower, middle;
2255
0
    int found = 0;
2256
2257
0
    if ((seq == NULL) || (node == NULL))
2258
0
        return ((unsigned long) -1);
2259
2260
    /* Do a binary search for the key */
2261
0
    lower = 1;
2262
0
    upper = seq->length;
2263
0
    middle = 0;
2264
0
    while (lower <= upper && !found) {
2265
0
        middle = lower + (upper - lower) / 2;
2266
0
        if (node == seq->buffer[middle - 1].node)
2267
0
            found = 1;
2268
0
        else if (node < seq->buffer[middle - 1].node)
2269
0
            upper = middle - 1;
2270
0
        else
2271
0
            lower = middle + 1;
2272
0
    }
2273
2274
    /* Return position */
2275
0
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2276
0
        return middle;
2277
0
    else
2278
0
        return middle - 1;
2279
0
}
2280
2281
2282
/**
2283
 * xmlParserAddNodeInfo:
2284
 * @ctxt:  an XML parser context
2285
 * @info:  a node info record pointer
2286
 *
2287
 * DEPRECATED: Don't use.
2288
 *
2289
 * Insert node info record into the sorted sequence
2290
 */
2291
void
2292
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2293
                     const xmlParserNodeInfoPtr info)
2294
0
{
2295
0
    unsigned long pos;
2296
2297
0
    if ((ctxt == NULL) || (info == NULL)) return;
2298
2299
    /* Find pos and check to see if node is already in the sequence */
2300
0
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2301
0
                                     info->node);
2302
2303
0
    if ((pos < ctxt->node_seq.length) &&
2304
0
        (ctxt->node_seq.buffer != NULL) &&
2305
0
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2306
0
        ctxt->node_seq.buffer[pos] = *info;
2307
0
    }
2308
2309
    /* Otherwise, we need to add new node to buffer */
2310
0
    else {
2311
0
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2312
0
      (ctxt->node_seq.buffer == NULL)) {
2313
0
            xmlParserNodeInfo *tmp_buffer;
2314
0
            unsigned int byte_size;
2315
2316
0
            if (ctxt->node_seq.maximum == 0)
2317
0
                ctxt->node_seq.maximum = 2;
2318
0
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2319
0
      (2 * ctxt->node_seq.maximum));
2320
2321
0
            if (ctxt->node_seq.buffer == NULL)
2322
0
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2323
0
            else
2324
0
                tmp_buffer =
2325
0
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2326
0
                                                     byte_size);
2327
2328
0
            if (tmp_buffer == NULL) {
2329
0
    xmlErrMemory(ctxt, "failed to allocate buffer\n");
2330
0
                return;
2331
0
            }
2332
0
            ctxt->node_seq.buffer = tmp_buffer;
2333
0
            ctxt->node_seq.maximum *= 2;
2334
0
        }
2335
2336
        /* If position is not at end, move elements out of the way */
2337
0
        if (pos != ctxt->node_seq.length) {
2338
0
            unsigned long i;
2339
2340
0
            for (i = ctxt->node_seq.length; i > pos; i--)
2341
0
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2342
0
        }
2343
2344
        /* Copy element and increase length */
2345
0
        ctxt->node_seq.buffer[pos] = *info;
2346
0
        ctxt->node_seq.length++;
2347
0
    }
2348
0
}
2349
2350
/************************************************************************
2351
 *                  *
2352
 *    Defaults settings         *
2353
 *                  *
2354
 ************************************************************************/
2355
/**
2356
 * xmlPedanticParserDefault:
2357
 * @val:  int 0 or 1
2358
 *
2359
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2360
 *
2361
 * Set and return the previous value for enabling pedantic warnings.
2362
 *
2363
 * Returns the previous default value (0 for disabled, 1 for enabled).
2364
 */
2365
2366
int
2367
0
xmlPedanticParserDefault(int val) {
2368
0
    int old = xmlPedanticParserDefaultValue;
2369
2370
0
    xmlPedanticParserDefaultValue = val;
2371
0
    return(old);
2372
0
}
2373
2374
/**
2375
 * xmlLineNumbersDefault:
2376
 * @val:  int 0 or 1
2377
 *
2378
 * DEPRECATED: The modern options API always enables line numbers.
2379
 *
2380
 * Set and return the previous value for enabling line numbers in elements
2381
 * contents. This may break old applications and is turned off by default.
2382
 *
2383
 * Returns the previous default value (0 for disabled, 1 for enabled).
2384
 */
2385
2386
int
2387
0
xmlLineNumbersDefault(int val) {
2388
0
    int old = xmlLineNumbersDefaultValue;
2389
2390
0
    xmlLineNumbersDefaultValue = val;
2391
0
    return(old);
2392
0
}
2393
2394
/**
2395
 * xmlSubstituteEntitiesDefault:
2396
 * @val:  int 0 or 1
2397
 *
2398
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2399
 *
2400
 * Set and return the previous value for default entity support.
2401
 * Initially the parser always keeps entity references instead of substituting
2402
 * entity values in the output. This function has to be used to change the
2403
 * default parser behavior
2404
 * SAX::substituteEntities() has to be used for changing that on a file by
2405
 * file basis.
2406
 *
2407
 * Returns the previous value: 0 for no substitution, 1 for substitution.
2408
 */
2409
2410
int
2411
0
xmlSubstituteEntitiesDefault(int val) {
2412
0
    int old = xmlSubstituteEntitiesDefaultValue;
2413
2414
0
    xmlSubstituteEntitiesDefaultValue = val;
2415
0
    return(old);
2416
0
}
2417
2418
/**
2419
 * xmlKeepBlanksDefault:
2420
 * @val:  int 0 or 1
2421
 *
2422
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2423
 *
2424
 * Set and return the previous value for default blanks text nodes support.
2425
 * The 1.x version of the parser used an heuristic to try to detect
2426
 * ignorable white spaces. As a result the SAX callback was generating
2427
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2428
 * using the DOM output text nodes containing those blanks were not generated.
2429
 * The 2.x and later version will switch to the XML standard way and
2430
 * ignorableWhitespace() are only generated when running the parser in
2431
 * validating mode and when the current element doesn't allow CDATA or
2432
 * mixed content.
2433
 * This function is provided as a way to force the standard behavior
2434
 * on 1.X libs and to switch back to the old mode for compatibility when
2435
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2436
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2437
 * nodes generated.
2438
 * This value also affect autogeneration of indentation when saving code
2439
 * if blanks sections are kept, indentation is not generated.
2440
 *
2441
 * Returns the previous default value (0 if blanks were not kept, 1 if kept).
2442
 */
2443
2444
int
2445
0
xmlKeepBlanksDefault(int val) {
2446
0
    int old = xmlKeepBlanksDefaultValue;
2447
2448
0
    xmlKeepBlanksDefaultValue = val;
2449
0
    if (!val) xmlIndentTreeOutput = 1;
2450
0
    return(old);
2451
0
}
2452