Coverage Report

Created: 2025-07-23 08:18

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
0
#define NS_INDEX_EMPTY  INT_MAX
81
0
#define NS_INDEX_XML    (INT_MAX - 1)
82
0
#define URI_HASH_EMPTY  0xD943A04E
83
0
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
11.7k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
672k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
672k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
657k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
0
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record kept for a start tag.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the field notes below are inferred from the names only and must
 * be confirmed against the code that fills them in.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix; /* presumably the element's namespace prefix - confirm */
103
    const xmlChar *URI; /* presumably the element's namespace URI - confirm */
104
    int line; /* presumably the input line of the start tag - confirm */
105
    int nsNr; /* presumably a count of namespace declarations - confirm */
106
};
107
108
/*
 * Extra per-entry data; struct _xmlParserNsData holds a pointer to an
 * array of these in its "extra" member.
 * NOTE(review): the meaning of the individual fields is not visible in
 * this part of the file - confirm against the namespace table code.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * Element type of the "hash" array in struct _xmlParserNsData.
 * NOTE(review): presumably hashValue caches a key's hash and index
 * refers to an entry of another table - confirm against the lookup code.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping data: an array of xmlParserNsExtra records, a
 * table of xmlParserNsBucket slots with its size and element count, and
 * a few counters/indices.
 * NOTE(review): how these members are maintained is not visible in this
 * part of the file; the NS_INDEX_EMPTY / NS_INDEX_XML macros defined
 * above look like special values for the index members - confirm.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
28.6k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
29.3k
#define XML_ENT_FIXED_COST 20
170
171
32.3M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
1.38M
#define XML_PARSER_BUFFER_SIZE 100
173
58.5k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
0
    xmlCtxtErrMemory(ctxt);
226
0
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
9.19k
{
239
9.19k
    if (prefix == NULL)
240
9.19k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
9.19k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
9.19k
                   "Attribute %s redefined\n", localname);
243
0
    else
244
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
0
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
0
                   "Attribute %s:%s redefined\n", prefix, localname);
247
9.19k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
34.6M
{
260
34.6M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
34.6M
               NULL, NULL, NULL, 0, "%s", msg);
262
34.6M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
27.7k
{
277
27.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
27.7k
               str1, str2, NULL, 0, msg, str1, str2);
279
27.7k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
53.9k
{
314
53.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
53.9k
               NULL, NULL, NULL, val, msg, val);
316
53.9k
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
55.9k
{
333
55.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
55.9k
               str1, str2, NULL, val, msg, str1, val, str2);
335
55.9k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
709k
{
349
709k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
709k
               val, NULL, NULL, 0, msg, val);
351
709k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
0
{
365
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
0
               val, NULL, NULL, 0, msg, val);
367
0
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
832
{
385
832
    ctxt->nsWellFormed = 0;
386
387
832
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
832
               info1, info2, info3, 0, msg, info1, info2, info3);
389
832
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
0
{
407
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
0
               info1, info2, info3, 0, msg, info1, info2, info3);
409
0
}
410
411
/**
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418
419
/**
 * Add a size_t amount to an unsigned long accumulator, clamping the
 * result at ULONG_MAX instead of wrapping.
 *
 * The parameter is a size_t on purpose: on platforms where size_t is
 * wider than unsigned long, a large value must saturate the accumulator
 * rather than be truncated before the overflow check.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /*
     * The comparison is done in the wider of the two types, so it is
     * correct whether or not size_t is wider than unsigned long.
     */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += val; /* val fits in the remaining room here */
}
426
427
/**
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like #xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * @param ctxt  parser context
446
 * @param extra  sum of unexpanded entity sizes
447
 * @returns 1 on error, 0 on success.
448
 */
449
static int
450
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
451
40.1k
{
452
40.1k
    unsigned long consumed;
453
40.1k
    unsigned long *expandedSize;
454
40.1k
    xmlParserInputPtr input = ctxt->input;
455
40.1k
    xmlEntityPtr entity = input->entity;
456
457
40.1k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
458
11.4k
        return(0);
459
460
    /*
461
     * Compute total consumed bytes so far, including input streams of
462
     * external entities.
463
     */
464
28.6k
    consumed = input->consumed;
465
28.6k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
466
28.6k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
467
468
28.6k
    if (entity)
469
742
        expandedSize = &entity->expandedSize;
470
27.9k
    else
471
27.9k
        expandedSize = &ctxt->sizeentcopy;
472
473
    /*
474
     * Add extra cost and some fixed cost.
475
     */
476
28.6k
    xmlSaturatedAdd(expandedSize, extra);
477
28.6k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
478
479
    /*
480
     * It's important to always use saturation arithmetic when tracking
481
     * entity sizes to make the size checks reliable. If "sizeentcopy"
482
     * overflows, we have to abort.
483
     */
484
28.6k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
485
28.6k
        ((*expandedSize >= ULONG_MAX) ||
486
5.77k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
487
31
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
488
31
                       "Maximum entity amplification factor exceeded, see "
489
31
                       "xmlCtxtSetMaxAmplification.\n");
490
31
        xmlHaltParser(ctxt);
491
31
        return(1);
492
31
    }
493
494
28.6k
    return(0);
495
28.6k
}
496
497
/************************************************************************
498
 *                  *
499
 *    Library wide options          *
500
 *                  *
501
 ************************************************************************/
502
503
/**
504
 * Examines if the library has been compiled with a given feature.
505
 *
506
 * @param feature  the feature to be examined
507
 * @returns zero (0) if the feature does not exist or an unknown
508
 * feature is requested, non-zero otherwise.
509
 */
510
int
511
xmlHasFeature(xmlFeature feature)
512
0
{
513
0
    switch (feature) {
514
0
  case XML_WITH_THREAD:
515
0
#ifdef LIBXML_THREAD_ENABLED
516
0
      return(1);
517
#else
518
      return(0);
519
#endif
520
0
        case XML_WITH_TREE:
521
0
            return(1);
522
0
        case XML_WITH_OUTPUT:
523
0
#ifdef LIBXML_OUTPUT_ENABLED
524
0
            return(1);
525
#else
526
            return(0);
527
#endif
528
0
        case XML_WITH_PUSH:
529
0
#ifdef LIBXML_PUSH_ENABLED
530
0
            return(1);
531
#else
532
            return(0);
533
#endif
534
0
        case XML_WITH_READER:
535
0
#ifdef LIBXML_READER_ENABLED
536
0
            return(1);
537
#else
538
            return(0);
539
#endif
540
0
        case XML_WITH_PATTERN:
541
0
#ifdef LIBXML_PATTERN_ENABLED
542
0
            return(1);
543
#else
544
            return(0);
545
#endif
546
0
        case XML_WITH_WRITER:
547
0
#ifdef LIBXML_WRITER_ENABLED
548
0
            return(1);
549
#else
550
            return(0);
551
#endif
552
0
        case XML_WITH_SAX1:
553
0
#ifdef LIBXML_SAX1_ENABLED
554
0
            return(1);
555
#else
556
            return(0);
557
#endif
558
0
        case XML_WITH_HTTP:
559
0
            return(0);
560
0
        case XML_WITH_VALID:
561
0
#ifdef LIBXML_VALID_ENABLED
562
0
            return(1);
563
#else
564
            return(0);
565
#endif
566
0
        case XML_WITH_HTML:
567
0
#ifdef LIBXML_HTML_ENABLED
568
0
            return(1);
569
#else
570
            return(0);
571
#endif
572
0
        case XML_WITH_LEGACY:
573
0
            return(0);
574
0
        case XML_WITH_C14N:
575
0
#ifdef LIBXML_C14N_ENABLED
576
0
            return(1);
577
#else
578
            return(0);
579
#endif
580
0
        case XML_WITH_CATALOG:
581
0
#ifdef LIBXML_CATALOG_ENABLED
582
0
            return(1);
583
#else
584
            return(0);
585
#endif
586
0
        case XML_WITH_XPATH:
587
0
#ifdef LIBXML_XPATH_ENABLED
588
0
            return(1);
589
#else
590
            return(0);
591
#endif
592
0
        case XML_WITH_XPTR:
593
0
#ifdef LIBXML_XPTR_ENABLED
594
0
            return(1);
595
#else
596
            return(0);
597
#endif
598
0
        case XML_WITH_XINCLUDE:
599
0
#ifdef LIBXML_XINCLUDE_ENABLED
600
0
            return(1);
601
#else
602
            return(0);
603
#endif
604
0
        case XML_WITH_ICONV:
605
0
#ifdef LIBXML_ICONV_ENABLED
606
0
            return(1);
607
#else
608
            return(0);
609
#endif
610
0
        case XML_WITH_ISO8859X:
611
0
#ifdef LIBXML_ISO8859X_ENABLED
612
0
            return(1);
613
#else
614
            return(0);
615
#endif
616
0
        case XML_WITH_UNICODE:
617
0
            return(0);
618
0
        case XML_WITH_REGEXP:
619
0
#ifdef LIBXML_REGEXP_ENABLED
620
0
            return(1);
621
#else
622
            return(0);
623
#endif
624
0
        case XML_WITH_AUTOMATA:
625
0
#ifdef LIBXML_REGEXP_ENABLED
626
0
            return(1);
627
#else
628
            return(0);
629
#endif
630
0
        case XML_WITH_EXPR:
631
0
            return(0);
632
0
        case XML_WITH_RELAXNG:
633
0
#ifdef LIBXML_RELAXNG_ENABLED
634
0
            return(1);
635
#else
636
            return(0);
637
#endif
638
0
        case XML_WITH_SCHEMAS:
639
0
#ifdef LIBXML_SCHEMAS_ENABLED
640
0
            return(1);
641
#else
642
            return(0);
643
#endif
644
0
        case XML_WITH_SCHEMATRON:
645
#ifdef LIBXML_SCHEMATRON_ENABLED
646
            return(1);
647
#else
648
0
            return(0);
649
0
#endif
650
0
        case XML_WITH_MODULES:
651
0
#ifdef LIBXML_MODULES_ENABLED
652
0
            return(1);
653
#else
654
            return(0);
655
#endif
656
0
        case XML_WITH_DEBUG:
657
#ifdef LIBXML_DEBUG_ENABLED
658
            return(1);
659
#else
660
0
            return(0);
661
0
#endif
662
0
        case XML_WITH_DEBUG_MEM:
663
0
            return(0);
664
0
        case XML_WITH_ZLIB:
665
0
#ifdef LIBXML_ZLIB_ENABLED
666
0
            return(1);
667
#else
668
            return(0);
669
#endif
670
0
        case XML_WITH_LZMA:
671
0
#ifdef LIBXML_LZMA_ENABLED
672
0
            return(1);
673
#else
674
            return(0);
675
#endif
676
0
        case XML_WITH_ICU:
677
#ifdef LIBXML_ICU_ENABLED
678
            return(1);
679
#else
680
0
            return(0);
681
0
#endif
682
0
        default:
683
0
      break;
684
0
     }
685
0
     return(0);
686
0
}
687
688
/************************************************************************
689
 *                  *
690
 *      Simple string buffer        *
691
 *                  *
692
 ************************************************************************/
693
694
/*
 * Growable byte buffer with a size limit and a sticky error code, used
 * through the xmlSBuf* helpers below.
 */
typedef struct {
695
    xmlChar *mem; /* storage obtained from xmlRealloc/xmlMalloc; NULL until first growth */
696
    unsigned size; /* number of bytes currently stored */
697
    unsigned cap; /* size < cap */
698
    unsigned max; /* size <= max */
699
    xmlParserErrors code; /* XML_ERR_OK, or the error recorded by a failed operation */
700
} xmlSBuf;
701
702
static void
703
735k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
704
735k
    buf->mem = NULL;
705
735k
    buf->size = 0;
706
735k
    buf->cap = 0;
707
735k
    buf->max = max;
708
735k
    buf->code = XML_ERR_OK;
709
735k
}
710
711
static int
712
740k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
713
740k
    xmlChar *mem;
714
740k
    unsigned cap;
715
716
740k
    if (len >= UINT_MAX / 2 - buf->size) {
717
0
        if (buf->code == XML_ERR_OK)
718
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
719
0
        return(-1);
720
0
    }
721
722
740k
    cap = (buf->size + len) * 2;
723
740k
    if (cap < 240)
724
673k
        cap = 240;
725
726
740k
    mem = xmlRealloc(buf->mem, cap);
727
740k
    if (mem == NULL) {
728
0
        buf->code = XML_ERR_NO_MEMORY;
729
0
        return(-1);
730
0
    }
731
732
740k
    buf->mem = mem;
733
740k
    buf->cap = cap;
734
735
740k
    return(0);
736
740k
}
737
738
static void
739
93.8M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
740
93.8M
    if (buf->max - buf->size < len) {
741
0
        if (buf->code == XML_ERR_OK)
742
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
743
0
        return;
744
0
    }
745
746
93.8M
    if (buf->cap - buf->size <= len) {
747
732k
        if (xmlSBufGrow(buf, len) < 0)
748
0
            return;
749
732k
    }
750
751
93.8M
    if (len > 0)
752
93.8M
        memcpy(buf->mem + buf->size, str, len);
753
93.8M
    buf->size += len;
754
93.8M
}
755
756
static void
757
89.5M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
758
89.5M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
759
89.5M
}
760
761
static void
762
284k
xmlSBufAddChar(xmlSBuf *buf, int c) {
763
284k
    xmlChar *end;
764
765
284k
    if (buf->max - buf->size < 4) {
766
0
        if (buf->code == XML_ERR_OK)
767
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
768
0
        return;
769
0
    }
770
771
284k
    if (buf->cap - buf->size <= 4) {
772
8.34k
        if (xmlSBufGrow(buf, 4) < 0)
773
0
            return;
774
8.34k
    }
775
776
284k
    end = buf->mem + buf->size;
777
778
284k
    if (c < 0x80) {
779
128k
        *end = (xmlChar) c;
780
128k
        buf->size += 1;
781
156k
    } else {
782
156k
        buf->size += xmlCopyCharMultiByte(end, c);
783
156k
    }
784
284k
}
785
786
static void
787
85.5M
xmlSBufAddReplChar(xmlSBuf *buf) {
788
85.5M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
789
85.5M
}
790
791
static void
792
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
793
0
    if (buf->code == XML_ERR_NO_MEMORY)
794
0
        xmlCtxtErrMemory(ctxt);
795
0
    else
796
0
        xmlFatalErr(ctxt, buf->code, errMsg);
797
0
}
798
799
static xmlChar *
800
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
801
719k
              const char *errMsg) {
802
719k
    if (buf->mem == NULL) {
803
40.8k
        buf->mem = xmlMalloc(1);
804
40.8k
        if (buf->mem == NULL) {
805
0
            buf->code = XML_ERR_NO_MEMORY;
806
40.8k
        } else {
807
40.8k
            buf->mem[0] = 0;
808
40.8k
        }
809
678k
    } else {
810
678k
        buf->mem[buf->size] = 0;
811
678k
    }
812
813
719k
    if (buf->code == XML_ERR_OK) {
814
719k
        if (sizeOut != NULL)
815
0
            *sizeOut = buf->size;
816
719k
        return(buf->mem);
817
719k
    }
818
819
0
    xmlSBufReportError(buf, ctxt, errMsg);
820
821
0
    xmlFree(buf->mem);
822
823
0
    if (sizeOut != NULL)
824
0
        *sizeOut = 0;
825
0
    return(NULL);
826
719k
}
827
828
static void
829
9.62k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
830
9.62k
    if (buf->code != XML_ERR_OK)
831
0
        xmlSBufReportError(buf, ctxt, errMsg);
832
833
9.62k
    xmlFree(buf->mem);
834
9.62k
}
835
836
static int
837
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
838
70.7M
                    const char *errMsg) {
839
70.7M
    int c = str[0];
840
70.7M
    int c1 = str[1];
841
842
70.7M
    if ((c1 & 0xC0) != 0x80)
843
25.5M
        goto encoding_error;
844
845
45.1M
    if (c < 0xE0) {
846
        /* 2-byte sequence */
847
40.6M
        if (c < 0xC2)
848
22.7M
            goto encoding_error;
849
850
17.9M
        return(2);
851
40.6M
    } else {
852
4.47M
        int c2 = str[2];
853
854
4.47M
        if ((c2 & 0xC0) != 0x80)
855
2.10M
            goto encoding_error;
856
857
2.36M
        if (c < 0xF0) {
858
            /* 3-byte sequence */
859
1.06M
            if (c == 0xE0) {
860
                /* overlong */
861
32.6k
                if (c1 < 0xA0)
862
26.4k
                    goto encoding_error;
863
1.03M
            } else if (c == 0xED) {
864
                /* surrogate */
865
1.11k
                if (c1 >= 0xA0)
866
580
                    goto encoding_error;
867
1.03M
            } else if (c == 0xEF) {
868
                /* U+FFFE and U+FFFF are invalid Chars */
869
459k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
870
8.86k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
871
459k
            }
872
873
1.03M
            return(3);
874
1.29M
        } else {
875
            /* 4-byte sequence */
876
1.29M
            if ((str[3] & 0xC0) != 0x80)
877
815k
                goto encoding_error;
878
481k
            if (c == 0xF0) {
879
                /* overlong */
880
9.83k
                if (c1 < 0x90)
881
2.24k
                    goto encoding_error;
882
471k
            } else if (c >= 0xF4) {
883
                /* greater than 0x10FFFF */
884
18.5k
                if ((c > 0xF4) || (c1 >= 0x90))
885
16.6k
                    goto encoding_error;
886
18.5k
            }
887
888
462k
            return(4);
889
481k
        }
890
2.36M
    }
891
892
51.2M
encoding_error:
893
    /* Only report the first error */
894
51.2M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
895
2.73k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
896
2.73k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
897
2.73k
    }
898
899
51.2M
    return(0);
900
45.1M
}
901
902
/************************************************************************
903
 *                  *
904
 *    SAX2 defaulted attributes handling      *
905
 *                  *
906
 ************************************************************************/
907
908
/**
909
 * Final initialization of the parser context before starting to parse.
910
 *
911
 * This accounts for users modifying struct members of parser context
912
 * directly.
913
 *
914
 * @param ctxt  an XML parser context
915
 */
916
static void
917
28.0k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
918
28.0k
    xmlSAXHandlerPtr sax;
919
920
    /* Avoid unused variable warning if features are disabled. */
921
28.0k
    (void) sax;
922
923
    /*
924
     * Changing the SAX struct directly is still widespread practice
925
     * in internal and external code.
926
     */
927
28.0k
    if (ctxt == NULL) return;
928
28.0k
    sax = ctxt->sax;
929
28.0k
#ifdef LIBXML_SAX1_ENABLED
930
    /*
931
     * Only enable SAX2 if there SAX2 element handlers, except when there
932
     * are no element handlers at all.
933
     */
934
28.0k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
935
28.0k
        (sax) &&
936
28.0k
        (sax->initialized == XML_SAX2_MAGIC) &&
937
28.0k
        ((sax->startElementNs != NULL) ||
938
0
         (sax->endElementNs != NULL) ||
939
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
940
0
        ctxt->sax2 = 1;
941
#else
942
    ctxt->sax2 = 1;
943
#endif /* LIBXML_SAX1_ENABLED */
944
945
    /*
946
     * Some users replace the dictionary directly in the context struct.
947
     * We really need an API function to do that cleanly.
948
     */
949
28.0k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
950
28.0k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
951
28.0k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
952
28.0k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
953
28.0k
    (ctxt->str_xml_ns == NULL)) {
954
0
        xmlErrMemory(ctxt);
955
0
    }
956
957
28.0k
    xmlDictSetLimit(ctxt->dict,
958
28.0k
                    (ctxt->options & XML_PARSE_HUGE) ?
959
0
                        0 :
960
28.0k
                        XML_MAX_DICTIONARY_LIMIT);
961
962
28.0k
#ifdef LIBXML_VALID_ENABLED
963
28.0k
    if (ctxt->validate)
964
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
965
28.0k
    else
966
28.0k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
967
28.0k
#endif /* LIBXML_VALID_ENABLED */
968
28.0k
}
969
970
/*
 * One entry of the attrs array in struct _xmlDefAttrs, i.e. one
 * defaulted attribute of an element.
 * NOTE(review): the code that fills these fields is not visible in this
 * part of the file; the notes below are inferred from names and types
 * and should be confirmed.
 */
typedef struct {
971
    xmlHashedString prefix; /* presumably the attribute's prefix - confirm */
972
    xmlHashedString name; /* presumably the attribute's local name - confirm */
973
    xmlHashedString value; /* presumably the default value - confirm */
974
    const xmlChar *valueEnd; /* presumably the end of the value string - confirm */
975
    int external; /* presumably set for declarations from an external subset - confirm */
976
    int expandedSize; /* presumably the entity-expansion cost of the value - confirm */
977
} xmlDefAttr;
978
979
/*
 * Variable-length list of defaulted attributes for one element: a
 * header with the used and allocated counts, followed by the entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
980
typedef xmlDefAttrs *xmlDefAttrsPtr;
981
struct _xmlDefAttrs {
982
    int nbAttrs;  /* number of defaulted attributes on that element */
983
    int maxAttrs;       /* the size of the array */
984
#if __STDC_VERSION__ >= 199901L
985
    /* Using a C99 flexible array member avoids UBSan errors. */
986
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
987
#else
988
    /* Pre-C99 fallback: one-element array used as a variable-length tail. */
    xmlDefAttr attrs[1];
989
#endif
990
};
991
992
/**
993
 * Normalize the space in non CDATA attribute values:
994
 * If the attribute type is not CDATA, then the XML processor MUST further
995
 * process the normalized attribute value by discarding any leading and
996
 * trailing space (\#x20) characters, and by replacing sequences of space
997
 * (\#x20) characters by a single space (\#x20) character.
998
 * Note that the size of dst need to be at least src, and if one doesn't need
999
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000
 * passing src as dst is just fine.
1001
 *
1002
 * @param src  the source string
1003
 * @param dst  the target string
1004
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
1005
 *         is needed.
1006
 */
1007
static xmlChar *
1008
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1009
22.8k
{
1010
22.8k
    if ((src == NULL) || (dst == NULL))
1011
0
        return(NULL);
1012
1013
49.5k
    while (*src == 0x20) src++;
1014
4.63M
    while (*src != 0) {
1015
4.60M
  if (*src == 0x20) {
1016
579k
      while (*src == 0x20) src++;
1017
88.8k
      if (*src != 0)
1018
82.4k
    *dst++ = 0x20;
1019
4.52M
  } else {
1020
4.52M
      *dst++ = *src++;
1021
4.52M
  }
1022
4.60M
    }
1023
22.8k
    *dst = 0;
1024
22.8k
    if (dst == src)
1025
11.6k
       return(NULL);
1026
11.1k
    return(dst);
1027
22.8k
}
1028
1029
/**
1030
 * Add a defaulted attribute for an element
1031
 *
1032
 * @param ctxt  an XML parser context
1033
 * @param fullname  the element fullname
1034
 * @param fullattr  the attribute fullname
1035
 * @param value  the attribute value
1036
 */
1037
static void
1038
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1039
               const xmlChar *fullname,
1040
               const xmlChar *fullattr,
1041
0
               const xmlChar *value) {
1042
0
    xmlDefAttrsPtr defaults;
1043
0
    xmlDefAttr *attr;
1044
0
    int len, expandedSize;
1045
0
    xmlHashedString name;
1046
0
    xmlHashedString prefix;
1047
0
    xmlHashedString hvalue;
1048
0
    const xmlChar *localname;
1049
1050
    /*
1051
     * Allows to detect attribute redefinitions
1052
     */
1053
0
    if (ctxt->attsSpecial != NULL) {
1054
0
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1055
0
      return;
1056
0
    }
1057
1058
0
    if (ctxt->attsDefault == NULL) {
1059
0
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1060
0
  if (ctxt->attsDefault == NULL)
1061
0
      goto mem_error;
1062
0
    }
1063
1064
    /*
1065
     * split the element name into prefix:localname , the string found
1066
     * are within the DTD and then not associated to namespace names.
1067
     */
1068
0
    localname = xmlSplitQName3(fullname, &len);
1069
0
    if (localname == NULL) {
1070
0
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1071
0
  prefix.name = NULL;
1072
0
    } else {
1073
0
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1074
0
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1075
0
        if (prefix.name == NULL)
1076
0
            goto mem_error;
1077
0
    }
1078
0
    if (name.name == NULL)
1079
0
        goto mem_error;
1080
1081
    /*
1082
     * make sure there is some storage
1083
     */
1084
0
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1085
0
    if ((defaults == NULL) ||
1086
0
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1087
0
        xmlDefAttrsPtr temp;
1088
0
        int newSize;
1089
1090
0
        if (defaults == NULL) {
1091
0
            newSize = 4;
1092
0
        } else {
1093
0
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1094
0
                ((size_t) defaults->maxAttrs >
1095
0
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1096
0
                goto mem_error;
1097
1098
0
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1099
0
                newSize = XML_MAX_ATTRS;
1100
0
            else
1101
0
                newSize = defaults->maxAttrs * 2;
1102
0
        }
1103
0
        temp = xmlRealloc(defaults,
1104
0
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1105
0
  if (temp == NULL)
1106
0
      goto mem_error;
1107
0
        if (defaults == NULL)
1108
0
            temp->nbAttrs = 0;
1109
0
  temp->maxAttrs = newSize;
1110
0
        defaults = temp;
1111
0
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1112
0
                          defaults, NULL) < 0) {
1113
0
      xmlFree(defaults);
1114
0
      goto mem_error;
1115
0
  }
1116
0
    }
1117
1118
    /*
1119
     * Split the attribute name into prefix:localname , the string found
1120
     * are within the DTD and hen not associated to namespace names.
1121
     */
1122
0
    localname = xmlSplitQName3(fullattr, &len);
1123
0
    if (localname == NULL) {
1124
0
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1125
0
  prefix.name = NULL;
1126
0
    } else {
1127
0
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1128
0
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1129
0
        if (prefix.name == NULL)
1130
0
            goto mem_error;
1131
0
    }
1132
0
    if (name.name == NULL)
1133
0
        goto mem_error;
1134
1135
    /* intern the string and precompute the end */
1136
0
    len = strlen((const char *) value);
1137
0
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1138
0
    if (hvalue.name == NULL)
1139
0
        goto mem_error;
1140
1141
0
    expandedSize = strlen((const char *) name.name);
1142
0
    if (prefix.name != NULL)
1143
0
        expandedSize += strlen((const char *) prefix.name);
1144
0
    expandedSize += len;
1145
1146
0
    attr = &defaults->attrs[defaults->nbAttrs++];
1147
0
    attr->name = name;
1148
0
    attr->prefix = prefix;
1149
0
    attr->value = hvalue;
1150
0
    attr->valueEnd = hvalue.name + len;
1151
0
    attr->external = PARSER_EXTERNAL(ctxt);
1152
0
    attr->expandedSize = expandedSize;
1153
1154
0
    return;
1155
1156
0
mem_error:
1157
0
    xmlErrMemory(ctxt);
1158
0
}
1159
1160
/**
1161
 * Register this attribute type
1162
 *
1163
 * @param ctxt  an XML parser context
1164
 * @param fullname  the element fullname
1165
 * @param fullattr  the attribute fullname
1166
 * @param type  the attribute type
1167
 */
1168
static void
1169
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1170
      const xmlChar *fullname,
1171
      const xmlChar *fullattr,
1172
      int type)
1173
0
{
1174
0
    if (ctxt->attsSpecial == NULL) {
1175
0
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1176
0
  if (ctxt->attsSpecial == NULL)
1177
0
      goto mem_error;
1178
0
    }
1179
1180
0
    if (PARSER_EXTERNAL(ctxt))
1181
0
        type |= XML_SPECIAL_EXTERNAL;
1182
1183
0
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1184
0
                    XML_INT_TO_PTR(type)) < 0)
1185
0
        goto mem_error;
1186
0
    return;
1187
1188
0
mem_error:
1189
0
    xmlErrMemory(ctxt);
1190
0
}
1191
1192
/**
1193
 * Removes CDATA attributes from the special attribute table
1194
 */
1195
static void
1196
xmlCleanSpecialAttrCallback(void *payload, void *data,
1197
                            const xmlChar *fullname, const xmlChar *fullattr,
1198
0
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1199
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1200
1201
0
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1202
0
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1203
0
    }
1204
0
}
1205
1206
/**
1207
 * Trim the list of attributes defined to remove all those of type
1208
 * CDATA as they are not special. This call should be done when finishing
1209
 * to parse the DTD and before starting to parse the document root.
1210
 *
1211
 * @param ctxt  an XML parser context
1212
 */
1213
static void
1214
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1215
9.27k
{
1216
9.27k
    if (ctxt->attsSpecial == NULL)
1217
9.27k
        return;
1218
1219
0
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1220
1221
0
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1222
0
        xmlHashFree(ctxt->attsSpecial, NULL);
1223
0
        ctxt->attsSpecial = NULL;
1224
0
    }
1225
0
}
1226
1227
/**
1228
 * Checks that the value conforms to the LanguageID production:
1229
 *
1230
 * @deprecated Internal function, do not use.
1231
 *
1232
 * NOTE: this is somewhat deprecated, those productions were removed from
1233
 * the XML Second edition.
1234
 *
1235
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1236
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1237
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1238
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1239
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1240
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1241
 *
1242
 * The current REC reference the successors of RFC 1766, currently 5646
1243
 *
1244
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1245
 *
1246
 *     langtag       = language
1247
 *                     ["-" script]
1248
 *                     ["-" region]
1249
 *                     *("-" variant)
1250
 *                     *("-" extension)
1251
 *                     ["-" privateuse]
1252
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1253
 *                     ["-" extlang]       ; sometimes followed by
1254
 *                                         ; extended language subtags
1255
 *                   / 4ALPHA              ; or reserved for future use
1256
 *                   / 5*8ALPHA            ; or registered language subtag
1257
 *
1258
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1259
 *                     *2("-" 3ALPHA)      ; permanently reserved
1260
 *
1261
 *     script        = 4ALPHA              ; ISO 15924 code
1262
 *
1263
 *     region        = 2ALPHA              ; ISO 3166-1 code
1264
 *                   / 3DIGIT              ; UN M.49 code
1265
 *
1266
 *     variant       = 5*8alphanum         ; registered variants
1267
 *                   / (DIGIT 3alphanum)
1268
 *
1269
 *     extension     = singleton 1*("-" (2*8alphanum))
1270
 *
1271
 *                                         ; Single alphanumerics
1272
 *                                         ; "x" reserved for private use
1273
 *     singleton     = DIGIT               ; 0 - 9
1274
 *                   / %x41-57             ; A - W
1275
 *                   / %x59-5A             ; Y - Z
1276
 *                   / %x61-77             ; a - w
1277
 *                   / %x79-7A             ; y - z
1278
 *
1279
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1280
 * The parser below doesn't try to cope with extension or privateuse
1281
 * that could be added but that's not interoperable anyway
1282
 *
1283
 * @param lang  pointer to the string value
1284
 * @returns 1 if correct 0 otherwise
1285
 **/
1286
int
1287
xmlCheckLanguageID(const xmlChar * lang)
1288
0
{
1289
0
    const xmlChar *cur = lang, *nxt;
1290
1291
0
    if (cur == NULL)
1292
0
        return (0);
1293
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1294
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1295
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1296
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1297
        /*
1298
         * Still allow IANA code and user code which were coming
1299
         * from the previous version of the XML-1.0 specification
1300
         * it's deprecated but we should not fail
1301
         */
1302
0
        cur += 2;
1303
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1304
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1305
0
            cur++;
1306
0
        return(cur[0] == 0);
1307
0
    }
1308
0
    nxt = cur;
1309
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
0
           nxt++;
1312
0
    if (nxt - cur >= 4) {
1313
        /*
1314
         * Reserved
1315
         */
1316
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1317
0
            return(0);
1318
0
        return(1);
1319
0
    }
1320
0
    if (nxt - cur < 2)
1321
0
        return(0);
1322
    /* we got an ISO 639 code */
1323
0
    if (nxt[0] == 0)
1324
0
        return(1);
1325
0
    if (nxt[0] != '-')
1326
0
        return(0);
1327
1328
0
    nxt++;
1329
0
    cur = nxt;
1330
    /* now we can have extlang or script or region or variant */
1331
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1332
0
        goto region_m49;
1333
1334
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1335
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1336
0
           nxt++;
1337
0
    if (nxt - cur == 4)
1338
0
        goto script;
1339
0
    if (nxt - cur == 2)
1340
0
        goto region;
1341
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
0
        goto variant;
1343
0
    if (nxt - cur != 3)
1344
0
        return(0);
1345
    /* we parsed an extlang */
1346
0
    if (nxt[0] == 0)
1347
0
        return(1);
1348
0
    if (nxt[0] != '-')
1349
0
        return(0);
1350
1351
0
    nxt++;
1352
0
    cur = nxt;
1353
    /* now we can have script or region or variant */
1354
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1355
0
        goto region_m49;
1356
1357
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1358
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1359
0
           nxt++;
1360
0
    if (nxt - cur == 2)
1361
0
        goto region;
1362
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
0
        goto variant;
1364
0
    if (nxt - cur != 4)
1365
0
        return(0);
1366
    /* we parsed a script */
1367
0
script:
1368
0
    if (nxt[0] == 0)
1369
0
        return(1);
1370
0
    if (nxt[0] != '-')
1371
0
        return(0);
1372
1373
0
    nxt++;
1374
0
    cur = nxt;
1375
    /* now we can have region or variant */
1376
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1377
0
        goto region_m49;
1378
1379
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1380
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1381
0
           nxt++;
1382
1383
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1384
0
        goto variant;
1385
0
    if (nxt - cur != 2)
1386
0
        return(0);
1387
    /* we parsed a region */
1388
0
region:
1389
0
    if (nxt[0] == 0)
1390
0
        return(1);
1391
0
    if (nxt[0] != '-')
1392
0
        return(0);
1393
1394
0
    nxt++;
1395
0
    cur = nxt;
1396
    /* now we can just have a variant */
1397
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1398
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1399
0
           nxt++;
1400
1401
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1402
0
        return(0);
1403
1404
    /* we parsed a variant */
1405
0
variant:
1406
0
    if (nxt[0] == 0)
1407
0
        return(1);
1408
0
    if (nxt[0] != '-')
1409
0
        return(0);
1410
    /* extensions and private use subtags not checked */
1411
0
    return (1);
1412
1413
0
region_m49:
1414
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1415
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1416
0
        nxt += 3;
1417
0
        goto region;
1418
0
    }
1419
0
    return(0);
1420
0
}
1421
1422
/************************************************************************
1423
 *                  *
1424
 *    Parser stacks related functions and macros    *
1425
 *                  *
1426
 ************************************************************************/
1427
1428
static xmlChar *
1429
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1430
1431
/**
1432
 * Create a new namespace database.
1433
 *
1434
 * @returns the new obejct.
1435
 */
1436
xmlParserNsData *
1437
27.4k
xmlParserNsCreate(void) {
1438
27.4k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1439
1440
27.4k
    if (nsdb == NULL)
1441
0
        return(NULL);
1442
27.4k
    memset(nsdb, 0, sizeof(*nsdb));
1443
27.4k
    nsdb->defaultNsIndex = INT_MAX;
1444
1445
27.4k
    return(nsdb);
1446
27.4k
}
1447
1448
/**
1449
 * Free a namespace database.
1450
 *
1451
 * @param nsdb  namespace database
1452
 */
1453
void
1454
27.4k
xmlParserNsFree(xmlParserNsData *nsdb) {
1455
27.4k
    if (nsdb == NULL)
1456
0
        return;
1457
1458
27.4k
    xmlFree(nsdb->extra);
1459
27.4k
    xmlFree(nsdb->hash);
1460
27.4k
    xmlFree(nsdb);
1461
27.4k
}
1462
1463
/**
1464
 * Reset a namespace database.
1465
 *
1466
 * @param nsdb  namespace database
1467
 */
1468
static void
1469
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1470
0
    if (nsdb == NULL)
1471
0
        return;
1472
1473
0
    nsdb->hashElems = 0;
1474
0
    nsdb->elementId = 0;
1475
0
    nsdb->defaultNsIndex = INT_MAX;
1476
1477
0
    if (nsdb->hash)
1478
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1479
0
}
1480
1481
/**
1482
 * Signal that a new element has started.
1483
 *
1484
 * @param nsdb  namespace database
1485
 * @returns 0 on success, -1 if the element counter overflowed.
1486
 */
1487
static int
1488
0
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1489
0
    if (nsdb->elementId == UINT_MAX)
1490
0
        return(-1);
1491
0
    nsdb->elementId++;
1492
1493
0
    return(0);
1494
0
}
1495
1496
/**
1497
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1498
 * be set to the matching bucket, or the first empty bucket if no match
1499
 * was found.
1500
 *
1501
 * @param ctxt  parser context
1502
 * @param prefix  namespace prefix
1503
 * @param bucketPtr  optional bucket (return value)
1504
 * @returns the namespace index on success, INT_MAX if no namespace was
1505
 * found.
1506
 */
1507
static int
1508
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1509
0
                  xmlParserNsBucket **bucketPtr) {
1510
0
    xmlParserNsBucket *bucket, *tombstone;
1511
0
    unsigned index, hashValue;
1512
1513
0
    if (prefix->name == NULL)
1514
0
        return(ctxt->nsdb->defaultNsIndex);
1515
1516
0
    if (ctxt->nsdb->hashSize == 0)
1517
0
        return(INT_MAX);
1518
1519
0
    hashValue = prefix->hashValue;
1520
0
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1521
0
    bucket = &ctxt->nsdb->hash[index];
1522
0
    tombstone = NULL;
1523
1524
0
    while (bucket->hashValue) {
1525
0
        if (bucket->index == INT_MAX) {
1526
0
            if (tombstone == NULL)
1527
0
                tombstone = bucket;
1528
0
        } else if (bucket->hashValue == hashValue) {
1529
0
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1530
0
                if (bucketPtr != NULL)
1531
0
                    *bucketPtr = bucket;
1532
0
                return(bucket->index);
1533
0
            }
1534
0
        }
1535
1536
0
        index++;
1537
0
        bucket++;
1538
0
        if (index == ctxt->nsdb->hashSize) {
1539
0
            index = 0;
1540
0
            bucket = ctxt->nsdb->hash;
1541
0
        }
1542
0
    }
1543
1544
0
    if (bucketPtr != NULL)
1545
0
        *bucketPtr = tombstone ? tombstone : bucket;
1546
0
    return(INT_MAX);
1547
0
}
1548
1549
/**
1550
 * Lookup namespace URI with given prefix.
1551
 *
1552
 * @param ctxt  parser context
1553
 * @param prefix  namespace prefix
1554
 * @returns the namespace URI on success, NULL if no namespace was found.
1555
 */
1556
static const xmlChar *
1557
0
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1558
0
    const xmlChar *ret;
1559
0
    int nsIndex;
1560
1561
0
    if (prefix->name == ctxt->str_xml)
1562
0
        return(ctxt->str_xml_ns);
1563
1564
    /*
1565
     * minNsIndex is used when building an entity tree. We must
1566
     * ignore namespaces declared outside the entity.
1567
     */
1568
0
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1569
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1570
0
        return(NULL);
1571
1572
0
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1573
0
    if (ret[0] == 0)
1574
0
        ret = NULL;
1575
0
    return(ret);
1576
0
}
1577
1578
/**
1579
 * Lookup extra data for the given prefix. This returns data stored
1580
 * with xmlParserNsUdpateSax.
1581
 *
1582
 * @param ctxt  parser context
1583
 * @param prefix  namespace prefix
1584
 * @returns the data on success, NULL if no namespace was found.
1585
 */
1586
void *
1587
0
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1588
0
    xmlHashedString hprefix;
1589
0
    int nsIndex;
1590
1591
0
    if (prefix == ctxt->str_xml)
1592
0
        return(NULL);
1593
1594
0
    hprefix.name = prefix;
1595
0
    if (prefix != NULL)
1596
0
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1597
0
    else
1598
0
        hprefix.hashValue = 0;
1599
0
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1600
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1601
0
        return(NULL);
1602
1603
0
    return(ctxt->nsdb->extra[nsIndex].saxData);
1604
0
}
1605
1606
/**
1607
 * Sets or updates extra data for the given prefix. This value will be
1608
 * returned by xmlParserNsLookupSax as long as the namespace with the
1609
 * given prefix is in scope.
1610
 *
1611
 * @param ctxt  parser context
1612
 * @param prefix  namespace prefix
1613
 * @param saxData  extra data for SAX handler
1614
 * @returns the data on success, NULL if no namespace was found.
1615
 */
1616
int
1617
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1618
0
                     void *saxData) {
1619
0
    xmlHashedString hprefix;
1620
0
    int nsIndex;
1621
1622
0
    if (prefix == ctxt->str_xml)
1623
0
        return(-1);
1624
1625
0
    hprefix.name = prefix;
1626
0
    if (prefix != NULL)
1627
0
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1628
0
    else
1629
0
        hprefix.hashValue = 0;
1630
0
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1631
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1632
0
        return(-1);
1633
1634
0
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1635
0
    return(0);
1636
0
}
1637
1638
/**
1639
 * Grows the namespace tables.
1640
 *
1641
 * @param ctxt  parser context
1642
 * @returns 0 on success, -1 if a memory allocation failed.
1643
 */
1644
static int
1645
0
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1646
0
    const xmlChar **table;
1647
0
    xmlParserNsExtra *extra;
1648
0
    int newSize;
1649
1650
0
    newSize = xmlGrowCapacity(ctxt->nsMax,
1651
0
                              sizeof(table[0]) + sizeof(extra[0]),
1652
0
                              16, XML_MAX_ITEMS);
1653
0
    if (newSize < 0)
1654
0
        goto error;
1655
1656
0
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1657
0
    if (table == NULL)
1658
0
        goto error;
1659
0
    ctxt->nsTab = table;
1660
1661
0
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1662
0
    if (extra == NULL)
1663
0
        goto error;
1664
0
    ctxt->nsdb->extra = extra;
1665
1666
0
    ctxt->nsMax = newSize;
1667
0
    return(0);
1668
1669
0
error:
1670
0
    xmlErrMemory(ctxt);
1671
0
    return(-1);
1672
0
}
1673
1674
/**
1675
 * Push a new namespace on the table.
1676
 *
1677
 * @param ctxt  parser context
1678
 * @param prefix  prefix with hash value
1679
 * @param uri  uri with hash value
1680
 * @param saxData  extra data for SAX handler
1681
 * @param defAttr  whether the namespace comes from a default attribute
1682
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1683
 * -1 if a memory allocation failed.
1684
 */
1685
static int
1686
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1687
0
                const xmlHashedString *uri, void *saxData, int defAttr) {
1688
0
    xmlParserNsBucket *bucket = NULL;
1689
0
    xmlParserNsExtra *extra;
1690
0
    const xmlChar **ns;
1691
0
    unsigned hashValue, nsIndex, oldIndex;
1692
1693
0
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1694
0
        return(0);
1695
1696
0
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1697
0
        xmlErrMemory(ctxt);
1698
0
        return(-1);
1699
0
    }
1700
1701
    /*
1702
     * Default namespace and 'xml' namespace
1703
     */
1704
0
    if ((prefix == NULL) || (prefix->name == NULL)) {
1705
0
        oldIndex = ctxt->nsdb->defaultNsIndex;
1706
1707
0
        if (oldIndex != INT_MAX) {
1708
0
            extra = &ctxt->nsdb->extra[oldIndex];
1709
1710
0
            if (extra->elementId == ctxt->nsdb->elementId) {
1711
0
                if (defAttr == 0)
1712
0
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1713
0
                return(0);
1714
0
            }
1715
1716
0
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1717
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1718
0
                return(0);
1719
0
        }
1720
1721
0
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1722
0
        goto populate_entry;
1723
0
    }
1724
1725
    /*
1726
     * Hash table lookup
1727
     */
1728
0
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1729
0
    if (oldIndex != INT_MAX) {
1730
0
        extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
        /*
1733
         * Check for duplicate definitions on the same element.
1734
         */
1735
0
        if (extra->elementId == ctxt->nsdb->elementId) {
1736
0
            if (defAttr == 0)
1737
0
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1738
0
            return(0);
1739
0
        }
1740
1741
0
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1742
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1743
0
            return(0);
1744
1745
0
        bucket->index = ctxt->nsNr;
1746
0
        goto populate_entry;
1747
0
    }
1748
1749
    /*
1750
     * Insert new bucket
1751
     */
1752
1753
0
    hashValue = prefix->hashValue;
1754
1755
    /*
1756
     * Grow hash table, 50% fill factor
1757
     */
1758
0
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1759
0
        xmlParserNsBucket *newHash;
1760
0
        unsigned newSize, i, index;
1761
1762
0
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1763
0
            xmlErrMemory(ctxt);
1764
0
            return(-1);
1765
0
        }
1766
0
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1767
0
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1768
0
        if (newHash == NULL) {
1769
0
            xmlErrMemory(ctxt);
1770
0
            return(-1);
1771
0
        }
1772
0
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1773
1774
0
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1775
0
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1776
0
            unsigned newIndex;
1777
1778
0
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1779
0
                continue;
1780
0
            newIndex = hv & (newSize - 1);
1781
1782
0
            while (newHash[newIndex].hashValue != 0) {
1783
0
                newIndex++;
1784
0
                if (newIndex == newSize)
1785
0
                    newIndex = 0;
1786
0
            }
1787
1788
0
            newHash[newIndex] = ctxt->nsdb->hash[i];
1789
0
        }
1790
1791
0
        xmlFree(ctxt->nsdb->hash);
1792
0
        ctxt->nsdb->hash = newHash;
1793
0
        ctxt->nsdb->hashSize = newSize;
1794
1795
        /*
1796
         * Relookup
1797
         */
1798
0
        index = hashValue & (newSize - 1);
1799
1800
0
        while (newHash[index].hashValue != 0) {
1801
0
            index++;
1802
0
            if (index == newSize)
1803
0
                index = 0;
1804
0
        }
1805
1806
0
        bucket = &newHash[index];
1807
0
    }
1808
1809
0
    bucket->hashValue = hashValue;
1810
0
    bucket->index = ctxt->nsNr;
1811
0
    ctxt->nsdb->hashElems++;
1812
0
    oldIndex = INT_MAX;
1813
1814
0
populate_entry:
1815
0
    nsIndex = ctxt->nsNr;
1816
1817
0
    ns = &ctxt->nsTab[nsIndex * 2];
1818
0
    ns[0] = prefix ? prefix->name : NULL;
1819
0
    ns[1] = uri->name;
1820
1821
0
    extra = &ctxt->nsdb->extra[nsIndex];
1822
0
    extra->saxData = saxData;
1823
0
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1824
0
    extra->uriHashValue = uri->hashValue;
1825
0
    extra->elementId = ctxt->nsdb->elementId;
1826
0
    extra->oldIndex = oldIndex;
1827
1828
0
    ctxt->nsNr++;
1829
1830
0
    return(1);
1831
0
}
1832
1833
/**
1834
 * Pops the top `nr` namespaces and restores the hash table.
1835
 *
1836
 * @param ctxt  an XML parser context
1837
 * @param nr  the number to pop
1838
 * @returns the number of namespaces popped.
1839
 */
1840
static int
1841
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1842
0
{
1843
0
    int i;
1844
1845
    /* assert(nr <= ctxt->nsNr); */
1846
1847
0
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1848
0
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1849
0
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1850
1851
0
        if (prefix == NULL) {
1852
0
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1853
0
        } else {
1854
0
            xmlHashedString hprefix;
1855
0
            xmlParserNsBucket *bucket = NULL;
1856
1857
0
            hprefix.name = prefix;
1858
0
            hprefix.hashValue = extra->prefixHashValue;
1859
0
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1860
            /* assert(bucket && bucket->hashValue); */
1861
0
            bucket->index = extra->oldIndex;
1862
0
        }
1863
0
    }
1864
1865
0
    ctxt->nsNr -= nr;
1866
0
    return(nr);
1867
0
}
1868
1869
static int
1870
0
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1871
0
    const xmlChar **atts;
1872
0
    unsigned *attallocs;
1873
0
    int newSize;
1874
1875
0
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1876
0
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1877
0
                              10, XML_MAX_ATTRS);
1878
0
    if (newSize < 0) {
1879
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1880
0
                    "Maximum number of attributes exceeded");
1881
0
        return(-1);
1882
0
    }
1883
1884
0
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1885
0
    if (atts == NULL)
1886
0
        goto mem_error;
1887
0
    ctxt->atts = atts;
1888
1889
0
    attallocs = xmlRealloc(ctxt->attallocs,
1890
0
                           newSize * sizeof(attallocs[0]));
1891
0
    if (attallocs == NULL)
1892
0
        goto mem_error;
1893
0
    ctxt->attallocs = attallocs;
1894
1895
0
    ctxt->maxatts = newSize * 5;
1896
1897
0
    return(0);
1898
1899
0
mem_error:
1900
0
    xmlErrMemory(ctxt);
1901
0
    return(-1);
1902
0
}
1903
1904
/**
1905
 * Pushes a new parser input on top of the input stack
1906
 *
1907
 * @param ctxt  an XML parser context
1908
 * @param value  the parser input
1909
 * @returns -1 in case of error, the index in the stack otherwise
1910
 */
1911
int
1912
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1913
45.1k
{
1914
45.1k
    char *directory = NULL;
1915
45.1k
    int maxDepth;
1916
1917
45.1k
    if ((ctxt == NULL) || (value == NULL))
1918
0
        return(-1);
1919
1920
45.1k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1921
1922
45.1k
    if (ctxt->inputNr >= ctxt->inputMax) {
1923
1.48k
        xmlParserInputPtr *tmp;
1924
1.48k
        int newSize;
1925
1926
1.48k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1927
1.48k
                                  5, maxDepth);
1928
1.48k
        if (newSize < 0) {
1929
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1930
0
                           "Maximum entity nesting depth exceeded");
1931
0
            xmlHaltParser(ctxt);
1932
0
            return(-1);
1933
0
        }
1934
1.48k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1935
1.48k
        if (tmp == NULL) {
1936
0
            xmlErrMemory(ctxt);
1937
0
            return(-1);
1938
0
        }
1939
1.48k
        ctxt->inputTab = tmp;
1940
1.48k
        ctxt->inputMax = newSize;
1941
1.48k
    }
1942
1943
45.1k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1944
27.4k
        directory = xmlParserGetDirectory(value->filename);
1945
27.4k
        if (directory == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return(-1);
1948
0
        }
1949
27.4k
    }
1950
1951
45.1k
    if (ctxt->input_id >= INT_MAX) {
1952
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1953
0
        return(-1);
1954
0
    }
1955
1956
45.1k
    ctxt->inputTab[ctxt->inputNr] = value;
1957
45.1k
    ctxt->input = value;
1958
1959
45.1k
    if (ctxt->inputNr == 0) {
1960
27.4k
        xmlFree(ctxt->directory);
1961
27.4k
        ctxt->directory = directory;
1962
27.4k
    }
1963
1964
    /*
1965
     * The input ID is unused internally, but there are entity
1966
     * loaders in downstream code that detect the main document
1967
     * by checking for "input_id == 1".
1968
     */
1969
45.1k
    value->id = ctxt->input_id++;
1970
1971
45.1k
    return(ctxt->inputNr++);
1972
45.1k
}
1973
1974
/**
1975
 * Pops the top parser input from the input stack
1976
 *
1977
 * @param ctxt  an XML parser context
1978
 * @returns the input just removed
1979
 */
1980
xmlParserInput *
1981
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1982
100k
{
1983
100k
    xmlParserInputPtr ret;
1984
1985
100k
    if (ctxt == NULL)
1986
0
        return(NULL);
1987
100k
    if (ctxt->inputNr <= 0)
1988
54.8k
        return (NULL);
1989
45.1k
    ctxt->inputNr--;
1990
45.1k
    if (ctxt->inputNr > 0)
1991
17.7k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1992
27.4k
    else
1993
27.4k
        ctxt->input = NULL;
1994
45.1k
    ret = ctxt->inputTab[ctxt->inputNr];
1995
45.1k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1996
45.1k
    return (ret);
1997
100k
}
1998
1999
/**
2000
 * Pushes a new element node on top of the node stack
2001
 *
2002
 * @deprecated Internal function, do not use.
2003
 *
2004
 * @param ctxt  an XML parser context
2005
 * @param value  the element node
2006
 * @returns -1 in case of error, the index in the stack otherwise
2007
 */
2008
int
2009
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
2010
0
{
2011
0
    if (ctxt == NULL)
2012
0
        return(0);
2013
2014
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2015
0
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2016
0
        xmlNodePtr *tmp;
2017
0
        int newSize;
2018
2019
0
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2020
0
                                  10, maxDepth);
2021
0
        if (newSize < 0) {
2022
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2023
0
                    "Excessive depth in document: %d,"
2024
0
                    " use XML_PARSE_HUGE option\n",
2025
0
                    ctxt->nodeNr);
2026
0
            xmlHaltParser(ctxt);
2027
0
            return(-1);
2028
0
        }
2029
2030
0
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2031
0
        if (tmp == NULL) {
2032
0
            xmlErrMemory(ctxt);
2033
0
            return (-1);
2034
0
        }
2035
0
        ctxt->nodeTab = tmp;
2036
0
  ctxt->nodeMax = newSize;
2037
0
    }
2038
2039
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
2040
0
    ctxt->node = value;
2041
0
    return (ctxt->nodeNr++);
2042
0
}
2043
2044
/**
2045
 * Pops the top element node from the node stack
2046
 *
2047
 * @deprecated Internal function, do not use.
2048
 *
2049
 * @param ctxt  an XML parser context
2050
 * @returns the node just removed
2051
 */
2052
xmlNode *
2053
nodePop(xmlParserCtxt *ctxt)
2054
45.7k
{
2055
45.7k
    xmlNodePtr ret;
2056
2057
45.7k
    if (ctxt == NULL) return(NULL);
2058
45.7k
    if (ctxt->nodeNr <= 0)
2059
45.7k
        return (NULL);
2060
0
    ctxt->nodeNr--;
2061
0
    if (ctxt->nodeNr > 0)
2062
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2063
0
    else
2064
0
        ctxt->node = NULL;
2065
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
2066
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2067
0
    return (ret);
2068
45.7k
}
2069
2070
/**
2071
 * Pushes a new element name/prefix/URL on top of the name stack
2072
 *
2073
 * @param ctxt  an XML parser context
2074
 * @param value  the element name
2075
 * @param prefix  the element prefix
2076
 * @param URI  the element namespace name
2077
 * @param line  the current line number for error messages
2078
 * @param nsNr  the number of namespaces pushed on the namespace table
2079
 * @returns -1 in case of error, the index in the stack otherwise
2080
 */
2081
static int
2082
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2083
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2084
1.26M
{
2085
1.26M
    xmlStartTag *tag;
2086
2087
1.26M
    if (ctxt->nameNr >= ctxt->nameMax) {
2088
38.8k
        const xmlChar **tmp;
2089
38.8k
        xmlStartTag *tmp2;
2090
38.8k
        int newSize;
2091
2092
38.8k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2093
38.8k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2094
38.8k
                                  10, XML_MAX_ITEMS);
2095
38.8k
        if (newSize < 0)
2096
0
            goto mem_error;
2097
2098
38.8k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2099
38.8k
        if (tmp == NULL)
2100
0
      goto mem_error;
2101
38.8k
  ctxt->nameTab = tmp;
2102
2103
38.8k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2104
38.8k
        if (tmp2 == NULL)
2105
0
      goto mem_error;
2106
38.8k
  ctxt->pushTab = tmp2;
2107
2108
38.8k
        ctxt->nameMax = newSize;
2109
1.22M
    } else if (ctxt->pushTab == NULL) {
2110
12.2k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2111
12.2k
        if (ctxt->pushTab == NULL)
2112
0
            goto mem_error;
2113
12.2k
    }
2114
1.26M
    ctxt->nameTab[ctxt->nameNr] = value;
2115
1.26M
    ctxt->name = value;
2116
1.26M
    tag = &ctxt->pushTab[ctxt->nameNr];
2117
1.26M
    tag->prefix = prefix;
2118
1.26M
    tag->URI = URI;
2119
1.26M
    tag->line = line;
2120
1.26M
    tag->nsNr = nsNr;
2121
1.26M
    return (ctxt->nameNr++);
2122
0
mem_error:
2123
0
    xmlErrMemory(ctxt);
2124
0
    return (-1);
2125
1.26M
}
2126
#ifdef LIBXML_PUSH_ENABLED
2127
/**
2128
 * Pops the top element/prefix/URI name from the name stack
2129
 *
2130
 * @param ctxt  an XML parser context
2131
 * @returns the name just removed
2132
 */
2133
static const xmlChar *
2134
nameNsPop(xmlParserCtxtPtr ctxt)
2135
0
{
2136
0
    const xmlChar *ret;
2137
2138
0
    if (ctxt->nameNr <= 0)
2139
0
        return (NULL);
2140
0
    ctxt->nameNr--;
2141
0
    if (ctxt->nameNr > 0)
2142
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2143
0
    else
2144
0
        ctxt->name = NULL;
2145
0
    ret = ctxt->nameTab[ctxt->nameNr];
2146
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
2147
0
    return (ret);
2148
0
}
2149
#endif /* LIBXML_PUSH_ENABLED */
2150
2151
/**
2152
 * Pops the top element name from the name stack
2153
 *
2154
 * @deprecated Internal function, do not use.
2155
 *
2156
 * @param ctxt  an XML parser context
2157
 * @returns the name just removed
2158
 */
2159
static const xmlChar *
2160
namePop(xmlParserCtxtPtr ctxt)
2161
298k
{
2162
298k
    const xmlChar *ret;
2163
2164
298k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2165
0
        return (NULL);
2166
298k
    ctxt->nameNr--;
2167
298k
    if (ctxt->nameNr > 0)
2168
296k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2169
1.92k
    else
2170
1.92k
        ctxt->name = NULL;
2171
298k
    ret = ctxt->nameTab[ctxt->nameNr];
2172
298k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2173
298k
    return (ret);
2174
298k
}
2175
2176
1.57M
/*
 * Pushes `val` on the ctxt->spaceTab stack and points ctxt->space at
 * the new top entry.
 *
 * Returns the index of the new entry, or -1 if the stack could not be
 * enlarged (a memory error is reported on ctxt).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));

        /* Both a refused capacity and a failed realloc end up here. */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2201
2202
608k
/*
 * Pops the top value of the ctxt->spaceTab stack and points ctxt->space
 * at the entry below it, or at slot 0 when the stack becomes empty.
 * The vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2214
2215
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt` (the parser context) to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   CMPn(s, c1, ..., cn) compares the first n bytes at `s` with the
 *           given characters.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one by one, updating line and column when a
 *           newline is crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF expand to bare statements rather than
 * do { } while (0) blocks, so they must not be used as the unbraced body
 * of an if that has an else branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* `val` is parenthesized so that expressions such as `a | b` work. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Arguments are parenthesized so that `v` may be e.g. `c & mask`. */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2321
2322
static int
2323
79.6M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2324
79.6M
    int c = xmlCurrentChar(ctxt, len);
2325
2326
79.6M
    if (c == XML_INVALID_CHAR)
2327
1.36M
        c = 0xFFFD; /* replacement character */
2328
2329
79.6M
    return(c);
2330
79.6M
}
2331
2332
/**
2333
 * Skip whitespace in the input stream.
2334
 *
2335
 * @deprecated Internal function, do not use.
2336
 *
2337
 * @param ctxt  the XML parser context
2338
 * @returns the number of space chars skipped
2339
 */
2340
int
2341
5.20M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2342
5.20M
    const xmlChar *cur;
2343
5.20M
    int res = 0;
2344
2345
5.20M
    cur = ctxt->input->cur;
2346
5.59M
    while (IS_BLANK_CH(*cur)) {
2347
5.59M
        if (*cur == '\n') {
2348
632k
            ctxt->input->line++; ctxt->input->col = 1;
2349
4.95M
        } else {
2350
4.95M
            ctxt->input->col++;
2351
4.95M
        }
2352
5.59M
        cur++;
2353
5.59M
        if (res < INT_MAX)
2354
5.59M
            res++;
2355
5.59M
        if (*cur == 0) {
2356
1.25k
            ctxt->input->cur = cur;
2357
1.25k
            xmlParserGrow(ctxt);
2358
1.25k
            cur = ctxt->input->cur;
2359
1.25k
        }
2360
5.59M
    }
2361
5.20M
    ctxt->input->cur = cur;
2362
2363
5.20M
    if (res > 4)
2364
26.0k
        GROW;
2365
2366
5.20M
    return(res);
2367
5.20M
}
2368
2369
static void
2370
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2371
0
    unsigned long consumed;
2372
0
    xmlEntityPtr ent;
2373
2374
0
    ent = ctxt->input->entity;
2375
2376
0
    ent->flags &= ~XML_ENT_EXPANDING;
2377
2378
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2379
0
        int result;
2380
2381
        /*
2382
         * Read the rest of the stream in case of errors. We want
2383
         * to account for the whole entity size.
2384
         */
2385
0
        do {
2386
0
            ctxt->input->cur = ctxt->input->end;
2387
0
            xmlParserShrink(ctxt);
2388
0
            result = xmlParserGrow(ctxt);
2389
0
        } while (result > 0);
2390
2391
0
        consumed = ctxt->input->consumed;
2392
0
        xmlSaturatedAddSizeT(&consumed,
2393
0
                             ctxt->input->end - ctxt->input->base);
2394
2395
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2396
2397
        /*
2398
         * Add to sizeentities when parsing an external entity
2399
         * for the first time.
2400
         */
2401
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2402
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2403
0
        }
2404
2405
0
        ent->flags |= XML_ENT_CHECKED;
2406
0
    }
2407
2408
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2409
2410
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2411
2412
0
    GROW;
2413
0
}
2414
2415
/**
2416
 * Skip whitespace in the input stream, also handling parameter
2417
 * entities.
2418
 *
2419
 * @param ctxt  the XML parser context
2420
 * @returns the number of space chars skipped
2421
 */
2422
static int
2423
514k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2424
514k
    int res = 0;
2425
514k
    int inParam;
2426
514k
    int expandParam;
2427
2428
514k
    inParam = PARSER_IN_PE(ctxt);
2429
514k
    expandParam = PARSER_EXTERNAL(ctxt);
2430
2431
514k
    if (!inParam && !expandParam)
2432
514k
        return(xmlSkipBlankChars(ctxt));
2433
2434
    /*
2435
     * It's Okay to use CUR/NEXT here since all the blanks are on
2436
     * the ASCII range.
2437
     */
2438
0
    while (PARSER_STOPPED(ctxt) == 0) {
2439
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2440
0
            NEXT;
2441
0
        } else if (CUR == '%') {
2442
0
            if ((expandParam == 0) ||
2443
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2444
0
                break;
2445
2446
            /*
2447
             * Expand parameter entity. We continue to consume
2448
             * whitespace at the start of the entity and possible
2449
             * even consume the whole entity and pop it. We might
2450
             * even pop multiple PEs in this loop.
2451
             */
2452
0
            xmlParsePERefInternal(ctxt, 0);
2453
2454
0
            inParam = PARSER_IN_PE(ctxt);
2455
0
            expandParam = PARSER_EXTERNAL(ctxt);
2456
0
        } else if (CUR == 0) {
2457
0
            if (inParam == 0)
2458
0
                break;
2459
2460
            /*
2461
             * Don't pop parameter entities that start a markup
2462
             * declaration to detect Well-formedness constraint:
2463
             * PE Between Declarations.
2464
             */
2465
0
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2466
0
                break;
2467
2468
0
            xmlPopPE(ctxt);
2469
2470
0
            inParam = PARSER_IN_PE(ctxt);
2471
0
            expandParam = PARSER_EXTERNAL(ctxt);
2472
0
        } else {
2473
0
            break;
2474
0
        }
2475
2476
        /*
2477
         * Also increase the counter when entering or exiting a PERef.
2478
         * The spec says: "When a parameter-entity reference is recognized
2479
         * in the DTD and included, its replacement text MUST be enlarged
2480
         * by the attachment of one leading and one following space (#x20)
2481
         * character."
2482
         */
2483
0
        if (res < INT_MAX)
2484
0
            res++;
2485
0
    }
2486
2487
0
    return(res);
2488
514k
}
2489
2490
/************************************************************************
2491
 *                  *
2492
 *    Commodity functions to handle entities      *
2493
 *                  *
2494
 ************************************************************************/
2495
2496
/**
2497
 * @deprecated Internal function, don't use.
2498
 *
2499
 * @param ctxt  an XML parser context
2500
 * @returns the current xmlChar in the parser context
2501
 */
2502
xmlChar
2503
0
xmlPopInput(xmlParserCtxt *ctxt) {
2504
0
    xmlParserInputPtr input;
2505
2506
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2507
0
    input = xmlCtxtPopInput(ctxt);
2508
0
    xmlFreeInputStream(input);
2509
0
    if (*ctxt->input->cur == 0)
2510
0
        xmlParserGrow(ctxt);
2511
0
    return(CUR);
2512
0
}
2513
2514
/**
2515
 * Push an input stream onto the stack.
2516
 *
2517
 * @deprecated Internal function, don't use.
2518
 *
2519
 * @param ctxt  an XML parser context
2520
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2521
 * @returns -1 in case of error or the index in the input stack
2522
 */
2523
int
2524
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2525
0
    int ret;
2526
2527
0
    if ((ctxt == NULL) || (input == NULL))
2528
0
        return(-1);
2529
2530
0
    ret = xmlCtxtPushInput(ctxt, input);
2531
0
    if (ret >= 0)
2532
0
        GROW;
2533
0
    return(ret);
2534
0
}
2535
2536
/**
2537
 * Parse a numeric character reference. Always consumes '&'.
2538
 *
2539
 * @deprecated Internal function, don't use.
2540
 *
2541
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2542
 *                      '&#x' [0-9a-fA-F]+ ';'
2543
 *
2544
 * [ WFC: Legal Character ]
2545
 * Characters referred to using character references must match the
2546
 * production for Char.
2547
 *
2548
 * @param ctxt  an XML parser context
2549
 * @returns the value parsed (as an int), 0 in case of error
2550
 */
2551
int
2552
278k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2553
278k
    int val = 0;
2554
278k
    int count = 0;
2555
2556
    /*
2557
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2558
     */
2559
278k
    if ((RAW == '&') && (NXT(1) == '#') &&
2560
278k
        (NXT(2) == 'x')) {
2561
240k
  SKIP(3);
2562
240k
  GROW;
2563
865k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2564
658k
      if (count++ > 20) {
2565
7.41k
    count = 0;
2566
7.41k
    GROW;
2567
7.41k
      }
2568
658k
      if ((RAW >= '0') && (RAW <= '9'))
2569
411k
          val = val * 16 + (CUR - '0');
2570
246k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2571
126k
          val = val * 16 + (CUR - 'a') + 10;
2572
119k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2573
86.2k
          val = val * 16 + (CUR - 'A') + 10;
2574
33.3k
      else {
2575
33.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2576
33.3k
    val = 0;
2577
33.3k
    break;
2578
33.3k
      }
2579
624k
      if (val > 0x110000)
2580
97.5k
          val = 0x110000;
2581
2582
624k
      NEXT;
2583
624k
      count++;
2584
624k
  }
2585
240k
  if (RAW == ';') {
2586
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2587
206k
      ctxt->input->col++;
2588
206k
      ctxt->input->cur++;
2589
206k
  }
2590
240k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2591
38.3k
  SKIP(2);
2592
38.3k
  GROW;
2593
196k
  while (RAW != ';') { /* loop blocked by count */
2594
163k
      if (count++ > 20) {
2595
6.07k
    count = 0;
2596
6.07k
    GROW;
2597
6.07k
      }
2598
163k
      if ((RAW >= '0') && (RAW <= '9'))
2599
158k
          val = val * 10 + (CUR - '0');
2600
5.34k
      else {
2601
5.34k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2602
5.34k
    val = 0;
2603
5.34k
    break;
2604
5.34k
      }
2605
158k
      if (val > 0x110000)
2606
64.7k
          val = 0x110000;
2607
2608
158k
      NEXT;
2609
158k
      count++;
2610
158k
  }
2611
38.3k
  if (RAW == ';') {
2612
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2613
33.0k
      ctxt->input->col++;
2614
33.0k
      ctxt->input->cur++;
2615
33.0k
  }
2616
38.3k
    } else {
2617
0
        if (RAW == '&')
2618
0
            SKIP(1);
2619
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2620
0
    }
2621
2622
    /*
2623
     * [ WFC: Legal Character ]
2624
     * Characters referred to using character references must match the
2625
     * production for Char.
2626
     */
2627
278k
    if (val >= 0x110000) {
2628
588
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2629
588
                "xmlParseCharRef: character reference out of bounds\n",
2630
588
          val);
2631
588
        val = 0xFFFD;
2632
277k
    } else if (!IS_CHAR(val)) {
2633
39.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2634
39.6k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2635
39.6k
                    val);
2636
39.6k
    }
2637
278k
    return(val);
2638
278k
}
2639
2640
/**
2641
 * Parse Reference declarations, variant parsing from a string rather
2642
 * than an an input flow.
2643
 *
2644
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2645
 *                      '&#x' [0-9a-fA-F]+ ';'
2646
 *
2647
 * [ WFC: Legal Character ]
2648
 * Characters referred to using character references must match the
2649
 * production for Char.
2650
 *
2651
 * @param ctxt  an XML parser context
2652
 * @param str  a pointer to an index in the string
2653
 * @returns the value parsed (as an int), 0 in case of error, str will be
2654
 *         updated to the current value of the index
2655
 */
2656
static int
2657
250k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2658
250k
    const xmlChar *ptr;
2659
250k
    xmlChar cur;
2660
250k
    int val = 0;
2661
2662
250k
    if ((str == NULL) || (*str == NULL)) return(0);
2663
250k
    ptr = *str;
2664
250k
    cur = *ptr;
2665
250k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2666
234k
  ptr += 3;
2667
234k
  cur = *ptr;
2668
834k
  while (cur != ';') { /* Non input consuming loop */
2669
606k
      if ((cur >= '0') && (cur <= '9'))
2670
180k
          val = val * 16 + (cur - '0');
2671
425k
      else if ((cur >= 'a') && (cur <= 'f'))
2672
281k
          val = val * 16 + (cur - 'a') + 10;
2673
143k
      else if ((cur >= 'A') && (cur <= 'F'))
2674
137k
          val = val * 16 + (cur - 'A') + 10;
2675
6.06k
      else {
2676
6.06k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2677
6.06k
    val = 0;
2678
6.06k
    break;
2679
6.06k
      }
2680
600k
      if (val > 0x110000)
2681
9.67k
          val = 0x110000;
2682
2683
600k
      ptr++;
2684
600k
      cur = *ptr;
2685
600k
  }
2686
234k
  if (cur == ';')
2687
228k
      ptr++;
2688
234k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2689
16.4k
  ptr += 2;
2690
16.4k
  cur = *ptr;
2691
76.8k
  while (cur != ';') { /* Non input consuming loops */
2692
63.6k
      if ((cur >= '0') && (cur <= '9'))
2693
60.4k
          val = val * 10 + (cur - '0');
2694
3.15k
      else {
2695
3.15k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2696
3.15k
    val = 0;
2697
3.15k
    break;
2698
3.15k
      }
2699
60.4k
      if (val > 0x110000)
2700
4.62k
          val = 0x110000;
2701
2702
60.4k
      ptr++;
2703
60.4k
      cur = *ptr;
2704
60.4k
  }
2705
16.4k
  if (cur == ';')
2706
13.2k
      ptr++;
2707
16.4k
    } else {
2708
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2709
0
  return(0);
2710
0
    }
2711
250k
    *str = ptr;
2712
2713
    /*
2714
     * [ WFC: Legal Character ]
2715
     * Characters referred to using character references must match the
2716
     * production for Char.
2717
     */
2718
250k
    if (val >= 0x110000) {
2719
897
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2720
897
                "xmlParseStringCharRef: character reference out of bounds\n",
2721
897
                val);
2722
249k
    } else if (IS_CHAR(val)) {
2723
237k
        return(val);
2724
237k
    } else {
2725
11.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2726
11.9k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2727
11.9k
        val);
2728
11.9k
    }
2729
12.8k
    return(0);
2730
250k
}
2731
2732
/**
2733
 *     [69] PEReference ::= '%' Name ';'
2734
 *
2735
 * @deprecated Internal function, do not use.
2736
 *
2737
 * [ WFC: No Recursion ]
2738
 * A parsed entity must not contain a recursive
2739
 * reference to itself, either directly or indirectly.
2740
 *
2741
 * [ WFC: Entity Declared ]
2742
 * In a document without any DTD, a document with only an internal DTD
2743
 * subset which contains no parameter entity references, or a document
2744
 * with "standalone='yes'", ...  ... The declaration of a parameter
2745
 * entity must precede any reference to it...
2746
 *
2747
 * [ VC: Entity Declared ]
2748
 * In a document with an external subset or external parameter entities
2749
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2750
 * must precede any reference to it...
2751
 *
2752
 * [ WFC: In DTD ]
2753
 * Parameter-entity references may only appear in the DTD.
2754
 * NOTE: misleading but this is handled.
2755
 *
2756
 * A PEReference may have been detected in the current input stream
2757
 * the handling is done accordingly to
2758
 *      http://www.w3.org/TR/REC-xml#entproc
2759
 * i.e.
2760
 *   - Included in literal in entity values
2761
 *   - Included as Parameter Entity reference within DTDs
2762
 * @param ctxt  the parser context
2763
 */
2764
void
2765
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2766
0
    xmlParsePERefInternal(ctxt, 0);
2767
0
}
2768
2769
/**
2770
 * @deprecated Internal function, don't use.
2771
 *
2772
 * @param ctxt  the parser context
2773
 * @param str  the input string
2774
 * @param len  the string length
2775
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2776
 * @param end  an end marker xmlChar, 0 if none
2777
 * @param end2  an end marker xmlChar, 0 if none
2778
 * @param end3  an end marker xmlChar, 0 if none
2779
 * @returns A newly allocated string with the substitution done. The caller
2780
 *      must deallocate it !
2781
 */
2782
xmlChar *
2783
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2784
                           int what ATTRIBUTE_UNUSED,
2785
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2786
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2787
0
        return(NULL);
2788
2789
0
    if ((str[len] != 0) ||
2790
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2791
0
        return(NULL);
2792
2793
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2794
0
}
2795
2796
/**
2797
 * @deprecated Internal function, don't use.
2798
 *
2799
 * @param ctxt  the parser context
2800
 * @param str  the input string
2801
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @param end  an end marker xmlChar, 0 if none
2803
 * @param end2  an end marker xmlChar, 0 if none
2804
 * @param end3  an end marker xmlChar, 0 if none
2805
 * @returns A newly allocated string with the substitution done. The caller
2806
 *      must deallocate it !
2807
 */
2808
xmlChar *
2809
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2810
                        int what ATTRIBUTE_UNUSED,
2811
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2812
0
    if ((ctxt == NULL) || (str == NULL))
2813
0
        return(NULL);
2814
2815
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2816
0
        return(NULL);
2817
2818
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2819
0
}
2820
2821
/************************************************************************
2822
 *                  *
2823
 *    Commodity functions, cleanup needed ?     *
2824
 *                  *
2825
 ************************************************************************/
2826
2827
/**
2828
 * Is this a sequence of blank chars that one can ignore ?
2829
 *
2830
 * @param ctxt  an XML parser context
2831
 * @param str  a xmlChar *
2832
 * @param len  the size of `str`
2833
 * @param blank_chars  we know the chars are blanks
2834
 * @returns 1 if ignorable 0 otherwise.
2835
 */
2836
2837
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2838
1.13M
                     int blank_chars) {
2839
1.13M
    int i;
2840
1.13M
    xmlNodePtr lastChild;
2841
2842
    /*
2843
     * Check for xml:space value.
2844
     */
2845
1.13M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2846
1.13M
        (*(ctxt->space) == -2))
2847
641k
  return(0);
2848
2849
    /*
2850
     * Check that the string is made of blanks
2851
     */
2852
496k
    if (blank_chars == 0) {
2853
749k
  for (i = 0;i < len;i++)
2854
742k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2855
410k
    }
2856
2857
    /*
2858
     * Look if the element is mixed content in the DTD if available
2859
     */
2860
91.9k
    if (ctxt->node == NULL) return(0);
2861
0
    if (ctxt->myDoc != NULL) {
2862
0
        xmlElementPtr elemDecl = NULL;
2863
0
        xmlDocPtr doc = ctxt->myDoc;
2864
0
        const xmlChar *prefix = NULL;
2865
2866
0
        if (ctxt->node->ns)
2867
0
            prefix = ctxt->node->ns->prefix;
2868
0
        if (doc->intSubset != NULL)
2869
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2870
0
                                      prefix);
2871
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2872
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2873
0
                                      prefix);
2874
0
        if (elemDecl != NULL) {
2875
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2876
0
                return(1);
2877
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2878
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2879
0
                return(0);
2880
0
        }
2881
0
    }
2882
2883
    /*
2884
     * Otherwise, heuristic :-\
2885
     *
2886
     * When push parsing, we could be at the end of a chunk.
2887
     * This makes the look-ahead and consequently the NOBLANKS
2888
     * option unreliable.
2889
     */
2890
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2891
0
    if ((ctxt->node->children == NULL) &&
2892
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2893
2894
0
    lastChild = xmlGetLastChild(ctxt->node);
2895
0
    if (lastChild == NULL) {
2896
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2897
0
            (ctxt->node->content != NULL)) return(0);
2898
0
    } else if (xmlNodeIsText(lastChild))
2899
0
        return(0);
2900
0
    else if ((ctxt->node->children != NULL) &&
2901
0
             (xmlNodeIsText(ctxt->node->children)))
2902
0
        return(0);
2903
0
    return(1);
2904
0
}
2905
2906
/************************************************************************
2907
 *                  *
2908
 *    Extra stuff for namespace support     *
2909
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2910
 *                  *
2911
 ************************************************************************/
2912
2913
/**
2914
 * Parse an UTF8 encoded XML qualified name string
2915
 *
2916
 * @deprecated Don't use.
2917
 *
2918
 * @param ctxt  an XML parser context
2919
 * @param name  an XML parser context
2920
 * @param prefixOut  a xmlChar **
2921
 * @returns the local part, and prefix is updated
2922
 *   to get the Prefix if any.
2923
 */
2924
2925
xmlChar *
2926
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2927
0
    xmlChar *ret;
2928
0
    const xmlChar *localname;
2929
2930
0
    localname = xmlSplitQName4(name, prefixOut);
2931
0
    if (localname == NULL) {
2932
0
        xmlCtxtErrMemory(ctxt);
2933
0
        return(NULL);
2934
0
    }
2935
2936
0
    ret = xmlStrdup(localname);
2937
0
    if (ret == NULL) {
2938
0
        xmlCtxtErrMemory(ctxt);
2939
0
        xmlFree(*prefixOut);
2940
0
    }
2941
2942
0
    return(ret);
2943
0
}
2944
2945
/************************************************************************
2946
 *                  *
2947
 *      The parser itself       *
2948
 *  Relates to http://www.w3.org/TR/REC-xml       *
2949
 *                  *
2950
 ************************************************************************/
2951
2952
/************************************************************************
2953
 *                  *
2954
 *  Routines to parse Name, NCName and NmToken      *
2955
 *                  *
2956
 ************************************************************************/
2957
2958
/*
2959
 * The two following functions are related to the change of accepted
2960
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2961
 * They correspond to the modified production [4] and the new production [4a]
2962
 * changes in that revision. Also note that the macros used for the
2963
 * productions Letter, Digit, CombiningChar and Extender are not needed
2964
 * anymore.
2965
 * We still keep compatibility to pre-revision5 parsing semantic if the
2966
 * new XML_PARSE_OLD10 option is given to the parser.
2967
 */
2968
2969
/*
 * Check a code point against the NameStartChar ranges listed in the
 * Update 5 of XML-1.0 (productions [4], [4a] and [5]):
 *
 *   ':' | [A-Z] | '_' | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
 *   [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
 *   [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
 *   [#x10000-#xEFFFF]
 *
 * Returns 1 if `c` may start a name, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (c < 0x80)
        return(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':'));

    /* nothing in [#x80-#xBF] starts a name */
    if (c < 0xC0)
        return(0);

    /* [#xC0-#x2FF] except the two holes #xD7 and #xF7 */
    if (c <= 0x2FF)
        return((c != 0xD7) && (c != 0xF7));

    /* [#x370-#x1FFF] except the hole #x37E */
    if (c <= 0x1FFF)
        return((c >= 0x370) && (c != 0x37E));

    if (c <= 0xD7FF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               (c >= 0x3001));

    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    return((c >= 0x10000) && (c <= 0xEFFFF));
}
2994
2995
/*
 * Check a code point against the NameChar ranges listed in the
 * Update 5 of XML-1.0 (productions [4], [4a] and [5]). Compared to
 * the name start characters this additionally accepts:
 *
 *   '-' | '.' | [0-9] | #xB7 | [#x300-#x36F] | [#x203F-#x2040]
 *
 * Returns 1 if `c` may appear inside a name, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (c < 0x80)
        return(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               ((c >= '0') && (c <= '9')) ||
               (c == '_') || (c == ':') ||
               (c == '-') || (c == '.'));

    /* #xB7 is the only name character in [#x80-#xBF] */
    if (c < 0xC0)
        return(c == 0xB7);

    /* [#xC0-#x1FFF] is contiguous except for #xD7, #xF7 and #x37E */
    if (c <= 0x1FFF)
        return((c != 0xD7) && (c != 0xF7) && (c != 0x37E));

    if (c <= 0xD7FF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x203F) && (c <= 0x2040)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               (c >= 0x3001));

    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    return((c >= 0x10000) && (c <= 0xEFFFF));
}
3024
3025
/*
 * Name start character test using the pre-revision 5 rules:
 * a Letter, '_' or ':'.
 *
 * Returns 1 if `c` may start a name, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c) || (c == '_') || (c == ':'))
        return(1);

    return(0);
}
3032
3033
/*
 * Name character test using the pre-revision 5 rules: a Letter, a Digit,
 * one of '.', '-', '_', ':', a CombiningChar or an Extender.
 *
 * Returns 1 if `c` may appear inside a name, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3044
3045
/*
 * Name start character test. A non-zero `old10` selects the
 * pre-revision 5 rules, zero selects the rules of the XML-1.0 Update 5.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3052
3053
/*
 * Name character test. A non-zero `old10` selects the
 * pre-revision 5 rules, zero selects the rules of the XML-1.0 Update 5.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3060
3061
/*
3062
 * Scan an XML Name, NCName or Nmtoken.
3063
 *
3064
 * Returns a pointer to the end of the name on success. If the
3065
 * name is invalid, returns `ptr`. If the name is longer than
3066
 * `maxSize` bytes, returns NULL.
3067
 *
3068
 * @param ptr  pointer to the start of the name
3069
 * @param maxSize  maximum size in bytes
3070
 * @param flags  XML_SCAN_* flags
3071
 * @returns a pointer to the end of the name or NULL
3072
 */
3073
const xmlChar *
3074
152k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3075
152k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3076
152k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3077
3078
2.82M
    while (1) {
3079
2.82M
        int c, len;
3080
3081
2.82M
        c = *ptr;
3082
2.82M
        if (c < 0x80) {
3083
2.80M
            if (c == stop)
3084
2
                break;
3085
2.80M
            len = 1;
3086
2.80M
        } else {
3087
25.4k
            len = 4;
3088
25.4k
            c = xmlGetUTF8Char(ptr, &len);
3089
25.4k
            if (c < 0)
3090
11.7k
                break;
3091
25.4k
        }
3092
3093
2.81M
        if (flags & XML_SCAN_NMTOKEN ?
3094
2.67M
                !xmlIsNameChar(c, old10) :
3095
2.81M
                !xmlIsNameStartChar(c, old10))
3096
140k
            break;
3097
3098
2.67M
        if ((size_t) len > maxSize)
3099
33
            return(NULL);
3100
2.67M
        ptr += len;
3101
2.67M
        maxSize -= len;
3102
2.67M
        flags |= XML_SCAN_NMTOKEN;
3103
2.67M
    }
3104
3105
151k
    return(ptr);
3106
152k
}
3107
3108
static const xmlChar *
3109
619k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3110
619k
    const xmlChar *ret;
3111
619k
    int len = 0, l;
3112
619k
    int c;
3113
619k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3114
0
                    XML_MAX_TEXT_LENGTH :
3115
619k
                    XML_MAX_NAME_LENGTH;
3116
619k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3117
3118
    /*
3119
     * Handler for more complex cases
3120
     */
3121
619k
    c = xmlCurrentChar(ctxt, &l);
3122
619k
    if (!xmlIsNameStartChar(c, old10))
3123
265k
        return(NULL);
3124
353k
    len += l;
3125
353k
    NEXTL(l);
3126
353k
    c = xmlCurrentChar(ctxt, &l);
3127
6.43M
    while (xmlIsNameChar(c, old10)) {
3128
6.08M
        if (len <= INT_MAX - l)
3129
6.08M
            len += l;
3130
6.08M
        NEXTL(l);
3131
6.08M
        c = xmlCurrentChar(ctxt, &l);
3132
6.08M
    }
3133
353k
    if (len > maxLength) {
3134
32
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3135
32
        return(NULL);
3136
32
    }
3137
353k
    if (ctxt->input->cur - ctxt->input->base < len) {
3138
        /*
3139
         * There were a couple of bugs where PERefs lead to to a change
3140
         * of the buffer. Check the buffer size to avoid passing an invalid
3141
         * pointer to xmlDictLookup.
3142
         */
3143
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3144
0
                    "unexpected change of input buffer");
3145
0
        return (NULL);
3146
0
    }
3147
353k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3148
4.28k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3149
349k
    else
3150
349k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3151
353k
    if (ret == NULL)
3152
0
        xmlErrMemory(ctxt);
3153
353k
    return(ret);
3154
353k
}
3155
3156
/**
3157
 * Parse an XML name.
3158
 *
3159
 * @deprecated Internal function, don't use.
3160
 *
3161
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3162
 *                      CombiningChar | Extender
3163
 *
3164
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3165
 *
3166
 *     [6] Names ::= Name (#x20 Name)*
3167
 *
3168
 * @param ctxt  an XML parser context
3169
 * @returns the Name parsed or NULL
3170
 */
3171
3172
const xmlChar *
3173
3.75M
xmlParseName(xmlParserCtxt *ctxt) {
3174
3.75M
    const xmlChar *in;
3175
3.75M
    const xmlChar *ret;
3176
3.75M
    size_t count = 0;
3177
3.75M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3178
0
                       XML_MAX_TEXT_LENGTH :
3179
3.75M
                       XML_MAX_NAME_LENGTH;
3180
3181
3.75M
    GROW;
3182
3183
    /*
3184
     * Accelerator for simple ASCII names
3185
     */
3186
3.75M
    in = ctxt->input->cur;
3187
3.75M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3188
3.75M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3189
3.75M
  (*in == '_') || (*in == ':')) {
3190
3.42M
  in++;
3191
27.6M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3192
27.6M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3193
27.6M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3194
27.6M
         (*in == '_') || (*in == '-') ||
3195
27.6M
         (*in == ':') || (*in == '.'))
3196
24.1M
      in++;
3197
3.42M
  if ((*in > 0) && (*in < 0x80)) {
3198
3.13M
      count = in - ctxt->input->cur;
3199
3.13M
            if (count > maxLength) {
3200
42
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3201
42
                return(NULL);
3202
42
            }
3203
3.13M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3204
3.13M
      ctxt->input->cur = in;
3205
3.13M
      ctxt->input->col += count;
3206
3.13M
      if (ret == NULL)
3207
0
          xmlErrMemory(ctxt);
3208
3.13M
      return(ret);
3209
3.13M
  }
3210
3.42M
    }
3211
    /* accelerator for special cases */
3212
619k
    return(xmlParseNameComplex(ctxt));
3213
3.75M
}
3214
3215
static xmlHashedString
3216
0
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3217
0
    xmlHashedString ret;
3218
0
    int len = 0, l;
3219
0
    int c;
3220
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221
0
                    XML_MAX_TEXT_LENGTH :
3222
0
                    XML_MAX_NAME_LENGTH;
3223
0
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3224
0
    size_t startPosition = 0;
3225
3226
0
    ret.name = NULL;
3227
0
    ret.hashValue = 0;
3228
3229
    /*
3230
     * Handler for more complex cases
3231
     */
3232
0
    startPosition = CUR_PTR - BASE_PTR;
3233
0
    c = xmlCurrentChar(ctxt, &l);
3234
0
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3235
0
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3236
0
  return(ret);
3237
0
    }
3238
3239
0
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3240
0
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3241
0
        if (len <= INT_MAX - l)
3242
0
      len += l;
3243
0
  NEXTL(l);
3244
0
  c = xmlCurrentChar(ctxt, &l);
3245
0
    }
3246
0
    if (len > maxLength) {
3247
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3248
0
        return(ret);
3249
0
    }
3250
0
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3251
0
    if (ret.name == NULL)
3252
0
        xmlErrMemory(ctxt);
3253
0
    return(ret);
3254
0
}
3255
3256
/**
3257
 * Parse an XML name.
3258
 *
3259
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3260
 *                          CombiningChar | Extender
3261
 *
3262
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3263
 *
3264
 * @param ctxt  an XML parser context
3265
 * @returns the Name parsed or NULL
3266
 */
3267
3268
static xmlHashedString
3269
0
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3270
0
    const xmlChar *in, *e;
3271
0
    xmlHashedString ret;
3272
0
    size_t count = 0;
3273
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3274
0
                       XML_MAX_TEXT_LENGTH :
3275
0
                       XML_MAX_NAME_LENGTH;
3276
3277
0
    ret.name = NULL;
3278
3279
    /*
3280
     * Accelerator for simple ASCII names
3281
     */
3282
0
    in = ctxt->input->cur;
3283
0
    e = ctxt->input->end;
3284
0
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3285
0
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3286
0
   (*in == '_')) && (in < e)) {
3287
0
  in++;
3288
0
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3289
0
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3290
0
          ((*in >= 0x30) && (*in <= 0x39)) ||
3291
0
          (*in == '_') || (*in == '-') ||
3292
0
          (*in == '.')) && (in < e))
3293
0
      in++;
3294
0
  if (in >= e)
3295
0
      goto complex;
3296
0
  if ((*in > 0) && (*in < 0x80)) {
3297
0
      count = in - ctxt->input->cur;
3298
0
            if (count > maxLength) {
3299
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3300
0
                return(ret);
3301
0
            }
3302
0
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3303
0
      ctxt->input->cur = in;
3304
0
      ctxt->input->col += count;
3305
0
      if (ret.name == NULL) {
3306
0
          xmlErrMemory(ctxt);
3307
0
      }
3308
0
      return(ret);
3309
0
  }
3310
0
    }
3311
0
complex:
3312
0
    return(xmlParseNCNameComplex(ctxt));
3313
0
}
3314
3315
/**
3316
 * Parse an XML name and compares for match
3317
 * (specialized for endtag parsing)
3318
 *
3319
 * @param ctxt  an XML parser context
3320
 * @param other  the name to compare with
3321
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3322
 * and the name for mismatch
3323
 */
3324
3325
static const xmlChar *
3326
185k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3327
185k
    register const xmlChar *cmp = other;
3328
185k
    register const xmlChar *in;
3329
185k
    const xmlChar *ret;
3330
3331
185k
    GROW;
3332
3333
185k
    in = ctxt->input->cur;
3334
974k
    while (*in != 0 && *in == *cmp) {
3335
789k
  ++in;
3336
789k
  ++cmp;
3337
789k
    }
3338
185k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3339
  /* success */
3340
171k
  ctxt->input->col += in - ctxt->input->cur;
3341
171k
  ctxt->input->cur = in;
3342
171k
  return (const xmlChar*) 1;
3343
171k
    }
3344
    /* failure (or end of input buffer), check with full function */
3345
14.4k
    ret = xmlParseName (ctxt);
3346
    /* strings coming from the dictionary direct compare possible */
3347
14.4k
    if (ret == other) {
3348
553
  return (const xmlChar*) 1;
3349
553
    }
3350
13.8k
    return ret;
3351
14.4k
}
3352
3353
/**
3354
 * Parse an XML name.
3355
 *
3356
 * @param ctxt  an XML parser context
3357
 * @param str  a pointer to the string pointer (IN/OUT)
3358
 * @returns the Name parsed or NULL. The `str` pointer
3359
 * is updated to the current location in the string.
3360
 */
3361
3362
static xmlChar *
3363
152k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3364
152k
    xmlChar *ret;
3365
152k
    const xmlChar *cur = *str;
3366
152k
    int flags = 0;
3367
152k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3368
0
                    XML_MAX_TEXT_LENGTH :
3369
152k
                    XML_MAX_NAME_LENGTH;
3370
3371
152k
    if (ctxt->options & XML_PARSE_OLD10)
3372
0
        flags |= XML_SCAN_OLD10;
3373
3374
152k
    cur = xmlScanName(*str, maxLength, flags);
3375
152k
    if (cur == NULL) {
3376
33
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3377
33
        return(NULL);
3378
33
    }
3379
151k
    if (cur == *str)
3380
14.7k
        return(NULL);
3381
3382
137k
    ret = xmlStrndup(*str, cur - *str);
3383
137k
    if (ret == NULL)
3384
0
        xmlErrMemory(ctxt);
3385
137k
    *str = cur;
3386
137k
    return(ret);
3387
151k
}
3388
3389
/**
3390
 * Parse an XML Nmtoken.
3391
 *
3392
 * @deprecated Internal function, don't use.
3393
 *
3394
 *     [7] Nmtoken ::= (NameChar)+
3395
 *
3396
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3397
 *
3398
 * @param ctxt  an XML parser context
3399
 * @returns the Nmtoken parsed or NULL
3400
 */
3401
3402
xmlChar *
3403
23.3k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3404
23.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3405
23.3k
    xmlChar *ret;
3406
23.3k
    int len = 0, l;
3407
23.3k
    int c;
3408
23.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3409
0
                    XML_MAX_TEXT_LENGTH :
3410
23.3k
                    XML_MAX_NAME_LENGTH;
3411
23.3k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3412
3413
23.3k
    c = xmlCurrentChar(ctxt, &l);
3414
3415
299k
    while (xmlIsNameChar(c, old10)) {
3416
278k
  COPY_BUF(buf, len, c);
3417
278k
  NEXTL(l);
3418
278k
  c = xmlCurrentChar(ctxt, &l);
3419
278k
  if (len >= XML_MAX_NAMELEN) {
3420
      /*
3421
       * Okay someone managed to make a huge token, so he's ready to pay
3422
       * for the processing speed.
3423
       */
3424
1.92k
      xmlChar *buffer;
3425
1.92k
      int max = len * 2;
3426
3427
1.92k
      buffer = xmlMalloc(max);
3428
1.92k
      if (buffer == NULL) {
3429
0
          xmlErrMemory(ctxt);
3430
0
    return(NULL);
3431
0
      }
3432
1.92k
      memcpy(buffer, buf, len);
3433
1.46M
      while (xmlIsNameChar(c, old10)) {
3434
1.46M
    if (len + 10 > max) {
3435
3.62k
        xmlChar *tmp;
3436
3.62k
                    int newSize;
3437
3438
3.62k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3439
3.62k
                    if (newSize < 0) {
3440
3
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3441
3
                        xmlFree(buffer);
3442
3
                        return(NULL);
3443
3
                    }
3444
3.61k
        tmp = xmlRealloc(buffer, newSize);
3445
3.61k
        if (tmp == NULL) {
3446
0
      xmlErrMemory(ctxt);
3447
0
      xmlFree(buffer);
3448
0
      return(NULL);
3449
0
        }
3450
3.61k
        buffer = tmp;
3451
3.61k
                    max = newSize;
3452
3.61k
    }
3453
1.46M
    COPY_BUF(buffer, len, c);
3454
1.46M
    NEXTL(l);
3455
1.46M
    c = xmlCurrentChar(ctxt, &l);
3456
1.46M
      }
3457
1.92k
      buffer[len] = 0;
3458
1.92k
      return(buffer);
3459
1.92k
  }
3460
278k
    }
3461
21.3k
    if (len == 0)
3462
132
        return(NULL);
3463
21.2k
    if (len > maxLength) {
3464
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3465
0
        return(NULL);
3466
0
    }
3467
21.2k
    ret = xmlStrndup(buf, len);
3468
21.2k
    if (ret == NULL)
3469
0
        xmlErrMemory(ctxt);
3470
21.2k
    return(ret);
3471
21.2k
}
3472
3473
/**
3474
 * Validate an entity value and expand parameter entities.
3475
 *
3476
 * @param ctxt  parser context
3477
 * @param buf  string buffer
3478
 * @param str  entity value
3479
 * @param length  size of entity value
3480
 * @param depth  nesting depth
3481
 */
3482
static void
3483
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3484
62.5k
                          const xmlChar *str, int length, int depth) {
3485
62.5k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3486
62.5k
    const xmlChar *end, *chunk;
3487
62.5k
    int c, l;
3488
3489
62.5k
    if (str == NULL)
3490
0
        return;
3491
3492
62.5k
    depth += 1;
3493
62.5k
    if (depth > maxDepth) {
3494
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3495
0
                       "Maximum entity nesting depth exceeded");
3496
0
  return;
3497
0
    }
3498
3499
62.5k
    end = str + length;
3500
62.5k
    chunk = str;
3501
3502
65.3M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3503
65.3M
        c = *str;
3504
3505
65.3M
        if (c >= 0x80) {
3506
24.1M
            l = xmlUTF8MultibyteLen(ctxt, str,
3507
24.1M
                    "invalid character in entity value\n");
3508
24.1M
            if (l == 0) {
3509
19.4M
                if (chunk < str)
3510
171k
                    xmlSBufAddString(buf, chunk, str - chunk);
3511
19.4M
                xmlSBufAddReplChar(buf);
3512
19.4M
                str += 1;
3513
19.4M
                chunk = str;
3514
19.4M
            } else {
3515
4.71M
                str += l;
3516
4.71M
            }
3517
41.1M
        } else if (c == '&') {
3518
397k
            if (str[1] == '#') {
3519
250k
                if (chunk < str)
3520
172k
                    xmlSBufAddString(buf, chunk, str - chunk);
3521
3522
250k
                c = xmlParseStringCharRef(ctxt, &str);
3523
250k
                if (c == 0)
3524
12.8k
                    return;
3525
3526
237k
                xmlSBufAddChar(buf, c);
3527
3528
237k
                chunk = str;
3529
237k
            } else {
3530
146k
                xmlChar *name;
3531
3532
                /*
3533
                 * General entity references are checked for
3534
                 * syntactic validity.
3535
                 */
3536
146k
                str++;
3537
146k
                name = xmlParseStringName(ctxt, &str);
3538
3539
146k
                if ((name == NULL) || (*str++ != ';')) {
3540
21.7k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3541
21.7k
                            "EntityValue: '&' forbidden except for entities "
3542
21.7k
                            "references\n");
3543
21.7k
                    xmlFree(name);
3544
21.7k
                    return;
3545
21.7k
                }
3546
3547
124k
                xmlFree(name);
3548
124k
            }
3549
40.8M
        } else if (c == '%') {
3550
4.27k
            xmlEntityPtr ent;
3551
3552
4.27k
            if (chunk < str)
3553
3.07k
                xmlSBufAddString(buf, chunk, str - chunk);
3554
3555
4.27k
            ent = xmlParseStringPEReference(ctxt, &str);
3556
4.27k
            if (ent == NULL)
3557
4.27k
                return;
3558
3559
0
            if (!PARSER_EXTERNAL(ctxt)) {
3560
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3561
0
                return;
3562
0
            }
3563
3564
0
            if (ent->content == NULL) {
3565
                /*
3566
                 * Note: external parsed entities will not be loaded,
3567
                 * it is not required for a non-validating parser to
3568
                 * complete external PEReferences coming from the
3569
                 * internal subset
3570
                 */
3571
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3572
0
                    ((ctxt->replaceEntities) ||
3573
0
                     (ctxt->validate))) {
3574
0
                    xmlLoadEntityContent(ctxt, ent);
3575
0
                } else {
3576
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3577
0
                                  "not validating will not read content for "
3578
0
                                  "PE entity %s\n", ent->name, NULL);
3579
0
                }
3580
0
            }
3581
3582
            /*
3583
             * TODO: Skip if ent->content is still NULL.
3584
             */
3585
3586
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3587
0
                return;
3588
3589
0
            if (ent->flags & XML_ENT_EXPANDING) {
3590
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3591
0
                xmlHaltParser(ctxt);
3592
0
                return;
3593
0
            }
3594
3595
0
            ent->flags |= XML_ENT_EXPANDING;
3596
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3597
0
                                      depth);
3598
0
            ent->flags &= ~XML_ENT_EXPANDING;
3599
3600
0
            chunk = str;
3601
40.7M
        } else {
3602
            /* Normal ASCII char */
3603
40.7M
            if (!IS_BYTE_CHAR(c)) {
3604
3.58M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3605
3.58M
                        "invalid character in entity value\n");
3606
3.58M
                if (chunk < str)
3607
37.5k
                    xmlSBufAddString(buf, chunk, str - chunk);
3608
3.58M
                xmlSBufAddReplChar(buf);
3609
3.58M
                str += 1;
3610
3.58M
                chunk = str;
3611
37.2M
            } else {
3612
37.2M
                str += 1;
3613
37.2M
            }
3614
40.7M
        }
3615
65.3M
    }
3616
3617
23.6k
    if (chunk < str)
3618
22.8k
        xmlSBufAddString(buf, chunk, str - chunk);
3619
23.6k
}
3620
3621
/**
3622
 * Parse a value for ENTITY declarations
3623
 *
3624
 * @deprecated Internal function, don't use.
3625
 *
3626
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3627
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3628
 *
3629
 * @param ctxt  an XML parser context
3630
 * @param orig  if non-NULL store a copy of the original entity value
3631
 * @returns the EntityValue parsed with reference substituted or NULL
3632
 */
3633
xmlChar *
3634
62.7k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3635
62.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3636
0
                         XML_MAX_HUGE_LENGTH :
3637
62.7k
                         XML_MAX_TEXT_LENGTH;
3638
62.7k
    xmlSBuf buf;
3639
62.7k
    const xmlChar *start;
3640
62.7k
    int quote, length;
3641
3642
62.7k
    xmlSBufInit(&buf, maxLength);
3643
3644
62.7k
    GROW;
3645
3646
62.7k
    quote = CUR;
3647
62.7k
    if ((quote != '"') && (quote != '\'')) {
3648
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3649
0
  return(NULL);
3650
0
    }
3651
62.7k
    CUR_PTR++;
3652
3653
62.7k
    length = 0;
3654
3655
    /*
3656
     * Copy raw content of the entity into a buffer
3657
     */
3658
87.1M
    while (1) {
3659
87.1M
        int c;
3660
3661
87.1M
        if (PARSER_STOPPED(ctxt))
3662
9
            goto error;
3663
3664
87.1M
        if (CUR_PTR >= ctxt->input->end) {
3665
143
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3666
143
            goto error;
3667
143
        }
3668
3669
87.1M
        c = CUR;
3670
3671
87.1M
        if (c == 0) {
3672
94
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3673
94
                    "invalid character in entity value\n");
3674
94
            goto error;
3675
94
        }
3676
87.1M
        if (c == quote)
3677
62.5k
            break;
3678
87.1M
        NEXTL(1);
3679
87.1M
        length += 1;
3680
3681
        /*
3682
         * TODO: Check growth threshold
3683
         */
3684
87.1M
        if (ctxt->input->end - CUR_PTR < 10)
3685
3.51k
            GROW;
3686
87.1M
    }
3687
3688
62.5k
    start = CUR_PTR - length;
3689
3690
62.5k
    if (orig != NULL) {
3691
62.5k
        *orig = xmlStrndup(start, length);
3692
62.5k
        if (*orig == NULL)
3693
0
            xmlErrMemory(ctxt);
3694
62.5k
    }
3695
3696
62.5k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3697
3698
62.5k
    NEXTL(1);
3699
3700
62.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3701
3702
246
error:
3703
246
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3704
246
    return(NULL);
3705
62.7k
}
3706
3707
/**
3708
 * Check an entity reference in an attribute value for validity
3709
 * without expanding it.
3710
 *
3711
 * @param ctxt  parser context
3712
 * @param pent  entity
3713
 * @param depth  nesting depth
3714
 */
3715
static void
3716
779
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3717
779
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3718
779
    const xmlChar *str;
3719
779
    unsigned long expandedSize = pent->length;
3720
779
    int c, flags;
3721
3722
779
    depth += 1;
3723
779
    if (depth > maxDepth) {
3724
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3725
0
                       "Maximum entity nesting depth exceeded");
3726
0
  return;
3727
0
    }
3728
3729
779
    if (pent->flags & XML_ENT_EXPANDING) {
3730
42
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3731
42
        xmlHaltParser(ctxt);
3732
42
        return;
3733
42
    }
3734
3735
    /*
3736
     * If we're parsing a default attribute value in DTD content,
3737
     * the entity might reference other entities which weren't
3738
     * defined yet, so the check isn't reliable.
3739
     */
3740
737
    if (ctxt->inSubset == 0)
3741
662
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3742
75
    else
3743
75
        flags = XML_ENT_VALIDATED;
3744
3745
737
    str = pent->content;
3746
737
    if (str == NULL)
3747
0
        goto done;
3748
3749
    /*
3750
     * Note that entity values are already validated. We only check
3751
     * for illegal less-than signs and compute the expanded size
3752
     * of the entity. No special handling for multi-byte characters
3753
     * is needed.
3754
     */
3755
750k
    while (!PARSER_STOPPED(ctxt)) {
3756
750k
        c = *str;
3757
3758
750k
  if (c != '&') {
3759
748k
            if (c == 0)
3760
657
                break;
3761
3762
748k
            if (c == '<')
3763
8.82k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3764
8.82k
                        "'<' in entity '%s' is not allowed in attributes "
3765
8.82k
                        "values\n", pent->name);
3766
3767
748k
            str += 1;
3768
748k
        } else if (str[1] == '#') {
3769
12
            int val;
3770
3771
12
      val = xmlParseStringCharRef(ctxt, &str);
3772
12
      if (val == 0) {
3773
2
                pent->content[0] = 0;
3774
2
                break;
3775
2
            }
3776
1.36k
  } else {
3777
1.36k
            xmlChar *name;
3778
1.36k
            xmlEntityPtr ent;
3779
3780
1.36k
      name = xmlParseStringEntityRef(ctxt, &str);
3781
1.36k
      if (name == NULL) {
3782
29
                pent->content[0] = 0;
3783
29
                break;
3784
29
            }
3785
3786
1.33k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3787
1.33k
            xmlFree(name);
3788
3789
1.33k
            if ((ent != NULL) &&
3790
1.33k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3791
687
                if ((ent->flags & flags) != flags) {
3792
356
                    pent->flags |= XML_ENT_EXPANDING;
3793
356
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3794
356
                    pent->flags &= ~XML_ENT_EXPANDING;
3795
356
                }
3796
3797
687
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3798
687
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3799
687
            }
3800
1.33k
        }
3801
750k
    }
3802
3803
737
done:
3804
737
    if (ctxt->inSubset == 0)
3805
662
        pent->expandedSize = expandedSize;
3806
3807
737
    pent->flags |= flags;
3808
737
}
3809
3810
/**
3811
 * Expand general entity references in an entity or attribute value.
3812
 * Perform attribute value normalization.
3813
 *
3814
 * @param ctxt  parser context
3815
 * @param buf  string buffer
3816
 * @param str  entity or attribute value
3817
 * @param pent  entity for entity value, NULL for attribute values
3818
 * @param normalize  whether to collapse whitespace
3819
 * @param inSpace  whitespace state
3820
 * @param depth  nesting depth
3821
 * @param check  whether to check for amplification
3822
 * @returns  whether there was a normalization change
3823
 */
3824
static int
3825
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3826
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3827
0
                          int *inSpace, int depth, int check) {
3828
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3829
0
    int c, chunkSize;
3830
0
    int normChange = 0;
3831
3832
0
    if (str == NULL)
3833
0
        return(0);
3834
3835
0
    depth += 1;
3836
0
    if (depth > maxDepth) {
3837
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3838
0
                       "Maximum entity nesting depth exceeded");
3839
0
  return(0);
3840
0
    }
3841
3842
0
    if (pent != NULL) {
3843
0
        if (pent->flags & XML_ENT_EXPANDING) {
3844
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3845
0
            xmlHaltParser(ctxt);
3846
0
            return(0);
3847
0
        }
3848
3849
0
        if (check) {
3850
0
            if (xmlParserEntityCheck(ctxt, pent->length))
3851
0
                return(0);
3852
0
        }
3853
0
    }
3854
3855
0
    chunkSize = 0;
3856
3857
    /*
3858
     * Note that entity values are already validated. No special
3859
     * handling for multi-byte characters is needed.
3860
     */
3861
0
    while (!PARSER_STOPPED(ctxt)) {
3862
0
        c = *str;
3863
3864
0
  if (c != '&') {
3865
0
            if (c == 0)
3866
0
                break;
3867
3868
            /*
3869
             * If this function is called without an entity, it is used to
3870
             * expand entities in an attribute content where less-than was
3871
             * already unscaped and is allowed.
3872
             */
3873
0
            if ((pent != NULL) && (c == '<')) {
3874
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3875
0
                        "'<' in entity '%s' is not allowed in attributes "
3876
0
                        "values\n", pent->name);
3877
0
                break;
3878
0
            }
3879
3880
0
            if (c <= 0x20) {
3881
0
                if ((normalize) && (*inSpace)) {
3882
                    /* Skip char */
3883
0
                    if (chunkSize > 0) {
3884
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
0
                        chunkSize = 0;
3886
0
                    }
3887
0
                    normChange = 1;
3888
0
                } else if (c < 0x20) {
3889
0
                    if (chunkSize > 0) {
3890
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3891
0
                        chunkSize = 0;
3892
0
                    }
3893
3894
0
                    xmlSBufAddCString(buf, " ", 1);
3895
0
                } else {
3896
0
                    chunkSize += 1;
3897
0
                }
3898
3899
0
                *inSpace = 1;
3900
0
            } else {
3901
0
                chunkSize += 1;
3902
0
                *inSpace = 0;
3903
0
            }
3904
3905
0
            str += 1;
3906
0
        } else if (str[1] == '#') {
3907
0
            int val;
3908
3909
0
            if (chunkSize > 0) {
3910
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
0
                chunkSize = 0;
3912
0
            }
3913
3914
0
      val = xmlParseStringCharRef(ctxt, &str);
3915
0
      if (val == 0) {
3916
0
                if (pent != NULL)
3917
0
                    pent->content[0] = 0;
3918
0
                break;
3919
0
            }
3920
3921
0
            if (val == ' ') {
3922
0
                if ((normalize) && (*inSpace))
3923
0
                    normChange = 1;
3924
0
                else
3925
0
                    xmlSBufAddCString(buf, " ", 1);
3926
0
                *inSpace = 1;
3927
0
            } else {
3928
0
                xmlSBufAddChar(buf, val);
3929
0
                *inSpace = 0;
3930
0
            }
3931
0
  } else {
3932
0
            xmlChar *name;
3933
0
            xmlEntityPtr ent;
3934
3935
0
            if (chunkSize > 0) {
3936
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3937
0
                chunkSize = 0;
3938
0
            }
3939
3940
0
      name = xmlParseStringEntityRef(ctxt, &str);
3941
0
            if (name == NULL) {
3942
0
                if (pent != NULL)
3943
0
                    pent->content[0] = 0;
3944
0
                break;
3945
0
            }
3946
3947
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3948
0
            xmlFree(name);
3949
3950
0
      if ((ent != NULL) &&
3951
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3952
0
    if (ent->content == NULL) {
3953
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3954
0
          "predefined entity has no content\n");
3955
0
                    break;
3956
0
                }
3957
3958
0
                xmlSBufAddString(buf, ent->content, ent->length);
3959
3960
0
                *inSpace = 0;
3961
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
3962
0
                if (pent != NULL)
3963
0
                    pent->flags |= XML_ENT_EXPANDING;
3964
0
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3965
0
                        ent->content, ent, normalize, inSpace, depth, check);
3966
0
                if (pent != NULL)
3967
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3968
0
      }
3969
0
        }
3970
0
    }
3971
3972
0
    if (chunkSize > 0)
3973
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3974
3975
0
    return(normChange);
3976
0
}
3977
3978
/**
3979
 * Expand general entity references in an entity or attribute value.
3980
 * Perform attribute value normalization.
3981
 *
3982
 * @param ctxt  parser context
3983
 * @param str  entity or attribute value
3984
 * @param normalize  whether to collapse whitespace
3985
 * @returns the expanded attribtue value.
3986
 */
3987
xmlChar *
3988
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3989
0
                            int normalize) {
3990
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3991
0
                         XML_MAX_HUGE_LENGTH :
3992
0
                         XML_MAX_TEXT_LENGTH;
3993
0
    xmlSBuf buf;
3994
0
    int inSpace = 1;
3995
3996
0
    xmlSBufInit(&buf, maxLength);
3997
3998
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3999
0
                              ctxt->inputNr, /* check */ 0);
4000
4001
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4002
0
        buf.size--;
4003
4004
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4005
0
}
4006
4007
/**
4008
 * Parse a value for an attribute.
4009
 *
4010
 * NOTE: if no normalization is needed, the routine will return pointers
4011
 * directly from the data buffer.
4012
 *
4013
 * 3.3.3 Attribute-Value Normalization:
4014
 *
4015
 * Before the value of an attribute is passed to the application or
4016
 * checked for validity, the XML processor must normalize it as follows:
4017
 *
4018
 * - a character reference is processed by appending the referenced
4019
 *   character to the attribute value
4020
 * - an entity reference is processed by recursively processing the
4021
 *   replacement text of the entity
4022
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4023
 *   appending \#x20 to the normalized value, except that only a single
4024
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4025
 *   parsed entity or the literal entity value of an internal parsed entity
4026
 * - other characters are processed by appending them to the normalized value
4027
 *
4028
 * If the declared value is not CDATA, then the XML processor must further
4029
 * process the normalized attribute value by discarding any leading and
4030
 * trailing space (\#x20) characters, and by replacing sequences of space
4031
 * (\#x20) characters by a single space (\#x20) character.
4032
 * All attributes for which no declaration has been read should be treated
4033
 * by a non-validating parser as if declared CDATA.
4034
 *
4035
 * @param ctxt  an XML parser context
4036
 * @param attlen  attribute len result
4037
 * @param outFlags  resulting XML_ATTVAL_* flags
4038
 * @param special  value from attsSpecial
4039
 * @param isNamespace  whether this is a namespace declaration
4040
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4041
 *     caller if it was copied, this can be detected by val[*len] == 0.
4042
 */
4043
static xmlChar *
4044
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4045
672k
                         int special, int isNamespace) {
4046
672k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4047
0
                         XML_MAX_HUGE_LENGTH :
4048
672k
                         XML_MAX_TEXT_LENGTH;
4049
672k
    xmlSBuf buf;
4050
672k
    xmlChar *ret;
4051
672k
    int c, l, quote, entFlags, chunkSize;
4052
672k
    int inSpace = 1;
4053
672k
    int replaceEntities;
4054
672k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4055
672k
    int attvalFlags = 0;
4056
4057
    /* Always expand namespace URIs */
4058
672k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4059
4060
672k
    xmlSBufInit(&buf, maxLength);
4061
4062
672k
    GROW;
4063
4064
672k
    quote = CUR;
4065
672k
    if ((quote != '"') && (quote != '\'')) {
4066
5.95k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4067
5.95k
  return(NULL);
4068
5.95k
    }
4069
666k
    NEXTL(1);
4070
4071
666k
    if (ctxt->inSubset == 0)
4072
642k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4073
24.3k
    else
4074
24.3k
        entFlags = XML_ENT_VALIDATED;
4075
4076
666k
    inSpace = 1;
4077
666k
    chunkSize = 0;
4078
4079
173M
    while (1) {
4080
173M
        if (PARSER_STOPPED(ctxt))
4081
42
            goto error;
4082
4083
173M
        if (CUR_PTR >= ctxt->input->end) {
4084
1.87k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4085
1.87k
                           "AttValue: ' expected\n");
4086
1.87k
            goto error;
4087
1.87k
        }
4088
4089
        /*
4090
         * TODO: Check growth threshold
4091
         */
4092
173M
        if (ctxt->input->end - CUR_PTR < 10)
4093
35.1k
            GROW;
4094
4095
173M
        c = CUR;
4096
4097
173M
        if (c >= 0x80) {
4098
46.5M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4099
46.5M
                    "invalid character in attribute value\n");
4100
46.5M
            if (l == 0) {
4101
31.8M
                if (chunkSize > 0) {
4102
516k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4103
516k
                    chunkSize = 0;
4104
516k
                }
4105
31.8M
                xmlSBufAddReplChar(&buf);
4106
31.8M
                NEXTL(1);
4107
31.8M
            } else {
4108
14.7M
                chunkSize += l;
4109
14.7M
                NEXTL(l);
4110
14.7M
            }
4111
4112
46.5M
            inSpace = 0;
4113
126M
        } else if (c != '&') {
4114
126M
            if (c > 0x20) {
4115
89.2M
                if (c == quote)
4116
657k
                    break;
4117
4118
88.5M
                if (c == '<')
4119
1.20M
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4120
4121
88.5M
                chunkSize += 1;
4122
88.5M
                inSpace = 0;
4123
88.5M
            } else if (!IS_BYTE_CHAR(c)) {
4124
30.7M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4125
30.7M
                        "invalid character in attribute value\n");
4126
30.7M
                if (chunkSize > 0) {
4127
356k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4128
356k
                    chunkSize = 0;
4129
356k
                }
4130
30.7M
                xmlSBufAddReplChar(&buf);
4131
30.7M
                inSpace = 0;
4132
30.7M
            } else {
4133
                /* Whitespace */
4134
6.32M
                if ((normalize) && (inSpace)) {
4135
                    /* Skip char */
4136
0
                    if (chunkSize > 0) {
4137
0
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4138
0
                        chunkSize = 0;
4139
0
                    }
4140
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4141
6.32M
                } else if (c < 0x20) {
4142
                    /* Convert to space */
4143
3.62M
                    if (chunkSize > 0) {
4144
1.99M
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4145
1.99M
                        chunkSize = 0;
4146
1.99M
                    }
4147
4148
3.62M
                    xmlSBufAddCString(&buf, " ", 1);
4149
3.62M
                } else {
4150
2.70M
                    chunkSize += 1;
4151
2.70M
                }
4152
4153
6.32M
                inSpace = 1;
4154
4155
6.32M
                if ((c == 0xD) && (NXT(1) == 0xA))
4156
77.8k
                    CUR_PTR++;
4157
6.32M
            }
4158
4159
125M
            NEXTL(1);
4160
125M
        } else if (NXT(1) == '#') {
4161
233k
            int val;
4162
4163
233k
            if (chunkSize > 0) {
4164
200k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4165
200k
                chunkSize = 0;
4166
200k
            }
4167
4168
233k
            val = xmlParseCharRef(ctxt);
4169
233k
            if (val == 0)
4170
7.43k
                goto error;
4171
4172
226k
            if ((val == '&') && (!replaceEntities)) {
4173
                /*
4174
                 * The reparsing will be done in xmlNodeParseContent()
4175
                 * called from SAX2.c
4176
                 */
4177
92.8k
                xmlSBufAddCString(&buf, "&#38;", 5);
4178
92.8k
                inSpace = 0;
4179
133k
            } else if (val == ' ') {
4180
86.9k
                if ((normalize) && (inSpace))
4181
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4182
86.9k
                else
4183
86.9k
                    xmlSBufAddCString(&buf, " ", 1);
4184
86.9k
                inSpace = 1;
4185
86.9k
            } else {
4186
46.4k
                xmlSBufAddChar(&buf, val);
4187
46.4k
                inSpace = 0;
4188
46.4k
            }
4189
459k
        } else {
4190
459k
            const xmlChar *name;
4191
459k
            xmlEntityPtr ent;
4192
4193
459k
            if (chunkSize > 0) {
4194
217k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4195
217k
                chunkSize = 0;
4196
217k
            }
4197
4198
459k
            name = xmlParseEntityRefInternal(ctxt);
4199
459k
            if (name == NULL) {
4200
                /*
4201
                 * Probably a literal '&' which wasn't escaped.
4202
                 * TODO: Handle gracefully in recovery mode.
4203
                 */
4204
182k
                continue;
4205
182k
            }
4206
4207
276k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4208
276k
            if (ent == NULL)
4209
132k
                continue;
4210
4211
144k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4212
122k
                if ((ent->content[0] == '&') && (!replaceEntities))
4213
99.2k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4214
23.1k
                else
4215
23.1k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4216
122k
                inSpace = 0;
4217
122k
            } else if (replaceEntities) {
4218
0
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4219
0
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4220
0
                        /* check */ 1) > 0)
4221
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4222
22.1k
            } else {
4223
22.1k
                if ((ent->flags & entFlags) != entFlags)
4224
423
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4225
4226
22.1k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4227
28
                    ent->content[0] = 0;
4228
28
                    goto error;
4229
28
                }
4230
4231
                /*
4232
                 * Just output the reference
4233
                 */
4234
22.1k
                xmlSBufAddCString(&buf, "&", 1);
4235
22.1k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4236
22.1k
                xmlSBufAddCString(&buf, ";", 1);
4237
4238
22.1k
                inSpace = 0;
4239
22.1k
            }
4240
144k
  }
4241
173M
    }
4242
4243
657k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4244
0
        ret = (xmlChar *) CUR_PTR - chunkSize;
4245
4246
0
        if (attlen != NULL)
4247
0
            *attlen = chunkSize;
4248
0
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4249
0
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4250
0
            *attlen -= 1;
4251
0
        }
4252
4253
        /* Report potential error */
4254
0
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4255
657k
    } else {
4256
657k
        if (chunkSize > 0)
4257
614k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4258
4259
657k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4260
0
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4261
0
            buf.size--;
4262
0
        }
4263
4264
657k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4265
657k
        attvalFlags |= XML_ATTVAL_ALLOC;
4266
4267
657k
        if (ret != NULL) {
4268
657k
            if (attlen != NULL)
4269
0
                *attlen = buf.size;
4270
657k
        }
4271
657k
    }
4272
4273
657k
    if (outFlags != NULL)
4274
0
        *outFlags = attvalFlags;
4275
4276
657k
    NEXTL(1);
4277
4278
657k
    return(ret);
4279
4280
9.38k
error:
4281
9.38k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4282
9.38k
    return(NULL);
4283
666k
}
4284
4285
/**
4286
 * Parse a value for an attribute
4287
 * Note: the parser won't do substitution of entities here, this
4288
 * will be handled later in #xmlStringGetNodeList
4289
 *
4290
 * @deprecated Internal function, don't use.
4291
 *
4292
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4293
 *                       "'" ([^<&'] | Reference)* "'"
4294
 *
4295
 * 3.3.3 Attribute-Value Normalization:
4296
 *
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 *
4300
 * - a character reference is processed by appending the referenced
4301
 *   character to the attribute value
4302
 * - an entity reference is processed by recursively processing the
4303
 *   replacement text of the entity
4304
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4305
 *   appending \#x20 to the normalized value, except that only a single
4306
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4307
 *   parsed entity or the literal entity value of an internal parsed entity
4308
 * - other characters are processed by appending them to the normalized value
4309
 *
4310
 * If the declared value is not CDATA, then the XML processor must further
4311
 * process the normalized attribute value by discarding any leading and
4312
 * trailing space (\#x20) characters, and by replacing sequences of space
4313
 * (\#x20) characters by a single space (\#x20) character.
4314
 * All attributes for which no declaration has been read should be treated
4315
 * by a non-validating parser as if declared CDATA.
4316
 *
4317
 * @param ctxt  an XML parser context
4318
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4319
 * caller.
4320
 */
4321
xmlChar *
4322
672k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4323
672k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4324
672k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4325
672k
}
4326
4327
/**
4328
 * Parse an XML Literal
4329
 *
4330
 * @deprecated Internal function, don't use.
4331
 *
4332
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4333
 *
4334
 * @param ctxt  an XML parser context
4335
 * @returns the SystemLiteral parsed or NULL
4336
 */
4337
4338
xmlChar *
4339
15.3k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4340
15.3k
    xmlChar *buf = NULL;
4341
15.3k
    int len = 0;
4342
15.3k
    int size = XML_PARSER_BUFFER_SIZE;
4343
15.3k
    int cur, l;
4344
15.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4345
0
                    XML_MAX_TEXT_LENGTH :
4346
15.3k
                    XML_MAX_NAME_LENGTH;
4347
15.3k
    xmlChar stop;
4348
4349
15.3k
    if (RAW == '"') {
4350
6.94k
        NEXT;
4351
6.94k
  stop = '"';
4352
8.39k
    } else if (RAW == '\'') {
4353
7.49k
        NEXT;
4354
7.49k
  stop = '\'';
4355
7.49k
    } else {
4356
894
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4357
894
  return(NULL);
4358
894
    }
4359
4360
14.4k
    buf = xmlMalloc(size);
4361
14.4k
    if (buf == NULL) {
4362
0
        xmlErrMemory(ctxt);
4363
0
  return(NULL);
4364
0
    }
4365
14.4k
    cur = xmlCurrentCharRecover(ctxt, &l);
4366
3.32M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4367
3.30M
  if (len + 5 >= size) {
4368
11.7k
      xmlChar *tmp;
4369
11.7k
            int newSize;
4370
4371
11.7k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4372
11.7k
            if (newSize < 0) {
4373
4
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4374
4
                xmlFree(buf);
4375
4
                return(NULL);
4376
4
            }
4377
11.7k
      tmp = xmlRealloc(buf, newSize);
4378
11.7k
      if (tmp == NULL) {
4379
0
          xmlFree(buf);
4380
0
    xmlErrMemory(ctxt);
4381
0
    return(NULL);
4382
0
      }
4383
11.7k
      buf = tmp;
4384
11.7k
            size = newSize;
4385
11.7k
  }
4386
3.30M
  COPY_BUF(buf, len, cur);
4387
3.30M
  NEXTL(l);
4388
3.30M
  cur = xmlCurrentCharRecover(ctxt, &l);
4389
3.30M
    }
4390
14.4k
    buf[len] = 0;
4391
14.4k
    if (!IS_CHAR(cur)) {
4392
208
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4393
14.2k
    } else {
4394
14.2k
  NEXT;
4395
14.2k
    }
4396
14.4k
    return(buf);
4397
14.4k
}
4398
4399
/**
4400
 * Parse an XML public literal
4401
 *
4402
 * @deprecated Internal function, don't use.
4403
 *
4404
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4405
 *
4406
 * @param ctxt  an XML parser context
4407
 * @returns the PubidLiteral parsed or NULL.
4408
 */
4409
4410
xmlChar *
4411
16.0k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4412
16.0k
    xmlChar *buf = NULL;
4413
16.0k
    int len = 0;
4414
16.0k
    int size = XML_PARSER_BUFFER_SIZE;
4415
16.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4416
0
                    XML_MAX_TEXT_LENGTH :
4417
16.0k
                    XML_MAX_NAME_LENGTH;
4418
16.0k
    xmlChar cur;
4419
16.0k
    xmlChar stop;
4420
4421
16.0k
    if (RAW == '"') {
4422
6.41k
        NEXT;
4423
6.41k
  stop = '"';
4424
9.60k
    } else if (RAW == '\'') {
4425
9.35k
        NEXT;
4426
9.35k
  stop = '\'';
4427
9.35k
    } else {
4428
248
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4429
248
  return(NULL);
4430
248
    }
4431
15.7k
    buf = xmlMalloc(size);
4432
15.7k
    if (buf == NULL) {
4433
0
  xmlErrMemory(ctxt);
4434
0
  return(NULL);
4435
0
    }
4436
15.7k
    cur = CUR;
4437
496k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4438
496k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4439
480k
  if (len + 1 >= size) {
4440
583
      xmlChar *tmp;
4441
583
            int newSize;
4442
4443
583
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4444
583
            if (newSize < 0) {
4445
4
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4446
4
                xmlFree(buf);
4447
4
                return(NULL);
4448
4
            }
4449
579
      tmp = xmlRealloc(buf, newSize);
4450
579
      if (tmp == NULL) {
4451
0
    xmlErrMemory(ctxt);
4452
0
    xmlFree(buf);
4453
0
    return(NULL);
4454
0
      }
4455
579
      buf = tmp;
4456
579
            size = newSize;
4457
579
  }
4458
480k
  buf[len++] = cur;
4459
480k
  NEXT;
4460
480k
  cur = CUR;
4461
480k
    }
4462
15.7k
    buf[len] = 0;
4463
15.7k
    if (cur != stop) {
4464
710
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4465
15.0k
    } else {
4466
15.0k
  NEXTL(1);
4467
15.0k
    }
4468
15.7k
    return(buf);
4469
15.7k
}
4470
4471
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4472
4473
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. An entry is non-zero (equal to its own index) for bytes which
 * the fast path may skip over, and zero for bytes which need a closer
 * look: control characters other than TAB (including CR and LF), '&',
 * '<', ']' and everything outside of ASCII.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4510
4511
static void
4512
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4513
1.38M
              int isBlank) {
4514
1.38M
    int checkBlanks;
4515
4516
1.38M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4517
248k
        return;
4518
4519
1.13M
    checkBlanks = (!ctxt->keepBlanks) ||
4520
1.13M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4521
4522
    /*
4523
     * Calling areBlanks with only parts of a text node
4524
     * is fundamentally broken, making the NOBLANKS option
4525
     * essentially unusable.
4526
     */
4527
1.13M
    if ((checkBlanks) &&
4528
1.13M
        (areBlanks(ctxt, buf, size, isBlank))) {
4529
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4530
0
            (ctxt->keepBlanks))
4531
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4532
1.13M
    } else {
4533
1.13M
        if (ctxt->sax->characters != NULL)
4534
1.13M
            ctxt->sax->characters(ctxt->userData, buf, size);
4535
4536
        /*
4537
         * The old code used to update this value for "complex" data
4538
         * even if checkBlanks was false. This was probably a bug.
4539
         */
4540
1.13M
        if ((checkBlanks) && (*ctxt->space == -1))
4541
496k
            *ctxt->space = -2;
4542
1.13M
    }
4543
1.13M
}
4544
4545
/**
4546
 * Parse character data. Always makes progress if the first char isn't
4547
 * '<' or '&'.
4548
 *
4549
 * The right angle bracket (>) may be represented using the string "&gt;",
4550
 * and must, for compatibility, be escaped using "&gt;" or a character
4551
 * reference when it appears in the string "]]>" in content, when that
4552
 * string is not marking the end of a CDATA section.
4553
 *
4554
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4555
 * @param ctxt  an XML parser context
4556
 * @param partial  buffer may contain partial UTF-8 sequences
4557
 */
4558
static void
4559
1.12M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4560
1.12M
    const xmlChar *in;
4561
    /*
     * line/col snapshot: refreshed after every flush to the handler and
     * written back to the input just before falling back to
     * xmlParseCharDataComplex().
     */
1.12M
    int line = ctxt->input->line;
4562
1.12M
    int col = ctxt->input->col;
4563
1.12M
    int ccol;
4564
    /* Set in partial mode when a ']' cannot be checked for "]]>" yet. */
1.12M
    int terminate = 0;
4565
4566
1.12M
    GROW;
4567
    /*
4568
     * Accelerated common case where input doesn't need to be
4569
     * modified before passing it to the handler.
4570
     */
4571
1.12M
    in = ctxt->input->cur;
4572
1.14M
    do {
4573
        /* Skip a leading run of spaces and line feeds. */
1.38M
get_more_space:
4574
2.16M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4575
1.38M
        if (*in == 0xA) {
4576
254k
            do {
4577
254k
                ctxt->input->line++; ctxt->input->col = 1;
4578
254k
                in++;
4579
254k
            } while (*in == 0xA);
4580
243k
            goto get_more_space;
4581
243k
        }
4582
        /*
         * Only spaces and line feeds were seen before '<': deliver them
         * with isBlank = 1, at most XML_MAX_ITEMS bytes per call.
         */
1.14M
        if (*in == '<') {
4583
405k
            while (in > ctxt->input->cur) {
4584
202k
                const xmlChar *tmp = ctxt->input->cur;
4585
202k
                size_t nbchar = in - tmp;
4586
4587
202k
                if (nbchar > XML_MAX_ITEMS)
4588
0
                    nbchar = XML_MAX_ITEMS;
4589
202k
                ctxt->input->cur += nbchar;
4590
4591
202k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4592
202k
            }
4593
202k
            return;
4594
202k
        }
4595
4596
        /* Advance over bytes accepted by the test_char_data lookup table. */
2.58M
get_more:
4597
2.58M
        ccol = ctxt->input->col;
4598
26.2M
        while (test_char_data[*in]) {
4599
23.6M
            in++;
4600
23.6M
            ccol++;
4601
23.6M
        }
4602
2.58M
        ctxt->input->col = ccol;
4603
2.58M
        if (*in == 0xA) {
4604
271k
            do {
4605
271k
                ctxt->input->line++; ctxt->input->col = 1;
4606
271k
                in++;
4607
271k
            } while (*in == 0xA);
4608
228k
            goto get_more;
4609
228k
        }
4610
        /*
         * A ']' may start "]]>", which is an error in content. In partial
         * mode, when fewer than the bytes needed for that check are
         * available, flush what was scanned and return (terminate = 1)
         * without consuming the ']'.
         */
2.35M
        if (*in == ']') {
4611
1.41M
            size_t avail = ctxt->input->end - in;
4612
4613
1.41M
            if (partial && avail < 2) {
4614
4
                terminate = 1;
4615
4
                goto invoke_callback;
4616
4
            }
4617
1.41M
            if (in[1] == ']') {
4618
1.37M
                if (partial && avail < 3) {
4619
549
                    terminate = 1;
4620
549
                    goto invoke_callback;
4621
549
                }
4622
1.37M
                if (in[2] == '>')
4623
3.72k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4624
1.37M
            }
4625
4626
1.41M
            in++;
4627
1.41M
            ctxt->input->col++;
4628
1.41M
            goto get_more;
4629
1.41M
        }
4630
4631
        /*
         * Flush the bytes between ctxt->input->cur and `in` to the handler
         * with isBlank = 0, at most XML_MAX_ITEMS bytes per call, and
         * refresh the line/col snapshot after each call.
         */
942k
invoke_callback:
4632
1.83M
        while (in > ctxt->input->cur) {
4633
897k
            const xmlChar *tmp = ctxt->input->cur;
4634
897k
            size_t nbchar = in - tmp;
4635
4636
897k
            if (nbchar > XML_MAX_ITEMS)
4637
0
                nbchar = XML_MAX_ITEMS;
4638
897k
            ctxt->input->cur += nbchar;
4639
4640
897k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4641
4642
897k
            line = ctxt->input->line;
4643
897k
            col = ctxt->input->col;
4644
897k
        }
4645
942k
        ctxt->input->cur = in;
4646
942k
        if (*in == 0xD) {
4647
102k
            in++;
4648
            /*
             * CR LF: move cur onto the LF so the CR is not part of the
             * next chunk. The `continue` jumps to the do/while condition,
             * which is evaluated on the byte following the LF.
             */
102k
            if (*in == 0xA) {
4649
25.7k
                ctxt->input->cur = in;
4650
25.7k
                in++;
4651
25.7k
                ctxt->input->line++; ctxt->input->col = 1;
4652
25.7k
                continue; /* while */
4653
25.7k
            }
4654
            /*
             * Lone CR: step back onto it. 0xD fails the loop condition
             * below, so it is left to xmlParseCharDataComplex().
             */
76.5k
            in--;
4655
76.5k
        }
4656
916k
        if (*in == '<') {
4657
687k
            return;
4658
687k
        }
4659
229k
        if (*in == '&') {
4660
50.5k
            return;
4661
50.5k
        }
4662
179k
        if (terminate) {
4663
553
            return;
4664
553
        }
4665
178k
        SHRINK;
4666
178k
        GROW;
4667
178k
        in = ctxt->input->cur;
4668
    /* Stay on the fast path only for printable ASCII, tab and line feed. */
204k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4669
204k
             (*in == 0x09) || (*in == 0x0a));
4670
    /*
     * Write back the line/col snapshot taken at the last flush (or on
     * entry) and let the slow path handle the rest from ctxt->input->cur.
     */
179k
    ctxt->input->line = line;
4671
179k
    ctxt->input->col = col;
4672
179k
    xmlParseCharDataComplex(ctxt, partial);
4673
179k
}
4674
4675
/**
4676
 * Always makes progress if the first char isn't '<' or '&'.
4677
 *
4678
 * parse a CharData section.this is the fallback function
4679
 * of #xmlParseCharData when the parsing requires handling
4680
 * of non-ASCII characters.
4681
 *
4682
 * @param ctxt  an XML parser context
4683
 * @param partial  whether the input can end with truncated UTF-8
4684
 */
4685
static void
4686
179k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4687
179k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4688
179k
    int nbchar = 0;
4689
179k
    int cur, l;
4690
4691
179k
    cur = xmlCurrentCharRecover(ctxt, &l);
4692
31.6M
    while ((cur != '<') && /* checked */
4693
31.6M
           (cur != '&') &&
4694
31.6M
     (IS_CHAR(cur))) {
4695
31.5M
        if (cur == ']') {
4696
495k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4697
4698
495k
            if (partial && avail < 2)
4699
15
                break;
4700
495k
            if (NXT(1) == ']') {
4701
474k
                if (partial && avail < 3)
4702
364
                    break;
4703
473k
                if (NXT(2) == '>')
4704
4.89k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4705
473k
            }
4706
495k
        }
4707
4708
31.5M
  COPY_BUF(buf, nbchar, cur);
4709
  /* move current position before possible calling of ctxt->sax->characters */
4710
31.5M
  NEXTL(l);
4711
31.5M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4712
114k
      buf[nbchar] = 0;
4713
4714
114k
            xmlCharacters(ctxt, buf, nbchar, 0);
4715
114k
      nbchar = 0;
4716
114k
            SHRINK;
4717
114k
  }
4718
31.5M
  cur = xmlCurrentCharRecover(ctxt, &l);
4719
31.5M
    }
4720
179k
    if (nbchar != 0) {
4721
171k
        buf[nbchar] = 0;
4722
4723
171k
        xmlCharacters(ctxt, buf, nbchar, 0);
4724
171k
    }
4725
    /*
4726
     * cur == 0 can mean
4727
     *
4728
     * - End of buffer.
4729
     * - An actual 0 character.
4730
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4731
     */
4732
179k
    if (ctxt->input->cur < ctxt->input->end) {
4733
156k
        if ((cur == 0) && (CUR != 0)) {
4734
816
            if (partial == 0) {
4735
214
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4736
214
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4737
214
                NEXTL(1);
4738
214
            }
4739
155k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4740
            /* Generate the error and skip the offending character */
4741
530
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4742
530
                              "PCDATA invalid Char value %d\n", cur);
4743
530
            NEXTL(l);
4744
530
        }
4745
156k
    }
4746
179k
}
4747
4748
/**
4749
 * Parse character data by forwarding to xmlParseCharDataInternal()
 * with `partial` set to 0.
 *
 * @deprecated Internal function, don't use.
4750
 * @param ctxt  an XML parser context
4751
 * @param cdata  unused
4752
 */
4753
void
4754
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4755
0
    xmlParseCharDataInternal(ctxt, 0);
4756
0
}
4757
4758
/**
4759
 * Parse an External ID or a Public ID
4760
 *
4761
 * @deprecated Internal function, don't use.
4762
 *
4763
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4764
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4765
 *
4766
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4768
 *
4769
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4770
 *
4771
 * @param ctxt  an XML parser context
4772
 * @param publicId  a xmlChar** receiving PubidLiteral
4773
 * @param strict  indicate whether we should restrict parsing to only
4774
 *          production [75], see NOTE below
4775
 * @returns the function returns SystemLiteral and in the second
4776
 *                case publicID receives PubidLiteral, is strict is off
4777
 *                it is possible to return NULL and have publicID set.
4778
 */
4779
4780
xmlChar *
4781
27.1k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4782
27.1k
    xmlChar *URI = NULL;
4783
4784
27.1k
    *publicId = NULL;
4785
27.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4786
1.71k
        SKIP(6);
4787
1.71k
  if (SKIP_BLANKS == 0) {
4788
34
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
34
                     "Space required after 'SYSTEM'\n");
4790
34
  }
4791
1.71k
  URI = xmlParseSystemLiteral(ctxt);
4792
1.71k
  if (URI == NULL) {
4793
34
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4794
34
        }
4795
25.4k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4796
16.0k
        SKIP(6);
4797
16.0k
  if (SKIP_BLANKS == 0) {
4798
456
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4799
456
        "Space required after 'PUBLIC'\n");
4800
456
  }
4801
16.0k
  *publicId = xmlParsePubidLiteral(ctxt);
4802
16.0k
  if (*publicId == NULL) {
4803
252
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4804
252
  }
4805
16.0k
  if (strict) {
4806
      /*
4807
       * We don't handle [83] so "S SystemLiteral" is required.
4808
       */
4809
8.07k
      if (SKIP_BLANKS == 0) {
4810
865
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811
865
      "Space required after the Public Identifier\n");
4812
865
      }
4813
8.07k
  } else {
4814
      /*
4815
       * We handle [83] so we return immediately, if
4816
       * "S SystemLiteral" is not detected. We skip blanks if no
4817
             * system literal was found, but this is harmless since we must
4818
             * be at the end of a NotationDecl.
4819
       */
4820
7.93k
      if (SKIP_BLANKS == 0) return(NULL);
4821
7.12k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4822
7.12k
  }
4823
13.6k
  URI = xmlParseSystemLiteral(ctxt);
4824
13.6k
  if (URI == NULL) {
4825
864
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4826
864
        }
4827
13.6k
    }
4828
24.8k
    return(URI);
4829
27.1k
}
4830
4831
/**
4832
 * Skip an XML (SGML) comment <!-- .... -->
4833
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4834
 *  must not occur within comments. "
4835
 * This is the slow routine in case the accelerator for ascii didn't work
4836
 *
4837
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4838
 * @param ctxt  an XML parser context
4839
 * @param buf  the already parsed part of the buffer
4840
 * @param len  number of bytes in the buffer
4841
 * @param size  allocated size of the buffer
4842
 */
4843
static void
4844
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4845
45.4k
                       size_t len, size_t size) {
4846
45.4k
    int q, ql;
4847
45.4k
    int r, rl;
4848
45.4k
    int cur, l;
4849
45.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4850
0
                    XML_MAX_HUGE_LENGTH :
4851
45.4k
                    XML_MAX_TEXT_LENGTH;
4852
4853
45.4k
    if (buf == NULL) {
4854
4.62k
        len = 0;
4855
4.62k
  size = XML_PARSER_BUFFER_SIZE;
4856
4.62k
  buf = xmlMalloc(size);
4857
4.62k
  if (buf == NULL) {
4858
0
      xmlErrMemory(ctxt);
4859
0
      return;
4860
0
  }
4861
4.62k
    }
4862
45.4k
    q = xmlCurrentCharRecover(ctxt, &ql);
4863
45.4k
    if (q == 0)
4864
312
        goto not_terminated;
4865
45.1k
    if (!IS_CHAR(q)) {
4866
56
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4867
56
                          "xmlParseComment: invalid xmlChar value %d\n",
4868
56
                    q);
4869
56
  xmlFree (buf);
4870
56
  return;
4871
56
    }
4872
45.0k
    NEXTL(ql);
4873
45.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
4874
45.0k
    if (r == 0)
4875
77
        goto not_terminated;
4876
44.9k
    if (!IS_CHAR(r)) {
4877
10
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4878
10
                          "xmlParseComment: invalid xmlChar value %d\n",
4879
10
                    r);
4880
10
  xmlFree (buf);
4881
10
  return;
4882
10
    }
4883
44.9k
    NEXTL(rl);
4884
44.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
4885
44.9k
    if (cur == 0)
4886
81
        goto not_terminated;
4887
6.49M
    while (IS_CHAR(cur) && /* checked */
4888
6.49M
           ((cur != '>') ||
4889
6.49M
      (r != '-') || (q != '-'))) {
4890
6.44M
  if ((r == '-') && (q == '-')) {
4891
8.42k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4892
8.42k
  }
4893
6.44M
  if (len + 5 >= size) {
4894
13.8k
      xmlChar *tmp;
4895
13.8k
            int newSize;
4896
4897
13.8k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4898
13.8k
            if (newSize < 0) {
4899
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4900
0
                             "Comment too big found", NULL);
4901
0
                xmlFree (buf);
4902
0
                return;
4903
0
            }
4904
13.8k
      tmp = xmlRealloc(buf, newSize);
4905
13.8k
      if (tmp == NULL) {
4906
0
    xmlErrMemory(ctxt);
4907
0
    xmlFree(buf);
4908
0
    return;
4909
0
      }
4910
13.8k
      buf = tmp;
4911
13.8k
            size = newSize;
4912
13.8k
  }
4913
6.44M
  COPY_BUF(buf, len, q);
4914
4915
6.44M
  q = r;
4916
6.44M
  ql = rl;
4917
6.44M
  r = cur;
4918
6.44M
  rl = l;
4919
4920
6.44M
  NEXTL(l);
4921
6.44M
  cur = xmlCurrentCharRecover(ctxt, &l);
4922
4923
6.44M
    }
4924
44.9k
    buf[len] = 0;
4925
44.9k
    if (cur == 0) {
4926
579
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4927
579
                       "Comment not terminated \n<!--%.50s\n", buf);
4928
44.3k
    } else if (!IS_CHAR(cur)) {
4929
114
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4930
114
                          "xmlParseComment: invalid xmlChar value %d\n",
4931
114
                    cur);
4932
44.2k
    } else {
4933
44.2k
        NEXT;
4934
44.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4935
44.2k
      (!ctxt->disableSAX))
4936
39.1k
      ctxt->sax->comment(ctxt->userData, buf);
4937
44.2k
    }
4938
44.9k
    xmlFree(buf);
4939
44.9k
    return;
4940
470
not_terminated:
4941
470
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4942
470
       "Comment not terminated\n", NULL);
4943
470
    xmlFree(buf);
4944
470
}
4945
4946
/**
4947
 * Parse an XML (SGML) comment. Always consumes '<!'.
4948
 *
4949
 * @deprecated Internal function, don't use.
4950
 *
4951
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4952
 *  must not occur within comments. "
4953
 *
4954
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4955
 * @param ctxt  an XML parser context
4956
 */
4957
void
4958
324k
xmlParseComment(xmlParserCtxt *ctxt) {
4959
324k
    xmlChar *buf = NULL;
4960
324k
    size_t size = XML_PARSER_BUFFER_SIZE;
4961
324k
    size_t len = 0;
4962
324k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4963
0
                       XML_MAX_HUGE_LENGTH :
4964
324k
                       XML_MAX_TEXT_LENGTH;
4965
324k
    const xmlChar *in;
4966
324k
    size_t nbchar = 0;
4967
324k
    int ccol;
4968
4969
    /*
4970
     * Check that there is a comment right here.
4971
     */
4972
324k
    if ((RAW != '<') || (NXT(1) != '!'))
4973
0
        return;
4974
324k
    SKIP(2);
4975
324k
    if ((RAW != '-') || (NXT(1) != '-'))
4976
19
        return;
4977
324k
    SKIP(2);
4978
324k
    GROW;
4979
4980
    /*
4981
     * Accelerated common case where input don't need to be
4982
     * modified before passing it to the handler.
4983
     */
4984
324k
    in = ctxt->input->cur;
4985
324k
    do {
4986
324k
  if (*in == 0xA) {
4987
19.9k
      do {
4988
19.9k
    ctxt->input->line++; ctxt->input->col = 1;
4989
19.9k
    in++;
4990
19.9k
      } while (*in == 0xA);
4991
18.8k
  }
4992
1.25M
get_more:
4993
1.25M
        ccol = ctxt->input->col;
4994
5.45M
  while (((*in > '-') && (*in <= 0x7F)) ||
4995
5.45M
         ((*in >= 0x20) && (*in < '-')) ||
4996
5.45M
         (*in == 0x09)) {
4997
4.20M
        in++;
4998
4.20M
        ccol++;
4999
4.20M
  }
5000
1.25M
  ctxt->input->col = ccol;
5001
1.25M
  if (*in == 0xA) {
5002
190k
      do {
5003
190k
    ctxt->input->line++; ctxt->input->col = 1;
5004
190k
    in++;
5005
190k
      } while (*in == 0xA);
5006
62.3k
      goto get_more;
5007
62.3k
  }
5008
1.18M
  nbchar = in - ctxt->input->cur;
5009
  /*
5010
   * save current set of data
5011
   */
5012
1.18M
  if (nbchar > 0) {
5013
1.16M
            if (nbchar > maxLength - len) {
5014
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5015
0
                                  "Comment too big found", NULL);
5016
0
                xmlFree(buf);
5017
0
                return;
5018
0
            }
5019
1.16M
            if (buf == NULL) {
5020
309k
                if ((*in == '-') && (in[1] == '-'))
5021
15.0k
                    size = nbchar + 1;
5022
294k
                else
5023
294k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5024
309k
                buf = xmlMalloc(size);
5025
309k
                if (buf == NULL) {
5026
0
                    xmlErrMemory(ctxt);
5027
0
                    return;
5028
0
                }
5029
309k
                len = 0;
5030
853k
            } else if (len + nbchar + 1 >= size) {
5031
5.79k
                xmlChar *new_buf;
5032
5.79k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5033
5.79k
                new_buf = xmlRealloc(buf, size);
5034
5.79k
                if (new_buf == NULL) {
5035
0
                    xmlErrMemory(ctxt);
5036
0
                    xmlFree(buf);
5037
0
                    return;
5038
0
                }
5039
5.79k
                buf = new_buf;
5040
5.79k
            }
5041
1.16M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5042
1.16M
            len += nbchar;
5043
1.16M
            buf[len] = 0;
5044
1.16M
  }
5045
1.18M
  ctxt->input->cur = in;
5046
1.18M
  if (*in == 0xA) {
5047
0
      in++;
5048
0
      ctxt->input->line++; ctxt->input->col = 1;
5049
0
  }
5050
1.18M
  if (*in == 0xD) {
5051
48.0k
      in++;
5052
48.0k
      if (*in == 0xA) {
5053
14.1k
    ctxt->input->cur = in;
5054
14.1k
    in++;
5055
14.1k
    ctxt->input->line++; ctxt->input->col = 1;
5056
14.1k
    goto get_more;
5057
14.1k
      }
5058
33.9k
      in--;
5059
33.9k
  }
5060
1.17M
  SHRINK;
5061
1.17M
  GROW;
5062
1.17M
  in = ctxt->input->cur;
5063
1.17M
  if (*in == '-') {
5064
1.12M
      if (in[1] == '-') {
5065
789k
          if (in[2] == '>') {
5066
278k
        SKIP(3);
5067
278k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5068
278k
            (!ctxt->disableSAX)) {
5069
260k
      if (buf != NULL)
5070
259k
          ctxt->sax->comment(ctxt->userData, buf);
5071
1.57k
      else
5072
1.57k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5073
260k
        }
5074
278k
        if (buf != NULL)
5075
268k
            xmlFree(buf);
5076
278k
        return;
5077
278k
    }
5078
511k
    if (buf != NULL) {
5079
509k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5080
509k
                          "Double hyphen within comment: "
5081
509k
                                      "<!--%.50s\n",
5082
509k
              buf);
5083
509k
    } else
5084
2.16k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
2.16k
                          "Double hyphen within comment\n", NULL);
5086
511k
    in++;
5087
511k
    ctxt->input->col++;
5088
511k
      }
5089
851k
      in++;
5090
851k
      ctxt->input->col++;
5091
851k
      goto get_more;
5092
1.12M
  }
5093
1.17M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5094
45.4k
    xmlParseCommentComplex(ctxt, buf, len, size);
5095
45.4k
}
5096
5097
5098
/**
5099
 * Parse the name of a PI
5100
 *
5101
 * @deprecated Internal function, don't use.
5102
 *
5103
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5104
 *
5105
 * @param ctxt  an XML parser context
5106
 * @returns the PITarget name or NULL
5107
 */
5108
5109
const xmlChar *
5110
616k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5111
616k
    const xmlChar *name;
5112
5113
616k
    name = xmlParseName(ctxt);
5114
616k
    if ((name != NULL) &&
5115
616k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5116
616k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5117
616k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5118
17.1k
  int i;
5119
17.1k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5120
17.1k
      (name[2] == 'l') && (name[3] == 0)) {
5121
2.12k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5122
2.12k
     "XML declaration allowed only at the start of the document\n");
5123
2.12k
      return(name);
5124
14.9k
  } else if (name[3] == 0) {
5125
423
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5126
423
      return(name);
5127
423
  }
5128
38.4k
  for (i = 0;;i++) {
5129
38.4k
      if (xmlW3CPIs[i] == NULL) break;
5130
29.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5131
5.13k
          return(name);
5132
29.0k
  }
5133
9.41k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5134
9.41k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5135
9.41k
          NULL, NULL);
5136
9.41k
    }
5137
608k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5138
257
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5139
257
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5140
257
    }
5141
608k
    return(name);
5142
616k
}
5143
5144
#ifdef LIBXML_CATALOG_ENABLED
5145
/**
5146
 * Parse an XML Catalog Processing Instruction.
5147
 *
5148
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5149
 *
5150
 * Occurs only if allowed by the user and if happening in the Misc
5151
 * part of the document before any doctype information
5152
 * This will add the given catalog to the parsing context in order
5153
 * to be used if there is a resolution need further down in the document
5154
 *
5155
 * @param ctxt  an XML parser context
5156
 * @param catalog  the PI value string
5157
 */
5158
5159
static void
5160
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5161
0
    xmlChar *URL = NULL;
5162
0
    const xmlChar *tmp, *base;
5163
0
    xmlChar marker;
5164
5165
0
    tmp = catalog;
5166
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5168
0
  goto error;
5169
0
    tmp += 7;
5170
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5171
0
    if (*tmp != '=') {
5172
0
  return;
5173
0
    }
5174
0
    tmp++;
5175
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5176
0
    marker = *tmp;
5177
0
    if ((marker != '\'') && (marker != '"'))
5178
0
  goto error;
5179
0
    tmp++;
5180
0
    base = tmp;
5181
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5182
0
    if (*tmp == 0)
5183
0
  goto error;
5184
0
    URL = xmlStrndup(base, tmp - base);
5185
0
    tmp++;
5186
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5187
0
    if (*tmp != 0)
5188
0
  goto error;
5189
5190
0
    if (URL != NULL) {
5191
        /*
5192
         * Unfortunately, the catalog API doesn't report OOM errors.
5193
         * xmlGetLastError isn't very helpful since we don't know
5194
         * where the last error came from. We'd have to reset it
5195
         * before this call and restore it afterwards.
5196
         */
5197
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5198
0
  xmlFree(URL);
5199
0
    }
5200
0
    return;
5201
5202
0
error:
5203
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5204
0
            "Catalog PI syntax error: %s\n",
5205
0
      catalog, NULL);
5206
0
    if (URL != NULL)
5207
0
  xmlFree(URL);
5208
0
}
5209
#endif
5210
5211
/**
5212
 * Parse an XML Processing Instruction.
5213
 *
5214
 * @deprecated Internal function, don't use.
5215
 *
5216
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5217
 *
5218
 * The processing is transferred to SAX once parsed.
5219
 *
5220
 * @param ctxt  an XML parser context
5221
 */
5222
5223
void
5224
616k
xmlParsePI(xmlParserCtxt *ctxt) {
5225
616k
    xmlChar *buf = NULL;
5226
616k
    size_t len = 0;
5227
616k
    size_t size = XML_PARSER_BUFFER_SIZE;
5228
616k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5229
0
                       XML_MAX_HUGE_LENGTH :
5230
616k
                       XML_MAX_TEXT_LENGTH;
5231
616k
    int cur, l;
5232
616k
    const xmlChar *target;
5233
5234
616k
    if ((RAW == '<') && (NXT(1) == '?')) {
5235
  /*
5236
   * this is a Processing Instruction.
5237
   */
5238
616k
  SKIP(2);
5239
5240
  /*
5241
   * Parse the target name and check for special support like
5242
   * namespace.
5243
   */
5244
616k
        target = xmlParsePITarget(ctxt);
5245
616k
  if (target != NULL) {
5246
609k
      if ((RAW == '?') && (NXT(1) == '>')) {
5247
421k
    SKIP(2);
5248
5249
    /*
5250
     * SAX: PI detected.
5251
     */
5252
421k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5253
421k
        (ctxt->sax->processingInstruction != NULL))
5254
0
        ctxt->sax->processingInstruction(ctxt->userData,
5255
0
                                         target, NULL);
5256
421k
    return;
5257
421k
      }
5258
188k
      buf = xmlMalloc(size);
5259
188k
      if (buf == NULL) {
5260
0
    xmlErrMemory(ctxt);
5261
0
    return;
5262
0
      }
5263
188k
      if (SKIP_BLANKS == 0) {
5264
13.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5265
13.2k
        "ParsePI: PI %s space expected\n", target);
5266
13.2k
      }
5267
188k
      cur = xmlCurrentCharRecover(ctxt, &l);
5268
17.2M
      while (IS_CHAR(cur) && /* checked */
5269
17.2M
       ((cur != '?') || (NXT(1) != '>'))) {
5270
17.0M
    if (len + 5 >= size) {
5271
32.3k
        xmlChar *tmp;
5272
32.3k
                    int newSize;
5273
5274
32.3k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5275
32.3k
                    if (newSize < 0) {
5276
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277
0
                                          "PI %s too big found", target);
5278
0
                        xmlFree(buf);
5279
0
                        return;
5280
0
                    }
5281
32.3k
        tmp = xmlRealloc(buf, newSize);
5282
32.3k
        if (tmp == NULL) {
5283
0
      xmlErrMemory(ctxt);
5284
0
      xmlFree(buf);
5285
0
      return;
5286
0
        }
5287
32.3k
        buf = tmp;
5288
32.3k
                    size = newSize;
5289
32.3k
    }
5290
17.0M
    COPY_BUF(buf, len, cur);
5291
17.0M
    NEXTL(l);
5292
17.0M
    cur = xmlCurrentCharRecover(ctxt, &l);
5293
17.0M
      }
5294
188k
      buf[len] = 0;
5295
188k
      if (cur != '?') {
5296
1.64k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5297
1.64k
          "ParsePI: PI %s never end ...\n", target);
5298
186k
      } else {
5299
186k
    SKIP(2);
5300
5301
186k
#ifdef LIBXML_CATALOG_ENABLED
5302
186k
    if ((ctxt->inSubset == 0) &&
5303
186k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5304
81.2k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5305
5306
81.2k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5307
81.2k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5308
0
       (allow == XML_CATA_ALLOW_ALL)))
5309
0
      xmlParseCatalogPI(ctxt, buf);
5310
81.2k
    }
5311
186k
#endif
5312
5313
    /*
5314
     * SAX: PI detected.
5315
     */
5316
186k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5317
186k
        (ctxt->sax->processingInstruction != NULL))
5318
0
        ctxt->sax->processingInstruction(ctxt->userData,
5319
0
                                         target, buf);
5320
186k
      }
5321
188k
      xmlFree(buf);
5322
188k
  } else {
5323
6.67k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5324
6.67k
  }
5325
616k
    }
5326
616k
}
5327
5328
/**
5329
 * Parse a notation declaration. Always consumes '<!'.
5330
 *
5331
 * @deprecated Internal function, don't use.
5332
 *
5333
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5334
 *                           S? '>'
5335
 *
5336
 * Hence there is actually 3 choices:
5337
 *
5338
 *     'PUBLIC' S PubidLiteral
5339
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5340
 *     'SYSTEM' S SystemLiteral
5341
 *
5342
 * See the NOTE on #xmlParseExternalID.
5343
 *
5344
 * @param ctxt  an XML parser context
5345
 */
5346
5347
void
5348
9.20k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5349
9.20k
    const xmlChar *name;
5350
9.20k
    xmlChar *Pubid;
5351
9.20k
    xmlChar *Systemid;
5352
5353
9.20k
    if ((CUR != '<') || (NXT(1) != '!'))
5354
0
        return;
5355
9.20k
    SKIP(2);
5356
5357
9.20k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5358
9.12k
#ifdef LIBXML_VALID_ENABLED
5359
9.12k
  int oldInputNr = ctxt->inputNr;
5360
9.12k
#endif
5361
5362
9.12k
  SKIP(8);
5363
9.12k
  if (SKIP_BLANKS_PE == 0) {
5364
247
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5365
247
         "Space required after '<!NOTATION'\n");
5366
247
      return;
5367
247
  }
5368
5369
8.87k
        name = xmlParseName(ctxt);
5370
8.87k
  if (name == NULL) {
5371
482
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5372
482
      return;
5373
482
  }
5374
8.39k
  if (xmlStrchr(name, ':') != NULL) {
5375
2
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5376
2
         "colons are forbidden from notation names '%s'\n",
5377
2
         name, NULL, NULL);
5378
2
  }
5379
8.39k
  if (SKIP_BLANKS_PE == 0) {
5380
173
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5381
173
         "Space required after the NOTATION name'\n");
5382
173
      return;
5383
173
  }
5384
5385
  /*
5386
   * Parse the IDs.
5387
   */
5388
8.21k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5389
8.21k
  SKIP_BLANKS_PE;
5390
5391
8.21k
  if (RAW == '>') {
5392
7.89k
#ifdef LIBXML_VALID_ENABLED
5393
7.89k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5394
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5395
0
                           "Notation declaration doesn't start and stop"
5396
0
                                 " in the same entity\n",
5397
0
                                 NULL, NULL);
5398
0
      }
5399
7.89k
#endif
5400
7.89k
      NEXT;
5401
7.89k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5402
7.89k
    (ctxt->sax->notationDecl != NULL))
5403
1.35k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5404
7.89k
  } else {
5405
326
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5406
326
  }
5407
8.21k
  if (Systemid != NULL) xmlFree(Systemid);
5408
8.21k
  if (Pubid != NULL) xmlFree(Pubid);
5409
8.21k
    }
5410
9.20k
}
5411
5412
/**
5413
 * Parse an entity declaration. Always consumes '<!'.
5414
 *
5415
 * @deprecated Internal function, don't use.
5416
 *
5417
 *     [70] EntityDecl ::= GEDecl | PEDecl
5418
 *
5419
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5420
 *
5421
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5422
 *
5423
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5424
 *
5425
 *     [74] PEDef ::= EntityValue | ExternalID
5426
 *
5427
 *     [76] NDataDecl ::= S 'NDATA' S Name
5428
 *
5429
 * [ VC: Notation Declared ]
5430
 * The Name must match the declared name of a notation.
5431
 *
5432
 * @param ctxt  an XML parser context
5433
 */
5434
5435
void
5436
72.8k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5437
72.8k
    const xmlChar *name = NULL;
5438
72.8k
    xmlChar *value = NULL;
5439
72.8k
    xmlChar *URI = NULL, *literal = NULL;
5440
72.8k
    const xmlChar *ndata = NULL;
5441
72.8k
    int isParameter = 0;
5442
72.8k
    xmlChar *orig = NULL;
5443
5444
72.8k
    if ((CUR != '<') || (NXT(1) != '!'))
5445
0
        return;
5446
72.8k
    SKIP(2);
5447
5448
    /* GROW; done in the caller */
5449
72.8k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5450
72.6k
#ifdef LIBXML_VALID_ENABLED
5451
72.6k
  int oldInputNr = ctxt->inputNr;
5452
72.6k
#endif
5453
5454
72.6k
  SKIP(6);
5455
72.6k
  if (SKIP_BLANKS_PE == 0) {
5456
13.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5457
13.9k
         "Space required after '<!ENTITY'\n");
5458
13.9k
  }
5459
5460
72.6k
  if (RAW == '%') {
5461
10.1k
      NEXT;
5462
10.1k
      if (SKIP_BLANKS_PE == 0) {
5463
6.71k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5464
6.71k
             "Space required after '%%'\n");
5465
6.71k
      }
5466
10.1k
      isParameter = 1;
5467
10.1k
  }
5468
5469
72.6k
        name = xmlParseName(ctxt);
5470
72.6k
  if (name == NULL) {
5471
191
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5472
191
                     "xmlParseEntityDecl: no name\n");
5473
191
            return;
5474
191
  }
5475
72.4k
  if (xmlStrchr(name, ':') != NULL) {
5476
573
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5477
573
         "colons are forbidden from entities names '%s'\n",
5478
573
         name, NULL, NULL);
5479
573
  }
5480
72.4k
  if (SKIP_BLANKS_PE == 0) {
5481
8.81k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5482
8.81k
         "Space required after the entity name\n");
5483
8.81k
  }
5484
5485
  /*
5486
   * handle the various case of definitions...
5487
   */
5488
72.4k
  if (isParameter) {
5489
10.1k
      if ((RAW == '"') || (RAW == '\'')) {
5490
8.17k
          value = xmlParseEntityValue(ctxt, &orig);
5491
8.17k
    if (value) {
5492
8.13k
        if ((ctxt->sax != NULL) &&
5493
8.13k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5494
317
      ctxt->sax->entityDecl(ctxt->userData, name,
5495
317
                        XML_INTERNAL_PARAMETER_ENTITY,
5496
317
            NULL, NULL, value);
5497
8.13k
    }
5498
8.17k
      } else {
5499
1.95k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5500
1.95k
    if ((URI == NULL) && (literal == NULL)) {
5501
130
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5502
130
    }
5503
1.95k
    if (URI) {
5504
1.42k
                    if (xmlStrchr(URI, '#')) {
5505
328
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5506
1.09k
                    } else {
5507
1.09k
                        if ((ctxt->sax != NULL) &&
5508
1.09k
                            (!ctxt->disableSAX) &&
5509
1.09k
                            (ctxt->sax->entityDecl != NULL))
5510
462
                            ctxt->sax->entityDecl(ctxt->userData, name,
5511
462
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5512
462
                                        literal, URI, NULL);
5513
1.09k
                    }
5514
1.42k
    }
5515
1.95k
      }
5516
62.3k
  } else {
5517
62.3k
      if ((RAW == '"') || (RAW == '\'')) {
5518
54.6k
          value = xmlParseEntityValue(ctxt, &orig);
5519
54.6k
    if ((ctxt->sax != NULL) &&
5520
54.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5521
4.55k
        ctxt->sax->entityDecl(ctxt->userData, name,
5522
4.55k
        XML_INTERNAL_GENERAL_ENTITY,
5523
4.55k
        NULL, NULL, value);
5524
    /*
5525
     * For expat compatibility in SAX mode.
5526
     */
5527
54.6k
    if ((ctxt->myDoc == NULL) ||
5528
54.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5529
0
        if (ctxt->myDoc == NULL) {
5530
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5531
0
      if (ctxt->myDoc == NULL) {
5532
0
          xmlErrMemory(ctxt);
5533
0
          goto done;
5534
0
      }
5535
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5536
0
        }
5537
0
        if (ctxt->myDoc->intSubset == NULL) {
5538
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5539
0
              BAD_CAST "fake", NULL, NULL);
5540
0
                        if (ctxt->myDoc->intSubset == NULL) {
5541
0
                            xmlErrMemory(ctxt);
5542
0
                            goto done;
5543
0
                        }
5544
0
                    }
5545
5546
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5547
0
                    NULL, NULL, value);
5548
0
    }
5549
54.6k
      } else {
5550
7.73k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5551
7.73k
    if ((URI == NULL) && (literal == NULL)) {
5552
380
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5553
380
    }
5554
7.73k
    if (URI) {
5555
7.06k
                    if (xmlStrchr(URI, '#')) {
5556
244
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5557
244
                    }
5558
7.06k
    }
5559
7.73k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5560
452
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5561
452
           "Space required before 'NDATA'\n");
5562
452
    }
5563
7.73k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5564
801
        SKIP(5);
5565
801
        if (SKIP_BLANKS_PE == 0) {
5566
16
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5567
16
               "Space required after 'NDATA'\n");
5568
16
        }
5569
801
        ndata = xmlParseName(ctxt);
5570
801
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5571
801
            (ctxt->sax->unparsedEntityDecl != NULL))
5572
373
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5573
373
            literal, URI, ndata);
5574
6.93k
    } else {
5575
6.93k
        if ((ctxt->sax != NULL) &&
5576
6.93k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5577
4.00k
      ctxt->sax->entityDecl(ctxt->userData, name,
5578
4.00k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5579
4.00k
            literal, URI, NULL);
5580
        /*
5581
         * For expat compatibility in SAX mode.
5582
         * assuming the entity replacement was asked for
5583
         */
5584
6.93k
        if ((ctxt->replaceEntities != 0) &&
5585
6.93k
      ((ctxt->myDoc == NULL) ||
5586
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5587
0
      if (ctxt->myDoc == NULL) {
5588
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5589
0
          if (ctxt->myDoc == NULL) {
5590
0
              xmlErrMemory(ctxt);
5591
0
        goto done;
5592
0
          }
5593
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5594
0
      }
5595
5596
0
      if (ctxt->myDoc->intSubset == NULL) {
5597
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5598
0
            BAD_CAST "fake", NULL, NULL);
5599
0
                            if (ctxt->myDoc->intSubset == NULL) {
5600
0
                                xmlErrMemory(ctxt);
5601
0
                                goto done;
5602
0
                            }
5603
0
                        }
5604
0
      xmlSAX2EntityDecl(ctxt, name,
5605
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5606
0
                  literal, URI, NULL);
5607
0
        }
5608
6.93k
    }
5609
7.73k
      }
5610
62.3k
  }
5611
72.4k
  SKIP_BLANKS_PE;
5612
72.4k
  if (RAW != '>') {
5613
1.57k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5614
1.57k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5615
1.57k
      xmlHaltParser(ctxt);
5616
70.8k
  } else {
5617
70.8k
#ifdef LIBXML_VALID_ENABLED
5618
70.8k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5619
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5620
0
                           "Entity declaration doesn't start and stop in"
5621
0
                                 " the same entity\n",
5622
0
                                 NULL, NULL);
5623
0
      }
5624
70.8k
#endif
5625
70.8k
      NEXT;
5626
70.8k
  }
5627
72.4k
  if (orig != NULL) {
5628
      /*
5629
       * Ugly mechanism to save the raw entity value.
5630
       */
5631
62.5k
      xmlEntityPtr cur = NULL;
5632
5633
62.5k
      if (isParameter) {
5634
8.13k
          if ((ctxt->sax != NULL) &&
5635
8.13k
        (ctxt->sax->getParameterEntity != NULL))
5636
8.13k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5637
54.4k
      } else {
5638
54.4k
          if ((ctxt->sax != NULL) &&
5639
54.4k
        (ctxt->sax->getEntity != NULL))
5640
54.4k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5641
54.4k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5642
43.4k
        cur = xmlSAX2GetEntity(ctxt, name);
5643
43.4k
    }
5644
54.4k
      }
5645
62.5k
            if ((cur != NULL) && (cur->orig == NULL)) {
5646
3.75k
    cur->orig = orig;
5647
3.75k
                orig = NULL;
5648
3.75k
      }
5649
62.5k
  }
5650
5651
72.4k
done:
5652
72.4k
  if (value != NULL) xmlFree(value);
5653
72.4k
  if (URI != NULL) xmlFree(URI);
5654
72.4k
  if (literal != NULL) xmlFree(literal);
5655
72.4k
        if (orig != NULL) xmlFree(orig);
5656
72.4k
    }
5657
72.8k
}
5658
5659
/**
5660
 * Parse an attribute default declaration
5661
 *
5662
 * @deprecated Internal function, don't use.
5663
 *
5664
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5665
 *
5666
 * [ VC: Required Attribute ]
5667
 * if the default declaration is the keyword \#REQUIRED, then the
5668
 * attribute must be specified for all elements of the type in the
5669
 * attribute-list declaration.
5670
 *
5671
 * [ VC: Attribute Default Legal ]
5672
 * The declared default value must meet the lexical constraints of
5673
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5674
 *
5675
 * [ VC: Fixed Attribute Default ]
5676
 * if an attribute has a default value declared with the \#FIXED
5677
 * keyword, instances of that attribute must match the default value.
5678
 *
5679
 * [ WFC: No < in Attribute Values ]
5680
 * handled in #xmlParseAttValue
5681
 *
5682
 * @param ctxt  an XML parser context
5683
 * @param value  Receive a possible fixed default value for the attribute
5684
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5685
 *          or XML_ATTRIBUTE_FIXED.
5686
 */
5687
5688
int
5689
34.5k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5690
34.5k
    int val;
5691
34.5k
    xmlChar *ret;
5692
5693
34.5k
    *value = NULL;
5694
34.5k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5695
839
  SKIP(9);
5696
839
  return(XML_ATTRIBUTE_REQUIRED);
5697
839
    }
5698
33.7k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5699
8.35k
  SKIP(8);
5700
8.35k
  return(XML_ATTRIBUTE_IMPLIED);
5701
8.35k
    }
5702
25.3k
    val = XML_ATTRIBUTE_NONE;
5703
25.3k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5704
1.65k
  SKIP(6);
5705
1.65k
  val = XML_ATTRIBUTE_FIXED;
5706
1.65k
  if (SKIP_BLANKS_PE == 0) {
5707
573
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5708
573
         "Space required after '#FIXED'\n");
5709
573
  }
5710
1.65k
    }
5711
25.3k
    ret = xmlParseAttValue(ctxt);
5712
25.3k
    if (ret == NULL) {
5713
2.57k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5714
2.57k
           "Attribute default value declaration error\n");
5715
2.57k
    } else
5716
22.8k
        *value = ret;
5717
25.3k
    return(val);
5718
33.7k
}
5719
5720
/**
5721
 * Parse an Notation attribute type.
5722
 *
5723
 * @deprecated Internal function, don't use.
5724
 *
5725
 * Note: the leading 'NOTATION' S part has already being parsed...
5726
 *
5727
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5728
 *
5729
 * [ VC: Notation Attributes ]
5730
 * Values of this type must match one of the notation names included
5731
 * in the declaration; all notation names in the declaration must be declared.
5732
 *
5733
 * @param ctxt  an XML parser context
5734
 * @returns the notation attribute tree built while parsing
5735
 */
5736
5737
xmlEnumeration *
5738
373
xmlParseNotationType(xmlParserCtxt *ctxt) {
5739
373
    const xmlChar *name;
5740
373
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5741
5742
373
    if (RAW != '(') {
5743
15
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5744
15
  return(NULL);
5745
15
    }
5746
926
    do {
5747
926
        NEXT;
5748
926
  SKIP_BLANKS_PE;
5749
926
        name = xmlParseName(ctxt);
5750
926
  if (name == NULL) {
5751
14
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5752
14
         "Name expected in NOTATION declaration\n");
5753
14
            xmlFreeEnumeration(ret);
5754
14
      return(NULL);
5755
14
  }
5756
912
        tmp = NULL;
5757
912
#ifdef LIBXML_VALID_ENABLED
5758
912
        if (ctxt->validate) {
5759
0
            tmp = ret;
5760
0
            while (tmp != NULL) {
5761
0
                if (xmlStrEqual(name, tmp->name)) {
5762
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5763
0
              "standalone: attribute notation value token %s duplicated\n",
5764
0
                                     name, NULL);
5765
0
                    if (!xmlDictOwns(ctxt->dict, name))
5766
0
                        xmlFree((xmlChar *) name);
5767
0
                    break;
5768
0
                }
5769
0
                tmp = tmp->next;
5770
0
            }
5771
0
        }
5772
912
#endif /* LIBXML_VALID_ENABLED */
5773
912
  if (tmp == NULL) {
5774
912
      cur = xmlCreateEnumeration(name);
5775
912
      if (cur == NULL) {
5776
0
                xmlErrMemory(ctxt);
5777
0
                xmlFreeEnumeration(ret);
5778
0
                return(NULL);
5779
0
            }
5780
912
      if (last == NULL) ret = last = cur;
5781
558
      else {
5782
558
    last->next = cur;
5783
558
    last = cur;
5784
558
      }
5785
912
  }
5786
912
  SKIP_BLANKS_PE;
5787
912
    } while (RAW == '|');
5788
344
    if (RAW != ')') {
5789
31
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5790
31
        xmlFreeEnumeration(ret);
5791
31
  return(NULL);
5792
31
    }
5793
313
    NEXT;
5794
313
    return(ret);
5795
344
}
5796
5797
/**
5798
 * Parse an Enumeration attribute type.
5799
 *
5800
 * @deprecated Internal function, don't use.
5801
 *
5802
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803
 *
5804
 * [ VC: Enumeration ]
5805
 * Values of this type must match one of the Nmtoken tokens in
5806
 * the declaration
5807
 *
5808
 * @param ctxt  an XML parser context
5809
 * @returns the enumeration attribute tree built while parsing
5810
 */
5811
5812
xmlEnumeration *
5813
13.2k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5814
13.2k
    xmlChar *name;
5815
13.2k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5816
5817
13.2k
    if (RAW != '(') {
5818
638
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5819
638
  return(NULL);
5820
638
    }
5821
23.3k
    do {
5822
23.3k
        NEXT;
5823
23.3k
  SKIP_BLANKS_PE;
5824
23.3k
        name = xmlParseNmtoken(ctxt);
5825
23.3k
  if (name == NULL) {
5826
135
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5827
135
      return(ret);
5828
135
  }
5829
23.1k
        tmp = NULL;
5830
23.1k
#ifdef LIBXML_VALID_ENABLED
5831
23.1k
        if (ctxt->validate) {
5832
0
            tmp = ret;
5833
0
            while (tmp != NULL) {
5834
0
                if (xmlStrEqual(name, tmp->name)) {
5835
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5836
0
              "standalone: attribute enumeration value token %s duplicated\n",
5837
0
                                     name, NULL);
5838
0
                    if (!xmlDictOwns(ctxt->dict, name))
5839
0
                        xmlFree(name);
5840
0
                    break;
5841
0
                }
5842
0
                tmp = tmp->next;
5843
0
            }
5844
0
        }
5845
23.1k
#endif /* LIBXML_VALID_ENABLED */
5846
23.1k
  if (tmp == NULL) {
5847
23.1k
      cur = xmlCreateEnumeration(name);
5848
23.1k
      if (!xmlDictOwns(ctxt->dict, name))
5849
23.1k
    xmlFree(name);
5850
23.1k
      if (cur == NULL) {
5851
0
                xmlErrMemory(ctxt);
5852
0
                xmlFreeEnumeration(ret);
5853
0
                return(NULL);
5854
0
            }
5855
23.1k
      if (last == NULL) ret = last = cur;
5856
10.6k
      else {
5857
10.6k
    last->next = cur;
5858
10.6k
    last = cur;
5859
10.6k
      }
5860
23.1k
  }
5861
23.1k
  SKIP_BLANKS_PE;
5862
23.1k
    } while (RAW == '|');
5863
12.4k
    if (RAW != ')') {
5864
603
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5865
603
  return(ret);
5866
603
    }
5867
11.8k
    NEXT;
5868
11.8k
    return(ret);
5869
12.4k
}
5870
5871
/**
5872
 * Parse an Enumerated attribute type.
5873
 *
5874
 * @deprecated Internal function, don't use.
5875
 *
5876
 *     [57] EnumeratedType ::= NotationType | Enumeration
5877
 *
5878
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5879
 *
5880
 * @param ctxt  an XML parser context
5881
 * @param tree  the enumeration tree built while parsing
5882
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5883
 */
5884
5885
int
5886
13.6k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5887
13.6k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5888
378
  SKIP(8);
5889
378
  if (SKIP_BLANKS_PE == 0) {
5890
5
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5891
5
         "Space required after 'NOTATION'\n");
5892
5
      return(0);
5893
5
  }
5894
373
  *tree = xmlParseNotationType(ctxt);
5895
373
  if (*tree == NULL) return(0);
5896
313
  return(XML_ATTRIBUTE_NOTATION);
5897
373
    }
5898
13.2k
    *tree = xmlParseEnumerationType(ctxt);
5899
13.2k
    if (*tree == NULL) return(0);
5900
12.5k
    return(XML_ATTRIBUTE_ENUMERATION);
5901
13.2k
}
5902
5903
/**
5904
 * Parse the Attribute list def for an element
5905
 *
5906
 * @deprecated Internal function, don't use.
5907
 *
5908
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5909
 *
5910
 *     [55] StringType ::= 'CDATA'
5911
 *
5912
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5913
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5914
 *
5915
 * Validity constraints for attribute values syntax are checked in
5916
 * #xmlValidateAttributeValue
5917
 *
5918
 * [ VC: ID ]
5919
 * Values of type ID must match the Name production. A name must not
5920
 * appear more than once in an XML document as a value of this type;
5921
 * i.e., ID values must uniquely identify the elements which bear them.
5922
 *
5923
 * [ VC: One ID per Element Type ]
5924
 * No element type may have more than one ID attribute specified.
5925
 *
5926
 * [ VC: ID Attribute Default ]
5927
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5928
 *
5929
 * [ VC: IDREF ]
5930
 * Values of type IDREF must match the Name production, and values
5931
 * of type IDREFS must match Names; each IDREF Name must match the value
5932
 * of an ID attribute on some element in the XML document; i.e. IDREF
5933
 * values must match the value of some ID attribute.
5934
 *
5935
 * [ VC: Entity Name ]
5936
 * Values of type ENTITY must match the Name production, values
5937
 * of type ENTITIES must match Names; each Entity Name must match the
5938
 * name of an unparsed entity declared in the DTD.
5939
 *
5940
 * [ VC: Name Token ]
5941
 * Values of type NMTOKEN must match the Nmtoken production; values
5942
 * of type NMTOKENS must match Nmtokens.
5943
 *
5944
 * @param ctxt  an XML parser context
5945
 * @param tree  the enumeration tree built while parsing
5946
 * @returns the attribute type
5947
 */
5948
int
5949
36.1k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5950
36.1k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5951
293
  SKIP(5);
5952
293
  return(XML_ATTRIBUTE_CDATA);
5953
35.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5954
7.66k
  SKIP(6);
5955
7.66k
  return(XML_ATTRIBUTE_IDREFS);
5956
28.2k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5957
1.50k
  SKIP(5);
5958
1.50k
  return(XML_ATTRIBUTE_IDREF);
5959
26.7k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5960
6.76k
        SKIP(2);
5961
6.76k
  return(XML_ATTRIBUTE_ID);
5962
19.9k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5963
240
  SKIP(6);
5964
240
  return(XML_ATTRIBUTE_ENTITY);
5965
19.7k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5966
3.42k
  SKIP(8);
5967
3.42k
  return(XML_ATTRIBUTE_ENTITIES);
5968
16.2k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5969
391
  SKIP(8);
5970
391
  return(XML_ATTRIBUTE_NMTOKENS);
5971
15.8k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5972
2.24k
  SKIP(7);
5973
2.24k
  return(XML_ATTRIBUTE_NMTOKEN);
5974
2.24k
     }
5975
13.6k
     return(xmlParseEnumeratedType(ctxt, tree));
5976
36.1k
}
5977
5978
/**
5979
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5980
 *
5981
 * @deprecated Internal function, don't use.
5982
 *
5983
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5984
 *
5985
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5986
 * @param ctxt  an XML parser context
5987
 */
5988
void
5989
16.4k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5990
16.4k
    const xmlChar *elemName;
5991
16.4k
    const xmlChar *attrName;
5992
16.4k
    xmlEnumerationPtr tree;
5993
5994
16.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5995
0
        return;
5996
16.4k
    SKIP(2);
5997
5998
16.4k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5999
16.3k
#ifdef LIBXML_VALID_ENABLED
6000
16.3k
  int oldInputNr = ctxt->inputNr;
6001
16.3k
#endif
6002
6003
16.3k
  SKIP(7);
6004
16.3k
  if (SKIP_BLANKS_PE == 0) {
6005
1.58k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6006
1.58k
                     "Space required after '<!ATTLIST'\n");
6007
1.58k
  }
6008
16.3k
        elemName = xmlParseName(ctxt);
6009
16.3k
  if (elemName == NULL) {
6010
476
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6011
476
         "ATTLIST: no name for Element\n");
6012
476
      return;
6013
476
  }
6014
15.9k
  SKIP_BLANKS_PE;
6015
15.9k
  GROW;
6016
47.3k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6017
38.6k
      int type;
6018
38.6k
      int def;
6019
38.6k
      xmlChar *defaultValue = NULL;
6020
6021
38.6k
      GROW;
6022
38.6k
            tree = NULL;
6023
38.6k
      attrName = xmlParseName(ctxt);
6024
38.6k
      if (attrName == NULL) {
6025
2.04k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6026
2.04k
             "ATTLIST: no name for Attribute\n");
6027
2.04k
    break;
6028
2.04k
      }
6029
36.5k
      GROW;
6030
36.5k
      if (SKIP_BLANKS_PE == 0) {
6031
400
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6032
400
            "Space required after the attribute name\n");
6033
400
    break;
6034
400
      }
6035
6036
36.1k
      type = xmlParseAttributeType(ctxt, &tree);
6037
36.1k
      if (type <= 0) {
6038
818
          break;
6039
818
      }
6040
6041
35.3k
      GROW;
6042
35.3k
      if (SKIP_BLANKS_PE == 0) {
6043
777
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6044
777
             "Space required after the attribute type\n");
6045
777
          if (tree != NULL)
6046
632
        xmlFreeEnumeration(tree);
6047
777
    break;
6048
777
      }
6049
6050
34.5k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6051
34.5k
      if (def <= 0) {
6052
0
                if (defaultValue != NULL)
6053
0
        xmlFree(defaultValue);
6054
0
          if (tree != NULL)
6055
0
        xmlFreeEnumeration(tree);
6056
0
          break;
6057
0
      }
6058
34.5k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6059
22.8k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6060
6061
34.5k
      GROW;
6062
34.5k
            if (RAW != '>') {
6063
28.3k
    if (SKIP_BLANKS_PE == 0) {
6064
3.16k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065
3.16k
      "Space required after the attribute default value\n");
6066
3.16k
        if (defaultValue != NULL)
6067
633
      xmlFree(defaultValue);
6068
3.16k
        if (tree != NULL)
6069
492
      xmlFreeEnumeration(tree);
6070
3.16k
        break;
6071
3.16k
    }
6072
28.3k
      }
6073
31.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6074
31.4k
    (ctxt->sax->attributeDecl != NULL))
6075
22.0k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6076
22.0k
                          type, def, defaultValue, tree);
6077
9.33k
      else if (tree != NULL)
6078
3.14k
    xmlFreeEnumeration(tree);
6079
6080
31.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6081
31.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6082
31.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6083
0
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6084
0
      }
6085
31.4k
      if (ctxt->sax2) {
6086
0
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6087
0
      }
6088
31.4k
      if (defaultValue != NULL)
6089
22.1k
          xmlFree(defaultValue);
6090
31.4k
      GROW;
6091
31.4k
  }
6092
15.9k
  if (RAW == '>') {
6093
8.76k
#ifdef LIBXML_VALID_ENABLED
6094
8.76k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6095
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6096
0
                                 "Attribute list declaration doesn't start and"
6097
0
                                 " stop in the same entity\n",
6098
0
                                 NULL, NULL);
6099
0
      }
6100
8.76k
#endif
6101
8.76k
      NEXT;
6102
8.76k
  }
6103
15.9k
    }
6104
16.4k
}
6105
6106
/**
6107
 * Handle PEs and check that we don't pop the entity that started
6108
 * a balanced group.
6109
 *
6110
 * @param ctxt  parser context
6111
 * @param openInputNr  input nr of the entity with opening '('
6112
 */
6113
static void
6114
395k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6115
395k
    SKIP_BLANKS;
6116
395k
    GROW;
6117
6118
395k
    (void) openInputNr;
6119
6120
395k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6121
395k
        return;
6122
6123
0
    while (!PARSER_STOPPED(ctxt)) {
6124
0
        if (ctxt->input->cur >= ctxt->input->end) {
6125
0
#ifdef LIBXML_VALID_ENABLED
6126
0
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6127
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6128
0
                                 "Element content declaration doesn't start "
6129
0
                                 "and stop in the same entity\n",
6130
0
                                 NULL, NULL);
6131
0
            }
6132
0
#endif
6133
0
            if (PARSER_IN_PE(ctxt))
6134
0
                xmlPopPE(ctxt);
6135
0
            else
6136
0
                break;
6137
0
        } else if (RAW == '%') {
6138
0
            xmlParsePERefInternal(ctxt, 0);
6139
0
        } else {
6140
0
            break;
6141
0
        }
6142
6143
0
        SKIP_BLANKS;
6144
0
        GROW;
6145
0
    }
6146
0
}
6147
6148
/**
6149
 * Parse the declaration for a Mixed Element content
6150
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6151
 *
6152
 * @deprecated Internal function, don't use.
6153
 *
6154
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6155
 *                    '(' S? '#PCDATA' S? ')'
6156
 *
6157
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6158
 *
6159
 * [ VC: No Duplicate Types ]
6160
 * The same name must not appear more than once in a single
6161
 * mixed-content declaration.
6162
 *
6163
 * @param ctxt  an XML parser context
6164
 * @param openInputNr  the input used for the current entity, needed for
6165
 * boundary checks
6166
 * @returns the list of the xmlElementContent describing the element choices
6167
 */
6168
xmlElementContent *
6169
2.13k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6170
2.13k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6171
2.13k
    const xmlChar *elem = NULL;
6172
6173
2.13k
    GROW;
6174
2.13k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6175
2.13k
  SKIP(7);
6176
2.13k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6177
2.13k
  if (RAW == ')') {
6178
1.83k
#ifdef LIBXML_VALID_ENABLED
6179
1.83k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6180
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181
0
                                 "Element content declaration doesn't start "
6182
0
                                 "and stop in the same entity\n",
6183
0
                                 NULL, NULL);
6184
0
      }
6185
1.83k
#endif
6186
1.83k
      NEXT;
6187
1.83k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6188
1.83k
      if (ret == NULL)
6189
0
                goto mem_error;
6190
1.83k
      if (RAW == '*') {
6191
44
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6192
44
    NEXT;
6193
44
      }
6194
1.83k
      return(ret);
6195
1.83k
  }
6196
300
  if ((RAW == '(') || (RAW == '|')) {
6197
254
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6198
254
      if (ret == NULL)
6199
0
                goto mem_error;
6200
254
  }
6201
931
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6202
645
      NEXT;
6203
645
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6204
645
            if (n == NULL)
6205
0
                goto mem_error;
6206
645
      if (elem == NULL) {
6207
251
    n->c1 = cur;
6208
251
    if (cur != NULL)
6209
251
        cur->parent = n;
6210
251
    ret = cur = n;
6211
394
      } else {
6212
394
          cur->c2 = n;
6213
394
    n->parent = cur;
6214
394
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6215
394
                if (n->c1 == NULL)
6216
0
                    goto mem_error;
6217
394
    n->c1->parent = n;
6218
394
    cur = n;
6219
394
      }
6220
645
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6221
645
      elem = xmlParseName(ctxt);
6222
645
      if (elem == NULL) {
6223
14
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6224
14
      "xmlParseElementMixedContentDecl : Name expected\n");
6225
14
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6226
14
    return(NULL);
6227
14
      }
6228
631
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6229
631
  }
6230
286
  if ((RAW == ')') && (NXT(1) == '*')) {
6231
208
      if (elem != NULL) {
6232
208
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6233
208
                                   XML_ELEMENT_CONTENT_ELEMENT);
6234
208
    if (cur->c2 == NULL)
6235
0
                    goto mem_error;
6236
208
    cur->c2->parent = cur;
6237
208
            }
6238
208
            if (ret != NULL)
6239
208
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6240
208
#ifdef LIBXML_VALID_ENABLED
6241
208
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6242
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6243
0
                                 "Element content declaration doesn't start "
6244
0
                                 "and stop in the same entity\n",
6245
0
                                 NULL, NULL);
6246
0
      }
6247
208
#endif
6248
208
      SKIP(2);
6249
208
  } else {
6250
78
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6251
78
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6252
78
      return(NULL);
6253
78
  }
6254
6255
286
    } else {
6256
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6257
0
    }
6258
208
    return(ret);
6259
6260
0
mem_error:
6261
0
    xmlErrMemory(ctxt);
6262
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6263
0
    return(NULL);
6264
2.13k
}
6265
6266
/**
6267
 * Parse the declaration for an element children content (choice or sequence)
6268
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6269
 *
6270
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6271
 *
6272
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6273
 *
6274
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6275
 *
6276
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6277
 *
6278
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6279
 * TODO Parameter-entity replacement text must be properly nested
6280
 *  with parenthesized groups. That is to say, if either of the
6281
 *  opening or closing parentheses in a choice, seq, or Mixed
6282
 *  construct is contained in the replacement text for a parameter
6283
 *  entity, both must be contained in the same replacement text. For
6284
 *  interoperability, if a parameter-entity reference appears in a
6285
 *  choice, seq, or Mixed construct, its replacement text should not
6286
 *  be empty, and neither the first nor last non-blank character of
6287
 *  the replacement text should be a connector (| or ,).
6288
 *
6289
 * @param ctxt  an XML parser context
6290
 * @param openInputNr  the input used for the current entity, needed for
6291
 * boundary checks
6292
 * @param depth  the level of recursion
6293
 * @returns the tree of xmlElementContent describing the element
6294
 *          hierarchy.
6295
 */
6296
static xmlElementContentPtr
6297
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6298
36.4k
                                       int depth) {
6299
36.4k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6300
36.4k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6301
36.4k
    const xmlChar *elem;
6302
36.4k
    xmlChar type = 0;
6303
6304
36.4k
    if (depth > maxDepth) {
6305
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6306
4
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6307
4
                "use XML_PARSE_HUGE\n", depth);
6308
4
  return(NULL);
6309
4
    }
6310
36.4k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6311
36.4k
    if (RAW == '(') {
6312
12.1k
        int newInputNr = ctxt->inputNr;
6313
6314
        /* Recurse on first child */
6315
12.1k
  NEXT;
6316
12.1k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6317
12.1k
                                                           depth + 1);
6318
12.1k
        if (cur == NULL)
6319
8.38k
            return(NULL);
6320
24.3k
    } else {
6321
24.3k
  elem = xmlParseName(ctxt);
6322
24.3k
  if (elem == NULL) {
6323
173
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6324
173
      return(NULL);
6325
173
  }
6326
24.1k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6327
24.1k
  if (cur == NULL) {
6328
0
      xmlErrMemory(ctxt);
6329
0
      return(NULL);
6330
0
  }
6331
24.1k
  GROW;
6332
24.1k
  if (RAW == '?') {
6333
511
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6334
511
      NEXT;
6335
23.6k
  } else if (RAW == '*') {
6336
2.56k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6337
2.56k
      NEXT;
6338
21.1k
  } else if (RAW == '+') {
6339
986
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6340
986
      NEXT;
6341
20.1k
  } else {
6342
20.1k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6343
20.1k
  }
6344
24.1k
  GROW;
6345
24.1k
    }
6346
181k
    while (!PARSER_STOPPED(ctxt)) {
6347
181k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6348
181k
        if (RAW == ')')
6349
23.5k
            break;
6350
        /*
6351
   * Each loop we parse one separator and one element.
6352
   */
6353
157k
        if (RAW == ',') {
6354
74.8k
      if (type == 0) type = CUR;
6355
6356
      /*
6357
       * Detect "Name | Name , Name" error
6358
       */
6359
73.1k
      else if (type != CUR) {
6360
5
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6361
5
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6362
5
                      type);
6363
5
    if ((last != NULL) && (last != ret))
6364
5
        xmlFreeDocElementContent(ctxt->myDoc, last);
6365
5
    if (ret != NULL)
6366
5
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6367
5
    return(NULL);
6368
5
      }
6369
74.8k
      NEXT;
6370
6371
74.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6372
74.8k
      if (op == NULL) {
6373
0
                xmlErrMemory(ctxt);
6374
0
    if ((last != NULL) && (last != ret))
6375
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6376
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6377
0
    return(NULL);
6378
0
      }
6379
74.8k
      if (last == NULL) {
6380
1.76k
    op->c1 = ret;
6381
1.76k
    if (ret != NULL)
6382
1.76k
        ret->parent = op;
6383
1.76k
    ret = cur = op;
6384
73.1k
      } else {
6385
73.1k
          cur->c2 = op;
6386
73.1k
    if (op != NULL)
6387
73.1k
        op->parent = cur;
6388
73.1k
    op->c1 = last;
6389
73.1k
    if (last != NULL)
6390
73.1k
        last->parent = op;
6391
73.1k
    cur =op;
6392
73.1k
    last = NULL;
6393
73.1k
      }
6394
82.7k
  } else if (RAW == '|') {
6395
81.9k
      if (type == 0) type = CUR;
6396
6397
      /*
6398
       * Detect "Name , Name | Name" error
6399
       */
6400
72.6k
      else if (type != CUR) {
6401
9
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6402
9
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6403
9
          type);
6404
9
    if ((last != NULL) && (last != ret))
6405
9
        xmlFreeDocElementContent(ctxt->myDoc, last);
6406
9
    if (ret != NULL)
6407
9
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6408
9
    return(NULL);
6409
9
      }
6410
81.8k
      NEXT;
6411
6412
81.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6413
81.8k
      if (op == NULL) {
6414
0
                xmlErrMemory(ctxt);
6415
0
    if ((last != NULL) && (last != ret))
6416
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6417
0
    if (ret != NULL)
6418
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
0
    return(NULL);
6420
0
      }
6421
81.8k
      if (last == NULL) {
6422
9.25k
    op->c1 = ret;
6423
9.25k
    if (ret != NULL)
6424
9.25k
        ret->parent = op;
6425
9.25k
    ret = cur = op;
6426
72.6k
      } else {
6427
72.6k
          cur->c2 = op;
6428
72.6k
    if (op != NULL)
6429
72.6k
        op->parent = cur;
6430
72.6k
    op->c1 = last;
6431
72.6k
    if (last != NULL)
6432
72.6k
        last->parent = op;
6433
72.6k
    cur =op;
6434
72.6k
    last = NULL;
6435
72.6k
      }
6436
81.8k
  } else {
6437
839
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6438
839
      if ((last != NULL) && (last != ret))
6439
183
          xmlFreeDocElementContent(ctxt->myDoc, last);
6440
839
      if (ret != NULL)
6441
839
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
839
      return(NULL);
6443
839
  }
6444
156k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6445
156k
        if (RAW == '(') {
6446
8.67k
            int newInputNr = ctxt->inputNr;
6447
6448
      /* Recurse on second child */
6449
8.67k
      NEXT;
6450
8.67k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6451
8.67k
                                                          depth + 1);
6452
8.67k
            if (last == NULL) {
6453
3.43k
    if (ret != NULL)
6454
3.43k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6455
3.43k
    return(NULL);
6456
3.43k
            }
6457
148k
  } else {
6458
148k
      elem = xmlParseName(ctxt);
6459
148k
      if (elem == NULL) {
6460
47
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6461
47
    if (ret != NULL)
6462
47
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6463
47
    return(NULL);
6464
47
      }
6465
148k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6466
148k
      if (last == NULL) {
6467
0
                xmlErrMemory(ctxt);
6468
0
    if (ret != NULL)
6469
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6470
0
    return(NULL);
6471
0
      }
6472
148k
      if (RAW == '?') {
6473
10.4k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6474
10.4k
    NEXT;
6475
137k
      } else if (RAW == '*') {
6476
5.02k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6477
5.02k
    NEXT;
6478
132k
      } else if (RAW == '+') {
6479
1.95k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6480
1.95k
    NEXT;
6481
130k
      } else {
6482
130k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6483
130k
      }
6484
148k
  }
6485
156k
    }
6486
23.5k
    if ((cur != NULL) && (last != NULL)) {
6487
7.34k
        cur->c2 = last;
6488
7.34k
  if (last != NULL)
6489
7.34k
      last->parent = cur;
6490
7.34k
    }
6491
23.5k
#ifdef LIBXML_VALID_ENABLED
6492
23.5k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6493
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6494
0
                         "Element content declaration doesn't start "
6495
0
                         "and stop in the same entity\n",
6496
0
                         NULL, NULL);
6497
0
    }
6498
23.5k
#endif
6499
23.5k
    NEXT;
6500
23.5k
    if (RAW == '?') {
6501
1.49k
  if (ret != NULL) {
6502
1.49k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6503
1.49k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6504
503
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6505
990
      else
6506
990
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6507
1.49k
  }
6508
1.49k
  NEXT;
6509
22.0k
    } else if (RAW == '*') {
6510
5.97k
  if (ret != NULL) {
6511
5.97k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6512
5.97k
      cur = ret;
6513
      /*
6514
       * Some normalization:
6515
       * (a | b* | c?)* == (a | b | c)*
6516
       */
6517
62.3k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
56.3k
    if ((cur->c1 != NULL) &&
6519
56.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
56.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6521
10.1k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
56.3k
    if ((cur->c2 != NULL) &&
6523
56.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
56.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6525
4.73k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
56.3k
    cur = cur->c2;
6527
56.3k
      }
6528
5.97k
  }
6529
5.97k
  NEXT;
6530
16.0k
    } else if (RAW == '+') {
6531
1.57k
  if (ret != NULL) {
6532
1.57k
      int found = 0;
6533
6534
1.57k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6535
1.57k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6536
149
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6537
1.42k
      else
6538
1.42k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6539
      /*
6540
       * Some normalization:
6541
       * (a | b*)+ == (a | b)*
6542
       * (a | b?)+ == (a | b)*
6543
       */
6544
2.19k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6545
625
    if ((cur->c1 != NULL) &&
6546
625
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6547
625
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6548
349
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6549
349
        found = 1;
6550
349
    }
6551
625
    if ((cur->c2 != NULL) &&
6552
625
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6553
625
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6554
205
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6555
205
        found = 1;
6556
205
    }
6557
625
    cur = cur->c2;
6558
625
      }
6559
1.57k
      if (found)
6560
327
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
1.57k
  }
6562
1.57k
  NEXT;
6563
1.57k
    }
6564
23.5k
    return(ret);
6565
27.8k
}
6566
6567
/**
6568
 * Parse the declaration for an Element children content model
6569
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6570
 *
6571
 * @deprecated Internal function, don't use.
6572
 *
6573
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6574
 *
6575
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6576
 *
6577
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6578
 *
6579
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6580
 *
6581
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6582
 * TODO Parameter-entity replacement text must be properly nested
6583
 *  with parenthesized groups. That is to say, if either of the
6584
 *  opening or closing parentheses in a choice, seq, or Mixed
6585
 *  construct is contained in the replacement text for a parameter
6586
 *  entity, both must be contained in the same replacement text. For
6587
 *  interoperability, if a parameter-entity reference appears in a
6588
 *  choice, seq, or Mixed construct, its replacement text should not
6589
 *  be empty, and neither the first nor last non-blank character of
6590
 *  the replacement text should be a connector (| or ,).
6591
 *
6592
 * @param ctxt  an XML parser context
6593
 * @param inputchk  the input used for the current entity, needed for boundary checks
6594
 * @returns the tree of xmlElementContent describing the element
6595
 *          hierarchy.
6596
 */
6597
xmlElementContent *
6598
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6599
    /* stub left for API/ABI compat */
6600
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6601
0
}
6602
6603
/**
6604
 * Parse the declaration for an Element content either Mixed or Children,
6605
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6606
 *
6607
 * @deprecated Internal function, don't use.
6608
 *
6609
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6610
 *
6611
 * @param ctxt  an XML parser context
6612
 * @param name  the name of the element being defined.
6613
 * @param result  the Element Content pointer will be stored here if any
6614
 * @returns an xmlElementTypeVal value or -1 on error
6615
 */
6616
6617
int
6618
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6619
17.8k
                           xmlElementContent **result) {
6620
6621
17.8k
    xmlElementContentPtr tree = NULL;
6622
17.8k
    int openInputNr = ctxt->inputNr;
6623
17.8k
    int res;
6624
6625
17.8k
    *result = NULL;
6626
6627
17.8k
    if (RAW != '(') {
6628
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6629
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6630
0
  return(-1);
6631
0
    }
6632
17.8k
    NEXT;
6633
17.8k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6634
17.8k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6635
2.13k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6636
2.13k
  res = XML_ELEMENT_TYPE_MIXED;
6637
15.6k
    } else {
6638
15.6k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6639
15.6k
  res = XML_ELEMENT_TYPE_ELEMENT;
6640
15.6k
    }
6641
17.8k
    if (tree == NULL)
6642
1.16k
        return(-1);
6643
16.6k
    SKIP_BLANKS_PE;
6644
16.6k
    *result = tree;
6645
16.6k
    return(res);
6646
17.8k
}
6647
6648
/**
6649
 * Parse an element declaration. Always consumes '<!'.
6650
 *
6651
 * @deprecated Internal function, don't use.
6652
 *
6653
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6654
 *
6655
 * [ VC: Unique Element Type Declaration ]
6656
 * No element type may be declared more than once
6657
 *
6658
 * @param ctxt  an XML parser context
6659
 * @returns the type of the element, or -1 in case of error
6660
 */
6661
int
6662
20.8k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6663
20.8k
    const xmlChar *name;
6664
20.8k
    int ret = -1;
6665
20.8k
    xmlElementContentPtr content  = NULL;
6666
6667
20.8k
    if ((CUR != '<') || (NXT(1) != '!'))
6668
0
        return(ret);
6669
20.8k
    SKIP(2);
6670
6671
    /* GROW; done in the caller */
6672
20.8k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6673
20.8k
#ifdef LIBXML_VALID_ENABLED
6674
20.8k
  int oldInputNr = ctxt->inputNr;
6675
20.8k
#endif
6676
6677
20.8k
  SKIP(7);
6678
20.8k
  if (SKIP_BLANKS_PE == 0) {
6679
41
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6680
41
               "Space required after 'ELEMENT'\n");
6681
41
      return(-1);
6682
41
  }
6683
20.7k
        name = xmlParseName(ctxt);
6684
20.7k
  if (name == NULL) {
6685
33
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6686
33
         "xmlParseElementDecl: no name for Element\n");
6687
33
      return(-1);
6688
33
  }
6689
20.7k
  if (SKIP_BLANKS_PE == 0) {
6690
966
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6691
966
         "Space required after the element name\n");
6692
966
  }
6693
20.7k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6694
639
      SKIP(5);
6695
      /*
6696
       * Element must always be empty.
6697
       */
6698
639
      ret = XML_ELEMENT_TYPE_EMPTY;
6699
20.0k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6700
20.0k
             (NXT(2) == 'Y')) {
6701
559
      SKIP(3);
6702
      /*
6703
       * Element is a generic container.
6704
       */
6705
559
      ret = XML_ELEMENT_TYPE_ANY;
6706
19.5k
  } else if (RAW == '(') {
6707
17.8k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6708
17.8k
            if (ret <= 0)
6709
1.16k
                return(-1);
6710
17.8k
  } else {
6711
      /*
6712
       * [ WFC: PEs in Internal Subset ] error handling.
6713
       */
6714
1.73k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6715
1.73k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6716
1.73k
      return(-1);
6717
1.73k
  }
6718
6719
17.8k
  SKIP_BLANKS_PE;
6720
6721
17.8k
  if (RAW != '>') {
6722
147
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6723
147
      if (content != NULL) {
6724
115
    xmlFreeDocElementContent(ctxt->myDoc, content);
6725
115
      }
6726
17.6k
  } else {
6727
17.6k
#ifdef LIBXML_VALID_ENABLED
6728
17.6k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6729
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6730
0
                                 "Element declaration doesn't start and stop in"
6731
0
                                 " the same entity\n",
6732
0
                                 NULL, NULL);
6733
0
      }
6734
17.6k
#endif
6735
6736
17.6k
      NEXT;
6737
17.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6738
17.6k
    (ctxt->sax->elementDecl != NULL)) {
6739
10.1k
    if (content != NULL)
6740
9.40k
        content->parent = NULL;
6741
10.1k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6742
10.1k
                           content);
6743
10.1k
    if ((content != NULL) && (content->parent == NULL)) {
6744
        /*
6745
         * this is a trick: if xmlAddElementDecl is called,
6746
         * instead of copying the full tree it is plugged directly
6747
         * if called from the parser. Avoid duplicating the
6748
         * interfaces or change the API/ABI
6749
         */
6750
8.08k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6751
8.08k
    }
6752
10.1k
      } else if (content != NULL) {
6753
7.11k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6754
7.11k
      }
6755
17.6k
  }
6756
17.8k
    }
6757
17.8k
    return(ret);
6758
20.8k
}
6759
6760
/**
6761
 * Parse a conditional section. Always consumes '<!['.
6762
 *
6763
 *     [61] conditionalSect ::= includeSect | ignoreSect
6764
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6765
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6766
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6767
 *                                 Ignore)*
6768
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6769
 * @param ctxt  an XML parser context
6770
 */
6771
6772
static void
6773
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6774
0
    size_t depth = 0;
6775
0
    int isFreshPE = 0;
6776
0
    int oldInputNr = ctxt->inputNr;
6777
0
    int declInputNr = ctxt->inputNr;
6778
6779
0
    while (!PARSER_STOPPED(ctxt)) {
6780
0
        if (ctxt->input->cur >= ctxt->input->end) {
6781
0
            if (ctxt->inputNr <= oldInputNr) {
6782
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6783
0
                return;
6784
0
            }
6785
6786
0
            xmlPopPE(ctxt);
6787
0
            declInputNr = ctxt->inputNr;
6788
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6789
0
            SKIP(3);
6790
0
            SKIP_BLANKS_PE;
6791
6792
0
            isFreshPE = 0;
6793
6794
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6795
0
                SKIP(7);
6796
0
                SKIP_BLANKS_PE;
6797
0
                if (RAW != '[') {
6798
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799
0
                    return;
6800
0
                }
6801
0
#ifdef LIBXML_VALID_ENABLED
6802
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6803
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804
0
                                     "All markup of the conditional section is"
6805
0
                                     " not in the same entity\n",
6806
0
                                     NULL, NULL);
6807
0
                }
6808
0
#endif
6809
0
                NEXT;
6810
6811
0
                depth++;
6812
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6813
0
                size_t ignoreDepth = 0;
6814
6815
0
                SKIP(6);
6816
0
                SKIP_BLANKS_PE;
6817
0
                if (RAW != '[') {
6818
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6819
0
                    return;
6820
0
                }
6821
0
#ifdef LIBXML_VALID_ENABLED
6822
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6823
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6824
0
                                     "All markup of the conditional section is"
6825
0
                                     " not in the same entity\n",
6826
0
                                     NULL, NULL);
6827
0
                }
6828
0
#endif
6829
0
                NEXT;
6830
6831
0
                while (PARSER_STOPPED(ctxt) == 0) {
6832
0
                    if (RAW == 0) {
6833
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6834
0
                        return;
6835
0
                    }
6836
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6837
0
                        SKIP(3);
6838
0
                        ignoreDepth++;
6839
                        /* Check for integer overflow */
6840
0
                        if (ignoreDepth == 0) {
6841
0
                            xmlErrMemory(ctxt);
6842
0
                            return;
6843
0
                        }
6844
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6845
0
                               (NXT(2) == '>')) {
6846
0
                        SKIP(3);
6847
0
                        if (ignoreDepth == 0)
6848
0
                            break;
6849
0
                        ignoreDepth--;
6850
0
                    } else {
6851
0
                        NEXT;
6852
0
                    }
6853
0
                }
6854
6855
0
#ifdef LIBXML_VALID_ENABLED
6856
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6857
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6858
0
                                     "All markup of the conditional section is"
6859
0
                                     " not in the same entity\n",
6860
0
                                     NULL, NULL);
6861
0
                }
6862
0
#endif
6863
0
            } else {
6864
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6865
0
                return;
6866
0
            }
6867
0
        } else if ((depth > 0) &&
6868
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6869
0
            if (isFreshPE) {
6870
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6871
0
                               "Parameter entity must match "
6872
0
                               "extSubsetDecl\n");
6873
0
                return;
6874
0
            }
6875
6876
0
            depth--;
6877
0
#ifdef LIBXML_VALID_ENABLED
6878
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6879
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6880
0
                                 "All markup of the conditional section is not"
6881
0
                                 " in the same entity\n",
6882
0
                                 NULL, NULL);
6883
0
            }
6884
0
#endif
6885
0
            SKIP(3);
6886
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6887
0
            isFreshPE = 0;
6888
0
            xmlParseMarkupDecl(ctxt);
6889
0
        } else if (RAW == '%') {
6890
0
            xmlParsePERefInternal(ctxt, 1);
6891
0
            if (ctxt->inputNr > declInputNr) {
6892
0
                isFreshPE = 1;
6893
0
                declInputNr = ctxt->inputNr;
6894
0
            }
6895
0
        } else {
6896
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6897
0
            return;
6898
0
        }
6899
6900
0
        if (depth == 0)
6901
0
            break;
6902
6903
0
        SKIP_BLANKS;
6904
0
        SHRINK;
6905
0
        GROW;
6906
0
    }
6907
0
}
6908
6909
/**
6910
 * Parse markup declarations. Always consumes '<!' or '<?'.
6911
 *
6912
 * @deprecated Internal function, don't use.
6913
 *
6914
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6915
 *                         NotationDecl | PI | Comment
6916
 *
6917
 * [ VC: Proper Declaration/PE Nesting ]
6918
 * Parameter-entity replacement text must be properly nested with
6919
 * markup declarations. That is to say, if either the first character
6920
 * or the last character of a markup declaration (markupdecl above) is
6921
 * contained in the replacement text for a parameter-entity reference,
6922
 * both must be contained in the same replacement text.
6923
 *
6924
 * [ WFC: PEs in Internal Subset ]
6925
 * In the internal DTD subset, parameter-entity references can occur
6926
 * only where markup declarations can occur, not within markup declarations.
6927
 * (This does not apply to references that occur in external parameter
6928
 * entities or to the external subset.)
6929
 *
6930
 * @param ctxt  an XML parser context
6931
 */
6932
void
6933
149k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6934
149k
    GROW;
6935
149k
    if (CUR == '<') {
6936
149k
        if (NXT(1) == '!') {
6937
126k
      switch (NXT(2)) {
6938
93.7k
          case 'E':
6939
93.7k
        if (NXT(3) == 'L')
6940
20.8k
      xmlParseElementDecl(ctxt);
6941
72.8k
        else if (NXT(3) == 'N')
6942
72.8k
      xmlParseEntityDecl(ctxt);
6943
72
                    else
6944
72
                        SKIP(2);
6945
93.7k
        break;
6946
16.4k
          case 'A':
6947
16.4k
        xmlParseAttributeListDecl(ctxt);
6948
16.4k
        break;
6949
9.20k
          case 'N':
6950
9.20k
        xmlParseNotationDecl(ctxt);
6951
9.20k
        break;
6952
6.52k
          case '-':
6953
6.52k
        xmlParseComment(ctxt);
6954
6.52k
        break;
6955
456
    default:
6956
456
                    xmlFatalErr(ctxt,
6957
456
                                ctxt->inSubset == 2 ?
6958
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6959
456
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6960
456
                                NULL);
6961
456
                    SKIP(2);
6962
456
        break;
6963
126k
      }
6964
126k
  } else if (NXT(1) == '?') {
6965
22.7k
      xmlParsePI(ctxt);
6966
22.7k
  }
6967
149k
    }
6968
149k
}
6969
6970
/**
6971
 * Parse an XML declaration header for external entities
6972
 *
6973
 * @deprecated Internal function, don't use.
6974
 *
6975
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6976
 * @param ctxt  an XML parser context
6977
 */
6978
6979
void
6980
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6981
0
    xmlChar *version;
6982
6983
    /*
6984
     * We know that '<?xml' is here.
6985
     */
6986
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6987
0
  SKIP(5);
6988
0
    } else {
6989
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6990
0
  return;
6991
0
    }
6992
6993
0
    if (SKIP_BLANKS == 0) {
6994
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6995
0
           "Space needed after '<?xml'\n");
6996
0
    }
6997
6998
    /*
6999
     * We may have the VersionInfo here.
7000
     */
7001
0
    version = xmlParseVersionInfo(ctxt);
7002
0
    if (version == NULL) {
7003
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7004
0
        if (version == NULL) {
7005
0
            xmlErrMemory(ctxt);
7006
0
            return;
7007
0
        }
7008
0
    } else {
7009
0
  if (SKIP_BLANKS == 0) {
7010
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7011
0
               "Space needed here\n");
7012
0
  }
7013
0
    }
7014
0
    ctxt->input->version = version;
7015
7016
    /*
7017
     * We must have the encoding declaration
7018
     */
7019
0
    xmlParseEncodingDecl(ctxt);
7020
7021
0
    SKIP_BLANKS;
7022
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7023
0
        SKIP(2);
7024
0
    } else if (RAW == '>') {
7025
        /* Deprecated old WD ... */
7026
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7027
0
  NEXT;
7028
0
    } else {
7029
0
        int c;
7030
7031
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7032
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7033
0
            NEXT;
7034
0
            if (c == '>')
7035
0
                break;
7036
0
        }
7037
0
    }
7038
0
}
7039
7040
/**
7041
 * Parse Markup declarations from an external subset
7042
 *
7043
 * @deprecated Internal function, don't use.
7044
 *
7045
 *     [30] extSubset ::= textDecl? extSubsetDecl
7046
 *
7047
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7048
 *                             PEReference | S) *
7049
 * @param ctxt  an XML parser context
7050
 * @param publicId  the public identifier
7051
 * @param systemId  the system identifier (URL)
7052
 */
7053
void
7054
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7055
0
                       const xmlChar *systemId) {
7056
0
    int oldInputNr;
7057
7058
0
    xmlCtxtInitializeLate(ctxt);
7059
7060
0
    xmlDetectEncoding(ctxt);
7061
7062
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7063
0
  xmlParseTextDecl(ctxt);
7064
0
    }
7065
0
    if (ctxt->myDoc == NULL) {
7066
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7067
0
  if (ctxt->myDoc == NULL) {
7068
0
      xmlErrMemory(ctxt);
7069
0
      return;
7070
0
  }
7071
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7072
0
    }
7073
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7074
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7075
0
        xmlErrMemory(ctxt);
7076
0
    }
7077
7078
0
    ctxt->inSubset = 2;
7079
0
    oldInputNr = ctxt->inputNr;
7080
7081
0
    SKIP_BLANKS;
7082
0
    while (!PARSER_STOPPED(ctxt)) {
7083
0
        if (ctxt->input->cur >= ctxt->input->end) {
7084
0
            if (ctxt->inputNr <= oldInputNr) {
7085
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7086
0
                break;
7087
0
            }
7088
7089
0
            xmlPopPE(ctxt);
7090
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7091
0
            xmlParseConditionalSections(ctxt);
7092
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7093
0
            xmlParseMarkupDecl(ctxt);
7094
0
        } else if (RAW == '%') {
7095
0
            xmlParsePERefInternal(ctxt, 1);
7096
0
        } else {
7097
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7098
7099
0
            while (ctxt->inputNr > oldInputNr)
7100
0
                xmlPopPE(ctxt);
7101
0
            break;
7102
0
        }
7103
0
        SKIP_BLANKS;
7104
0
        SHRINK;
7105
0
        GROW;
7106
0
    }
7107
0
}
7108
7109
/**
7110
 * Parse and handle entity references in content, depending on the SAX
7111
 * interface, this may end up in a call to characters() if this is a
7112
 * CharRef, a predefined entity, if there is no reference() callback,
7113
 * or if the parser was asked to switch to that mode.
7114
 *
7115
 * @deprecated Internal function, don't use.
7116
 *
7117
 * Always consumes '&'.
7118
 *
7119
 *     [67] Reference ::= EntityRef | CharRef
7120
 * @param ctxt  an XML parser context
7121
 */
7122
void
7123
86.5k
xmlParseReference(xmlParserCtxt *ctxt) {
7124
86.5k
    xmlEntityPtr ent = NULL;
7125
86.5k
    const xmlChar *name;
7126
86.5k
    xmlChar *val;
7127
7128
86.5k
    if (RAW != '&')
7129
0
        return;
7130
7131
    /*
7132
     * Simple case of a CharRef
7133
     */
7134
86.5k
    if (NXT(1) == '#') {
7135
44.6k
  int i = 0;
7136
44.6k
  xmlChar out[16];
7137
44.6k
  int value = xmlParseCharRef(ctxt);
7138
7139
44.6k
  if (value == 0)
7140
31.8k
      return;
7141
7142
        /*
7143
         * Just encode the value in UTF-8
7144
         */
7145
12.8k
        COPY_BUF(out, i, value);
7146
12.8k
        out[i] = 0;
7147
12.8k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7148
12.8k
            (!ctxt->disableSAX))
7149
5.06k
            ctxt->sax->characters(ctxt->userData, out, i);
7150
12.8k
  return;
7151
44.6k
    }
7152
7153
    /*
7154
     * We are seeing an entity reference
7155
     */
7156
41.9k
    name = xmlParseEntityRefInternal(ctxt);
7157
41.9k
    if (name == NULL)
7158
1.79k
        return;
7159
40.1k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7160
40.1k
    if (ent == NULL) {
7161
        /*
7162
         * Create a reference for undeclared entities.
7163
         */
7164
12.8k
        if ((ctxt->replaceEntities == 0) &&
7165
12.8k
            (ctxt->sax != NULL) &&
7166
12.8k
            (ctxt->disableSAX == 0) &&
7167
12.8k
            (ctxt->sax->reference != NULL)) {
7168
0
            ctxt->sax->reference(ctxt->userData, name);
7169
0
        }
7170
12.8k
        return;
7171
12.8k
    }
7172
27.3k
    if (!ctxt->wellFormed)
7173
2.44k
  return;
7174
7175
    /* special case of predefined entities */
7176
24.8k
    if ((ent->name == NULL) ||
7177
24.8k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7178
6.90k
  val = ent->content;
7179
6.90k
  if (val == NULL) return;
7180
  /*
7181
   * inline the entity.
7182
   */
7183
6.90k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7184
6.90k
      (!ctxt->disableSAX))
7185
6.90k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7186
6.90k
  return;
7187
6.90k
    }
7188
7189
    /*
7190
     * Some users try to parse entities on their own and used to set
7191
     * the renamed "checked" member. Fix the flags to cover this
7192
     * case.
7193
     */
7194
17.9k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7195
0
        ent->flags |= XML_ENT_PARSED;
7196
7197
    /*
7198
     * The first reference to the entity trigger a parsing phase
7199
     * where the ent->children is filled with the result from
7200
     * the parsing.
7201
     * Note: external parsed entities will not be loaded, it is not
7202
     * required for a non-validating parser, unless the parsing option
7203
     * of validating, or substituting entities were given. Doing so is
7204
     * far more secure as the parser will only process data coming from
7205
     * the document entity by default.
7206
     *
7207
     * FIXME: This doesn't work correctly since entities can be
7208
     * expanded with different namespace declarations in scope.
7209
     * For example:
7210
     *
7211
     * <!DOCTYPE doc [
7212
     *   <!ENTITY ent "<ns:elem/>">
7213
     * ]>
7214
     * <doc>
7215
     *   <decl1 xmlns:ns="urn:ns1">
7216
     *     &ent;
7217
     *   </decl1>
7218
     *   <decl2 xmlns:ns="urn:ns2">
7219
     *     &ent;
7220
     *   </decl2>
7221
     * </doc>
7222
     *
7223
     * Proposed fix:
7224
     *
7225
     * - Ignore current namespace declarations when parsing the
7226
     *   entity. If a prefix can't be resolved, don't report an error
7227
     *   but mark it as unresolved.
7228
     * - Try to resolve these prefixes when expanding the entity.
7229
     *   This will require a specialized version of xmlStaticCopyNode
7230
     *   which can also make use of the namespace hash table to avoid
7231
     *   quadratic behavior.
7232
     *
7233
     * Alternatively, we could simply reparse the entity on each
7234
     * expansion like we already do with custom SAX callbacks.
7235
     * External entity content should be cached in this case.
7236
     */
7237
17.9k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7238
17.9k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7239
238
         ((ctxt->replaceEntities) ||
7240
17.7k
          (ctxt->validate)))) {
7241
17.7k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7242
1.57k
            xmlCtxtParseEntity(ctxt, ent);
7243
16.1k
        } else if (ent->children == NULL) {
7244
            /*
7245
             * Probably running in SAX mode and the callbacks don't
7246
             * build the entity content. Parse the entity again.
7247
             *
7248
             * This will also be triggered in normal tree builder mode
7249
             * if an entity happens to be empty, causing unnecessary
7250
             * reloads. It's hard to come up with a reliable check in
7251
             * which mode we're running.
7252
             */
7253
16.1k
            xmlCtxtParseEntity(ctxt, ent);
7254
16.1k
        }
7255
17.7k
    }
7256
7257
    /*
7258
     * We also check for amplification if entities aren't substituted.
7259
     * They might be expanded later.
7260
     */
7261
17.9k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7262
3
        return;
7263
7264
17.9k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7265
1.11k
        return;
7266
7267
16.8k
    if (ctxt->replaceEntities == 0) {
7268
  /*
7269
   * Create a reference
7270
   */
7271
16.8k
        if (ctxt->sax->reference != NULL)
7272
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7273
16.8k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7274
0
        xmlNodePtr copy, cur;
7275
7276
        /*
7277
         * Seems we are generating the DOM content, copy the tree
7278
   */
7279
0
        cur = ent->children;
7280
7281
        /*
7282
         * Handle first text node with SAX to coalesce text efficiently
7283
         */
7284
0
        if ((cur->type == XML_TEXT_NODE) ||
7285
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7286
0
            int len = xmlStrlen(cur->content);
7287
7288
0
            if ((cur->type == XML_TEXT_NODE) ||
7289
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7290
0
                if (ctxt->sax->characters != NULL)
7291
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7292
0
            } else {
7293
0
                if (ctxt->sax->cdataBlock != NULL)
7294
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
0
            }
7296
7297
0
            cur = cur->next;
7298
0
        }
7299
7300
0
        while (cur != NULL) {
7301
0
            xmlNodePtr last;
7302
7303
            /*
7304
             * Handle last text node with SAX to coalesce text efficiently
7305
             */
7306
0
            if ((cur->next == NULL) &&
7307
0
                ((cur->type == XML_TEXT_NODE) ||
7308
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7309
0
                int len = xmlStrlen(cur->content);
7310
7311
0
                if ((cur->type == XML_TEXT_NODE) ||
7312
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7313
0
                    if (ctxt->sax->characters != NULL)
7314
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7315
0
                } else {
7316
0
                    if (ctxt->sax->cdataBlock != NULL)
7317
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7318
0
                }
7319
7320
0
                break;
7321
0
            }
7322
7323
            /*
7324
             * Reset coalesce buffer stats only for non-text nodes.
7325
             */
7326
0
            ctxt->nodemem = 0;
7327
0
            ctxt->nodelen = 0;
7328
7329
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7330
7331
0
            if (copy == NULL) {
7332
0
                xmlErrMemory(ctxt);
7333
0
                break;
7334
0
            }
7335
7336
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7337
                /* Needed for reader */
7338
0
                copy->extra = cur->extra;
7339
                /* Maybe needed for reader */
7340
0
                copy->_private = cur->_private;
7341
0
            }
7342
7343
0
            copy->parent = ctxt->node;
7344
0
            last = ctxt->node->last;
7345
0
            if (last == NULL) {
7346
0
                ctxt->node->children = copy;
7347
0
            } else {
7348
0
                last->next = copy;
7349
0
                copy->prev = last;
7350
0
            }
7351
0
            ctxt->node->last = copy;
7352
7353
0
            cur = cur->next;
7354
0
        }
7355
0
    }
7356
16.8k
}
7357
7358
static void
7359
145k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7360
    /*
7361
     * [ WFC: Entity Declared ]
7362
     * In a document without any DTD, a document with only an
7363
     * internal DTD subset which contains no parameter entity
7364
     * references, or a document with "standalone='yes'", the
7365
     * Name given in the entity reference must match that in an
7366
     * entity declaration, except that well-formed documents
7367
     * need not declare any of the following entities: amp, lt,
7368
     * gt, apos, quot.
7369
     * The declaration of a parameter entity must precede any
7370
     * reference to it.
7371
     * Similarly, the declaration of a general entity must
7372
     * precede any reference to it which appears in a default
7373
     * value in an attribute-list declaration. Note that if
7374
     * entities are declared in the external subset or in
7375
     * external parameter entities, a non-validating processor
7376
     * is not obligated to read and process their declarations;
7377
     * for such documents, the rule that an entity must be
7378
     * declared is a well-formedness constraint only if
7379
     * standalone='yes'.
7380
     */
7381
145k
    if ((ctxt->standalone == 1) ||
7382
145k
        ((ctxt->hasExternalSubset == 0) &&
7383
145k
         (ctxt->hasPErefs == 0))) {
7384
129k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7385
129k
                          "Entity '%s' not defined\n", name);
7386
129k
#ifdef LIBXML_VALID_ENABLED
7387
129k
    } else if (ctxt->validate) {
7388
        /*
7389
         * [ VC: Entity Declared ]
7390
         * In a document with an external subset or external
7391
         * parameter entities with "standalone='no'", ...
7392
         * ... The declaration of a parameter entity must
7393
         * precede any reference to it...
7394
         */
7395
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7396
0
                         "Entity '%s' not defined\n", name, NULL);
7397
0
#endif
7398
16.0k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7399
16.0k
               ((ctxt->replaceEntities) &&
7400
16.0k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7401
        /*
7402
         * Also raise a non-fatal error
7403
         *
7404
         * - if the external subset is loaded and all entity declarations
7405
         *   should be available, or
7406
         * - entity substition was requested without restricting
7407
         *   external entity access.
7408
         */
7409
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7410
0
                     "Entity '%s' not defined\n", name);
7411
16.0k
    } else {
7412
16.0k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7413
16.0k
                      "Entity '%s' not defined\n", name, NULL);
7414
16.0k
    }
7415
7416
145k
    ctxt->valid = 0;
7417
145k
}
7418
7419
static xmlEntityPtr
7420
318k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7421
318k
    xmlEntityPtr ent = NULL;
7422
7423
    /*
7424
     * Predefined entities override any extra definition
7425
     */
7426
318k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7427
318k
        ent = xmlGetPredefinedEntity(name);
7428
318k
        if (ent != NULL)
7429
130k
            return(ent);
7430
318k
    }
7431
7432
    /*
7433
     * Ask first SAX for entity resolution, otherwise try the
7434
     * entities which may have stored in the parser context.
7435
     */
7436
188k
    if (ctxt->sax != NULL) {
7437
188k
  if (ctxt->sax->getEntity != NULL)
7438
188k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7439
188k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7440
188k
      (ctxt->options & XML_PARSE_OLDSAX))
7441
0
      ent = xmlGetPredefinedEntity(name);
7442
188k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7443
188k
      (ctxt->userData==ctxt)) {
7444
9.89k
      ent = xmlSAX2GetEntity(ctxt, name);
7445
9.89k
  }
7446
188k
    }
7447
7448
188k
    if (ent == NULL) {
7449
144k
        xmlHandleUndeclaredEntity(ctxt, name);
7450
144k
    }
7451
7452
    /*
7453
     * [ WFC: Parsed Entity ]
7454
     * An entity reference must not contain the name of an
7455
     * unparsed entity
7456
     */
7457
43.1k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7458
381
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7459
381
     "Entity reference to unparsed entity %s\n", name);
7460
381
        ent = NULL;
7461
381
    }
7462
7463
    /*
7464
     * [ WFC: No External Entity References ]
7465
     * Attribute values cannot contain direct or indirect
7466
     * entity references to external entities.
7467
     */
7468
42.7k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7469
527
        if (inAttr) {
7470
289
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7471
289
                 "Attribute references external entity '%s'\n", name);
7472
289
            ent = NULL;
7473
289
        }
7474
527
    }
7475
7476
188k
    return(ent);
7477
318k
}
7478
7479
/**
7480
 * Parse an entity reference. Always consumes '&'.
7481
 *
7482
 *     [68] EntityRef ::= '&' Name ';'
7483
 *
7484
 * @param ctxt  an XML parser context
7485
 * @returns the name, or NULL in case of error.
7486
 */
7487
static const xmlChar *
7488
501k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7489
501k
    const xmlChar *name;
7490
7491
501k
    GROW;
7492
7493
501k
    if (RAW != '&')
7494
0
        return(NULL);
7495
501k
    NEXT;
7496
501k
    name = xmlParseName(ctxt);
7497
501k
    if (name == NULL) {
7498
144k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7499
144k
           "xmlParseEntityRef: no name\n");
7500
144k
        return(NULL);
7501
144k
    }
7502
356k
    if (RAW != ';') {
7503
39.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
39.8k
  return(NULL);
7505
39.8k
    }
7506
316k
    NEXT;
7507
7508
316k
    return(name);
7509
356k
}
7510
7511
/**
7512
 * @deprecated Internal function, don't use.
7513
 *
7514
 * @param ctxt  an XML parser context
7515
 * @returns the xmlEntity if found, or NULL otherwise.
7516
 */
7517
xmlEntity *
7518
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7519
0
    const xmlChar *name;
7520
7521
0
    if (ctxt == NULL)
7522
0
        return(NULL);
7523
7524
0
    name = xmlParseEntityRefInternal(ctxt);
7525
0
    if (name == NULL)
7526
0
        return(NULL);
7527
7528
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7529
0
}
7530
7531
/**
7532
 * Parse ENTITY references declarations, but this version parses it from
7533
 * a string value.
7534
 *
7535
 *     [68] EntityRef ::= '&' Name ';'
7536
 *
7537
 * [ WFC: Entity Declared ]
7538
 * In a document without any DTD, a document with only an internal DTD
7539
 * subset which contains no parameter entity references, or a document
7540
 * with "standalone='yes'", the Name given in the entity reference
7541
 * must match that in an entity declaration, except that well-formed
7542
 * documents need not declare any of the following entities: amp, lt,
7543
 * gt, apos, quot.  The declaration of a parameter entity must precede
7544
 * any reference to it.  Similarly, the declaration of a general entity
7545
 * must precede any reference to it which appears in a default value in an
7546
 * attribute-list declaration. Note that if entities are declared in the
7547
 * external subset or in external parameter entities, a non-validating
7548
 * processor is not obligated to read and process their declarations;
7549
 * for such documents, the rule that an entity must be declared is a
7550
 * well-formedness constraint only if standalone='yes'.
7551
 *
7552
 * [ WFC: Parsed Entity ]
7553
 * An entity reference must not contain the name of an unparsed entity
7554
 *
7555
 * @param ctxt  an XML parser context
7556
 * @param str  a pointer to an index in the string
7557
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7558
 * is updated to the current location in the string.
7559
 */
7560
static xmlChar *
7561
1.36k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7562
1.36k
    xmlChar *name;
7563
1.36k
    const xmlChar *ptr;
7564
1.36k
    xmlChar cur;
7565
7566
1.36k
    if ((str == NULL) || (*str == NULL))
7567
0
        return(NULL);
7568
1.36k
    ptr = *str;
7569
1.36k
    cur = *ptr;
7570
1.36k
    if (cur != '&')
7571
0
  return(NULL);
7572
7573
1.36k
    ptr++;
7574
1.36k
    name = xmlParseStringName(ctxt, &ptr);
7575
1.36k
    if (name == NULL) {
7576
25
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7577
25
           "xmlParseStringEntityRef: no name\n");
7578
25
  *str = ptr;
7579
25
  return(NULL);
7580
25
    }
7581
1.34k
    if (*ptr != ';') {
7582
4
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7583
4
        xmlFree(name);
7584
4
  *str = ptr;
7585
4
  return(NULL);
7586
4
    }
7587
1.33k
    ptr++;
7588
7589
1.33k
    *str = ptr;
7590
1.33k
    return(name);
7591
1.34k
}
7592
7593
/**
7594
 * Parse a parameter entity reference. Always consumes '%'.
7595
 *
7596
 * The entity content is handled directly by pushing it's content as
7597
 * a new input stream.
7598
 *
7599
 *     [69] PEReference ::= '%' Name ';'
7600
 *
7601
 * [ WFC: No Recursion ]
7602
 * A parsed entity must not contain a recursive
7603
 * reference to itself, either directly or indirectly.
7604
 *
7605
 * [ WFC: Entity Declared ]
7606
 * In a document without any DTD, a document with only an internal DTD
7607
 * subset which contains no parameter entity references, or a document
7608
 * with "standalone='yes'", ...  ... The declaration of a parameter
7609
 * entity must precede any reference to it...
7610
 *
7611
 * [ VC: Entity Declared ]
7612
 * In a document with an external subset or external parameter entities
7613
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7614
 * must precede any reference to it...
7615
 *
7616
 * [ WFC: In DTD ]
7617
 * Parameter-entity references may only appear in the DTD.
7618
 * NOTE: misleading but this is handled.
7619
 *
7620
 * @param ctxt  an XML parser context
7621
 * @param markupDecl  whether the PERef starts a markup declaration
7622
 */
7623
static void
7624
1.52k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7625
1.52k
    const xmlChar *name;
7626
1.52k
    xmlEntityPtr entity = NULL;
7627
1.52k
    xmlParserInputPtr input;
7628
7629
1.52k
    if (RAW != '%')
7630
0
        return;
7631
1.52k
    NEXT;
7632
1.52k
    name = xmlParseName(ctxt);
7633
1.52k
    if (name == NULL) {
7634
662
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7635
662
  return;
7636
662
    }
7637
865
    if (RAW != ';') {
7638
620
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7639
620
        return;
7640
620
    }
7641
7642
245
    NEXT;
7643
7644
    /* Must be set before xmlHandleUndeclaredEntity */
7645
245
    ctxt->hasPErefs = 1;
7646
7647
    /*
7648
     * Request the entity from SAX
7649
     */
7650
245
    if ((ctxt->sax != NULL) &&
7651
245
  (ctxt->sax->getParameterEntity != NULL))
7652
245
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7653
7654
245
    if (entity == NULL) {
7655
245
        xmlHandleUndeclaredEntity(ctxt, name);
7656
245
    } else {
7657
  /*
7658
   * Internal checking in case the entity quest barfed
7659
   */
7660
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7661
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7662
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7663
0
      "Internal: %%%s; is not a parameter entity\n",
7664
0
        name, NULL);
7665
0
  } else {
7666
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7667
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7668
0
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7669
0
      (ctxt->replaceEntities == 0) &&
7670
0
      (ctxt->validate == 0))))
7671
0
    return;
7672
7673
0
            if (entity->flags & XML_ENT_EXPANDING) {
7674
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7675
0
                xmlHaltParser(ctxt);
7676
0
                return;
7677
0
            }
7678
7679
0
      input = xmlNewEntityInputStream(ctxt, entity);
7680
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7681
0
                xmlFreeInputStream(input);
7682
0
    return;
7683
0
            }
7684
7685
0
            entity->flags |= XML_ENT_EXPANDING;
7686
7687
0
            if (markupDecl)
7688
0
                input->flags |= XML_INPUT_MARKUP_DECL;
7689
7690
0
            GROW;
7691
7692
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7693
0
                xmlDetectEncoding(ctxt);
7694
7695
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7696
0
                    (IS_BLANK_CH(NXT(5)))) {
7697
0
                    xmlParseTextDecl(ctxt);
7698
0
                }
7699
0
            }
7700
0
  }
7701
0
    }
7702
245
}
7703
7704
/**
7705
 * Parse a parameter entity reference.
 *
 * Thin wrapper that forwards to xmlParsePERefInternal() with the
 * markupDecl argument set to 0.
7706
 *
7707
 * @deprecated Internal function, don't use.
7708
 *
7709
 * @param ctxt  an XML parser context
7710
 */
7711
void
7712
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7713
0
    /* 0: the reference is not flagged as starting a markup declaration */
    xmlParsePERefInternal(ctxt, 0);
7714
0
}
7715
7716
/**
7717
 * Load the content of an entity.
7718
 *
7719
 * @param ctxt  an XML parser context
7720
 * @param entity  an unloaded system entity
7721
 * @returns 0 in case of success and -1 in case of failure
7722
 */
7723
static int
7724
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7725
0
    xmlParserInputPtr oldinput, input = NULL;
7726
0
    xmlParserInputPtr *oldinputTab;
7727
0
    xmlChar *oldencoding;
7728
0
    xmlChar *content = NULL;
7729
0
    xmlResourceType rtype;
7730
0
    size_t length, i;
7731
0
    int oldinputNr, oldinputMax;
7732
0
    int ret = -1;
7733
0
    int res;
7734
7735
0
    if ((ctxt == NULL) || (entity == NULL) ||
7736
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738
0
  (entity->content != NULL)) {
7739
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7740
0
              "xmlLoadEntityContent parameter error");
7741
0
        return(-1);
7742
0
    }
7743
7744
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7745
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7746
0
    else
7747
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7748
7749
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7750
0
                            (char *) entity->ExternalID, rtype);
7751
0
    if (input == NULL)
7752
0
        return(-1);
7753
7754
0
    oldinput = ctxt->input;
7755
0
    oldinputNr = ctxt->inputNr;
7756
0
    oldinputMax = ctxt->inputMax;
7757
0
    oldinputTab = ctxt->inputTab;
7758
0
    oldencoding = ctxt->encoding;
7759
7760
0
    ctxt->input = NULL;
7761
0
    ctxt->inputNr = 0;
7762
0
    ctxt->inputMax = 1;
7763
0
    ctxt->encoding = NULL;
7764
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7765
0
    if (ctxt->inputTab == NULL) {
7766
0
        xmlErrMemory(ctxt);
7767
0
        xmlFreeInputStream(input);
7768
0
        goto error;
7769
0
    }
7770
7771
0
    xmlBufResetInput(input->buf->buffer, input);
7772
7773
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7774
0
        xmlFreeInputStream(input);
7775
0
        goto error;
7776
0
    }
7777
7778
0
    xmlDetectEncoding(ctxt);
7779
7780
    /*
7781
     * Parse a possible text declaration first
7782
     */
7783
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7784
0
  xmlParseTextDecl(ctxt);
7785
        /*
7786
         * An XML-1.0 document can't reference an entity not XML-1.0
7787
         */
7788
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7789
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7790
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7791
0
                           "Version mismatch between document and entity\n");
7792
0
        }
7793
0
    }
7794
7795
0
    length = input->cur - input->base;
7796
0
    xmlBufShrink(input->buf->buffer, length);
7797
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7798
7799
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7800
0
        ;
7801
7802
0
    xmlBufResetInput(input->buf->buffer, input);
7803
7804
0
    if (res < 0) {
7805
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7806
0
        goto error;
7807
0
    }
7808
7809
0
    length = xmlBufUse(input->buf->buffer);
7810
0
    if (length > INT_MAX) {
7811
0
        xmlErrMemory(ctxt);
7812
0
        goto error;
7813
0
    }
7814
7815
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7816
0
    if (content == NULL) {
7817
0
        xmlErrMemory(ctxt);
7818
0
        goto error;
7819
0
    }
7820
7821
0
    for (i = 0; i < length; ) {
7822
0
        int clen = length - i;
7823
0
        int c = xmlGetUTF8Char(content + i, &clen);
7824
7825
0
        if ((c < 0) || (!IS_CHAR(c))) {
7826
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7827
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7828
0
                              content[i]);
7829
0
            goto error;
7830
0
        }
7831
0
        i += clen;
7832
0
    }
7833
7834
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7835
0
    entity->content = content;
7836
0
    entity->length = length;
7837
0
    content = NULL;
7838
0
    ret = 0;
7839
7840
0
error:
7841
0
    while (ctxt->inputNr > 0)
7842
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7843
0
    xmlFree(ctxt->inputTab);
7844
0
    xmlFree(ctxt->encoding);
7845
7846
0
    ctxt->input = oldinput;
7847
0
    ctxt->inputNr = oldinputNr;
7848
0
    ctxt->inputMax = oldinputMax;
7849
0
    ctxt->inputTab = oldinputTab;
7850
0
    ctxt->encoding = oldencoding;
7851
7852
0
    xmlFree(content);
7853
7854
0
    return(ret);
7855
0
}
7856
7857
/**
7858
 * Parse PEReference declarations
7859
 *
7860
 *     [69] PEReference ::= '%' Name ';'
7861
 *
7862
 * [ WFC: No Recursion ]
7863
 * A parsed entity must not contain a recursive
7864
 * reference to itself, either directly or indirectly.
7865
 *
7866
 * [ WFC: Entity Declared ]
7867
 * In a document without any DTD, a document with only an internal DTD
7868
 * subset which contains no parameter entity references, or a document
7869
 * with "standalone='yes'", ...  ... The declaration of a parameter
7870
 * entity must precede any reference to it...
7871
 *
7872
 * [ VC: Entity Declared ]
7873
 * In a document with an external subset or external parameter entities
7874
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7875
 * must precede any reference to it...
7876
 *
7877
 * [ WFC: In DTD ]
7878
 * Parameter-entity references may only appear in the DTD.
7879
 * NOTE: misleading but this is handled.
7880
 *
7881
 * @param ctxt  an XML parser context
7882
 * @param str  a pointer to an index in the string
7883
 * @returns the string of the entity content.
7884
 *         str is updated to the current value of the index
7885
 */
7886
static xmlEntityPtr
7887
4.27k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7888
4.27k
    const xmlChar *ptr;
7889
4.27k
    xmlChar cur;
7890
4.27k
    xmlChar *name;
7891
4.27k
    xmlEntityPtr entity = NULL;
7892
7893
4.27k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7894
4.27k
    ptr = *str;
7895
4.27k
    cur = *ptr;
7896
4.27k
    if (cur != '%')
7897
0
        return(NULL);
7898
4.27k
    ptr++;
7899
4.27k
    name = xmlParseStringName(ctxt, &ptr);
7900
4.27k
    if (name == NULL) {
7901
1.93k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7902
1.93k
           "xmlParseStringPEReference: no name\n");
7903
1.93k
  *str = ptr;
7904
1.93k
  return(NULL);
7905
1.93k
    }
7906
2.33k
    cur = *ptr;
7907
2.33k
    if (cur != ';') {
7908
1.83k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7909
1.83k
  xmlFree(name);
7910
1.83k
  *str = ptr;
7911
1.83k
  return(NULL);
7912
1.83k
    }
7913
508
    ptr++;
7914
7915
    /* Must be set before xmlHandleUndeclaredEntity */
7916
508
    ctxt->hasPErefs = 1;
7917
7918
    /*
7919
     * Request the entity from SAX
7920
     */
7921
508
    if ((ctxt->sax != NULL) &&
7922
508
  (ctxt->sax->getParameterEntity != NULL))
7923
508
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7924
7925
508
    if (entity == NULL) {
7926
508
        xmlHandleUndeclaredEntity(ctxt, name);
7927
508
    } else {
7928
  /*
7929
   * Internal checking in case the entity quest barfed
7930
   */
7931
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7932
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7933
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7934
0
        "%%%s; is not a parameter entity\n",
7935
0
        name, NULL);
7936
0
  }
7937
0
    }
7938
7939
508
    xmlFree(name);
7940
508
    *str = ptr;
7941
508
    return(entity);
7942
2.33k
}
7943
7944
/**
7945
 * Parse a DOCTYPE declaration
7946
 *
7947
 * @deprecated Internal function, don't use.
7948
 *
7949
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7950
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7951
 *
7952
 * [ VC: Root Element Type ]
7953
 * The Name in the document type declaration must match the element
7954
 * type of the root element.
7955
 *
7956
 * @param ctxt  an XML parser context
7957
 */
7958
7959
void
7960
9.29k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7961
9.29k
    const xmlChar *name = NULL;
7962
9.29k
    xmlChar *publicId = NULL;
7963
9.29k
    xmlChar *URI = NULL;
7964
7965
    /*
7966
     * We know that '<!DOCTYPE' has been detected.
7967
     */
7968
9.29k
    SKIP(9);
7969
7970
9.29k
    if (SKIP_BLANKS == 0) {
7971
118
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7972
118
                       "Space required after 'DOCTYPE'\n");
7973
118
    }
7974
7975
    /*
7976
     * Parse the DOCTYPE name.
7977
     */
7978
9.29k
    name = xmlParseName(ctxt);
7979
9.29k
    if (name == NULL) {
7980
35
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7981
35
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7982
35
    }
7983
9.29k
    ctxt->intSubName = name;
7984
7985
9.29k
    SKIP_BLANKS;
7986
7987
    /*
7988
     * Check for public and system identifier (URI)
7989
     */
7990
9.29k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7991
7992
9.29k
    if ((URI != NULL) || (publicId != NULL)) {
7993
210
        ctxt->hasExternalSubset = 1;
7994
210
    }
7995
9.29k
    ctxt->extSubURI = URI;
7996
9.29k
    ctxt->extSubSystem = publicId;
7997
7998
9.29k
    SKIP_BLANKS;
7999
8000
    /*
8001
     * Create and update the internal subset.
8002
     */
8003
9.29k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8004
9.29k
  (!ctxt->disableSAX))
8005
9.11k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
8006
8007
9.29k
    if ((RAW != '[') && (RAW != '>')) {
8008
180
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8009
180
    }
8010
9.29k
}
8011
8012
/**
8013
 * Parse the internal subset declaration
8014
 *
8015
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8016
 * @param ctxt  an XML parser context
8017
 */
8018
8019
static void
8020
8.58k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8021
    /*
8022
     * Is there any DTD definition ?
8023
     */
8024
8.58k
    if (RAW == '[') {
8025
8.58k
        int oldInputNr = ctxt->inputNr;
8026
8027
8.58k
        NEXT;
8028
  /*
8029
   * Parse the succession of Markup declarations and
8030
   * PEReferences.
8031
   * Subsequence (markupdecl | PEReference | S)*
8032
   */
8033
8.58k
  SKIP_BLANKS;
8034
159k
        while (1) {
8035
159k
            if (PARSER_STOPPED(ctxt)) {
8036
1.58k
                return;
8037
157k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8038
606
                if (ctxt->inputNr <= oldInputNr) {
8039
606
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8040
606
                    return;
8041
606
                }
8042
0
                xmlPopPE(ctxt);
8043
157k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8044
2.85k
                NEXT;
8045
2.85k
                SKIP_BLANKS;
8046
2.85k
                break;
8047
154k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8048
154k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8049
                /*
8050
                 * Conditional sections are allowed in external entities
8051
                 * included by PE References in the internal subset.
8052
                 */
8053
0
                xmlParseConditionalSections(ctxt);
8054
154k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8055
149k
                xmlParseMarkupDecl(ctxt);
8056
149k
            } else if (RAW == '%') {
8057
1.52k
                xmlParsePERefInternal(ctxt, 1);
8058
3.54k
            } else {
8059
3.54k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8060
8061
3.54k
                while (ctxt->inputNr > oldInputNr)
8062
0
                    xmlPopPE(ctxt);
8063
3.54k
                return;
8064
3.54k
            }
8065
150k
            SKIP_BLANKS;
8066
150k
            SHRINK;
8067
150k
            GROW;
8068
150k
        }
8069
8.58k
    }
8070
8071
    /*
8072
     * We should be at the end of the DOCTYPE declaration.
8073
     */
8074
2.85k
    if (RAW != '>') {
8075
176
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8076
176
        return;
8077
176
    }
8078
2.67k
    NEXT;
8079
2.67k
}
8080
8081
#ifdef LIBXML_SAX1_ENABLED
8082
/**
8083
 * Parse an attribute
8084
 *
8085
 * @deprecated Internal function, don't use.
8086
 *
8087
 *     [41] Attribute ::= Name Eq AttValue
8088
 *
8089
 * [ WFC: No External Entity References ]
8090
 * Attribute values cannot contain direct or indirect entity references
8091
 * to external entities.
8092
 *
8093
 * [ WFC: No < in Attribute Values ]
8094
 * The replacement text of any entity referred to directly or indirectly in
8095
 * an attribute value (other than "&lt;") must not contain a <.
8096
 *
8097
 * [ VC: Attribute Value Type ]
8098
 * The attribute must have been declared; the value must be of the type
8099
 * declared for it.
8100
 *
8101
 *     [25] Eq ::= S? '=' S?
8102
 *
8103
 * With namespace:
8104
 *
8105
 *     [NS 11] Attribute ::= QName Eq AttValue
8106
 *
8107
 * Also the case QName == xmlns:??? is handled independently as a namespace
8108
 * definition.
8109
 *
8110
 * @param ctxt  an XML parser context
8111
 * @param value  a xmlChar ** used to store the value of the attribute
8112
 * @returns the attribute name, and the value in *value.
8113
 */
8114
8115
const xmlChar *
8116
722k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8117
722k
    const xmlChar *name;
8118
722k
    xmlChar *val;
8119
8120
722k
    *value = NULL;
8121
722k
    GROW;
8122
722k
    name = xmlParseName(ctxt);
8123
722k
    if (name == NULL) {
8124
41.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8125
41.4k
                 "error parsing attribute name\n");
8126
41.4k
        return(NULL);
8127
41.4k
    }
8128
8129
    /*
8130
     * read the value
8131
     */
8132
681k
    SKIP_BLANKS;
8133
681k
    if (RAW == '=') {
8134
647k
        NEXT;
8135
647k
  SKIP_BLANKS;
8136
647k
  val = xmlParseAttValue(ctxt);
8137
647k
    } else {
8138
34.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8139
34.5k
         "Specification mandates value for attribute %s\n", name);
8140
34.5k
  return(name);
8141
34.5k
    }
8142
8143
    /*
8144
     * Check that xml:lang conforms to the specification
8145
     * No more registered as an error, just generate a warning now
8146
     * since this was deprecated in XML second edition
8147
     */
8148
647k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8149
0
  if (!xmlCheckLanguageID(val)) {
8150
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8151
0
              "Malformed value for xml:lang : %s\n",
8152
0
        val, NULL);
8153
0
  }
8154
0
    }
8155
8156
    /*
8157
     * Check that xml:space conforms to the specification
8158
     */
8159
647k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8160
1.88k
  if (xmlStrEqual(val, BAD_CAST "default"))
8161
20
      *(ctxt->space) = 0;
8162
1.86k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8163
189
      *(ctxt->space) = 1;
8164
1.67k
  else {
8165
1.67k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8166
1.67k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8167
1.67k
                                 val, NULL);
8168
1.67k
  }
8169
1.88k
    }
8170
8171
647k
    *value = val;
8172
647k
    return(name);
8173
681k
}
8174
8175
/**
8176
 * Parse a start tag. Always consumes '<'.
8177
 *
8178
 * @deprecated Internal function, don't use.
8179
 *
8180
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8181
 *
8182
 * [ WFC: Unique Att Spec ]
8183
 * No attribute name may appear more than once in the same start-tag or
8184
 * empty-element tag.
8185
 *
8186
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8187
 *
8188
 * [ WFC: Unique Att Spec ]
8189
 * No attribute name may appear more than once in the same start-tag or
8190
 * empty-element tag.
8191
 *
8192
 * With namespace:
8193
 *
8194
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8195
 *
8196
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8197
 *
8198
 * @param ctxt  an XML parser context
8199
 * @returns the element name parsed
8200
 */
8201
8202
const xmlChar *
8203
1.55M
xmlParseStartTag(xmlParserCtxt *ctxt) {
8204
1.55M
    const xmlChar *name;
8205
1.55M
    const xmlChar *attname;
8206
1.55M
    xmlChar *attvalue;
8207
1.55M
    const xmlChar **atts = ctxt->atts;
8208
1.55M
    int nbatts = 0;
8209
1.55M
    int maxatts = ctxt->maxatts;
8210
1.55M
    int i;
8211
8212
1.55M
    if (RAW != '<') return(NULL);
8213
1.55M
    NEXT1;
8214
8215
1.55M
    name = xmlParseName(ctxt);
8216
1.55M
    if (name == NULL) {
8217
66.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8218
66.4k
       "xmlParseStartTag: invalid element name\n");
8219
66.4k
        return(NULL);
8220
66.4k
    }
8221
8222
    /*
8223
     * Now parse the attributes, it ends up with the ending
8224
     *
8225
     * (S Attribute)* S?
8226
     */
8227
1.49M
    SKIP_BLANKS;
8228
1.49M
    GROW;
8229
8230
2.00M
    while (((RAW != '>') &&
8231
2.00M
     ((RAW != '/') || (NXT(1) != '>')) &&
8232
2.00M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8233
722k
  attname = xmlParseAttribute(ctxt, &attvalue);
8234
722k
        if (attname == NULL)
8235
41.4k
      break;
8236
681k
        if (attvalue != NULL) {
8237
      /*
8238
       * [ WFC: Unique Att Spec ]
8239
       * No attribute name may appear more than once in the same
8240
       * start-tag or empty-element tag.
8241
       */
8242
1.22M
      for (i = 0; i < nbatts;i += 2) {
8243
601k
          if (xmlStrEqual(atts[i], attname)) {
8244
9.19k
        xmlErrAttributeDup(ctxt, NULL, attname);
8245
9.19k
        goto failed;
8246
9.19k
    }
8247
601k
      }
8248
      /*
8249
       * Add the pair to atts
8250
       */
8251
625k
      if (nbatts + 4 > maxatts) {
8252
11.7k
          const xmlChar **n;
8253
11.7k
                int newSize;
8254
8255
11.7k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8256
11.7k
                                          11, XML_MAX_ATTRS);
8257
11.7k
                if (newSize < 0) {
8258
0
        xmlErrMemory(ctxt);
8259
0
        goto failed;
8260
0
    }
8261
11.7k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8262
11.7k
                if (newSize < 2)
8263
7.26k
                    newSize = 2;
8264
11.7k
#endif
8265
11.7k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8266
11.7k
    if (n == NULL) {
8267
0
        xmlErrMemory(ctxt);
8268
0
        goto failed;
8269
0
    }
8270
11.7k
    atts = n;
8271
11.7k
                maxatts = newSize * 2;
8272
11.7k
    ctxt->atts = atts;
8273
11.7k
    ctxt->maxatts = maxatts;
8274
11.7k
      }
8275
8276
625k
      atts[nbatts++] = attname;
8277
625k
      atts[nbatts++] = attvalue;
8278
625k
      atts[nbatts] = NULL;
8279
625k
      atts[nbatts + 1] = NULL;
8280
8281
625k
            attvalue = NULL;
8282
625k
  }
8283
8284
681k
failed:
8285
8286
681k
        if (attvalue != NULL)
8287
9.19k
            xmlFree(attvalue);
8288
8289
681k
  GROW
8290
681k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8291
168k
      break;
8292
513k
  if (SKIP_BLANKS == 0) {
8293
52.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8294
52.1k
         "attributes construct error\n");
8295
52.1k
  }
8296
513k
  SHRINK;
8297
513k
        GROW;
8298
513k
    }
8299
8300
    /*
8301
     * SAX: Start of Element !
8302
     */
8303
1.49M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8304
1.49M
  (!ctxt->disableSAX)) {
8305
1.36M
  if (nbatts > 0)
8306
289k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8307
1.07M
  else
8308
1.07M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8309
1.36M
    }
8310
8311
1.49M
    if (atts != NULL) {
8312
        /* Free only the content strings */
8313
1.66M
        for (i = 1;i < nbatts;i+=2)
8314
625k
      if (atts[i] != NULL)
8315
625k
         xmlFree((xmlChar *) atts[i]);
8316
1.04M
    }
8317
1.49M
    return(name);
8318
1.49M
}
8319
8320
/**
8321
 * Parse an end tag. Always consumes '</'.
8322
 *
8323
 *     [42] ETag ::= '</' Name S? '>'
8324
 *
8325
 * With namespace
8326
 *
8327
 *     [NS 9] ETag ::= '</' QName S? '>'
8328
 * @param ctxt  an XML parser context
8329
 * @param line  line of the start tag
8330
 */
8331
8332
static void
8333
185k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8334
185k
    const xmlChar *name;
8335
8336
185k
    GROW;
8337
185k
    if ((RAW != '<') || (NXT(1) != '/')) {
8338
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8339
0
           "xmlParseEndTag: '</' not found\n");
8340
0
  return;
8341
0
    }
8342
185k
    SKIP(2);
8343
8344
185k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8345
8346
    /*
8347
     * We should definitely be at the ending "S? '>'" part
8348
     */
8349
185k
    GROW;
8350
185k
    SKIP_BLANKS;
8351
185k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8352
8.65k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8353
8.65k
    } else
8354
176k
  NEXT1;
8355
8356
    /*
8357
     * [ WFC: Element Type Match ]
8358
     * The Name in an element's end-tag must match the element type in the
8359
     * start-tag.
8360
     *
8361
     */
8362
185k
    if (name != (xmlChar*)1) {
8363
13.8k
        if (name == NULL) name = BAD_CAST "unparsable";
8364
13.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8365
13.8k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8366
13.8k
                    ctxt->name, line, name);
8367
13.8k
    }
8368
8369
    /*
8370
     * SAX: End of Tag
8371
     */
8372
185k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8373
185k
  (!ctxt->disableSAX))
8374
150k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8375
8376
185k
    namePop(ctxt);
8377
185k
    spacePop(ctxt);
8378
185k
}
8379
8380
/**
8381
 * Parse an end of tag
8382
 *
8383
 * @deprecated Internal function, don't use.
8384
 *
8385
 *     [42] ETag ::= '</' Name S? '>'
8386
 *
8387
 * With namespace
8388
 *
8389
 *     [NS 9] ETag ::= '</' QName S? '>'
8390
 * @param ctxt  an XML parser context
8391
 */
8392
8393
void
8394
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8395
0
    xmlParseEndTag1(ctxt, 0);
8396
0
}
8397
#endif /* LIBXML_SAX1_ENABLED */
8398
8399
/************************************************************************
8400
 *                  *
8401
 *          SAX 2 specific operations       *
8402
 *                  *
8403
 ************************************************************************/
8404
8405
/**
8406
 * Parse an XML Namespace QName
8407
 *
8408
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8409
 *     [7]  Prefix  ::= NCName
8410
 *     [8]  LocalPart  ::= NCName
8411
 *
8412
 * @param ctxt  an XML parser context
8413
 * @param prefix  pointer to store the prefix part
8414
 * @returns the Name parsed or NULL
8415
 */
8416
8417
static xmlHashedString
8418
0
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8419
0
    xmlHashedString l, p;
8420
0
    int start, isNCName = 0;
8421
8422
0
    l.name = NULL;
8423
0
    p.name = NULL;
8424
8425
0
    GROW;
8426
0
    start = CUR_PTR - BASE_PTR;
8427
8428
0
    l = xmlParseNCName(ctxt);
8429
0
    if (l.name != NULL) {
8430
0
        isNCName = 1;
8431
0
        if (CUR == ':') {
8432
0
            NEXT;
8433
0
            p = l;
8434
0
            l = xmlParseNCName(ctxt);
8435
0
        }
8436
0
    }
8437
0
    if ((l.name == NULL) || (CUR == ':')) {
8438
0
        xmlChar *tmp;
8439
8440
0
        l.name = NULL;
8441
0
        p.name = NULL;
8442
0
        if ((isNCName == 0) && (CUR != ':'))
8443
0
            return(l);
8444
0
        tmp = xmlParseNmtoken(ctxt);
8445
0
        if (tmp != NULL)
8446
0
            xmlFree(tmp);
8447
0
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8448
0
                                CUR_PTR - (BASE_PTR + start));
8449
0
        if (l.name == NULL) {
8450
0
            xmlErrMemory(ctxt);
8451
0
            return(l);
8452
0
        }
8453
0
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8454
0
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8455
0
    }
8456
8457
0
    *prefix = p;
8458
0
    return(l);
8459
0
}
8460
8461
/**
8462
 * Parse an XML Namespace QName
8463
 *
8464
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8465
 *     [7]  Prefix  ::= NCName
8466
 *     [8]  LocalPart  ::= NCName
8467
 *
8468
 * @param ctxt  an XML parser context
8469
 * @param prefix  pointer to store the prefix part
8470
 * @returns the Name parsed or NULL
8471
 */
8472
8473
static const xmlChar *
8474
0
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8475
0
    xmlHashedString n, p;
8476
8477
0
    n = xmlParseQNameHashed(ctxt, &p);
8478
0
    if (n.name == NULL)
8479
0
        return(NULL);
8480
0
    *prefix = p.name;
8481
0
    return(n.name);
8482
0
}
8483
8484
/**
8485
 * Parse an XML name and compares for match
8486
 * (specialized for endtag parsing)
8487
 *
8488
 * @param ctxt  an XML parser context
8489
 * @param name  the localname
8490
 * @param prefix  the prefix, if any.
8491
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8492
 * and the name for mismatch
8493
 */
8494
8495
static const xmlChar *
8496
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8497
0
                        xmlChar const *prefix) {
8498
0
    const xmlChar *cmp;
8499
0
    const xmlChar *in;
8500
0
    const xmlChar *ret;
8501
0
    const xmlChar *prefix2;
8502
8503
0
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8504
8505
0
    GROW;
8506
0
    in = ctxt->input->cur;
8507
8508
0
    cmp = prefix;
8509
0
    while (*in != 0 && *in == *cmp) {
8510
0
  ++in;
8511
0
  ++cmp;
8512
0
    }
8513
0
    if ((*cmp == 0) && (*in == ':')) {
8514
0
        in++;
8515
0
  cmp = name;
8516
0
  while (*in != 0 && *in == *cmp) {
8517
0
      ++in;
8518
0
      ++cmp;
8519
0
  }
8520
0
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8521
      /* success */
8522
0
            ctxt->input->col += in - ctxt->input->cur;
8523
0
      ctxt->input->cur = in;
8524
0
      return((const xmlChar*) 1);
8525
0
  }
8526
0
    }
8527
    /*
8528
     * all strings coms from the dictionary, equality can be done directly
8529
     */
8530
0
    ret = xmlParseQName (ctxt, &prefix2);
8531
0
    if (ret == NULL)
8532
0
        return(NULL);
8533
0
    if ((ret == name) && (prefix == prefix2))
8534
0
  return((const xmlChar*) 1);
8535
0
    return ret;
8536
0
}
8537
8538
/**
8539
 * Parse an attribute in the new SAX2 framework.
8540
 *
8541
 * @param ctxt  an XML parser context
8542
 * @param pref  the element prefix
8543
 * @param elem  the element name
8544
 * @param hprefix  resulting attribute prefix
8545
 * @param value  resulting value of the attribute
8546
 * @param len  resulting length of the attribute
8547
 * @param alloc  resulting indicator if the attribute was allocated
8548
 * @returns the attribute name, and the value in *value, .
8549
 */
8550
8551
static xmlHashedString
8552
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8553
                   const xmlChar * pref, const xmlChar * elem,
8554
                   xmlHashedString * hprefix, xmlChar ** value,
8555
                   int *len, int *alloc)
8556
0
{
8557
0
    xmlHashedString hname;
8558
0
    const xmlChar *prefix, *name;
8559
0
    xmlChar *val = NULL, *internal_val = NULL;
8560
0
    int special = 0;
8561
0
    int isNamespace;
8562
0
    int flags;
8563
8564
0
    *value = NULL;
8565
0
    GROW;
8566
0
    hname = xmlParseQNameHashed(ctxt, hprefix);
8567
0
    if (hname.name == NULL) {
8568
0
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8569
0
                       "error parsing attribute name\n");
8570
0
        return(hname);
8571
0
    }
8572
0
    name = hname.name;
8573
0
    prefix = hprefix->name;
8574
8575
    /*
8576
     * get the type if needed
8577
     */
8578
0
    if (ctxt->attsSpecial != NULL) {
8579
0
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8580
0
                                              prefix, name));
8581
0
    }
8582
8583
    /*
8584
     * read the value
8585
     */
8586
0
    SKIP_BLANKS;
8587
0
    if (RAW != '=') {
8588
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8589
0
                          "Specification mandates value for attribute %s\n",
8590
0
                          name);
8591
0
        goto error;
8592
0
    }
8593
8594
8595
0
    NEXT;
8596
0
    SKIP_BLANKS;
8597
0
    flags = 0;
8598
0
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8599
0
                   (prefix == ctxt->str_xmlns));
8600
0
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8601
0
                                   isNamespace);
8602
0
    if (val == NULL)
8603
0
        goto error;
8604
8605
0
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8606
8607
0
#ifdef LIBXML_VALID_ENABLED
8608
0
    if ((ctxt->validate) &&
8609
0
        (ctxt->standalone) &&
8610
0
        (special & XML_SPECIAL_EXTERNAL) &&
8611
0
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8612
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8613
0
                         "standalone: normalization of attribute %s on %s "
8614
0
                         "by external subset declaration\n",
8615
0
                         name, elem);
8616
0
    }
8617
0
#endif
8618
8619
0
    if (prefix == ctxt->str_xml) {
8620
        /*
8621
         * Check that xml:lang conforms to the specification
8622
         * No more registered as an error, just generate a warning now
8623
         * since this was deprecated in XML second edition
8624
         */
8625
0
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8626
0
            internal_val = xmlStrndup(val, *len);
8627
0
            if (internal_val == NULL)
8628
0
                goto mem_error;
8629
0
            if (!xmlCheckLanguageID(internal_val)) {
8630
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8631
0
                              "Malformed value for xml:lang : %s\n",
8632
0
                              internal_val, NULL);
8633
0
            }
8634
0
        }
8635
8636
        /*
8637
         * Check that xml:space conforms to the specification
8638
         */
8639
0
        if (xmlStrEqual(name, BAD_CAST "space")) {
8640
0
            internal_val = xmlStrndup(val, *len);
8641
0
            if (internal_val == NULL)
8642
0
                goto mem_error;
8643
0
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8644
0
                *(ctxt->space) = 0;
8645
0
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8646
0
                *(ctxt->space) = 1;
8647
0
            else {
8648
0
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8649
0
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8650
0
                              internal_val, NULL);
8651
0
            }
8652
0
        }
8653
0
        if (internal_val) {
8654
0
            xmlFree(internal_val);
8655
0
        }
8656
0
    }
8657
8658
0
    *value = val;
8659
0
    return (hname);
8660
8661
0
mem_error:
8662
0
    xmlErrMemory(ctxt);
8663
0
error:
8664
0
    if ((val != NULL) && (*alloc != 0))
8665
0
        xmlFree(val);
8666
0
    return(hname);
8667
0
}
8668
8669
/**
8670
 * Inserts a new attribute into the hash table.
8671
 *
8672
 * @param ctxt  parser context
8673
 * @param size  size of the hash table
8674
 * @param name  attribute name
8675
 * @param uri  namespace uri
8676
 * @param hashValue  combined hash value of name and uri
8677
 * @param aindex  attribute index (this is a multiple of 5)
8678
 * @returns INT_MAX if no existing attribute was found, the attribute
8679
 * index if an attribute was found, -1 if a memory allocation failed.
8680
 */
8681
static int
8682
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8683
0
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8684
0
    xmlAttrHashBucket *table = ctxt->attrHash;
8685
0
    xmlAttrHashBucket *bucket;
8686
0
    unsigned hindex;
8687
8688
0
    hindex = hashValue & (size - 1);
8689
0
    bucket = &table[hindex];
8690
8691
0
    while (bucket->index >= 0) {
8692
0
        const xmlChar **atts = &ctxt->atts[bucket->index];
8693
8694
0
        if (name == atts[0]) {
8695
0
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8696
8697
0
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8698
0
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8699
0
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8700
0
                return(bucket->index);
8701
0
        }
8702
8703
0
        hindex++;
8704
0
        bucket++;
8705
0
        if (hindex >= size) {
8706
0
            hindex = 0;
8707
0
            bucket = table;
8708
0
        }
8709
0
    }
8710
8711
0
    bucket->index = aindex;
8712
8713
0
    return(INT_MAX);
8714
0
}
8715
8716
static int
8717
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8718
                       const xmlChar *name, const xmlChar *prefix,
8719
0
                       unsigned hashValue, int aindex) {
8720
0
    xmlAttrHashBucket *table = ctxt->attrHash;
8721
0
    xmlAttrHashBucket *bucket;
8722
0
    unsigned hindex;
8723
8724
0
    hindex = hashValue & (size - 1);
8725
0
    bucket = &table[hindex];
8726
8727
0
    while (bucket->index >= 0) {
8728
0
        const xmlChar **atts = &ctxt->atts[bucket->index];
8729
8730
0
        if ((name == atts[0]) && (prefix == atts[1]))
8731
0
            return(bucket->index);
8732
8733
0
        hindex++;
8734
0
        bucket++;
8735
0
        if (hindex >= size) {
8736
0
            hindex = 0;
8737
0
            bucket = table;
8738
0
        }
8739
0
    }
8740
8741
0
    bucket->index = aindex;
8742
8743
0
    return(INT_MAX);
8744
0
}
8745
/**
8746
 * Parse a start tag. Always consumes '<'.
8747
 *
8748
 * This routine is called when running SAX2 parsing
8749
 *
8750
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8751
 *
8752
 * [ WFC: Unique Att Spec ]
8753
 * No attribute name may appear more than once in the same start-tag or
8754
 * empty-element tag.
8755
 *
8756
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8757
 *
8758
 * [ WFC: Unique Att Spec ]
8759
 * No attribute name may appear more than once in the same start-tag or
8760
 * empty-element tag.
8761
 *
8762
 * With namespace:
8763
 *
8764
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8765
 *
8766
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8767
 *
 * The function works in phases: parse the element QName, parse the
 * attributes (namespace declarations are pushed immediately, ordinary
 * attributes are stored in ctxt->atts), collect namespace declarations
 * from default attributes, resolve attribute prefixes to namespace
 * indexes, check attribute uniqueness with a hash table, append default
 * attributes, rebuild the value pointers, and finally invoke the
 * startElementNs SAX callback.
 *
 * The closing '>' or '/>' is not consumed here; the attribute loop stops
 * in front of it.
 *
8768
 * @param ctxt  an XML parser context
8769
 * @param pref  resulting namespace prefix
 *              (only written when the SAX callback stage is reached)
8770
 * @param URI  resulting namespace URI
 *             (only written when the SAX callback stage is reached)
8771
 * @param nbNsPtr  resulting number of namespace declarations
 *                 (written on every path that goes through `done`, not on
 *                 the early `return(NULL)` paths)
8772
 * @returns the element name parsed
 * or NULL if the input is not positioned on '<', if
 * xmlParserNsStartElement fails, if no element name can be parsed, or if
 * growing the attribute array fails while appending default attributes.
 * If growing the uniqueness hash table fails, the name is still returned
 * but the SAX callback is skipped.
8773
 */
8774
8775
static const xmlChar *
8776
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8777
0
                  const xmlChar **URI, int *nbNsPtr) {
8778
0
    xmlHashedString hlocalname;
8779
0
    xmlHashedString hprefix;
8780
0
    xmlHashedString hattname;
8781
0
    xmlHashedString haprefix;
8782
0
    const xmlChar *localname;
8783
0
    const xmlChar *prefix;
8784
0
    const xmlChar *attname;
8785
0
    const xmlChar *aprefix;
8786
0
    const xmlChar *uri;
8787
0
    xmlChar *attvalue = NULL;
8788
0
    const xmlChar **atts = ctxt->atts;
8789
0
    unsigned attrHashSize = 0;
8790
0
    int maxatts = ctxt->maxatts;
8791
0
    int nratts, nbatts, nbdef;
8792
0
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8793
0
    int alloc = 0;
8794
0
    int numNsErr = 0;
8795
0
    int numDupErr = 0;
8796
8797
0
    if (RAW != '<') return(NULL);
8798
0
    NEXT1;
8799
    /*
     * Counters:
     *   nbatts      number of used slots in atts (five per attribute)
     *   nratts      number of attributes actually parsed from the tag
     *   nbdef       number of default attributes appended to atts
     *   nbNs        number of namespace declarations pushed
     *   nbTotalDef  number of non-namespace default attributes counted
     *               for sizing the uniqueness hash table
     *   attval      set when at least one stored value must be freed
     */
8800
0
    nbatts = 0;
8801
0
    nratts = 0;
8802
0
    nbdef = 0;
8803
0
    nbNs = 0;
8804
0
    nbTotalDef = 0;
8805
0
    attval = 0;
8806
    /* NOTE(review): presumably opens a namespace scope for this element;
     * only the failure handling is visible here. */
8807
0
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8808
0
        xmlErrMemory(ctxt);
8809
0
        return(NULL);
8810
0
    }
8811
8812
0
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8813
0
    if (hlocalname.name == NULL) {
8814
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8815
0
           "StartTag: invalid element name\n");
8816
0
        return(NULL);
8817
0
    }
8818
0
    localname = hlocalname.name;
8819
0
    prefix = hprefix.name;
8820
8821
    /*
8822
     * Now parse the attributes, it ends up with the ending
8823
     *
8824
     * (S Attribute)* S?
8825
     */
8826
0
    SKIP_BLANKS;
8827
0
    GROW;
8828
8829
    /*
8830
     * The ctxt->atts array will be ultimately passed to the SAX callback
8831
     * containing five xmlChar pointers for each attribute:
8832
     *
8833
     * [0] attribute name
8834
     * [1] attribute prefix
8835
     * [2] namespace URI
8836
     * [3] attribute value
8837
     * [4] end of attribute value
8838
     *
8839
     * To save memory, we reuse this array temporarily and store integers
8840
     * in these pointer variables.
8841
     *
8842
     * [0] attribute name
8843
     * [1] attribute prefix
8844
     * [2] hash value of attribute prefix, and later namespace index
8845
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8846
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8847
     *
8848
     * The ctxt->attallocs array contains an additional unsigned int for
8849
     * each attribute, containing the hash value of the attribute name
8850
     * and the alloc flag in bit 31.
8851
     */
8852
8853
0
    while (((RAW != '>') &&
8854
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8855
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
        /* A negative len after the call means "not reported"; it is then
         * computed with xmlStrlen() below. */
8856
0
  int len = -1;
8857
8858
0
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8859
0
                                          &haprefix, &attvalue, &len,
8860
0
                                          &alloc);
8861
0
        if (hattname.name == NULL)
8862
0
      break;
8863
0
        if (attvalue == NULL)
8864
0
            goto next_attr;
8865
0
        attname = hattname.name;
8866
0
        aprefix = haprefix.name;
8867
0
  if (len < 0) len = xmlStrlen(attvalue);
8868
        /* Case 1: default namespace declaration, xmlns="..." */
8869
0
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8870
0
            xmlHashedString huri;
8871
0
            xmlURIPtr parsedUri;
8872
8873
0
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8874
0
            uri = huri.name;
8875
0
            if (uri == NULL) {
8876
0
                xmlErrMemory(ctxt);
8877
0
                goto next_attr;
8878
0
            }
            /* An empty value is pushed without any URI checks. */
8879
0
            if (*uri != 0) {
8880
0
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8881
0
                    xmlErrMemory(ctxt);
8882
0
                    goto next_attr;
8883
0
                }
8884
0
                if (parsedUri == NULL) {
8885
0
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8886
0
                             "xmlns: '%s' is not a valid URI\n",
8887
0
                                       uri, NULL, NULL);
8888
0
                } else {
8889
0
                    if (parsedUri->scheme == NULL) {
8890
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8891
0
                                  "xmlns: URI %s is not absolute\n",
8892
0
                                  uri, NULL, NULL);
8893
0
                    }
8894
0
                    xmlFreeURI(parsedUri);
8895
0
                }
8896
0
                if (uri == ctxt->str_xml_ns) {
                    /* NOTE(review): attname equals ctxt->str_xmlns in
                     * this branch, so this test is presumably always
                     * true -- confirm str_xml and str_xmlns differ. */
8897
0
                    if (attname != ctxt->str_xml) {
8898
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8899
0
                     "xml namespace URI cannot be the default namespace\n",
8900
0
                                 NULL, NULL, NULL);
8901
0
                    }
8902
0
                    goto next_attr;
8903
0
                }
8904
0
                if ((len == 29) &&
8905
0
                    (xmlStrEqual(uri,
8906
0
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8907
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8908
0
                         "reuse of the xmlns namespace name is forbidden\n",
8909
0
                             NULL, NULL, NULL);
8910
0
                    goto next_attr;
8911
0
                }
8912
0
            }
8913
            /* Only declarations for which the push returns > 0 are
             * counted in nbNs. */
8914
0
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8915
0
                nbNs++;
8916
0
        } else if (aprefix == ctxt->str_xmlns) {
            /* Case 2: prefixed namespace declaration, xmlns:attname="..." */
8917
0
            xmlHashedString huri;
8918
0
            xmlURIPtr parsedUri;
8919
8920
0
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8921
0
            uri = huri.name;
8922
0
            if (uri == NULL) {
8923
0
                xmlErrMemory(ctxt);
8924
0
                goto next_attr;
8925
0
            }
8926
8927
0
            if (attname == ctxt->str_xml) {
8928
0
                if (uri != ctxt->str_xml_ns) {
8929
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8930
0
                             "xml namespace prefix mapped to wrong URI\n",
8931
0
                             NULL, NULL, NULL);
8932
0
                }
8933
                /*
8934
                 * Do not keep a namespace definition node
8935
                 */
8936
0
                goto next_attr;
8937
0
            }
8938
0
            if (uri == ctxt->str_xml_ns) {
                /* The attname == str_xml case already left via goto
                 * above, so this test cannot fail here. */
8939
0
                if (attname != ctxt->str_xml) {
8940
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8941
0
                             "xml namespace URI mapped to wrong prefix\n",
8942
0
                             NULL, NULL, NULL);
8943
0
                }
8944
0
                goto next_attr;
8945
0
            }
8946
0
            if (attname == ctxt->str_xmlns) {
8947
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8948
0
                         "redefinition of the xmlns prefix is forbidden\n",
8949
0
                         NULL, NULL, NULL);
8950
0
                goto next_attr;
8951
0
            }
8952
0
            if ((len == 29) &&
8953
0
                (xmlStrEqual(uri,
8954
0
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8955
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8956
0
                         "reuse of the xmlns namespace name is forbidden\n",
8957
0
                         NULL, NULL, NULL);
8958
0
                goto next_attr;
8959
0
            }
            /* uri == NULL was already handled above, so only the
             * empty-string test can trigger here. */
8960
0
            if ((uri == NULL) || (uri[0] == 0)) {
8961
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8962
0
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8963
0
                              attname, NULL, NULL);
8964
0
                goto next_attr;
8965
0
            } else {
8966
0
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8967
0
                    xmlErrMemory(ctxt);
8968
0
                    goto next_attr;
8969
0
                }
8970
0
                if (parsedUri == NULL) {
8971
0
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8972
0
                         "xmlns:%s: '%s' is not a valid URI\n",
8973
0
                                       attname, uri, NULL);
8974
0
                } else {
                    /* Unlike the default-namespace case, the relative
                     * URI warning is only issued in pedantic mode. */
8975
0
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8976
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8977
0
                                  "xmlns:%s: URI %s is not absolute\n",
8978
0
                                  attname, uri, NULL);
8979
0
                    }
8980
0
                    xmlFreeURI(parsedUri);
8981
0
                }
8982
0
            }
8983
8984
0
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8985
0
                nbNs++;
8986
0
        } else {
            /* Case 3: ordinary attribute. */
8987
            /*
8988
             * Populate attributes array, see above for repurposing
8989
             * of xmlChar pointers.
8990
             */
8991
0
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8992
0
                int res = xmlCtxtGrowAttrs(ctxt);
8993
                /* Reload the cached copies whether or not growing
                 * succeeded. */
8994
0
                maxatts = ctxt->maxatts;
8995
0
                atts = ctxt->atts;
8996
8997
0
                if (res < 0)
8998
0
                    goto next_attr;
8999
0
            }
            /* Low 31 bits: hash of the attribute name; bit 31: set when
             * the value was allocated and must be freed at `done`. */
9000
0
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9001
0
                                        ((unsigned) alloc << 31);
9002
0
            atts[nbatts++] = attname;
9003
0
            atts[nbatts++] = aprefix;
9004
0
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
9005
0
            if (alloc) {
9006
0
                atts[nbatts++] = attvalue;
9007
0
                attvalue += len;
9008
0
                atts[nbatts++] = attvalue;
9009
0
            } else {
9010
                /*
9011
                 * attvalue points into the input buffer which can be
9012
                 * reallocated. Store differences to input->base instead.
9013
                 * The pointers will be reconstructed later.
9014
                 */
9015
0
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9016
0
                attvalue += len;
9017
0
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9018
0
            }
9019
            /*
9020
             * tag if some deallocation is needed
9021
             */
9022
0
            if (alloc != 0) attval = 1;
9023
0
            attvalue = NULL; /* moved into atts */
9024
0
        }
9025
        /*
         * Reached after every attribute. attvalue is still non-NULL here
         * only if it was not moved into atts (namespace declarations and
         * error paths); free it when it was allocated.
         */
9026
0
next_attr:
9027
0
        if ((attvalue != NULL) && (alloc != 0)) {
9028
0
            xmlFree(attvalue);
9029
0
            attvalue = NULL;
9030
0
        }
9031
9032
0
  GROW
9033
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9034
0
      break;
9035
0
  if (SKIP_BLANKS == 0) {
9036
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9037
0
         "attributes construct error\n");
9038
0
      break;
9039
0
  }
9040
0
        GROW;
9041
0
    }
9042
9043
    /*
9044
     * Namespaces from default attributes
9045
     */
    /*
     * First pass over the defaults registered for this element: push the
     * namespace declarations so they are available for prefix resolution
     * below, and count the remaining defaults in nbTotalDef so the
     * uniqueness hash table can be sized before they are appended.
     */
9046
0
    if (ctxt->attsDefault != NULL) {
9047
0
        xmlDefAttrsPtr defaults;
9048
9049
0
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9050
0
  if (defaults != NULL) {
9051
0
      for (i = 0; i < defaults->nbAttrs; i++) {
9052
0
                xmlDefAttr *attr = &defaults->attrs[i];
9053
9054
0
          attname = attr->name.name;
9055
0
    aprefix = attr->prefix.name;
9056
9057
0
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9058
0
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9059
9060
0
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9061
0
                        nbNs++;
9062
0
    } else if (aprefix == ctxt->str_xmlns) {
9063
0
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9064
9065
0
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9066
0
                                      NULL, 1) > 0)
9067
0
                        nbNs++;
9068
0
    } else {
9069
0
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9070
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9071
0
                                    "Maximum number of attributes exceeded");
9072
0
                        break;
9073
0
                    }
9074
0
                    nbTotalDef += 1;
9075
0
                }
9076
0
      }
9077
0
  }
9078
0
    }
9079
9080
    /*
9081
     * Resolve attribute namespaces
9082
     */
    /*
     * On entry atts[i+2] holds the prefix hash stored by the parsing loop;
     * on exit it holds the namespace index (or one of the NS_INDEX_*
     * markers).
     */
9083
0
    for (i = 0; i < nbatts; i += 5) {
9084
0
        attname = atts[i];
9085
0
        aprefix = atts[i+1];
9086
9087
        /*
9088
  * The default namespace does not apply to attribute names.
9089
  */
9090
0
  if (aprefix == NULL) {
9091
0
            nsIndex = NS_INDEX_EMPTY;
9092
0
        } else if (aprefix == ctxt->str_xml) {
9093
0
            nsIndex = NS_INDEX_XML;
9094
0
        } else {
9095
0
            haprefix.name = aprefix;
9096
0
            haprefix.hashValue = (size_t) atts[i+2];
9097
0
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9098
            /* An undefined prefix is reported once here and then treated
             * as "no namespace". */
9099
0
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9100
0
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9101
0
        "Namespace prefix %s for %s on %s is not defined\n",
9102
0
        aprefix, attname, localname);
9103
0
                nsIndex = NS_INDEX_EMPTY;
9104
0
            }
9105
0
        }
9106
9107
0
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9108
0
    }
9109
9110
    /*
9111
     * Maximum number of attributes including default attributes.
9112
     */
9113
0
    maxAtts = nratts + nbTotalDef;
9114
9115
    /*
9116
     * Verify that attribute names are unique.
9117
     */
    /* With zero or one attribute there is nothing to compare, so the hash
     * table is not set up at all. */
9118
0
    if (maxAtts > 1) {
        /* Smallest power of two (at least 4) with attrHashSize / 2 >=
         * maxAtts, so the table is at most half full. */
9119
0
        attrHashSize = 4;
9120
0
        while (attrHashSize / 2 < (unsigned) maxAtts)
9121
0
            attrHashSize *= 2;
9122
9123
0
        if (attrHashSize > ctxt->attrHashMax) {
9124
0
            xmlAttrHashBucket *tmp;
9125
9126
0
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9127
0
            if (tmp == NULL) {
9128
0
                xmlErrMemory(ctxt);
                /* Skips the SAX callback but still frees allocated
                 * values; localname is returned unchanged. */
9129
0
                goto done;
9130
0
            }
9131
9132
0
            ctxt->attrHash = tmp;
9133
0
            ctxt->attrHashMax = attrHashSize;
9134
0
        }
9135
        /* All-ones bytes make every bucket index negative, i.e. empty. */
9136
0
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9137
        /* i walks atts in steps of five, j walks attallocs in step. */
9138
0
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9139
0
            const xmlChar *nsuri;
9140
0
            unsigned hashValue, nameHashValue, uriHashValue;
9141
0
            int res;
9142
9143
0
            attname = atts[i];
9144
0
            aprefix = atts[i+1];
9145
0
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9146
            /* Hash values always have bit 31 set, see dict.c */
9147
0
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9148
9149
0
            if (nsIndex == NS_INDEX_EMPTY) {
9150
                /*
9151
                 * Prefix with empty namespace means an undeclared
9152
                 * prefix which was already reported above.
9153
                 */
9154
0
                if (aprefix != NULL)
9155
0
                    continue;
9156
0
                nsuri = NULL;
9157
0
                uriHashValue = URI_HASH_EMPTY;
9158
0
            } else if (nsIndex == NS_INDEX_XML) {
9159
0
                nsuri = ctxt->str_xml_ns;
9160
0
                uriHashValue = URI_HASH_XML;
9161
0
            } else {
9162
0
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9163
0
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9164
0
            }
9165
            /* The table is keyed by (local name, namespace URI). */
9166
0
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9167
0
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9168
0
                                    hashValue, i);
            /* NOTE(review): the meaning of a negative result is not
             * visible in this function -- confirm in xmlAttrHashInsert. */
9169
0
            if (res < 0)
9170
0
                continue;
9171
9172
            /*
9173
             * [ WFC: Unique Att Spec ]
9174
             * No attribute name may appear more than once in the same
9175
             * start-tag or empty-element tag.
9176
             * As extended by the Namespace in XML REC.
9177
             */
            /* res < INT_MAX is the atts index of an earlier attribute
             * with the same name and namespace URI. An identical prefix
             * pointer means the same QName was written twice; otherwise
             * two different prefixes map to the same URI. */
9178
0
            if (res < INT_MAX) {
9179
0
                if (aprefix == atts[res+1]) {
9180
0
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9181
0
                    numDupErr += 1;
9182
0
                } else {
9183
0
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9184
0
                             "Namespaced Attribute %s in '%s' redefined\n",
9185
0
                             attname, nsuri, NULL);
9186
0
                    numNsErr += 1;
9187
0
                }
9188
0
            }
9189
0
        }
9190
0
    }
9191
9192
    /*
9193
     * Default attributes
9194
     */
    /*
     * Second pass over the defaults: append every non-namespace default
     * that is not already present. These entries store final pointers
     * directly, so they need no reconstruction and are never freed here.
     */
9195
0
    if (ctxt->attsDefault != NULL) {
9196
0
        xmlDefAttrsPtr defaults;
9197
9198
0
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9199
0
  if (defaults != NULL) {
9200
0
      for (i = 0; i < defaults->nbAttrs; i++) {
9201
0
                xmlDefAttr *attr = &defaults->attrs[i];
9202
0
                const xmlChar *nsuri = NULL;
9203
0
                unsigned hashValue, uriHashValue = 0;
9204
0
                int res;
9205
9206
0
          attname = attr->name.name;
9207
0
    aprefix = attr->prefix.name;
9208
                /* Namespace declarations were handled in the first pass. */
9209
0
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9210
0
                    continue;
9211
0
    if (aprefix == ctxt->str_xmlns)
9212
0
                    continue;
9213
9214
0
                if (aprefix == NULL) {
9215
0
                    nsIndex = NS_INDEX_EMPTY;
9216
0
                    nsuri = NULL;
9217
0
                    uriHashValue = URI_HASH_EMPTY;
9218
0
                } else if (aprefix == ctxt->str_xml) {
9219
0
                    nsIndex = NS_INDEX_XML;
9220
0
                    nsuri = ctxt->str_xml_ns;
9221
0
                    uriHashValue = URI_HASH_XML;
9222
0
                } else {
9223
0
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9224
0
                    if ((nsIndex == INT_MAX) ||
9225
0
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9226
0
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9227
0
                                 "Namespace prefix %s for %s on %s is not "
9228
0
                                 "defined\n",
9229
0
                                 aprefix, attname, localname);
9230
0
                        nsIndex = NS_INDEX_EMPTY;
9231
0
                        nsuri = NULL;
9232
0
                        uriHashValue = URI_HASH_EMPTY;
9233
0
                    } else {
9234
0
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9235
0
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9236
0
                    }
9237
0
                }
9238
9239
                /*
9240
                 * Check whether the attribute exists
9241
                 */
                /* The hash table only exists when maxAtts > 1; with a
                 * single candidate there is nothing it could clash with. */
9242
0
                if (maxAtts > 1) {
9243
0
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9244
0
                                                   uriHashValue);
9245
0
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9246
0
                                            hashValue, nbatts);
9247
0
                    if (res < 0)
9248
0
                        continue;
9249
0
                    if (res < INT_MAX) {
                        /* Same name, URI and prefix already present:
                         * the default is silently not applied. */
9250
0
                        if (aprefix == atts[res+1])
9251
0
                            continue;
9252
0
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9253
0
                                 "Namespaced Attribute %s in '%s' redefined\n",
9254
0
                                 attname, nsuri, NULL);
9255
0
                    }
9256
0
                }
9257
9258
0
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9259
9260
0
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9261
0
                    res = xmlCtxtGrowAttrs(ctxt);
9262
9263
0
                    maxatts = ctxt->maxatts;
9264
0
                    atts = ctxt->atts;
9265
                    /* Report the failure through a NULL return value but
                     * still run the cleanup at `done`. */
9266
0
                    if (res < 0) {
9267
0
                        localname = NULL;
9268
0
                        goto done;
9269
0
                    }
9270
0
                }
9271
9272
0
                atts[nbatts++] = attname;
9273
0
                atts[nbatts++] = aprefix;
9274
0
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9275
0
                atts[nbatts++] = attr->value.name;
9276
0
                atts[nbatts++] = attr->valueEnd;
9277
9278
0
#ifdef LIBXML_VALID_ENABLED
9279
                /*
9280
                 * This should be moved to valid.c, but we don't keep track
9281
                 * whether an attribute was defaulted.
9282
                 */
9283
0
                if ((ctxt->validate) &&
9284
0
                    (ctxt->standalone == 1) &&
9285
0
                    (attr->external != 0)) {
9286
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9287
0
                            "standalone: attribute %s on %s defaulted "
9288
0
                            "from external subset\n",
9289
0
                            attname, localname);
9290
0
                }
9291
0
#endif
9292
0
                nbdef++;
9293
0
      }
9294
0
  }
9295
0
    }
9296
9297
    /*
9298
     * Using a single hash table for nsUri/localName pairs cannot
9299
     * detect duplicate QNames reliably. The following example will
9300
     * only result in two namespace errors.
9301
     *
9302
     * <doc xmlns:a="a" xmlns:b="a">
9303
     *   <elem a:a="" b:a="" b:a=""/>
9304
     * </doc>
9305
     *
9306
     * If we saw more than one namespace error but no duplicate QNames
9307
     * were found, we have to scan for duplicate QNames.
9308
     */
    /* numNsErr > 1 implies the table was set up above (maxAtts > 1), so
     * it can be reused here, this time keyed by (name, prefix). */
9309
0
    if ((numDupErr == 0) && (numNsErr > 1)) {
9310
0
        memset(ctxt->attrHash, -1,
9311
0
               attrHashSize * sizeof(ctxt->attrHash[0]));
9312
9313
0
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9314
0
            unsigned hashValue, nameHashValue, prefixHashValue;
9315
0
            int res;
9316
            /* Only prefixed attributes take part in this rescan. */
9317
0
            aprefix = atts[i+1];
9318
0
            if (aprefix == NULL)
9319
0
                continue;
9320
9321
0
            attname = atts[i];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
0
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
            /* The prefix hash kept in atts[i+2] was overwritten by the
             * namespace index, so it is computed again. */
9324
0
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9325
9326
0
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9327
0
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9328
0
                                         aprefix, hashValue, i);
9329
0
            if (res < INT_MAX)
9330
0
                xmlErrAttributeDup(ctxt, aprefix, attname);
9331
0
        }
9332
0
    }
9333
9334
    /*
9335
     * Reconstruct attribute pointers
9336
     */
9337
0
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9338
        /* namespace URI */
        /* NOTE(review): INT_MAX and INT_MAX - 1 presumably correspond to
         * NS_INDEX_EMPTY and NS_INDEX_XML used above -- confirm against
         * their definitions. */
9339
0
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9340
0
        if (nsIndex == INT_MAX)
9341
0
            atts[i+2] = NULL;
9342
0
        else if (nsIndex == INT_MAX - 1)
9343
0
            atts[i+2] = ctxt->str_xml_ns;
9344
0
        else
9345
0
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9346
        /* Only parsed attributes (j < nratts) whose value was not
         * allocated hold offsets; allocated values and default
         * attributes already hold real pointers. */
9347
0
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9348
0
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9349
0
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9350
0
        }
9351
0
    }
9352
    /* Resolve the element's own prefix now that all namespace
     * declarations of this tag have been pushed. */
9353
0
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9354
0
    if ((prefix != NULL) && (uri == NULL)) {
9355
0
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9356
0
           "Namespace prefix %s on %s is not defined\n",
9357
0
     prefix, localname, NULL);
9358
0
    }
9359
0
    *pref = prefix;
9360
0
    *URI = uri;
9361
9362
    /*
9363
     * SAX callback
9364
     */
    /* nbatts / 5 is the total attribute count (parsed plus defaulted).
     * NOTE(review): the namespace argument presumably addresses the last
     * nbNs prefix/URI pairs of ctxt->nsTab -- confirm the nsTab layout. */
9365
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9366
0
  (!ctxt->disableSAX)) {
9367
0
  if (nbNs > 0)
9368
0
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9369
0
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9370
0
        nbatts / 5, nbdef, atts);
9371
0
  else
9372
0
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9373
0
                          0, NULL, nbatts / 5, nbdef, atts);
9374
0
    }
9375
9376
0
done:
9377
    /*
9378
     * Free allocated attribute values
9379
     */
    /* Allocated values are stored as real pointers in atts[i+3] from the
     * start, so this is valid even when `done` is reached before the
     * pointer reconstruction ran. */
9380
0
    if (attval != 0) {
9381
0
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9382
0
      if (ctxt->attallocs[j] & 0x80000000)
9383
0
          xmlFree((xmlChar *) atts[i+3]);
9384
0
    }
9385
9386
0
    *nbNsPtr = nbNs;
9387
0
    return(localname);
9388
0
}
9389
9390
/**
9391
 * Parse an end tag. Always consumes '</'.
9392
 *
9393
 *     [42] ETag ::= '</' Name S? '>'
9394
 *
9395
 * With namespace
9396
 *
9397
 *     [NS 9] ETag ::= '</' QName S? '>'
 *
 * If the input is not positioned on '</', an error is raised and the
 * function returns without consuming anything and without calling SAX
 * or popping any state. Otherwise the name is compared with ctxt->name
 * (and tag->prefix when the start tag had one), a mismatch is reported,
 * the endElementNs SAX callback is invoked, and the space stack entry
 * and the tag->nsNr namespaces of the start tag are popped.
 *
9398
 * @param ctxt  an XML parser context
9399
 * @param tag  the corresponding start tag
9400
 */
9401
9402
static void
9403
0
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9404
0
    const xmlChar *name;
9405
9406
0
    GROW;
9407
0
    if ((RAW != '<') || (NXT(1) != '/')) {
9408
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9409
0
  return;
9410
0
    }
9411
0
    SKIP(2);
9412
    /* The expected name is passed to the parsing helper; the result is
     * checked against the sentinel (xmlChar*)1 further down. */
9413
0
    if (tag->prefix == NULL)
9414
0
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9415
0
    else
9416
0
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9417
9418
    /*
9419
     * We should definitely be at the ending "S? '>'" part
9420
     */
9421
0
    GROW;
9422
0
    SKIP_BLANKS;
    /* A missing '>' is reported but does not stop the processing below. */
9423
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9424
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9425
0
    } else
9426
0
  NEXT1;
9427
9428
    /*
9429
     * [ WFC: Element Type Match ]
9430
     * The Name in an element's end-tag must match the element type in the
9431
     * start-tag.
9432
     *
9433
     */
    /* Any value other than (xmlChar*)1 is treated as a mismatch:
     * presumably the compare helpers return that sentinel on a match,
     * NULL when no name could be parsed, and the parsed name otherwise
     * -- confirm in xmlParseNameAndCompare / xmlParseQNameAndCompare. */
9434
0
    if (name != (xmlChar*)1) {
9435
0
        if (name == NULL) name = BAD_CAST "unparsable";
9436
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9437
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
9438
0
                    ctxt->name, tag->line, name);
9439
0
    }
9440
9441
    /*
9442
     * SAX: End of Tag
9443
     */
    /* The callback receives the start tag's name, prefix and URI, not the
     * name found in the end tag. */
9444
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9445
0
  (!ctxt->disableSAX))
9446
0
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9447
0
                                tag->URI);
9448
    /* Drop the per-element state recorded for the start tag. */
9449
0
    spacePop(ctxt);
9450
0
    if (tag->nsNr != 0)
9451
0
  xmlParserNsPop(ctxt, tag->nsNr);
9452
0
}
9453
9454
/**
9455
 * Parse escaped pure raw content. Always consumes '<!['.
9456
 *
9457
 * @deprecated Internal function, don't use.
9458
 *
9459
 *     [18] CDSect ::= CDStart CData CDEnd
9460
 *
9461
 *     [19] CDStart ::= '<![CDATA['
9462
 *
9463
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9464
 *
9465
 *     [21] CDEnd ::= ']]>'
 *
 * The section content is accumulated in a temporary buffer and handed to
 * the cdataBlock SAX callback, or to the characters callback when
 * cdataBlock is not set or XML_PARSE_NOCDATA is enabled. The buffer is
 * freed before returning. If the input is not positioned on '<![',
 * the function returns without consuming anything.
 *
9466
 * @param ctxt  an XML parser context
9467
 */
9468
void
9469
108k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9470
108k
    xmlChar *buf = NULL;
9471
108k
    int len = 0;
9472
108k
    int size = XML_PARSER_BUFFER_SIZE;
    /* r, s and cur are three consecutive input characters (r is the
     * oldest); rl, sl and l are the values reported for them by
     * xmlCurrentCharRecover and passed to NEXTL. */
9473
108k
    int r, rl;
9474
108k
    int s, sl;
9475
108k
    int cur, l;
    /* Upper bound for the buffer size, raised by XML_PARSE_HUGE. */
9476
108k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9477
0
                    XML_MAX_HUGE_LENGTH :
9478
108k
                    XML_MAX_TEXT_LENGTH;
9479
9480
108k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9481
0
        return;
9482
108k
    SKIP(3);
9483
    /* '<![' has been consumed at this point even if 'CDATA[' does not
     * follow. */
9484
108k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9485
0
        return;
9486
108k
    SKIP(6);
9487
    /* Prime the window with the first two characters; a section holding
     * fewer than two valid characters is reported as unfinished. */
9488
108k
    r = xmlCurrentCharRecover(ctxt, &rl);
9489
108k
    if (!IS_CHAR(r)) {
9490
8
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9491
8
        goto out;
9492
8
    }
9493
108k
    NEXTL(rl);
9494
108k
    s = xmlCurrentCharRecover(ctxt, &sl);
9495
108k
    if (!IS_CHAR(s)) {
9496
45
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9497
45
        goto out;
9498
45
    }
9499
108k
    NEXTL(sl);
9500
108k
    cur = xmlCurrentCharRecover(ctxt, &l);
9501
108k
    buf = xmlMalloc(size);
9502
108k
    if (buf == NULL) {
9503
0
  xmlErrMemory(ctxt);
9504
0
        goto out;
9505
0
    }
    /* Slide the window until it reads "]]>" or an invalid character is
     * hit. Only r is copied per iteration, so the terminating "]]" never
     * reaches the buffer. */
9506
20.5M
    while (IS_CHAR(cur) &&
9507
20.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
        /* Keep headroom for the next COPY_BUF and the terminator.
         * NOTE(review): presumably COPY_BUF appends at most 4 bytes
         * -- confirm against the macro. */
9508
20.4M
  if (len + 5 >= size) {
9509
46.9k
      xmlChar *tmp;
9510
46.9k
            int newSize;
9511
9512
46.9k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9513
46.9k
            if (newSize < 0) {
9514
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9515
0
                               "CData section too big found\n");
9516
0
                goto out;
9517
0
            }
9518
46.9k
      tmp = xmlRealloc(buf, newSize);
9519
46.9k
      if (tmp == NULL) {
9520
0
    xmlErrMemory(ctxt);
9521
0
                goto out;
9522
0
      }
9523
46.9k
      buf = tmp;
9524
46.9k
      size = newSize;
9525
46.9k
  }
        /* Emit the oldest character, then shift the window by one. */
9526
20.4M
  COPY_BUF(buf, len, r);
9527
20.4M
  r = s;
9528
20.4M
  rl = sl;
9529
20.4M
  s = cur;
9530
20.4M
  sl = l;
9531
20.4M
  NEXTL(l);
9532
20.4M
  cur = xmlCurrentCharRecover(ctxt, &l);
9533
20.4M
    }
9534
108k
    buf[len] = 0;
    /* The loop ended on an invalid character instead of the closing
     * "]]>": report it with the start of the collected text. */
9535
108k
    if (cur != '>') {
9536
425
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9537
425
                       "CData section not finished\n%.50s\n", buf);
9538
425
        goto out;
9539
425
    }
    /* Consume the final '>'. */
9540
108k
    NEXTL(l);
9541
9542
    /*
9543
     * OK the buffer is to be consumed as cdata.
9544
     */
9545
108k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9546
98.0k
        if ((ctxt->sax->cdataBlock != NULL) &&
9547
98.0k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9548
98.0k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9549
98.0k
        } else if (ctxt->sax->characters != NULL) {
9550
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9551
0
        }
9552
98.0k
    }
9553
    /* Common exit. buf is still NULL when this label is reached before
     * the allocation; presumably xmlFree accepts NULL like free() does
     * -- confirm. */
9554
108k
out:
9555
108k
    xmlFree(buf);
9556
108k
}
9557
9558
/**
9559
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9560
 * unexpected EOF to the caller.
9561
 *
9562
 * @param ctxt  an XML parser context
9563
 */
9564
9565
static void
9566
17.7k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9567
17.7k
    int oldNameNr = ctxt->nameNr;
9568
17.7k
    int oldSpaceNr = ctxt->spaceNr;
9569
17.7k
    int oldNodeNr = ctxt->nodeNr;
9570
9571
17.7k
    GROW;
9572
648k
    while ((ctxt->input->cur < ctxt->input->end) &&
9573
648k
     (PARSER_STOPPED(ctxt) == 0)) {
9574
630k
  const xmlChar *cur = ctxt->input->cur;
9575
9576
  /*
9577
   * First case : a Processing Instruction.
9578
   */
9579
630k
  if ((*cur == '<') && (cur[1] == '?')) {
9580
51.4k
      xmlParsePI(ctxt);
9581
51.4k
  }
9582
9583
  /*
9584
   * Second case : a CDSection
9585
   */
9586
  /* 2.6.0 test was *cur not RAW */
9587
579k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9588
10.6k
      xmlParseCDSect(ctxt);
9589
10.6k
  }
9590
9591
  /*
9592
   * Third case :  a comment
9593
   */
9594
568k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9595
568k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9596
21.3k
      xmlParseComment(ctxt);
9597
21.3k
  }
9598
9599
  /*
9600
   * Fourth case :  a sub-element.
9601
   */
9602
547k
  else if (*cur == '<') {
9603
231k
            if (NXT(1) == '/') {
9604
35.4k
                if (ctxt->nameNr <= oldNameNr)
9605
11
                    break;
9606
35.4k
          xmlParseElementEnd(ctxt);
9607
195k
            } else {
9608
195k
          xmlParseElementStart(ctxt);
9609
195k
            }
9610
231k
  }
9611
9612
  /*
9613
   * Fifth case : a reference. If if has not been resolved,
9614
   *    parsing returns it's Name, create the node
9615
   */
9616
9617
316k
  else if (*cur == '&') {
9618
68.9k
      xmlParseReference(ctxt);
9619
68.9k
  }
9620
9621
  /*
9622
   * Last case, text. Note that References are handled directly.
9623
   */
9624
247k
  else {
9625
247k
      xmlParseCharDataInternal(ctxt, 0);
9626
247k
  }
9627
9628
630k
  SHRINK;
9629
630k
  GROW;
9630
630k
    }
9631
9632
17.7k
    if ((ctxt->nameNr > oldNameNr) &&
9633
17.7k
        (ctxt->input->cur >= ctxt->input->end) &&
9634
17.7k
        (ctxt->wellFormed)) {
9635
16
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9636
16
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9637
16
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9638
16
                "Premature end of data in tag %s line %d\n",
9639
16
                name, line, NULL);
9640
16
    }
9641
9642
    /*
9643
     * Clean up in error case
9644
     */
9645
9646
17.7k
    while (ctxt->nodeNr > oldNodeNr)
9647
0
        nodePop(ctxt);
9648
9649
42.2k
    while (ctxt->nameNr > oldNameNr) {
9650
24.5k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9651
9652
24.5k
        if (tag->nsNr != 0)
9653
0
            xmlParserNsPop(ctxt, tag->nsNr);
9654
9655
24.5k
        namePop(ctxt);
9656
24.5k
    }
9657
9658
42.2k
    while (ctxt->spaceNr > oldSpaceNr)
9659
24.5k
        spacePop(ctxt);
9660
17.7k
}
9661
9662
/**
9663
 * Parse XML element content. This is useful if you're only interested
9664
 * in custom SAX callbacks. If you want a node list, use
9665
 * #xmlCtxtParseContent.
9666
 *
9667
 * @param ctxt  an XML parser context
9668
 */
9669
void
9670
0
xmlParseContent(xmlParserCtxt *ctxt) {
9671
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9672
0
        return;
9673
9674
0
    xmlCtxtInitializeLate(ctxt);
9675
9676
0
    xmlParseContentInternal(ctxt);
9677
9678
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9679
0
}
9680
9681
/**
9682
 * Parse an XML element
9683
 *
9684
 * @deprecated Internal function, don't use.
9685
 *
9686
 *     [39] element ::= EmptyElemTag | STag content ETag
9687
 *
9688
 * [ WFC: Element Type Match ]
9689
 * The Name in an element's end-tag must match the element type in the
9690
 * start-tag.
9691
 *
9692
 * @param ctxt  an XML parser context
9693
 */
9694
9695
void
9696
0
xmlParseElement(xmlParserCtxt *ctxt) {
9697
0
    if (xmlParseElementStart(ctxt) != 0)
9698
0
        return;
9699
9700
0
    xmlParseContentInternal(ctxt);
9701
9702
0
    if (ctxt->input->cur >= ctxt->input->end) {
9703
0
        if (ctxt->wellFormed) {
9704
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9705
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9706
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9707
0
                    "Premature end of data in tag %s line %d\n",
9708
0
                    name, line, NULL);
9709
0
        }
9710
0
        return;
9711
0
    }
9712
9713
0
    xmlParseElementEnd(ctxt);
9714
0
}
9715
9716
/**
9717
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9718
 * opening tag was parsed, 1 if an empty element was parsed.
9719
 *
9720
 * Always consumes '<'.
9721
 *
9722
 * @param ctxt  an XML parser context
9723
 */
9724
static int
9725
195k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9726
195k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9727
195k
    const xmlChar *name;
9728
195k
    const xmlChar *prefix = NULL;
9729
195k
    const xmlChar *URI = NULL;
9730
195k
    xmlParserNodeInfo node_info;
9731
195k
    int line;
9732
195k
    xmlNodePtr cur;
9733
195k
    int nbNs = 0;
9734
9735
195k
    if (ctxt->nameNr > maxDepth) {
9736
41
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9737
41
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9738
41
                ctxt->nameNr);
9739
41
  xmlHaltParser(ctxt);
9740
41
  return(-1);
9741
41
    }
9742
9743
    /* Capture start position */
9744
195k
    if (ctxt->record_info) {
9745
0
        node_info.begin_pos = ctxt->input->consumed +
9746
0
                          (CUR_PTR - ctxt->input->base);
9747
0
  node_info.begin_line = ctxt->input->line;
9748
0
    }
9749
9750
195k
    if (ctxt->spaceNr == 0)
9751
0
  spacePush(ctxt, -1);
9752
195k
    else if (*ctxt->space == -2)
9753
26.2k
  spacePush(ctxt, -1);
9754
169k
    else
9755
169k
  spacePush(ctxt, *ctxt->space);
9756
9757
195k
    line = ctxt->input->line;
9758
195k
#ifdef LIBXML_SAX1_ENABLED
9759
195k
    if (ctxt->sax2)
9760
0
#endif /* LIBXML_SAX1_ENABLED */
9761
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9762
195k
#ifdef LIBXML_SAX1_ENABLED
9763
195k
    else
9764
195k
  name = xmlParseStartTag(ctxt);
9765
195k
#endif /* LIBXML_SAX1_ENABLED */
9766
195k
    if (name == NULL) {
9767
65.1k
  spacePop(ctxt);
9768
65.1k
        return(-1);
9769
65.1k
    }
9770
130k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9771
130k
    cur = ctxt->node;
9772
9773
130k
#ifdef LIBXML_VALID_ENABLED
9774
    /*
9775
     * [ VC: Root Element Type ]
9776
     * The Name in the document type declaration must match the element
9777
     * type of the root element.
9778
     */
9779
130k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9780
130k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9781
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9782
130k
#endif /* LIBXML_VALID_ENABLED */
9783
9784
    /*
9785
     * Check for an Empty Element.
9786
     */
9787
130k
    if ((RAW == '/') && (NXT(1) == '>')) {
9788
31.1k
        SKIP(2);
9789
31.1k
  if (ctxt->sax2) {
9790
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9791
0
    (!ctxt->disableSAX))
9792
0
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9793
0
#ifdef LIBXML_SAX1_ENABLED
9794
31.1k
  } else {
9795
31.1k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9796
31.1k
    (!ctxt->disableSAX))
9797
6.30k
    ctxt->sax->endElement(ctxt->userData, name);
9798
31.1k
#endif /* LIBXML_SAX1_ENABLED */
9799
31.1k
  }
9800
31.1k
  namePop(ctxt);
9801
31.1k
  spacePop(ctxt);
9802
31.1k
  if (nbNs > 0)
9803
0
      xmlParserNsPop(ctxt, nbNs);
9804
31.1k
  if (cur != NULL && ctxt->record_info) {
9805
0
            node_info.node = cur;
9806
0
            node_info.end_pos = ctxt->input->consumed +
9807
0
                                (CUR_PTR - ctxt->input->base);
9808
0
            node_info.end_line = ctxt->input->line;
9809
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9810
0
  }
9811
31.1k
  return(1);
9812
31.1k
    }
9813
99.4k
    if (RAW == '>') {
9814
60.0k
        NEXT1;
9815
60.0k
        if (cur != NULL && ctxt->record_info) {
9816
0
            node_info.node = cur;
9817
0
            node_info.end_pos = 0;
9818
0
            node_info.end_line = 0;
9819
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9820
0
        }
9821
60.0k
    } else {
9822
39.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9823
39.3k
         "Couldn't find end of Start Tag %s line %d\n",
9824
39.3k
                    name, line, NULL);
9825
9826
  /*
9827
   * end of parsing of this node.
9828
   */
9829
39.3k
  nodePop(ctxt);
9830
39.3k
  namePop(ctxt);
9831
39.3k
  spacePop(ctxt);
9832
39.3k
  if (nbNs > 0)
9833
0
      xmlParserNsPop(ctxt, nbNs);
9834
39.3k
  return(-1);
9835
39.3k
    }
9836
9837
60.0k
    return(0);
9838
99.4k
}
9839
9840
/**
9841
 * Parse the end of an XML element. Always consumes '</'.
9842
 *
9843
 * @param ctxt  an XML parser context
9844
 */
9845
static void
9846
35.4k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9847
35.4k
    xmlNodePtr cur = ctxt->node;
9848
9849
35.4k
    if (ctxt->nameNr <= 0) {
9850
0
        if ((RAW == '<') && (NXT(1) == '/'))
9851
0
            SKIP(2);
9852
0
        return;
9853
0
    }
9854
9855
    /*
9856
     * parse the end of tag: '</' should be here.
9857
     */
9858
35.4k
    if (ctxt->sax2) {
9859
0
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9860
0
  namePop(ctxt);
9861
0
    }
9862
35.4k
#ifdef LIBXML_SAX1_ENABLED
9863
35.4k
    else
9864
35.4k
  xmlParseEndTag1(ctxt, 0);
9865
35.4k
#endif /* LIBXML_SAX1_ENABLED */
9866
9867
    /*
9868
     * Capture end position
9869
     */
9870
35.4k
    if (cur != NULL && ctxt->record_info) {
9871
0
        xmlParserNodeInfoPtr node_info;
9872
9873
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9874
0
        if (node_info != NULL) {
9875
0
            node_info->end_pos = ctxt->input->consumed +
9876
0
                                 (CUR_PTR - ctxt->input->base);
9877
0
            node_info->end_line = ctxt->input->line;
9878
0
        }
9879
0
    }
9880
35.4k
}
9881
9882
/**
9883
 * Parse the XML version value.
9884
 *
9885
 * @deprecated Internal function, don't use.
9886
 *
9887
 *     [26] VersionNum ::= '1.' [0-9]+
9888
 *
9889
 * In practice allow [0-9].[0-9]+ at that level
9890
 *
9891
 * @param ctxt  an XML parser context
9892
 * @returns the string giving the XML version number, or NULL
9893
 */
9894
xmlChar *
9895
4.47k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9896
4.47k
    xmlChar *buf = NULL;
9897
4.47k
    int len = 0;
9898
4.47k
    int size = 10;
9899
4.47k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9900
0
                    XML_MAX_TEXT_LENGTH :
9901
4.47k
                    XML_MAX_NAME_LENGTH;
9902
4.47k
    xmlChar cur;
9903
9904
4.47k
    buf = xmlMalloc(size);
9905
4.47k
    if (buf == NULL) {
9906
0
  xmlErrMemory(ctxt);
9907
0
  return(NULL);
9908
0
    }
9909
4.47k
    cur = CUR;
9910
4.47k
    if (!((cur >= '0') && (cur <= '9'))) {
9911
31
  xmlFree(buf);
9912
31
  return(NULL);
9913
31
    }
9914
4.44k
    buf[len++] = cur;
9915
4.44k
    NEXT;
9916
4.44k
    cur=CUR;
9917
4.44k
    if (cur != '.') {
9918
22
  xmlFree(buf);
9919
22
  return(NULL);
9920
22
    }
9921
4.42k
    buf[len++] = cur;
9922
4.42k
    NEXT;
9923
4.42k
    cur=CUR;
9924
184k
    while ((cur >= '0') && (cur <= '9')) {
9925
179k
  if (len + 1 >= size) {
9926
233
      xmlChar *tmp;
9927
233
            int newSize;
9928
9929
233
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9930
233
            if (newSize < 0) {
9931
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9932
1
                xmlFree(buf);
9933
1
                return(NULL);
9934
1
            }
9935
232
      tmp = xmlRealloc(buf, newSize);
9936
232
      if (tmp == NULL) {
9937
0
    xmlErrMemory(ctxt);
9938
0
          xmlFree(buf);
9939
0
    return(NULL);
9940
0
      }
9941
232
      buf = tmp;
9942
232
            size = newSize;
9943
232
  }
9944
179k
  buf[len++] = cur;
9945
179k
  NEXT;
9946
179k
  cur=CUR;
9947
179k
    }
9948
4.42k
    buf[len] = 0;
9949
4.42k
    return(buf);
9950
4.42k
}
9951
9952
/**
9953
 * Parse the XML version.
9954
 *
9955
 * @deprecated Internal function, don't use.
9956
 *
9957
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9958
 *
9959
 *     [25] Eq ::= S? '=' S?
9960
 *
9961
 * @param ctxt  an XML parser context
9962
 * @returns the version string, e.g. "1.0"
9963
 */
9964
9965
xmlChar *
9966
4.90k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9967
4.90k
    xmlChar *version = NULL;
9968
9969
4.90k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9970
4.52k
  SKIP(7);
9971
4.52k
  SKIP_BLANKS;
9972
4.52k
  if (RAW != '=') {
9973
24
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9974
24
      return(NULL);
9975
24
        }
9976
4.49k
  NEXT;
9977
4.49k
  SKIP_BLANKS;
9978
4.49k
  if (RAW == '"') {
9979
2.40k
      NEXT;
9980
2.40k
      version = xmlParseVersionNum(ctxt);
9981
2.40k
      if (RAW != '"') {
9982
73
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9983
73
      } else
9984
2.33k
          NEXT;
9985
2.40k
  } else if (RAW == '\''){
9986
2.07k
      NEXT;
9987
2.07k
      version = xmlParseVersionNum(ctxt);
9988
2.07k
      if (RAW != '\'') {
9989
15
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9990
15
      } else
9991
2.05k
          NEXT;
9992
2.07k
  } else {
9993
21
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9994
21
  }
9995
4.49k
    }
9996
4.88k
    return(version);
9997
4.90k
}
9998
9999
/**
10000
 * Parse the XML encoding name
10001
 *
10002
 * @deprecated Internal function, don't use.
10003
 *
10004
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10005
 *
10006
 * @param ctxt  an XML parser context
10007
 * @returns the encoding name value or NULL
10008
 */
10009
xmlChar *
10010
4.22k
xmlParseEncName(xmlParserCtxt *ctxt) {
10011
4.22k
    xmlChar *buf = NULL;
10012
4.22k
    int len = 0;
10013
4.22k
    int size = 10;
10014
4.22k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10015
0
                    XML_MAX_TEXT_LENGTH :
10016
4.22k
                    XML_MAX_NAME_LENGTH;
10017
4.22k
    xmlChar cur;
10018
10019
4.22k
    cur = CUR;
10020
4.22k
    if (((cur >= 'a') && (cur <= 'z')) ||
10021
4.22k
        ((cur >= 'A') && (cur <= 'Z'))) {
10022
4.18k
  buf = xmlMalloc(size);
10023
4.18k
  if (buf == NULL) {
10024
0
      xmlErrMemory(ctxt);
10025
0
      return(NULL);
10026
0
  }
10027
10028
4.18k
  buf[len++] = cur;
10029
4.18k
  NEXT;
10030
4.18k
  cur = CUR;
10031
319k
  while (((cur >= 'a') && (cur <= 'z')) ||
10032
319k
         ((cur >= 'A') && (cur <= 'Z')) ||
10033
319k
         ((cur >= '0') && (cur <= '9')) ||
10034
319k
         (cur == '.') || (cur == '_') ||
10035
319k
         (cur == '-')) {
10036
315k
      if (len + 1 >= size) {
10037
2.04k
          xmlChar *tmp;
10038
2.04k
                int newSize;
10039
10040
2.04k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10041
2.04k
                if (newSize < 0) {
10042
2
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10043
2
                    xmlFree(buf);
10044
2
                    return(NULL);
10045
2
                }
10046
2.04k
    tmp = xmlRealloc(buf, newSize);
10047
2.04k
    if (tmp == NULL) {
10048
0
        xmlErrMemory(ctxt);
10049
0
        xmlFree(buf);
10050
0
        return(NULL);
10051
0
    }
10052
2.04k
    buf = tmp;
10053
2.04k
                size = newSize;
10054
2.04k
      }
10055
315k
      buf[len++] = cur;
10056
315k
      NEXT;
10057
315k
      cur = CUR;
10058
315k
        }
10059
4.18k
  buf[len] = 0;
10060
4.18k
    } else {
10061
42
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10062
42
    }
10063
4.22k
    return(buf);
10064
4.22k
}
10065
10066
/**
10067
 * Parse the XML encoding declaration
10068
 *
10069
 * @deprecated Internal function, don't use.
10070
 *
10071
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10072
 *                           "'" EncName "'")
10073
 *
10074
 * this setups the conversion filters.
10075
 *
10076
 * @param ctxt  an XML parser context
10077
 * @returns the encoding value or NULL
10078
 */
10079
10080
const xmlChar *
10081
4.78k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10082
4.78k
    xmlChar *encoding = NULL;
10083
10084
4.78k
    SKIP_BLANKS;
10085
4.78k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10086
536
        return(NULL);
10087
10088
4.24k
    SKIP(8);
10089
4.24k
    SKIP_BLANKS;
10090
4.24k
    if (RAW != '=') {
10091
10
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10092
10
        return(NULL);
10093
10
    }
10094
4.23k
    NEXT;
10095
4.23k
    SKIP_BLANKS;
10096
4.23k
    if (RAW == '"') {
10097
2.17k
        NEXT;
10098
2.17k
        encoding = xmlParseEncName(ctxt);
10099
2.17k
        if (RAW != '"') {
10100
113
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10101
113
            xmlFree(encoding);
10102
113
            return(NULL);
10103
113
        } else
10104
2.05k
            NEXT;
10105
2.17k
    } else if (RAW == '\''){
10106
2.05k
        NEXT;
10107
2.05k
        encoding = xmlParseEncName(ctxt);
10108
2.05k
        if (RAW != '\'') {
10109
25
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
25
            xmlFree(encoding);
10111
25
            return(NULL);
10112
25
        } else
10113
2.03k
            NEXT;
10114
2.05k
    } else {
10115
7
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10116
7
    }
10117
10118
4.09k
    if (encoding == NULL)
10119
8
        return(NULL);
10120
10121
4.08k
    xmlSetDeclaredEncoding(ctxt, encoding);
10122
10123
4.08k
    return(ctxt->encoding);
10124
4.09k
}
10125
10126
/**
10127
 * Parse the XML standalone declaration
10128
 *
10129
 * @deprecated Internal function, don't use.
10130
 *
10131
 *     [32] SDDecl ::= S 'standalone' Eq
10132
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10133
 *
10134
 * [ VC: Standalone Document Declaration ]
10135
 * TODO The standalone document declaration must have the value "no"
10136
 * if any external markup declarations contain declarations of:
10137
 *  - attributes with default values, if elements to which these
10138
 *    attributes apply appear in the document without specifications
10139
 *    of values for these attributes, or
10140
 *  - entities (other than amp, lt, gt, apos, quot), if references
10141
 *    to those entities appear in the document, or
10142
 *  - attributes with values subject to normalization, where the
10143
 *    attribute appears in the document with a value which will change
10144
 *    as a result of normalization, or
10145
 *  - element types with element content, if white space occurs directly
10146
 *    within any instance of those types.
10147
 *
10148
 * @param ctxt  an XML parser context
10149
 * @returns
10150
 *   1 if standalone="yes"
10151
 *   0 if standalone="no"
10152
 *  -2 if standalone attribute is missing or invalid
10153
 *    (A standalone value of -2 means that the XML declaration was found,
10154
 *     but no value was specified for the standalone attribute).
10155
 */
10156
10157
int
10158
1.10k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10159
1.10k
    int standalone = -2;
10160
10161
1.10k
    SKIP_BLANKS;
10162
1.10k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10163
98
  SKIP(10);
10164
98
        SKIP_BLANKS;
10165
98
  if (RAW != '=') {
10166
3
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10167
3
      return(standalone);
10168
3
        }
10169
95
  NEXT;
10170
95
  SKIP_BLANKS;
10171
95
        if (RAW == '\''){
10172
17
      NEXT;
10173
17
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10174
4
          standalone = 0;
10175
4
                SKIP(2);
10176
13
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10177
13
                 (NXT(2) == 's')) {
10178
4
          standalone = 1;
10179
4
    SKIP(3);
10180
9
            } else {
10181
9
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10182
9
      }
10183
17
      if (RAW != '\'') {
10184
15
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10185
15
      } else
10186
2
          NEXT;
10187
78
  } else if (RAW == '"'){
10188
76
      NEXT;
10189
76
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10190
9
          standalone = 0;
10191
9
    SKIP(2);
10192
67
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10193
67
                 (NXT(2) == 's')) {
10194
57
          standalone = 1;
10195
57
                SKIP(3);
10196
57
            } else {
10197
10
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10198
10
      }
10199
76
      if (RAW != '"') {
10200
17
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10201
17
      } else
10202
59
          NEXT;
10203
76
  } else {
10204
2
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10205
2
        }
10206
95
    }
10207
1.10k
    return(standalone);
10208
1.10k
}
10209
10210
/**
10211
 * Parse an XML declaration header
10212
 *
10213
 * @deprecated Internal function, don't use.
10214
 *
10215
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10216
 * @param ctxt  an XML parser context
10217
 */
10218
10219
void
10220
4.90k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10221
4.90k
    xmlChar *version;
10222
10223
    /*
10224
     * This value for standalone indicates that the document has an
10225
     * XML declaration but it does not have a standalone attribute.
10226
     * It will be overwritten later if a standalone attribute is found.
10227
     */
10228
10229
4.90k
    ctxt->standalone = -2;
10230
10231
    /*
10232
     * We know that '<?xml' is here.
10233
     */
10234
4.90k
    SKIP(5);
10235
10236
4.90k
    if (!IS_BLANK_CH(RAW)) {
10237
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10238
0
                 "Blank needed after '<?xml'\n");
10239
0
    }
10240
4.90k
    SKIP_BLANKS;
10241
10242
    /*
10243
     * We must have the VersionInfo here.
10244
     */
10245
4.90k
    version = xmlParseVersionInfo(ctxt);
10246
4.90k
    if (version == NULL) {
10247
481
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10248
4.42k
    } else {
10249
4.42k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10250
      /*
10251
       * Changed here for XML-1.0 5th edition
10252
       */
10253
723
      if (ctxt->options & XML_PARSE_OLD10) {
10254
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10255
0
                "Unsupported version '%s'\n",
10256
0
                version);
10257
723
      } else {
10258
723
          if ((version[0] == '1') && ((version[1] == '.'))) {
10259
662
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10260
662
                      "Unsupported version '%s'\n",
10261
662
          version, NULL);
10262
662
    } else {
10263
61
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10264
61
              "Unsupported version '%s'\n",
10265
61
              version);
10266
61
    }
10267
723
      }
10268
723
  }
10269
4.42k
  if (ctxt->version != NULL)
10270
0
      xmlFree(ctxt->version);
10271
4.42k
  ctxt->version = version;
10272
4.42k
    }
10273
10274
    /*
10275
     * We may have the encoding declaration
10276
     */
10277
4.90k
    if (!IS_BLANK_CH(RAW)) {
10278
623
        if ((RAW == '?') && (NXT(1) == '>')) {
10279
124
      SKIP(2);
10280
124
      return;
10281
124
  }
10282
499
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10283
499
    }
10284
4.78k
    xmlParseEncodingDecl(ctxt);
10285
10286
    /*
10287
     * We may have the standalone status.
10288
     */
10289
4.78k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10290
3.78k
        if ((RAW == '?') && (NXT(1) == '>')) {
10291
3.67k
      SKIP(2);
10292
3.67k
      return;
10293
3.67k
  }
10294
108
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10295
108
    }
10296
10297
    /*
10298
     * We can grow the input buffer freely at that point
10299
     */
10300
1.10k
    GROW;
10301
10302
1.10k
    SKIP_BLANKS;
10303
1.10k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10304
10305
1.10k
    SKIP_BLANKS;
10306
1.10k
    if ((RAW == '?') && (NXT(1) == '>')) {
10307
233
        SKIP(2);
10308
875
    } else if (RAW == '>') {
10309
        /* Deprecated old WD ... */
10310
14
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10311
14
  NEXT;
10312
861
    } else {
10313
861
        int c;
10314
10315
861
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10316
1.51M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10317
1.51M
               ((c = CUR) != 0)) {
10318
1.51M
            NEXT;
10319
1.51M
            if (c == '>')
10320
167
                break;
10321
1.51M
        }
10322
861
    }
10323
1.10k
}
10324
10325
/**
10326
 * @since 2.14.0
10327
 *
10328
 * @param ctxt  parser context
10329
 * @returns the version from the XML declaration.
10330
 */
10331
const xmlChar *
10332
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10333
0
    if (ctxt == NULL)
10334
0
        return(NULL);
10335
10336
0
    return(ctxt->version);
10337
0
}
10338
10339
/**
10340
 * @since 2.14.0
10341
 *
10342
 * @param ctxt  parser context
10343
 * @returns the value from the standalone document declaration.
10344
 */
10345
int
10346
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10347
0
    if (ctxt == NULL)
10348
0
        return(0);
10349
10350
0
    return(ctxt->standalone);
10351
0
}
10352
10353
/**
10354
 * Parse an XML Misc* optional field.
10355
 *
10356
 * @deprecated Internal function, don't use.
10357
 *
10358
 *     [27] Misc ::= Comment | PI |  S
10359
 * @param ctxt  an XML parser context
10360
 */
10361
10362
void
10363
0
xmlParseMisc(xmlParserCtxt *ctxt) {
10364
0
    while (PARSER_STOPPED(ctxt) == 0) {
10365
0
        SKIP_BLANKS;
10366
0
        GROW;
10367
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10368
0
      xmlParsePI(ctxt);
10369
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10370
0
      xmlParseComment(ctxt);
10371
0
        } else {
10372
0
            break;
10373
0
        }
10374
0
    }
10375
0
}
10376
10377
static void
10378
3.96k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10379
3.96k
    xmlDocPtr doc;
10380
10381
    /*
10382
     * SAX: end of the document processing.
10383
     */
10384
3.96k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10385
3.96k
        ctxt->sax->endDocument(ctxt->userData);
10386
10387
    /*
10388
     * Remove locally kept entity definitions if the tree was not built
10389
     */
10390
3.96k
    doc = ctxt->myDoc;
10391
3.96k
    if ((doc != NULL) &&
10392
3.96k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10393
0
        xmlFreeDoc(doc);
10394
0
        ctxt->myDoc = NULL;
10395
0
    }
10396
3.96k
}
10397
10398
/**
10399
 * Parse an XML document and invoke the SAX handlers. This is useful
10400
 * if you're only interested in custom SAX callbacks. If you want a
10401
 * document tree, use #xmlCtxtParseDocument.
10402
 *
10403
 * @param ctxt  an XML parser context
10404
 * @returns 0, -1 in case of error.
10405
 */
10406
10407
int
10408
0
xmlParseDocument(xmlParserCtxt *ctxt) {
10409
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10410
0
        return(-1);
10411
10412
0
    GROW;
10413
10414
    /*
10415
     * SAX: detecting the level.
10416
     */
10417
0
    xmlCtxtInitializeLate(ctxt);
10418
10419
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10420
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10421
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10422
0
    }
10423
10424
0
    xmlDetectEncoding(ctxt);
10425
10426
0
    if (CUR == 0) {
10427
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10428
0
  return(-1);
10429
0
    }
10430
10431
0
    GROW;
10432
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10433
10434
  /*
10435
   * Note that we will switch encoding on the fly.
10436
   */
10437
0
  xmlParseXMLDecl(ctxt);
10438
0
  SKIP_BLANKS;
10439
0
    } else {
10440
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10441
0
        if (ctxt->version == NULL) {
10442
0
            xmlErrMemory(ctxt);
10443
0
            return(-1);
10444
0
        }
10445
0
    }
10446
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10447
0
        ctxt->sax->startDocument(ctxt->userData);
10448
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10449
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10450
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10451
0
    }
10452
10453
    /*
10454
     * The Misc part of the Prolog
10455
     */
10456
0
    xmlParseMisc(ctxt);
10457
10458
    /*
10459
     * Then possibly doc type declaration(s) and more Misc
10460
     * (doctypedecl Misc*)?
10461
     */
10462
0
    GROW;
10463
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10464
10465
0
  ctxt->inSubset = 1;
10466
0
  xmlParseDocTypeDecl(ctxt);
10467
0
  if (RAW == '[') {
10468
0
      xmlParseInternalSubset(ctxt);
10469
0
  } else if (RAW == '>') {
10470
0
            NEXT;
10471
0
        }
10472
10473
  /*
10474
   * Create and update the external subset.
10475
   */
10476
0
  ctxt->inSubset = 2;
10477
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10478
0
      (!ctxt->disableSAX))
10479
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10480
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10481
0
  ctxt->inSubset = 0;
10482
10483
0
        xmlCleanSpecialAttr(ctxt);
10484
10485
0
  xmlParseMisc(ctxt);
10486
0
    }
10487
10488
    /*
10489
     * Time to start parsing the tree itself
10490
     */
10491
0
    GROW;
10492
0
    if (RAW != '<') {
10493
0
        if (ctxt->wellFormed)
10494
0
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10495
0
                           "Start tag expected, '<' not found\n");
10496
0
    } else {
10497
0
  xmlParseElement(ctxt);
10498
10499
  /*
10500
   * The Misc part at the end
10501
   */
10502
0
  xmlParseMisc(ctxt);
10503
10504
0
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10505
0
    }
10506
10507
0
    ctxt->instate = XML_PARSER_EOF;
10508
0
    xmlFinishDocument(ctxt);
10509
10510
0
    if (! ctxt->wellFormed) {
10511
0
  ctxt->valid = 0;
10512
0
  return(-1);
10513
0
    }
10514
10515
0
    return(0);
10516
0
}
10517
10518
/**
10519
 * Parse a general parsed entity
10520
 * An external general parsed entity is well-formed if it matches the
10521
 * production labeled extParsedEnt.
10522
 *
10523
 * @deprecated Internal function, don't use.
10524
 *
10525
 *     [78] extParsedEnt ::= TextDecl? content
10526
 *
10527
 * @param ctxt  an XML parser context
10528
 * @returns 0, -1 in case of error. the parser context is augmented
10529
 *                as a result of the parsing.
10530
 */
10531
10532
int
10533
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10534
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10535
0
        return(-1);
10536
10537
0
    xmlCtxtInitializeLate(ctxt);
10538
10539
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10540
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10541
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10542
0
    }
10543
10544
0
    xmlDetectEncoding(ctxt);
10545
10546
0
    if (CUR == 0) {
10547
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10548
0
    }
10549
10550
    /*
10551
     * Check for the XMLDecl in the Prolog.
10552
     */
10553
0
    GROW;
10554
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10555
10556
  /*
10557
   * Note that we will switch encoding on the fly.
10558
   */
10559
0
  xmlParseXMLDecl(ctxt);
10560
0
  SKIP_BLANKS;
10561
0
    } else {
10562
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10563
0
    }
10564
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10565
0
        ctxt->sax->startDocument(ctxt->userData);
10566
10567
    /*
10568
     * Doing validity checking on chunk doesn't make sense
10569
     */
10570
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10571
0
    ctxt->validate = 0;
10572
0
    ctxt->depth = 0;
10573
10574
0
    xmlParseContentInternal(ctxt);
10575
10576
0
    if (ctxt->input->cur < ctxt->input->end)
10577
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10578
10579
    /*
10580
     * SAX: end of the document processing.
10581
     */
10582
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10583
0
        ctxt->sax->endDocument(ctxt->userData);
10584
10585
0
    if (! ctxt->wellFormed) return(-1);
10586
0
    return(0);
10587
0
}
10588
10589
#ifdef LIBXML_PUSH_ENABLED
10590
/************************************************************************
10591
 *                  *
10592
 *    Progressive parsing interfaces        *
10593
 *                  *
10594
 ************************************************************************/
10595
10596
/**
10597
 * Check whether the input buffer contains a character.
10598
 *
10599
 * @param ctxt  an XML parser context
10600
 * @param c  character
10601
 */
10602
static int
10603
169k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10604
169k
    const xmlChar *cur;
10605
10606
169k
    if (ctxt->checkIndex == 0) {
10607
166k
        cur = ctxt->input->cur + 1;
10608
166k
    } else {
10609
3.25k
        cur = ctxt->input->cur + ctxt->checkIndex;
10610
3.25k
    }
10611
10612
169k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10613
3.62k
        size_t index = ctxt->input->end - ctxt->input->cur;
10614
10615
3.62k
        if (index > LONG_MAX) {
10616
0
            ctxt->checkIndex = 0;
10617
0
            return(1);
10618
0
        }
10619
3.62k
        ctxt->checkIndex = index;
10620
3.62k
        return(0);
10621
166k
    } else {
10622
166k
        ctxt->checkIndex = 0;
10623
166k
        return(1);
10624
166k
    }
10625
169k
}
10626
10627
/**
10628
 * Check whether the input buffer contains a string.
10629
 *
10630
 * @param ctxt  an XML parser context
10631
 * @param startDelta  delta to apply at the start
10632
 * @param str  string
10633
 * @param strLen  length of string
10634
 */
10635
static const xmlChar *
10636
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10637
982k
                     const char *str, size_t strLen) {
10638
982k
    const xmlChar *cur, *term;
10639
10640
982k
    if (ctxt->checkIndex == 0) {
10641
934k
        cur = ctxt->input->cur + startDelta;
10642
934k
    } else {
10643
48.2k
        cur = ctxt->input->cur + ctxt->checkIndex;
10644
48.2k
    }
10645
10646
982k
    term = BAD_CAST strstr((const char *) cur, str);
10647
982k
    if (term == NULL) {
10648
51.8k
        const xmlChar *end = ctxt->input->end;
10649
51.8k
        size_t index;
10650
10651
        /* Rescan (strLen - 1) characters. */
10652
51.8k
        if ((size_t) (end - cur) < strLen)
10653
1.31k
            end = cur;
10654
50.4k
        else
10655
50.4k
            end -= strLen - 1;
10656
51.8k
        index = end - ctxt->input->cur;
10657
51.8k
        if (index > LONG_MAX) {
10658
0
            ctxt->checkIndex = 0;
10659
0
            return(ctxt->input->end - strLen);
10660
0
        }
10661
51.8k
        ctxt->checkIndex = index;
10662
931k
    } else {
10663
931k
        ctxt->checkIndex = 0;
10664
931k
    }
10665
10666
982k
    return(term);
10667
982k
}
10668
10669
/**
10670
 * Check whether the input buffer contains terminated char data.
10671
 *
10672
 * @param ctxt  an XML parser context
10673
 */
10674
static int
10675
147k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10676
147k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10677
147k
    const xmlChar *end = ctxt->input->end;
10678
147k
    size_t index;
10679
10680
2.39M
    while (cur < end) {
10681
2.38M
        if ((*cur == '<') || (*cur == '&')) {
10682
134k
            ctxt->checkIndex = 0;
10683
134k
            return(1);
10684
134k
        }
10685
2.24M
        cur++;
10686
2.24M
    }
10687
10688
12.4k
    index = cur - ctxt->input->cur;
10689
12.4k
    if (index > LONG_MAX) {
10690
0
        ctxt->checkIndex = 0;
10691
0
        return(1);
10692
0
    }
10693
12.4k
    ctxt->checkIndex = index;
10694
12.4k
    return(0);
10695
12.4k
}
10696
10697
/**
10698
 * Check whether there's enough data in the input buffer to finish parsing
10699
 * a start tag. This has to take quotes into account.
10700
 *
10701
 * @param ctxt  an XML parser context
10702
 */
10703
static int
10704
1.48M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10705
1.48M
    const xmlChar *cur;
10706
1.48M
    const xmlChar *end = ctxt->input->end;
10707
1.48M
    int state = ctxt->endCheckState;
10708
1.48M
    size_t index;
10709
10710
1.48M
    if (ctxt->checkIndex == 0)
10711
1.35M
        cur = ctxt->input->cur + 1;
10712
130k
    else
10713
130k
        cur = ctxt->input->cur + ctxt->checkIndex;
10714
10715
300M
    while (cur < end) {
10716
300M
        if (state) {
10717
265M
            if (*cur == state)
10718
593k
                state = 0;
10719
265M
        } else if (*cur == '\'' || *cur == '"') {
10720
596k
            state = *cur;
10721
34.7M
        } else if (*cur == '>') {
10722
1.34M
            ctxt->checkIndex = 0;
10723
1.34M
            ctxt->endCheckState = 0;
10724
1.34M
            return(1);
10725
1.34M
        }
10726
299M
        cur++;
10727
299M
    }
10728
10729
137k
    index = cur - ctxt->input->cur;
10730
137k
    if (index > LONG_MAX) {
10731
0
        ctxt->checkIndex = 0;
10732
0
        ctxt->endCheckState = 0;
10733
0
        return(1);
10734
0
    }
10735
137k
    ctxt->checkIndex = index;
10736
137k
    ctxt->endCheckState = state;
10737
137k
    return(0);
10738
137k
}
10739
10740
/**
10741
 * Check whether there's enough data in the input buffer to finish parsing
10742
 * the internal subset.
10743
 *
10744
 * @param ctxt  an XML parser context
10745
 */
10746
static int
10747
35.9k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10748
    /*
10749
     * Sorry, but progressive parsing of the internal subset is not
10750
     * supported. We first check that the full content of the internal
10751
     * subset is available and parsing is launched only at that point.
10752
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10753
     * not in a ']]>' sequence which are conditional sections.
10754
     */
10755
35.9k
    const xmlChar *cur, *start;
10756
35.9k
    const xmlChar *end = ctxt->input->end;
10757
35.9k
    int state = ctxt->endCheckState;
10758
35.9k
    size_t index;
10759
10760
35.9k
    if (ctxt->checkIndex == 0) {
10761
6.98k
        cur = ctxt->input->cur + 1;
10762
28.9k
    } else {
10763
28.9k
        cur = ctxt->input->cur + ctxt->checkIndex;
10764
28.9k
    }
10765
35.9k
    start = cur;
10766
10767
129M
    while (cur < end) {
10768
129M
        if (state == '-') {
10769
4.53M
            if ((*cur == '-') &&
10770
4.53M
                (cur[1] == '-') &&
10771
4.53M
                (cur[2] == '>')) {
10772
83.6k
                state = 0;
10773
83.6k
                cur += 3;
10774
83.6k
                start = cur;
10775
83.6k
                continue;
10776
83.6k
            }
10777
4.53M
        }
10778
124M
        else if (state == ']') {
10779
1.05M
            if (*cur == '>') {
10780
3.64k
                ctxt->checkIndex = 0;
10781
3.64k
                ctxt->endCheckState = 0;
10782
3.64k
                return(1);
10783
3.64k
            }
10784
1.05M
            if (IS_BLANK_CH(*cur)) {
10785
5.06k
                state = ' ';
10786
1.04M
            } else if (*cur != ']') {
10787
79.0k
                state = 0;
10788
79.0k
                start = cur;
10789
79.0k
                continue;
10790
79.0k
            }
10791
1.05M
        }
10792
123M
        else if (state == ' ') {
10793
118k
            if (*cur == '>') {
10794
38
                ctxt->checkIndex = 0;
10795
38
                ctxt->endCheckState = 0;
10796
38
                return(1);
10797
38
            }
10798
118k
            if (!IS_BLANK_CH(*cur)) {
10799
5.01k
                state = 0;
10800
5.01k
                start = cur;
10801
5.01k
                continue;
10802
5.01k
            }
10803
118k
        }
10804
123M
        else if (state != 0) {
10805
105M
            if (*cur == state) {
10806
215k
                state = 0;
10807
215k
                start = cur + 1;
10808
215k
            }
10809
105M
        }
10810
18.4M
        else if (*cur == '<') {
10811
315k
            if ((cur[1] == '!') &&
10812
315k
                (cur[2] == '-') &&
10813
315k
                (cur[3] == '-')) {
10814
83.8k
                state = '-';
10815
83.8k
                cur += 4;
10816
                /* Don't treat <!--> as comment */
10817
83.8k
                start = cur;
10818
83.8k
                continue;
10819
83.8k
            }
10820
315k
        }
10821
18.1M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10822
304k
            state = *cur;
10823
304k
        }
10824
10825
129M
        cur++;
10826
129M
    }
10827
10828
    /*
10829
     * Rescan the three last characters to detect "<!--" and "-->"
10830
     * split across chunks.
10831
     */
10832
32.2k
    if ((state == 0) || (state == '-')) {
10833
10.4k
        if (cur - start < 3)
10834
850
            cur = start;
10835
9.59k
        else
10836
9.59k
            cur -= 3;
10837
10.4k
    }
10838
32.2k
    index = cur - ctxt->input->cur;
10839
32.2k
    if (index > LONG_MAX) {
10840
0
        ctxt->checkIndex = 0;
10841
0
        ctxt->endCheckState = 0;
10842
0
        return(1);
10843
0
    }
10844
32.2k
    ctxt->checkIndex = index;
10845
32.2k
    ctxt->endCheckState = state;
10846
32.2k
    return(0);
10847
32.2k
}
10848
10849
/**
10850
 * Try to progress on parsing
10851
 *
10852
 * @param ctxt  an XML parser context
10853
 * @param terminate  last chunk indicator
10854
 * @returns zero if no parsing was possible
10855
 */
10856
static int
10857
286k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10858
286k
    int ret = 0;
10859
286k
    size_t avail;
10860
286k
    xmlChar cur, next;
10861
10862
286k
    if (ctxt->input == NULL)
10863
0
        return(0);
10864
10865
286k
    if ((ctxt->input != NULL) &&
10866
286k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10867
22.8k
        xmlParserShrink(ctxt);
10868
22.8k
    }
10869
10870
5.20M
    while (ctxt->disableSAX == 0) {
10871
5.18M
        avail = ctxt->input->end - ctxt->input->cur;
10872
5.18M
        if (avail < 1)
10873
22.3k
      goto done;
10874
5.16M
        switch (ctxt->instate) {
10875
0
            case XML_PARSER_EOF:
10876
          /*
10877
     * Document parsing is done !
10878
     */
10879
0
          goto done;
10880
28.0k
            case XML_PARSER_START:
10881
                /*
10882
                 * Very first chars read from the document flow.
10883
                 */
10884
28.0k
                if ((!terminate) && (avail < 4))
10885
347
                    goto done;
10886
10887
                /*
10888
                 * We need more bytes to detect EBCDIC code pages.
10889
                 * See xmlDetectEBCDIC.
10890
                 */
10891
27.6k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10892
27.6k
                    (!terminate) && (avail < 200))
10893
239
                    goto done;
10894
10895
27.4k
                xmlDetectEncoding(ctxt);
10896
27.4k
                ctxt->instate = XML_PARSER_XML_DECL;
10897
27.4k
    break;
10898
10899
43.9k
            case XML_PARSER_XML_DECL:
10900
43.9k
    if ((!terminate) && (avail < 2))
10901
28
        goto done;
10902
43.8k
    cur = ctxt->input->cur[0];
10903
43.8k
    next = ctxt->input->cur[1];
10904
43.8k
          if ((cur == '<') && (next == '?')) {
10905
        /* PI or XML decl */
10906
23.7k
        if ((!terminate) &&
10907
23.7k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10908
16.5k
      goto done;
10909
7.19k
        if ((ctxt->input->cur[2] == 'x') &&
10910
7.19k
      (ctxt->input->cur[3] == 'm') &&
10911
7.19k
      (ctxt->input->cur[4] == 'l') &&
10912
7.19k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10913
4.90k
      ret += 5;
10914
4.90k
      xmlParseXMLDecl(ctxt);
10915
4.90k
        } else {
10916
2.29k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917
2.29k
                        if (ctxt->version == NULL) {
10918
0
                            xmlErrMemory(ctxt);
10919
0
                            break;
10920
0
                        }
10921
2.29k
        }
10922
20.1k
    } else {
10923
20.1k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924
20.1k
        if (ctxt->version == NULL) {
10925
0
            xmlErrMemory(ctxt);
10926
0
      break;
10927
0
        }
10928
20.1k
    }
10929
27.3k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10930
27.3k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10931
27.3k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10932
27.3k
                }
10933
27.3k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10934
27.3k
                    (!ctxt->disableSAX))
10935
26.3k
                    ctxt->sax->startDocument(ctxt->userData);
10936
27.3k
                ctxt->instate = XML_PARSER_MISC;
10937
27.3k
    break;
10938
1.46M
            case XML_PARSER_START_TAG: {
10939
1.46M
          const xmlChar *name;
10940
1.46M
    const xmlChar *prefix = NULL;
10941
1.46M
    const xmlChar *URI = NULL;
10942
1.46M
                int line = ctxt->input->line;
10943
1.46M
    int nbNs = 0;
10944
10945
1.46M
    if ((!terminate) && (avail < 2))
10946
50
        goto done;
10947
1.46M
    cur = ctxt->input->cur[0];
10948
1.46M
          if (cur != '<') {
10949
615
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950
615
                                   "Start tag expected, '<' not found");
10951
615
                    ctxt->instate = XML_PARSER_EOF;
10952
615
                    xmlFinishDocument(ctxt);
10953
615
        goto done;
10954
615
    }
10955
1.46M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10956
100k
                    goto done;
10957
1.36M
    if (ctxt->spaceNr == 0)
10958
0
        spacePush(ctxt, -1);
10959
1.36M
    else if (*ctxt->space == -2)
10960
640k
        spacePush(ctxt, -1);
10961
721k
    else
10962
721k
        spacePush(ctxt, *ctxt->space);
10963
1.36M
#ifdef LIBXML_SAX1_ENABLED
10964
1.36M
    if (ctxt->sax2)
10965
0
#endif /* LIBXML_SAX1_ENABLED */
10966
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10967
1.36M
#ifdef LIBXML_SAX1_ENABLED
10968
1.36M
    else
10969
1.36M
        name = xmlParseStartTag(ctxt);
10970
1.36M
#endif /* LIBXML_SAX1_ENABLED */
10971
1.36M
    if (name == NULL) {
10972
1.27k
        spacePop(ctxt);
10973
1.27k
                    ctxt->instate = XML_PARSER_EOF;
10974
1.27k
                    xmlFinishDocument(ctxt);
10975
1.27k
        goto done;
10976
1.27k
    }
10977
1.36M
#ifdef LIBXML_VALID_ENABLED
10978
    /*
10979
     * [ VC: Root Element Type ]
10980
     * The Name in the document type declaration must match
10981
     * the element type of the root element.
10982
     */
10983
1.36M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984
1.36M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986
1.36M
#endif /* LIBXML_VALID_ENABLED */
10987
10988
    /*
10989
     * Check for an Empty Element.
10990
     */
10991
1.36M
    if ((RAW == '/') && (NXT(1) == '>')) {
10992
237k
        SKIP(2);
10993
10994
237k
        if (ctxt->sax2) {
10995
0
      if ((ctxt->sax != NULL) &&
10996
0
          (ctxt->sax->endElementNs != NULL) &&
10997
0
          (!ctxt->disableSAX))
10998
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10999
0
                                  prefix, URI);
11000
0
      if (nbNs > 0)
11001
0
          xmlParserNsPop(ctxt, nbNs);
11002
0
#ifdef LIBXML_SAX1_ENABLED
11003
237k
        } else {
11004
237k
      if ((ctxt->sax != NULL) &&
11005
237k
          (ctxt->sax->endElement != NULL) &&
11006
237k
          (!ctxt->disableSAX))
11007
237k
          ctxt->sax->endElement(ctxt->userData, name);
11008
237k
#endif /* LIBXML_SAX1_ENABLED */
11009
237k
        }
11010
237k
        spacePop(ctxt);
11011
1.12M
    } else if (RAW == '>') {
11012
1.11M
        NEXT;
11013
1.11M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11014
1.11M
    } else {
11015
6.34k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11016
6.34k
           "Couldn't find end of Start Tag %s\n",
11017
6.34k
           name);
11018
6.34k
        nodePop(ctxt);
11019
6.34k
        spacePop(ctxt);
11020
6.34k
                    if (nbNs > 0)
11021
0
                        xmlParserNsPop(ctxt, nbNs);
11022
6.34k
    }
11023
11024
1.36M
                if (ctxt->nameNr == 0)
11025
3.95k
                    ctxt->instate = XML_PARSER_EPILOG;
11026
1.35M
                else
11027
1.35M
                    ctxt->instate = XML_PARSER_CONTENT;
11028
1.36M
                break;
11029
1.36M
      }
11030
3.11M
            case XML_PARSER_CONTENT: {
11031
3.11M
    cur = ctxt->input->cur[0];
11032
11033
3.11M
    if (cur == '<') {
11034
2.20M
                    if ((!terminate) && (avail < 2))
11035
1.17k
                        goto done;
11036
2.20M
        next = ctxt->input->cur[1];
11037
11038
2.20M
                    if (next == '/') {
11039
150k
                        ctxt->instate = XML_PARSER_END_TAG;
11040
150k
                        break;
11041
2.05M
                    } else if (next == '?') {
11042
334k
                        if ((!terminate) &&
11043
334k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11044
4.39k
                            goto done;
11045
329k
                        xmlParsePI(ctxt);
11046
329k
                        ctxt->instate = XML_PARSER_CONTENT;
11047
329k
                        break;
11048
1.71M
                    } else if (next == '!') {
11049
374k
                        if ((!terminate) && (avail < 3))
11050
190
                            goto done;
11051
374k
                        next = ctxt->input->cur[2];
11052
11053
374k
                        if (next == '-') {
11054
260k
                            if ((!terminate) && (avail < 4))
11055
167
                                goto done;
11056
260k
                            if (ctxt->input->cur[3] == '-') {
11057
260k
                                if ((!terminate) &&
11058
260k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11059
2.29k
                                    goto done;
11060
258k
                                xmlParseComment(ctxt);
11061
258k
                                ctxt->instate = XML_PARSER_CONTENT;
11062
258k
                                break;
11063
260k
                            }
11064
260k
                        } else if (next == '[') {
11065
113k
                            if ((!terminate) && (avail < 9))
11066
340
                                goto done;
11067
113k
                            if ((ctxt->input->cur[2] == '[') &&
11068
113k
                                (ctxt->input->cur[3] == 'C') &&
11069
113k
                                (ctxt->input->cur[4] == 'D') &&
11070
113k
                                (ctxt->input->cur[5] == 'A') &&
11071
113k
                                (ctxt->input->cur[6] == 'T') &&
11072
113k
                                (ctxt->input->cur[7] == 'A') &&
11073
113k
                                (ctxt->input->cur[8] == '[')) {
11074
113k
                                if ((!terminate) &&
11075
113k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11076
14.9k
                                    goto done;
11077
98.3k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11078
98.3k
                                xmlParseCDSect(ctxt);
11079
98.3k
                                ctxt->instate = XML_PARSER_CONTENT;
11080
98.3k
                                break;
11081
113k
                            }
11082
113k
                        }
11083
374k
                    }
11084
2.20M
    } else if (cur == '&') {
11085
19.3k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11086
1.76k
      goto done;
11087
17.6k
        xmlParseReference(ctxt);
11088
17.6k
                    break;
11089
886k
    } else {
11090
        /* TODO Avoid the extra copy, handle directly !!! */
11091
        /*
11092
         * Goal of the following test is:
11093
         *  - minimize calls to the SAX 'character' callback
11094
         *    when they are mergeable
11095
         *  - handle an problem for isBlank when we only parse
11096
         *    a sequence of blank chars and the next one is
11097
         *    not available to check against '<' presence.
11098
         *  - tries to homogenize the differences in SAX
11099
         *    callbacks between the push and pull versions
11100
         *    of the parser.
11101
         */
11102
886k
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11103
149k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11104
12.4k
          goto done;
11105
149k
                    }
11106
873k
                    ctxt->checkIndex = 0;
11107
873k
        xmlParseCharDataInternal(ctxt, !terminate);
11108
873k
                    break;
11109
886k
    }
11110
11111
1.34M
                ctxt->instate = XML_PARSER_START_TAG;
11112
1.34M
    break;
11113
3.11M
      }
11114
151k
            case XML_PARSER_END_TAG:
11115
151k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11116
1.86k
        goto done;
11117
149k
    if (ctxt->sax2) {
11118
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11119
0
        nameNsPop(ctxt);
11120
0
    }
11121
149k
#ifdef LIBXML_SAX1_ENABLED
11122
149k
      else
11123
149k
        xmlParseEndTag1(ctxt, 0);
11124
149k
#endif /* LIBXML_SAX1_ENABLED */
11125
149k
    if (ctxt->nameNr == 0) {
11126
1.92k
        ctxt->instate = XML_PARSER_EPILOG;
11127
148k
    } else {
11128
148k
        ctxt->instate = XML_PARSER_CONTENT;
11129
148k
    }
11130
149k
    break;
11131
314k
            case XML_PARSER_MISC:
11132
325k
            case XML_PARSER_PROLOG:
11133
329k
            case XML_PARSER_EPILOG:
11134
329k
    SKIP_BLANKS;
11135
329k
                avail = ctxt->input->end - ctxt->input->cur;
11136
329k
    if (avail < 1)
11137
673
        goto done;
11138
328k
    if (ctxt->input->cur[0] == '<') {
11139
328k
                    if ((!terminate) && (avail < 2))
11140
311
                        goto done;
11141
327k
                    next = ctxt->input->cur[1];
11142
327k
                    if (next == '?') {
11143
218k
                        if ((!terminate) &&
11144
218k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11145
6.14k
                            goto done;
11146
212k
                        xmlParsePI(ctxt);
11147
212k
                        break;
11148
218k
                    } else if (next == '!') {
11149
92.7k
                        if ((!terminate) && (avail < 3))
11150
47
                            goto done;
11151
11152
92.7k
                        if (ctxt->input->cur[2] == '-') {
11153
45.6k
                            if ((!terminate) && (avail < 4))
11154
49
                                goto done;
11155
45.6k
                            if (ctxt->input->cur[3] == '-') {
11156
45.5k
                                if ((!terminate) &&
11157
45.5k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11158
7.55k
                                    goto done;
11159
38.0k
                                xmlParseComment(ctxt);
11160
38.0k
                                break;
11161
45.5k
                            }
11162
47.0k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11163
47.0k
                            if ((!terminate) && (avail < 9))
11164
33
                                goto done;
11165
47.0k
                            if ((ctxt->input->cur[2] == 'D') &&
11166
47.0k
                                (ctxt->input->cur[3] == 'O') &&
11167
47.0k
                                (ctxt->input->cur[4] == 'C') &&
11168
47.0k
                                (ctxt->input->cur[5] == 'T') &&
11169
47.0k
                                (ctxt->input->cur[6] == 'Y') &&
11170
47.0k
                                (ctxt->input->cur[7] == 'P') &&
11171
47.0k
                                (ctxt->input->cur[8] == 'E')) {
11172
46.9k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11173
37.6k
                                    goto done;
11174
9.29k
                                ctxt->inSubset = 1;
11175
9.29k
                                xmlParseDocTypeDecl(ctxt);
11176
9.29k
                                if (RAW == '[') {
11177
8.60k
                                    ctxt->instate = XML_PARSER_DTD;
11178
8.60k
                                } else {
11179
689
                                    if (RAW == '>')
11180
509
                                        NEXT;
11181
                                    /*
11182
                                     * Create and update the external subset.
11183
                                     */
11184
689
                                    ctxt->inSubset = 2;
11185
689
                                    if ((ctxt->sax != NULL) &&
11186
689
                                        (!ctxt->disableSAX) &&
11187
689
                                        (ctxt->sax->externalSubset != NULL))
11188
501
                                        ctxt->sax->externalSubset(
11189
501
                                                ctxt->userData,
11190
501
                                                ctxt->intSubName,
11191
501
                                                ctxt->extSubSystem,
11192
501
                                                ctxt->extSubURI);
11193
689
                                    ctxt->inSubset = 0;
11194
689
                                    xmlCleanSpecialAttr(ctxt);
11195
689
                                    ctxt->instate = XML_PARSER_PROLOG;
11196
689
                                }
11197
9.29k
                                break;
11198
46.9k
                            }
11199
47.0k
                        }
11200
92.7k
                    }
11201
327k
                }
11202
11203
17.5k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11204
139
                    if (ctxt->errNo == XML_ERR_OK)
11205
139
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11206
139
        ctxt->instate = XML_PARSER_EOF;
11207
139
                    xmlFinishDocument(ctxt);
11208
17.3k
                } else {
11209
17.3k
        ctxt->instate = XML_PARSER_START_TAG;
11210
17.3k
    }
11211
17.5k
    break;
11212
40.8k
            case XML_PARSER_DTD: {
11213
40.8k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11214
32.2k
                    goto done;
11215
8.58k
    xmlParseInternalSubset(ctxt);
11216
8.58k
    ctxt->inSubset = 2;
11217
8.58k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11218
8.58k
        (ctxt->sax->externalSubset != NULL))
11219
2.25k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11220
2.25k
          ctxt->extSubSystem, ctxt->extSubURI);
11221
8.58k
    ctxt->inSubset = 0;
11222
8.58k
    xmlCleanSpecialAttr(ctxt);
11223
8.58k
    ctxt->instate = XML_PARSER_PROLOG;
11224
8.58k
                break;
11225
40.8k
      }
11226
0
            default:
11227
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11228
0
      "PP: internal error\n");
11229
0
    ctxt->instate = XML_PARSER_EOF;
11230
0
    break;
11231
5.16M
  }
11232
5.16M
    }
11233
286k
done:
11234
286k
    return(ret);
11235
286k
}
11236
11237
/**
11238
 * Parse a chunk of memory in push parser mode.
11239
 *
11240
 * Assumes that the parser context was initialized with
11241
 * #xmlCreatePushParserCtxt.
11242
 *
11243
 * The last chunk, which will often be empty, must be marked with
11244
 * the `terminate` flag. With the default SAX callbacks, the resulting
11245
 * document will be available in ctxt->myDoc. This pointer will not
11246
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11247
 * caller. If the document isn't well-formed, it will still be returned
11248
 * in ctxt->myDoc.
11249
 *
11250
 * As an exception, #xmlCtxtResetPush will free the document in
11251
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11252
 * the document.
11253
 *
11254
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11255
 * result document.
11256
 *
11257
 * @param ctxt  an XML parser context
11258
 * @param chunk  chunk of memory
11259
 * @param size  size of chunk in bytes
11260
 * @param terminate  last chunk indicator
11261
 * @returns an xmlParserErrors code (0 on success).
11262
 */
11263
int
11264
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11265
293k
              int terminate) {
11266
293k
    size_t curBase;
11267
293k
    size_t maxLength;
11268
293k
    size_t pos;
11269
293k
    int end_in_lf = 0;
11270
293k
    int res;
11271
11272
293k
    if ((ctxt == NULL) || (size < 0))
11273
0
        return(XML_ERR_ARGUMENT);
11274
293k
    if ((chunk == NULL) && (size > 0))
11275
0
        return(XML_ERR_ARGUMENT);
11276
293k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11277
0
        return(XML_ERR_ARGUMENT);
11278
293k
    if (ctxt->disableSAX != 0)
11279
6.27k
        return(ctxt->errNo);
11280
11281
286k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11282
286k
    if (ctxt->instate == XML_PARSER_START)
11283
28.0k
        xmlCtxtInitializeLate(ctxt);
11284
286k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11285
286k
        (chunk[size - 1] == '\r')) {
11286
7.75k
  end_in_lf = 1;
11287
7.75k
  size--;
11288
7.75k
    }
11289
11290
    /*
11291
     * Also push an empty chunk to make sure that the raw buffer
11292
     * will be flushed if there is an encoder.
11293
     */
11294
286k
    pos = ctxt->input->cur - ctxt->input->base;
11295
286k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11296
286k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11297
286k
    if (res < 0) {
11298
149
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11299
149
        xmlHaltParser(ctxt);
11300
149
        return(ctxt->errNo);
11301
149
    }
11302
11303
286k
    xmlParseTryOrFinish(ctxt, terminate);
11304
11305
286k
    curBase = ctxt->input->cur - ctxt->input->base;
11306
286k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11307
0
                XML_MAX_HUGE_LENGTH :
11308
286k
                XML_MAX_LOOKUP_LIMIT;
11309
286k
    if (curBase > maxLength) {
11310
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11311
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11312
0
        xmlHaltParser(ctxt);
11313
0
    }
11314
11315
286k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11316
22.5k
        return(ctxt->errNo);
11317
11318
264k
    if (end_in_lf == 1) {
11319
7.54k
  pos = ctxt->input->cur - ctxt->input->base;
11320
7.54k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11321
7.54k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11322
7.54k
        if (res < 0) {
11323
2
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11324
2
            xmlHaltParser(ctxt);
11325
2
            return(ctxt->errNo);
11326
2
        }
11327
7.54k
    }
11328
264k
    if (terminate) {
11329
  /*
11330
   * Check for termination
11331
   */
11332
4.74k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11333
4.74k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11334
2.80k
            if (ctxt->nameNr > 0) {
11335
2.66k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11336
2.66k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11337
2.66k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11338
2.66k
                        "Premature end of data in tag %s line %d\n",
11339
2.66k
                        name, line, NULL);
11340
2.66k
            } else if (ctxt->instate == XML_PARSER_START) {
11341
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11342
134
            } else {
11343
134
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11344
134
                               "Start tag expected, '<' not found\n");
11345
134
            }
11346
2.80k
        } else {
11347
1.94k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11348
1.94k
        }
11349
4.74k
  if (ctxt->instate != XML_PARSER_EOF) {
11350
1.94k
            ctxt->instate = XML_PARSER_EOF;
11351
1.94k
            xmlFinishDocument(ctxt);
11352
1.94k
  }
11353
4.74k
    }
11354
264k
    if (ctxt->wellFormed == 0)
11355
2.80k
  return((xmlParserErrors) ctxt->errNo);
11356
261k
    else
11357
261k
        return(0);
11358
264k
}
11359
11360
/************************************************************************
11361
 *                  *
11362
 *    I/O front end functions to the parser     *
11363
 *                  *
11364
 ************************************************************************/
11365
11366
/**
11367
 * Create a parser context for using the XML parser in push mode.
11368
 * See #xmlParseChunk.
11369
 *
11370
 * Passing an initial chunk is useless and deprecated.
11371
 *
11372
 * The push parser doesn't support recovery mode or the
11373
 * XML_PARSE_NOBLANKS option.
11374
 *
11375
 * `filename` is used as base URI to fetch external entities and for
11376
 * error reports.
11377
 *
11378
 * @param sax  a SAX handler (optional)
11379
 * @param user_data  user data for SAX callbacks (optional)
11380
 * @param chunk  initial chunk (optional, deprecated)
11381
 * @param size  size of initial chunk in bytes
11382
 * @param filename  file name or URI (optional)
11383
 * @returns the new parser context or NULL if a memory allocation
11384
 * failed.
11385
 */
11386
11387
xmlParserCtxt *
11388
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11389
27.4k
                        const char *chunk, int size, const char *filename) {
11390
27.4k
    xmlParserCtxtPtr ctxt;
11391
27.4k
    xmlParserInputPtr input;
11392
11393
27.4k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11394
27.4k
    if (ctxt == NULL)
11395
0
  return(NULL);
11396
11397
27.4k
    ctxt->options &= ~XML_PARSE_NODICT;
11398
27.4k
    ctxt->dictNames = 1;
11399
11400
27.4k
    input = xmlNewPushInput(filename, chunk, size);
11401
27.4k
    if (input == NULL) {
11402
0
  xmlFreeParserCtxt(ctxt);
11403
0
  return(NULL);
11404
0
    }
11405
27.4k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11406
0
        xmlFreeInputStream(input);
11407
0
        xmlFreeParserCtxt(ctxt);
11408
0
        return(NULL);
11409
0
    }
11410
11411
27.4k
    return(ctxt);
11412
27.4k
}
11413
#endif /* LIBXML_PUSH_ENABLED */
11414
11415
/**
11416
 * Blocks further parser processing
11417
 *
11418
 * @param ctxt  an XML parser context
11419
 */
11420
void
11421
294k
xmlStopParser(xmlParserCtxt *ctxt) {
11422
294k
    if (ctxt == NULL)
11423
0
        return;
11424
294k
    xmlHaltParser(ctxt);
11425
    /*
11426
     * TODO: Update ctxt->lastError and ctxt->wellFormed?
11427
     */
11428
294k
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11429
294k
        ctxt->errNo = XML_ERR_USER_STOP;
11430
294k
}
11431
11432
/**
11433
 * Create a parser context for using the XML parser with an existing
11434
 * I/O stream
11435
 *
11436
 * @param sax  a SAX handler (optional)
11437
 * @param user_data  user data for SAX callbacks (optional)
11438
 * @param ioread  an I/O read function
11439
 * @param ioclose  an I/O close function (optional)
11440
 * @param ioctx  an I/O handler
11441
 * @param enc  the charset encoding if known (deprecated)
11442
 * @returns the new parser context or NULL
11443
 */
11444
xmlParserCtxt *
11445
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11446
                      xmlInputReadCallback ioread,
11447
                      xmlInputCloseCallback ioclose,
11448
0
                      void *ioctx, xmlCharEncoding enc) {
11449
0
    xmlParserCtxtPtr ctxt;
11450
0
    xmlParserInputPtr input;
11451
0
    const char *encoding;
11452
11453
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11454
0
    if (ctxt == NULL)
11455
0
  return(NULL);
11456
11457
0
    encoding = xmlGetCharEncodingName(enc);
11458
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11459
0
                                  encoding, 0);
11460
0
    if (input == NULL) {
11461
0
  xmlFreeParserCtxt(ctxt);
11462
0
        return (NULL);
11463
0
    }
11464
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11465
0
        xmlFreeInputStream(input);
11466
0
        xmlFreeParserCtxt(ctxt);
11467
0
        return(NULL);
11468
0
    }
11469
11470
0
    return(ctxt);
11471
0
}
11472
11473
#ifdef LIBXML_VALID_ENABLED
11474
/************************************************************************
11475
 *                  *
11476
 *    Front ends when parsing a DTD       *
11477
 *                  *
11478
 ************************************************************************/
11479
11480
/**
11481
 * Parse a DTD.
11482
 *
11483
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11484
 * to make external entities work.
11485
 *
11486
 * @since 2.14.0
11487
 *
11488
 * @param ctxt  a parser context
11489
 * @param input  a parser input
11490
 * @param publicId  public ID of the DTD (optional)
11491
 * @param systemId  system ID of the DTD (optional)
11492
 * @returns the resulting xmlDtd or NULL in case of error.
11493
 * `input` will be freed by the function in any case.
11494
 */
11495
xmlDtd *
11496
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11497
0
                const xmlChar *publicId, const xmlChar *systemId) {
11498
0
    xmlDtdPtr ret = NULL;
11499
11500
0
    if ((ctxt == NULL) || (input == NULL)) {
11501
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11502
0
        xmlFreeInputStream(input);
11503
0
        return(NULL);
11504
0
    }
11505
11506
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11507
0
        xmlFreeInputStream(input);
11508
0
        return(NULL);
11509
0
    }
11510
11511
0
    if (publicId == NULL)
11512
0
        publicId = BAD_CAST "none";
11513
0
    if (systemId == NULL)
11514
0
        systemId = BAD_CAST "none";
11515
11516
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11517
0
    if (ctxt->myDoc == NULL) {
11518
0
        xmlErrMemory(ctxt);
11519
0
        goto error;
11520
0
    }
11521
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11522
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11523
0
                                       publicId, systemId);
11524
0
    if (ctxt->myDoc->extSubset == NULL) {
11525
0
        xmlErrMemory(ctxt);
11526
0
        xmlFreeDoc(ctxt->myDoc);
11527
0
        goto error;
11528
0
    }
11529
11530
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11531
11532
0
    if (ctxt->wellFormed) {
11533
0
        ret = ctxt->myDoc->extSubset;
11534
0
        ctxt->myDoc->extSubset = NULL;
11535
0
        if (ret != NULL) {
11536
0
            xmlNodePtr tmp;
11537
11538
0
            ret->doc = NULL;
11539
0
            tmp = ret->children;
11540
0
            while (tmp != NULL) {
11541
0
                tmp->doc = NULL;
11542
0
                tmp = tmp->next;
11543
0
            }
11544
0
        }
11545
0
    } else {
11546
0
        ret = NULL;
11547
0
    }
11548
0
    xmlFreeDoc(ctxt->myDoc);
11549
0
    ctxt->myDoc = NULL;
11550
11551
0
error:
11552
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11553
11554
0
    return(ret);
11555
0
}
11556
11557
/**
11558
 * Load and parse a DTD
11559
 *
11560
 * @deprecated Use #xmlCtxtParseDtd.
11561
 *
11562
 * @param sax  the SAX handler block or NULL
11563
 * @param input  an Input Buffer
11564
 * @param enc  the charset encoding if known
11565
 * @returns the resulting xmlDtd or NULL in case of error.
11566
 * `input` will be freed by the function in any case.
11567
 */
11568
11569
xmlDtd *
11570
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11571
0
        xmlCharEncoding enc) {
11572
0
    xmlDtdPtr ret = NULL;
11573
0
    xmlParserCtxtPtr ctxt;
11574
0
    xmlParserInputPtr pinput = NULL;
11575
11576
0
    if (input == NULL)
11577
0
  return(NULL);
11578
11579
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11580
0
    if (ctxt == NULL) {
11581
0
        xmlFreeParserInputBuffer(input);
11582
0
  return(NULL);
11583
0
    }
11584
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11585
11586
    /*
11587
     * generate a parser input from the I/O handler
11588
     */
11589
11590
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11591
0
    if (pinput == NULL) {
11592
0
        xmlFreeParserInputBuffer(input);
11593
0
  xmlFreeParserCtxt(ctxt);
11594
0
  return(NULL);
11595
0
    }
11596
11597
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11598
0
        xmlSwitchEncoding(ctxt, enc);
11599
0
    }
11600
11601
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11602
11603
0
    xmlFreeParserCtxt(ctxt);
11604
0
    return(ret);
11605
0
}
11606
11607
/**
11608
 * Load and parse an external subset.
11609
 *
11610
 * @deprecated Use #xmlCtxtParseDtd.
11611
 *
11612
 * @param sax  the SAX handler block
11613
 * @param publicId  public identifier of the DTD (optional)
11614
 * @param systemId  system identifier (URL) of the DTD
11615
 * @returns the resulting xmlDtd or NULL in case of error.
11616
 */
11617
11618
xmlDtd *
11619
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11620
0
               const xmlChar *systemId) {
11621
0
    xmlDtdPtr ret = NULL;
11622
0
    xmlParserCtxtPtr ctxt;
11623
0
    xmlParserInputPtr input = NULL;
11624
0
    xmlChar* systemIdCanonic;
11625
11626
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11627
11628
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11629
0
    if (ctxt == NULL) {
11630
0
  return(NULL);
11631
0
    }
11632
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11633
11634
    /*
11635
     * Canonicalise the system ID
11636
     */
11637
0
    systemIdCanonic = xmlCanonicPath(systemId);
11638
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11639
0
  xmlFreeParserCtxt(ctxt);
11640
0
  return(NULL);
11641
0
    }
11642
11643
    /*
11644
     * Ask the Entity resolver to load the damn thing
11645
     */
11646
11647
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11648
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11649
0
                                   systemIdCanonic);
11650
0
    if (input == NULL) {
11651
0
  xmlFreeParserCtxt(ctxt);
11652
0
  if (systemIdCanonic != NULL)
11653
0
      xmlFree(systemIdCanonic);
11654
0
  return(NULL);
11655
0
    }
11656
11657
0
    if (input->filename == NULL)
11658
0
  input->filename = (char *) systemIdCanonic;
11659
0
    else
11660
0
  xmlFree(systemIdCanonic);
11661
11662
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11663
11664
0
    xmlFreeParserCtxt(ctxt);
11665
0
    return(ret);
11666
0
}
11667
11668
11669
/**
11670
 * Load and parse an external subset.
11671
 *
11672
 * @param publicId  public identifier of the DTD (optional)
11673
 * @param systemId  system identifier (URL) of the DTD
11674
 * @returns the resulting xmlDtd or NULL in case of error.
11675
 */
11676
11677
xmlDtd *
11678
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11679
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11680
0
}
11681
#endif /* LIBXML_VALID_ENABLED */
11682
11683
/************************************************************************
11684
 *                  *
11685
 *    Front ends when parsing an Entity     *
11686
 *                  *
11687
 ************************************************************************/
11688
11689
static xmlNodePtr
11690
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11691
17.7k
                            int hasTextDecl, int buildTree) {
11692
17.7k
    xmlNodePtr root = NULL;
11693
17.7k
    xmlNodePtr list = NULL;
11694
17.7k
    xmlChar *rootName = BAD_CAST "#root";
11695
17.7k
    int result;
11696
11697
17.7k
    if (buildTree) {
11698
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11699
0
        if (root == NULL) {
11700
0
            xmlErrMemory(ctxt);
11701
0
            goto error;
11702
0
        }
11703
0
    }
11704
11705
17.7k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11706
0
        goto error;
11707
11708
17.7k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11709
17.7k
    spacePush(ctxt, -1);
11710
11711
17.7k
    if (buildTree)
11712
0
        nodePush(ctxt, root);
11713
11714
17.7k
    if (hasTextDecl) {
11715
0
        xmlDetectEncoding(ctxt);
11716
11717
        /*
11718
         * Parse a possible text declaration first
11719
         */
11720
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11721
0
            (IS_BLANK_CH(NXT(5)))) {
11722
0
            xmlParseTextDecl(ctxt);
11723
            /*
11724
             * An XML-1.0 document can't reference an entity not XML-1.0
11725
             */
11726
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11727
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11728
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11729
0
                               "Version mismatch between document and "
11730
0
                               "entity\n");
11731
0
            }
11732
0
        }
11733
0
    }
11734
11735
17.7k
    xmlParseContentInternal(ctxt);
11736
11737
17.7k
    if (ctxt->input->cur < ctxt->input->end)
11738
105
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11739
11740
17.7k
    if ((ctxt->wellFormed) ||
11741
17.7k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11742
16.6k
        if (root != NULL) {
11743
0
            xmlNodePtr cur;
11744
11745
            /*
11746
             * Unlink newly created node list.
11747
             */
11748
0
            list = root->children;
11749
0
            root->children = NULL;
11750
0
            root->last = NULL;
11751
0
            for (cur = list; cur != NULL; cur = cur->next)
11752
0
                cur->parent = NULL;
11753
0
        }
11754
16.6k
    }
11755
11756
    /*
11757
     * Read the rest of the stream in case of errors. We want
11758
     * to account for the whole entity size.
11759
     */
11760
17.7k
    do {
11761
17.7k
        ctxt->input->cur = ctxt->input->end;
11762
17.7k
        xmlParserShrink(ctxt);
11763
17.7k
        result = xmlParserGrow(ctxt);
11764
17.7k
    } while (result > 0);
11765
11766
17.7k
    if (buildTree)
11767
0
        nodePop(ctxt);
11768
11769
17.7k
    namePop(ctxt);
11770
17.7k
    spacePop(ctxt);
11771
11772
17.7k
    xmlCtxtPopInput(ctxt);
11773
11774
17.7k
error:
11775
17.7k
    xmlFreeNode(root);
11776
11777
17.7k
    return(list);
11778
17.7k
}
11779
11780
static void
11781
17.7k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11782
17.7k
    xmlParserInputPtr input;
11783
17.7k
    xmlNodePtr list;
11784
17.7k
    unsigned long consumed;
11785
17.7k
    int isExternal;
11786
17.7k
    int buildTree;
11787
17.7k
    int oldMinNsIndex;
11788
17.7k
    int oldNodelen, oldNodemem;
11789
11790
17.7k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11791
17.7k
    buildTree = (ctxt->node != NULL);
11792
11793
    /*
11794
     * Recursion check
11795
     */
11796
17.7k
    if (ent->flags & XML_ENT_EXPANDING) {
11797
19
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11798
19
        xmlHaltParser(ctxt);
11799
19
        goto error;
11800
19
    }
11801
11802
    /*
11803
     * Load entity
11804
     */
11805
17.7k
    input = xmlNewEntityInputStream(ctxt, ent);
11806
17.7k
    if (input == NULL)
11807
0
        goto error;
11808
11809
    /*
11810
     * When building a tree, we need to limit the scope of namespace
11811
     * declarations, so that entities don't reference xmlNs structs
11812
     * from the parent of a reference.
11813
     */
11814
17.7k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11815
17.7k
    if (buildTree)
11816
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11817
11818
17.7k
    oldNodelen = ctxt->nodelen;
11819
17.7k
    oldNodemem = ctxt->nodemem;
11820
17.7k
    ctxt->nodelen = 0;
11821
17.7k
    ctxt->nodemem = 0;
11822
11823
    /*
11824
     * Parse content
11825
     *
11826
     * This initiates a recursive call chain:
11827
     *
11828
     * - xmlCtxtParseContentInternal
11829
     * - xmlParseContentInternal
11830
     * - xmlParseReference
11831
     * - xmlCtxtParseEntity
11832
     *
11833
     * The nesting depth is limited by the maximum number of inputs,
11834
     * see xmlCtxtPushInput.
11835
     *
11836
     * It's possible to make this non-recursive (minNsIndex must be
11837
     * stored in the input struct) at the expense of code readability.
11838
     */
11839
11840
17.7k
    ent->flags |= XML_ENT_EXPANDING;
11841
11842
17.7k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11843
11844
17.7k
    ent->flags &= ~XML_ENT_EXPANDING;
11845
11846
17.7k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11847
17.7k
    ctxt->nodelen = oldNodelen;
11848
17.7k
    ctxt->nodemem = oldNodemem;
11849
11850
    /*
11851
     * Entity size accounting
11852
     */
11853
17.7k
    consumed = input->consumed;
11854
17.7k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11855
11856
17.7k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11857
1.47k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11858
11859
17.7k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11860
1.53k
        if (isExternal)
11861
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11862
11863
1.53k
        ent->children = list;
11864
11865
1.53k
        while (list != NULL) {
11866
0
            list->parent = (xmlNodePtr) ent;
11867
11868
            /*
11869
             * Downstream code like the nginx xslt module can set
11870
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11871
             * might have a different or a NULL document.
11872
             */
11873
0
            if (list->doc != ent->doc)
11874
0
                xmlSetTreeDoc(list, ent->doc);
11875
11876
0
            if (list->next == NULL)
11877
0
                ent->last = list;
11878
0
            list = list->next;
11879
0
        }
11880
16.1k
    } else {
11881
16.1k
        xmlFreeNodeList(list);
11882
16.1k
    }
11883
11884
17.7k
    xmlFreeInputStream(input);
11885
11886
17.7k
error:
11887
17.7k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11888
17.7k
}
11889
11890
/**
11891
 * Parse an external general entity within an existing parsing context
11892
 * An external general parsed entity is well-formed if it matches the
11893
 * production labeled extParsedEnt.
11894
 *
11895
 *     [78] extParsedEnt ::= TextDecl? content
11896
 *
11897
 * @param ctxt  the existing parsing context
11898
 * @param URL  the URL for the entity to load
11899
 * @param ID  the System ID for the entity to load
11900
 * @param listOut  the return value for the set of parsed nodes
11901
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11902
 *    the parser error code otherwise
11903
 */
11904
11905
int
11906
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11907
0
                           const xmlChar *ID, xmlNode **listOut) {
11908
0
    xmlParserInputPtr input;
11909
0
    xmlNodePtr list;
11910
11911
0
    if (listOut != NULL)
11912
0
        *listOut = NULL;
11913
11914
0
    if (ctxt == NULL)
11915
0
        return(XML_ERR_ARGUMENT);
11916
11917
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11918
0
                            XML_RESOURCE_GENERAL_ENTITY);
11919
0
    if (input == NULL)
11920
0
        return(ctxt->errNo);
11921
11922
0
    xmlCtxtInitializeLate(ctxt);
11923
11924
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11925
0
    if (listOut != NULL)
11926
0
        *listOut = list;
11927
0
    else
11928
0
        xmlFreeNodeList(list);
11929
11930
0
    xmlFreeInputStream(input);
11931
0
    return(ctxt->errNo);
11932
0
}
11933
11934
#ifdef LIBXML_SAX1_ENABLED
11935
/**
11936
 * Parse an external general entity
11937
 * An external general parsed entity is well-formed if it matches the
11938
 * production labeled extParsedEnt.
11939
 *
11940
 * @deprecated Use #xmlParseCtxtExternalEntity.
11941
 *
11942
 *     [78] extParsedEnt ::= TextDecl? content
11943
 *
11944
 * @param doc  the document the chunk pertains to
11945
 * @param sax  the SAX handler block (possibly NULL)
11946
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11947
 * @param depth  Used for loop detection, use 0
11948
 * @param URL  the URL for the entity to load
11949
 * @param ID  the System ID for the entity to load
11950
 * @param list  the return value for the set of parsed nodes
11951
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11952
 *    the parser error code otherwise
11953
 */
11954
11955
int
11956
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11957
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11958
0
    xmlParserCtxtPtr ctxt;
11959
0
    int ret;
11960
11961
0
    if (list != NULL)
11962
0
        *list = NULL;
11963
11964
0
    if (doc == NULL)
11965
0
        return(XML_ERR_ARGUMENT);
11966
11967
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
0
    if (ctxt == NULL)
11969
0
        return(XML_ERR_NO_MEMORY);
11970
11971
0
    ctxt->depth = depth;
11972
0
    ctxt->myDoc = doc;
11973
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11974
11975
0
    xmlFreeParserCtxt(ctxt);
11976
0
    return(ret);
11977
0
}
11978
11979
/**
11980
 * Parse a well-balanced chunk of an XML document
11981
 * called by the parser
11982
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11983
 * the content production in the XML grammar:
11984
 *
11985
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11986
 *                       Comment)*
11987
 *
11988
 * @param doc  the document the chunk pertains to (must not be NULL)
11989
 * @param sax  the SAX handler block (possibly NULL)
11990
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11991
 * @param depth  Used for loop detection, use 0
11992
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11993
 * @param lst  the return value for the set of parsed nodes
11994
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11995
 *    the parser error code otherwise
11996
 */
11997
11998
int
11999
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
12000
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
12001
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12002
0
                                                depth, string, lst, 0 );
12003
0
}
12004
#endif /* LIBXML_SAX1_ENABLED */
12005
12006
/**
12007
 * Parse a well-balanced chunk of XML matching the 'content' production.
12008
 *
12009
 * Namespaces in scope of `node` and entities of `node`'s document are
12010
 * recognized. When validating, the DTD of `node`'s document is used.
12011
 *
12012
 * Always consumes `input` even in error case.
12013
 *
12014
 * @since 2.14.0
12015
 *
12016
 * @param ctxt  parser context
12017
 * @param input  parser input
12018
 * @param node  target node or document
12019
 * @param hasTextDecl  whether to parse text declaration
12020
 * @returns a node list or NULL in case of error.
12021
 */
12022
xmlNode *
12023
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12024
0
                    xmlNode *node, int hasTextDecl) {
12025
0
    xmlDocPtr doc;
12026
0
    xmlNodePtr cur, list = NULL;
12027
0
    int nsnr = 0;
12028
0
    xmlDictPtr oldDict;
12029
0
    int oldOptions, oldDictNames, oldLoadSubset;
12030
12031
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12032
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12033
0
        goto exit;
12034
0
    }
12035
12036
0
    doc = node->doc;
12037
0
    if (doc == NULL) {
12038
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12039
0
        goto exit;
12040
0
    }
12041
12042
0
    switch (node->type) {
12043
0
        case XML_ELEMENT_NODE:
12044
0
        case XML_DOCUMENT_NODE:
12045
0
        case XML_HTML_DOCUMENT_NODE:
12046
0
            break;
12047
12048
0
        case XML_ATTRIBUTE_NODE:
12049
0
        case XML_TEXT_NODE:
12050
0
        case XML_CDATA_SECTION_NODE:
12051
0
        case XML_ENTITY_REF_NODE:
12052
0
        case XML_PI_NODE:
12053
0
        case XML_COMMENT_NODE:
12054
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12055
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12056
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12057
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12058
0
                    node = cur;
12059
0
                    break;
12060
0
                }
12061
0
            }
12062
0
            break;
12063
12064
0
        default:
12065
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12066
0
            goto exit;
12067
0
    }
12068
12069
0
    xmlCtxtReset(ctxt);
12070
12071
0
    oldDict = ctxt->dict;
12072
0
    oldOptions = ctxt->options;
12073
0
    oldDictNames = ctxt->dictNames;
12074
0
    oldLoadSubset = ctxt->loadsubset;
12075
12076
    /*
12077
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12078
     */
12079
0
    if (doc->dict != NULL) {
12080
0
        ctxt->dict = doc->dict;
12081
0
    } else {
12082
0
        ctxt->options |= XML_PARSE_NODICT;
12083
0
        ctxt->dictNames = 0;
12084
0
    }
12085
12086
    /*
12087
     * Disable IDs
12088
     */
12089
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12090
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12091
12092
0
    ctxt->myDoc = doc;
12093
12094
0
#ifdef LIBXML_HTML_ENABLED
12095
0
    if (ctxt->html) {
12096
        /*
12097
         * When parsing in context, it makes no sense to add implied
12098
         * elements like html/body/etc...
12099
         */
12100
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12101
12102
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12103
0
    } else
12104
0
#endif
12105
0
    {
12106
0
        xmlCtxtInitializeLate(ctxt);
12107
12108
        /*
12109
         * initialize the SAX2 namespaces stack
12110
         */
12111
0
        cur = node;
12112
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12113
0
            xmlNsPtr ns = cur->nsDef;
12114
0
            xmlHashedString hprefix, huri;
12115
12116
0
            while (ns != NULL) {
12117
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12118
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12119
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12120
0
                    nsnr++;
12121
0
                ns = ns->next;
12122
0
            }
12123
0
            cur = cur->parent;
12124
0
        }
12125
12126
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12127
12128
0
        if (nsnr > 0)
12129
0
            xmlParserNsPop(ctxt, nsnr);
12130
0
    }
12131
12132
0
    ctxt->dict = oldDict;
12133
0
    ctxt->options = oldOptions;
12134
0
    ctxt->dictNames = oldDictNames;
12135
0
    ctxt->loadsubset = oldLoadSubset;
12136
0
    ctxt->myDoc = NULL;
12137
0
    ctxt->node = NULL;
12138
12139
0
exit:
12140
0
    xmlFreeInputStream(input);
12141
0
    return(list);
12142
0
}
12143
12144
/**
12145
 * Parse a well-balanced chunk of an XML document
12146
 * within the context (DTD, namespaces, etc ...) of the given node.
12147
 *
12148
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12149
 * the content production in the XML grammar:
12150
 *
12151
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12152
 *                       Comment)*
12153
 *
12154
 * This function assumes the encoding of `node`'s document which is
12155
 * typically not what you want. A better alternative is
12156
 * #xmlCtxtParseContent.
12157
 *
12158
 * @param node  the context node
12159
 * @param data  the input string
12160
 * @param datalen  the input string length in bytes
12161
 * @param options  a combination of xmlParserOption
12162
 * @param listOut  the return value for the set of parsed nodes
12163
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12164
 * error code otherwise
12165
 */
12166
xmlParserErrors
12167
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12168
0
                      int options, xmlNode **listOut) {
12169
0
    xmlParserCtxtPtr ctxt;
12170
0
    xmlParserInputPtr input;
12171
0
    xmlDocPtr doc;
12172
0
    xmlNodePtr list;
12173
0
    xmlParserErrors ret;
12174
12175
0
    if (listOut == NULL)
12176
0
        return(XML_ERR_INTERNAL_ERROR);
12177
0
    *listOut = NULL;
12178
12179
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12180
0
        return(XML_ERR_INTERNAL_ERROR);
12181
12182
0
    doc = node->doc;
12183
0
    if (doc == NULL)
12184
0
        return(XML_ERR_INTERNAL_ERROR);
12185
12186
0
#ifdef LIBXML_HTML_ENABLED
12187
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12188
0
        ctxt = htmlNewParserCtxt();
12189
0
    }
12190
0
    else
12191
0
#endif
12192
0
        ctxt = xmlNewParserCtxt();
12193
12194
0
    if (ctxt == NULL)
12195
0
        return(XML_ERR_NO_MEMORY);
12196
12197
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12198
0
                                      (const char *) doc->encoding,
12199
0
                                      XML_INPUT_BUF_STATIC);
12200
0
    if (input == NULL) {
12201
0
        xmlFreeParserCtxt(ctxt);
12202
0
        return(XML_ERR_NO_MEMORY);
12203
0
    }
12204
12205
0
    xmlCtxtUseOptions(ctxt, options);
12206
12207
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12208
12209
0
    if (list == NULL) {
12210
0
        ret = ctxt->errNo;
12211
0
        if (ret == XML_ERR_ARGUMENT)
12212
0
            ret = XML_ERR_INTERNAL_ERROR;
12213
0
    } else {
12214
0
        ret = XML_ERR_OK;
12215
0
        *listOut = list;
12216
0
    }
12217
12218
0
    xmlFreeParserCtxt(ctxt);
12219
12220
0
    return(ret);
12221
0
}
12222
12223
#ifdef LIBXML_SAX1_ENABLED
12224
/**
12225
 * Parse a well-balanced chunk of an XML document
12226
 *
12227
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12228
 * the content production in the XML grammar:
12229
 *
12230
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12231
 *                       Comment)*
12232
 *
12233
 * In case recover is set to 1, the nodelist will not be empty even if
12234
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12235
 * some extent.
12236
 *
12237
 * @param doc  the document the chunk pertains to (must not be NULL)
12238
 * @param sax  the SAX handler block (possibly NULL)
12239
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12240
 * @param depth  Used for loop detection, use 0
12241
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12242
 * @param listOut  the return value for the set of parsed nodes
12243
 * @param recover  return nodes even if the data is broken (use 0)
12244
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12245
 * otherwise.
12246
 */
12247
int
12248
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12249
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12250
0
     int recover) {
12251
0
    xmlParserCtxtPtr ctxt;
12252
0
    xmlParserInputPtr input;
12253
0
    xmlNodePtr list;
12254
0
    int ret;
12255
12256
0
    if (listOut != NULL)
12257
0
        *listOut = NULL;
12258
12259
0
    if (string == NULL)
12260
0
        return(XML_ERR_ARGUMENT);
12261
12262
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12263
0
    if (ctxt == NULL)
12264
0
        return(XML_ERR_NO_MEMORY);
12265
12266
0
    xmlCtxtInitializeLate(ctxt);
12267
12268
0
    ctxt->depth = depth;
12269
0
    ctxt->myDoc = doc;
12270
0
    if (recover) {
12271
0
        ctxt->options |= XML_PARSE_RECOVER;
12272
0
        ctxt->recovery = 1;
12273
0
    }
12274
12275
0
    input = xmlNewStringInputStream(ctxt, string);
12276
0
    if (input == NULL) {
12277
0
        ret = ctxt->errNo;
12278
0
        goto error;
12279
0
    }
12280
12281
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12282
0
    if (listOut != NULL)
12283
0
        *listOut = list;
12284
0
    else
12285
0
        xmlFreeNodeList(list);
12286
12287
0
    if (!ctxt->wellFormed)
12288
0
        ret = ctxt->errNo;
12289
0
    else
12290
0
        ret = XML_ERR_OK;
12291
12292
0
error:
12293
0
    xmlFreeInputStream(input);
12294
0
    xmlFreeParserCtxt(ctxt);
12295
0
    return(ret);
12296
0
}
12297
12298
/**
12299
 * Parse an XML external entity out of context and build a tree.
12300
 * It use the given SAX function block to handle the parsing callback.
12301
 * If sax is NULL, fallback to the default DOM tree building routines.
12302
 *
12303
 * @deprecated Don't use.
12304
 *
12305
 *     [78] extParsedEnt ::= TextDecl? content
12306
 *
12307
 * This correspond to a "Well Balanced" chunk
12308
 *
12309
 * @param sax  the SAX handler block
12310
 * @param filename  the filename
12311
 * @returns the resulting document tree
12312
 */
12313
12314
xmlDoc *
12315
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12316
0
    xmlDocPtr ret;
12317
0
    xmlParserCtxtPtr ctxt;
12318
12319
0
    ctxt = xmlCreateFileParserCtxt(filename);
12320
0
    if (ctxt == NULL) {
12321
0
  return(NULL);
12322
0
    }
12323
0
    if (sax != NULL) {
12324
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12325
0
            *ctxt->sax = *sax;
12326
0
        } else {
12327
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12328
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12329
0
        }
12330
0
        ctxt->userData = NULL;
12331
0
    }
12332
12333
0
    xmlParseExtParsedEnt(ctxt);
12334
12335
0
    if (ctxt->wellFormed) {
12336
0
  ret = ctxt->myDoc;
12337
0
    } else {
12338
0
        ret = NULL;
12339
0
        xmlFreeDoc(ctxt->myDoc);
12340
0
    }
12341
12342
0
    xmlFreeParserCtxt(ctxt);
12343
12344
0
    return(ret);
12345
0
}
12346
12347
/**
12348
 * Parse an XML external entity out of context and build a tree.
12349
 *
12350
 *     [78] extParsedEnt ::= TextDecl? content
12351
 *
12352
 * This correspond to a "Well Balanced" chunk
12353
 *
12354
 * @param filename  the filename
12355
 * @returns the resulting document tree
12356
 */
12357
12358
xmlDoc *
12359
0
xmlParseEntity(const char *filename) {
12360
0
    return(xmlSAXParseEntity(NULL, filename));
12361
0
}
12362
#endif /* LIBXML_SAX1_ENABLED */
12363
12364
/**
12365
 * Create a parser context for an external entity
12366
 * Automatic support for ZLIB/Compress compressed document is provided
12367
 * by default if found at compile-time.
12368
 *
12369
 * @deprecated Don't use.
12370
 *
12371
 * @param URL  the entity URL
12372
 * @param ID  the entity PUBLIC ID
12373
 * @param base  a possible base for the target URI
12374
 * @returns the new parser context or NULL
12375
 */
12376
xmlParserCtxt *
12377
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12378
0
                    const xmlChar *base) {
12379
0
    xmlParserCtxtPtr ctxt;
12380
0
    xmlParserInputPtr input;
12381
0
    xmlChar *uri = NULL;
12382
12383
0
    ctxt = xmlNewParserCtxt();
12384
0
    if (ctxt == NULL)
12385
0
  return(NULL);
12386
12387
0
    if (base != NULL) {
12388
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12389
0
            goto error;
12390
0
        if (uri != NULL)
12391
0
            URL = uri;
12392
0
    }
12393
12394
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12395
0
                            XML_RESOURCE_UNKNOWN);
12396
0
    if (input == NULL)
12397
0
        goto error;
12398
12399
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12400
0
        xmlFreeInputStream(input);
12401
0
        goto error;
12402
0
    }
12403
12404
0
    xmlFree(uri);
12405
0
    return(ctxt);
12406
12407
0
error:
12408
0
    xmlFree(uri);
12409
0
    xmlFreeParserCtxt(ctxt);
12410
0
    return(NULL);
12411
0
}
12412
12413
/************************************************************************
12414
 *                  *
12415
 *    Front ends when parsing from a file     *
12416
 *                  *
12417
 ************************************************************************/
12418
12419
/**
12420
 * Create a parser context for a file or URL content.
12421
 * Automatic support for ZLIB/Compress compressed document is provided
12422
 * by default if found at compile-time and for file accesses
12423
 *
12424
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12425
 *
12426
 * @param filename  the filename or URL
12427
 * @param options  a combination of xmlParserOption
12428
 * @returns the new parser context or NULL
12429
 */
12430
xmlParserCtxt *
12431
xmlCreateURLParserCtxt(const char *filename, int options)
12432
0
{
12433
0
    xmlParserCtxtPtr ctxt;
12434
0
    xmlParserInputPtr input;
12435
12436
0
    ctxt = xmlNewParserCtxt();
12437
0
    if (ctxt == NULL)
12438
0
  return(NULL);
12439
12440
0
    options |= XML_PARSE_UNZIP;
12441
12442
0
    xmlCtxtUseOptions(ctxt, options);
12443
12444
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12445
0
    if (input == NULL) {
12446
0
  xmlFreeParserCtxt(ctxt);
12447
0
  return(NULL);
12448
0
    }
12449
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12450
0
        xmlFreeInputStream(input);
12451
0
        xmlFreeParserCtxt(ctxt);
12452
0
        return(NULL);
12453
0
    }
12454
12455
0
    return(ctxt);
12456
0
}
12457
12458
/**
12459
 * Create a parser context for a file content.
12460
 * Automatic support for ZLIB/Compress compressed document is provided
12461
 * by default if found at compile-time.
12462
 *
12463
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12464
 *
12465
 * @param filename  the filename
12466
 * @returns the new parser context or NULL
12467
 */
12468
xmlParserCtxt *
xmlCreateFileParserCtxt(const char *filename)
{
    xmlParserCtxt *ctxt;

    /* Same as the URL variant, called with an empty option set. */
    ctxt = xmlCreateURLParserCtxt(filename, 0);

    return(ctxt);
}
12473
12474
#ifdef LIBXML_SAX1_ENABLED
12475
/**
12476
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12477
 * compressed document is provided by default if found at compile-time.
12478
 * It uses the given SAX function block to handle the parsing callbacks.
12479
 * If sax is NULL, fallback to the default DOM tree building routines.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It uses the given SAX function block to handle the parsing callbacks.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12532
 *
12533
 * @param sax  the SAX handler block
12534
 * @param filename  the filename
12535
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12536
 *             documents
12537
 * @returns the resulting document tree
12538
 */
12539
12540
xmlDoc *
12541
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12542
0
                          int recovery) {
12543
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12544
0
}
12545
12546
/**
12547
 * Parse an XML in-memory document and build a tree.
12548
 * In the case the document is not Well Formed, an attempt to build a
12549
 * tree is tried anyway
12550
 *
12551
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12552
 *
12553
 * @param cur  a pointer to an array of xmlChar
12554
 * @returns the resulting document tree or NULL in case of failure
12555
 */
12556
12557
xmlDoc *
12558
0
xmlRecoverDoc(const xmlChar *cur) {
12559
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12560
0
}
12561
12562
/**
12563
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12564
 * compressed document is provided by default if found at compile-time.
12565
 *
12566
 * @deprecated Use #xmlReadFile.
12567
 *
12568
 * @param filename  the filename
12569
 * @returns the resulting document tree if the file was wellformed,
12570
 * NULL otherwise.
12571
 */
12572
12573
xmlDoc *
12574
0
xmlParseFile(const char *filename) {
12575
0
    return(xmlSAXParseFile(NULL, filename, 0));
12576
0
}
12577
12578
/**
12579
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12580
 * compressed document is provided by default if found at compile-time.
12581
 * In the case the document is not Well Formed, it attempts to build
12582
 * a tree anyway
12583
 *
12584
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12585
 *
12586
 * @param filename  the filename
12587
 * @returns the resulting document tree or NULL in case of failure
12588
 */
12589
12590
xmlDoc *
12591
0
xmlRecoverFile(const char *filename) {
12592
0
    return(xmlSAXParseFile(NULL, filename, 1));
12593
0
}
12594
12595
12596
/**
12597
 * Setup the parser context to parse a new buffer; Clears any prior
12598
 * contents from the parser context. The buffer parameter must not be
12599
 * NULL, but the filename parameter can be
12600
 *
12601
 * @deprecated Don't use.
12602
 *
12603
 * @param ctxt  an XML parser context
12604
 * @param buffer  a xmlChar * buffer
12605
 * @param filename  a file name
12606
 */
12607
void
12608
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12609
                             const char* filename)
12610
0
{
12611
0
    xmlParserInputPtr input;
12612
12613
0
    if ((ctxt == NULL) || (buffer == NULL))
12614
0
        return;
12615
12616
0
    xmlCtxtReset(ctxt);
12617
12618
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12619
0
                                      NULL, 0);
12620
0
    if (input == NULL)
12621
0
        return;
12622
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12623
0
        xmlFreeInputStream(input);
12624
0
}
12625
12626
/**
12627
 * Parse an XML file and call the given SAX handler routines.
12628
 * Automatic support for ZLIB/Compress compressed document is provided
12629
 *
12630
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12631
 *
12632
 * @param sax  a SAX handler
12633
 * @param user_data  The user data returned on SAX callbacks
12634
 * @param filename  a file name
12635
 * @returns 0 in case of success or an error number otherwise
12636
 */
12637
int
12638
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12639
0
                    const char *filename) {
12640
0
    int ret = 0;
12641
0
    xmlParserCtxtPtr ctxt;
12642
12643
0
    ctxt = xmlCreateFileParserCtxt(filename);
12644
0
    if (ctxt == NULL) return -1;
12645
0
    if (sax != NULL) {
12646
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12647
0
            *ctxt->sax = *sax;
12648
0
        } else {
12649
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12650
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12651
0
        }
12652
0
  ctxt->userData = user_data;
12653
0
    }
12654
12655
0
    xmlParseDocument(ctxt);
12656
12657
0
    if (ctxt->wellFormed)
12658
0
  ret = 0;
12659
0
    else {
12660
0
        if (ctxt->errNo != 0)
12661
0
      ret = ctxt->errNo;
12662
0
  else
12663
0
      ret = -1;
12664
0
    }
12665
0
    if (ctxt->myDoc != NULL) {
12666
0
        xmlFreeDoc(ctxt->myDoc);
12667
0
  ctxt->myDoc = NULL;
12668
0
    }
12669
0
    xmlFreeParserCtxt(ctxt);
12670
12671
0
    return ret;
12672
0
}
12673
#endif /* LIBXML_SAX1_ENABLED */
12674
12675
/************************************************************************
12676
 *                  *
12677
 *    Front ends when parsing from memory     *
12678
 *                  *
12679
 ************************************************************************/
12680
12681
/**
12682
 * Create a parser context for an XML in-memory document. The input buffer
12683
 * must not contain a terminating null byte.
12684
 *
12685
 * @param buffer  a pointer to a char array
12686
 * @param size  the size of the array
12687
 * @returns the new parser context or NULL
12688
 */
12689
xmlParserCtxt *
12690
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12691
0
    xmlParserCtxtPtr ctxt;
12692
0
    xmlParserInputPtr input;
12693
12694
0
    if (size < 0)
12695
0
  return(NULL);
12696
12697
0
    ctxt = xmlNewParserCtxt();
12698
0
    if (ctxt == NULL)
12699
0
  return(NULL);
12700
12701
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12702
0
    if (input == NULL) {
12703
0
  xmlFreeParserCtxt(ctxt);
12704
0
  return(NULL);
12705
0
    }
12706
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12707
0
        xmlFreeInputStream(input);
12708
0
        xmlFreeParserCtxt(ctxt);
12709
0
        return(NULL);
12710
0
    }
12711
12712
0
    return(ctxt);
12713
0
}
12714
12715
#ifdef LIBXML_SAX1_ENABLED
12716
/**
12717
 * Parse an XML in-memory block and use the given SAX function block
12718
 * to handle the parsing callback. If sax is NULL, fallback to the default
12719
 * DOM tree building routines.
12720
 *
12721
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12722
 *
12723
 * User data (void *) is stored within the parser context in the
12724
 * context's _private member, so it is available nearly everywhere in libxml
12725
 *
12726
 * @param sax  the SAX handler block
12727
 * @param buffer  a pointer to a char array
12728
 * @param size  the size of the array
12729
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12730
 *             documents
12731
 * @param data  the userdata
12732
 * @returns the resulting document tree
12733
 */
12734
12735
xmlDoc *
12736
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12737
0
                          int size, int recovery, void *data) {
12738
0
    xmlDocPtr ret = NULL;
12739
0
    xmlParserCtxtPtr ctxt;
12740
0
    xmlParserInputPtr input;
12741
12742
0
    if (size < 0)
12743
0
        return(NULL);
12744
12745
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12746
0
    if (ctxt == NULL)
12747
0
        return(NULL);
12748
12749
0
    if (data != NULL)
12750
0
  ctxt->_private=data;
12751
12752
0
    if (recovery) {
12753
0
        ctxt->options |= XML_PARSE_RECOVER;
12754
0
        ctxt->recovery = 1;
12755
0
    }
12756
12757
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12758
0
                                      XML_INPUT_BUF_STATIC);
12759
12760
0
    if (input != NULL)
12761
0
        ret = xmlCtxtParseDocument(ctxt, input);
12762
12763
0
    xmlFreeParserCtxt(ctxt);
12764
0
    return(ret);
12765
0
}
12766
12767
/**
12768
 * Parse an XML in-memory block and use the given SAX function block
12769
 * to handle the parsing callback. If sax is NULL, fallback to the default
12770
 * DOM tree building routines.
12771
 *
12772
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12773
 *
12774
 * @param sax  the SAX handler block
12775
 * @param buffer  a pointer to a char array
12776
 * @param size  the size of the array
12777
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12778
 *             documents
12779
 * @returns the resulting document tree
12780
 */
12781
xmlDoc *
12782
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12783
0
            int size, int recovery) {
12784
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12785
0
}
12786
12787
/**
12788
 * Parse an XML in-memory block and build a tree.
12789
 *
12790
 * @deprecated Use #xmlReadMemory.
12791
 *
12792
 * @param buffer  a pointer to a char array
12793
 * @param size  the size of the array
12794
 * @returns the resulting document tree
12795
 */
12796
12797
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12798
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12799
0
}
12800
12801
/**
12802
 * Parse an XML in-memory block and build a tree.
12803
 * In the case the document is not Well Formed, an attempt to
12804
 * build a tree is tried anyway
12805
 *
12806
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12807
 *
12808
 * @param buffer  a pointer to a char array
12809
 * @param size  the size of the array
12810
 * @returns the resulting document tree or NULL in case of error
12811
 */
12812
12813
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12814
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12815
0
}
12816
12817
/**
12818
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12819
 *
12820
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12821
 *
12822
 * @param sax  a SAX handler
12823
 * @param user_data  The user data returned on SAX callbacks
12824
 * @param buffer  an in-memory XML document input
12825
 * @param size  the length of the XML document in bytes
12826
 * @returns 0 in case of success or an error number otherwise
12827
 */
12828
int
xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
                      const char *buffer, int size) {
    xmlParserCtxtPtr ctxt;
    int status;

    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
    if (ctxt == NULL)
        return -1;

    if (sax != NULL) {
        if (sax->initialized != XML_SAX2_MAGIC) {
            /*
             * Handler block not tagged with XML_SAX2_MAGIC: clear the
             * context's handler and copy only the xmlSAXHandlerV1-sized
             * prefix of the caller's block.
             */
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
        } else {
            *ctxt->sax = *sax;
        }
        ctxt->userData = user_data;
    }

    xmlParseDocument(ctxt);

    /* Not well-formed: report the context's error number, or -1 if unset. */
    if (ctxt->wellFormed)
        status = 0;
    else
        status = (ctxt->errNo != 0) ? ctxt->errNo : -1;

    /* Any tree built along the way is not returned, so free it. */
    if (ctxt->myDoc != NULL) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }
    xmlFreeParserCtxt(ctxt);

    return status;
}
12863
#endif /* LIBXML_SAX1_ENABLED */
12864
12865
/**
12866
 * Creates a parser context for an XML in-memory document.
12867
 *
12868
 * @param str  a pointer to an array of xmlChar
12869
 * @returns the new parser context or NULL
12870
 */
12871
xmlParserCtxt *
12872
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12873
0
    xmlParserCtxtPtr ctxt;
12874
0
    xmlParserInputPtr input;
12875
12876
0
    ctxt = xmlNewParserCtxt();
12877
0
    if (ctxt == NULL)
12878
0
  return(NULL);
12879
12880
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12881
0
    if (input == NULL) {
12882
0
  xmlFreeParserCtxt(ctxt);
12883
0
  return(NULL);
12884
0
    }
12885
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12886
0
        xmlFreeInputStream(input);
12887
0
        xmlFreeParserCtxt(ctxt);
12888
0
        return(NULL);
12889
0
    }
12890
12891
0
    return(ctxt);
12892
0
}
12893
12894
#ifdef LIBXML_SAX1_ENABLED
12895
/**
12896
 * Parse an XML in-memory document and build a tree.
12897
 * It uses the given SAX function block to handle the parsing callbacks.
12898
 * If sax is NULL, fallback to the default DOM tree building routines.
12899
 *
12900
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12901
 *
12902
 * @param sax  the SAX handler block
12903
 * @param cur  a pointer to an array of xmlChar
12904
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12905
 *             documents
12906
 * @returns the resulting document tree
12907
 */
12908
12909
xmlDoc *
12910
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12911
0
    xmlDocPtr ret;
12912
0
    xmlParserCtxtPtr ctxt;
12913
0
    xmlSAXHandlerPtr oldsax = NULL;
12914
12915
0
    if (cur == NULL) return(NULL);
12916
12917
12918
0
    ctxt = xmlCreateDocParserCtxt(cur);
12919
0
    if (ctxt == NULL) return(NULL);
12920
0
    if (sax != NULL) {
12921
0
        oldsax = ctxt->sax;
12922
0
        ctxt->sax = sax;
12923
0
        ctxt->userData = NULL;
12924
0
    }
12925
12926
0
    xmlParseDocument(ctxt);
12927
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12928
0
    else {
12929
0
       ret = NULL;
12930
0
       xmlFreeDoc(ctxt->myDoc);
12931
0
       ctxt->myDoc = NULL;
12932
0
    }
12933
0
    if (sax != NULL)
12934
0
  ctxt->sax = oldsax;
12935
0
    xmlFreeParserCtxt(ctxt);
12936
12937
0
    return(ret);
12938
0
}
12939
12940
/**
12941
 * Parse an XML in-memory document and build a tree.
12942
 *
12943
 * @deprecated Use #xmlReadDoc.
12944
 *
12945
 * @param cur  a pointer to an array of xmlChar
12946
 * @returns the resulting document tree
12947
 */
12948
12949
xmlDoc *
12950
0
xmlParseDoc(const xmlChar *cur) {
12951
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12952
0
}
12953
#endif /* LIBXML_SAX1_ENABLED */
12954
12955
/************************************************************************
12956
 *                  *
12957
 *  New set (2.6.0) of simpler and more flexible APIs   *
12958
 *                  *
12959
 ************************************************************************/
12960
12961
/**
12962
 * Reset a parser context
12963
 *
12964
 * @param ctxt  an XML parser context
12965
 */
12966
void
12967
xmlCtxtReset(xmlParserCtxt *ctxt)
12968
0
{
12969
0
    xmlParserInputPtr input;
12970
12971
0
    if (ctxt == NULL)
12972
0
        return;
12973
12974
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12975
0
        xmlFreeInputStream(input);
12976
0
    }
12977
0
    ctxt->inputNr = 0;
12978
0
    ctxt->input = NULL;
12979
12980
0
    ctxt->spaceNr = 0;
12981
0
    if (ctxt->spaceTab != NULL) {
12982
0
  ctxt->spaceTab[0] = -1;
12983
0
  ctxt->space = &ctxt->spaceTab[0];
12984
0
    } else {
12985
0
        ctxt->space = NULL;
12986
0
    }
12987
12988
12989
0
    ctxt->nodeNr = 0;
12990
0
    ctxt->node = NULL;
12991
12992
0
    ctxt->nameNr = 0;
12993
0
    ctxt->name = NULL;
12994
12995
0
    ctxt->nsNr = 0;
12996
0
    xmlParserNsReset(ctxt->nsdb);
12997
12998
0
    if (ctxt->version != NULL) {
12999
0
        xmlFree(ctxt->version);
13000
0
        ctxt->version = NULL;
13001
0
    }
13002
0
    if (ctxt->encoding != NULL) {
13003
0
        xmlFree(ctxt->encoding);
13004
0
        ctxt->encoding = NULL;
13005
0
    }
13006
0
    if (ctxt->extSubURI != NULL) {
13007
0
        xmlFree(ctxt->extSubURI);
13008
0
        ctxt->extSubURI = NULL;
13009
0
    }
13010
0
    if (ctxt->extSubSystem != NULL) {
13011
0
        xmlFree(ctxt->extSubSystem);
13012
0
        ctxt->extSubSystem = NULL;
13013
0
    }
13014
0
    if (ctxt->directory != NULL) {
13015
0
        xmlFree(ctxt->directory);
13016
0
        ctxt->directory = NULL;
13017
0
    }
13018
13019
0
    if (ctxt->myDoc != NULL)
13020
0
        xmlFreeDoc(ctxt->myDoc);
13021
0
    ctxt->myDoc = NULL;
13022
13023
0
    ctxt->standalone = -1;
13024
0
    ctxt->hasExternalSubset = 0;
13025
0
    ctxt->hasPErefs = 0;
13026
0
    ctxt->html = ctxt->html ? 1 : 0;
13027
0
    ctxt->instate = XML_PARSER_START;
13028
13029
0
    ctxt->wellFormed = 1;
13030
0
    ctxt->nsWellFormed = 1;
13031
0
    ctxt->disableSAX = 0;
13032
0
    ctxt->valid = 1;
13033
0
    ctxt->record_info = 0;
13034
0
    ctxt->checkIndex = 0;
13035
0
    ctxt->endCheckState = 0;
13036
0
    ctxt->inSubset = 0;
13037
0
    ctxt->errNo = XML_ERR_OK;
13038
0
    ctxt->depth = 0;
13039
0
    ctxt->catalogs = NULL;
13040
0
    ctxt->sizeentities = 0;
13041
0
    ctxt->sizeentcopy = 0;
13042
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13043
13044
0
    if (ctxt->attsDefault != NULL) {
13045
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13046
0
        ctxt->attsDefault = NULL;
13047
0
    }
13048
0
    if (ctxt->attsSpecial != NULL) {
13049
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13050
0
        ctxt->attsSpecial = NULL;
13051
0
    }
13052
13053
0
#ifdef LIBXML_CATALOG_ENABLED
13054
0
    if (ctxt->catalogs != NULL)
13055
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13056
0
#endif
13057
0
    ctxt->nbErrors = 0;
13058
0
    ctxt->nbWarnings = 0;
13059
0
    if (ctxt->lastError.code != XML_ERR_OK)
13060
0
        xmlResetError(&ctxt->lastError);
13061
0
}
13062
13063
/**
13064
 * Reset a push parser context
13065
 *
13066
 * @param ctxt  an XML parser context
13067
 * @param chunk  a pointer to an array of chars
13068
 * @param size  number of chars in the array
13069
 * @param filename  an optional file name or URI
13070
 * @param encoding  the document encoding, or NULL
13071
 * @returns 0 in case of success and 1 in case of error
13072
 */
13073
int
13074
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13075
                 int size, const char *filename, const char *encoding)
13076
0
{
13077
0
    xmlParserInputPtr input;
13078
13079
0
    if (ctxt == NULL)
13080
0
        return(1);
13081
13082
0
    xmlCtxtReset(ctxt);
13083
13084
0
    input = xmlNewPushInput(filename, chunk, size);
13085
0
    if (input == NULL)
13086
0
        return(1);
13087
13088
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13089
0
        xmlFreeInputStream(input);
13090
0
        return(1);
13091
0
    }
13092
13093
0
    if (encoding != NULL)
13094
0
        xmlSwitchEncodingName(ctxt, encoding);
13095
13096
0
    return(0);
13097
0
}
13098
13099
static int
13100
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13101
0
{
13102
0
    int allMask;
13103
13104
0
    if (ctxt == NULL)
13105
0
        return(-1);
13106
13107
    /*
13108
     * XInclude options aren't handled by the parser.
13109
     *
13110
     * XML_PARSE_XINCLUDE
13111
     * XML_PARSE_NOXINCNODE
13112
     * XML_PARSE_NOBASEFIX
13113
     */
13114
0
    allMask = XML_PARSE_RECOVER |
13115
0
              XML_PARSE_NOENT |
13116
0
              XML_PARSE_DTDLOAD |
13117
0
              XML_PARSE_DTDATTR |
13118
0
              XML_PARSE_DTDVALID |
13119
0
              XML_PARSE_NOERROR |
13120
0
              XML_PARSE_NOWARNING |
13121
0
              XML_PARSE_PEDANTIC |
13122
0
              XML_PARSE_NOBLANKS |
13123
0
#ifdef LIBXML_SAX1_ENABLED
13124
0
              XML_PARSE_SAX1 |
13125
0
#endif
13126
0
              XML_PARSE_NONET |
13127
0
              XML_PARSE_NODICT |
13128
0
              XML_PARSE_NSCLEAN |
13129
0
              XML_PARSE_NOCDATA |
13130
0
              XML_PARSE_COMPACT |
13131
0
              XML_PARSE_OLD10 |
13132
0
              XML_PARSE_HUGE |
13133
0
              XML_PARSE_OLDSAX |
13134
0
              XML_PARSE_IGNORE_ENC |
13135
0
              XML_PARSE_BIG_LINES |
13136
0
              XML_PARSE_NO_XXE |
13137
0
              XML_PARSE_UNZIP |
13138
0
              XML_PARSE_NO_SYS_CATALOG |
13139
0
              XML_PARSE_CATALOG_PI;
13140
13141
0
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13142
13143
    /*
13144
     * For some options, struct members are historically the source
13145
     * of truth. The values are initalized from global variables and
13146
     * old code could also modify them directly. Several older API
13147
     * functions that don't take an options argument rely on these
13148
     * deprecated mechanisms.
13149
     *
13150
     * Once public access to struct members and the globals are
13151
     * disabled, we can use the options bitmask as source of
13152
     * truth, making all these struct members obsolete.
13153
     *
13154
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13155
     * loading of the external subset.
13156
     */
13157
0
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13158
0
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13159
0
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13160
0
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13161
0
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13162
0
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13163
0
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13164
0
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13165
0
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13166
13167
0
    return(options & ~allMask);
13168
0
}
13169
13170
/**
13171
 * Applies the options to the parser context. Unset options are
13172
 * cleared.
13173
 *
13174
 * @since 2.13.0
13175
 *
13176
 * With older versions, you can use #xmlCtxtUseOptions.
13177
 *
13178
 * @param ctxt  an XML parser context
13179
 * @param options  a bitmask of xmlParserOption values
13180
 * @returns 0 in case of success, the set of unknown or unimplemented options
13181
 *         in case of error.
13182
 */
13183
int
13184
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13185
0
{
13186
0
#ifdef LIBXML_HTML_ENABLED
13187
0
    if ((ctxt != NULL) && (ctxt->html))
13188
0
        return(htmlCtxtSetOptions(ctxt, options));
13189
0
#endif
13190
13191
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13192
0
}
13193
13194
/**
13195
 * Get the current options of the parser context.
13196
 *
13197
 * @since 2.14.0
13198
 *
13199
 * @param ctxt  an XML parser context
13200
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13201
 */
13202
int
13203
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13204
0
{
13205
0
    if (ctxt == NULL)
13206
0
        return(-1);
13207
13208
0
    return(ctxt->options);
13209
0
}
13210
13211
/**
13212
 * Applies the options to the parser context. The following options
13213
 * are never cleared and can only be enabled:
13214
 *
13215
 * - XML_PARSE_NOERROR
13216
 * - XML_PARSE_NOWARNING
13217
 * - XML_PARSE_NONET
13218
 * - XML_PARSE_NSCLEAN
13219
 * - XML_PARSE_NOCDATA
13220
 * - XML_PARSE_COMPACT
13221
 * - XML_PARSE_OLD10
13222
 * - XML_PARSE_HUGE
13223
 * - XML_PARSE_OLDSAX
13224
 * - XML_PARSE_IGNORE_ENC
13225
 * - XML_PARSE_BIG_LINES
13226
 *
13227
 * @deprecated Use #xmlCtxtSetOptions.
13228
 *
13229
 * @param ctxt  an XML parser context
13230
 * @param options  a combination of xmlParserOption
13231
 * @returns 0 in case of success, the set of unknown or unimplemented options
13232
 *         in case of error.
13233
 */
13234
int
13235
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13236
0
{
13237
0
    int keepMask;
13238
13239
0
#ifdef LIBXML_HTML_ENABLED
13240
0
    if ((ctxt != NULL) && (ctxt->html))
13241
0
        return(htmlCtxtUseOptions(ctxt, options));
13242
0
#endif
13243
13244
    /*
13245
     * For historic reasons, some options can only be enabled.
13246
     */
13247
0
    keepMask = XML_PARSE_NOERROR |
13248
0
               XML_PARSE_NOWARNING |
13249
0
               XML_PARSE_NONET |
13250
0
               XML_PARSE_NSCLEAN |
13251
0
               XML_PARSE_NOCDATA |
13252
0
               XML_PARSE_COMPACT |
13253
0
               XML_PARSE_OLD10 |
13254
0
               XML_PARSE_HUGE |
13255
0
               XML_PARSE_OLDSAX |
13256
0
               XML_PARSE_IGNORE_ENC |
13257
0
               XML_PARSE_BIG_LINES;
13258
13259
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13260
0
}
13261
13262
/**
13263
 * To protect against exponential entity expansion ("billion laughs"), the
13264
 * size of serialized output is (roughly) limited to the input size
13265
 * multiplied by this factor. The default value is 5.
13266
 *
13267
 * When working with documents making heavy use of entity expansion, it can
13268
 * be necessary to increase the value. For security reasons, this should only
13269
 * be considered when processing trusted input.
13270
 *
13271
 * @param ctxt  an XML parser context
13272
 * @param maxAmpl  maximum amplification factor
13273
 */
13274
void
13275
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13276
0
{
13277
0
    if (ctxt == NULL)
13278
0
        return;
13279
0
    ctxt->maxAmpl = maxAmpl;
13280
0
}
13281
13282
/**
13283
 * Parse an XML document and return the resulting document tree.
13284
 * Takes ownership of the input object.
13285
 *
13286
 * @since 2.13.0
13287
 *
13288
 * @param ctxt  an XML parser context
13289
 * @param input  parser input
13290
 * @returns the resulting document tree or NULL
13291
 */
13292
xmlDoc *
13293
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13294
0
{
13295
0
    xmlDocPtr ret = NULL;
13296
13297
0
    if ((ctxt == NULL) || (input == NULL)) {
13298
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13299
0
        xmlFreeInputStream(input);
13300
0
        return(NULL);
13301
0
    }
13302
13303
    /* assert(ctxt->inputNr == 0); */
13304
0
    while (ctxt->inputNr > 0)
13305
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13306
13307
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13308
0
        xmlFreeInputStream(input);
13309
0
        return(NULL);
13310
0
    }
13311
13312
0
    xmlParseDocument(ctxt);
13313
13314
0
    ret = xmlCtxtGetDocument(ctxt);
13315
13316
    /* assert(ctxt->inputNr == 1); */
13317
0
    while (ctxt->inputNr > 0)
13318
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13319
13320
0
    return(ret);
13321
0
}
13322
13323
/**
13324
 * Convenience function to parse an XML document from a
13325
 * zero-terminated string.
13326
 *
13327
 * See #xmlCtxtReadDoc for details.
13328
 *
13329
 * @param cur  a pointer to a zero terminated string
13330
 * @param URL  base URL (optional)
13331
 * @param encoding  the document encoding (optional)
13332
 * @param options  a combination of xmlParserOption
13333
 * @returns the resulting document tree
13334
 */
13335
xmlDoc *
13336
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13337
           int options)
13338
0
{
13339
0
    xmlParserCtxtPtr ctxt;
13340
0
    xmlParserInputPtr input;
13341
0
    xmlDocPtr doc = NULL;
13342
13343
0
    ctxt = xmlNewParserCtxt();
13344
0
    if (ctxt == NULL)
13345
0
        return(NULL);
13346
13347
0
    xmlCtxtUseOptions(ctxt, options);
13348
13349
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13350
0
                                      XML_INPUT_BUF_STATIC);
13351
13352
0
    if (input != NULL)
13353
0
        doc = xmlCtxtParseDocument(ctxt, input);
13354
13355
0
    xmlFreeParserCtxt(ctxt);
13356
0
    return(doc);
13357
0
}
13358
13359
/**
13360
 * Convenience function to parse an XML file from the filesystem
13361
 * or a global, user-defined resource loader.
13362
 *
13363
 * This function always enables the XML_PARSE_UNZIP option for
13364
 * backward compatibility. If a "-" filename is passed, it will
13365
 * read from stdin. Both of these features are potentially
13366
 * insecure and might be removed from later versions.
13367
 *
13368
 * See #xmlCtxtReadFile for details.
13369
 *
13370
 * @param filename  a file or URL
13371
 * @param encoding  the document encoding (optional)
13372
 * @param options  a combination of xmlParserOption
13373
 * @returns the resulting document tree
13374
 */
13375
xmlDoc *
13376
xmlReadFile(const char *filename, const char *encoding, int options)
13377
0
{
13378
0
    xmlParserCtxtPtr ctxt;
13379
0
    xmlParserInputPtr input;
13380
0
    xmlDocPtr doc = NULL;
13381
13382
0
    ctxt = xmlNewParserCtxt();
13383
0
    if (ctxt == NULL)
13384
0
        return(NULL);
13385
13386
0
    options |= XML_PARSE_UNZIP;
13387
13388
0
    xmlCtxtUseOptions(ctxt, options);
13389
13390
    /*
13391
     * Backward compatibility for users of command line utilities like
13392
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13393
     * should be removed at some point.
13394
     */
13395
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13396
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13397
0
                                      encoding, 0);
13398
0
    else
13399
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13400
13401
0
    if (input != NULL)
13402
0
        doc = xmlCtxtParseDocument(ctxt, input);
13403
13404
0
    xmlFreeParserCtxt(ctxt);
13405
0
    return(doc);
13406
0
}
13407
13408
/**
13409
 * Parse an XML in-memory document and build a tree. The input buffer must
13410
 * not contain a terminating null byte.
13411
 *
13412
 * See #xmlCtxtReadMemory for details.
13413
 *
13414
 * @param buffer  a pointer to a char array
13415
 * @param size  the size of the array
13416
 * @param url  base URL (optional)
13417
 * @param encoding  the document encoding (optional)
13418
 * @param options  a combination of xmlParserOption
13419
 * @returns the resulting document tree
13420
 */
13421
xmlDoc *
13422
xmlReadMemory(const char *buffer, int size, const char *url,
13423
              const char *encoding, int options)
13424
0
{
13425
0
    xmlParserCtxtPtr ctxt;
13426
0
    xmlParserInputPtr input;
13427
0
    xmlDocPtr doc = NULL;
13428
13429
0
    if (size < 0)
13430
0
  return(NULL);
13431
13432
0
    ctxt = xmlNewParserCtxt();
13433
0
    if (ctxt == NULL)
13434
0
        return(NULL);
13435
13436
0
    xmlCtxtUseOptions(ctxt, options);
13437
13438
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13439
0
                                      XML_INPUT_BUF_STATIC);
13440
13441
0
    if (input != NULL)
13442
0
        doc = xmlCtxtParseDocument(ctxt, input);
13443
13444
0
    xmlFreeParserCtxt(ctxt);
13445
0
    return(doc);
13446
0
}
13447
13448
/**
13449
 * Parse an XML from a file descriptor and build a tree.
13450
 *
13451
 * See #xmlCtxtReadFd for details.
13452
 *
13453
 * NOTE that the file descriptor will not be closed when the
13454
 * context is freed or reset.
13455
 *
13456
 * @param fd  an open file descriptor
13457
 * @param URL  base URL (optional)
13458
 * @param encoding  the document encoding (optional)
13459
 * @param options  a combination of xmlParserOption
13460
 * @returns the resulting document tree
13461
 */
13462
xmlDoc *
13463
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13464
0
{
13465
0
    xmlParserCtxtPtr ctxt;
13466
0
    xmlParserInputPtr input;
13467
0
    xmlDocPtr doc = NULL;
13468
13469
0
    ctxt = xmlNewParserCtxt();
13470
0
    if (ctxt == NULL)
13471
0
        return(NULL);
13472
13473
0
    xmlCtxtUseOptions(ctxt, options);
13474
13475
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML document from I/O functions and context and build a tree.
13486
 *
13487
 * See #xmlCtxtReadIO for details.
13488
 *
13489
 * @param ioread  an I/O read function
13490
 * @param ioclose  an I/O close function (optional)
13491
 * @param ioctx  an I/O handler
13492
 * @param URL  base URL (optional)
13493
 * @param encoding  the document encoding (optional)
13494
 * @param options  a combination of xmlParserOption
13495
 * @returns the resulting document tree
13496
 */
13497
xmlDoc *
13498
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13499
          void *ioctx, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13512
0
                                  encoding, 0);
13513
13514
0
    if (input != NULL)
13515
0
        doc = xmlCtxtParseDocument(ctxt, input);
13516
13517
0
    xmlFreeParserCtxt(ctxt);
13518
0
    return(doc);
13519
0
}
13520
13521
/**
13522
 * Parse an XML in-memory document and build a tree.
13523
 *
13524
 * `URL` is used as base to resolve external entities and for error
13525
 * reporting.
13526
 *
13527
 * @param ctxt  an XML parser context
13528
 * @param str  a pointer to a zero terminated string
13529
 * @param URL  base URL (optional)
13530
 * @param encoding  the document encoding (optional)
13531
 * @param options  a combination of xmlParserOption
13532
 * @returns the resulting document tree
13533
 */
13534
xmlDoc *
13535
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13536
               const char *URL, const char *encoding, int options)
13537
0
{
13538
0
    xmlParserInputPtr input;
13539
13540
0
    if (ctxt == NULL)
13541
0
        return(NULL);
13542
13543
0
    xmlCtxtReset(ctxt);
13544
0
    xmlCtxtUseOptions(ctxt, options);
13545
13546
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13547
0
                                      XML_INPUT_BUF_STATIC);
13548
0
    if (input == NULL)
13549
0
        return(NULL);
13550
13551
0
    return(xmlCtxtParseDocument(ctxt, input));
13552
0
}
13553
13554
/**
13555
 * Parse an XML file from the filesystem or a global, user-defined
13556
 * resource loader.
13557
 *
13558
 * This function always enables the XML_PARSE_UNZIP option for
13559
 * backward compatibility. This feature is potentially insecure
13560
 * and might be removed from later versions.
13561
 *
13562
 * @param ctxt  an XML parser context
13563
 * @param filename  a file or URL
13564
 * @param encoding  the document encoding (optional)
13565
 * @param options  a combination of xmlParserOption
13566
 * @returns the resulting document tree
13567
 */
13568
xmlDoc *
13569
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13570
                const char *encoding, int options)
13571
0
{
13572
0
    xmlParserInputPtr input;
13573
13574
0
    if (ctxt == NULL)
13575
0
        return(NULL);
13576
13577
0
    options |= XML_PARSE_UNZIP;
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13583
0
    if (input == NULL)
13584
0
        return(NULL);
13585
13586
0
    return(xmlCtxtParseDocument(ctxt, input));
13587
0
}
13588
13589
/**
13590
 * Parse an XML in-memory document and build a tree. The input buffer must
13591
 * not contain a terminating null byte.
13592
 *
13593
 * `URL` is used as base to resolve external entities and for error
13594
 * reporting.
13595
 *
13596
 * @param ctxt  an XML parser context
13597
 * @param buffer  a pointer to a char array
13598
 * @param size  the size of the array
13599
 * @param URL  base URL (optional)
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13606
                  const char *URL, const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if ((ctxt == NULL) || (size < 0))
13611
0
        return(NULL);
13612
13613
0
    xmlCtxtReset(ctxt);
13614
0
    xmlCtxtUseOptions(ctxt, options);
13615
13616
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13617
0
                                      XML_INPUT_BUF_STATIC);
13618
0
    if (input == NULL)
13619
0
        return(NULL);
13620
13621
0
    return(xmlCtxtParseDocument(ctxt, input));
13622
0
}
13623
13624
/**
13625
 * Parse an XML document from a file descriptor and build a tree.
13626
 *
13627
 * NOTE that the file descriptor will not be closed when the
13628
 * context is freed or reset.
13629
 *
13630
 * `URL` is used as base to resolve external entities and for error
13631
 * reporting.
13632
 *
13633
 * @param ctxt  an XML parser context
13634
 * @param fd  an open file descriptor
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13642
              const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if (ctxt == NULL)
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13653
0
    if (input == NULL)
13654
0
        return(NULL);
13655
13656
0
    return(xmlCtxtParseDocument(ctxt, input));
13657
0
}
13658
13659
/**
13660
 * Parse an XML document from I/O functions and source and build a tree.
13661
 * This reuses the existing `ctxt` parser context
13662
 *
13663
 * `URL` is used as base to resolve external entities and for error
13664
 * reporting.
13665
 *
13666
 * @param ctxt  an XML parser context
13667
 * @param ioread  an I/O read function
13668
 * @param ioclose  an I/O close function
13669
 * @param ioctx  an I/O handler
13670
 * @param URL  the base URL to use for the document
13671
 * @param encoding  the document encoding, or NULL
13672
 * @param options  a combination of xmlParserOption
13673
 * @returns the resulting document tree
13674
 */
13675
xmlDoc *
13676
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13677
              xmlInputCloseCallback ioclose, void *ioctx,
13678
        const char *URL,
13679
              const char *encoding, int options)
13680
0
{
13681
0
    xmlParserInputPtr input;
13682
13683
0
    if (ctxt == NULL)
13684
0
        return(NULL);
13685
13686
0
    xmlCtxtReset(ctxt);
13687
0
    xmlCtxtUseOptions(ctxt, options);
13688
13689
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13690
0
                                  encoding, 0);
13691
0
    if (input == NULL)
13692
0
        return(NULL);
13693
13694
0
    return(xmlCtxtParseDocument(ctxt, input));
13695
0
}
13696