Coverage Report

Created: 2025-07-18 06:56

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
592k
#define NS_INDEX_EMPTY  INT_MAX
81
78.1k
#define NS_INDEX_XML    (INT_MAX - 1)
82
423k
#define URI_HASH_EMPTY  0xD943A04E
83
39.6k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
461k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
641k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
473k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
501k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
1.28M
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record holding a prefix, a URI, a line number and a counter named nsNr.
 * NOTE(review): presumably one entry per open start tag - confirm against
 * the code that fills it.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Per-entry extra data referenced by _xmlParserNsData.extra.
 * NOTE(review): field meanings are not visible in this part of the file;
 * presumably one record per namespace binding - confirm.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * Element type of the _xmlParserNsData.hash array: a hash value paired
 * with an index.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping: an array of xmlParserNsExtra records, a bucket
 * array with its size and element count, plus a few counters/indices.
 * NOTE(review): how these fields are maintained is not visible in this
 * part of the file.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
/*
 * Forward declarations of file-local (static) helpers so that they can be
 * referenced before their definitions.
 */
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
/* NOTE(review): entity-related thresholds; no use is visible in this part
 * of the file - confirm they are still referenced. */
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
3.65M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
3.67M
#define XML_ENT_FIXED_COST 20
170
171
86.8M
/* NOTE(review): presumably sizes of temporary parsing buffers - confirm. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
1.09M
#define XML_PARSER_BUFFER_SIZE 100
173
104k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW, this is the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names.
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level.
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time. It is the concatenation of LIBXML_VERSION_STRING and
189
 * LIBXML_VERSION_EXTRA.
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * NULL-terminated list of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
/*
 * Forward declarations of further file-local (static) helpers.
 */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
3.70k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
3.70k
    xmlCtxtErrMemory(ctxt);
226
3.70k
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
22.5k
{
239
22.5k
    if (prefix == NULL)
240
17.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
17.2k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
17.2k
                   "Attribute %s redefined\n", localname);
243
5.29k
    else
244
5.29k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
5.29k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
5.29k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
22.5k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
29.9M
{
260
29.9M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
29.9M
               NULL, NULL, NULL, 0, "%s", msg);
262
29.9M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
18.6k
{
277
18.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
18.6k
               str1, str2, NULL, 0, msg, str1, str2);
279
18.6k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
44.4k
{
295
44.4k
    ctxt->valid = 0;
296
297
44.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
44.4k
               str1, str2, NULL, 0, msg, str1, str2);
299
44.4k
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
2.46M
{
314
2.46M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
2.46M
               NULL, NULL, NULL, val, msg, val);
316
2.46M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
343k
{
333
343k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
343k
               str1, str2, NULL, val, msg, str1, val, str2);
335
343k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
2.07M
{
349
2.07M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
2.07M
               val, NULL, NULL, 0, msg, val);
351
2.07M
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
9.56k
{
365
9.56k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
9.56k
               val, NULL, NULL, 0, msg, val);
367
9.56k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
251k
{
385
251k
    ctxt->nsWellFormed = 0;
386
387
251k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
251k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
251k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
40.0k
{
407
40.0k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
40.0k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
40.0k
}
410
411
/**
 * Add a value to an accumulator, clamping at ULONG_MAX instead of
 * wrapping around.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val <= room) ? (*dst + val) : ULONG_MAX;
}
418
419
/**
 * Add a size_t value to an unsigned long accumulator, clamping at
 * ULONG_MAX instead of wrapping around.
 *
 * The value is taken as size_t so that it is compared at full width
 * before anything is narrowed. On platforms where size_t is wider than
 * unsigned long, a value above ULONG_MAX therefore saturates the
 * accumulator rather than being truncated by the call.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the narrowing cast is exact. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like #xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * @param ctxt  parser context
446
 * @param extra  sum of unexpanded entity sizes
447
 * @returns 1 on error, 0 on success.
448
 */
449
static int
450
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
451
3.71M
{
452
3.71M
    unsigned long consumed;
453
3.71M
    unsigned long *expandedSize;
454
3.71M
    xmlParserInputPtr input = ctxt->input;
455
3.71M
    xmlEntityPtr entity = input->entity;
456
457
3.71M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
458
55.2k
        return(0);
459
460
    /*
461
     * Compute total consumed bytes so far, including input streams of
462
     * external entities.
463
     */
464
3.65M
    consumed = input->consumed;
465
3.65M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
466
3.65M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
467
468
3.65M
    if (entity)
469
109k
        expandedSize = &entity->expandedSize;
470
3.54M
    else
471
3.54M
        expandedSize = &ctxt->sizeentcopy;
472
473
    /*
474
     * Add extra cost and some fixed cost.
475
     */
476
3.65M
    xmlSaturatedAdd(expandedSize, extra);
477
3.65M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
478
479
    /*
480
     * It's important to always use saturation arithmetic when tracking
481
     * entity sizes to make the size checks reliable. If "sizeentcopy"
482
     * overflows, we have to abort.
483
     */
484
3.65M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
485
3.65M
        ((*expandedSize >= ULONG_MAX) ||
486
1.19M
         (*expandedSize / ctxt->maxAmpl > consumed))) {
487
1.35k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
488
1.35k
                       "Maximum entity amplification factor exceeded, see "
489
1.35k
                       "xmlCtxtSetMaxAmplification.\n");
490
1.35k
        xmlHaltParser(ctxt);
491
1.35k
        return(1);
492
1.35k
    }
493
494
3.65M
    return(0);
495
3.65M
}
496
497
/************************************************************************
498
 *                  *
499
 *    Library wide options          *
500
 *                  *
501
 ************************************************************************/
502
503
/**
504
 * Examines if the library has been compiled with a given feature.
505
 *
506
 * @param feature  the feature to be examined
507
 * @returns zero (0) if the feature does not exist or an unknown
508
 * feature is requested, non-zero otherwise.
509
 */
510
int
511
xmlHasFeature(xmlFeature feature)
512
0
{
513
0
    switch (feature) {
514
0
  case XML_WITH_THREAD:
515
0
#ifdef LIBXML_THREAD_ENABLED
516
0
      return(1);
517
#else
518
      return(0);
519
#endif
520
0
        case XML_WITH_TREE:
521
0
            return(1);
522
0
        case XML_WITH_OUTPUT:
523
0
#ifdef LIBXML_OUTPUT_ENABLED
524
0
            return(1);
525
#else
526
            return(0);
527
#endif
528
0
        case XML_WITH_PUSH:
529
0
#ifdef LIBXML_PUSH_ENABLED
530
0
            return(1);
531
#else
532
            return(0);
533
#endif
534
0
        case XML_WITH_READER:
535
0
#ifdef LIBXML_READER_ENABLED
536
0
            return(1);
537
#else
538
            return(0);
539
#endif
540
0
        case XML_WITH_PATTERN:
541
0
#ifdef LIBXML_PATTERN_ENABLED
542
0
            return(1);
543
#else
544
            return(0);
545
#endif
546
0
        case XML_WITH_WRITER:
547
0
#ifdef LIBXML_WRITER_ENABLED
548
0
            return(1);
549
#else
550
            return(0);
551
#endif
552
0
        case XML_WITH_SAX1:
553
0
#ifdef LIBXML_SAX1_ENABLED
554
0
            return(1);
555
#else
556
            return(0);
557
#endif
558
0
        case XML_WITH_HTTP:
559
0
            return(0);
560
0
        case XML_WITH_VALID:
561
0
#ifdef LIBXML_VALID_ENABLED
562
0
            return(1);
563
#else
564
            return(0);
565
#endif
566
0
        case XML_WITH_HTML:
567
0
#ifdef LIBXML_HTML_ENABLED
568
0
            return(1);
569
#else
570
            return(0);
571
#endif
572
0
        case XML_WITH_LEGACY:
573
0
            return(0);
574
0
        case XML_WITH_C14N:
575
0
#ifdef LIBXML_C14N_ENABLED
576
0
            return(1);
577
#else
578
            return(0);
579
#endif
580
0
        case XML_WITH_CATALOG:
581
0
#ifdef LIBXML_CATALOG_ENABLED
582
0
            return(1);
583
#else
584
            return(0);
585
#endif
586
0
        case XML_WITH_XPATH:
587
0
#ifdef LIBXML_XPATH_ENABLED
588
0
            return(1);
589
#else
590
            return(0);
591
#endif
592
0
        case XML_WITH_XPTR:
593
0
#ifdef LIBXML_XPTR_ENABLED
594
0
            return(1);
595
#else
596
            return(0);
597
#endif
598
0
        case XML_WITH_XINCLUDE:
599
0
#ifdef LIBXML_XINCLUDE_ENABLED
600
0
            return(1);
601
#else
602
            return(0);
603
#endif
604
0
        case XML_WITH_ICONV:
605
0
#ifdef LIBXML_ICONV_ENABLED
606
0
            return(1);
607
#else
608
            return(0);
609
#endif
610
0
        case XML_WITH_ISO8859X:
611
0
#ifdef LIBXML_ISO8859X_ENABLED
612
0
            return(1);
613
#else
614
            return(0);
615
#endif
616
0
        case XML_WITH_UNICODE:
617
0
            return(0);
618
0
        case XML_WITH_REGEXP:
619
0
#ifdef LIBXML_REGEXP_ENABLED
620
0
            return(1);
621
#else
622
            return(0);
623
#endif
624
0
        case XML_WITH_AUTOMATA:
625
0
#ifdef LIBXML_REGEXP_ENABLED
626
0
            return(1);
627
#else
628
            return(0);
629
#endif
630
0
        case XML_WITH_EXPR:
631
0
            return(0);
632
0
        case XML_WITH_RELAXNG:
633
0
#ifdef LIBXML_RELAXNG_ENABLED
634
0
            return(1);
635
#else
636
            return(0);
637
#endif
638
0
        case XML_WITH_SCHEMAS:
639
0
#ifdef LIBXML_SCHEMAS_ENABLED
640
0
            return(1);
641
#else
642
            return(0);
643
#endif
644
0
        case XML_WITH_SCHEMATRON:
645
#ifdef LIBXML_SCHEMATRON_ENABLED
646
            return(1);
647
#else
648
0
            return(0);
649
0
#endif
650
0
        case XML_WITH_MODULES:
651
0
#ifdef LIBXML_MODULES_ENABLED
652
0
            return(1);
653
#else
654
            return(0);
655
#endif
656
0
        case XML_WITH_DEBUG:
657
#ifdef LIBXML_DEBUG_ENABLED
658
            return(1);
659
#else
660
0
            return(0);
661
0
#endif
662
0
        case XML_WITH_DEBUG_MEM:
663
0
            return(0);
664
0
        case XML_WITH_ZLIB:
665
0
#ifdef LIBXML_ZLIB_ENABLED
666
0
            return(1);
667
#else
668
            return(0);
669
#endif
670
0
        case XML_WITH_LZMA:
671
0
#ifdef LIBXML_LZMA_ENABLED
672
0
            return(1);
673
#else
674
            return(0);
675
#endif
676
0
        case XML_WITH_ICU:
677
#ifdef LIBXML_ICU_ENABLED
678
            return(1);
679
#else
680
0
            return(0);
681
0
#endif
682
0
        default:
683
0
      break;
684
0
     }
685
0
     return(0);
686
0
}
687
688
/************************************************************************
689
 *                  *
690
 *      Simple string buffer        *
691
 *                  *
692
 ************************************************************************/
693
694
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * The xmlSBuf* functions below allocate "mem" lazily (it is NULL after
 * xmlSBufInit) and record failures in "code" instead of returning them.
 */
typedef struct {
695
    xmlChar *mem;
696
    unsigned size;
697
    unsigned cap; /* size < cap */
698
    unsigned max; /* size <= max */
699
    xmlParserErrors code;
700
} xmlSBuf;
701
702
static void
703
566k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
704
566k
    buf->mem = NULL;
705
566k
    buf->size = 0;
706
566k
    buf->cap = 0;
707
566k
    buf->max = max;
708
566k
    buf->code = XML_ERR_OK;
709
566k
}
710
711
static int
712
333k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
713
333k
    xmlChar *mem;
714
333k
    unsigned cap;
715
716
333k
    if (len >= UINT_MAX / 2 - buf->size) {
717
0
        if (buf->code == XML_ERR_OK)
718
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
719
0
        return(-1);
720
0
    }
721
722
333k
    cap = (buf->size + len) * 2;
723
333k
    if (cap < 240)
724
260k
        cap = 240;
725
726
333k
    mem = xmlRealloc(buf->mem, cap);
727
333k
    if (mem == NULL) {
728
578
        buf->code = XML_ERR_NO_MEMORY;
729
578
        return(-1);
730
578
    }
731
732
332k
    buf->mem = mem;
733
332k
    buf->cap = cap;
734
735
332k
    return(0);
736
333k
}
737
738
static void
739
335M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
740
335M
    if (buf->max - buf->size < len) {
741
0
        if (buf->code == XML_ERR_OK)
742
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
743
0
        return;
744
0
    }
745
746
335M
    if (buf->cap - buf->size <= len) {
747
324k
        if (xmlSBufGrow(buf, len) < 0)
748
527
            return;
749
324k
    }
750
751
335M
    if (len > 0)
752
335M
        memcpy(buf->mem + buf->size, str, len);
753
335M
    buf->size += len;
754
335M
}
755
756
static void
757
330M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
758
330M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
759
330M
}
760
761
static void
762
695k
xmlSBufAddChar(xmlSBuf *buf, int c) {
763
695k
    xmlChar *end;
764
765
695k
    if (buf->max - buf->size < 4) {
766
0
        if (buf->code == XML_ERR_OK)
767
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
768
0
        return;
769
0
    }
770
771
695k
    if (buf->cap - buf->size <= 4) {
772
9.07k
        if (xmlSBufGrow(buf, 4) < 0)
773
51
            return;
774
9.07k
    }
775
776
695k
    end = buf->mem + buf->size;
777
778
695k
    if (c < 0x80) {
779
682k
        *end = (xmlChar) c;
780
682k
        buf->size += 1;
781
682k
    } else {
782
12.9k
        buf->size += xmlCopyCharMultiByte(end, c);
783
12.9k
    }
784
695k
}
785
786
static void
787
275M
xmlSBufAddReplChar(xmlSBuf *buf) {
788
275M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
789
275M
}
790
791
static void
792
614
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
793
614
    if (buf->code == XML_ERR_NO_MEMORY)
794
614
        xmlCtxtErrMemory(ctxt);
795
0
    else
796
0
        xmlFatalErr(ctxt, buf->code, errMsg);
797
614
}
798
799
static xmlChar *
800
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
801
272k
              const char *errMsg) {
802
272k
    if (buf->mem == NULL) {
803
17.4k
        buf->mem = xmlMalloc(1);
804
17.4k
        if (buf->mem == NULL) {
805
36
            buf->code = XML_ERR_NO_MEMORY;
806
17.4k
        } else {
807
17.4k
            buf->mem[0] = 0;
808
17.4k
        }
809
254k
    } else {
810
254k
        buf->mem[buf->size] = 0;
811
254k
    }
812
813
272k
    if (buf->code == XML_ERR_OK) {
814
271k
        if (sizeOut != NULL)
815
60.8k
            *sizeOut = buf->size;
816
271k
        return(buf->mem);
817
271k
    }
818
819
431
    xmlSBufReportError(buf, ctxt, errMsg);
820
821
431
    xmlFree(buf->mem);
822
823
431
    if (sizeOut != NULL)
824
38
        *sizeOut = 0;
825
431
    return(NULL);
826
272k
}
827
828
static void
829
274k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
830
274k
    if (buf->code != XML_ERR_OK)
831
183
        xmlSBufReportError(buf, ctxt, errMsg);
832
833
274k
    xmlFree(buf->mem);
834
274k
}
835
836
static int
837
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
838
390M
                    const char *errMsg) {
839
390M
    int c = str[0];
840
390M
    int c1 = str[1];
841
842
390M
    if ((c1 & 0xC0) != 0x80)
843
79.4M
        goto encoding_error;
844
845
311M
    if (c < 0xE0) {
846
        /* 2-byte sequence */
847
178M
        if (c < 0xC2)
848
173M
            goto encoding_error;
849
850
5.21M
        return(2);
851
178M
    } else {
852
133M
        int c2 = str[2];
853
854
133M
        if ((c2 & 0xC0) != 0x80)
855
7.46k
            goto encoding_error;
856
857
133M
        if (c < 0xF0) {
858
            /* 3-byte sequence */
859
133M
            if (c == 0xE0) {
860
                /* overlong */
861
6.73k
                if (c1 < 0xA0)
862
320
                    goto encoding_error;
863
132M
            } else if (c == 0xED) {
864
                /* surrogate */
865
9.64k
                if (c1 >= 0xA0)
866
249
                    goto encoding_error;
867
132M
            } else if (c == 0xEF) {
868
                /* U+FFFE and U+FFFF are invalid Chars */
869
131M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
870
1.48k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
871
131M
            }
872
873
132M
            return(3);
874
133M
        } else {
875
            /* 4-byte sequence */
876
26.1k
            if ((str[3] & 0xC0) != 0x80)
877
3.84k
                goto encoding_error;
878
22.3k
            if (c == 0xF0) {
879
                /* overlong */
880
1.09k
                if (c1 < 0x90)
881
443
                    goto encoding_error;
882
21.2k
            } else if (c >= 0xF4) {
883
                /* greater than 0x10FFFF */
884
9.43k
                if ((c > 0xF4) || (c1 >= 0x90))
885
8.94k
                    goto encoding_error;
886
9.43k
            }
887
888
12.9k
            return(4);
889
22.3k
        }
890
133M
    }
891
892
252M
encoding_error:
893
    /* Only report the first error */
894
252M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
895
31.5k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
896
31.5k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
897
31.5k
    }
898
899
252M
    return(0);
900
311M
}
901
902
/************************************************************************
903
 *                  *
904
 *    SAX2 defaulted attributes handling      *
905
 *                  *
906
 ************************************************************************/
907
908
/**
909
 * Final initialization of the parser context before starting to parse.
910
 *
911
 * This accounts for users modifying struct members of parser context
912
 * directly.
913
 *
914
 * @param ctxt  an XML parser context
915
 */
916
static void
917
80.5k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
918
80.5k
    xmlSAXHandlerPtr sax;
919
920
    /* Avoid unused variable warning if features are disabled. */
921
80.5k
    (void) sax;
922
923
    /*
924
     * Changing the SAX struct directly is still widespread practice
925
     * in internal and external code.
926
     */
927
80.5k
    if (ctxt == NULL) return;
928
80.5k
    sax = ctxt->sax;
929
80.5k
#ifdef LIBXML_SAX1_ENABLED
930
    /*
931
     * Only enable SAX2 if there SAX2 element handlers, except when there
932
     * are no element handlers at all.
933
     */
934
80.5k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
935
80.5k
        (sax) &&
936
80.5k
        (sax->initialized == XML_SAX2_MAGIC) &&
937
80.5k
        ((sax->startElementNs != NULL) ||
938
49.3k
         (sax->endElementNs != NULL) ||
939
49.3k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
940
49.3k
        ctxt->sax2 = 1;
941
#else
942
    ctxt->sax2 = 1;
943
#endif /* LIBXML_SAX1_ENABLED */
944
945
    /*
946
     * Some users replace the dictionary directly in the context struct.
947
     * We really need an API function to do that cleanly.
948
     */
949
80.5k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
950
80.5k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
951
80.5k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
952
80.5k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
953
80.5k
    (ctxt->str_xml_ns == NULL)) {
954
669
        xmlErrMemory(ctxt);
955
669
    }
956
957
80.5k
    xmlDictSetLimit(ctxt->dict,
958
80.5k
                    (ctxt->options & XML_PARSE_HUGE) ?
959
34.5k
                        0 :
960
80.5k
                        XML_MAX_DICTIONARY_LIMIT);
961
962
80.5k
#ifdef LIBXML_VALID_ENABLED
963
80.5k
    if (ctxt->validate)
964
53.9k
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
965
26.6k
    else
966
26.6k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
967
80.5k
#endif /* LIBXML_VALID_ENABLED */
968
80.5k
}
969
970
/*
 * One defaulted attribute, stored as element of xmlDefAttrs.attrs.
 * NOTE(review): presumably prefix/name/value are dictionary strings and
 * valueEnd points past the end of the value - confirm against the code
 * that fills the record.
 */
typedef struct {
971
    xmlHashedString prefix;
972
    xmlHashedString name;
973
    xmlHashedString value;
974
    const xmlChar *valueEnd;
975
    int external;
976
    int expandedSize;
977
} xmlDefAttr;
978
979
/*
 * Variable-length array of defaulted attributes for one element: a
 * header with the used and allocated counts, followed by the records.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
979
typedef xmlDefAttrs *xmlDefAttrsPtr;
980
struct _xmlDefAttrs {
981
    int nbAttrs;  /* number of defaulted attributes on that element */
982
    int maxAttrs;       /* the size of the array */
983
#if __STDC_VERSION__ >= 199901L
984
    /* Using a C99 flexible array member avoids UBSan errors. */
985
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
986
#else
987
    /* Pre-C99 fallback: one-element array standing in for the tail. */
    xmlDefAttr attrs[1];
988
#endif
989
};
991
992
/**
993
 * Normalize the space in non CDATA attribute values:
994
 * If the attribute type is not CDATA, then the XML processor MUST further
995
 * process the normalized attribute value by discarding any leading and
996
 * trailing space (\#x20) characters, and by replacing sequences of space
997
 * (\#x20) characters by a single space (\#x20) character.
998
 * Note that the size of dst need to be at least src, and if one doesn't need
999
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000
 * passing src as dst is just fine.
1001
 *
1002
 * @param src  the source string
1003
 * @param dst  the target string
1004
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
1005
 *         is needed.
1006
 */
1007
static xmlChar *
1008
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1009
29.8k
{
1010
29.8k
    if ((src == NULL) || (dst == NULL))
1011
0
        return(NULL);
1012
1013
30.6k
    while (*src == 0x20) src++;
1014
295M
    while (*src != 0) {
1015
295M
  if (*src == 0x20) {
1016
12.2M
      while (*src == 0x20) src++;
1017
73.1k
      if (*src != 0)
1018
72.3k
    *dst++ = 0x20;
1019
295M
  } else {
1020
295M
      *dst++ = *src++;
1021
295M
  }
1022
295M
    }
1023
29.8k
    *dst = 0;
1024
29.8k
    if (dst == src)
1025
28.2k
       return(NULL);
1026
1.51k
    return(dst);
1027
29.8k
}
1028
1029
/**
1030
 * Add a defaulted attribute for an element
1031
 *
1032
 * @param ctxt  an XML parser context
1033
 * @param fullname  the element fullname
1034
 * @param fullattr  the attribute fullname
1035
 * @param value  the attribute value
1036
 */
1037
static void
1038
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1039
               const xmlChar *fullname,
1040
               const xmlChar *fullattr,
1041
26.6k
               const xmlChar *value) {
1042
26.6k
    xmlDefAttrsPtr defaults;
1043
26.6k
    xmlDefAttr *attr;
1044
26.6k
    int len, expandedSize;
1045
26.6k
    xmlHashedString name;
1046
26.6k
    xmlHashedString prefix;
1047
26.6k
    xmlHashedString hvalue;
1048
26.6k
    const xmlChar *localname;
1049
1050
    /*
1051
     * Allows to detect attribute redefinitions
1052
     */
1053
26.6k
    if (ctxt->attsSpecial != NULL) {
1054
20.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1055
3.45k
      return;
1056
20.5k
    }
1057
1058
23.1k
    if (ctxt->attsDefault == NULL) {
1059
6.32k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1060
6.32k
  if (ctxt->attsDefault == NULL)
1061
27
      goto mem_error;
1062
6.32k
    }
1063
1064
    /*
1065
     * split the element name into prefix:localname , the string found
1066
     * are within the DTD and then not associated to namespace names.
1067
     */
1068
23.1k
    localname = xmlSplitQName3(fullname, &len);
1069
23.1k
    if (localname == NULL) {
1070
22.4k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1071
22.4k
  prefix.name = NULL;
1072
22.4k
    } else {
1073
658
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1074
658
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1075
658
        if (prefix.name == NULL)
1076
3
            goto mem_error;
1077
658
    }
1078
23.1k
    if (name.name == NULL)
1079
6
        goto mem_error;
1080
1081
    /*
1082
     * make sure there is some storage
1083
     */
1084
23.1k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1085
23.1k
    if ((defaults == NULL) ||
1086
23.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1087
8.04k
        xmlDefAttrsPtr temp;
1088
8.04k
        int newSize;
1089
1090
8.04k
        if (defaults == NULL) {
1091
7.00k
            newSize = 4;
1092
7.00k
        } else {
1093
1.04k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1094
1.04k
                ((size_t) defaults->maxAttrs >
1095
1.04k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1096
0
                goto mem_error;
1097
1098
1.04k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1099
0
                newSize = XML_MAX_ATTRS;
1100
1.04k
            else
1101
1.04k
                newSize = defaults->maxAttrs * 2;
1102
1.04k
        }
1103
8.04k
        temp = xmlRealloc(defaults,
1104
8.04k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1105
8.04k
  if (temp == NULL)
1106
25
      goto mem_error;
1107
8.02k
        if (defaults == NULL)
1108
6.98k
            temp->nbAttrs = 0;
1109
8.02k
  temp->maxAttrs = newSize;
1110
8.02k
        defaults = temp;
1111
8.02k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1112
8.02k
                          defaults, NULL) < 0) {
1113
0
      xmlFree(defaults);
1114
0
      goto mem_error;
1115
0
  }
1116
8.02k
    }
1117
1118
    /*
1119
     * Split the attribute name into prefix:localname , the string found
1120
     * are within the DTD and hen not associated to namespace names.
1121
     */
1122
23.1k
    localname = xmlSplitQName3(fullattr, &len);
1123
23.1k
    if (localname == NULL) {
1124
19.9k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1125
19.9k
  prefix.name = NULL;
1126
19.9k
    } else {
1127
3.13k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1128
3.13k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1129
3.13k
        if (prefix.name == NULL)
1130
8
            goto mem_error;
1131
3.13k
    }
1132
23.1k
    if (name.name == NULL)
1133
6
        goto mem_error;
1134
1135
    /* intern the string and precompute the end */
1136
23.1k
    len = strlen((const char *) value);
1137
23.1k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1138
23.1k
    if (hvalue.name == NULL)
1139
11
        goto mem_error;
1140
1141
23.0k
    expandedSize = strlen((const char *) name.name);
1142
23.0k
    if (prefix.name != NULL)
1143
3.11k
        expandedSize += strlen((const char *) prefix.name);
1144
23.0k
    expandedSize += len;
1145
1146
23.0k
    attr = &defaults->attrs[defaults->nbAttrs++];
1147
23.0k
    attr->name = name;
1148
23.0k
    attr->prefix = prefix;
1149
23.0k
    attr->value = hvalue;
1150
23.0k
    attr->valueEnd = hvalue.name + len;
1151
23.0k
    attr->external = PARSER_EXTERNAL(ctxt);
1152
23.0k
    attr->expandedSize = expandedSize;
1153
1154
23.0k
    return;
1155
1156
86
mem_error:
1157
86
    xmlErrMemory(ctxt);
1158
86
}
1159
1160
/**
1161
 * Register this attribute type
1162
 *
1163
 * @param ctxt  an XML parser context
1164
 * @param fullname  the element fullname
1165
 * @param fullattr  the attribute fullname
1166
 * @param type  the attribute type
1167
 */
1168
static void
1169
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1170
      const xmlChar *fullname,
1171
      const xmlChar *fullattr,
1172
      int type)
1173
47.8k
{
1174
47.8k
    if (ctxt->attsSpecial == NULL) {
1175
8.46k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1176
8.46k
  if (ctxt->attsSpecial == NULL)
1177
48
      goto mem_error;
1178
8.46k
    }
1179
1180
47.7k
    if (PARSER_EXTERNAL(ctxt))
1181
19.1k
        type |= XML_SPECIAL_EXTERNAL;
1182
1183
47.7k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1184
47.7k
                    XML_INT_TO_PTR(type)) < 0)
1185
6
        goto mem_error;
1186
47.7k
    return;
1187
1188
47.7k
mem_error:
1189
54
    xmlErrMemory(ctxt);
1190
54
}
1191
1192
/**
1193
 * Removes CDATA attributes from the special attribute table
1194
 */
1195
static void
1196
xmlCleanSpecialAttrCallback(void *payload, void *data,
1197
                            const xmlChar *fullname, const xmlChar *fullattr,
1198
39.8k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1199
39.8k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1200
1201
39.8k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1202
2.79k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1203
2.79k
    }
1204
39.8k
}
1205
1206
/**
1207
 * Trim the list of attributes defined to remove all those of type
1208
 * CDATA as they are not special. This call should be done when finishing
1209
 * to parse the DTD and before starting to parse the document root.
1210
 *
1211
 * @param ctxt  an XML parser context
1212
 */
1213
static void
1214
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1215
44.1k
{
1216
44.1k
    if (ctxt->attsSpecial == NULL)
1217
35.7k
        return;
1218
1219
8.36k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1220
1221
8.36k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1222
461
        xmlHashFree(ctxt->attsSpecial, NULL);
1223
461
        ctxt->attsSpecial = NULL;
1224
461
    }
1225
8.36k
}
1226
1227
/**
1228
 * Checks that the value conforms to the LanguageID production:
1229
 *
1230
 * @deprecated Internal function, do not use.
1231
 *
1232
 * NOTE: this is somewhat deprecated, those productions were removed from
1233
 * the XML Second edition.
1234
 *
1235
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1236
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1237
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1238
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1239
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1240
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1241
 *
1242
 * The current REC reference the successors of RFC 1766, currently 5646
1243
 *
1244
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1245
 *
1246
 *     langtag       = language
1247
 *                     ["-" script]
1248
 *                     ["-" region]
1249
 *                     *("-" variant)
1250
 *                     *("-" extension)
1251
 *                     ["-" privateuse]
1252
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1253
 *                     ["-" extlang]       ; sometimes followed by
1254
 *                                         ; extended language subtags
1255
 *                   / 4ALPHA              ; or reserved for future use
1256
 *                   / 5*8ALPHA            ; or registered language subtag
1257
 *
1258
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1259
 *                     *2("-" 3ALPHA)      ; permanently reserved
1260
 *
1261
 *     script        = 4ALPHA              ; ISO 15924 code
1262
 *
1263
 *     region        = 2ALPHA              ; ISO 3166-1 code
1264
 *                   / 3DIGIT              ; UN M.49 code
1265
 *
1266
 *     variant       = 5*8alphanum         ; registered variants
1267
 *                   / (DIGIT 3alphanum)
1268
 *
1269
 *     extension     = singleton 1*("-" (2*8alphanum))
1270
 *
1271
 *                                         ; Single alphanumerics
1272
 *                                         ; "x" reserved for private use
1273
 *     singleton     = DIGIT               ; 0 - 9
1274
 *                   / %x41-57             ; A - W
1275
 *                   / %x59-5A             ; Y - Z
1276
 *                   / %x61-77             ; a - w
1277
 *                   / %x79-7A             ; y - z
1278
 *
1279
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1280
 * The parser below doesn't try to cope with extension or privateuse
1281
 * that could be added but that's not interoperable anyway
1282
 *
1283
 * @param lang  pointer to the string value
1284
 * @returns 1 if correct 0 otherwise
1285
 **/
1286
int
1287
xmlCheckLanguageID(const xmlChar * lang)
1288
13.7k
{
1289
13.7k
    const xmlChar *cur = lang, *nxt;
1290
1291
13.7k
    if (cur == NULL)
1292
364
        return (0);
1293
13.4k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1294
13.4k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1295
13.4k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1296
13.4k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1297
        /*
1298
         * Still allow IANA code and user code which were coming
1299
         * from the previous version of the XML-1.0 specification
1300
         * it's deprecated but we should not fail
1301
         */
1302
964
        cur += 2;
1303
2.51k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1304
2.51k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1305
1.54k
            cur++;
1306
964
        return(cur[0] == 0);
1307
964
    }
1308
12.4k
    nxt = cur;
1309
45.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
45.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
33.1k
           nxt++;
1312
12.4k
    if (nxt - cur >= 4) {
1313
        /*
1314
         * Reserved
1315
         */
1316
947
        if ((nxt - cur > 8) || (nxt[0] != 0))
1317
619
            return(0);
1318
328
        return(1);
1319
947
    }
1320
11.4k
    if (nxt - cur < 2)
1321
526
        return(0);
1322
    /* we got an ISO 639 code */
1323
10.9k
    if (nxt[0] == 0)
1324
416
        return(1);
1325
10.5k
    if (nxt[0] != '-')
1326
432
        return(0);
1327
1328
10.1k
    nxt++;
1329
10.1k
    cur = nxt;
1330
    /* now we can have extlang or script or region or variant */
1331
10.1k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1332
780
        goto region_m49;
1333
1334
42.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1335
42.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1336
33.0k
           nxt++;
1337
9.34k
    if (nxt - cur == 4)
1338
3.25k
        goto script;
1339
6.08k
    if (nxt - cur == 2)
1340
1.42k
        goto region;
1341
4.65k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
422
        goto variant;
1343
4.23k
    if (nxt - cur != 3)
1344
907
        return(0);
1345
    /* we parsed an extlang */
1346
3.32k
    if (nxt[0] == 0)
1347
207
        return(1);
1348
3.12k
    if (nxt[0] != '-')
1349
424
        return(0);
1350
1351
2.69k
    nxt++;
1352
2.69k
    cur = nxt;
1353
    /* now we can have script or region or variant */
1354
2.69k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1355
222
        goto region_m49;
1356
1357
11.2k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1358
11.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1359
8.76k
           nxt++;
1360
2.47k
    if (nxt - cur == 2)
1361
373
        goto region;
1362
2.10k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
246
        goto variant;
1364
1.85k
    if (nxt - cur != 4)
1365
931
        return(0);
1366
    /* we parsed a script */
1367
4.17k
script:
1368
4.17k
    if (nxt[0] == 0)
1369
258
        return(1);
1370
3.92k
    if (nxt[0] != '-')
1371
233
        return(0);
1372
1373
3.68k
    nxt++;
1374
3.68k
    cur = nxt;
1375
    /* now we can have region or variant */
1376
3.68k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1377
352
        goto region_m49;
1378
1379
27.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1380
27.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1381
24.1k
           nxt++;
1382
1383
3.33k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1384
896
        goto variant;
1385
2.44k
    if (nxt - cur != 2)
1386
2.19k
        return(0);
1387
    /* we parsed a region */
1388
2.43k
region:
1389
2.43k
    if (nxt[0] == 0)
1390
330
        return(1);
1391
2.10k
    if (nxt[0] != '-')
1392
773
        return(0);
1393
1394
1.33k
    nxt++;
1395
1.33k
    cur = nxt;
1396
    /* now we can just have a variant */
1397
7.16k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1398
7.16k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1399
5.83k
           nxt++;
1400
1401
1.33k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1402
895
        return(0);
1403
1404
    /* we parsed a variant */
1405
2.00k
variant:
1406
2.00k
    if (nxt[0] == 0)
1407
356
        return(1);
1408
1.64k
    if (nxt[0] != '-')
1409
1.34k
        return(0);
1410
    /* extensions and private use subtags not checked */
1411
296
    return (1);
1412
1413
1.35k
region_m49:
1414
1.35k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1415
1.35k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1416
389
        nxt += 3;
1417
389
        goto region;
1418
389
    }
1419
965
    return(0);
1420
1.35k
}
1421
1422
/************************************************************************
1423
 *                  *
1424
 *    Parser stacks related functions and macros    *
1425
 *                  *
1426
 ************************************************************************/
1427
1428
static xmlChar *
1429
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1430
1431
/**
1432
 * Create a new namespace database.
1433
 *
1434
 * @returns the new obejct.
1435
 */
1436
xmlParserNsData *
1437
75.9k
xmlParserNsCreate(void) {
1438
75.9k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1439
1440
75.9k
    if (nsdb == NULL)
1441
6
        return(NULL);
1442
75.9k
    memset(nsdb, 0, sizeof(*nsdb));
1443
75.9k
    nsdb->defaultNsIndex = INT_MAX;
1444
1445
75.9k
    return(nsdb);
1446
75.9k
}
1447
1448
/**
1449
 * Free a namespace database.
1450
 *
1451
 * @param nsdb  namespace database
1452
 */
1453
void
1454
75.9k
xmlParserNsFree(xmlParserNsData *nsdb) {
1455
75.9k
    if (nsdb == NULL)
1456
0
        return;
1457
1458
75.9k
    xmlFree(nsdb->extra);
1459
75.9k
    xmlFree(nsdb->hash);
1460
75.9k
    xmlFree(nsdb);
1461
75.9k
}
1462
1463
/**
1464
 * Reset a namespace database.
1465
 *
1466
 * @param nsdb  namespace database
1467
 */
1468
static void
1469
64.4k
xmlParserNsReset(xmlParserNsData *nsdb) {
1470
64.4k
    if (nsdb == NULL)
1471
0
        return;
1472
1473
64.4k
    nsdb->hashElems = 0;
1474
64.4k
    nsdb->elementId = 0;
1475
64.4k
    nsdb->defaultNsIndex = INT_MAX;
1476
1477
64.4k
    if (nsdb->hash)
1478
794
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1479
64.4k
}
1480
1481
/**
1482
 * Signal that a new element has started.
1483
 *
1484
 * @param nsdb  namespace database
1485
 * @returns 0 on success, -1 if the element counter overflowed.
1486
 */
1487
static int
1488
6.29M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1489
6.29M
    if (nsdb->elementId == UINT_MAX)
1490
0
        return(-1);
1491
6.29M
    nsdb->elementId++;
1492
1493
6.29M
    return(0);
1494
6.29M
}
1495
1496
/**
1497
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1498
 * be set to the matching bucket, or the first empty bucket if no match
1499
 * was found.
1500
 *
1501
 * @param ctxt  parser context
1502
 * @param prefix  namespace prefix
1503
 * @param bucketPtr  optional bucket (return value)
1504
 * @returns the namespace index on success, INT_MAX if no namespace was
1505
 * found.
1506
 */
1507
static int
1508
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1509
1.67M
                  xmlParserNsBucket **bucketPtr) {
1510
1.67M
    xmlParserNsBucket *bucket, *tombstone;
1511
1.67M
    unsigned index, hashValue;
1512
1513
1.67M
    if (prefix->name == NULL)
1514
836k
        return(ctxt->nsdb->defaultNsIndex);
1515
1516
842k
    if (ctxt->nsdb->hashSize == 0)
1517
37.7k
        return(INT_MAX);
1518
1519
805k
    hashValue = prefix->hashValue;
1520
805k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1521
805k
    bucket = &ctxt->nsdb->hash[index];
1522
805k
    tombstone = NULL;
1523
1524
1.01M
    while (bucket->hashValue) {
1525
866k
        if (bucket->index == INT_MAX) {
1526
108k
            if (tombstone == NULL)
1527
89.0k
                tombstone = bucket;
1528
758k
        } else if (bucket->hashValue == hashValue) {
1529
659k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1530
659k
                if (bucketPtr != NULL)
1531
416k
                    *bucketPtr = bucket;
1532
659k
                return(bucket->index);
1533
659k
            }
1534
659k
        }
1535
1536
207k
        index++;
1537
207k
        bucket++;
1538
207k
        if (index == ctxt->nsdb->hashSize) {
1539
30.7k
            index = 0;
1540
30.7k
            bucket = ctxt->nsdb->hash;
1541
30.7k
        }
1542
207k
    }
1543
1544
146k
    if (bucketPtr != NULL)
1545
66.4k
        *bucketPtr = tombstone ? tombstone : bucket;
1546
146k
    return(INT_MAX);
1547
805k
}
1548
1549
/**
1550
 * Lookup namespace URI with given prefix.
1551
 *
1552
 * @param ctxt  parser context
1553
 * @param prefix  namespace prefix
1554
 * @returns the namespace URI on success, NULL if no namespace was found.
1555
 */
1556
static const xmlChar *
1557
777k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1558
777k
    const xmlChar *ret;
1559
777k
    int nsIndex;
1560
1561
777k
    if (prefix->name == ctxt->str_xml)
1562
1.58k
        return(ctxt->str_xml_ns);
1563
1564
    /*
1565
     * minNsIndex is used when building an entity tree. We must
1566
     * ignore namespaces declared outside the entity.
1567
     */
1568
776k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1569
776k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1570
528k
        return(NULL);
1571
1572
248k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1573
248k
    if (ret[0] == 0)
1574
2.39k
        ret = NULL;
1575
248k
    return(ret);
1576
776k
}
1577
1578
/**
1579
 * Lookup extra data for the given prefix. This returns data stored
1580
 * with xmlParserNsUdpateSax.
1581
 *
1582
 * @param ctxt  parser context
1583
 * @param prefix  namespace prefix
1584
 * @returns the data on success, NULL if no namespace was found.
1585
 */
1586
void *
1587
121k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1588
121k
    xmlHashedString hprefix;
1589
121k
    int nsIndex;
1590
1591
121k
    if (prefix == ctxt->str_xml)
1592
15.5k
        return(NULL);
1593
1594
105k
    hprefix.name = prefix;
1595
105k
    if (prefix != NULL)
1596
20.4k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1597
85.2k
    else
1598
85.2k
        hprefix.hashValue = 0;
1599
105k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1600
105k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1601
0
        return(NULL);
1602
1603
105k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1604
105k
}
1605
1606
/**
1607
 * Sets or updates extra data for the given prefix. This value will be
1608
 * returned by xmlParserNsLookupSax as long as the namespace with the
1609
 * given prefix is in scope.
1610
 *
1611
 * @param ctxt  parser context
1612
 * @param prefix  namespace prefix
1613
 * @param saxData  extra data for SAX handler
1614
 * @returns the data on success, NULL if no namespace was found.
1615
 */
1616
int
1617
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1618
242k
                     void *saxData) {
1619
242k
    xmlHashedString hprefix;
1620
242k
    int nsIndex;
1621
1622
242k
    if (prefix == ctxt->str_xml)
1623
0
        return(-1);
1624
1625
242k
    hprefix.name = prefix;
1626
242k
    if (prefix != NULL)
1627
168k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1628
73.6k
    else
1629
73.6k
        hprefix.hashValue = 0;
1630
242k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1631
242k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1632
0
        return(-1);
1633
1634
242k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1635
242k
    return(0);
1636
242k
}
1637
1638
/**
1639
 * Grows the namespace tables.
1640
 *
1641
 * @param ctxt  parser context
1642
 * @returns 0 on success, -1 if a memory allocation failed.
1643
 */
1644
static int
1645
17.4k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1646
17.4k
    const xmlChar **table;
1647
17.4k
    xmlParserNsExtra *extra;
1648
17.4k
    int newSize;
1649
1650
17.4k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1651
17.4k
                              sizeof(table[0]) + sizeof(extra[0]),
1652
17.4k
                              16, XML_MAX_ITEMS);
1653
17.4k
    if (newSize < 0)
1654
0
        goto error;
1655
1656
17.4k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1657
17.4k
    if (table == NULL)
1658
39
        goto error;
1659
17.3k
    ctxt->nsTab = table;
1660
1661
17.3k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1662
17.3k
    if (extra == NULL)
1663
45
        goto error;
1664
17.3k
    ctxt->nsdb->extra = extra;
1665
1666
17.3k
    ctxt->nsMax = newSize;
1667
17.3k
    return(0);
1668
1669
84
error:
1670
84
    xmlErrMemory(ctxt);
1671
84
    return(-1);
1672
17.3k
}
1673
1674
/**
1675
 * Push a new namespace on the table.
1676
 *
1677
 * @param ctxt  parser context
1678
 * @param prefix  prefix with hash value
1679
 * @param uri  uri with hash value
1680
 * @param saxData  extra data for SAX handler
1681
 * @param defAttr  whether the namespace comes from a default attribute
1682
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1683
 * -1 if a memory allocation failed.
1684
 */
1685
static int
1686
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1687
347k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1688
347k
    xmlParserNsBucket *bucket = NULL;
1689
347k
    xmlParserNsExtra *extra;
1690
347k
    const xmlChar **ns;
1691
347k
    unsigned hashValue, nsIndex, oldIndex;
1692
1693
347k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1694
201
        return(0);
1695
1696
347k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1697
84
        xmlErrMemory(ctxt);
1698
84
        return(-1);
1699
84
    }
1700
1701
    /*
1702
     * Default namespace and 'xml' namespace
1703
     */
1704
347k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1705
96.2k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1706
1707
96.2k
        if (oldIndex != INT_MAX) {
1708
91.7k
            extra = &ctxt->nsdb->extra[oldIndex];
1709
1710
91.7k
            if (extra->elementId == ctxt->nsdb->elementId) {
1711
531
                if (defAttr == 0)
1712
303
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1713
531
                return(0);
1714
531
            }
1715
1716
91.1k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1717
91.1k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1718
5.50k
                return(0);
1719
91.1k
        }
1720
1721
90.1k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1722
90.1k
        goto populate_entry;
1723
96.2k
    }
1724
1725
    /*
1726
     * Hash table lookup
1727
     */
1728
251k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1729
251k
    if (oldIndex != INT_MAX) {
1730
181k
        extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
        /*
1733
         * Check for duplicate definitions on the same element.
1734
         */
1735
181k
        if (extra->elementId == ctxt->nsdb->elementId) {
1736
673
            if (defAttr == 0)
1737
572
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1738
673
            return(0);
1739
673
        }
1740
1741
180k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1742
180k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1743
6.39k
            return(0);
1744
1745
174k
        bucket->index = ctxt->nsNr;
1746
174k
        goto populate_entry;
1747
180k
    }
1748
1749
    /*
1750
     * Insert new bucket
1751
     */
1752
1753
70.3k
    hashValue = prefix->hashValue;
1754
1755
    /*
1756
     * Grow hash table, 50% fill factor
1757
     */
1758
70.3k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1759
5.13k
        xmlParserNsBucket *newHash;
1760
5.13k
        unsigned newSize, i, index;
1761
1762
5.13k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1763
0
            xmlErrMemory(ctxt);
1764
0
            return(-1);
1765
0
        }
1766
5.13k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1767
5.13k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1768
5.13k
        if (newHash == NULL) {
1769
12
            xmlErrMemory(ctxt);
1770
12
            return(-1);
1771
12
        }
1772
5.12k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1773
1774
223k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1775
218k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1776
218k
            unsigned newIndex;
1777
1778
218k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1779
216k
                continue;
1780
2.38k
            newIndex = hv & (newSize - 1);
1781
1782
3.75k
            while (newHash[newIndex].hashValue != 0) {
1783
1.36k
                newIndex++;
1784
1.36k
                if (newIndex == newSize)
1785
485
                    newIndex = 0;
1786
1.36k
            }
1787
1788
2.38k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1789
2.38k
        }
1790
1791
5.12k
        xmlFree(ctxt->nsdb->hash);
1792
5.12k
        ctxt->nsdb->hash = newHash;
1793
5.12k
        ctxt->nsdb->hashSize = newSize;
1794
1795
        /*
1796
         * Relookup
1797
         */
1798
5.12k
        index = hashValue & (newSize - 1);
1799
1800
5.62k
        while (newHash[index].hashValue != 0) {
1801
508
            index++;
1802
508
            if (index == newSize)
1803
170
                index = 0;
1804
508
        }
1805
1806
5.12k
        bucket = &newHash[index];
1807
5.12k
    }
1808
1809
70.3k
    bucket->hashValue = hashValue;
1810
70.3k
    bucket->index = ctxt->nsNr;
1811
70.3k
    ctxt->nsdb->hashElems++;
1812
70.3k
    oldIndex = INT_MAX;
1813
1814
334k
populate_entry:
1815
334k
    nsIndex = ctxt->nsNr;
1816
1817
334k
    ns = &ctxt->nsTab[nsIndex * 2];
1818
334k
    ns[0] = prefix ? prefix->name : NULL;
1819
334k
    ns[1] = uri->name;
1820
1821
334k
    extra = &ctxt->nsdb->extra[nsIndex];
1822
334k
    extra->saxData = saxData;
1823
334k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1824
334k
    extra->uriHashValue = uri->hashValue;
1825
334k
    extra->elementId = ctxt->nsdb->elementId;
1826
334k
    extra->oldIndex = oldIndex;
1827
1828
334k
    ctxt->nsNr++;
1829
1830
334k
    return(1);
1831
70.3k
}
1832
1833
/**
1834
 * Pops the top `nr` namespaces and restores the hash table.
1835
 *
1836
 * @param ctxt  an XML parser context
1837
 * @param nr  the number to pop
1838
 * @returns the number of namespaces popped.
1839
 */
1840
static int
1841
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1842
226k
{
1843
226k
    int i;
1844
1845
    /* assert(nr <= ctxt->nsNr); */
1846
1847
541k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1848
314k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1849
314k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1850
1851
314k
        if (prefix == NULL) {
1852
79.4k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1853
235k
        } else {
1854
235k
            xmlHashedString hprefix;
1855
235k
            xmlParserNsBucket *bucket = NULL;
1856
1857
235k
            hprefix.name = prefix;
1858
235k
            hprefix.hashValue = extra->prefixHashValue;
1859
235k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1860
            /* assert(bucket && bucket->hashValue); */
1861
235k
            bucket->index = extra->oldIndex;
1862
235k
        }
1863
314k
    }
1864
1865
226k
    ctxt->nsNr -= nr;
1866
226k
    return(nr);
1867
226k
}
1868
1869
static int
1870
13.8k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1871
13.8k
    const xmlChar **atts;
1872
13.8k
    unsigned *attallocs;
1873
13.8k
    int newSize;
1874
1875
13.8k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1876
13.8k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1877
13.8k
                              10, XML_MAX_ATTRS);
1878
13.8k
    if (newSize < 0) {
1879
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1880
0
                    "Maximum number of attributes exceeded");
1881
0
        return(-1);
1882
0
    }
1883
1884
13.8k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1885
13.8k
    if (atts == NULL)
1886
37
        goto mem_error;
1887
13.7k
    ctxt->atts = atts;
1888
1889
13.7k
    attallocs = xmlRealloc(ctxt->attallocs,
1890
13.7k
                           newSize * sizeof(attallocs[0]));
1891
13.7k
    if (attallocs == NULL)
1892
51
        goto mem_error;
1893
13.7k
    ctxt->attallocs = attallocs;
1894
1895
13.7k
    ctxt->maxatts = newSize * 5;
1896
1897
13.7k
    return(0);
1898
1899
88
mem_error:
1900
88
    xmlErrMemory(ctxt);
1901
88
    return(-1);
1902
13.7k
}
1903
1904
/**
1905
 * Pushes a new parser input on top of the input stack
1906
 *
1907
 * @param ctxt  an XML parser context
1908
 * @param value  the parser input
1909
 * @returns -1 in case of error, the index in the stack otherwise
1910
 */
1911
int
1912
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1913
211k
{
1914
211k
    char *directory = NULL;
1915
211k
    int maxDepth;
1916
1917
211k
    if ((ctxt == NULL) || (value == NULL))
1918
21.7k
        return(-1);
1919
1920
190k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1921
1922
190k
    if (ctxt->inputNr >= ctxt->inputMax) {
1923
13.4k
        xmlParserInputPtr *tmp;
1924
13.4k
        int newSize;
1925
1926
13.4k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1927
13.4k
                                  5, maxDepth);
1928
13.4k
        if (newSize < 0) {
1929
12
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1930
12
                           "Maximum entity nesting depth exceeded");
1931
12
            xmlHaltParser(ctxt);
1932
12
            return(-1);
1933
12
        }
1934
13.4k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1935
13.4k
        if (tmp == NULL) {
1936
74
            xmlErrMemory(ctxt);
1937
74
            return(-1);
1938
74
        }
1939
13.3k
        ctxt->inputTab = tmp;
1940
13.3k
        ctxt->inputMax = newSize;
1941
13.3k
    }
1942
1943
190k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1944
100k
        directory = xmlParserGetDirectory(value->filename);
1945
100k
        if (directory == NULL) {
1946
42
            xmlErrMemory(ctxt);
1947
42
            return(-1);
1948
42
        }
1949
100k
    }
1950
1951
190k
    if (ctxt->input_id >= INT_MAX) {
1952
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1953
0
        return(-1);
1954
0
    }
1955
1956
190k
    ctxt->inputTab[ctxt->inputNr] = value;
1957
190k
    ctxt->input = value;
1958
1959
190k
    if (ctxt->inputNr == 0) {
1960
100k
        xmlFree(ctxt->directory);
1961
100k
        ctxt->directory = directory;
1962
100k
    }
1963
1964
    /*
1965
     * The input ID is unused internally, but there are entity
1966
     * loaders in downstream code that detect the main document
1967
     * by checking for "input_id == 1".
1968
     */
1969
190k
    value->id = ctxt->input_id++;
1970
1971
190k
    return(ctxt->inputNr++);
1972
190k
}
1973
1974
/**
1975
 * Pops the top parser input from the input stack
1976
 *
1977
 * @param ctxt  an XML parser context
1978
 * @returns the input just removed
1979
 */
1980
xmlParserInput *
1981
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1982
403k
{
1983
403k
    xmlParserInputPtr ret;
1984
1985
403k
    if (ctxt == NULL)
1986
0
        return(NULL);
1987
403k
    if (ctxt->inputNr <= 0)
1988
216k
        return (NULL);
1989
186k
    ctxt->inputNr--;
1990
186k
    if (ctxt->inputNr > 0)
1991
90.0k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1992
96.9k
    else
1993
96.9k
        ctxt->input = NULL;
1994
186k
    ret = ctxt->inputTab[ctxt->inputNr];
1995
186k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1996
186k
    return (ret);
1997
403k
}
1998
1999
/**
2000
 * Pushes a new element node on top of the node stack
2001
 *
2002
 * @deprecated Internal function, do not use.
2003
 *
2004
 * @param ctxt  an XML parser context
2005
 * @param value  the element node
2006
 * @returns -1 in case of error, the index in the stack otherwise
2007
 */
2008
int
2009
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
2010
840k
{
2011
840k
    if (ctxt == NULL)
2012
0
        return(0);
2013
2014
840k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2015
70.9k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2016
70.9k
        xmlNodePtr *tmp;
2017
70.9k
        int newSize;
2018
2019
70.9k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2020
70.9k
                                  10, maxDepth);
2021
70.9k
        if (newSize < 0) {
2022
38
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2023
38
                    "Excessive depth in document: %d,"
2024
38
                    " use XML_PARSE_HUGE option\n",
2025
38
                    ctxt->nodeNr);
2026
38
            xmlHaltParser(ctxt);
2027
38
            return(-1);
2028
38
        }
2029
2030
70.9k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2031
70.9k
        if (tmp == NULL) {
2032
102
            xmlErrMemory(ctxt);
2033
102
            return (-1);
2034
102
        }
2035
70.8k
        ctxt->nodeTab = tmp;
2036
70.8k
  ctxt->nodeMax = newSize;
2037
70.8k
    }
2038
2039
839k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2040
839k
    ctxt->node = value;
2041
839k
    return (ctxt->nodeNr++);
2042
840k
}
2043
2044
/**
2045
 * Pops the top element node from the node stack
2046
 *
2047
 * @deprecated Internal function, do not use.
2048
 *
2049
 * @param ctxt  an XML parser context
2050
 * @returns the node just removed
2051
 */
2052
xmlNode *
2053
nodePop(xmlParserCtxt *ctxt)
2054
840k
{
2055
840k
    xmlNodePtr ret;
2056
2057
840k
    if (ctxt == NULL) return(NULL);
2058
840k
    if (ctxt->nodeNr <= 0)
2059
84.7k
        return (NULL);
2060
755k
    ctxt->nodeNr--;
2061
755k
    if (ctxt->nodeNr > 0)
2062
742k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2063
13.5k
    else
2064
13.5k
        ctxt->node = NULL;
2065
755k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2066
755k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2067
755k
    return (ret);
2068
840k
}
2069
2070
/**
2071
 * Pushes a new element name/prefix/URL on top of the name stack
2072
 *
2073
 * @param ctxt  an XML parser context
2074
 * @param value  the element name
2075
 * @param prefix  the element prefix
2076
 * @param URI  the element namespace name
2077
 * @param line  the current line number for error messages
2078
 * @param nsNr  the number of namespaces pushed on the namespace table
2079
 * @returns -1 in case of error, the index in the stack otherwise
2080
 */
2081
static int
2082
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2083
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2084
1.04M
{
2085
1.04M
    xmlStartTag *tag;
2086
2087
1.04M
    if (ctxt->nameNr >= ctxt->nameMax) {
2088
75.6k
        const xmlChar **tmp;
2089
75.6k
        xmlStartTag *tmp2;
2090
75.6k
        int newSize;
2091
2092
75.6k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2093
75.6k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2094
75.6k
                                  10, XML_MAX_ITEMS);
2095
75.6k
        if (newSize < 0)
2096
0
            goto mem_error;
2097
2098
75.6k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2099
75.6k
        if (tmp == NULL)
2100
94
      goto mem_error;
2101
75.5k
  ctxt->nameTab = tmp;
2102
2103
75.5k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2104
75.5k
        if (tmp2 == NULL)
2105
94
      goto mem_error;
2106
75.4k
  ctxt->pushTab = tmp2;
2107
2108
75.4k
        ctxt->nameMax = newSize;
2109
968k
    } else if (ctxt->pushTab == NULL) {
2110
40.6k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2111
40.6k
        if (ctxt->pushTab == NULL)
2112
259
            goto mem_error;
2113
40.6k
    }
2114
1.04M
    ctxt->nameTab[ctxt->nameNr] = value;
2115
1.04M
    ctxt->name = value;
2116
1.04M
    tag = &ctxt->pushTab[ctxt->nameNr];
2117
1.04M
    tag->prefix = prefix;
2118
1.04M
    tag->URI = URI;
2119
1.04M
    tag->line = line;
2120
1.04M
    tag->nsNr = nsNr;
2121
1.04M
    return (ctxt->nameNr++);
2122
447
mem_error:
2123
447
    xmlErrMemory(ctxt);
2124
447
    return (-1);
2125
1.04M
}
2126
#ifdef LIBXML_PUSH_ENABLED
2127
/**
2128
 * Pops the top element/prefix/URI name from the name stack
2129
 *
2130
 * @param ctxt  an XML parser context
2131
 * @returns the name just removed
2132
 */
2133
static const xmlChar *
2134
nameNsPop(xmlParserCtxtPtr ctxt)
2135
15.7k
{
2136
15.7k
    const xmlChar *ret;
2137
2138
15.7k
    if (ctxt->nameNr <= 0)
2139
0
        return (NULL);
2140
15.7k
    ctxt->nameNr--;
2141
15.7k
    if (ctxt->nameNr > 0)
2142
15.3k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2143
304
    else
2144
304
        ctxt->name = NULL;
2145
15.7k
    ret = ctxt->nameTab[ctxt->nameNr];
2146
15.7k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2147
15.7k
    return (ret);
2148
15.7k
}
2149
#endif /* LIBXML_PUSH_ENABLED */
2150
2151
/**
2152
 * Pops the top element name from the name stack
2153
 *
2154
 * @deprecated Internal function, do not use.
2155
 *
2156
 * @param ctxt  an XML parser context
2157
 * @returns the name just removed
2158
 */
2159
static const xmlChar *
2160
namePop(xmlParserCtxtPtr ctxt)
2161
943k
{
2162
943k
    const xmlChar *ret;
2163
2164
943k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2165
96
        return (NULL);
2166
943k
    ctxt->nameNr--;
2167
943k
    if (ctxt->nameNr > 0)
2168
929k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2169
13.5k
    else
2170
13.5k
        ctxt->name = NULL;
2171
943k
    ret = ctxt->nameTab[ctxt->nameNr];
2172
943k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2173
943k
    return (ret);
2174
943k
}
2175
2176
6.63M
/*
 * Pushes `val` on the space stack and points ctxt->space at the new
 * top entry.
 *
 * Returns the index of the new entry, or -1 if the stack could not be
 * grown (a memory error is raised on the context).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        else
            grown = NULL;

        /* Hitting the size limit is reported like an allocation failure. */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2201
2202
6.54M
/*
 * Pops the top value from the space stack and points ctxt->space at
 * the entry below it.
 *
 * Returns the popped value, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;

    /* Once the stack is empty, ctxt->space keeps pointing at slot 0. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2214
2215
/*
2216
 * Macros for accessing the content. Those should be used only by the parser,
2217
 * and not exported.
2218
 *
2219
 * Dirty macros, i.e. one often needs to make assumptions on the context to
2220
 * use them
2221
 *
2222
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2223
 *           To be used with extreme caution since operations consuming
2224
 *           characters may move the input buffer to a different location !
2225
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2226
 *           This should be used internally by the parser
2227
 *           only to compare to ASCII values otherwise it would break when
2228
 *           running with UTF-8 encoding.
2229
 *   RAW     same as CUR but in the input buffer, bypass any token
2230
 *           extraction that may have been done
2231
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2232
 *           to compare on ASCII based substring.
2233
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2234
 *           strings without newlines within the parser.
2235
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2236
 *           defined char within the parser.
2237
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2238
 *
2239
 *   NEXT    Skip to the next character, this does the proper decoding
2240
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2241
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2242
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2243
 *            the index
2244
 *   GROW, SHRINK  handling of input buffers
2245
 */
2246
2247
33.9M
#define RAW (*ctxt->input->cur)
2248
879M
#define CUR (*ctxt->input->cur)
2249
24.4M
#define NXT(val) ctxt->input->cur[(val)]
2250
1.12G
#define CUR_PTR ctxt->input->cur
2251
12.8M
#define BASE_PTR ctxt->input->base
2252
2253
#define CMP4( s, c1, c2, c3, c4 ) \
2254
23.5M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2255
11.8M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2256
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2257
22.9M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2258
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2259
22.1M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2260
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2261
21.4M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2262
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2263
21.0M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2264
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2265
10.4M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2266
10.4M
    ((unsigned char *) s)[ 8 ] == c9 )
2267
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2268
3.94k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2269
3.94k
    ((unsigned char *) s)[ 9 ] == c10 )
2270
2271
3.73M
#define SKIP(val) do {             \
2272
3.73M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2273
3.73M
    if (*ctxt->input->cur == 0)           \
2274
3.73M
        xmlParserGrow(ctxt);           \
2275
3.73M
  } while (0)
2276
2277
#define SKIPL(val) do {             \
2278
    int skipl;                \
2279
    for(skipl=0; skipl<val; skipl++) {          \
2280
  if (*(ctxt->input->cur) == '\n') {        \
2281
  ctxt->input->line++; ctxt->input->col = 1;      \
2282
  } else ctxt->input->col++;          \
2283
  ctxt->input->cur++;           \
2284
    }                 \
2285
    if (*ctxt->input->cur == 0)           \
2286
        xmlParserGrow(ctxt);            \
2287
  } while (0)
2288
2289
#define SHRINK \
2290
16.7M
    if (!PARSER_PROGRESSIVE(ctxt)) \
2291
16.7M
  xmlParserShrink(ctxt);
2292
2293
#define GROW \
2294
39.6M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2295
39.6M
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2296
4.64M
  xmlParserGrow(ctxt);
2297
2298
7.28M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2299
2300
963k
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2301
2302
448M
#define NEXT xmlNextChar(ctxt)
2303
2304
7.28M
#define NEXT1 {               \
2305
7.28M
  ctxt->input->col++;           \
2306
7.28M
  ctxt->input->cur++;           \
2307
7.28M
  if (*ctxt->input->cur == 0)         \
2308
7.28M
      xmlParserGrow(ctxt);           \
2309
7.28M
    }
2310
2311
644M
#define NEXTL(l) do {             \
2312
644M
    if (*(ctxt->input->cur) == '\n') {         \
2313
29.0M
  ctxt->input->line++; ctxt->input->col = 1;      \
2314
615M
    } else ctxt->input->col++;           \
2315
644M
    ctxt->input->cur += l;        \
2316
644M
  } while (0)
2317
2318
#define COPY_BUF(b, i, v)           \
2319
137M
    if (v < 0x80) b[i++] = v;           \
2320
137M
    else i += xmlCopyCharMultiByte(&b[i],v)
2321
2322
static int
2323
121M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2324
121M
    int c = xmlCurrentChar(ctxt, len);
2325
2326
121M
    if (c == XML_INVALID_CHAR)
2327
22.5M
        c = 0xFFFD; /* replacement character */
2328
2329
121M
    return(c);
2330
121M
}
2331
2332
/**
2333
 * Skip whitespace in the input stream.
2334
 *
2335
 * @deprecated Internal function, do not use.
2336
 *
2337
 * @param ctxt  the XML parser context
2338
 * @returns the number of space chars skipped
2339
 */
2340
int
2341
7.63M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2342
7.63M
    const xmlChar *cur;
2343
7.63M
    int res = 0;
2344
2345
7.63M
    cur = ctxt->input->cur;
2346
7.63M
    while (IS_BLANK_CH(*cur)) {
2347
2.37M
        if (*cur == '\n') {
2348
921k
            ctxt->input->line++; ctxt->input->col = 1;
2349
1.45M
        } else {
2350
1.45M
            ctxt->input->col++;
2351
1.45M
        }
2352
2.37M
        cur++;
2353
2.37M
        if (res < INT_MAX)
2354
2.37M
            res++;
2355
2.37M
        if (*cur == 0) {
2356
20.3k
            ctxt->input->cur = cur;
2357
20.3k
            xmlParserGrow(ctxt);
2358
20.3k
            cur = ctxt->input->cur;
2359
20.3k
        }
2360
2.37M
    }
2361
7.63M
    ctxt->input->cur = cur;
2362
2363
7.63M
    if (res > 4)
2364
16.5k
        GROW;
2365
2366
7.63M
    return(res);
2367
7.63M
}
2368
2369
static void
2370
82.0k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2371
82.0k
    unsigned long consumed;
2372
82.0k
    xmlEntityPtr ent;
2373
2374
82.0k
    ent = ctxt->input->entity;
2375
2376
82.0k
    ent->flags &= ~XML_ENT_EXPANDING;
2377
2378
82.0k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2379
8.58k
        int result;
2380
2381
        /*
2382
         * Read the rest of the stream in case of errors. We want
2383
         * to account for the whole entity size.
2384
         */
2385
15.5k
        do {
2386
15.5k
            ctxt->input->cur = ctxt->input->end;
2387
15.5k
            xmlParserShrink(ctxt);
2388
15.5k
            result = xmlParserGrow(ctxt);
2389
15.5k
        } while (result > 0);
2390
2391
8.58k
        consumed = ctxt->input->consumed;
2392
8.58k
        xmlSaturatedAddSizeT(&consumed,
2393
8.58k
                             ctxt->input->end - ctxt->input->base);
2394
2395
8.58k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2396
2397
        /*
2398
         * Add to sizeentities when parsing an external entity
2399
         * for the first time.
2400
         */
2401
8.58k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2402
5.48k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2403
5.48k
        }
2404
2405
8.58k
        ent->flags |= XML_ENT_CHECKED;
2406
8.58k
    }
2407
2408
82.0k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2409
2410
82.0k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2411
2412
82.0k
    GROW;
2413
82.0k
}
2414
2415
/**
2416
 * Skip whitespace in the input stream, also handling parameter
2417
 * entities.
2418
 *
2419
 * @param ctxt  the XML parser context
2420
 * @returns the number of space chars skipped
2421
 */
2422
static int
2423
963k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2424
963k
    int res = 0;
2425
963k
    int inParam;
2426
963k
    int expandParam;
2427
2428
963k
    inParam = PARSER_IN_PE(ctxt);
2429
963k
    expandParam = PARSER_EXTERNAL(ctxt);
2430
2431
963k
    if (!inParam && !expandParam)
2432
349k
        return(xmlSkipBlankChars(ctxt));
2433
2434
    /*
2435
     * It's Okay to use CUR/NEXT here since all the blanks are on
2436
     * the ASCII range.
2437
     */
2438
1.47M
    while (PARSER_STOPPED(ctxt) == 0) {
2439
1.47M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2440
842k
            NEXT;
2441
842k
        } else if (CUR == '%') {
2442
50.5k
            if ((expandParam == 0) ||
2443
50.5k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2444
37.8k
                break;
2445
2446
            /*
2447
             * Expand parameter entity. We continue to consume
2448
             * whitespace at the start of the entity and possible
2449
             * even consume the whole entity and pop it. We might
2450
             * even pop multiple PEs in this loop.
2451
             */
2452
12.7k
            xmlParsePERefInternal(ctxt, 0);
2453
2454
12.7k
            inParam = PARSER_IN_PE(ctxt);
2455
12.7k
            expandParam = PARSER_EXTERNAL(ctxt);
2456
582k
        } else if (CUR == 0) {
2457
21.3k
            if (inParam == 0)
2458
297
                break;
2459
2460
            /*
2461
             * Don't pop parameter entities that start a markup
2462
             * declaration to detect Well-formedness constraint:
2463
             * PE Between Declarations.
2464
             */
2465
21.0k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2466
13.7k
                break;
2467
2468
7.25k
            xmlPopPE(ctxt);
2469
2470
7.25k
            inParam = PARSER_IN_PE(ctxt);
2471
7.25k
            expandParam = PARSER_EXTERNAL(ctxt);
2472
560k
        } else {
2473
560k
            break;
2474
560k
        }
2475
2476
        /*
2477
         * Also increase the counter when entering or exiting a PERef.
2478
         * The spec says: "When a parameter-entity reference is recognized
2479
         * in the DTD and included, its replacement text MUST be enlarged
2480
         * by the attachment of one leading and one following space (#x20)
2481
         * character."
2482
         */
2483
862k
        if (res < INT_MAX)
2484
862k
            res++;
2485
862k
    }
2486
2487
613k
    return(res);
2488
963k
}
2489
2490
/************************************************************************
2491
 *                  *
2492
 *    Commodity functions to handle entities      *
2493
 *                  *
2494
 ************************************************************************/
2495
2496
/**
2497
 * @deprecated Internal function, don't use.
2498
 *
2499
 * @param ctxt  an XML parser context
2500
 * @returns the current xmlChar in the parser context
2501
 */
2502
xmlChar
2503
0
xmlPopInput(xmlParserCtxt *ctxt) {
2504
0
    xmlParserInputPtr input;
2505
2506
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2507
0
    input = xmlCtxtPopInput(ctxt);
2508
0
    xmlFreeInputStream(input);
2509
0
    if (*ctxt->input->cur == 0)
2510
0
        xmlParserGrow(ctxt);
2511
0
    return(CUR);
2512
0
}
2513
2514
/**
2515
 * Push an input stream onto the stack.
2516
 *
2517
 * @deprecated Internal function, don't use.
2518
 *
2519
 * @param ctxt  an XML parser context
2520
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2521
 * @returns -1 in case of error or the index in the input stack
2522
 */
2523
int
2524
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2525
0
    int ret;
2526
2527
0
    if ((ctxt == NULL) || (input == NULL))
2528
0
        return(-1);
2529
2530
0
    ret = xmlCtxtPushInput(ctxt, input);
2531
0
    if (ret >= 0)
2532
0
        GROW;
2533
0
    return(ret);
2534
0
}
2535
2536
/**
2537
 * Parse a numeric character reference. Always consumes '&'.
2538
 *
2539
 * @deprecated Internal function, don't use.
2540
 *
2541
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2542
 *                      '&#x' [0-9a-fA-F]+ ';'
2543
 *
2544
 * [ WFC: Legal Character ]
2545
 * Characters referred to using character references must match the
2546
 * production for Char.
2547
 *
2548
 * @param ctxt  an XML parser context
2549
 * @returns the value parsed (as an int), 0 in case of error
2550
 */
2551
int
2552
372k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2553
372k
    int val = 0;
2554
372k
    int count = 0;
2555
2556
    /*
2557
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2558
     */
2559
372k
    if ((RAW == '&') && (NXT(1) == '#') &&
2560
372k
        (NXT(2) == 'x')) {
2561
246k
  SKIP(3);
2562
246k
  GROW;
2563
1.02M
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2564
827k
      if (count++ > 20) {
2565
41.9k
    count = 0;
2566
41.9k
    GROW;
2567
41.9k
      }
2568
827k
      if ((RAW >= '0') && (RAW <= '9'))
2569
514k
          val = val * 16 + (CUR - '0');
2570
312k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2571
199k
          val = val * 16 + (CUR - 'a') + 10;
2572
113k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2573
63.8k
          val = val * 16 + (CUR - 'A') + 10;
2574
49.5k
      else {
2575
49.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2576
49.5k
    val = 0;
2577
49.5k
    break;
2578
49.5k
      }
2579
777k
      if (val > 0x110000)
2580
469k
          val = 0x110000;
2581
2582
777k
      NEXT;
2583
777k
      count++;
2584
777k
  }
2585
246k
  if (RAW == ';') {
2586
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2587
196k
      ctxt->input->col++;
2588
196k
      ctxt->input->cur++;
2589
196k
  }
2590
246k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2591
126k
  SKIP(2);
2592
126k
  GROW;
2593
341k
  while (RAW != ';') { /* loop blocked by count */
2594
240k
      if (count++ > 20) {
2595
1.47k
    count = 0;
2596
1.47k
    GROW;
2597
1.47k
      }
2598
240k
      if ((RAW >= '0') && (RAW <= '9'))
2599
215k
          val = val * 10 + (CUR - '0');
2600
24.5k
      else {
2601
24.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2602
24.5k
    val = 0;
2603
24.5k
    break;
2604
24.5k
      }
2605
215k
      if (val > 0x110000)
2606
14.9k
          val = 0x110000;
2607
2608
215k
      NEXT;
2609
215k
      count++;
2610
215k
  }
2611
126k
  if (RAW == ';') {
2612
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2613
101k
      ctxt->input->col++;
2614
101k
      ctxt->input->cur++;
2615
101k
  }
2616
126k
    } else {
2617
0
        if (RAW == '&')
2618
0
            SKIP(1);
2619
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2620
0
    }
2621
2622
    /*
2623
     * [ WFC: Legal Character ]
2624
     * Characters referred to using character references must match the
2625
     * production for Char.
2626
     */
2627
372k
    if (val >= 0x110000) {
2628
429
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2629
429
                "xmlParseCharRef: character reference out of bounds\n",
2630
429
          val);
2631
429
        val = 0xFFFD;
2632
372k
    } else if (!IS_CHAR(val)) {
2633
103k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2634
103k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2635
103k
                    val);
2636
103k
    }
2637
372k
    return(val);
2638
372k
}
2639
2640
/**
2641
 * Parse Reference declarations, variant parsing from a string rather
2642
 * than an an input flow.
2643
 *
2644
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2645
 *                      '&#x' [0-9a-fA-F]+ ';'
2646
 *
2647
 * [ WFC: Legal Character ]
2648
 * Characters referred to using character references must match the
2649
 * production for Char.
2650
 *
2651
 * @param ctxt  an XML parser context
2652
 * @param str  a pointer to an index in the string
2653
 * @returns the value parsed (as an int), 0 in case of error, str will be
2654
 *         updated to the current value of the index
2655
 */
2656
static int
2657
651k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2658
651k
    const xmlChar *ptr;
2659
651k
    xmlChar cur;
2660
651k
    int val = 0;
2661
2662
651k
    if ((str == NULL) || (*str == NULL)) return(0);
2663
651k
    ptr = *str;
2664
651k
    cur = *ptr;
2665
651k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2666
5.04k
  ptr += 3;
2667
5.04k
  cur = *ptr;
2668
21.6k
  while (cur != ';') { /* Non input consuming loop */
2669
17.7k
      if ((cur >= '0') && (cur <= '9'))
2670
12.0k
          val = val * 16 + (cur - '0');
2671
5.68k
      else if ((cur >= 'a') && (cur <= 'f'))
2672
2.64k
          val = val * 16 + (cur - 'a') + 10;
2673
3.04k
      else if ((cur >= 'A') && (cur <= 'F'))
2674
1.84k
          val = val * 16 + (cur - 'A') + 10;
2675
1.19k
      else {
2676
1.19k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2677
1.19k
    val = 0;
2678
1.19k
    break;
2679
1.19k
      }
2680
16.5k
      if (val > 0x110000)
2681
4.51k
          val = 0x110000;
2682
2683
16.5k
      ptr++;
2684
16.5k
      cur = *ptr;
2685
16.5k
  }
2686
5.04k
  if (cur == ';')
2687
3.85k
      ptr++;
2688
645k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2689
645k
  ptr += 2;
2690
645k
  cur = *ptr;
2691
1.94M
  while (cur != ';') { /* Non input consuming loops */
2692
1.29M
      if ((cur >= '0') && (cur <= '9'))
2693
1.29M
          val = val * 10 + (cur - '0');
2694
1.68k
      else {
2695
1.68k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2696
1.68k
    val = 0;
2697
1.68k
    break;
2698
1.68k
      }
2699
1.29M
      if (val > 0x110000)
2700
1.11k
          val = 0x110000;
2701
2702
1.29M
      ptr++;
2703
1.29M
      cur = *ptr;
2704
1.29M
  }
2705
645k
  if (cur == ';')
2706
644k
      ptr++;
2707
645k
    } else {
2708
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2709
0
  return(0);
2710
0
    }
2711
651k
    *str = ptr;
2712
2713
    /*
2714
     * [ WFC: Legal Character ]
2715
     * Characters referred to using character references must match the
2716
     * production for Char.
2717
     */
2718
651k
    if (val >= 0x110000) {
2719
326
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2720
326
                "xmlParseStringCharRef: character reference out of bounds\n",
2721
326
                val);
2722
650k
    } else if (IS_CHAR(val)) {
2723
647k
        return(val);
2724
647k
    } else {
2725
3.44k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2726
3.44k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2727
3.44k
        val);
2728
3.44k
    }
2729
3.77k
    return(0);
2730
651k
}
2731
2732
/**
2733
 *     [69] PEReference ::= '%' Name ';'
2734
 *
2735
 * @deprecated Internal function, do not use.
2736
 *
2737
 * [ WFC: No Recursion ]
2738
 * A parsed entity must not contain a recursive
2739
 * reference to itself, either directly or indirectly.
2740
 *
2741
 * [ WFC: Entity Declared ]
2742
 * In a document without any DTD, a document with only an internal DTD
2743
 * subset which contains no parameter entity references, or a document
2744
 * with "standalone='yes'", ...  ... The declaration of a parameter
2745
 * entity must precede any reference to it...
2746
 *
2747
 * [ VC: Entity Declared ]
2748
 * In a document with an external subset or external parameter entities
2749
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2750
 * must precede any reference to it...
2751
 *
2752
 * [ WFC: In DTD ]
2753
 * Parameter-entity references may only appear in the DTD.
2754
 * NOTE: misleading but this is handled.
2755
 *
2756
 * A PEReference may have been detected in the current input stream
2757
 * the handling is done accordingly to
2758
 *      http://www.w3.org/TR/REC-xml#entproc
2759
 * i.e.
2760
 *   - Included in literal in entity values
2761
 *   - Included as Parameter Entity reference within DTDs
2762
 * @param ctxt  the parser context
2763
 */
2764
void
2765
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2766
0
    xmlParsePERefInternal(ctxt, 0);
2767
0
}
2768
2769
/**
2770
 * @deprecated Internal function, don't use.
2771
 *
2772
 * @param ctxt  the parser context
2773
 * @param str  the input string
2774
 * @param len  the string length
2775
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2776
 * @param end  an end marker xmlChar, 0 if none
2777
 * @param end2  an end marker xmlChar, 0 if none
2778
 * @param end3  an end marker xmlChar, 0 if none
2779
 * @returns A newly allocated string with the substitution done. The caller
2780
 *      must deallocate it !
2781
 */
2782
xmlChar *
2783
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2784
                           int what ATTRIBUTE_UNUSED,
2785
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2786
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2787
0
        return(NULL);
2788
2789
0
    if ((str[len] != 0) ||
2790
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2791
0
        return(NULL);
2792
2793
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2794
0
}
2795
2796
/**
2797
 * @deprecated Internal function, don't use.
2798
 *
2799
 * @param ctxt  the parser context
2800
 * @param str  the input string
2801
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @param end  an end marker xmlChar, 0 if none
2803
 * @param end2  an end marker xmlChar, 0 if none
2804
 * @param end3  an end marker xmlChar, 0 if none
2805
 * @returns A newly allocated string with the substitution done. The caller
2806
 *      must deallocate it !
2807
 */
2808
xmlChar *
2809
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2810
                        int what ATTRIBUTE_UNUSED,
2811
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2812
0
    if ((ctxt == NULL) || (str == NULL))
2813
0
        return(NULL);
2814
2815
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2816
0
        return(NULL);
2817
2818
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2819
0
}
2820
2821
/************************************************************************
2822
 *                  *
2823
 *    Commodity functions, cleanup needed ?     *
2824
 *                  *
2825
 ************************************************************************/
2826
2827
/**
2828
 * Is this a sequence of blank chars that one can ignore ?
2829
 *
2830
 * @param ctxt  an XML parser context
2831
 * @param str  a xmlChar *
2832
 * @param len  the size of `str`
2833
 * @param blank_chars  we know the chars are blanks
2834
 * @returns 1 if ignorable 0 otherwise.
2835
 */
2836
2837
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2838
470k
                     int blank_chars) {
2839
470k
    int i;
2840
470k
    xmlNodePtr lastChild;
2841
2842
    /*
2843
     * Check for xml:space value.
2844
     */
2845
470k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2846
470k
        (*(ctxt->space) == -2))
2847
396k
  return(0);
2848
2849
    /*
2850
     * Check that the string is made of blanks
2851
     */
2852
73.1k
    if (blank_chars == 0) {
2853
166k
  for (i = 0;i < len;i++)
2854
163k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2855
65.9k
    }
2856
2857
    /*
2858
     * Look if the element is mixed content in the DTD if available
2859
     */
2860
10.0k
    if (ctxt->node == NULL) return(0);
2861
10.0k
    if (ctxt->myDoc != NULL) {
2862
10.0k
        xmlElementPtr elemDecl = NULL;
2863
10.0k
        xmlDocPtr doc = ctxt->myDoc;
2864
10.0k
        const xmlChar *prefix = NULL;
2865
2866
10.0k
        if (ctxt->node->ns)
2867
2.46k
            prefix = ctxt->node->ns->prefix;
2868
10.0k
        if (doc->intSubset != NULL)
2869
7.59k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2870
7.59k
                                      prefix);
2871
10.0k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2872
1.21k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2873
1.21k
                                      prefix);
2874
10.0k
        if (elemDecl != NULL) {
2875
2.46k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2876
1.36k
                return(1);
2877
1.10k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2878
1.10k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2879
500
                return(0);
2880
1.10k
        }
2881
10.0k
    }
2882
2883
    /*
2884
     * Otherwise, heuristic :-\
2885
     *
2886
     * When push parsing, we could be at the end of a chunk.
2887
     * This makes the look-ahead and consequently the NOBLANKS
2888
     * option unreliable.
2889
     */
2890
8.18k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2891
6.70k
    if ((ctxt->node->children == NULL) &&
2892
6.70k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2893
2894
6.41k
    lastChild = xmlGetLastChild(ctxt->node);
2895
6.41k
    if (lastChild == NULL) {
2896
4.53k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2897
4.53k
            (ctxt->node->content != NULL)) return(0);
2898
4.53k
    } else if (xmlNodeIsText(lastChild))
2899
361
        return(0);
2900
1.51k
    else if ((ctxt->node->children != NULL) &&
2901
1.51k
             (xmlNodeIsText(ctxt->node->children)))
2902
242
        return(0);
2903
5.81k
    return(1);
2904
6.41k
}
2905
2906
/************************************************************************
2907
 *                  *
2908
 *    Extra stuff for namespace support     *
2909
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2910
 *                  *
2911
 ************************************************************************/
2912
2913
/**
2914
 * Parse an UTF8 encoded XML qualified name string
2915
 *
2916
 * @deprecated Don't use.
2917
 *
2918
 * @param ctxt  an XML parser context
2919
 * @param name  an XML parser context
2920
 * @param prefixOut  a xmlChar **
2921
 * @returns the local part, and prefix is updated
2922
 *   to get the Prefix if any.
2923
 */
2924
2925
xmlChar *
2926
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2927
0
    xmlChar *ret;
2928
0
    const xmlChar *localname;
2929
2930
0
    localname = xmlSplitQName4(name, prefixOut);
2931
0
    if (localname == NULL) {
2932
0
        xmlCtxtErrMemory(ctxt);
2933
0
        return(NULL);
2934
0
    }
2935
2936
0
    ret = xmlStrdup(localname);
2937
0
    if (ret == NULL) {
2938
0
        xmlCtxtErrMemory(ctxt);
2939
0
        xmlFree(*prefixOut);
2940
0
    }
2941
2942
0
    return(ret);
2943
0
}
2944
2945
/************************************************************************
2946
 *                  *
2947
 *      The parser itself       *
2948
 *  Relates to http://www.w3.org/TR/REC-xml       *
2949
 *                  *
2950
 ************************************************************************/
2951
2952
/************************************************************************
2953
 *                  *
2954
 *  Routines to parse Name, NCName and NmToken      *
2955
 *                  *
2956
 ************************************************************************/
2957
2958
/*
2959
 * The two following functions are related to the change of accepted
2960
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2961
 * They correspond to the modified production [4] and the new production [4a]
2962
 * changes in that revision. Also note that the macros used for the
2963
 * productions Letter, Digit, CombiningChar and Extender are not needed
2964
 * anymore.
2965
 * We still keep compatibility to pre-revision5 parsing semantic if the
2966
 * new XML_PARSE_OLD10 option is given to the parser.
2967
 */
2968
2969
/*
 * Check whether a code point may start a Name according to the
 * character ranges introduced with the fifth edition of XML 1.0
 * (productions [4], [4a] and [5]).
 *
 * Returns 1 if `c` is a name start character, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    /* accelerators: the most frequent name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        (c == '_') || (c == ':'))
        return(1);

    /* 0xC0 up to 0x1FFF, with the gaps 0xD7, 0xF7, 0x300-0x36F, 0x37E */
    if (((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)))
        return(1);

    /* remaining ranges of the basic multilingual plane */
    if (((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)))
        return(1);

    /* supplementary planes */
    if ((c >= 0x10000) && (c <= 0xEFFFF))
        return(1);

    return(0);
}
2994
2995
/*
 * Check whether a code point may appear inside a Name according to
 * the character ranges introduced with the fifth edition of XML 1.0
 * (productions [4], [4a] and [5]).
 *
 * Returns 1 if `c` is a name character, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    /* accelerators: the most frequent name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII; digits, '-' and '.' are not allowed as start characters */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')) ||
        (c == '_') || (c == ':') ||
        (c == '-') || (c == '.'))
        return(1);

    /* 0xB7 and 0x300-0x36F are not allowed as start characters */
    if ((c == 0xB7) ||
        ((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x300) && (c <= 0x36F)) ||
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)))
        return(1);

    /* 0x203F-0x2040 are not allowed as start characters */
    if (((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x203F) && (c <= 0x2040)) ||
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)))
        return(1);

    /* supplementary planes */
    if ((c >= 0x10000) && (c <= 0xEFFFF))
        return(1);

    return(0);
}
3024
3025
/*
 * Check whether a code point may start a Name using the
 * pre-revision-5 rules: a Letter, '_' or ':'.
 *
 * Returns 1 if `c` is a name start character, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);
    if ((c == '_') || (c == ':'))
        return(1);

    return(0);
}
3032
3033
/*
 * Check whether a code point may appear inside a Name using the
 * pre-revision-5 rules: Letter, Digit, '.', '-', '_', ':',
 * CombiningChar or Extender.
 *
 * Returns 1 if `c` is a name character, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3044
3045
/*
 * Check whether `c` may start a Name, using the old rules if `old10`
 * is non-zero and the new ones otherwise.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3052
3053
/*
 * Check whether `c` may appear inside a Name, using the old rules if
 * `old10` is non-zero and the new ones otherwise.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3060
3061
/*
3062
 * Scan an XML Name, NCName or Nmtoken.
3063
 *
3064
 * Returns a pointer to the end of the name on success. If the
3065
 * name is invalid, returns `ptr`. If the name is longer than
3066
 * `maxSize` bytes, returns NULL.
3067
 *
3068
 * @param ptr  pointer to the start of the name
3069
 * @param maxSize  maximum size in bytes
3070
 * @param flags  XML_SCAN_* flags
3071
 * @returns a pointer to the end of the name or NULL
3072
 */
3073
const xmlChar *
3074
5.90M
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3075
5.90M
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3076
5.90M
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3077
3078
42.2M
    while (1) {
3079
42.2M
        int c, len;
3080
3081
42.2M
        c = *ptr;
3082
42.2M
        if (c < 0x80) {
3083
38.0M
            if (c == stop)
3084
155k
                break;
3085
37.8M
            len = 1;
3086
37.8M
        } else {
3087
4.23M
            len = 4;
3088
4.23M
            c = xmlGetUTF8Char(ptr, &len);
3089
4.23M
            if (c < 0)
3090
2.99k
                break;
3091
4.23M
        }
3092
3093
42.1M
        if (flags & XML_SCAN_NMTOKEN ?
3094
36.3M
                !xmlIsNameChar(c, old10) :
3095
42.1M
                !xmlIsNameStartChar(c, old10))
3096
5.74M
            break;
3097
3098
36.3M
        if ((size_t) len > maxSize)
3099
127
            return(NULL);
3100
36.3M
        ptr += len;
3101
36.3M
        maxSize -= len;
3102
36.3M
        flags |= XML_SCAN_NMTOKEN;
3103
36.3M
    }
3104
3105
5.90M
    return(ptr);
3106
5.90M
}
3107
3108
static const xmlChar *
3109
469k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3110
469k
    const xmlChar *ret;
3111
469k
    int len = 0, l;
3112
469k
    int c;
3113
469k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3114
301k
                    XML_MAX_TEXT_LENGTH :
3115
469k
                    XML_MAX_NAME_LENGTH;
3116
469k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3117
3118
    /*
3119
     * Handler for more complex cases
3120
     */
3121
469k
    c = xmlCurrentChar(ctxt, &l);
3122
469k
    if (!xmlIsNameStartChar(c, old10))
3123
364k
        return(NULL);
3124
104k
    len += l;
3125
104k
    NEXTL(l);
3126
104k
    c = xmlCurrentChar(ctxt, &l);
3127
35.3M
    while (xmlIsNameChar(c, old10)) {
3128
35.2M
        if (len <= INT_MAX - l)
3129
35.2M
            len += l;
3130
35.2M
        NEXTL(l);
3131
35.2M
        c = xmlCurrentChar(ctxt, &l);
3132
35.2M
    }
3133
104k
    if (len > maxLength) {
3134
381
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3135
381
        return(NULL);
3136
381
    }
3137
103k
    if (ctxt->input->cur - ctxt->input->base < len) {
3138
        /*
3139
         * There were a couple of bugs where PERefs lead to to a change
3140
         * of the buffer. Check the buffer size to avoid passing an invalid
3141
         * pointer to xmlDictLookup.
3142
         */
3143
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3144
0
                    "unexpected change of input buffer");
3145
0
        return (NULL);
3146
0
    }
3147
103k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3148
981
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3149
102k
    else
3150
102k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3151
103k
    if (ret == NULL)
3152
16
        xmlErrMemory(ctxt);
3153
103k
    return(ret);
3154
103k
}
3155
3156
/**
3157
 * Parse an XML name.
3158
 *
3159
 * @deprecated Internal function, don't use.
3160
 *
3161
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3162
 *                      CombiningChar | Extender
3163
 *
3164
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3165
 *
3166
 *     [6] Names ::= Name (#x20 Name)*
3167
 *
3168
 * @param ctxt  an XML parser context
3169
 * @returns the Name parsed or NULL
3170
 */
3171
3172
const xmlChar *
3173
3.24M
xmlParseName(xmlParserCtxt *ctxt) {
3174
3.24M
    const xmlChar *in;
3175
3.24M
    const xmlChar *ret;
3176
3.24M
    size_t count = 0;
3177
3.24M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3178
2.18M
                       XML_MAX_TEXT_LENGTH :
3179
3.24M
                       XML_MAX_NAME_LENGTH;
3180
3181
3.24M
    GROW;
3182
3183
    /*
3184
     * Accelerator for simple ASCII names
3185
     */
3186
3.24M
    in = ctxt->input->cur;
3187
3.24M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3188
3.24M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3189
3.24M
  (*in == '_') || (*in == ':')) {
3190
2.84M
  in++;
3191
31.6M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3192
31.6M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3193
31.6M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3194
31.6M
         (*in == '_') || (*in == '-') ||
3195
31.6M
         (*in == ':') || (*in == '.'))
3196
28.8M
      in++;
3197
2.84M
  if ((*in > 0) && (*in < 0x80)) {
3198
2.77M
      count = in - ctxt->input->cur;
3199
2.77M
            if (count > maxLength) {
3200
87
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3201
87
                return(NULL);
3202
87
            }
3203
2.77M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3204
2.77M
      ctxt->input->cur = in;
3205
2.77M
      ctxt->input->col += count;
3206
2.77M
      if (ret == NULL)
3207
21
          xmlErrMemory(ctxt);
3208
2.77M
      return(ret);
3209
2.77M
  }
3210
2.84M
    }
3211
    /* accelerator for special cases */
3212
469k
    return(xmlParseNameComplex(ctxt));
3213
3.24M
}
3214
3215
static xmlHashedString
3216
5.69M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3217
5.69M
    xmlHashedString ret;
3218
5.69M
    int len = 0, l;
3219
5.69M
    int c;
3220
5.69M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221
226k
                    XML_MAX_TEXT_LENGTH :
3222
5.69M
                    XML_MAX_NAME_LENGTH;
3223
5.69M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3224
5.69M
    size_t startPosition = 0;
3225
3226
5.69M
    ret.name = NULL;
3227
5.69M
    ret.hashValue = 0;
3228
3229
    /*
3230
     * Handler for more complex cases
3231
     */
3232
5.69M
    startPosition = CUR_PTR - BASE_PTR;
3233
5.69M
    c = xmlCurrentChar(ctxt, &l);
3234
5.69M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3235
5.69M
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3236
5.64M
  return(ret);
3237
5.64M
    }
3238
3239
47.6M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3240
47.6M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3241
47.6M
        if (len <= INT_MAX - l)
3242
47.6M
      len += l;
3243
47.6M
  NEXTL(l);
3244
47.6M
  c = xmlCurrentChar(ctxt, &l);
3245
47.6M
    }
3246
56.7k
    if (len > maxLength) {
3247
532
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3248
532
        return(ret);
3249
532
    }
3250
56.1k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3251
56.1k
    if (ret.name == NULL)
3252
6
        xmlErrMemory(ctxt);
3253
56.1k
    return(ret);
3254
56.7k
}
3255
3256
/**
3257
 * Parse an XML name.
3258
 *
3259
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3260
 *                          CombiningChar | Extender
3261
 *
3262
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3263
 *
3264
 * @param ctxt  an XML parser context
3265
 * @returns the Name parsed or NULL
3266
 */
3267
3268
static xmlHashedString
3269
7.11M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3270
7.11M
    const xmlChar *in, *e;
3271
7.11M
    xmlHashedString ret;
3272
7.11M
    size_t count = 0;
3273
7.11M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3274
1.26M
                       XML_MAX_TEXT_LENGTH :
3275
7.11M
                       XML_MAX_NAME_LENGTH;
3276
3277
7.11M
    ret.name = NULL;
3278
3279
    /*
3280
     * Accelerator for simple ASCII names
3281
     */
3282
7.11M
    in = ctxt->input->cur;
3283
7.11M
    e = ctxt->input->end;
3284
7.11M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3285
7.11M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3286
7.11M
   (*in == '_')) && (in < e)) {
3287
1.45M
  in++;
3288
12.5M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3289
12.5M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3290
12.5M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3291
12.5M
          (*in == '_') || (*in == '-') ||
3292
12.5M
          (*in == '.')) && (in < e))
3293
11.0M
      in++;
3294
1.45M
  if (in >= e)
3295
2.66k
      goto complex;
3296
1.45M
  if ((*in > 0) && (*in < 0x80)) {
3297
1.41M
      count = in - ctxt->input->cur;
3298
1.41M
            if (count > maxLength) {
3299
107
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3300
107
                return(ret);
3301
107
            }
3302
1.41M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3303
1.41M
      ctxt->input->cur = in;
3304
1.41M
      ctxt->input->col += count;
3305
1.41M
      if (ret.name == NULL) {
3306
9
          xmlErrMemory(ctxt);
3307
9
      }
3308
1.41M
      return(ret);
3309
1.41M
  }
3310
1.45M
    }
3311
5.69M
complex:
3312
5.69M
    return(xmlParseNCNameComplex(ctxt));
3313
7.11M
}
3314
3315
/**
3316
 * Parse an XML name and compares for match
3317
 * (specialized for endtag parsing)
3318
 *
3319
 * @param ctxt  an XML parser context
3320
 * @param other  the name to compare with
3321
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3322
 * and the name for mismatch
3323
 */
3324
3325
static const xmlChar *
3326
121k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3327
121k
    register const xmlChar *cmp = other;
3328
121k
    register const xmlChar *in;
3329
121k
    const xmlChar *ret;
3330
3331
121k
    GROW;
3332
3333
121k
    in = ctxt->input->cur;
3334
272k
    while (*in != 0 && *in == *cmp) {
3335
151k
  ++in;
3336
151k
  ++cmp;
3337
151k
    }
3338
121k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3339
  /* success */
3340
106k
  ctxt->input->col += in - ctxt->input->cur;
3341
106k
  ctxt->input->cur = in;
3342
106k
  return (const xmlChar*) 1;
3343
106k
    }
3344
    /* failure (or end of input buffer), check with full function */
3345
15.4k
    ret = xmlParseName (ctxt);
3346
    /* strings coming from the dictionary direct compare possible */
3347
15.4k
    if (ret == other) {
3348
1.59k
  return (const xmlChar*) 1;
3349
1.59k
    }
3350
13.8k
    return ret;
3351
15.4k
}
3352
3353
/**
3354
 * Parse an XML name.
3355
 *
3356
 * @param ctxt  an XML parser context
3357
 * @param str  a pointer to the string pointer (IN/OUT)
3358
 * @returns the Name parsed or NULL. The `str` pointer
3359
 * is updated to the current location in the string.
3360
 */
3361
3362
static xmlChar *
3363
5.70M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3364
5.70M
    xmlChar *ret;
3365
5.70M
    const xmlChar *cur = *str;
3366
5.70M
    int flags = 0;
3367
5.70M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3368
2.77M
                    XML_MAX_TEXT_LENGTH :
3369
5.70M
                    XML_MAX_NAME_LENGTH;
3370
3371
5.70M
    if (ctxt->options & XML_PARSE_OLD10)
3372
2.20M
        flags |= XML_SCAN_OLD10;
3373
3374
5.70M
    cur = xmlScanName(*str, maxLength, flags);
3375
5.70M
    if (cur == NULL) {
3376
127
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3377
127
        return(NULL);
3378
127
    }
3379
5.70M
    if (cur == *str)
3380
4.70k
        return(NULL);
3381
3382
5.70M
    ret = xmlStrndup(*str, cur - *str);
3383
5.70M
    if (ret == NULL)
3384
116
        xmlErrMemory(ctxt);
3385
5.70M
    *str = cur;
3386
5.70M
    return(ret);
3387
5.70M
}
3388
3389
/**
3390
 * Parse an XML Nmtoken.
3391
 *
3392
 * @deprecated Internal function, don't use.
3393
 *
3394
 *     [7] Nmtoken ::= (NameChar)+
3395
 *
3396
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3397
 *
3398
 * @param ctxt  an XML parser context
3399
 * @returns the Nmtoken parsed or NULL
3400
 */
3401
3402
xmlChar *
3403
66.0k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3404
66.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3405
66.0k
    xmlChar *ret;
3406
66.0k
    int len = 0, l;
3407
66.0k
    int c;
3408
66.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3409
34.9k
                    XML_MAX_TEXT_LENGTH :
3410
66.0k
                    XML_MAX_NAME_LENGTH;
3411
66.0k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3412
3413
66.0k
    c = xmlCurrentChar(ctxt, &l);
3414
3415
338k
    while (xmlIsNameChar(c, old10)) {
3416
274k
  COPY_BUF(buf, len, c);
3417
274k
  NEXTL(l);
3418
274k
  c = xmlCurrentChar(ctxt, &l);
3419
274k
  if (len >= XML_MAX_NAMELEN) {
3420
      /*
3421
       * Okay someone managed to make a huge token, so he's ready to pay
3422
       * for the processing speed.
3423
       */
3424
2.64k
      xmlChar *buffer;
3425
2.64k
      int max = len * 2;
3426
3427
2.64k
      buffer = xmlMalloc(max);
3428
2.64k
      if (buffer == NULL) {
3429
12
          xmlErrMemory(ctxt);
3430
12
    return(NULL);
3431
12
      }
3432
2.62k
      memcpy(buffer, buf, len);
3433
19.0M
      while (xmlIsNameChar(c, old10)) {
3434
19.0M
    if (len + 10 > max) {
3435
15.6k
        xmlChar *tmp;
3436
15.6k
                    int newSize;
3437
3438
15.6k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3439
15.6k
                    if (newSize < 0) {
3440
164
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3441
164
                        xmlFree(buffer);
3442
164
                        return(NULL);
3443
164
                    }
3444
15.4k
        tmp = xmlRealloc(buffer, newSize);
3445
15.4k
        if (tmp == NULL) {
3446
8
      xmlErrMemory(ctxt);
3447
8
      xmlFree(buffer);
3448
8
      return(NULL);
3449
8
        }
3450
15.4k
        buffer = tmp;
3451
15.4k
                    max = newSize;
3452
15.4k
    }
3453
19.0M
    COPY_BUF(buffer, len, c);
3454
19.0M
    NEXTL(l);
3455
19.0M
    c = xmlCurrentChar(ctxt, &l);
3456
19.0M
      }
3457
2.45k
      buffer[len] = 0;
3458
2.45k
      return(buffer);
3459
2.62k
  }
3460
274k
    }
3461
63.3k
    if (len == 0)
3462
14.9k
        return(NULL);
3463
48.4k
    if (len > maxLength) {
3464
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3465
0
        return(NULL);
3466
0
    }
3467
48.4k
    ret = xmlStrndup(buf, len);
3468
48.4k
    if (ret == NULL)
3469
28
        xmlErrMemory(ctxt);
3470
48.4k
    return(ret);
3471
48.4k
}
3472
3473
/**
3474
 * Validate an entity value and expand parameter entities.
3475
 *
3476
 * @param ctxt  parser context
3477
 * @param buf  string buffer
3478
 * @param str  entity value
3479
 * @param length  size of entity value
3480
 * @param depth  nesting depth
3481
 */
3482
static void
3483
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3484
106k
                          const xmlChar *str, int length, int depth) {
3485
106k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3486
106k
    const xmlChar *end, *chunk;
3487
106k
    int c, l;
3488
3489
106k
    if (str == NULL)
3490
23.3k
        return;
3491
3492
82.8k
    depth += 1;
3493
82.8k
    if (depth > maxDepth) {
3494
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3495
0
                       "Maximum entity nesting depth exceeded");
3496
0
  return;
3497
0
    }
3498
3499
82.8k
    end = str + length;
3500
82.8k
    chunk = str;
3501
3502
225M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3503
225M
        c = *str;
3504
3505
225M
        if (c >= 0x80) {
3506
126M
            l = xmlUTF8MultibyteLen(ctxt, str,
3507
126M
                    "invalid character in entity value\n");
3508
126M
            if (l == 0) {
3509
86.0M
                if (chunk < str)
3510
92.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3511
86.0M
                xmlSBufAddReplChar(buf);
3512
86.0M
                str += 1;
3513
86.0M
                chunk = str;
3514
86.0M
            } else {
3515
40.5M
                str += l;
3516
40.5M
            }
3517
126M
        } else if (c == '&') {
3518
63.2k
            if (str[1] == '#') {
3519
18.4k
                if (chunk < str)
3520
7.96k
                    xmlSBufAddString(buf, chunk, str - chunk);
3521
3522
18.4k
                c = xmlParseStringCharRef(ctxt, &str);
3523
18.4k
                if (c == 0)
3524
3.72k
                    return;
3525
3526
14.6k
                xmlSBufAddChar(buf, c);
3527
3528
14.6k
                chunk = str;
3529
44.8k
            } else {
3530
44.8k
                xmlChar *name;
3531
3532
                /*
3533
                 * General entity references are checked for
3534
                 * syntactic validity.
3535
                 */
3536
44.8k
                str++;
3537
44.8k
                name = xmlParseStringName(ctxt, &str);
3538
3539
44.8k
                if ((name == NULL) || (*str++ != ';')) {
3540
3.74k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3541
3.74k
                            "EntityValue: '&' forbidden except for entities "
3542
3.74k
                            "references\n");
3543
3.74k
                    xmlFree(name);
3544
3.74k
                    return;
3545
3.74k
                }
3546
3547
41.1k
                xmlFree(name);
3548
41.1k
            }
3549
98.9M
        } else if (c == '%') {
3550
48.4k
            xmlEntityPtr ent;
3551
3552
48.4k
            if (chunk < str)
3553
16.3k
                xmlSBufAddString(buf, chunk, str - chunk);
3554
3555
48.4k
            ent = xmlParseStringPEReference(ctxt, &str);
3556
48.4k
            if (ent == NULL)
3557
6.77k
                return;
3558
3559
41.6k
            if (!PARSER_EXTERNAL(ctxt)) {
3560
210
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3561
210
                return;
3562
210
            }
3563
3564
41.4k
            if (ent->content == NULL) {
3565
                /*
3566
                 * Note: external parsed entities will not be loaded,
3567
                 * it is not required for a non-validating parser to
3568
                 * complete external PEReferences coming from the
3569
                 * internal subset
3570
                 */
3571
23.7k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3572
23.7k
                    ((ctxt->replaceEntities) ||
3573
23.5k
                     (ctxt->validate))) {
3574
21.5k
                    xmlLoadEntityContent(ctxt, ent);
3575
21.5k
                } else {
3576
2.23k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3577
2.23k
                                  "not validating will not read content for "
3578
2.23k
                                  "PE entity %s\n", ent->name, NULL);
3579
2.23k
                }
3580
23.7k
            }
3581
3582
            /*
3583
             * TODO: Skip if ent->content is still NULL.
3584
             */
3585
3586
41.4k
            if (xmlParserEntityCheck(ctxt, ent->length))
3587
45
                return;
3588
3589
41.4k
            if (ent->flags & XML_ENT_EXPANDING) {
3590
205
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3591
205
                xmlHaltParser(ctxt);
3592
205
                return;
3593
205
            }
3594
3595
41.2k
            ent->flags |= XML_ENT_EXPANDING;
3596
41.2k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3597
41.2k
                                      depth);
3598
41.2k
            ent->flags &= ~XML_ENT_EXPANDING;
3599
3600
41.2k
            chunk = str;
3601
98.9M
        } else {
3602
            /* Normal ASCII char */
3603
98.9M
            if (!IS_BYTE_CHAR(c)) {
3604
222k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3605
222k
                        "invalid character in entity value\n");
3606
222k
                if (chunk < str)
3607
14.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3608
222k
                xmlSBufAddReplChar(buf);
3609
222k
                str += 1;
3610
222k
                chunk = str;
3611
98.6M
            } else {
3612
98.6M
                str += 1;
3613
98.6M
            }
3614
98.9M
        }
3615
225M
    }
3616
3617
68.1k
    if (chunk < str)
3618
49.1k
        xmlSBufAddString(buf, chunk, str - chunk);
3619
68.1k
}
3620
3621
/**
3622
 * Parse a value for ENTITY declarations
3623
 *
3624
 * @deprecated Internal function, don't use.
3625
 *
3626
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3627
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3628
 *
3629
 * @param ctxt  an XML parser context
3630
 * @param orig  if non-NULL store a copy of the original entity value
3631
 * @returns the EntityValue parsed with reference substituted or NULL
3632
 */
3633
xmlChar *
3634
65.3k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3635
65.3k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3636
27.0k
                         XML_MAX_HUGE_LENGTH :
3637
65.3k
                         XML_MAX_TEXT_LENGTH;
3638
65.3k
    xmlSBuf buf;
3639
65.3k
    const xmlChar *start;
3640
65.3k
    int quote, length;
3641
3642
65.3k
    xmlSBufInit(&buf, maxLength);
3643
3644
65.3k
    GROW;
3645
3646
65.3k
    quote = CUR;
3647
65.3k
    if ((quote != '"') && (quote != '\'')) {
3648
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3649
0
  return(NULL);
3650
0
    }
3651
65.3k
    CUR_PTR++;
3652
3653
65.3k
    length = 0;
3654
3655
    /*
3656
     * Copy raw content of the entity into a buffer
3657
     */
3658
101M
    while (1) {
3659
101M
        int c;
3660
3661
101M
        if (PARSER_STOPPED(ctxt))
3662
17
            goto error;
3663
3664
101M
        if (CUR_PTR >= ctxt->input->end) {
3665
295
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3666
295
            goto error;
3667
295
        }
3668
3669
101M
        c = CUR;
3670
3671
101M
        if (c == 0) {
3672
41
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3673
41
                    "invalid character in entity value\n");
3674
41
            goto error;
3675
41
        }
3676
101M
        if (c == quote)
3677
64.9k
            break;
3678
101M
        NEXTL(1);
3679
101M
        length += 1;
3680
3681
        /*
3682
         * TODO: Check growth threshold
3683
         */
3684
101M
        if (ctxt->input->end - CUR_PTR < 10)
3685
53.5k
            GROW;
3686
101M
    }
3687
3688
64.9k
    start = CUR_PTR - length;
3689
3690
64.9k
    if (orig != NULL) {
3691
64.9k
        *orig = xmlStrndup(start, length);
3692
64.9k
        if (*orig == NULL)
3693
82
            xmlErrMemory(ctxt);
3694
64.9k
    }
3695
3696
64.9k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3697
3698
64.9k
    NEXTL(1);
3699
3700
64.9k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3701
3702
353
error:
3703
353
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3704
353
    return(NULL);
3705
65.3k
}
3706
3707
/**
3708
 * Check an entity reference in an attribute value for validity
3709
 * without expanding it.
3710
 *
3711
 * @param ctxt  parser context
3712
 * @param pent  entity
3713
 * @param depth  nesting depth
3714
 */
3715
static void
3716
9.07k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3717
9.07k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3718
9.07k
    const xmlChar *str;
3719
9.07k
    unsigned long expandedSize = pent->length;
3720
9.07k
    int c, flags;
3721
3722
9.07k
    depth += 1;
3723
9.07k
    if (depth > maxDepth) {
3724
53
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3725
53
                       "Maximum entity nesting depth exceeded");
3726
53
  return;
3727
53
    }
3728
3729
9.02k
    if (pent->flags & XML_ENT_EXPANDING) {
3730
34
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3731
34
        xmlHaltParser(ctxt);
3732
34
        return;
3733
34
    }
3734
3735
    /*
3736
     * If we're parsing a default attribute value in DTD content,
3737
     * the entity might reference other entities which weren't
3738
     * defined yet, so the check isn't reliable.
3739
     */
3740
8.98k
    if (ctxt->inSubset == 0)
3741
8.82k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3742
168
    else
3743
168
        flags = XML_ENT_VALIDATED;
3744
3745
8.98k
    str = pent->content;
3746
8.98k
    if (str == NULL)
3747
70
        goto done;
3748
3749
    /*
3750
     * Note that entity values are already validated. We only check
3751
     * for illegal less-than signs and compute the expanded size
3752
     * of the entity. No special handling for multi-byte characters
3753
     * is needed.
3754
     */
3755
56.8M
    while (!PARSER_STOPPED(ctxt)) {
3756
56.8M
        c = *str;
3757
3758
56.8M
  if (c != '&') {
3759
56.7M
            if (c == 0)
3760
8.63k
                break;
3761
3762
56.7M
            if (c == '<')
3763
1.56k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3764
1.56k
                        "'<' in entity '%s' is not allowed in attributes "
3765
1.56k
                        "values\n", pent->name);
3766
3767
56.7M
            str += 1;
3768
56.7M
        } else if (str[1] == '#') {
3769
953
            int val;
3770
3771
953
      val = xmlParseStringCharRef(ctxt, &str);
3772
953
      if (val == 0) {
3773
27
                pent->content[0] = 0;
3774
27
                break;
3775
27
            }
3776
17.5k
  } else {
3777
17.5k
            xmlChar *name;
3778
17.5k
            xmlEntityPtr ent;
3779
3780
17.5k
      name = xmlParseStringEntityRef(ctxt, &str);
3781
17.5k
      if (name == NULL) {
3782
40
                pent->content[0] = 0;
3783
40
                break;
3784
40
            }
3785
3786
17.4k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3787
17.4k
            xmlFree(name);
3788
3789
17.4k
            if ((ent != NULL) &&
3790
17.4k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3791
15.4k
                if ((ent->flags & flags) != flags) {
3792
7.32k
                    pent->flags |= XML_ENT_EXPANDING;
3793
7.32k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3794
7.32k
                    pent->flags &= ~XML_ENT_EXPANDING;
3795
7.32k
                }
3796
3797
15.4k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3798
15.4k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3799
15.4k
            }
3800
17.4k
        }
3801
56.8M
    }
3802
3803
8.98k
done:
3804
8.98k
    if (ctxt->inSubset == 0)
3805
8.82k
        pent->expandedSize = expandedSize;
3806
3807
8.98k
    pent->flags |= flags;
3808
8.98k
}
3809
3810
/**
3811
 * Expand general entity references in an entity or attribute value.
3812
 * Perform attribute value normalization.
3813
 *
3814
 * @param ctxt  parser context
3815
 * @param buf  string buffer
3816
 * @param str  entity or attribute value
3817
 * @param pent  entity for entity value, NULL for attribute values
3818
 * @param normalize  whether to collapse whitespace
3819
 * @param inSpace  whitespace state
3820
 * @param depth  nesting depth
3821
 * @param check  whether to check for amplification
3822
 * @returns  whether there was a normalization change
3823
 */
3824
static int
3825
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3826
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3827
3.57M
                          int *inSpace, int depth, int check) {
3828
3.57M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3829
3.57M
    int c, chunkSize;
3830
3.57M
    int normChange = 0;
3831
3832
3.57M
    if (str == NULL)
3833
201
        return(0);
3834
3835
3.56M
    depth += 1;
3836
3.56M
    if (depth > maxDepth) {
3837
625k
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3838
625k
                       "Maximum entity nesting depth exceeded");
3839
625k
  return(0);
3840
625k
    }
3841
3842
2.94M
    if (pent != NULL) {
3843
2.91M
        if (pent->flags & XML_ENT_EXPANDING) {
3844
10
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3845
10
            xmlHaltParser(ctxt);
3846
10
            return(0);
3847
10
        }
3848
3849
2.91M
        if (check) {
3850
2.91M
            if (xmlParserEntityCheck(ctxt, pent->length))
3851
209
                return(0);
3852
2.91M
        }
3853
2.91M
    }
3854
3855
2.94M
    chunkSize = 0;
3856
3857
    /*
3858
     * Note that entity values are already validated. No special
3859
     * handling for multi-byte characters is needed.
3860
     */
3861
797M
    while (!PARSER_STOPPED(ctxt)) {
3862
797M
        c = *str;
3863
3864
797M
  if (c != '&') {
3865
791M
            if (c == 0)
3866
2.90M
                break;
3867
3868
            /*
3869
             * If this function is called without an entity, it is used to
3870
             * expand entities in an attribute content where less-than was
3871
             * already unscaped and is allowed.
3872
             */
3873
788M
            if ((pent != NULL) && (c == '<')) {
3874
35.2k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3875
35.2k
                        "'<' in entity '%s' is not allowed in attributes "
3876
35.2k
                        "values\n", pent->name);
3877
35.2k
                break;
3878
35.2k
            }
3879
3880
788M
            if (c <= 0x20) {
3881
42.1M
                if ((normalize) && (*inSpace)) {
3882
                    /* Skip char */
3883
121k
                    if (chunkSize > 0) {
3884
44.5k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
44.5k
                        chunkSize = 0;
3886
44.5k
                    }
3887
121k
                    normChange = 1;
3888
41.9M
                } else if (c < 0x20) {
3889
36.0M
                    if (chunkSize > 0) {
3890
671k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3891
671k
                        chunkSize = 0;
3892
671k
                    }
3893
3894
36.0M
                    xmlSBufAddCString(buf, " ", 1);
3895
36.0M
                } else {
3896
5.94M
                    chunkSize += 1;
3897
5.94M
                }
3898
3899
42.1M
                *inSpace = 1;
3900
746M
            } else {
3901
746M
                chunkSize += 1;
3902
746M
                *inSpace = 0;
3903
746M
            }
3904
3905
788M
            str += 1;
3906
788M
        } else if (str[1] == '#') {
3907
631k
            int val;
3908
3909
631k
            if (chunkSize > 0) {
3910
629k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
629k
                chunkSize = 0;
3912
629k
            }
3913
3914
631k
      val = xmlParseStringCharRef(ctxt, &str);
3915
631k
      if (val == 0) {
3916
24
                if (pent != NULL)
3917
24
                    pent->content[0] = 0;
3918
24
                break;
3919
24
            }
3920
3921
631k
            if (val == ' ') {
3922
11.2k
                if ((normalize) && (*inSpace))
3923
216
                    normChange = 1;
3924
11.0k
                else
3925
11.0k
                    xmlSBufAddCString(buf, " ", 1);
3926
11.2k
                *inSpace = 1;
3927
620k
            } else {
3928
620k
                xmlSBufAddChar(buf, val);
3929
620k
                *inSpace = 0;
3930
620k
            }
3931
5.59M
  } else {
3932
5.59M
            xmlChar *name;
3933
5.59M
            xmlEntityPtr ent;
3934
3935
5.59M
            if (chunkSize > 0) {
3936
1.53M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3937
1.53M
                chunkSize = 0;
3938
1.53M
            }
3939
3940
5.59M
      name = xmlParseStringEntityRef(ctxt, &str);
3941
5.59M
            if (name == NULL) {
3942
101
                if (pent != NULL)
3943
91
                    pent->content[0] = 0;
3944
101
                break;
3945
101
            }
3946
3947
5.59M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3948
5.59M
            xmlFree(name);
3949
3950
5.59M
      if ((ent != NULL) &&
3951
5.59M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3952
1.24M
    if (ent->content == NULL) {
3953
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3954
0
          "predefined entity has no content\n");
3955
0
                    break;
3956
0
                }
3957
3958
1.24M
                xmlSBufAddString(buf, ent->content, ent->length);
3959
3960
1.24M
                *inSpace = 0;
3961
4.35M
      } else if ((ent != NULL) && (ent->content != NULL)) {
3962
3.31M
                if (pent != NULL)
3963
3.31M
                    pent->flags |= XML_ENT_EXPANDING;
3964
3.31M
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3965
3.31M
                        ent->content, ent, normalize, inSpace, depth, check);
3966
3.31M
                if (pent != NULL)
3967
3.31M
                    pent->flags &= ~XML_ENT_EXPANDING;
3968
3.31M
      }
3969
5.59M
        }
3970
797M
    }
3971
3972
2.94M
    if (chunkSize > 0)
3973
789k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3974
3975
2.94M
    return(normChange);
3976
2.94M
}
3977
3978
/**
3979
 * Expand general entity references in an entity or attribute value.
3980
 * Perform attribute value normalization.
3981
 *
3982
 * @param ctxt  parser context
3983
 * @param str  entity or attribute value
3984
 * @param normalize  whether to collapse whitespace
3985
 * @returns the expanded attribtue value.
3986
 */
3987
xmlChar *
3988
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3989
27.5k
                            int normalize) {
3990
27.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3991
12.1k
                         XML_MAX_HUGE_LENGTH :
3992
27.5k
                         XML_MAX_TEXT_LENGTH;
3993
27.5k
    xmlSBuf buf;
3994
27.5k
    int inSpace = 1;
3995
3996
27.5k
    xmlSBufInit(&buf, maxLength);
3997
3998
27.5k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3999
27.5k
                              ctxt->inputNr, /* check */ 0);
4000
4001
27.5k
    if ((normalize) && (inSpace) && (buf.size > 0))
4002
0
        buf.size--;
4003
4004
27.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4005
27.5k
}
4006
4007
/**
4008
 * Parse a value for an attribute.
4009
 *
4010
 * NOTE: if no normalization is needed, the routine will return pointers
4011
 * directly from the data buffer.
4012
 *
4013
 * 3.3.3 Attribute-Value Normalization:
4014
 *
4015
 * Before the value of an attribute is passed to the application or
4016
 * checked for validity, the XML processor must normalize it as follows:
4017
 *
4018
 * - a character reference is processed by appending the referenced
4019
 *   character to the attribute value
4020
 * - an entity reference is processed by recursively processing the
4021
 *   replacement text of the entity
4022
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4023
 *   appending \#x20 to the normalized value, except that only a single
4024
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4025
 *   parsed entity or the literal entity value of an internal parsed entity
4026
 * - other characters are processed by appending them to the normalized value
4027
 *
4028
 * If the declared value is not CDATA, then the XML processor must further
4029
 * process the normalized attribute value by discarding any leading and
4030
 * trailing space (\#x20) characters, and by replacing sequences of space
4031
 * (\#x20) characters by a single space (\#x20) character.
4032
 * All attributes for which no declaration has been read should be treated
4033
 * by a non-validating parser as if declared CDATA.
4034
 *
4035
 * @param ctxt  an XML parser context
4036
 * @param attlen  attribute len result
4037
 * @param outFlags  resulting XML_ATTVAL_* flags
4038
 * @param special  value from attsSpecial
4039
 * @param isNamespace  whether this is a namespace declaration
4040
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4041
 *     caller if it was copied, this can be detected by val[*len] == 0.
4042
 */
4043
static xmlChar *
4044
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4045
473k
                         int special, int isNamespace) {
4046
473k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4047
313k
                         XML_MAX_HUGE_LENGTH :
4048
473k
                         XML_MAX_TEXT_LENGTH;
4049
473k
    xmlSBuf buf;
4050
473k
    xmlChar *ret;
4051
473k
    int c, l, quote, entFlags, chunkSize;
4052
473k
    int inSpace = 1;
4053
473k
    int replaceEntities;
4054
473k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4055
473k
    int attvalFlags = 0;
4056
4057
    /* Always expand namespace URIs */
4058
473k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4059
4060
473k
    xmlSBufInit(&buf, maxLength);
4061
4062
473k
    GROW;
4063
4064
473k
    quote = CUR;
4065
473k
    if ((quote != '"') && (quote != '\'')) {
4066
19.5k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4067
19.5k
  return(NULL);
4068
19.5k
    }
4069
453k
    NEXTL(1);
4070
4071
453k
    if (ctxt->inSubset == 0)
4072
414k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4073
38.9k
    else
4074
38.9k
        entFlags = XML_ENT_VALIDATED;
4075
4076
453k
    inSpace = 1;
4077
453k
    chunkSize = 0;
4078
4079
319M
    while (1) {
4080
319M
        if (PARSER_STOPPED(ctxt))
4081
666
            goto error;
4082
4083
319M
        if (CUR_PTR >= ctxt->input->end) {
4084
8.16k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4085
8.16k
                           "AttValue: ' expected\n");
4086
8.16k
            goto error;
4087
8.16k
        }
4088
4089
        /*
4090
         * TODO: Check growth threshold
4091
         */
4092
319M
        if (ctxt->input->end - CUR_PTR < 10)
4093
147k
            GROW;
4094
4095
319M
        c = CUR;
4096
4097
319M
        if (c >= 0x80) {
4098
264M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4099
264M
                    "invalid character in attribute value\n");
4100
264M
            if (l == 0) {
4101
166M
                if (chunkSize > 0) {
4102
197k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4103
197k
                    chunkSize = 0;
4104
197k
                }
4105
166M
                xmlSBufAddReplChar(&buf);
4106
166M
                NEXTL(1);
4107
166M
            } else {
4108
97.6M
                chunkSize += l;
4109
97.6M
                NEXTL(l);
4110
97.6M
            }
4111
4112
264M
            inSpace = 0;
4113
264M
        } else if (c != '&') {
4114
55.0M
            if (c > 0x20) {
4115
12.1M
                if (c == quote)
4116
440k
                    break;
4117
4118
11.6M
                if (c == '<')
4119
316k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4120
4121
11.6M
                chunkSize += 1;
4122
11.6M
                inSpace = 0;
4123
42.9M
            } else if (!IS_BYTE_CHAR(c)) {
4124
22.8M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4125
22.8M
                        "invalid character in attribute value\n");
4126
22.8M
                if (chunkSize > 0) {
4127
101k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4128
101k
                    chunkSize = 0;
4129
101k
                }
4130
22.8M
                xmlSBufAddReplChar(&buf);
4131
22.8M
                inSpace = 0;
4132
22.8M
            } else {
4133
                /* Whitespace */
4134
20.0M
                if ((normalize) && (inSpace)) {
4135
                    /* Skip char */
4136
1.25M
                    if (chunkSize > 0) {
4137
14.1k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4138
14.1k
                        chunkSize = 0;
4139
14.1k
                    }
4140
1.25M
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4141
18.8M
                } else if (c < 0x20) {
4142
                    /* Convert to space */
4143
18.1M
                    if (chunkSize > 0) {
4144
110k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4145
110k
                        chunkSize = 0;
4146
110k
                    }
4147
4148
18.1M
                    xmlSBufAddCString(&buf, " ", 1);
4149
18.1M
                } else {
4150
717k
                    chunkSize += 1;
4151
717k
                }
4152
4153
20.0M
                inSpace = 1;
4154
4155
20.0M
                if ((c == 0xD) && (NXT(1) == 0xA))
4156
5.99k
                    CUR_PTR++;
4157
20.0M
            }
4158
4159
54.5M
            NEXTL(1);
4160
54.5M
        } else if (NXT(1) == '#') {
4161
88.2k
            int val;
4162
4163
88.2k
            if (chunkSize > 0) {
4164
48.9k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4165
48.9k
                chunkSize = 0;
4166
48.9k
            }
4167
4168
88.2k
            val = xmlParseCharRef(ctxt);
4169
88.2k
            if (val == 0)
4170
3.89k
                goto error;
4171
4172
84.3k
            if ((val == '&') && (!replaceEntities)) {
4173
                /*
4174
                 * The reparsing will be done in xmlNodeParseContent()
4175
                 * called from SAX2.c
4176
                 */
4177
2.02k
                xmlSBufAddCString(&buf, "&#38;", 5);
4178
2.02k
                inSpace = 0;
4179
82.3k
            } else if (val == ' ') {
4180
21.9k
                if ((normalize) && (inSpace))
4181
2.57k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4182
19.3k
                else
4183
19.3k
                    xmlSBufAddCString(&buf, " ", 1);
4184
21.9k
                inSpace = 1;
4185
60.4k
            } else {
4186
60.4k
                xmlSBufAddChar(&buf, val);
4187
60.4k
                inSpace = 0;
4188
60.4k
            }
4189
431k
        } else {
4190
431k
            const xmlChar *name;
4191
431k
            xmlEntityPtr ent;
4192
4193
431k
            if (chunkSize > 0) {
4194
103k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4195
103k
                chunkSize = 0;
4196
103k
            }
4197
4198
431k
            name = xmlParseEntityRefInternal(ctxt);
4199
431k
            if (name == NULL) {
4200
                /*
4201
                 * Probably a literal '&' which wasn't escaped.
4202
                 * TODO: Handle gracefully in recovery mode.
4203
                 */
4204
82.3k
                continue;
4205
82.3k
            }
4206
4207
348k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4208
348k
            if (ent == NULL)
4209
32.5k
                continue;
4210
4211
316k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4212
24.5k
                if ((ent->content[0] == '&') && (!replaceEntities))
4213
4.52k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4214
20.0k
                else
4215
20.0k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4216
24.5k
                inSpace = 0;
4217
291k
            } else if (replaceEntities) {
4218
223k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4219
223k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4220
223k
                        /* check */ 1) > 0)
4221
31.7k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4222
223k
            } else {
4223
67.8k
                if ((ent->flags & entFlags) != entFlags)
4224
1.75k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4225
4226
67.8k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4227
242
                    ent->content[0] = 0;
4228
242
                    goto error;
4229
242
                }
4230
4231
                /*
4232
                 * Just output the reference
4233
                 */
4234
67.5k
                xmlSBufAddCString(&buf, "&", 1);
4235
67.5k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4236
67.5k
                xmlSBufAddCString(&buf, ";", 1);
4237
4238
67.5k
                inSpace = 0;
4239
67.5k
            }
4240
316k
  }
4241
319M
    }
4242
4243
440k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4244
261k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4245
4246
261k
        if (attlen != NULL)
4247
261k
            *attlen = chunkSize;
4248
261k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4249
495
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4250
495
            *attlen -= 1;
4251
495
        }
4252
4253
        /* Report potential error */
4254
261k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4255
261k
    } else {
4256
179k
        if (chunkSize > 0)
4257
148k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4258
4259
179k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4260
2.01k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4261
2.01k
            buf.size--;
4262
2.01k
        }
4263
4264
179k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4265
179k
        attvalFlags |= XML_ATTVAL_ALLOC;
4266
4267
179k
        if (ret != NULL) {
4268
179k
            if (attlen != NULL)
4269
60.8k
                *attlen = buf.size;
4270
179k
        }
4271
179k
    }
4272
4273
440k
    if (outFlags != NULL)
4274
321k
        *outFlags = attvalFlags;
4275
4276
440k
    NEXTL(1);
4277
4278
440k
    return(ret);
4279
4280
12.9k
error:
4281
12.9k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4282
12.9k
    return(NULL);
4283
453k
}
4284
4285
/**
4286
 * Parse a value for an attribute
4287
 * Note: the parser won't do substitution of entities here, this
4288
 * will be handled later in #xmlStringGetNodeList
4289
 *
4290
 * @deprecated Internal function, don't use.
4291
 *
4292
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4293
 *                       "'" ([^<&'] | Reference)* "'"
4294
 *
4295
 * 3.3.3 Attribute-Value Normalization:
4296
 *
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 *
4300
 * - a character reference is processed by appending the referenced
4301
 *   character to the attribute value
4302
 * - an entity reference is processed by recursively processing the
4303
 *   replacement text of the entity
4304
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4305
 *   appending \#x20 to the normalized value, except that only a single
4306
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4307
 *   parsed entity or the literal entity value of an internal parsed entity
4308
 * - other characters are processed by appending them to the normalized value
4309
 *
4310
 * If the declared value is not CDATA, then the XML processor must further
4311
 * process the normalized attribute value by discarding any leading and
4312
 * trailing space (\#x20) characters, and by replacing sequences of space
4313
 * (\#x20) characters by a single space (\#x20) character.
4314
 * All attributes for which no declaration has been read should be treated
4315
 * by a non-validating parser as if declared CDATA.
4316
 *
4317
 * @param ctxt  an XML parser context
4318
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4319
 * caller.
4320
 */
4321
xmlChar *
4322
136k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4323
136k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4324
136k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4325
136k
}
4326
4327
/**
4328
 * Parse an XML Literal
4329
 *
4330
 * @deprecated Internal function, don't use.
4331
 *
4332
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4333
 *
4334
 * @param ctxt  an XML parser context
4335
 * @returns the SystemLiteral parsed or NULL
4336
 */
4337
4338
xmlChar *
4339
39.4k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4340
39.4k
    xmlChar *buf = NULL;
4341
39.4k
    int len = 0;
4342
39.4k
    int size = XML_PARSER_BUFFER_SIZE;
4343
39.4k
    int cur, l;
4344
39.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4345
10.9k
                    XML_MAX_TEXT_LENGTH :
4346
39.4k
                    XML_MAX_NAME_LENGTH;
4347
39.4k
    xmlChar stop;
4348
4349
39.4k
    if (RAW == '"') {
4350
20.3k
        NEXT;
4351
20.3k
  stop = '"';
4352
20.3k
    } else if (RAW == '\'') {
4353
17.9k
        NEXT;
4354
17.9k
  stop = '\'';
4355
17.9k
    } else {
4356
1.02k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4357
1.02k
  return(NULL);
4358
1.02k
    }
4359
4360
38.3k
    buf = xmlMalloc(size);
4361
38.3k
    if (buf == NULL) {
4362
52
        xmlErrMemory(ctxt);
4363
52
  return(NULL);
4364
52
    }
4365
38.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
4366
12.3M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4367
12.3M
  if (len + 5 >= size) {
4368
4.23k
      xmlChar *tmp;
4369
4.23k
            int newSize;
4370
4371
4.23k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4372
4.23k
            if (newSize < 0) {
4373
6
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4374
6
                xmlFree(buf);
4375
6
                return(NULL);
4376
6
            }
4377
4.22k
      tmp = xmlRealloc(buf, newSize);
4378
4.22k
      if (tmp == NULL) {
4379
9
          xmlFree(buf);
4380
9
    xmlErrMemory(ctxt);
4381
9
    return(NULL);
4382
9
      }
4383
4.21k
      buf = tmp;
4384
4.21k
            size = newSize;
4385
4.21k
  }
4386
12.3M
  COPY_BUF(buf, len, cur);
4387
12.3M
  NEXTL(l);
4388
12.3M
  cur = xmlCurrentCharRecover(ctxt, &l);
4389
12.3M
    }
4390
38.3k
    buf[len] = 0;
4391
38.3k
    if (!IS_CHAR(cur)) {
4392
2.28k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4393
36.0k
    } else {
4394
36.0k
  NEXT;
4395
36.0k
    }
4396
38.3k
    return(buf);
4397
38.3k
}
4398
4399
/**
4400
 * Parse an XML public literal
4401
 *
4402
 * @deprecated Internal function, don't use.
4403
 *
4404
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4405
 *
4406
 * @param ctxt  an XML parser context
4407
 * @returns the PubidLiteral parsed or NULL.
4408
 */
4409
4410
xmlChar *
4411
21.2k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4412
21.2k
    xmlChar *buf = NULL;
4413
21.2k
    int len = 0;
4414
21.2k
    int size = XML_PARSER_BUFFER_SIZE;
4415
21.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4416
3.14k
                    XML_MAX_TEXT_LENGTH :
4417
21.2k
                    XML_MAX_NAME_LENGTH;
4418
21.2k
    xmlChar cur;
4419
21.2k
    xmlChar stop;
4420
4421
21.2k
    if (RAW == '"') {
4422
376
        NEXT;
4423
376
  stop = '"';
4424
20.8k
    } else if (RAW == '\'') {
4425
20.2k
        NEXT;
4426
20.2k
  stop = '\'';
4427
20.2k
    } else {
4428
688
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4429
688
  return(NULL);
4430
688
    }
4431
20.5k
    buf = xmlMalloc(size);
4432
20.5k
    if (buf == NULL) {
4433
20
  xmlErrMemory(ctxt);
4434
20
  return(NULL);
4435
20
    }
4436
20.5k
    cur = CUR;
4437
788k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4438
788k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4439
767k
  if (len + 1 >= size) {
4440
668
      xmlChar *tmp;
4441
668
            int newSize;
4442
4443
668
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4444
668
            if (newSize < 0) {
4445
5
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4446
5
                xmlFree(buf);
4447
5
                return(NULL);
4448
5
            }
4449
663
      tmp = xmlRealloc(buf, newSize);
4450
663
      if (tmp == NULL) {
4451
6
    xmlErrMemory(ctxt);
4452
6
    xmlFree(buf);
4453
6
    return(NULL);
4454
6
      }
4455
657
      buf = tmp;
4456
657
            size = newSize;
4457
657
  }
4458
767k
  buf[len++] = cur;
4459
767k
  NEXT;
4460
767k
  cur = CUR;
4461
767k
    }
4462
20.5k
    buf[len] = 0;
4463
20.5k
    if (cur != stop) {
4464
5.63k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4465
14.9k
    } else {
4466
14.9k
  NEXTL(1);
4467
14.9k
    }
4468
20.5k
    return(buf);
4469
20.5k
}
4470
4471
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4472
4473
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. A byte maps to itself, except for the bytes below which
 * map to 0x00:
 *   - control characters other than tab (0x09); CR and LF are
 *     treated separately
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D)
 *   - DEL (0x7F) is kept as is
 *   - all non-ASCII bytes (0x80 - 0xFF); they are not listed and are
 *     zero-initialized implicitly
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x07 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 - 0x0F: only tab, CR/LF separated */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x27: no '&' */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    /* 0x28 - 0x2F */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x37 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 - 0x3F: no '<' */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x47 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 - 0x4F */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x57 */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 - 0x5F: no ']' */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x67 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 - 0x6F */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x77 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 - 0x7F */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4510
4511
static void
4512
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4513
2.16M
              int isBlank) {
4514
2.16M
    int checkBlanks;
4515
4516
2.16M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4517
531k
        return;
4518
4519
1.62M
    checkBlanks = (!ctxt->keepBlanks) ||
4520
1.62M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4521
4522
    /*
4523
     * Calling areBlanks with only parts of a text node
4524
     * is fundamentally broken, making the NOBLANKS option
4525
     * essentially unusable.
4526
     */
4527
1.62M
    if ((checkBlanks) &&
4528
1.62M
        (areBlanks(ctxt, buf, size, isBlank))) {
4529
7.17k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4530
7.17k
            (ctxt->keepBlanks))
4531
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4532
1.62M
    } else {
4533
1.62M
        if (ctxt->sax->characters != NULL)
4534
1.62M
            ctxt->sax->characters(ctxt->userData, buf, size);
4535
4536
        /*
4537
         * The old code used to update this value for "complex" data
4538
         * even if checkBlanks was false. This was probably a bug.
4539
         */
4540
1.62M
        if ((checkBlanks) && (*ctxt->space == -1))
4541
63.5k
            *ctxt->space = -2;
4542
1.62M
    }
4543
1.62M
}
4544
4545
/**
4546
 * Parse character data. Always makes progress if the first char isn't
4547
 * '<' or '&'.
4548
 *
4549
 * The right angle bracket (>) may be represented using the string "&gt;",
4550
 * and must, for compatibility, be escaped using "&gt;" or a character
4551
 * reference when it appears in the string "]]>" in content, when that
4552
 * string is not marking the end of a CDATA section.
4553
 *
4554
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4555
 * @param ctxt  an XML parser context
4556
 * @param partial  buffer may contain partial UTF-8 sequences
4557
 */
4558
static void
4559
3.31M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4560
3.31M
    const xmlChar *in;
4561
3.31M
    int line = ctxt->input->line;
4562
3.31M
    int col = ctxt->input->col;
4563
3.31M
    int ccol;
4564
3.31M
    int terminate = 0;
4565
4566
3.31M
    GROW;
4567
    /*
4568
     * Accelerated common case where input don't need to be
4569
     * modified before passing it to the handler.
4570
     */
4571
3.31M
    in = ctxt->input->cur;
4572
3.32M
    do {
4573
3.37M
get_more_space:
4574
3.48M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4575
3.37M
        if (*in == 0xA) {
4576
10.9M
            do {
4577
10.9M
                ctxt->input->line++; ctxt->input->col = 1;
4578
10.9M
                in++;
4579
10.9M
            } while (*in == 0xA);
4580
55.9k
            goto get_more_space;
4581
55.9k
        }
4582
3.32M
        if (*in == '<') {
4583
55.8k
            while (in > ctxt->input->cur) {
4584
27.9k
                const xmlChar *tmp = ctxt->input->cur;
4585
27.9k
                size_t nbchar = in - tmp;
4586
4587
27.9k
                if (nbchar > XML_MAX_ITEMS)
4588
0
                    nbchar = XML_MAX_ITEMS;
4589
27.9k
                ctxt->input->cur += nbchar;
4590
4591
27.9k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4592
27.9k
            }
4593
27.9k
            return;
4594
27.9k
        }
4595
4596
3.77M
get_more:
4597
3.77M
        ccol = ctxt->input->col;
4598
20.4M
        while (test_char_data[*in]) {
4599
16.6M
            in++;
4600
16.6M
            ccol++;
4601
16.6M
        }
4602
3.77M
        ctxt->input->col = ccol;
4603
3.77M
        if (*in == 0xA) {
4604
4.97M
            do {
4605
4.97M
                ctxt->input->line++; ctxt->input->col = 1;
4606
4.97M
                in++;
4607
4.97M
            } while (*in == 0xA);
4608
83.8k
            goto get_more;
4609
83.8k
        }
4610
3.69M
        if (*in == ']') {
4611
398k
            size_t avail = ctxt->input->end - in;
4612
4613
398k
            if (partial && avail < 2) {
4614
3
                terminate = 1;
4615
3
                goto invoke_callback;
4616
3
            }
4617
398k
            if (in[1] == ']') {
4618
358k
                if (partial && avail < 3) {
4619
237
                    terminate = 1;
4620
237
                    goto invoke_callback;
4621
237
                }
4622
358k
                if (in[2] == '>')
4623
5.14k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4624
358k
            }
4625
4626
398k
            in++;
4627
398k
            ctxt->input->col++;
4628
398k
            goto get_more;
4629
398k
        }
4630
4631
3.29M
invoke_callback:
4632
4.23M
        while (in > ctxt->input->cur) {
4633
943k
            const xmlChar *tmp = ctxt->input->cur;
4634
943k
            size_t nbchar = in - tmp;
4635
4636
943k
            if (nbchar > XML_MAX_ITEMS)
4637
0
                nbchar = XML_MAX_ITEMS;
4638
943k
            ctxt->input->cur += nbchar;
4639
4640
943k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4641
4642
943k
            line = ctxt->input->line;
4643
943k
            col = ctxt->input->col;
4644
943k
        }
4645
3.29M
        ctxt->input->cur = in;
4646
3.29M
        if (*in == 0xD) {
4647
11.5k
            in++;
4648
11.5k
            if (*in == 0xA) {
4649
6.48k
                ctxt->input->cur = in;
4650
6.48k
                in++;
4651
6.48k
                ctxt->input->line++; ctxt->input->col = 1;
4652
6.48k
                continue; /* while */
4653
6.48k
            }
4654
5.04k
            in--;
4655
5.04k
        }
4656
3.28M
        if (*in == '<') {
4657
330k
            return;
4658
330k
        }
4659
2.95M
        if (*in == '&') {
4660
290k
            return;
4661
290k
        }
4662
2.66M
        if (terminate) {
4663
240
            return;
4664
240
        }
4665
2.66M
        SHRINK;
4666
2.66M
        GROW;
4667
2.66M
        in = ctxt->input->cur;
4668
2.67M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4669
2.67M
             (*in == 0x09) || (*in == 0x0a));
4670
2.66M
    ctxt->input->line = line;
4671
2.66M
    ctxt->input->col = col;
4672
2.66M
    xmlParseCharDataComplex(ctxt, partial);
4673
2.66M
}
4674
4675
/**
4676
 * Always makes progress if the first char isn't '<' or '&'.
4677
 *
4678
 * parse a CharData section.this is the fallback function
4679
 * of #xmlParseCharData when the parsing requires handling
4680
 * of non-ASCII characters.
4681
 *
4682
 * @param ctxt  an XML parser context
4683
 * @param partial  whether the input can end with truncated UTF-8
4684
 */
4685
static void
4686
2.66M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4687
2.66M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4688
2.66M
    int nbchar = 0;
4689
2.66M
    int cur, l;
4690
4691
2.66M
    cur = xmlCurrentCharRecover(ctxt, &l);
4692
89.1M
    while ((cur != '<') && /* checked */
4693
89.1M
           (cur != '&') &&
4694
89.1M
     (IS_CHAR(cur))) {
4695
86.5M
        if (cur == ']') {
4696
1.29M
            size_t avail = ctxt->input->end - ctxt->input->cur;
4697
4698
1.29M
            if (partial && avail < 2)
4699
40
                break;
4700
1.29M
            if (NXT(1) == ']') {
4701
1.24M
                if (partial && avail < 3)
4702
1.01k
                    break;
4703
1.24M
                if (NXT(2) == '>')
4704
1.39k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4705
1.24M
            }
4706
1.29M
        }
4707
4708
86.5M
  COPY_BUF(buf, nbchar, cur);
4709
  /* move current position before possible calling of ctxt->sax->characters */
4710
86.5M
  NEXTL(l);
4711
86.5M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4712
681k
      buf[nbchar] = 0;
4713
4714
681k
            xmlCharacters(ctxt, buf, nbchar, 0);
4715
681k
      nbchar = 0;
4716
681k
            SHRINK;
4717
681k
  }
4718
86.5M
  cur = xmlCurrentCharRecover(ctxt, &l);
4719
86.5M
    }
4720
2.66M
    if (nbchar != 0) {
4721
508k
        buf[nbchar] = 0;
4722
4723
508k
        xmlCharacters(ctxt, buf, nbchar, 0);
4724
508k
    }
4725
    /*
4726
     * cur == 0 can mean
4727
     *
4728
     * - End of buffer.
4729
     * - An actual 0 character.
4730
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4731
     */
4732
2.66M
    if (ctxt->input->cur < ctxt->input->end) {
4733
2.65M
        if ((cur == 0) && (CUR != 0)) {
4734
1.79k
            if (partial == 0) {
4735
1.52k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4736
1.52k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4737
1.52k
                NEXTL(1);
4738
1.52k
            }
4739
2.64M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4740
            /* Generate the error and skip the offending character */
4741
2.33M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4742
2.33M
                              "PCDATA invalid Char value %d\n", cur);
4743
2.33M
            NEXTL(l);
4744
2.33M
        }
4745
2.65M
    }
4746
2.66M
}
4747
4748
/**
4749
 * @deprecated Internal function, don't use.
4750
 * @param ctxt  an XML parser context
4751
 * @param cdata  unused
4752
 */
4753
void
4754
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4755
0
    xmlParseCharDataInternal(ctxt, 0);
4756
0
}
4757
4758
/**
4759
 * Parse an External ID or a Public ID
4760
 *
4761
 * @deprecated Internal function, don't use.
4762
 *
4763
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4764
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4765
 *
4766
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4768
 *
4769
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4770
 *
4771
 * @param ctxt  an XML parser context
4772
 * @param publicId  a xmlChar** receiving PubidLiteral
4773
 * @param strict  indicate whether we should restrict parsing to only
4774
 *          production [75], see NOTE below
4775
 * @returns the function returns SystemLiteral and in the second
4776
 *                case publicID receives PubidLiteral, is strict is off
4777
 *                it is possible to return NULL and have publicID set.
4778
 */
4779
4780
xmlChar *
4781
84.1k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4782
84.1k
    xmlChar *URI = NULL;
4783
4784
84.1k
    *publicId = NULL;
4785
84.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4786
25.0k
        SKIP(6);
4787
25.0k
  if (SKIP_BLANKS == 0) {
4788
1.74k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
1.74k
                     "Space required after 'SYSTEM'\n");
4790
1.74k
  }
4791
25.0k
  URI = xmlParseSystemLiteral(ctxt);
4792
25.0k
  if (URI == NULL) {
4793
391
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4794
391
        }
4795
59.1k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4796
21.2k
        SKIP(6);
4797
21.2k
  if (SKIP_BLANKS == 0) {
4798
876
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4799
876
        "Space required after 'PUBLIC'\n");
4800
876
  }
4801
21.2k
  *publicId = xmlParsePubidLiteral(ctxt);
4802
21.2k
  if (*publicId == NULL) {
4803
719
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4804
719
  }
4805
21.2k
  if (strict) {
4806
      /*
4807
       * We don't handle [83] so "S SystemLiteral" is required.
4808
       */
4809
14.0k
      if (SKIP_BLANKS == 0) {
4810
1.12k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811
1.12k
      "Space required after the Public Identifier\n");
4812
1.12k
      }
4813
14.0k
  } else {
4814
      /*
4815
       * We handle [83] so we return immediately, if
4816
       * "S SystemLiteral" is not detected. We skip blanks if no
4817
             * system literal was found, but this is harmless since we must
4818
             * be at the end of a NotationDecl.
4819
       */
4820
7.22k
      if (SKIP_BLANKS == 0) return(NULL);
4821
563
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4822
563
  }
4823
14.3k
  URI = xmlParseSystemLiteral(ctxt);
4824
14.3k
  if (URI == NULL) {
4825
703
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4826
703
        }
4827
14.3k
    }
4828
77.3k
    return(URI);
4829
84.1k
}
4830
4831
/**
4832
 * Skip an XML (SGML) comment <!-- .... -->
4833
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4834
 *  must not occur within comments. "
4835
 * This is the slow routine in case the accelerator for ascii didn't work
4836
 *
4837
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4838
 * @param ctxt  an XML parser context
4839
 * @param buf  the already parsed part of the buffer
4840
 * @param len  number of bytes in the buffer
4841
 * @param size  allocated size of the buffer
4842
 */
4843
static void
4844
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4845
88.7k
                       size_t len, size_t size) {
4846
88.7k
    int q, ql;
4847
88.7k
    int r, rl;
4848
88.7k
    int cur, l;
4849
88.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4850
21.6k
                    XML_MAX_HUGE_LENGTH :
4851
88.7k
                    XML_MAX_TEXT_LENGTH;
4852
4853
88.7k
    if (buf == NULL) {
4854
20.1k
        len = 0;
4855
20.1k
  size = XML_PARSER_BUFFER_SIZE;
4856
20.1k
  buf = xmlMalloc(size);
4857
20.1k
  if (buf == NULL) {
4858
72
      xmlErrMemory(ctxt);
4859
72
      return;
4860
72
  }
4861
20.1k
    }
4862
88.7k
    q = xmlCurrentCharRecover(ctxt, &ql);
4863
88.7k
    if (q == 0)
4864
7.13k
        goto not_terminated;
4865
81.5k
    if (!IS_CHAR(q)) {
4866
752
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4867
752
                          "xmlParseComment: invalid xmlChar value %d\n",
4868
752
                    q);
4869
752
  xmlFree (buf);
4870
752
  return;
4871
752
    }
4872
80.8k
    NEXTL(ql);
4873
80.8k
    r = xmlCurrentCharRecover(ctxt, &rl);
4874
80.8k
    if (r == 0)
4875
390
        goto not_terminated;
4876
80.4k
    if (!IS_CHAR(r)) {
4877
2.00k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4878
2.00k
                          "xmlParseComment: invalid xmlChar value %d\n",
4879
2.00k
                    r);
4880
2.00k
  xmlFree (buf);
4881
2.00k
  return;
4882
2.00k
    }
4883
78.4k
    NEXTL(rl);
4884
78.4k
    cur = xmlCurrentCharRecover(ctxt, &l);
4885
78.4k
    if (cur == 0)
4886
3.44k
        goto not_terminated;
4887
10.3M
    while (IS_CHAR(cur) && /* checked */
4888
10.3M
           ((cur != '>') ||
4889
10.3M
      (r != '-') || (q != '-'))) {
4890
10.2M
  if ((r == '-') && (q == '-')) {
4891
596k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4892
596k
  }
4893
10.2M
  if (len + 5 >= size) {
4894
15.3k
      xmlChar *tmp;
4895
15.3k
            int newSize;
4896
4897
15.3k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4898
15.3k
            if (newSize < 0) {
4899
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4900
0
                             "Comment too big found", NULL);
4901
0
                xmlFree (buf);
4902
0
                return;
4903
0
            }
4904
15.3k
      tmp = xmlRealloc(buf, newSize);
4905
15.3k
      if (tmp == NULL) {
4906
12
    xmlErrMemory(ctxt);
4907
12
    xmlFree(buf);
4908
12
    return;
4909
12
      }
4910
15.3k
      buf = tmp;
4911
15.3k
            size = newSize;
4912
15.3k
  }
4913
10.2M
  COPY_BUF(buf, len, q);
4914
4915
10.2M
  q = r;
4916
10.2M
  ql = rl;
4917
10.2M
  r = cur;
4918
10.2M
  rl = l;
4919
4920
10.2M
  NEXTL(l);
4921
10.2M
  cur = xmlCurrentCharRecover(ctxt, &l);
4922
4923
10.2M
    }
4924
74.9k
    buf[len] = 0;
4925
74.9k
    if (cur == 0) {
4926
19.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4927
19.7k
                       "Comment not terminated \n<!--%.50s\n", buf);
4928
55.1k
    } else if (!IS_CHAR(cur)) {
4929
2.18k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4930
2.18k
                          "xmlParseComment: invalid xmlChar value %d\n",
4931
2.18k
                    cur);
4932
53.0k
    } else {
4933
53.0k
        NEXT;
4934
53.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4935
53.0k
      (!ctxt->disableSAX))
4936
52.7k
      ctxt->sax->comment(ctxt->userData, buf);
4937
53.0k
    }
4938
74.9k
    xmlFree(buf);
4939
74.9k
    return;
4940
10.9k
not_terminated:
4941
10.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4942
10.9k
       "Comment not terminated\n", NULL);
4943
10.9k
    xmlFree(buf);
4944
10.9k
}
4945
4946
/**
4947
 * Parse an XML (SGML) comment. Always consumes '<!'.
4948
 *
4949
 * @deprecated Internal function, don't use.
4950
 *
4951
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4952
 *  must not occur within comments. "
4953
 *
4954
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4955
 * @param ctxt  an XML parser context
4956
 */
4957
void
4958
777k
xmlParseComment(xmlParserCtxt *ctxt) {
4959
777k
    xmlChar *buf = NULL;
4960
777k
    size_t size = XML_PARSER_BUFFER_SIZE;
4961
777k
    size_t len = 0;
4962
777k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4963
42.0k
                       XML_MAX_HUGE_LENGTH :
4964
777k
                       XML_MAX_TEXT_LENGTH;
4965
777k
    const xmlChar *in;
4966
777k
    size_t nbchar = 0;
4967
777k
    int ccol;
4968
4969
    /*
4970
     * Check that there is a comment right here.
4971
     */
4972
777k
    if ((RAW != '<') || (NXT(1) != '!'))
4973
0
        return;
4974
777k
    SKIP(2);
4975
777k
    if ((RAW != '-') || (NXT(1) != '-'))
4976
48
        return;
4977
777k
    SKIP(2);
4978
777k
    GROW;
4979
4980
    /*
4981
     * Accelerated common case where input don't need to be
4982
     * modified before passing it to the handler.
4983
     */
4984
777k
    in = ctxt->input->cur;
4985
777k
    do {
4986
777k
  if (*in == 0xA) {
4987
501k
      do {
4988
501k
    ctxt->input->line++; ctxt->input->col = 1;
4989
501k
    in++;
4990
501k
      } while (*in == 0xA);
4991
3.92k
  }
4992
2.03M
get_more:
4993
2.03M
        ccol = ctxt->input->col;
4994
7.13M
  while (((*in > '-') && (*in <= 0x7F)) ||
4995
7.13M
         ((*in >= 0x20) && (*in < '-')) ||
4996
7.13M
         (*in == 0x09)) {
4997
5.09M
        in++;
4998
5.09M
        ccol++;
4999
5.09M
  }
5000
2.03M
  ctxt->input->col = ccol;
5001
2.03M
  if (*in == 0xA) {
5002
241k
      do {
5003
241k
    ctxt->input->line++; ctxt->input->col = 1;
5004
241k
    in++;
5005
241k
      } while (*in == 0xA);
5006
14.2k
      goto get_more;
5007
14.2k
  }
5008
2.02M
  nbchar = in - ctxt->input->cur;
5009
  /*
5010
   * save current set of data
5011
   */
5012
2.02M
  if (nbchar > 0) {
5013
1.33M
            if (nbchar > maxLength - len) {
5014
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5015
0
                                  "Comment too big found", NULL);
5016
0
                xmlFree(buf);
5017
0
                return;
5018
0
            }
5019
1.33M
            if (buf == NULL) {
5020
149k
                if ((*in == '-') && (in[1] == '-'))
5021
81.2k
                    size = nbchar + 1;
5022
67.9k
                else
5023
67.9k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5024
149k
                buf = xmlMalloc(size);
5025
149k
                if (buf == NULL) {
5026
49
                    xmlErrMemory(ctxt);
5027
49
                    return;
5028
49
                }
5029
149k
                len = 0;
5030
1.18M
            } else if (len + nbchar + 1 >= size) {
5031
73.8k
                xmlChar *new_buf;
5032
73.8k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5033
73.8k
                new_buf = xmlRealloc(buf, size);
5034
73.8k
                if (new_buf == NULL) {
5035
8
                    xmlErrMemory(ctxt);
5036
8
                    xmlFree(buf);
5037
8
                    return;
5038
8
                }
5039
73.8k
                buf = new_buf;
5040
73.8k
            }
5041
1.33M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5042
1.33M
            len += nbchar;
5043
1.33M
            buf[len] = 0;
5044
1.33M
  }
5045
2.02M
  ctxt->input->cur = in;
5046
2.02M
  if (*in == 0xA) {
5047
0
      in++;
5048
0
      ctxt->input->line++; ctxt->input->col = 1;
5049
0
  }
5050
2.02M
  if (*in == 0xD) {
5051
4.78k
      in++;
5052
4.78k
      if (*in == 0xA) {
5053
3.05k
    ctxt->input->cur = in;
5054
3.05k
    in++;
5055
3.05k
    ctxt->input->line++; ctxt->input->col = 1;
5056
3.05k
    goto get_more;
5057
3.05k
      }
5058
1.72k
      in--;
5059
1.72k
  }
5060
2.01M
  SHRINK;
5061
2.01M
  GROW;
5062
2.01M
  in = ctxt->input->cur;
5063
2.01M
  if (*in == '-') {
5064
1.92M
      if (in[1] == '-') {
5065
1.33M
          if (in[2] == '>') {
5066
688k
        SKIP(3);
5067
688k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5068
688k
            (!ctxt->disableSAX)) {
5069
324k
      if (buf != NULL)
5070
78.8k
          ctxt->sax->comment(ctxt->userData, buf);
5071
245k
      else
5072
245k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5073
324k
        }
5074
688k
        if (buf != NULL)
5075
80.5k
            xmlFree(buf);
5076
688k
        return;
5077
688k
    }
5078
645k
    if (buf != NULL) {
5079
620k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5080
620k
                          "Double hyphen within comment: "
5081
620k
                                      "<!--%.50s\n",
5082
620k
              buf);
5083
620k
    } else
5084
24.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
24.7k
                          "Double hyphen within comment\n", NULL);
5086
645k
    in++;
5087
645k
    ctxt->input->col++;
5088
645k
      }
5089
1.23M
      in++;
5090
1.23M
      ctxt->input->col++;
5091
1.23M
      goto get_more;
5092
1.92M
  }
5093
2.01M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5094
88.7k
    xmlParseCommentComplex(ctxt, buf, len, size);
5095
88.7k
}
5096
5097
5098
/**
5099
 * Parse the name of a PI
5100
 *
5101
 * @deprecated Internal function, don't use.
5102
 *
5103
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5104
 *
5105
 * @param ctxt  an XML parser context
5106
 * @returns the PITarget name or NULL
5107
 */
5108
5109
const xmlChar *
5110
58.5k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5111
58.5k
    const xmlChar *name;
5112
5113
58.5k
    name = xmlParseName(ctxt);
5114
58.5k
    if ((name != NULL) &&
5115
58.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5116
58.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5117
58.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5118
4.24k
  int i;
5119
4.24k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5120
4.24k
      (name[2] == 'l') && (name[3] == 0)) {
5121
2.49k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5122
2.49k
     "XML declaration allowed only at the start of the document\n");
5123
2.49k
      return(name);
5124
2.49k
  } else if (name[3] == 0) {
5125
693
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5126
693
      return(name);
5127
693
  }
5128
2.93k
  for (i = 0;;i++) {
5129
2.93k
      if (xmlW3CPIs[i] == NULL) break;
5130
2.10k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5131
218
          return(name);
5132
2.10k
  }
5133
838
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5134
838
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5135
838
          NULL, NULL);
5136
838
    }
5137
55.1k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5138
1.17k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5139
1.17k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5140
1.17k
    }
5141
55.1k
    return(name);
5142
58.5k
}
5143
5144
#ifdef LIBXML_CATALOG_ENABLED
5145
/**
5146
 * Parse an XML Catalog Processing Instruction.
5147
 *
5148
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5149
 *
5150
 * Occurs only if allowed by the user and if happening in the Misc
5151
 * part of the document before any doctype information
5152
 * This will add the given catalog to the parsing context in order
5153
 * to be used if there is a resolution need further down in the document
5154
 *
5155
 * @param ctxt  an XML parser context
5156
 * @param catalog  the PI value string
5157
 */
5158
5159
static void
5160
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5161
0
    xmlChar *URL = NULL;
5162
0
    const xmlChar *tmp, *base;
5163
0
    xmlChar marker;
5164
5165
0
    tmp = catalog;
5166
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5168
0
  goto error;
5169
0
    tmp += 7;
5170
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5171
0
    if (*tmp != '=') {
5172
0
  return;
5173
0
    }
5174
0
    tmp++;
5175
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5176
0
    marker = *tmp;
5177
0
    if ((marker != '\'') && (marker != '"'))
5178
0
  goto error;
5179
0
    tmp++;
5180
0
    base = tmp;
5181
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5182
0
    if (*tmp == 0)
5183
0
  goto error;
5184
0
    URL = xmlStrndup(base, tmp - base);
5185
0
    tmp++;
5186
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5187
0
    if (*tmp != 0)
5188
0
  goto error;
5189
5190
0
    if (URL != NULL) {
5191
        /*
5192
         * Unfortunately, the catalog API doesn't report OOM errors.
5193
         * xmlGetLastError isn't very helpful since we don't know
5194
         * where the last error came from. We'd have to reset it
5195
         * before this call and restore it afterwards.
5196
         */
5197
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5198
0
  xmlFree(URL);
5199
0
    }
5200
0
    return;
5201
5202
0
error:
5203
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5204
0
            "Catalog PI syntax error: %s\n",
5205
0
      catalog, NULL);
5206
0
    if (URL != NULL)
5207
0
  xmlFree(URL);
5208
0
}
5209
#endif
5210
5211
/**
5212
 * Parse an XML Processing Instruction.
5213
 *
5214
 * @deprecated Internal function, don't use.
5215
 *
5216
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5217
 *
5218
 * The processing is transferred to SAX once parsed.
5219
 *
5220
 * @param ctxt  an XML parser context
5221
 */
5222
5223
void
5224
58.5k
xmlParsePI(xmlParserCtxt *ctxt) {
5225
58.5k
    xmlChar *buf = NULL;
5226
58.5k
    size_t len = 0;
5227
58.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5228
58.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5229
15.9k
                       XML_MAX_HUGE_LENGTH :
5230
58.5k
                       XML_MAX_TEXT_LENGTH;
5231
58.5k
    int cur, l;
5232
58.5k
    const xmlChar *target;
5233
5234
58.5k
    if ((RAW == '<') && (NXT(1) == '?')) {
5235
  /*
5236
   * this is a Processing Instruction.
5237
   */
5238
58.5k
  SKIP(2);
5239
5240
  /*
5241
   * Parse the target name and check for special support like
5242
   * namespace.
5243
   */
5244
58.5k
        target = xmlParsePITarget(ctxt);
5245
58.5k
  if (target != NULL) {
5246
46.9k
      if ((RAW == '?') && (NXT(1) == '>')) {
5247
27.7k
    SKIP(2);
5248
5249
    /*
5250
     * SAX: PI detected.
5251
     */
5252
27.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5253
27.7k
        (ctxt->sax->processingInstruction != NULL))
5254
27.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5255
27.2k
                                         target, NULL);
5256
27.7k
    return;
5257
27.7k
      }
5258
19.1k
      buf = xmlMalloc(size);
5259
19.1k
      if (buf == NULL) {
5260
108
    xmlErrMemory(ctxt);
5261
108
    return;
5262
108
      }
5263
19.0k
      if (SKIP_BLANKS == 0) {
5264
12.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5265
12.0k
        "ParsePI: PI %s space expected\n", target);
5266
12.0k
      }
5267
19.0k
      cur = xmlCurrentCharRecover(ctxt, &l);
5268
5.17M
      while (IS_CHAR(cur) && /* checked */
5269
5.17M
       ((cur != '?') || (NXT(1) != '>'))) {
5270
5.15M
    if (len + 5 >= size) {
5271
2.47k
        xmlChar *tmp;
5272
2.47k
                    int newSize;
5273
5274
2.47k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5275
2.47k
                    if (newSize < 0) {
5276
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277
0
                                          "PI %s too big found", target);
5278
0
                        xmlFree(buf);
5279
0
                        return;
5280
0
                    }
5281
2.47k
        tmp = xmlRealloc(buf, newSize);
5282
2.47k
        if (tmp == NULL) {
5283
9
      xmlErrMemory(ctxt);
5284
9
      xmlFree(buf);
5285
9
      return;
5286
9
        }
5287
2.46k
        buf = tmp;
5288
2.46k
                    size = newSize;
5289
2.46k
    }
5290
5.15M
    COPY_BUF(buf, len, cur);
5291
5.15M
    NEXTL(l);
5292
5.15M
    cur = xmlCurrentCharRecover(ctxt, &l);
5293
5.15M
      }
5294
19.0k
      buf[len] = 0;
5295
19.0k
      if (cur != '?') {
5296
8.06k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5297
8.06k
          "ParsePI: PI %s never end ...\n", target);
5298
11.0k
      } else {
5299
11.0k
    SKIP(2);
5300
5301
11.0k
#ifdef LIBXML_CATALOG_ENABLED
5302
11.0k
    if ((ctxt->inSubset == 0) &&
5303
11.0k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5304
491
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5305
5306
491
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5307
491
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5308
262
       (allow == XML_CATA_ALLOW_ALL)))
5309
0
      xmlParseCatalogPI(ctxt, buf);
5310
491
    }
5311
11.0k
#endif
5312
5313
    /*
5314
     * SAX: PI detected.
5315
     */
5316
11.0k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5317
11.0k
        (ctxt->sax->processingInstruction != NULL))
5318
10.0k
        ctxt->sax->processingInstruction(ctxt->userData,
5319
10.0k
                                         target, buf);
5320
11.0k
      }
5321
19.0k
      xmlFree(buf);
5322
19.0k
  } else {
5323
11.6k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5324
11.6k
  }
5325
58.5k
    }
5326
58.5k
}
5327
5328
/**
5329
 * Parse a notation declaration. Always consumes '<!'.
5330
 *
5331
 * @deprecated Internal function, don't use.
5332
 *
5333
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5334
 *                           S? '>'
5335
 *
5336
 * Hence there is actually 3 choices:
5337
 *
5338
 *     'PUBLIC' S PubidLiteral
5339
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5340
 *     'SYSTEM' S SystemLiteral
5341
 *
5342
 * See the NOTE on #xmlParseExternalID.
5343
 *
5344
 * @param ctxt  an XML parser context
5345
 */
5346
5347
void
5348
11.1k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5349
11.1k
    const xmlChar *name;
5350
11.1k
    xmlChar *Pubid;
5351
11.1k
    xmlChar *Systemid;
5352
5353
11.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5354
0
        return;
5355
11.1k
    SKIP(2);
5356
5357
11.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5358
11.0k
#ifdef LIBXML_VALID_ENABLED
5359
11.0k
  int oldInputNr = ctxt->inputNr;
5360
11.0k
#endif
5361
5362
11.0k
  SKIP(8);
5363
11.0k
  if (SKIP_BLANKS_PE == 0) {
5364
341
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5365
341
         "Space required after '<!NOTATION'\n");
5366
341
      return;
5367
341
  }
5368
5369
10.7k
        name = xmlParseName(ctxt);
5370
10.7k
  if (name == NULL) {
5371
294
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5372
294
      return;
5373
294
  }
5374
10.4k
  if (xmlStrchr(name, ':') != NULL) {
5375
561
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5376
561
         "colons are forbidden from notation names '%s'\n",
5377
561
         name, NULL, NULL);
5378
561
  }
5379
10.4k
  if (SKIP_BLANKS_PE == 0) {
5380
1.05k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5381
1.05k
         "Space required after the NOTATION name'\n");
5382
1.05k
      return;
5383
1.05k
  }
5384
5385
  /*
5386
   * Parse the IDs.
5387
   */
5388
9.39k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5389
9.39k
  SKIP_BLANKS_PE;
5390
5391
9.39k
  if (RAW == '>') {
5392
2.90k
#ifdef LIBXML_VALID_ENABLED
5393
2.90k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5394
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5395
0
                           "Notation declaration doesn't start and stop"
5396
0
                                 " in the same entity\n",
5397
0
                                 NULL, NULL);
5398
0
      }
5399
2.90k
#endif
5400
2.90k
      NEXT;
5401
2.90k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5402
2.90k
    (ctxt->sax->notationDecl != NULL))
5403
2.33k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5404
6.48k
  } else {
5405
6.48k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5406
6.48k
  }
5407
9.39k
  if (Systemid != NULL) xmlFree(Systemid);
5408
9.39k
  if (Pubid != NULL) xmlFree(Pubid);
5409
9.39k
    }
5410
11.1k
}
5411
5412
/**
5413
 * Parse an entity declaration. Always consumes '<!'.
5414
 *
5415
 * @deprecated Internal function, don't use.
5416
 *
5417
 *     [70] EntityDecl ::= GEDecl | PEDecl
5418
 *
5419
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5420
 *
5421
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5422
 *
5423
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5424
 *
5425
 *     [74] PEDef ::= EntityValue | ExternalID
5426
 *
5427
 *     [76] NDataDecl ::= S 'NDATA' S Name
5428
 *
5429
 * [ VC: Notation Declared ]
5430
 * The Name must match the declared name of a notation.
5431
 *
5432
 * @param ctxt  an XML parser context
5433
 */
5434
5435
void
5436
97.9k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5437
97.9k
    const xmlChar *name = NULL;
5438
97.9k
    xmlChar *value = NULL;
5439
97.9k
    xmlChar *URI = NULL, *literal = NULL;
5440
97.9k
    const xmlChar *ndata = NULL;
5441
97.9k
    int isParameter = 0;
5442
97.9k
    xmlChar *orig = NULL;
5443
5444
97.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5445
0
        return;
5446
97.9k
    SKIP(2);
5447
5448
    /* GROW; done in the caller */
5449
97.9k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5450
97.8k
#ifdef LIBXML_VALID_ENABLED
5451
97.8k
  int oldInputNr = ctxt->inputNr;
5452
97.8k
#endif
5453
5454
97.8k
  SKIP(6);
5455
97.8k
  if (SKIP_BLANKS_PE == 0) {
5456
4.45k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5457
4.45k
         "Space required after '<!ENTITY'\n");
5458
4.45k
  }
5459
5460
97.8k
  if (RAW == '%') {
5461
49.4k
      NEXT;
5462
49.4k
      if (SKIP_BLANKS_PE == 0) {
5463
1.32k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5464
1.32k
             "Space required after '%%'\n");
5465
1.32k
      }
5466
49.4k
      isParameter = 1;
5467
49.4k
  }
5468
5469
97.8k
        name = xmlParseName(ctxt);
5470
97.8k
  if (name == NULL) {
5471
2.44k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5472
2.44k
                     "xmlParseEntityDecl: no name\n");
5473
2.44k
            return;
5474
2.44k
  }
5475
95.4k
  if (xmlStrchr(name, ':') != NULL) {
5476
428
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5477
428
         "colons are forbidden from entities names '%s'\n",
5478
428
         name, NULL, NULL);
5479
428
  }
5480
95.4k
  if (SKIP_BLANKS_PE == 0) {
5481
5.40k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5482
5.40k
         "Space required after the entity name\n");
5483
5.40k
  }
5484
5485
  /*
5486
   * handle the various case of definitions...
5487
   */
5488
95.4k
  if (isParameter) {
5489
49.0k
      if ((RAW == '"') || (RAW == '\'')) {
5490
28.5k
          value = xmlParseEntityValue(ctxt, &orig);
5491
28.5k
    if (value) {
5492
28.4k
        if ((ctxt->sax != NULL) &&
5493
28.4k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5494
23.1k
      ctxt->sax->entityDecl(ctxt->userData, name,
5495
23.1k
                        XML_INTERNAL_PARAMETER_ENTITY,
5496
23.1k
            NULL, NULL, value);
5497
28.4k
    }
5498
28.5k
      } else {
5499
20.5k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5500
20.5k
    if ((URI == NULL) && (literal == NULL)) {
5501
971
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5502
971
    }
5503
20.5k
    if (URI) {
5504
19.3k
                    if (xmlStrchr(URI, '#')) {
5505
301
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5506
19.0k
                    } else {
5507
19.0k
                        if ((ctxt->sax != NULL) &&
5508
19.0k
                            (!ctxt->disableSAX) &&
5509
19.0k
                            (ctxt->sax->entityDecl != NULL))
5510
10.1k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5511
10.1k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5512
10.1k
                                        literal, URI, NULL);
5513
19.0k
                    }
5514
19.3k
    }
5515
20.5k
      }
5516
49.0k
  } else {
5517
46.3k
      if ((RAW == '"') || (RAW == '\'')) {
5518
36.8k
          value = xmlParseEntityValue(ctxt, &orig);
5519
36.8k
    if ((ctxt->sax != NULL) &&
5520
36.8k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5521
29.4k
        ctxt->sax->entityDecl(ctxt->userData, name,
5522
29.4k
        XML_INTERNAL_GENERAL_ENTITY,
5523
29.4k
        NULL, NULL, value);
5524
    /*
5525
     * For expat compatibility in SAX mode.
5526
     */
5527
36.8k
    if ((ctxt->myDoc == NULL) ||
5528
36.8k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5529
5.29k
        if (ctxt->myDoc == NULL) {
5530
262
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5531
262
      if (ctxt->myDoc == NULL) {
5532
3
          xmlErrMemory(ctxt);
5533
3
          goto done;
5534
3
      }
5535
259
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5536
259
        }
5537
5.29k
        if (ctxt->myDoc->intSubset == NULL) {
5538
259
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5539
259
              BAD_CAST "fake", NULL, NULL);
5540
259
                        if (ctxt->myDoc->intSubset == NULL) {
5541
3
                            xmlErrMemory(ctxt);
5542
3
                            goto done;
5543
3
                        }
5544
259
                    }
5545
5546
5.28k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5547
5.28k
                    NULL, NULL, value);
5548
5.28k
    }
5549
36.8k
      } else {
5550
9.54k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5551
9.54k
    if ((URI == NULL) && (literal == NULL)) {
5552
1.36k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5553
1.36k
    }
5554
9.54k
    if (URI) {
5555
7.86k
                    if (xmlStrchr(URI, '#')) {
5556
78
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5557
78
                    }
5558
7.86k
    }
5559
9.54k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5560
871
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5561
871
           "Space required before 'NDATA'\n");
5562
871
    }
5563
9.54k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5564
1.28k
        SKIP(5);
5565
1.28k
        if (SKIP_BLANKS_PE == 0) {
5566
305
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5567
305
               "Space required after 'NDATA'\n");
5568
305
        }
5569
1.28k
        ndata = xmlParseName(ctxt);
5570
1.28k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5571
1.28k
            (ctxt->sax->unparsedEntityDecl != NULL))
5572
1.04k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5573
1.04k
            literal, URI, ndata);
5574
8.26k
    } else {
5575
8.26k
        if ((ctxt->sax != NULL) &&
5576
8.26k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5577
7.54k
      ctxt->sax->entityDecl(ctxt->userData, name,
5578
7.54k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5579
7.54k
            literal, URI, NULL);
5580
        /*
5581
         * For expat compatibility in SAX mode.
5582
         * assuming the entity replacement was asked for
5583
         */
5584
8.26k
        if ((ctxt->replaceEntities != 0) &&
5585
8.26k
      ((ctxt->myDoc == NULL) ||
5586
5.43k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5587
223
      if (ctxt->myDoc == NULL) {
5588
32
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5589
32
          if (ctxt->myDoc == NULL) {
5590
2
              xmlErrMemory(ctxt);
5591
2
        goto done;
5592
2
          }
5593
30
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5594
30
      }
5595
5596
221
      if (ctxt->myDoc->intSubset == NULL) {
5597
30
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5598
30
            BAD_CAST "fake", NULL, NULL);
5599
30
                            if (ctxt->myDoc->intSubset == NULL) {
5600
2
                                xmlErrMemory(ctxt);
5601
2
                                goto done;
5602
2
                            }
5603
30
                        }
5604
219
      xmlSAX2EntityDecl(ctxt, name,
5605
219
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5606
219
                  literal, URI, NULL);
5607
219
        }
5608
8.26k
    }
5609
9.54k
      }
5610
46.3k
  }
5611
95.4k
  SKIP_BLANKS_PE;
5612
95.4k
  if (RAW != '>') {
5613
2.43k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5614
2.43k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5615
2.43k
      xmlHaltParser(ctxt);
5616
93.0k
  } else {
5617
93.0k
#ifdef LIBXML_VALID_ENABLED
5618
93.0k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5619
9
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5620
9
                           "Entity declaration doesn't start and stop in"
5621
9
                                 " the same entity\n",
5622
9
                                 NULL, NULL);
5623
9
      }
5624
93.0k
#endif
5625
93.0k
      NEXT;
5626
93.0k
  }
5627
95.4k
  if (orig != NULL) {
5628
      /*
5629
       * Ugly mechanism to save the raw entity value.
5630
       */
5631
64.9k
      xmlEntityPtr cur = NULL;
5632
5633
64.9k
      if (isParameter) {
5634
28.4k
          if ((ctxt->sax != NULL) &&
5635
28.4k
        (ctxt->sax->getParameterEntity != NULL))
5636
28.4k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5637
36.4k
      } else {
5638
36.4k
          if ((ctxt->sax != NULL) &&
5639
36.4k
        (ctxt->sax->getEntity != NULL))
5640
36.4k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5641
36.4k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5642
1.88k
        cur = xmlSAX2GetEntity(ctxt, name);
5643
1.88k
    }
5644
36.4k
      }
5645
64.9k
            if ((cur != NULL) && (cur->orig == NULL)) {
5646
31.7k
    cur->orig = orig;
5647
31.7k
                orig = NULL;
5648
31.7k
      }
5649
64.9k
  }
5650
5651
95.4k
done:
5652
95.4k
  if (value != NULL) xmlFree(value);
5653
95.4k
  if (URI != NULL) xmlFree(URI);
5654
95.4k
  if (literal != NULL) xmlFree(literal);
5655
95.4k
        if (orig != NULL) xmlFree(orig);
5656
95.4k
    }
5657
97.9k
}
5658
5659
/**
5660
 * Parse an attribute default declaration
5661
 *
5662
 * @deprecated Internal function, don't use.
5663
 *
5664
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5665
 *
5666
 * [ VC: Required Attribute ]
5667
 * if the default declaration is the keyword \#REQUIRED, then the
5668
 * attribute must be specified for all elements of the type in the
5669
 * attribute-list declaration.
5670
 *
5671
 * [ VC: Attribute Default Legal ]
5672
 * The declared default value must meet the lexical constraints of
5673
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5674
 *
5675
 * [ VC: Fixed Attribute Default ]
5676
 * if an attribute has a default value declared with the \#FIXED
5677
 * keyword, instances of that attribute must match the default value.
5678
 *
5679
 * [ WFC: No < in Attribute Values ]
5680
 * handled in #xmlParseAttValue
5681
 *
5682
 * @param ctxt  an XML parser context
5683
 * @param value  Receive a possible fixed default value for the attribute
5684
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5685
 *          or XML_ATTRIBUTE_FIXED.
5686
 */
5687
5688
int
5689
68.9k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5690
68.9k
    int val;
5691
68.9k
    xmlChar *ret;
5692
5693
68.9k
    *value = NULL;
5694
68.9k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5695
4.03k
  SKIP(9);
5696
4.03k
  return(XML_ATTRIBUTE_REQUIRED);
5697
4.03k
    }
5698
64.9k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5699
23.4k
  SKIP(8);
5700
23.4k
  return(XML_ATTRIBUTE_IMPLIED);
5701
23.4k
    }
5702
41.5k
    val = XML_ATTRIBUTE_NONE;
5703
41.5k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5704
7.89k
  SKIP(6);
5705
7.89k
  val = XML_ATTRIBUTE_FIXED;
5706
7.89k
  if (SKIP_BLANKS_PE == 0) {
5707
2.42k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5708
2.42k
         "Space required after '#FIXED'\n");
5709
2.42k
  }
5710
7.89k
    }
5711
41.5k
    ret = xmlParseAttValue(ctxt);
5712
41.5k
    if (ret == NULL) {
5713
7.66k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5714
7.66k
           "Attribute default value declaration error\n");
5715
7.66k
    } else
5716
33.8k
        *value = ret;
5717
41.5k
    return(val);
5718
64.9k
}
5719
5720
/**
5721
 * Parse an Notation attribute type.
5722
 *
5723
 * @deprecated Internal function, don't use.
5724
 *
5725
 * Note: the leading 'NOTATION' S part has already being parsed...
5726
 *
5727
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5728
 *
5729
 * [ VC: Notation Attributes ]
5730
 * Values of this type must match one of the notation names included
5731
 * in the declaration; all notation names in the declaration must be declared.
5732
 *
5733
 * @param ctxt  an XML parser context
5734
 * @returns the notation attribute tree built while parsing
5735
 */
5736
5737
xmlEnumeration *
5738
2.51k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5739
2.51k
    const xmlChar *name;
5740
2.51k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5741
5742
2.51k
    if (RAW != '(') {
5743
219
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5744
219
  return(NULL);
5745
219
    }
5746
3.55k
    do {
5747
3.55k
        NEXT;
5748
3.55k
  SKIP_BLANKS_PE;
5749
3.55k
        name = xmlParseName(ctxt);
5750
3.55k
  if (name == NULL) {
5751
379
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5752
379
         "Name expected in NOTATION declaration\n");
5753
379
            xmlFreeEnumeration(ret);
5754
379
      return(NULL);
5755
379
  }
5756
3.17k
        tmp = NULL;
5757
3.17k
#ifdef LIBXML_VALID_ENABLED
5758
3.17k
        if (ctxt->validate) {
5759
2.17k
            tmp = ret;
5760
4.31k
            while (tmp != NULL) {
5761
2.57k
                if (xmlStrEqual(name, tmp->name)) {
5762
440
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5763
440
              "standalone: attribute notation value token %s duplicated\n",
5764
440
                                     name, NULL);
5765
440
                    if (!xmlDictOwns(ctxt->dict, name))
5766
0
                        xmlFree((xmlChar *) name);
5767
440
                    break;
5768
440
                }
5769
2.13k
                tmp = tmp->next;
5770
2.13k
            }
5771
2.17k
        }
5772
3.17k
#endif /* LIBXML_VALID_ENABLED */
5773
3.17k
  if (tmp == NULL) {
5774
2.73k
      cur = xmlCreateEnumeration(name);
5775
2.73k
      if (cur == NULL) {
5776
13
                xmlErrMemory(ctxt);
5777
13
                xmlFreeEnumeration(ret);
5778
13
                return(NULL);
5779
13
            }
5780
2.72k
      if (last == NULL) ret = last = cur;
5781
812
      else {
5782
812
    last->next = cur;
5783
812
    last = cur;
5784
812
      }
5785
2.72k
  }
5786
3.16k
  SKIP_BLANKS_PE;
5787
3.16k
    } while (RAW == '|');
5788
1.90k
    if (RAW != ')') {
5789
144
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5790
144
        xmlFreeEnumeration(ret);
5791
144
  return(NULL);
5792
144
    }
5793
1.75k
    NEXT;
5794
1.75k
    return(ret);
5795
1.90k
}
5796
5797
/**
5798
 * Parse an Enumeration attribute type.
5799
 *
5800
 * @deprecated Internal function, don't use.
5801
 *
5802
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803
 *
5804
 * [ VC: Enumeration ]
5805
 * Values of this type must match one of the Nmtoken tokens in
5806
 * the declaration
5807
 *
5808
 * @param ctxt  an XML parser context
5809
 * @returns the enumeration attribute tree built while parsing
5810
 */
5811
5812
xmlEnumeration *
5813
24.3k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5814
24.3k
    xmlChar *name;
5815
24.3k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5816
5817
24.3k
    if (RAW != '(') {
5818
1.15k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5819
1.15k
  return(NULL);
5820
1.15k
    }
5821
32.9k
    do {
5822
32.9k
        NEXT;
5823
32.9k
  SKIP_BLANKS_PE;
5824
32.9k
        name = xmlParseNmtoken(ctxt);
5825
32.9k
  if (name == NULL) {
5826
358
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5827
358
      return(ret);
5828
358
  }
5829
32.5k
        tmp = NULL;
5830
32.5k
#ifdef LIBXML_VALID_ENABLED
5831
32.5k
        if (ctxt->validate) {
5832
21.0k
            tmp = ret;
5833
36.8k
            while (tmp != NULL) {
5834
16.5k
                if (xmlStrEqual(name, tmp->name)) {
5835
693
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5836
693
              "standalone: attribute enumeration value token %s duplicated\n",
5837
693
                                     name, NULL);
5838
693
                    if (!xmlDictOwns(ctxt->dict, name))
5839
693
                        xmlFree(name);
5840
693
                    break;
5841
693
                }
5842
15.8k
                tmp = tmp->next;
5843
15.8k
            }
5844
21.0k
        }
5845
32.5k
#endif /* LIBXML_VALID_ENABLED */
5846
32.5k
  if (tmp == NULL) {
5847
31.8k
      cur = xmlCreateEnumeration(name);
5848
31.8k
      if (!xmlDictOwns(ctxt->dict, name))
5849
31.8k
    xmlFree(name);
5850
31.8k
      if (cur == NULL) {
5851
31
                xmlErrMemory(ctxt);
5852
31
                xmlFreeEnumeration(ret);
5853
31
                return(NULL);
5854
31
            }
5855
31.8k
      if (last == NULL) ret = last = cur;
5856
8.97k
      else {
5857
8.97k
    last->next = cur;
5858
8.97k
    last = cur;
5859
8.97k
      }
5860
31.8k
  }
5861
32.5k
  SKIP_BLANKS_PE;
5862
32.5k
    } while (RAW == '|');
5863
22.8k
    if (RAW != ')') {
5864
945
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5865
945
  return(ret);
5866
945
    }
5867
21.8k
    NEXT;
5868
21.8k
    return(ret);
5869
22.8k
}
5870
5871
/**
5872
 * Parse an Enumerated attribute type.
5873
 *
5874
 * @deprecated Internal function, don't use.
5875
 *
5876
 *     [57] EnumeratedType ::= NotationType | Enumeration
5877
 *
5878
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5879
 *
5880
 * @param ctxt  an XML parser context
5881
 * @param tree  the enumeration tree built while parsing
5882
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5883
 */
5884
5885
int
5886
26.9k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5887
26.9k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5888
2.59k
  SKIP(8);
5889
2.59k
  if (SKIP_BLANKS_PE == 0) {
5890
80
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5891
80
         "Space required after 'NOTATION'\n");
5892
80
      return(0);
5893
80
  }
5894
2.51k
  *tree = xmlParseNotationType(ctxt);
5895
2.51k
  if (*tree == NULL) return(0);
5896
1.75k
  return(XML_ATTRIBUTE_NOTATION);
5897
2.51k
    }
5898
24.3k
    *tree = xmlParseEnumerationType(ctxt);
5899
24.3k
    if (*tree == NULL) return(0);
5900
22.8k
    return(XML_ATTRIBUTE_ENUMERATION);
5901
24.3k
}
5902
5903
/**
5904
 * Parse the Attribute list def for an element
5905
 *
5906
 * @deprecated Internal function, don't use.
5907
 *
5908
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5909
 *
5910
 *     [55] StringType ::= 'CDATA'
5911
 *
5912
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5913
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5914
 *
5915
 * Validity constraints for attribute values syntax are checked in
5916
 * #xmlValidateAttributeValue
5917
 *
5918
 * [ VC: ID ]
5919
 * Values of type ID must match the Name production. A name must not
5920
 * appear more than once in an XML document as a value of this type;
5921
 * i.e., ID values must uniquely identify the elements which bear them.
5922
 *
5923
 * [ VC: One ID per Element Type ]
5924
 * No element type may have more than one ID attribute specified.
5925
 *
5926
 * [ VC: ID Attribute Default ]
5927
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5928
 *
5929
 * [ VC: IDREF ]
5930
 * Values of type IDREF must match the Name production, and values
5931
 * of type IDREFS must match Names; each IDREF Name must match the value
5932
 * of an ID attribute on some element in the XML document; i.e. IDREF
5933
 * values must match the value of some ID attribute.
5934
 *
5935
 * [ VC: Entity Name ]
5936
 * Values of type ENTITY must match the Name production, values
5937
 * of type ENTITIES must match Names; each Entity Name must match the
5938
 * name of an unparsed entity declared in the DTD.
5939
 *
5940
 * [ VC: Name Token ]
5941
 * Values of type NMTOKEN must match the Nmtoken production; values
5942
 * of type NMTOKENS must match Nmtokens.
5943
 *
5944
 * @param ctxt  an XML parser context
5945
 * @param tree  the enumeration tree built while parsing
5946
 * @returns the attribute type
5947
 */
5948
int
5949
73.3k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5950
73.3k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5951
12.6k
  SKIP(5);
5952
12.6k
  return(XML_ATTRIBUTE_CDATA);
5953
60.7k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5954
2.48k
  SKIP(6);
5955
2.48k
  return(XML_ATTRIBUTE_IDREFS);
5956
58.2k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5957
1.28k
  SKIP(5);
5958
1.28k
  return(XML_ATTRIBUTE_IDREF);
5959
56.9k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5960
15.2k
        SKIP(2);
5961
15.2k
  return(XML_ATTRIBUTE_ID);
5962
41.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5963
2.19k
  SKIP(6);
5964
2.19k
  return(XML_ATTRIBUTE_ENTITY);
5965
39.5k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5966
2.69k
  SKIP(8);
5967
2.69k
  return(XML_ATTRIBUTE_ENTITIES);
5968
36.8k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5969
2.65k
  SKIP(8);
5970
2.65k
  return(XML_ATTRIBUTE_NMTOKENS);
5971
34.1k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5972
7.20k
  SKIP(7);
5973
7.20k
  return(XML_ATTRIBUTE_NMTOKEN);
5974
7.20k
     }
5975
26.9k
     return(xmlParseEnumeratedType(ctxt, tree));
5976
73.3k
}
5977
5978
/**
5979
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5980
 *
5981
 * @deprecated Internal function, don't use.
5982
 *
5983
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5984
 *
5985
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5986
 * @param ctxt  an XML parser context
5987
 */
5988
void
5989
62.7k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5990
62.7k
    const xmlChar *elemName;
5991
62.7k
    const xmlChar *attrName;
5992
62.7k
    xmlEnumerationPtr tree;
5993
5994
62.7k
    if ((CUR != '<') || (NXT(1) != '!'))
5995
0
        return;
5996
62.7k
    SKIP(2);
5997
5998
62.7k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5999
62.5k
#ifdef LIBXML_VALID_ENABLED
6000
62.5k
  int oldInputNr = ctxt->inputNr;
6001
62.5k
#endif
6002
6003
62.5k
  SKIP(7);
6004
62.5k
  if (SKIP_BLANKS_PE == 0) {
6005
3.08k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6006
3.08k
                     "Space required after '<!ATTLIST'\n");
6007
3.08k
  }
6008
62.5k
        elemName = xmlParseName(ctxt);
6009
62.5k
  if (elemName == NULL) {
6010
1.05k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6011
1.05k
         "ATTLIST: no name for Element\n");
6012
1.05k
      return;
6013
1.05k
  }
6014
61.5k
  SKIP_BLANKS_PE;
6015
61.5k
  GROW;
6016
122k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6017
94.8k
      int type;
6018
94.8k
      int def;
6019
94.8k
      xmlChar *defaultValue = NULL;
6020
6021
94.8k
      GROW;
6022
94.8k
            tree = NULL;
6023
94.8k
      attrName = xmlParseName(ctxt);
6024
94.8k
      if (attrName == NULL) {
6025
16.7k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6026
16.7k
             "ATTLIST: no name for Attribute\n");
6027
16.7k
    break;
6028
16.7k
      }
6029
78.0k
      GROW;
6030
78.0k
      if (SKIP_BLANKS_PE == 0) {
6031
4.62k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6032
4.62k
            "Space required after the attribute name\n");
6033
4.62k
    break;
6034
4.62k
      }
6035
6036
73.3k
      type = xmlParseAttributeType(ctxt, &tree);
6037
73.3k
      if (type <= 0) {
6038
2.36k
          break;
6039
2.36k
      }
6040
6041
71.0k
      GROW;
6042
71.0k
      if (SKIP_BLANKS_PE == 0) {
6043
2.04k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6044
2.04k
             "Space required after the attribute type\n");
6045
2.04k
          if (tree != NULL)
6046
974
        xmlFreeEnumeration(tree);
6047
2.04k
    break;
6048
2.04k
      }
6049
6050
68.9k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6051
68.9k
      if (def <= 0) {
6052
0
                if (defaultValue != NULL)
6053
0
        xmlFree(defaultValue);
6054
0
          if (tree != NULL)
6055
0
        xmlFreeEnumeration(tree);
6056
0
          break;
6057
0
      }
6058
68.9k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6059
29.8k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6060
6061
68.9k
      GROW;
6062
68.9k
            if (RAW != '>') {
6063
50.6k
    if (SKIP_BLANKS_PE == 0) {
6064
8.00k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065
8.00k
      "Space required after the attribute default value\n");
6066
8.00k
        if (defaultValue != NULL)
6067
373
      xmlFree(defaultValue);
6068
8.00k
        if (tree != NULL)
6069
2.47k
      xmlFreeEnumeration(tree);
6070
8.00k
        break;
6071
8.00k
    }
6072
50.6k
      }
6073
60.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6074
60.9k
    (ctxt->sax->attributeDecl != NULL))
6075
57.5k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6076
57.5k
                          type, def, defaultValue, tree);
6077
3.46k
      else if (tree != NULL)
6078
1.79k
    xmlFreeEnumeration(tree);
6079
6080
60.9k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6081
60.9k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6082
60.9k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6083
26.6k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6084
26.6k
      }
6085
60.9k
      if (ctxt->sax2) {
6086
47.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6087
47.8k
      }
6088
60.9k
      if (defaultValue != NULL)
6089
33.5k
          xmlFree(defaultValue);
6090
60.9k
      GROW;
6091
60.9k
  }
6092
61.5k
  if (RAW == '>') {
6093
29.5k
#ifdef LIBXML_VALID_ENABLED
6094
29.5k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6095
68
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6096
68
                                 "Attribute list declaration doesn't start and"
6097
68
                                 " stop in the same entity\n",
6098
68
                                 NULL, NULL);
6099
68
      }
6100
29.5k
#endif
6101
29.5k
      NEXT;
6102
29.5k
  }
6103
61.5k
    }
6104
62.7k
}
6105
6106
/**
6107
 * Handle PEs and check that we don't pop the entity that started
6108
 * a balanced group.
6109
 *
6110
 * @param ctxt  parser context
6111
 * @param openInputNr  input nr of the entity with opening '('
6112
 */
6113
static void
6114
2.75M
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6115
2.75M
    SKIP_BLANKS;
6116
2.75M
    GROW;
6117
6118
2.75M
    (void) openInputNr;
6119
6120
2.75M
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6121
2.54M
        return;
6122
6123
220k
    while (!PARSER_STOPPED(ctxt)) {
6124
220k
        if (ctxt->input->cur >= ctxt->input->end) {
6125
4.61k
#ifdef LIBXML_VALID_ENABLED
6126
4.61k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6127
1.69k
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6128
1.69k
                                 "Element content declaration doesn't start "
6129
1.69k
                                 "and stop in the same entity\n",
6130
1.69k
                                 NULL, NULL);
6131
1.69k
            }
6132
4.61k
#endif
6133
4.61k
            if (PARSER_IN_PE(ctxt))
6134
4.51k
                xmlPopPE(ctxt);
6135
102
            else
6136
102
                break;
6137
215k
        } else if (RAW == '%') {
6138
6.82k
            xmlParsePERefInternal(ctxt, 0);
6139
208k
        } else {
6140
208k
            break;
6141
208k
        }
6142
6143
11.3k
        SKIP_BLANKS;
6144
11.3k
        GROW;
6145
11.3k
    }
6146
209k
}
6147
6148
/**
6149
 * Parse the declaration for a Mixed Element content
6150
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6151
 *
6152
 * @deprecated Internal function, don't use.
6153
 *
6154
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6155
 *                    '(' S? '#PCDATA' S? ')'
6156
 *
6157
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6158
 *
6159
 * [ VC: No Duplicate Types ]
6160
 * The same name must not appear more than once in a single
6161
 * mixed-content declaration.
6162
 *
6163
 * @param ctxt  an XML parser context
6164
 * @param openInputNr  the input used for the current entity, needed for
6165
 * boundary checks
6166
 * @returns the list of the xmlElementContent describing the element choices
6167
 */
6168
xmlElementContent *
6169
14.6k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6170
14.6k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6171
14.6k
    const xmlChar *elem = NULL;
6172
6173
14.6k
    GROW;
6174
14.6k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6175
14.6k
  SKIP(7);
6176
14.6k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6177
14.6k
  if (RAW == ')') {
6178
9.79k
#ifdef LIBXML_VALID_ENABLED
6179
9.79k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6180
3
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181
3
                                 "Element content declaration doesn't start "
6182
3
                                 "and stop in the same entity\n",
6183
3
                                 NULL, NULL);
6184
3
      }
6185
9.79k
#endif
6186
9.79k
      NEXT;
6187
9.79k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6188
9.79k
      if (ret == NULL)
6189
11
                goto mem_error;
6190
9.78k
      if (RAW == '*') {
6191
966
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6192
966
    NEXT;
6193
966
      }
6194
9.78k
      return(ret);
6195
9.79k
  }
6196
4.81k
  if ((RAW == '(') || (RAW == '|')) {
6197
4.58k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6198
4.58k
      if (ret == NULL)
6199
8
                goto mem_error;
6200
4.58k
  }
6201
32.3k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6202
27.8k
      NEXT;
6203
27.8k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6204
27.8k
            if (n == NULL)
6205
17
                goto mem_error;
6206
27.8k
      if (elem == NULL) {
6207
4.55k
    n->c1 = cur;
6208
4.55k
    if (cur != NULL)
6209
4.55k
        cur->parent = n;
6210
4.55k
    ret = cur = n;
6211
23.2k
      } else {
6212
23.2k
          cur->c2 = n;
6213
23.2k
    n->parent = cur;
6214
23.2k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6215
23.2k
                if (n->c1 == NULL)
6216
10
                    goto mem_error;
6217
23.2k
    n->c1->parent = n;
6218
23.2k
    cur = n;
6219
23.2k
      }
6220
27.7k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6221
27.7k
      elem = xmlParseName(ctxt);
6222
27.7k
      if (elem == NULL) {
6223
240
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6224
240
      "xmlParseElementMixedContentDecl : Name expected\n");
6225
240
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6226
240
    return(NULL);
6227
240
      }
6228
27.5k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6229
27.5k
  }
6230
4.54k
  if ((RAW == ')') && (NXT(1) == '*')) {
6231
3.93k
      if (elem != NULL) {
6232
3.93k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6233
3.93k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6234
3.93k
    if (cur->c2 == NULL)
6235
16
                    goto mem_error;
6236
3.91k
    cur->c2->parent = cur;
6237
3.91k
            }
6238
3.91k
            if (ret != NULL)
6239
3.91k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6240
3.91k
#ifdef LIBXML_VALID_ENABLED
6241
3.91k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6242
3
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6243
3
                                 "Element content declaration doesn't start "
6244
3
                                 "and stop in the same entity\n",
6245
3
                                 NULL, NULL);
6246
3
      }
6247
3.91k
#endif
6248
3.91k
      SKIP(2);
6249
3.91k
  } else {
6250
609
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6251
609
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6252
609
      return(NULL);
6253
609
  }
6254
6255
4.54k
    } else {
6256
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6257
0
    }
6258
3.91k
    return(ret);
6259
6260
62
mem_error:
6261
62
    xmlErrMemory(ctxt);
6262
62
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6263
62
    return(NULL);
6264
14.6k
}
6265
6266
/**
6267
 * Parse the declaration for a Mixed Element content
6268
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6269
 *
6270
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6271
 *
6272
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6273
 *
6274
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6275
 *
6276
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6277
 *
6278
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6279
 * TODO Parameter-entity replacement text must be properly nested
6280
 *  with parenthesized groups. That is to say, if either of the
6281
 *  opening or closing parentheses in a choice, seq, or Mixed
6282
 *  construct is contained in the replacement text for a parameter
6283
 *  entity, both must be contained in the same replacement text. For
6284
 *  interoperability, if a parameter-entity reference appears in a
6285
 *  choice, seq, or Mixed construct, its replacement text should not
6286
 *  be empty, and neither the first nor last non-blank character of
6287
 *  the replacement text should be a connector (| or ,).
6288
 *
6289
 * @param ctxt  an XML parser context
6290
 * @param openInputNr  the input used for the current entity, needed for
6291
 * boundary checks
6292
 * @param depth  the level of recursion
6293
 * @returns the tree of xmlElementContent describing the element
6294
 *          hierarchy.
6295
 */
6296
static xmlElementContentPtr
6297
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6298
131k
                                       int depth) {
6299
131k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6300
131k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6301
131k
    const xmlChar *elem;
6302
131k
    xmlChar type = 0;
6303
6304
131k
    if (depth > maxDepth) {
6305
12
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6306
12
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6307
12
                "use XML_PARSE_HUGE\n", depth);
6308
12
  return(NULL);
6309
12
    }
6310
131k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6311
131k
    if (RAW == '(') {
6312
88.4k
        int newInputNr = ctxt->inputNr;
6313
6314
        /* Recurse on first child */
6315
88.4k
  NEXT;
6316
88.4k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6317
88.4k
                                                           depth + 1);
6318
88.4k
        if (cur == NULL)
6319
79.1k
            return(NULL);
6320
88.4k
    } else {
6321
43.4k
  elem = xmlParseName(ctxt);
6322
43.4k
  if (elem == NULL) {
6323
591
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6324
591
      return(NULL);
6325
591
  }
6326
42.8k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6327
42.8k
  if (cur == NULL) {
6328
50
      xmlErrMemory(ctxt);
6329
50
      return(NULL);
6330
50
  }
6331
42.7k
  GROW;
6332
42.7k
  if (RAW == '?') {
6333
4.04k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6334
4.04k
      NEXT;
6335
38.7k
  } else if (RAW == '*') {
6336
3.31k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6337
3.31k
      NEXT;
6338
35.4k
  } else if (RAW == '+') {
6339
1.95k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6340
1.95k
      NEXT;
6341
33.4k
  } else {
6342
33.4k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6343
33.4k
  }
6344
42.7k
  GROW;
6345
42.7k
    }
6346
1.28M
    while (!PARSER_STOPPED(ctxt)) {
6347
1.28M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6348
1.28M
        if (RAW == ')')
6349
43.0k
            break;
6350
        /*
6351
   * Each loop we parse one separator and one element.
6352
   */
6353
1.23M
        if (RAW == ',') {
6354
1.15M
      if (type == 0) type = CUR;
6355
6356
      /*
6357
       * Detect "Name | Name , Name" error
6358
       */
6359
1.13M
      else if (type != CUR) {
6360
9
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6361
9
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6362
9
                      type);
6363
9
    if ((last != NULL) && (last != ret))
6364
9
        xmlFreeDocElementContent(ctxt->myDoc, last);
6365
9
    if (ret != NULL)
6366
9
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6367
9
    return(NULL);
6368
9
      }
6369
1.15M
      NEXT;
6370
6371
1.15M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6372
1.15M
      if (op == NULL) {
6373
22
                xmlErrMemory(ctxt);
6374
22
    if ((last != NULL) && (last != ret))
6375
9
        xmlFreeDocElementContent(ctxt->myDoc, last);
6376
22
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6377
22
    return(NULL);
6378
22
      }
6379
1.15M
      if (last == NULL) {
6380
13.3k
    op->c1 = ret;
6381
13.3k
    if (ret != NULL)
6382
13.3k
        ret->parent = op;
6383
13.3k
    ret = cur = op;
6384
1.13M
      } else {
6385
1.13M
          cur->c2 = op;
6386
1.13M
    if (op != NULL)
6387
1.13M
        op->parent = cur;
6388
1.13M
    op->c1 = last;
6389
1.13M
    if (last != NULL)
6390
1.13M
        last->parent = op;
6391
1.13M
    cur =op;
6392
1.13M
    last = NULL;
6393
1.13M
      }
6394
1.15M
  } else if (RAW == '|') {
6395
80.6k
      if (type == 0) type = CUR;
6396
6397
      /*
6398
       * Detect "Name , Name | Name" error
6399
       */
6400
65.8k
      else if (type != CUR) {
6401
9
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6402
9
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6403
9
          type);
6404
9
    if ((last != NULL) && (last != ret))
6405
9
        xmlFreeDocElementContent(ctxt->myDoc, last);
6406
9
    if (ret != NULL)
6407
9
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6408
9
    return(NULL);
6409
9
      }
6410
80.6k
      NEXT;
6411
6412
80.6k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6413
80.6k
      if (op == NULL) {
6414
27
                xmlErrMemory(ctxt);
6415
27
    if ((last != NULL) && (last != ret))
6416
8
        xmlFreeDocElementContent(ctxt->myDoc, last);
6417
27
    if (ret != NULL)
6418
27
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
27
    return(NULL);
6420
27
      }
6421
80.6k
      if (last == NULL) {
6422
14.7k
    op->c1 = ret;
6423
14.7k
    if (ret != NULL)
6424
14.7k
        ret->parent = op;
6425
14.7k
    ret = cur = op;
6426
65.8k
      } else {
6427
65.8k
          cur->c2 = op;
6428
65.8k
    if (op != NULL)
6429
65.8k
        op->parent = cur;
6430
65.8k
    op->c1 = last;
6431
65.8k
    if (last != NULL)
6432
65.8k
        last->parent = op;
6433
65.8k
    cur =op;
6434
65.8k
    last = NULL;
6435
65.8k
      }
6436
80.6k
  } else {
6437
4.30k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6438
4.30k
      if ((last != NULL) && (last != ret))
6439
3.81k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6440
4.30k
      if (ret != NULL)
6441
4.30k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
4.30k
      return(NULL);
6443
4.30k
  }
6444
1.23M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6445
1.23M
        if (RAW == '(') {
6446
15.8k
            int newInputNr = ctxt->inputNr;
6447
6448
      /* Recurse on second child */
6449
15.8k
      NEXT;
6450
15.8k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6451
15.8k
                                                          depth + 1);
6452
15.8k
            if (last == NULL) {
6453
2.82k
    if (ret != NULL)
6454
2.82k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6455
2.82k
    return(NULL);
6456
2.82k
            }
6457
1.21M
  } else {
6458
1.21M
      elem = xmlParseName(ctxt);
6459
1.21M
      if (elem == NULL) {
6460
753
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6461
753
    if (ret != NULL)
6462
753
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6463
753
    return(NULL);
6464
753
      }
6465
1.21M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6466
1.21M
      if (last == NULL) {
6467
40
                xmlErrMemory(ctxt);
6468
40
    if (ret != NULL)
6469
40
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6470
40
    return(NULL);
6471
40
      }
6472
1.21M
      if (RAW == '?') {
6473
26.3k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6474
26.3k
    NEXT;
6475
1.19M
      } else if (RAW == '*') {
6476
4.33k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6477
4.33k
    NEXT;
6478
1.18M
      } else if (RAW == '+') {
6479
12.8k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6480
12.8k
    NEXT;
6481
1.17M
      } else {
6482
1.17M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6483
1.17M
      }
6484
1.21M
  }
6485
1.23M
    }
6486
44.0k
    if ((cur != NULL) && (last != NULL)) {
6487
20.6k
        cur->c2 = last;
6488
20.6k
  if (last != NULL)
6489
20.6k
      last->parent = cur;
6490
20.6k
    }
6491
44.0k
#ifdef LIBXML_VALID_ENABLED
6492
44.0k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6493
7
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6494
7
                         "Element content declaration doesn't start "
6495
7
                         "and stop in the same entity\n",
6496
7
                         NULL, NULL);
6497
7
    }
6498
44.0k
#endif
6499
44.0k
    NEXT;
6500
44.0k
    if (RAW == '?') {
6501
9.61k
  if (ret != NULL) {
6502
9.61k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6503
9.61k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6504
715
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6505
8.90k
      else
6506
8.90k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6507
9.61k
  }
6508
9.61k
  NEXT;
6509
34.4k
    } else if (RAW == '*') {
6510
5.88k
  if (ret != NULL) {
6511
5.88k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6512
5.88k
      cur = ret;
6513
      /*
6514
       * Some normalization:
6515
       * (a | b* | c?)* == (a | b | c)*
6516
       */
6517
17.1k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
11.2k
    if ((cur->c1 != NULL) &&
6519
11.2k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
11.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6521
597
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
11.2k
    if ((cur->c2 != NULL) &&
6523
11.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
11.2k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6525
359
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
11.2k
    cur = cur->c2;
6527
11.2k
      }
6528
5.88k
  }
6529
5.88k
  NEXT;
6530
28.5k
    } else if (RAW == '+') {
6531
7.32k
  if (ret != NULL) {
6532
7.32k
      int found = 0;
6533
6534
7.32k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6535
7.32k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6536
3.91k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6537
3.41k
      else
6538
3.41k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6539
      /*
6540
       * Some normalization:
6541
       * (a | b*)+ == (a | b)*
6542
       * (a | b?)+ == (a | b)*
6543
       */
6544
9.42k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6545
2.09k
    if ((cur->c1 != NULL) &&
6546
2.09k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6547
2.09k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6548
492
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6549
492
        found = 1;
6550
492
    }
6551
2.09k
    if ((cur->c2 != NULL) &&
6552
2.09k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6553
2.09k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6554
391
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6555
391
        found = 1;
6556
391
    }
6557
2.09k
    cur = cur->c2;
6558
2.09k
      }
6559
7.32k
      if (found)
6560
548
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
7.32k
  }
6562
7.32k
  NEXT;
6563
7.32k
    }
6564
44.0k
    return(ret);
6565
52.0k
}
6566
6567
/**
6568
 * Parse the declaration for a Mixed Element content
6569
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6570
 *
6571
 * @deprecated Internal function, don't use.
6572
 *
6573
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6574
 *
6575
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6576
 *
6577
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6578
 *
6579
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6580
 *
6581
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6582
 * TODO Parameter-entity replacement text must be properly nested
6583
 *  with parenthesized groups. That is to say, if either of the
6584
 *  opening or closing parentheses in a choice, seq, or Mixed
6585
 *  construct is contained in the replacement text for a parameter
6586
 *  entity, both must be contained in the same replacement text. For
6587
 *  interoperability, if a parameter-entity reference appears in a
6588
 *  choice, seq, or Mixed construct, its replacement text should not
6589
 *  be empty, and neither the first nor last non-blank character of
6590
 *  the replacement text should be a connector (| or ,).
6591
 *
6592
 * @param ctxt  an XML parser context
6593
 * @param inputchk  the input used for the current entity, needed for boundary checks
6594
 * @returns the tree of xmlElementContent describing the element
6595
 *          hierarchy.
6596
 */
6597
xmlElementContent *
6598
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6599
    /* stub left for API/ABI compat */
6600
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6601
0
}
6602
6603
/**
6604
 * Parse the declaration for an Element content either Mixed or Children,
6605
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6606
 *
6607
 * @deprecated Internal function, don't use.
6608
 *
6609
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6610
 *
6611
 * @param ctxt  an XML parser context
6612
 * @param name  the name of the element being defined.
6613
 * @param result  the Element Content pointer will be stored here if any
6614
 * @returns an xmlElementTypeVal value or -1 on error
6615
 */
6616
6617
int
6618
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6619
42.2k
                           xmlElementContent **result) {
6620
6621
42.2k
    xmlElementContentPtr tree = NULL;
6622
42.2k
    int openInputNr = ctxt->inputNr;
6623
42.2k
    int res;
6624
6625
42.2k
    *result = NULL;
6626
6627
42.2k
    if (RAW != '(') {
6628
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6629
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6630
0
  return(-1);
6631
0
    }
6632
42.2k
    NEXT;
6633
42.2k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6634
42.2k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6635
14.6k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6636
14.6k
  res = XML_ELEMENT_TYPE_MIXED;
6637
27.5k
    } else {
6638
27.5k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6639
27.5k
  res = XML_ELEMENT_TYPE_ELEMENT;
6640
27.5k
    }
6641
42.2k
    if (tree == NULL)
6642
6.72k
        return(-1);
6643
35.4k
    SKIP_BLANKS_PE;
6644
35.4k
    *result = tree;
6645
35.4k
    return(res);
6646
42.2k
}
6647
6648
/**
6649
 * Parse an element declaration. Always consumes '<!'.
6650
 *
6651
 * @deprecated Internal function, don't use.
6652
 *
6653
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6654
 *
6655
 * [ VC: Unique Element Type Declaration ]
6656
 * No element type may be declared more than once
6657
 *
6658
 * @param ctxt  an XML parser context
6659
 * @returns the type of the element, or -1 in case of error
6660
 */
6661
int
6662
49.1k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6663
49.1k
    const xmlChar *name;
6664
49.1k
    int ret = -1;
6665
49.1k
    xmlElementContentPtr content  = NULL;
6666
6667
49.1k
    if ((CUR != '<') || (NXT(1) != '!'))
6668
0
        return(ret);
6669
49.1k
    SKIP(2);
6670
6671
    /* GROW; done in the caller */
6672
49.1k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6673
49.0k
#ifdef LIBXML_VALID_ENABLED
6674
49.0k
  int oldInputNr = ctxt->inputNr;
6675
49.0k
#endif
6676
6677
49.0k
  SKIP(7);
6678
49.0k
  if (SKIP_BLANKS_PE == 0) {
6679
676
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6680
676
               "Space required after 'ELEMENT'\n");
6681
676
      return(-1);
6682
676
  }
6683
48.3k
        name = xmlParseName(ctxt);
6684
48.3k
  if (name == NULL) {
6685
380
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6686
380
         "xmlParseElementDecl: no name for Element\n");
6687
380
      return(-1);
6688
380
  }
6689
47.9k
  if (SKIP_BLANKS_PE == 0) {
6690
8.11k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6691
8.11k
         "Space required after the element name\n");
6692
8.11k
  }
6693
47.9k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6694
3.89k
      SKIP(5);
6695
      /*
6696
       * Element must always be empty.
6697
       */
6698
3.89k
      ret = XML_ELEMENT_TYPE_EMPTY;
6699
44.0k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6700
44.0k
             (NXT(2) == 'Y')) {
6701
895
      SKIP(3);
6702
      /*
6703
       * Element is a generic container.
6704
       */
6705
895
      ret = XML_ELEMENT_TYPE_ANY;
6706
43.1k
  } else if (RAW == '(') {
6707
42.2k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6708
42.2k
            if (ret <= 0)
6709
6.72k
                return(-1);
6710
42.2k
  } else {
6711
      /*
6712
       * [ WFC: PEs in Internal Subset ] error handling.
6713
       */
6714
981
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6715
981
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6716
981
      return(-1);
6717
981
  }
6718
6719
40.2k
  SKIP_BLANKS_PE;
6720
6721
40.2k
  if (RAW != '>') {
6722
3.38k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6723
3.38k
      if (content != NULL) {
6724
3.04k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6725
3.04k
      }
6726
36.8k
  } else {
6727
36.8k
#ifdef LIBXML_VALID_ENABLED
6728
36.8k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6729
6
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6730
6
                                 "Element declaration doesn't start and stop in"
6731
6
                                 " the same entity\n",
6732
6
                                 NULL, NULL);
6733
6
      }
6734
36.8k
#endif
6735
6736
36.8k
      NEXT;
6737
36.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6738
36.8k
    (ctxt->sax->elementDecl != NULL)) {
6739
23.3k
    if (content != NULL)
6740
19.3k
        content->parent = NULL;
6741
23.3k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6742
23.3k
                           content);
6743
23.3k
    if ((content != NULL) && (content->parent == NULL)) {
6744
        /*
6745
         * this is a trick: if xmlAddElementDecl is called,
6746
         * instead of copying the full tree it is plugged directly
6747
         * if called from the parser. Avoid duplicating the
6748
         * interfaces or change the API/ABI
6749
         */
6750
3.50k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6751
3.50k
    }
6752
23.3k
      } else if (content != NULL) {
6753
13.0k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6754
13.0k
      }
6755
36.8k
  }
6756
40.2k
    }
6757
40.3k
    return(ret);
6758
49.1k
}
6759
6760
/**
6761
 * Parse a conditional section. Always consumes '<!['.
6762
 *
6763
 *     [61] conditionalSect ::= includeSect | ignoreSect
6764
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6765
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6766
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6767
 *                                 Ignore)*
6768
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6769
 * @param ctxt  an XML parser context
6770
 */
6771
6772
static void
6773
4.20k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6774
4.20k
    size_t depth = 0;
6775
4.20k
    int isFreshPE = 0;
6776
4.20k
    int oldInputNr = ctxt->inputNr;
6777
4.20k
    int declInputNr = ctxt->inputNr;
6778
6779
13.2k
    while (!PARSER_STOPPED(ctxt)) {
6780
13.2k
        if (ctxt->input->cur >= ctxt->input->end) {
6781
370
            if (ctxt->inputNr <= oldInputNr) {
6782
360
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6783
360
                return;
6784
360
            }
6785
6786
10
            xmlPopPE(ctxt);
6787
10
            declInputNr = ctxt->inputNr;
6788
12.8k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6789
6.14k
            SKIP(3);
6790
6.14k
            SKIP_BLANKS_PE;
6791
6792
6.14k
            isFreshPE = 0;
6793
6794
6.14k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6795
4.38k
                SKIP(7);
6796
4.38k
                SKIP_BLANKS_PE;
6797
4.38k
                if (RAW != '[') {
6798
225
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799
225
                    return;
6800
225
                }
6801
4.16k
#ifdef LIBXML_VALID_ENABLED
6802
4.16k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6803
34
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804
34
                                     "All markup of the conditional section is"
6805
34
                                     " not in the same entity\n",
6806
34
                                     NULL, NULL);
6807
34
                }
6808
4.16k
#endif
6809
4.16k
                NEXT;
6810
6811
4.16k
                depth++;
6812
4.16k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6813
1.41k
                size_t ignoreDepth = 0;
6814
6815
1.41k
                SKIP(6);
6816
1.41k
                SKIP_BLANKS_PE;
6817
1.41k
                if (RAW != '[') {
6818
93
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6819
93
                    return;
6820
93
                }
6821
1.32k
#ifdef LIBXML_VALID_ENABLED
6822
1.32k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6823
2
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6824
2
                                     "All markup of the conditional section is"
6825
2
                                     " not in the same entity\n",
6826
2
                                     NULL, NULL);
6827
2
                }
6828
1.32k
#endif
6829
1.32k
                NEXT;
6830
6831
24.4k
                while (PARSER_STOPPED(ctxt) == 0) {
6832
24.4k
                    if (RAW == 0) {
6833
672
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6834
672
                        return;
6835
672
                    }
6836
23.7k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6837
774
                        SKIP(3);
6838
774
                        ignoreDepth++;
6839
                        /* Check for integer overflow */
6840
774
                        if (ignoreDepth == 0) {
6841
0
                            xmlErrMemory(ctxt);
6842
0
                            return;
6843
0
                        }
6844
23.0k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6845
23.0k
                               (NXT(2) == '>')) {
6846
878
                        SKIP(3);
6847
878
                        if (ignoreDepth == 0)
6848
638
                            break;
6849
240
                        ignoreDepth--;
6850
22.1k
                    } else {
6851
22.1k
                        NEXT;
6852
22.1k
                    }
6853
23.7k
                }
6854
6855
651
#ifdef LIBXML_VALID_ENABLED
6856
651
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6857
2
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6858
2
                                     "All markup of the conditional section is"
6859
2
                                     " not in the same entity\n",
6860
2
                                     NULL, NULL);
6861
2
                }
6862
651
#endif
6863
651
            } else {
6864
340
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6865
340
                return;
6866
340
            }
6867
6.72k
        } else if ((depth > 0) &&
6868
6.72k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6869
2.95k
            if (isFreshPE) {
6870
6
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6871
6
                               "Parameter entity must match "
6872
6
                               "extSubsetDecl\n");
6873
6
                return;
6874
6
            }
6875
6876
2.95k
            depth--;
6877
2.95k
#ifdef LIBXML_VALID_ENABLED
6878
2.95k
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6879
18
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6880
18
                                 "All markup of the conditional section is not"
6881
18
                                 " in the same entity\n",
6882
18
                                 NULL, NULL);
6883
18
            }
6884
2.95k
#endif
6885
2.95k
            SKIP(3);
6886
3.77k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6887
2.64k
            isFreshPE = 0;
6888
2.64k
            xmlParseMarkupDecl(ctxt);
6889
2.64k
        } else if (RAW == '%') {
6890
1.04k
            xmlParsePERefInternal(ctxt, 1);
6891
1.04k
            if (ctxt->inputNr > declInputNr) {
6892
20
                isFreshPE = 1;
6893
20
                declInputNr = ctxt->inputNr;
6894
20
            }
6895
1.04k
        } else {
6896
77
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6897
77
            return;
6898
77
        }
6899
6900
11.4k
        if (depth == 0)
6901
2.40k
            break;
6902
6903
9.06k
        SKIP_BLANKS;
6904
9.06k
        SHRINK;
6905
9.06k
        GROW;
6906
9.06k
    }
6907
4.20k
}
6908
6909
/**
6910
 * Parse markup declarations. Always consumes '<!' or '<?'.
6911
 *
6912
 * @deprecated Internal function, don't use.
6913
 *
6914
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6915
 *                         NotationDecl | PI | Comment
6916
 *
6917
 * [ VC: Proper Declaration/PE Nesting ]
6918
 * Parameter-entity replacement text must be properly nested with
6919
 * markup declarations. That is to say, if either the first character
6920
 * or the last character of a markup declaration (markupdecl above) is
6921
 * contained in the replacement text for a parameter-entity reference,
6922
 * both must be contained in the same replacement text.
6923
 *
6924
 * [ WFC: PEs in Internal Subset ]
6925
 * In the internal DTD subset, parameter-entity references can occur
6926
 * only where markup declarations can occur, not within markup declarations.
6927
 * (This does not apply to references that occur in external parameter
6928
 * entities or to the external subset.)
6929
 *
6930
 * @param ctxt  an XML parser context
6931
 */
6932
void
6933
724k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6934
724k
    GROW;
6935
724k
    if (CUR == '<') {
6936
724k
        if (NXT(1) == '!') {
6937
716k
      switch (NXT(2)) {
6938
147k
          case 'E':
6939
147k
        if (NXT(3) == 'L')
6940
49.1k
      xmlParseElementDecl(ctxt);
6941
98.0k
        else if (NXT(3) == 'N')
6942
97.9k
      xmlParseEntityDecl(ctxt);
6943
56
                    else
6944
56
                        SKIP(2);
6945
147k
        break;
6946
62.7k
          case 'A':
6947
62.7k
        xmlParseAttributeListDecl(ctxt);
6948
62.7k
        break;
6949
11.1k
          case 'N':
6950
11.1k
        xmlParseNotationDecl(ctxt);
6951
11.1k
        break;
6952
488k
          case '-':
6953
488k
        xmlParseComment(ctxt);
6954
488k
        break;
6955
6.20k
    default:
6956
6.20k
                    xmlFatalErr(ctxt,
6957
6.20k
                                ctxt->inSubset == 2 ?
6958
2.44k
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6959
6.20k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6960
6.20k
                                NULL);
6961
6.20k
                    SKIP(2);
6962
6.20k
        break;
6963
716k
      }
6964
716k
  } else if (NXT(1) == '?') {
6965
8.81k
      xmlParsePI(ctxt);
6966
8.81k
  }
6967
724k
    }
6968
724k
}
6969
6970
/**
6971
 * Parse an XML declaration header for external entities
6972
 *
6973
 * @deprecated Internal function, don't use.
6974
 *
6975
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6976
 * @param ctxt  an XML parser context
6977
 */
6978
6979
void
6980
23.5k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6981
23.5k
    xmlChar *version;
6982
6983
    /*
6984
     * We know that '<?xml' is here.
6985
     */
6986
23.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6987
23.4k
  SKIP(5);
6988
23.4k
    } else {
6989
14
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6990
14
  return;
6991
14
    }
6992
6993
23.4k
    if (SKIP_BLANKS == 0) {
6994
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6995
0
           "Space needed after '<?xml'\n");
6996
0
    }
6997
6998
    /*
6999
     * We may have the VersionInfo here.
7000
     */
7001
23.4k
    version = xmlParseVersionInfo(ctxt);
7002
23.4k
    if (version == NULL) {
7003
12.5k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7004
12.5k
        if (version == NULL) {
7005
15
            xmlErrMemory(ctxt);
7006
15
            return;
7007
15
        }
7008
12.5k
    } else {
7009
10.9k
  if (SKIP_BLANKS == 0) {
7010
571
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7011
571
               "Space needed here\n");
7012
571
  }
7013
10.9k
    }
7014
23.4k
    ctxt->input->version = version;
7015
7016
    /*
7017
     * We must have the encoding declaration
7018
     */
7019
23.4k
    xmlParseEncodingDecl(ctxt);
7020
7021
23.4k
    SKIP_BLANKS;
7022
23.4k
    if ((RAW == '?') && (NXT(1) == '>')) {
7023
1.77k
        SKIP(2);
7024
21.7k
    } else if (RAW == '>') {
7025
        /* Deprecated old WD ... */
7026
235
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7027
235
  NEXT;
7028
21.4k
    } else {
7029
21.4k
        int c;
7030
7031
21.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7032
407M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7033
407M
            NEXT;
7034
407M
            if (c == '>')
7035
10.1k
                break;
7036
407M
        }
7037
21.4k
    }
7038
23.4k
}
7039
7040
/**
7041
 * Parse Markup declarations from an external subset
7042
 *
7043
 * @deprecated Internal function, don't use.
7044
 *
7045
 *     [30] extSubset ::= textDecl? extSubsetDecl
7046
 *
7047
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7048
 *                             PEReference | S) *
7049
 * @param ctxt  an XML parser context
7050
 * @param publicId  the public identifier
7051
 * @param systemId  the system identifier (URL)
7052
 */
7053
void
7054
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7055
3.70k
                       const xmlChar *systemId) {
7056
3.70k
    int oldInputNr;
7057
7058
3.70k
    xmlCtxtInitializeLate(ctxt);
7059
7060
3.70k
    xmlDetectEncoding(ctxt);
7061
7062
3.70k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7063
362
  xmlParseTextDecl(ctxt);
7064
362
    }
7065
3.70k
    if (ctxt->myDoc == NULL) {
7066
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7067
0
  if (ctxt->myDoc == NULL) {
7068
0
      xmlErrMemory(ctxt);
7069
0
      return;
7070
0
  }
7071
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7072
0
    }
7073
3.70k
    if ((ctxt->myDoc->intSubset == NULL) &&
7074
3.70k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7075
10
        xmlErrMemory(ctxt);
7076
10
    }
7077
7078
3.70k
    ctxt->inSubset = 2;
7079
3.70k
    oldInputNr = ctxt->inputNr;
7080
7081
3.70k
    SKIP_BLANKS;
7082
149k
    while (!PARSER_STOPPED(ctxt)) {
7083
148k
        if (ctxt->input->cur >= ctxt->input->end) {
7084
2.23k
            if (ctxt->inputNr <= oldInputNr) {
7085
1.34k
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7086
1.34k
                break;
7087
1.34k
            }
7088
7089
899
            xmlPopPE(ctxt);
7090
146k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7091
2.51k
            xmlParseConditionalSections(ctxt);
7092
144k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7093
140k
            xmlParseMarkupDecl(ctxt);
7094
140k
        } else if (RAW == '%') {
7095
1.80k
            xmlParsePERefInternal(ctxt, 1);
7096
1.80k
        } else {
7097
1.72k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7098
7099
1.91k
            while (ctxt->inputNr > oldInputNr)
7100
184
                xmlPopPE(ctxt);
7101
1.72k
            break;
7102
1.72k
        }
7103
145k
        SKIP_BLANKS;
7104
145k
        SHRINK;
7105
145k
        GROW;
7106
145k
    }
7107
3.70k
}
7108
7109
/**
7110
 * Parse and handle entity references in content. Depending on the SAX
7111
 * interface, this may end up in a call to characters() if this is a
7112
 * CharRef or a predefined entity, if there is no reference() callback,
7113
 * or if the parser was asked to switch to that mode.
7114
 *
7115
 * @deprecated Internal function, don't use.
7116
 *
7117
 * Always consumes '&'.
7118
 *
7119
 *     [67] Reference ::= EntityRef | CharRef
7120
 * @param ctxt  an XML parser context
7121
 */
7122
void
7123
664k
xmlParseReference(xmlParserCtxt *ctxt) {
7124
664k
    xmlEntityPtr ent = NULL;
7125
664k
    const xmlChar *name;
7126
664k
    xmlChar *val;
7127
7128
664k
    if (RAW != '&')
7129
0
        return;
7130
7131
    /*
7132
     * Simple case of a CharRef
7133
     */
7134
664k
    if (NXT(1) == '#') {
7135
284k
  int i = 0;
7136
284k
  xmlChar out[16];
7137
284k
  int value = xmlParseCharRef(ctxt);
7138
7139
284k
  if (value == 0)
7140
92.4k
      return;
7141
7142
        /*
7143
         * Just encode the value in UTF-8
7144
         */
7145
191k
        COPY_BUF(out, i, value);
7146
191k
        out[i] = 0;
7147
191k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7148
191k
            (!ctxt->disableSAX))
7149
184k
            ctxt->sax->characters(ctxt->userData, out, i);
7150
191k
  return;
7151
284k
    }
7152
7153
    /*
7154
     * We are seeing an entity reference
7155
     */
7156
380k
    name = xmlParseEntityRefInternal(ctxt);
7157
380k
    if (name == NULL)
7158
170k
        return;
7159
210k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7160
210k
    if (ent == NULL) {
7161
        /*
7162
         * Create a reference for undeclared entities.
7163
         */
7164
159k
        if ((ctxt->replaceEntities == 0) &&
7165
159k
            (ctxt->sax != NULL) &&
7166
159k
            (ctxt->disableSAX == 0) &&
7167
159k
            (ctxt->sax->reference != NULL)) {
7168
77.1k
            ctxt->sax->reference(ctxt->userData, name);
7169
77.1k
        }
7170
159k
        return;
7171
159k
    }
7172
50.3k
    if (!ctxt->wellFormed)
7173
34.8k
  return;
7174
7175
    /* special case of predefined entities */
7176
15.5k
    if ((ent->name == NULL) ||
7177
15.5k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7178
488
  val = ent->content;
7179
488
  if (val == NULL) return;
7180
  /*
7181
   * inline the entity.
7182
   */
7183
488
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7184
488
      (!ctxt->disableSAX))
7185
488
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7186
488
  return;
7187
488
    }
7188
7189
    /*
7190
     * Some users try to parse entities on their own and used to set
7191
     * the renamed "checked" member. Fix the flags to cover this
7192
     * case.
7193
     */
7194
15.0k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7195
0
        ent->flags |= XML_ENT_PARSED;
7196
7197
    /*
7198
     * The first reference to the entity triggers a parsing phase
7199
     * where the ent->children is filled with the result from
7200
     * the parsing.
7201
     * Note: external parsed entities will not be loaded, it is not
7202
     * required for a non-validating parser, unless the parsing option
7203
     * of validating, or substituting entities were given. Doing so is
7204
     * far more secure as the parser will only process data coming from
7205
     * the document entity by default.
7206
     *
7207
     * FIXME: This doesn't work correctly since entities can be
7208
     * expanded with different namespace declarations in scope.
7209
     * For example:
7210
     *
7211
     * <!DOCTYPE doc [
7212
     *   <!ENTITY ent "<ns:elem/>">
7213
     * ]>
7214
     * <doc>
7215
     *   <decl1 xmlns:ns="urn:ns1">
7216
     *     &ent;
7217
     *   </decl1>
7218
     *   <decl2 xmlns:ns="urn:ns2">
7219
     *     &ent;
7220
     *   </decl2>
7221
     * </doc>
7222
     *
7223
     * Proposed fix:
7224
     *
7225
     * - Ignore current namespace declarations when parsing the
7226
     *   entity. If a prefix can't be resolved, don't report an error
7227
     *   but mark it as unresolved.
7228
     * - Try to resolve these prefixes when expanding the entity.
7229
     *   This will require a specialized version of xmlStaticCopyNode
7230
     *   which can also make use of the namespace hash table to avoid
7231
     *   quadratic behavior.
7232
     *
7233
     * Alternatively, we could simply reparse the entity on each
7234
     * expansion like we already do with custom SAX callbacks.
7235
     * External entity content should be cached in this case.
7236
     */
7237
15.0k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7238
15.0k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7239
10.4k
         ((ctxt->replaceEntities) ||
7240
14.1k
          (ctxt->validate)))) {
7241
14.1k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7242
6.19k
            xmlCtxtParseEntity(ctxt, ent);
7243
7.95k
        } else if (ent->children == NULL) {
7244
            /*
7245
             * Probably running in SAX mode and the callbacks don't
7246
             * build the entity content. Parse the entity again.
7247
             *
7248
             * This will also be triggered in normal tree builder mode
7249
             * if an entity happens to be empty, causing unnecessary
7250
             * reloads. It's hard to come up with a reliable check in
7251
             * which mode we're running.
7252
             */
7253
1.36k
            xmlCtxtParseEntity(ctxt, ent);
7254
1.36k
        }
7255
14.1k
    }
7256
7257
    /*
7258
     * We also check for amplification if entities aren't substituted.
7259
     * They might be expanded later.
7260
     */
7261
15.0k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7262
108
        return;
7263
7264
14.9k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7265
1.75k
        return;
7266
7267
13.1k
    if (ctxt->replaceEntities == 0) {
7268
  /*
7269
   * Create a reference
7270
   */
7271
3.64k
        if (ctxt->sax->reference != NULL)
7272
3.64k
      ctxt->sax->reference(ctxt->userData, ent->name);
7273
9.51k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7274
8.38k
        xmlNodePtr copy, cur;
7275
7276
        /*
7277
         * Seems we are generating the DOM content, copy the tree
7278
   */
7279
8.38k
        cur = ent->children;
7280
7281
        /*
7282
         * Handle first text node with SAX to coalesce text efficiently
7283
         */
7284
8.38k
        if ((cur->type == XML_TEXT_NODE) ||
7285
8.38k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7286
7.93k
            int len = xmlStrlen(cur->content);
7287
7288
7.93k
            if ((cur->type == XML_TEXT_NODE) ||
7289
7.93k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7290
7.67k
                if (ctxt->sax->characters != NULL)
7291
7.67k
                    ctxt->sax->characters(ctxt, cur->content, len);
7292
7.67k
            } else {
7293
264
                if (ctxt->sax->cdataBlock != NULL)
7294
264
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
264
            }
7296
7297
7.93k
            cur = cur->next;
7298
7.93k
        }
7299
7300
47.5k
        while (cur != NULL) {
7301
41.0k
            xmlNodePtr last;
7302
7303
            /*
7304
             * Handle last text node with SAX to coalesce text efficiently
7305
             */
7306
41.0k
            if ((cur->next == NULL) &&
7307
41.0k
                ((cur->type == XML_TEXT_NODE) ||
7308
4.46k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7309
1.56k
                int len = xmlStrlen(cur->content);
7310
7311
1.56k
                if ((cur->type == XML_TEXT_NODE) ||
7312
1.56k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7313
1.30k
                    if (ctxt->sax->characters != NULL)
7314
1.30k
                        ctxt->sax->characters(ctxt, cur->content, len);
7315
1.30k
                } else {
7316
256
                    if (ctxt->sax->cdataBlock != NULL)
7317
256
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7318
256
                }
7319
7320
1.56k
                break;
7321
1.56k
            }
7322
7323
            /*
7324
             * Reset coalesce buffer stats only for non-text nodes.
7325
             */
7326
39.4k
            ctxt->nodemem = 0;
7327
39.4k
            ctxt->nodelen = 0;
7328
7329
39.4k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7330
7331
39.4k
            if (copy == NULL) {
7332
314
                xmlErrMemory(ctxt);
7333
314
                break;
7334
314
            }
7335
7336
39.1k
            if (ctxt->parseMode == XML_PARSE_READER) {
7337
                /* Needed for reader */
7338
0
                copy->extra = cur->extra;
7339
                /* Maybe needed for reader */
7340
0
                copy->_private = cur->_private;
7341
0
            }
7342
7343
39.1k
            copy->parent = ctxt->node;
7344
39.1k
            last = ctxt->node->last;
7345
39.1k
            if (last == NULL) {
7346
153
                ctxt->node->children = copy;
7347
39.0k
            } else {
7348
39.0k
                last->next = copy;
7349
39.0k
                copy->prev = last;
7350
39.0k
            }
7351
39.1k
            ctxt->node->last = copy;
7352
7353
39.1k
            cur = cur->next;
7354
39.1k
        }
7355
8.38k
    }
7356
13.1k
}
7357
7358
static void
7359
1.25M
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7360
    /*
7361
     * [ WFC: Entity Declared ]
7362
     * In a document without any DTD, a document with only an
7363
     * internal DTD subset which contains no parameter entity
7364
     * references, or a document with "standalone='yes'", the
7365
     * Name given in the entity reference must match that in an
7366
     * entity declaration, except that well-formed documents
7367
     * need not declare any of the following entities: amp, lt,
7368
     * gt, apos, quot.
7369
     * The declaration of a parameter entity must precede any
7370
     * reference to it.
7371
     * Similarly, the declaration of a general entity must
7372
     * precede any reference to it which appears in a default
7373
     * value in an attribute-list declaration. Note that if
7374
     * entities are declared in the external subset or in
7375
     * external parameter entities, a non-validating processor
7376
     * is not obligated to read and process their declarations;
7377
     * for such documents, the rule that an entity must be
7378
     * declared is a well-formedness constraint only if
7379
     * standalone='yes'.
7380
     */
7381
1.25M
    if ((ctxt->standalone == 1) ||
7382
1.25M
        ((ctxt->hasExternalSubset == 0) &&
7383
1.25M
         (ctxt->hasPErefs == 0))) {
7384
1.19M
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7385
1.19M
                          "Entity '%s' not defined\n", name);
7386
1.19M
#ifdef LIBXML_VALID_ENABLED
7387
1.19M
    } else if (ctxt->validate) {
7388
        /*
7389
         * [ VC: Entity Declared ]
7390
         * In a document with an external subset or external
7391
         * parameter entities with "standalone='no'", ...
7392
         * ... The declaration of a parameter entity must
7393
         * precede any reference to it...
7394
         */
7395
40.9k
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7396
40.9k
                         "Entity '%s' not defined\n", name, NULL);
7397
40.9k
#endif
7398
40.9k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7399
11.2k
               ((ctxt->replaceEntities) &&
7400
9.56k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7401
        /*
7402
         * Also raise a non-fatal error
7403
         *
7404
         * - if the external subset is loaded and all entity declarations
7405
         *   should be available, or
7406
         * - entity substitution was requested without restricting
7407
         *   external entity access.
7408
         */
7409
9.56k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7410
9.56k
                     "Entity '%s' not defined\n", name);
7411
9.56k
    } else {
7412
1.68k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7413
1.68k
                      "Entity '%s' not defined\n", name, NULL);
7414
1.68k
    }
7415
7416
1.25M
    ctxt->valid = 0;
7417
1.25M
}
7418
7419
static xmlEntityPtr
7420
6.17M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7421
6.17M
    xmlEntityPtr ent = NULL;
7422
7423
    /*
7424
     * Predefined entities override any extra definition
7425
     */
7426
6.17M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7427
4.01M
        ent = xmlGetPredefinedEntity(name);
7428
4.01M
        if (ent != NULL)
7429
737k
            return(ent);
7430
4.01M
    }
7431
7432
    /*
7433
     * Ask first SAX for entity resolution, otherwise try the
7434
     * entities which may have been stored in the parser context.
7435
     */
7436
5.43M
    if (ctxt->sax != NULL) {
7437
5.43M
  if (ctxt->sax->getEntity != NULL)
7438
5.43M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7439
5.43M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7440
5.43M
      (ctxt->options & XML_PARSE_OLDSAX))
7441
1.85k
      ent = xmlGetPredefinedEntity(name);
7442
5.43M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7443
5.43M
      (ctxt->userData==ctxt)) {
7444
24.7k
      ent = xmlSAX2GetEntity(ctxt, name);
7445
24.7k
  }
7446
5.43M
    }
7447
7448
5.43M
    if (ent == NULL) {
7449
1.22M
        xmlHandleUndeclaredEntity(ctxt, name);
7450
1.22M
    }
7451
7452
    /*
7453
     * [ WFC: Parsed Entity ]
7454
     * An entity reference must not contain the name of an
7455
     * unparsed entity
7456
     */
7457
4.20M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7458
270
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7459
270
     "Entity reference to unparsed entity %s\n", name);
7460
270
        ent = NULL;
7461
270
    }
7462
7463
    /*
7464
     * [ WFC: No External Entity References ]
7465
     * Attribute values cannot contain direct or indirect
7466
     * entity references to external entities.
7467
     */
7468
4.20M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7469
29.6k
        if (inAttr) {
7470
1.20k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7471
1.20k
                 "Attribute references external entity '%s'\n", name);
7472
1.20k
            ent = NULL;
7473
1.20k
        }
7474
29.6k
    }
7475
7476
5.43M
    return(ent);
7477
6.17M
}
7478
7479
/**
7480
 * Parse an entity reference. Always consumes '&'.
7481
 *
7482
 *     [68] EntityRef ::= '&' Name ';'
7483
 *
7484
 * @param ctxt  an XML parser context
7485
 * @returns the name, or NULL in case of error.
7486
 */
7487
static const xmlChar *
7488
811k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7489
811k
    const xmlChar *name;
7490
7491
811k
    GROW;
7492
7493
811k
    if (RAW != '&')
7494
0
        return(NULL);
7495
811k
    NEXT;
7496
811k
    name = xmlParseName(ctxt);
7497
811k
    if (name == NULL) {
7498
180k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7499
180k
           "xmlParseEntityRef: no name\n");
7500
180k
        return(NULL);
7501
180k
    }
7502
630k
    if (RAW != ';') {
7503
71.6k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
71.6k
  return(NULL);
7505
71.6k
    }
7506
558k
    NEXT;
7507
7508
558k
    return(name);
7509
630k
}
7510
7511
/**
7512
 * @deprecated Internal function, don't use.
7513
 *
7514
 * @param ctxt  an XML parser context
7515
 * @returns the xmlEntity if found, or NULL otherwise.
7516
 */
7517
xmlEntity *
7518
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7519
0
    const xmlChar *name;
7520
7521
0
    if (ctxt == NULL)
7522
0
        return(NULL);
7523
7524
0
    name = xmlParseEntityRefInternal(ctxt);
7525
0
    if (name == NULL)
7526
0
        return(NULL);
7527
7528
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7529
0
}
7530
7531
/**
7532
 * Parse an entity reference, but this version parses it from
7533
 * a string value.
7534
 *
7535
 *     [68] EntityRef ::= '&' Name ';'
7536
 *
7537
 * [ WFC: Entity Declared ]
7538
 * In a document without any DTD, a document with only an internal DTD
7539
 * subset which contains no parameter entity references, or a document
7540
 * with "standalone='yes'", the Name given in the entity reference
7541
 * must match that in an entity declaration, except that well-formed
7542
 * documents need not declare any of the following entities: amp, lt,
7543
 * gt, apos, quot.  The declaration of a parameter entity must precede
7544
 * any reference to it.  Similarly, the declaration of a general entity
7545
 * must precede any reference to it which appears in a default value in an
7546
 * attribute-list declaration. Note that if entities are declared in the
7547
 * external subset or in external parameter entities, a non-validating
7548
 * processor is not obligated to read and process their declarations;
7549
 * for such documents, the rule that an entity must be declared is a
7550
 * well-formedness constraint only if standalone='yes'.
7551
 *
7552
 * [ WFC: Parsed Entity ]
7553
 * An entity reference must not contain the name of an unparsed entity
7554
 *
7555
 * @param ctxt  an XML parser context
7556
 * @param str  a pointer to an index in the string
7557
 * @returns the entity name, or NULL in case of error. The str pointer
7558
 * is updated to the current location in the string.
7559
 */
7560
static xmlChar *
7561
5.61M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7562
5.61M
    xmlChar *name;
7563
5.61M
    const xmlChar *ptr;
7564
5.61M
    xmlChar cur;
7565
7566
5.61M
    if ((str == NULL) || (*str == NULL))
7567
0
        return(NULL);
7568
5.61M
    ptr = *str;
7569
5.61M
    cur = *ptr;
7570
5.61M
    if (cur != '&')
7571
0
  return(NULL);
7572
7573
5.61M
    ptr++;
7574
5.61M
    name = xmlParseStringName(ctxt, &ptr);
7575
5.61M
    if (name == NULL) {
7576
108
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7577
108
           "xmlParseStringEntityRef: no name\n");
7578
108
  *str = ptr;
7579
108
  return(NULL);
7580
108
    }
7581
5.61M
    if (*ptr != ';') {
7582
33
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7583
33
        xmlFree(name);
7584
33
  *str = ptr;
7585
33
  return(NULL);
7586
33
    }
7587
5.61M
    ptr++;
7588
7589
5.61M
    *str = ptr;
7590
5.61M
    return(name);
7591
5.61M
}
7592
7593
/**
7594
 * Parse a parameter entity reference. Always consumes '%'.
7595
 *
7596
 * The entity content is handled directly by pushing its content as
7597
 * a new input stream.
7598
 *
7599
 *     [69] PEReference ::= '%' Name ';'
7600
 *
7601
 * [ WFC: No Recursion ]
7602
 * A parsed entity must not contain a recursive
7603
 * reference to itself, either directly or indirectly.
7604
 *
7605
 * [ WFC: Entity Declared ]
7606
 * In a document without any DTD, a document with only an internal DTD
7607
 * subset which contains no parameter entity references, or a document
7608
 * with "standalone='yes'", ...  ... The declaration of a parameter
7609
 * entity must precede any reference to it...
7610
 *
7611
 * [ VC: Entity Declared ]
7612
 * In a document with an external subset or external parameter entities
7613
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7614
 * must precede any reference to it...
7615
 *
7616
 * [ WFC: In DTD ]
7617
 * Parameter-entity references may only appear in the DTD.
7618
 * NOTE: misleading but this is handled.
7619
 *
7620
 * @param ctxt  an XML parser context
7621
 * @param markupDecl  whether the PERef starts a markup declaration
7622
 */
7623
static void
7624
156k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7625
156k
    const xmlChar *name;
7626
156k
    xmlEntityPtr entity = NULL;
7627
156k
    xmlParserInputPtr input;
7628
7629
156k
    if (RAW != '%')
7630
0
        return;
7631
156k
    NEXT;
7632
156k
    name = xmlParseName(ctxt);
7633
156k
    if (name == NULL) {
7634
13.8k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7635
13.8k
  return;
7636
13.8k
    }
7637
142k
    if (RAW != ';') {
7638
12.8k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7639
12.8k
        return;
7640
12.8k
    }
7641
7642
129k
    NEXT;
7643
7644
    /* Must be set before xmlHandleUndeclaredEntity */
7645
129k
    ctxt->hasPErefs = 1;
7646
7647
    /*
7648
     * Request the entity from SAX
7649
     */
7650
129k
    if ((ctxt->sax != NULL) &&
7651
129k
  (ctxt->sax->getParameterEntity != NULL))
7652
129k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7653
7654
129k
    if (entity == NULL) {
7655
22.8k
        xmlHandleUndeclaredEntity(ctxt, name);
7656
106k
    } else {
7657
  /*
7658
   * Sanity check in case the lookup returned a non-parameter entity
7659
   */
7660
106k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7661
106k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7662
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7663
0
      "Internal: %%%s; is not a parameter entity\n",
7664
0
        name, NULL);
7665
106k
  } else {
7666
106k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7667
106k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7668
88.6k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7669
88.3k
      (ctxt->replaceEntities == 0) &&
7670
88.3k
      (ctxt->validate == 0))))
7671
1.10k
    return;
7672
7673
105k
            if (entity->flags & XML_ENT_EXPANDING) {
7674
13
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7675
13
                xmlHaltParser(ctxt);
7676
13
                return;
7677
13
            }
7678
7679
105k
      input = xmlNewEntityInputStream(ctxt, entity);
7680
105k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7681
21.8k
                xmlFreeInputStream(input);
7682
21.8k
    return;
7683
21.8k
            }
7684
7685
83.6k
            entity->flags |= XML_ENT_EXPANDING;
7686
7687
83.6k
            if (markupDecl)
7688
72.0k
                input->flags |= XML_INPUT_MARKUP_DECL;
7689
7690
83.6k
            GROW;
7691
7692
83.6k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7693
65.7k
                xmlDetectEncoding(ctxt);
7694
7695
65.7k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7696
65.7k
                    (IS_BLANK_CH(NXT(5)))) {
7697
10.6k
                    xmlParseTextDecl(ctxt);
7698
10.6k
                }
7699
65.7k
            }
7700
83.6k
  }
7701
106k
    }
7702
129k
}
7703
7704
/**
7705
 * Parse a parameter entity reference.
7706
 *
7707
 * @deprecated Internal function, don't use.
7708
 *
7709
 * @param ctxt  an XML parser context
7710
 */
7711
void
7712
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7713
0
    xmlParsePERefInternal(ctxt, 0);
7714
0
}
7715
7716
/**
7717
 * Load the content of an entity.
7718
 *
7719
 * @param ctxt  an XML parser context
7720
 * @param entity  an unloaded system entity
7721
 * @returns 0 in case of success and -1 in case of failure
7722
 */
7723
static int
7724
21.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7725
21.5k
    xmlParserInputPtr oldinput, input = NULL;
7726
21.5k
    xmlParserInputPtr *oldinputTab;
7727
21.5k
    xmlChar *oldencoding;
7728
21.5k
    xmlChar *content = NULL;
7729
21.5k
    xmlResourceType rtype;
7730
21.5k
    size_t length, i;
7731
21.5k
    int oldinputNr, oldinputMax;
7732
21.5k
    int ret = -1;
7733
21.5k
    int res;
7734
7735
21.5k
    if ((ctxt == NULL) || (entity == NULL) ||
7736
21.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737
21.5k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738
21.5k
  (entity->content != NULL)) {
7739
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7740
0
              "xmlLoadEntityContent parameter error");
7741
0
        return(-1);
7742
0
    }
7743
7744
21.5k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7745
21.5k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7746
0
    else
7747
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7748
7749
21.5k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7750
21.5k
                            (char *) entity->ExternalID, rtype);
7751
21.5k
    if (input == NULL)
7752
1.11k
        return(-1);
7753
7754
20.4k
    oldinput = ctxt->input;
7755
20.4k
    oldinputNr = ctxt->inputNr;
7756
20.4k
    oldinputMax = ctxt->inputMax;
7757
20.4k
    oldinputTab = ctxt->inputTab;
7758
20.4k
    oldencoding = ctxt->encoding;
7759
7760
20.4k
    ctxt->input = NULL;
7761
20.4k
    ctxt->inputNr = 0;
7762
20.4k
    ctxt->inputMax = 1;
7763
20.4k
    ctxt->encoding = NULL;
7764
20.4k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7765
20.4k
    if (ctxt->inputTab == NULL) {
7766
13
        xmlErrMemory(ctxt);
7767
13
        xmlFreeInputStream(input);
7768
13
        goto error;
7769
13
    }
7770
7771
20.4k
    xmlBufResetInput(input->buf->buffer, input);
7772
7773
20.4k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7774
17
        xmlFreeInputStream(input);
7775
17
        goto error;
7776
17
    }
7777
7778
20.4k
    xmlDetectEncoding(ctxt);
7779
7780
    /*
7781
     * Parse a possible text declaration first
7782
     */
7783
20.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7784
11.6k
  xmlParseTextDecl(ctxt);
7785
        /*
7786
         * An XML-1.0 document can't reference an entity not XML-1.0
7787
         */
7788
11.6k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7789
11.6k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7790
5.52k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7791
5.52k
                           "Version mismatch between document and entity\n");
7792
5.52k
        }
7793
11.6k
    }
7794
7795
20.4k
    length = input->cur - input->base;
7796
20.4k
    xmlBufShrink(input->buf->buffer, length);
7797
20.4k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7798
7799
68.8k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7800
48.4k
        ;
7801
7802
20.4k
    xmlBufResetInput(input->buf->buffer, input);
7803
7804
20.4k
    if (res < 0) {
7805
4.50k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7806
4.50k
        goto error;
7807
4.50k
    }
7808
7809
15.9k
    length = xmlBufUse(input->buf->buffer);
7810
15.9k
    if (length > INT_MAX) {
7811
0
        xmlErrMemory(ctxt);
7812
0
        goto error;
7813
0
    }
7814
7815
15.9k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7816
15.9k
    if (content == NULL) {
7817
28
        xmlErrMemory(ctxt);
7818
28
        goto error;
7819
28
    }
7820
7821
16.1M
    for (i = 0; i < length; ) {
7822
16.1M
        int clen = length - i;
7823
16.1M
        int c = xmlGetUTF8Char(content + i, &clen);
7824
7825
16.1M
        if ((c < 0) || (!IS_CHAR(c))) {
7826
15.6k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7827
15.6k
                              "xmlLoadEntityContent: invalid char value %d\n",
7828
15.6k
                              content[i]);
7829
15.6k
            goto error;
7830
15.6k
        }
7831
16.1M
        i += clen;
7832
16.1M
    }
7833
7834
270
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7835
270
    entity->content = content;
7836
270
    entity->length = length;
7837
270
    content = NULL;
7838
270
    ret = 0;
7839
7840
20.4k
error:
7841
40.8k
    while (ctxt->inputNr > 0)
7842
20.4k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7843
20.4k
    xmlFree(ctxt->inputTab);
7844
20.4k
    xmlFree(ctxt->encoding);
7845
7846
20.4k
    ctxt->input = oldinput;
7847
20.4k
    ctxt->inputNr = oldinputNr;
7848
20.4k
    ctxt->inputMax = oldinputMax;
7849
20.4k
    ctxt->inputTab = oldinputTab;
7850
20.4k
    ctxt->encoding = oldencoding;
7851
7852
20.4k
    xmlFree(content);
7853
7854
20.4k
    return(ret);
7855
270
}
7856
7857
/**
7858
 * Parse PEReference declarations
7859
 *
7860
 *     [69] PEReference ::= '%' Name ';'
7861
 *
7862
 * [ WFC: No Recursion ]
7863
 * A parsed entity must not contain a recursive
7864
 * reference to itself, either directly or indirectly.
7865
 *
7866
 * [ WFC: Entity Declared ]
7867
 * In a document without any DTD, a document with only an internal DTD
7868
 * subset which contains no parameter entity references, or a document
7869
 * with "standalone='yes'", ...  ... The declaration of a parameter
7870
 * entity must precede any reference to it...
7871
 *
7872
 * [ VC: Entity Declared ]
7873
 * In a document with an external subset or external parameter entities
7874
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7875
 * must precede any reference to it...
7876
 *
7877
 * [ WFC: In DTD ]
7878
 * Parameter-entity references may only appear in the DTD.
7879
 * NOTE: misleading but this is handled.
7880
 *
7881
 * @param ctxt  an XML parser context
7882
 * @param str  a pointer to an index in the string
7883
 * @returns the string of the entity content.
7884
 *         str is updated to the current value of the index
7885
 */
7886
static xmlEntityPtr
7887
48.4k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7888
48.4k
    const xmlChar *ptr;
7889
48.4k
    xmlChar cur;
7890
48.4k
    xmlChar *name;
7891
48.4k
    xmlEntityPtr entity = NULL;
7892
7893
48.4k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7894
48.4k
    ptr = *str;
7895
48.4k
    cur = *ptr;
7896
48.4k
    if (cur != '%')
7897
0
        return(NULL);
7898
48.4k
    ptr++;
7899
48.4k
    name = xmlParseStringName(ctxt, &ptr);
7900
48.4k
    if (name == NULL) {
7901
1.99k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7902
1.99k
           "xmlParseStringPEReference: no name\n");
7903
1.99k
  *str = ptr;
7904
1.99k
  return(NULL);
7905
1.99k
    }
7906
46.4k
    cur = *ptr;
7907
46.4k
    if (cur != ';') {
7908
2.27k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7909
2.27k
  xmlFree(name);
7910
2.27k
  *str = ptr;
7911
2.27k
  return(NULL);
7912
2.27k
    }
7913
44.1k
    ptr++;
7914
7915
    /* Must be set before xmlHandleUndeclaredEntity */
7916
44.1k
    ctxt->hasPErefs = 1;
7917
7918
    /*
7919
     * Request the entity from SAX
7920
     */
7921
44.1k
    if ((ctxt->sax != NULL) &&
7922
44.1k
  (ctxt->sax->getParameterEntity != NULL))
7923
44.1k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7924
7925
44.1k
    if (entity == NULL) {
7926
2.49k
        xmlHandleUndeclaredEntity(ctxt, name);
7927
41.6k
    } else {
7928
  /*
7929
   * Internal checking in case the entity quest barfed
7930
   */
7931
41.6k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7932
41.6k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7933
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7934
0
        "%%%s; is not a parameter entity\n",
7935
0
        name, NULL);
7936
0
  }
7937
41.6k
    }
7938
7939
44.1k
    xmlFree(name);
7940
44.1k
    *str = ptr;
7941
44.1k
    return(entity);
7942
46.4k
}
7943
7944
/**
7945
 * Parse a DOCTYPE declaration
7946
 *
7947
 * @deprecated Internal function, don't use.
7948
 *
7949
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7950
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7951
 *
7952
 * [ VC: Root Element Type ]
7953
 * The Name in the document type declaration must match the element
7954
 * type of the root element.
7955
 *
7956
 * @param ctxt  an XML parser context
7957
 */
7958
7959
void
7960
44.6k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7961
44.6k
    const xmlChar *name = NULL;
7962
44.6k
    xmlChar *publicId = NULL;
7963
44.6k
    xmlChar *URI = NULL;
7964
7965
    /*
7966
     * We know that '<!DOCTYPE' has been detected.
7967
     */
7968
44.6k
    SKIP(9);
7969
7970
44.6k
    if (SKIP_BLANKS == 0) {
7971
11.0k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7972
11.0k
                       "Space required after 'DOCTYPE'\n");
7973
11.0k
    }
7974
7975
    /*
7976
     * Parse the DOCTYPE name.
7977
     */
7978
44.6k
    name = xmlParseName(ctxt);
7979
44.6k
    if (name == NULL) {
7980
8.04k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7981
8.04k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7982
8.04k
    }
7983
44.6k
    ctxt->intSubName = name;
7984
7985
44.6k
    SKIP_BLANKS;
7986
7987
    /*
7988
     * Check for public and system identifier (URI)
7989
     */
7990
44.6k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7991
7992
44.6k
    if ((URI != NULL) || (publicId != NULL)) {
7993
9.45k
        ctxt->hasExternalSubset = 1;
7994
9.45k
    }
7995
44.6k
    ctxt->extSubURI = URI;
7996
44.6k
    ctxt->extSubSystem = publicId;
7997
7998
44.6k
    SKIP_BLANKS;
7999
8000
    /*
8001
     * Create and update the internal subset.
8002
     */
8003
44.6k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8004
44.6k
  (!ctxt->disableSAX))
8005
42.2k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
8006
8007
44.6k
    if ((RAW != '[') && (RAW != '>')) {
8008
3.04k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8009
3.04k
    }
8010
44.6k
}
8011
8012
/**
8013
 * Parse the internal subset declaration
8014
 *
8015
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8016
 * @param ctxt  an XML parser context
8017
 */
8018
8019
static void
8020
34.9k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8021
    /*
8022
     * Is there any DTD definition ?
8023
     */
8024
34.9k
    if (RAW == '[') {
8025
34.9k
        int oldInputNr = ctxt->inputNr;
8026
8027
34.9k
        NEXT;
8028
  /*
8029
   * Parse the succession of Markup declarations and
8030
   * PEReferences.
8031
   * Subsequence (markupdecl | PEReference | S)*
8032
   */
8033
34.9k
  SKIP_BLANKS;
8034
819k
        while (1) {
8035
819k
            if (PARSER_STOPPED(ctxt)) {
8036
5.07k
                return;
8037
814k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8038
70.3k
                if (ctxt->inputNr <= oldInputNr) {
8039
2.36k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8040
2.36k
                    return;
8041
2.36k
                }
8042
67.9k
                xmlPopPE(ctxt);
8043
744k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8044
19.9k
                NEXT;
8045
19.9k
                SKIP_BLANKS;
8046
19.9k
                break;
8047
724k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8048
724k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8049
                /*
8050
                 * Conditional sections are allowed in external entities
8051
                 * included by PE References in the internal subset.
8052
                 */
8053
1.69k
                xmlParseConditionalSections(ctxt);
8054
722k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8055
581k
                xmlParseMarkupDecl(ctxt);
8056
581k
            } else if (RAW == '%') {
8057
133k
                xmlParsePERefInternal(ctxt, 1);
8058
133k
            } else {
8059
7.57k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8060
8061
8.81k
                while (ctxt->inputNr > oldInputNr)
8062
1.23k
                    xmlPopPE(ctxt);
8063
7.57k
                return;
8064
7.57k
            }
8065
784k
            SKIP_BLANKS;
8066
784k
            SHRINK;
8067
784k
            GROW;
8068
784k
        }
8069
34.9k
    }
8070
8071
    /*
8072
     * We should be at the end of the DOCTYPE declaration.
8073
     */
8074
19.9k
    if (RAW != '>') {
8075
590
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8076
590
        return;
8077
590
    }
8078
19.3k
    NEXT;
8079
19.3k
}
8080
8081
#ifdef LIBXML_SAX1_ENABLED
8082
/**
8083
 * Parse an attribute
8084
 *
8085
 * @deprecated Internal function, don't use.
8086
 *
8087
 *     [41] Attribute ::= Name Eq AttValue
8088
 *
8089
 * [ WFC: No External Entity References ]
8090
 * Attribute values cannot contain direct or indirect entity references
8091
 * to external entities.
8092
 *
8093
 * [ WFC: No < in Attribute Values ]
8094
 * The replacement text of any entity referred to directly or indirectly in
8095
 * an attribute value (other than "&lt;") must not contain a <.
8096
 *
8097
 * [ VC: Attribute Value Type ]
8098
 * The attribute must have been declared; the value must be of the type
8099
 * declared for it.
8100
 *
8101
 *     [25] Eq ::= S? '=' S?
8102
 *
8103
 * With namespace:
8104
 *
8105
 *     [NS 11] Attribute ::= QName Eq AttValue
8106
 *
8107
 * Also the case QName == xmlns:??? is handled independently as a namespace
8108
 * definition.
8109
 *
8110
 * @param ctxt  an XML parser context
8111
 * @param value  a xmlChar ** used to store the value of the attribute
8112
 * @returns the attribute name, and the value in *value.
8113
 */
8114
8115
const xmlChar *
8116
219k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8117
219k
    const xmlChar *name;
8118
219k
    xmlChar *val;
8119
8120
219k
    *value = NULL;
8121
219k
    GROW;
8122
219k
    name = xmlParseName(ctxt);
8123
219k
    if (name == NULL) {
8124
86.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8125
86.1k
                 "error parsing attribute name\n");
8126
86.1k
        return(NULL);
8127
86.1k
    }
8128
8129
    /*
8130
     * read the value
8131
     */
8132
133k
    SKIP_BLANKS;
8133
133k
    if (RAW == '=') {
8134
94.7k
        NEXT;
8135
94.7k
  SKIP_BLANKS;
8136
94.7k
  val = xmlParseAttValue(ctxt);
8137
94.7k
    } else {
8138
38.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8139
38.2k
         "Specification mandates value for attribute %s\n", name);
8140
38.2k
  return(name);
8141
38.2k
    }
8142
8143
    /*
8144
     * Check that xml:lang conforms to the specification
8145
     * No more registered as an error, just generate a warning now
8146
     * since this was deprecated in XML second edition
8147
     */
8148
94.7k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8149
7.46k
  if (!xmlCheckLanguageID(val)) {
8150
6.27k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8151
6.27k
              "Malformed value for xml:lang : %s\n",
8152
6.27k
        val, NULL);
8153
6.27k
  }
8154
7.46k
    }
8155
8156
    /*
8157
     * Check that xml:space conforms to the specification
8158
     */
8159
94.7k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8160
1.80k
  if (xmlStrEqual(val, BAD_CAST "default"))
8161
248
      *(ctxt->space) = 0;
8162
1.55k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8163
215
      *(ctxt->space) = 1;
8164
1.33k
  else {
8165
1.33k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8166
1.33k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8167
1.33k
                                 val, NULL);
8168
1.33k
  }
8169
1.80k
    }
8170
8171
94.7k
    *value = val;
8172
94.7k
    return(name);
8173
133k
}
8174
8175
/**
8176
 * Parse a start tag. Always consumes '<'.
8177
 *
8178
 * @deprecated Internal function, don't use.
8179
 *
8180
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8181
 *
8182
 * [ WFC: Unique Att Spec ]
8183
 * No attribute name may appear more than once in the same start-tag or
8184
 * empty-element tag.
8185
 *
8186
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8187
 *
8188
 * [ WFC: Unique Att Spec ]
8189
 * No attribute name may appear more than once in the same start-tag or
8190
 * empty-element tag.
8191
 *
8192
 * With namespace:
8193
 *
8194
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8195
 *
8196
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8197
 *
8198
 * @param ctxt  an XML parser context
8199
 * @returns the element name parsed
8200
 */
8201
8202
const xmlChar *
8203
333k
xmlParseStartTag(xmlParserCtxt *ctxt) {
8204
333k
    const xmlChar *name;
8205
333k
    const xmlChar *attname;
8206
333k
    xmlChar *attvalue;
8207
333k
    const xmlChar **atts = ctxt->atts;
8208
333k
    int nbatts = 0;
8209
333k
    int maxatts = ctxt->maxatts;
8210
333k
    int i;
8211
8212
333k
    if (RAW != '<') return(NULL);
8213
333k
    NEXT1;
8214
8215
333k
    name = xmlParseName(ctxt);
8216
333k
    if (name == NULL) {
8217
35.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8218
35.1k
       "xmlParseStartTag: invalid element name\n");
8219
35.1k
        return(NULL);
8220
35.1k
    }
8221
8222
    /*
8223
     * Now parse the attributes, it ends up with the ending
8224
     *
8225
     * (S Attribute)* S?
8226
     */
8227
298k
    SKIP_BLANKS;
8228
298k
    GROW;
8229
8230
386k
    while (((RAW != '>') &&
8231
386k
     ((RAW != '/') || (NXT(1) != '>')) &&
8232
386k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8233
219k
  attname = xmlParseAttribute(ctxt, &attvalue);
8234
219k
        if (attname == NULL)
8235
86.1k
      break;
8236
133k
        if (attvalue != NULL) {
8237
      /*
8238
       * [ WFC: Unique Att Spec ]
8239
       * No attribute name may appear more than once in the same
8240
       * start-tag or empty-element tag.
8241
       */
8242
301k
      for (i = 0; i < nbatts;i += 2) {
8243
220k
          if (xmlStrEqual(atts[i], attname)) {
8244
3.27k
        xmlErrAttributeDup(ctxt, NULL, attname);
8245
3.27k
        goto failed;
8246
3.27k
    }
8247
220k
      }
8248
      /*
8249
       * Add the pair to atts
8250
       */
8251
81.6k
      if (nbatts + 4 > maxatts) {
8252
8.85k
          const xmlChar **n;
8253
8.85k
                int newSize;
8254
8255
8.85k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8256
8.85k
                                          11, XML_MAX_ATTRS);
8257
8.85k
                if (newSize < 0) {
8258
0
        xmlErrMemory(ctxt);
8259
0
        goto failed;
8260
0
    }
8261
8.85k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8262
8.85k
                if (newSize < 2)
8263
7.34k
                    newSize = 2;
8264
8.85k
#endif
8265
8.85k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8266
8.85k
    if (n == NULL) {
8267
20
        xmlErrMemory(ctxt);
8268
20
        goto failed;
8269
20
    }
8270
8.83k
    atts = n;
8271
8.83k
                maxatts = newSize * 2;
8272
8.83k
    ctxt->atts = atts;
8273
8.83k
    ctxt->maxatts = maxatts;
8274
8.83k
      }
8275
8276
81.5k
      atts[nbatts++] = attname;
8277
81.5k
      atts[nbatts++] = attvalue;
8278
81.5k
      atts[nbatts] = NULL;
8279
81.5k
      atts[nbatts + 1] = NULL;
8280
8281
81.5k
            attvalue = NULL;
8282
81.5k
  }
8283
8284
133k
failed:
8285
8286
133k
        if (attvalue != NULL)
8287
3.29k
            xmlFree(attvalue);
8288
8289
133k
  GROW
8290
133k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8291
44.8k
      break;
8292
88.1k
  if (SKIP_BLANKS == 0) {
8293
71.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8294
71.4k
         "attributes construct error\n");
8295
71.4k
  }
8296
88.1k
  SHRINK;
8297
88.1k
        GROW;
8298
88.1k
    }
8299
8300
    /*
8301
     * SAX: Start of Element !
8302
     */
8303
298k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8304
298k
  (!ctxt->disableSAX)) {
8305
277k
  if (nbatts > 0)
8306
66.0k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8307
211k
  else
8308
211k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8309
277k
    }
8310
8311
298k
    if (atts != NULL) {
8312
        /* Free only the content strings */
8313
248k
        for (i = 1;i < nbatts;i+=2)
8314
81.5k
      if (atts[i] != NULL)
8315
81.5k
         xmlFree((xmlChar *) atts[i]);
8316
167k
    }
8317
298k
    return(name);
8318
298k
}
8319
8320
/**
8321
 * Parse an end tag. Always consumes '</'.
8322
 *
8323
 *     [42] ETag ::= '</' Name S? '>'
8324
 *
8325
 * With namespace
8326
 *
8327
 *     [NS 9] ETag ::= '</' QName S? '>'
8328
 * @param ctxt  an XML parser context
8329
 * @param line  line of the start tag
8330
 */
8331
8332
static void
8333
31.5k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8334
31.5k
    const xmlChar *name;
8335
8336
31.5k
    GROW;
8337
31.5k
    if ((RAW != '<') || (NXT(1) != '/')) {
8338
1.23k
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8339
1.23k
           "xmlParseEndTag: '</' not found\n");
8340
1.23k
  return;
8341
1.23k
    }
8342
30.3k
    SKIP(2);
8343
8344
30.3k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8345
8346
    /*
8347
     * We should definitely be at the ending "S? '>'" part
8348
     */
8349
30.3k
    GROW;
8350
30.3k
    SKIP_BLANKS;
8351
30.3k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8352
7.59k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8353
7.59k
    } else
8354
22.7k
  NEXT1;
8355
8356
    /*
8357
     * [ WFC: Element Type Match ]
8358
     * The Name in an element's end-tag must match the element type in the
8359
     * start-tag.
8360
     *
8361
     */
8362
30.3k
    if (name != (xmlChar*)1) {
8363
4.17k
        if (name == NULL) name = BAD_CAST "unparsable";
8364
4.17k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8365
4.17k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8366
4.17k
                    ctxt->name, line, name);
8367
4.17k
    }
8368
8369
    /*
8370
     * SAX: End of Tag
8371
     */
8372
30.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8373
30.3k
  (!ctxt->disableSAX))
8374
28.9k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8375
8376
30.3k
    namePop(ctxt);
8377
30.3k
    spacePop(ctxt);
8378
30.3k
}
8379
8380
/**
8381
 * Parse an end of tag
8382
 *
8383
 * @deprecated Internal function, don't use.
8384
 *
8385
 *     [42] ETag ::= '</' Name S? '>'
8386
 *
8387
 * With namespace
8388
 *
8389
 *     [NS 9] ETag ::= '</' QName S? '>'
8390
 * @param ctxt  an XML parser context
8391
 */
8392
8393
void
8394
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8395
0
    xmlParseEndTag1(ctxt, 0);
8396
0
}
8397
#endif /* LIBXML_SAX1_ENABLED */
8398
8399
/************************************************************************
8400
 *                  *
8401
 *          SAX 2 specific operations       *
8402
 *                  *
8403
 ************************************************************************/
8404
8405
/**
8406
 * Parse an XML Namespace QName
8407
 *
8408
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8409
 *     [7]  Prefix  ::= NCName
8410
 *     [8]  LocalPart  ::= NCName
8411
 *
8412
 * @param ctxt  an XML parser context
8413
 * @param prefix  pointer to store the prefix part
8414
 * @returns the Name parsed or NULL
8415
 */
8416
8417
static xmlHashedString
8418
6.79M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8419
6.79M
    xmlHashedString l, p;
8420
6.79M
    int start, isNCName = 0;
8421
8422
6.79M
    l.name = NULL;
8423
6.79M
    p.name = NULL;
8424
8425
6.79M
    GROW;
8426
6.79M
    start = CUR_PTR - BASE_PTR;
8427
8428
6.79M
    l = xmlParseNCName(ctxt);
8429
6.79M
    if (l.name != NULL) {
8430
1.16M
        isNCName = 1;
8431
1.16M
        if (CUR == ':') {
8432
322k
            NEXT;
8433
322k
            p = l;
8434
322k
            l = xmlParseNCName(ctxt);
8435
322k
        }
8436
1.16M
    }
8437
6.79M
    if ((l.name == NULL) || (CUR == ':')) {
8438
5.65M
        xmlChar *tmp;
8439
8440
5.65M
        l.name = NULL;
8441
5.65M
        p.name = NULL;
8442
5.65M
        if ((isNCName == 0) && (CUR != ':'))
8443
5.62M
            return(l);
8444
33.1k
        tmp = xmlParseNmtoken(ctxt);
8445
33.1k
        if (tmp != NULL)
8446
18.3k
            xmlFree(tmp);
8447
33.1k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8448
33.1k
                                CUR_PTR - (BASE_PTR + start));
8449
33.1k
        if (l.name == NULL) {
8450
7
            xmlErrMemory(ctxt);
8451
7
            return(l);
8452
7
        }
8453
33.1k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8454
33.1k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8455
33.1k
    }
8456
8457
1.17M
    *prefix = p;
8458
1.17M
    return(l);
8459
6.79M
}
8460
8461
/**
8462
 * Parse an XML Namespace QName
8463
 *
8464
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8465
 *     [7]  Prefix  ::= NCName
8466
 *     [8]  LocalPart  ::= NCName
8467
 *
8468
 * @param ctxt  an XML parser context
8469
 * @param prefix  pointer to store the prefix part
8470
 * @returns the Name parsed or NULL
8471
 */
8472
8473
static const xmlChar *
8474
5.63k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8475
5.63k
    xmlHashedString n, p;
8476
8477
5.63k
    n = xmlParseQNameHashed(ctxt, &p);
8478
5.63k
    if (n.name == NULL)
8479
2.11k
        return(NULL);
8480
3.52k
    *prefix = p.name;
8481
3.52k
    return(n.name);
8482
5.63k
}
8483
8484
/**
8485
 * Parse an XML name and compares for match
8486
 * (specialized for endtag parsing)
8487
 *
8488
 * @param ctxt  an XML parser context
8489
 * @param name  the localname
8490
 * @param prefix  the prefix, if any.
8491
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8492
 * and the name for mismatch
8493
 */
8494
8495
static const xmlChar *
8496
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8497
8.29k
                        xmlChar const *prefix) {
8498
8.29k
    const xmlChar *cmp;
8499
8.29k
    const xmlChar *in;
8500
8.29k
    const xmlChar *ret;
8501
8.29k
    const xmlChar *prefix2;
8502
8503
8.29k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8504
8505
8.29k
    GROW;
8506
8.29k
    in = ctxt->input->cur;
8507
8508
8.29k
    cmp = prefix;
8509
15.1k
    while (*in != 0 && *in == *cmp) {
8510
6.86k
  ++in;
8511
6.86k
  ++cmp;
8512
6.86k
    }
8513
8.29k
    if ((*cmp == 0) && (*in == ':')) {
8514
4.20k
        in++;
8515
4.20k
  cmp = name;
8516
10.4k
  while (*in != 0 && *in == *cmp) {
8517
6.24k
      ++in;
8518
6.24k
      ++cmp;
8519
6.24k
  }
8520
4.20k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8521
      /* success */
8522
2.66k
            ctxt->input->col += in - ctxt->input->cur;
8523
2.66k
      ctxt->input->cur = in;
8524
2.66k
      return((const xmlChar*) 1);
8525
2.66k
  }
8526
4.20k
    }
8527
    /*
8528
     * all strings coms from the dictionary, equality can be done directly
8529
     */
8530
5.63k
    ret = xmlParseQName (ctxt, &prefix2);
8531
5.63k
    if (ret == NULL)
8532
2.11k
        return(NULL);
8533
3.52k
    if ((ret == name) && (prefix == prefix2))
8534
550
  return((const xmlChar*) 1);
8535
2.97k
    return ret;
8536
3.52k
}
8537
8538
/**
8539
 * Parse an attribute in the new SAX2 framework.
8540
 *
8541
 * @param ctxt  an XML parser context
8542
 * @param pref  the element prefix
8543
 * @param elem  the element name
8544
 * @param hprefix  resulting attribute prefix
8545
 * @param value  resulting value of the attribute
8546
 * @param len  resulting length of the attribute
8547
 * @param alloc  resulting indicator if the attribute was allocated
8548
 * @returns the attribute name, and the value in *value, .
8549
 */
8550
8551
static xmlHashedString
8552
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8553
                   const xmlChar * pref, const xmlChar * elem,
8554
                   xmlHashedString * hprefix, xmlChar ** value,
8555
                   int *len, int *alloc)
8556
499k
{
8557
499k
    xmlHashedString hname;
8558
499k
    const xmlChar *prefix, *name;
8559
499k
    xmlChar *val = NULL, *internal_val = NULL;
8560
499k
    int special = 0;
8561
499k
    int isNamespace;
8562
499k
    int flags;
8563
8564
499k
    *value = NULL;
8565
499k
    GROW;
8566
499k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8567
499k
    if (hname.name == NULL) {
8568
106k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8569
106k
                       "error parsing attribute name\n");
8570
106k
        return(hname);
8571
106k
    }
8572
392k
    name = hname.name;
8573
392k
    prefix = hprefix->name;
8574
8575
    /*
8576
     * get the type if needed
8577
     */
8578
392k
    if (ctxt->attsSpecial != NULL) {
8579
155k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8580
155k
                                              prefix, name));
8581
155k
    }
8582
8583
    /*
8584
     * read the value
8585
     */
8586
392k
    SKIP_BLANKS;
8587
392k
    if (RAW != '=') {
8588
55.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8589
55.9k
                          "Specification mandates value for attribute %s\n",
8590
55.9k
                          name);
8591
55.9k
        goto error;
8592
55.9k
    }
8593
8594
8595
337k
    NEXT;
8596
337k
    SKIP_BLANKS;
8597
337k
    flags = 0;
8598
337k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8599
337k
                   (prefix == ctxt->str_xmlns));
8600
337k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8601
337k
                                   isNamespace);
8602
337k
    if (val == NULL)
8603
15.1k
        goto error;
8604
8605
321k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8606
8607
321k
#ifdef LIBXML_VALID_ENABLED
8608
321k
    if ((ctxt->validate) &&
8609
321k
        (ctxt->standalone) &&
8610
321k
        (special & XML_SPECIAL_EXTERNAL) &&
8611
321k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8612
213
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8613
213
                         "standalone: normalization of attribute %s on %s "
8614
213
                         "by external subset declaration\n",
8615
213
                         name, elem);
8616
213
    }
8617
321k
#endif
8618
8619
321k
    if (prefix == ctxt->str_xml) {
8620
        /*
8621
         * Check that xml:lang conforms to the specification
8622
         * No more registered as an error, just generate a warning now
8623
         * since this was deprecated in XML second edition
8624
         */
8625
14.4k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8626
6.32k
            internal_val = xmlStrndup(val, *len);
8627
6.32k
            if (internal_val == NULL)
8628
19
                goto mem_error;
8629
6.30k
            if (!xmlCheckLanguageID(internal_val)) {
8630
5.19k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8631
5.19k
                              "Malformed value for xml:lang : %s\n",
8632
5.19k
                              internal_val, NULL);
8633
5.19k
            }
8634
6.30k
        }
8635
8636
        /*
8637
         * Check that xml:space conforms to the specification
8638
         */
8639
14.4k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8640
879
            internal_val = xmlStrndup(val, *len);
8641
879
            if (internal_val == NULL)
8642
6
                goto mem_error;
8643
873
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8644
318
                *(ctxt->space) = 0;
8645
555
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8646
216
                *(ctxt->space) = 1;
8647
339
            else {
8648
339
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8649
339
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8650
339
                              internal_val, NULL);
8651
339
            }
8652
873
        }
8653
14.4k
        if (internal_val) {
8654
7.17k
            xmlFree(internal_val);
8655
7.17k
        }
8656
14.4k
    }
8657
8658
321k
    *value = val;
8659
321k
    return (hname);
8660
8661
25
mem_error:
8662
25
    xmlErrMemory(ctxt);
8663
71.0k
error:
8664
71.0k
    if ((val != NULL) && (*alloc != 0))
8665
12
        xmlFree(val);
8666
71.0k
    return(hname);
8667
25
}
8668
8669
/**
8670
 * Inserts a new attribute into the hash table.
8671
 *
8672
 * @param ctxt  parser context
8673
 * @param size  size of the hash table
8674
 * @param name  attribute name
8675
 * @param uri  namespace uri
8676
 * @param hashValue  combined hash value of name and uri
8677
 * @param aindex  attribute index (this is a multiple of 5)
8678
 * @returns INT_MAX if no existing attribute was found, the attribute
8679
 * index if an attribute was found, -1 if a memory allocation failed.
8680
 */
8681
static int
8682
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8683
454k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8684
454k
    xmlAttrHashBucket *table = ctxt->attrHash;
8685
454k
    xmlAttrHashBucket *bucket;
8686
454k
    unsigned hindex;
8687
8688
454k
    hindex = hashValue & (size - 1);
8689
454k
    bucket = &table[hindex];
8690
8691
523k
    while (bucket->index >= 0) {
8692
94.1k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8693
8694
94.1k
        if (name == atts[0]) {
8695
26.5k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8696
8697
26.5k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8698
26.5k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8699
7.63k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8700
25.3k
                return(bucket->index);
8701
26.5k
        }
8702
8703
68.7k
        hindex++;
8704
68.7k
        bucket++;
8705
68.7k
        if (hindex >= size) {
8706
1.13k
            hindex = 0;
8707
1.13k
            bucket = table;
8708
1.13k
        }
8709
68.7k
    }
8710
8711
428k
    bucket->index = aindex;
8712
8713
428k
    return(INT_MAX);
8714
454k
}
8715
8716
static int
8717
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8718
                       const xmlChar *name, const xmlChar *prefix,
8719
4.28k
                       unsigned hashValue, int aindex) {
8720
4.28k
    xmlAttrHashBucket *table = ctxt->attrHash;
8721
4.28k
    xmlAttrHashBucket *bucket;
8722
4.28k
    unsigned hindex;
8723
8724
4.28k
    hindex = hashValue & (size - 1);
8725
4.28k
    bucket = &table[hindex];
8726
8727
6.28k
    while (bucket->index >= 0) {
8728
3.42k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8729
8730
3.42k
        if ((name == atts[0]) && (prefix == atts[1]))
8731
1.42k
            return(bucket->index);
8732
8733
1.99k
        hindex++;
8734
1.99k
        bucket++;
8735
1.99k
        if (hindex >= size) {
8736
202
            hindex = 0;
8737
202
            bucket = table;
8738
202
        }
8739
1.99k
    }
8740
8741
2.85k
    bucket->index = aindex;
8742
8743
2.85k
    return(INT_MAX);
8744
4.28k
}
8745
/**
8746
 * Parse a start tag. Always consumes '<'.
8747
 *
8748
 * This routine is called when running SAX2 parsing
8749
 *
8750
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8751
 *
8752
 * [ WFC: Unique Att Spec ]
8753
 * No attribute name may appear more than once in the same start-tag or
8754
 * empty-element tag.
8755
 *
8756
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8757
 *
8758
 * [ WFC: Unique Att Spec ]
8759
 * No attribute name may appear more than once in the same start-tag or
8760
 * empty-element tag.
8761
 *
8762
 * With namespace:
8763
 *
8764
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8765
 *
8766
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8767
 *
8768
 * @param ctxt  an XML parser context
8769
 * @param pref  resulting namespace prefix
8770
 * @param URI  resulting namespace URI
8771
 * @param nbNsPtr  resulting number of namespace declarations
8772
 * @returns the element name parsed
8773
 */
8774
8775
static const xmlChar *
8776
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8777
6.29M
                  const xmlChar **URI, int *nbNsPtr) {
8778
6.29M
    xmlHashedString hlocalname;
8779
6.29M
    xmlHashedString hprefix;
8780
6.29M
    xmlHashedString hattname;
8781
6.29M
    xmlHashedString haprefix;
8782
6.29M
    const xmlChar *localname;
8783
6.29M
    const xmlChar *prefix;
8784
6.29M
    const xmlChar *attname;
8785
6.29M
    const xmlChar *aprefix;
8786
6.29M
    const xmlChar *uri;
8787
6.29M
    xmlChar *attvalue = NULL;
8788
6.29M
    const xmlChar **atts = ctxt->atts;
8789
6.29M
    unsigned attrHashSize = 0;
8790
6.29M
    int maxatts = ctxt->maxatts;
8791
6.29M
    int nratts, nbatts, nbdef;
8792
6.29M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8793
6.29M
    int alloc = 0;
8794
6.29M
    int numNsErr = 0;
8795
6.29M
    int numDupErr = 0;
8796
8797
6.29M
    if (RAW != '<') return(NULL);
8798
6.29M
    NEXT1;
8799
8800
6.29M
    nbatts = 0;
8801
6.29M
    nratts = 0;
8802
6.29M
    nbdef = 0;
8803
6.29M
    nbNs = 0;
8804
6.29M
    nbTotalDef = 0;
8805
6.29M
    attval = 0;
8806
8807
6.29M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8808
0
        xmlErrMemory(ctxt);
8809
0
        return(NULL);
8810
0
    }
8811
8812
6.29M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8813
6.29M
    if (hlocalname.name == NULL) {
8814
5.51M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8815
5.51M
           "StartTag: invalid element name\n");
8816
5.51M
        return(NULL);
8817
5.51M
    }
8818
777k
    localname = hlocalname.name;
8819
777k
    prefix = hprefix.name;
8820
8821
    /*
8822
     * Now parse the attributes, it ends up with the ending
8823
     *
8824
     * (S Attribute)* S?
8825
     */
8826
777k
    SKIP_BLANKS;
8827
777k
    GROW;
8828
8829
    /*
8830
     * The ctxt->atts array will be ultimately passed to the SAX callback
8831
     * containing five xmlChar pointers for each attribute:
8832
     *
8833
     * [0] attribute name
8834
     * [1] attribute prefix
8835
     * [2] namespace URI
8836
     * [3] attribute value
8837
     * [4] end of attribute value
8838
     *
8839
     * To save memory, we reuse this array temporarily and store integers
8840
     * in these pointer variables.
8841
     *
8842
     * [0] attribute name
8843
     * [1] attribute prefix
8844
     * [2] hash value of attribute prefix, and later namespace index
8845
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8846
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8847
     *
8848
     * The ctxt->attallocs array contains an additional unsigned int for
8849
     * each attribute, containing the hash value of the attribute name
8850
     * and the alloc flag in bit 31.
8851
     */
8852
8853
903k
    while (((RAW != '>') &&
8854
903k
     ((RAW != '/') || (NXT(1) != '>')) &&
8855
903k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8856
499k
  int len = -1;
8857
8858
499k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8859
499k
                                          &haprefix, &attvalue, &len,
8860
499k
                                          &alloc);
8861
499k
        if (hattname.name == NULL)
8862
106k
      break;
8863
392k
        if (attvalue == NULL)
8864
71.0k
            goto next_attr;
8865
321k
        attname = hattname.name;
8866
321k
        aprefix = haprefix.name;
8867
321k
  if (len < 0) len = xmlStrlen(attvalue);
8868
8869
321k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8870
78.9k
            xmlHashedString huri;
8871
78.9k
            xmlURIPtr parsedUri;
8872
8873
78.9k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8874
78.9k
            uri = huri.name;
8875
78.9k
            if (uri == NULL) {
8876
10
                xmlErrMemory(ctxt);
8877
10
                goto next_attr;
8878
10
            }
8879
78.9k
            if (*uri != 0) {
8880
77.5k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8881
94
                    xmlErrMemory(ctxt);
8882
94
                    goto next_attr;
8883
94
                }
8884
77.4k
                if (parsedUri == NULL) {
8885
67.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8886
67.5k
                             "xmlns: '%s' is not a valid URI\n",
8887
67.5k
                                       uri, NULL, NULL);
8888
67.5k
                } else {
8889
9.85k
                    if (parsedUri->scheme == NULL) {
8890
5.03k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8891
5.03k
                                  "xmlns: URI %s is not absolute\n",
8892
5.03k
                                  uri, NULL, NULL);
8893
5.03k
                    }
8894
9.85k
                    xmlFreeURI(parsedUri);
8895
9.85k
                }
8896
77.4k
                if (uri == ctxt->str_xml_ns) {
8897
197
                    if (attname != ctxt->str_xml) {
8898
197
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8899
197
                     "xml namespace URI cannot be the default namespace\n",
8900
197
                                 NULL, NULL, NULL);
8901
197
                    }
8902
197
                    goto next_attr;
8903
197
                }
8904
77.2k
                if ((len == 29) &&
8905
77.2k
                    (xmlStrEqual(uri,
8906
894
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8907
215
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8908
215
                         "reuse of the xmlns namespace name is forbidden\n",
8909
215
                             NULL, NULL, NULL);
8910
215
                    goto next_attr;
8911
215
                }
8912
77.2k
            }
8913
8914
78.4k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8915
72.8k
                nbNs++;
8916
242k
        } else if (aprefix == ctxt->str_xmlns) {
8917
111k
            xmlHashedString huri;
8918
111k
            xmlURIPtr parsedUri;
8919
8920
111k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8921
111k
            uri = huri.name;
8922
111k
            if (uri == NULL) {
8923
8
                xmlErrMemory(ctxt);
8924
8
                goto next_attr;
8925
8
            }
8926
8927
111k
            if (attname == ctxt->str_xml) {
8928
436
                if (uri != ctxt->str_xml_ns) {
8929
232
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8930
232
                             "xml namespace prefix mapped to wrong URI\n",
8931
232
                             NULL, NULL, NULL);
8932
232
                }
8933
                /*
8934
                 * Do not keep a namespace definition node
8935
                 */
8936
436
                goto next_attr;
8937
436
            }
8938
110k
            if (uri == ctxt->str_xml_ns) {
8939
196
                if (attname != ctxt->str_xml) {
8940
196
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8941
196
                             "xml namespace URI mapped to wrong prefix\n",
8942
196
                             NULL, NULL, NULL);
8943
196
                }
8944
196
                goto next_attr;
8945
196
            }
8946
110k
            if (attname == ctxt->str_xmlns) {
8947
277
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8948
277
                         "redefinition of the xmlns prefix is forbidden\n",
8949
277
                         NULL, NULL, NULL);
8950
277
                goto next_attr;
8951
277
            }
8952
110k
            if ((len == 29) &&
8953
110k
                (xmlStrEqual(uri,
8954
649
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8955
201
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8956
201
                         "reuse of the xmlns namespace name is forbidden\n",
8957
201
                         NULL, NULL, NULL);
8958
201
                goto next_attr;
8959
201
            }
8960
110k
            if ((uri == NULL) || (uri[0] == 0)) {
8961
416
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8962
416
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8963
416
                              attname, NULL, NULL);
8964
416
                goto next_attr;
8965
109k
            } else {
8966
109k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8967
33
                    xmlErrMemory(ctxt);
8968
33
                    goto next_attr;
8969
33
                }
8970
109k
                if (parsedUri == NULL) {
8971
28.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8972
28.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
8973
28.6k
                                       attname, uri, NULL);
8974
80.9k
                } else {
8975
80.9k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8976
34.9k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8977
34.9k
                                  "xmlns:%s: URI %s is not absolute\n",
8978
34.9k
                                  attname, uri, NULL);
8979
34.9k
                    }
8980
80.9k
                    xmlFreeURI(parsedUri);
8981
80.9k
                }
8982
109k
            }
8983
8984
109k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8985
103k
                nbNs++;
8986
131k
        } else {
8987
            /*
8988
             * Populate attributes array, see above for repurposing
8989
             * of xmlChar pointers.
8990
             */
8991
131k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8992
10.1k
                int res = xmlCtxtGrowAttrs(ctxt);
8993
8994
10.1k
                maxatts = ctxt->maxatts;
8995
10.1k
                atts = ctxt->atts;
8996
8997
10.1k
                if (res < 0)
8998
58
                    goto next_attr;
8999
10.1k
            }
9000
131k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9001
131k
                                        ((unsigned) alloc << 31);
9002
131k
            atts[nbatts++] = attname;
9003
131k
            atts[nbatts++] = aprefix;
9004
131k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
9005
131k
            if (alloc) {
9006
26.1k
                atts[nbatts++] = attvalue;
9007
26.1k
                attvalue += len;
9008
26.1k
                atts[nbatts++] = attvalue;
9009
105k
            } else {
9010
                /*
9011
                 * attvalue points into the input buffer which can be
9012
                 * reallocated. Store differences to input->base instead.
9013
                 * The pointers will be reconstructed later.
9014
                 */
9015
105k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9016
105k
                attvalue += len;
9017
105k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9018
105k
            }
9019
            /*
9020
             * tag if some deallocation is needed
9021
             */
9022
131k
            if (alloc != 0) attval = 1;
9023
131k
            attvalue = NULL; /* moved into atts */
9024
131k
        }
9025
9026
392k
next_attr:
9027
392k
        if ((attvalue != NULL) && (alloc != 0)) {
9028
34.6k
            xmlFree(attvalue);
9029
34.6k
            attvalue = NULL;
9030
34.6k
        }
9031
9032
392k
  GROW
9033
392k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9034
149k
      break;
9035
243k
  if (SKIP_BLANKS == 0) {
9036
118k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9037
118k
         "attributes construct error\n");
9038
118k
      break;
9039
118k
  }
9040
125k
        GROW;
9041
125k
    }
9042
9043
    /*
9044
     * Namespaces from default attributes
9045
     */
9046
777k
    if (ctxt->attsDefault != NULL) {
9047
304k
        xmlDefAttrsPtr defaults;
9048
9049
304k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9050
304k
  if (defaults != NULL) {
9051
781k
      for (i = 0; i < defaults->nbAttrs; i++) {
9052
596k
                xmlDefAttr *attr = &defaults->attrs[i];
9053
9054
596k
          attname = attr->name.name;
9055
596k
    aprefix = attr->prefix.name;
9056
9057
596k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9058
17.8k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9059
9060
17.8k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9061
17.2k
                        nbNs++;
9062
578k
    } else if (aprefix == ctxt->str_xmlns) {
9063
142k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9064
9065
142k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9066
142k
                                      NULL, 1) > 0)
9067
140k
                        nbNs++;
9068
436k
    } else {
9069
436k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9070
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9071
0
                                    "Maximum number of attributes exceeded");
9072
0
                        break;
9073
0
                    }
9074
436k
                    nbTotalDef += 1;
9075
436k
                }
9076
596k
      }
9077
185k
  }
9078
304k
    }
9079
9080
    /*
9081
     * Resolve attribute namespaces
9082
     */
9083
909k
    for (i = 0; i < nbatts; i += 5) {
9084
131k
        attname = atts[i];
9085
131k
        aprefix = atts[i+1];
9086
9087
        /*
9088
  * The default namespace does not apply to attribute names.
9089
  */
9090
131k
  if (aprefix == NULL) {
9091
61.1k
            nsIndex = NS_INDEX_EMPTY;
9092
70.6k
        } else if (aprefix == ctxt->str_xml) {
9093
14.4k
            nsIndex = NS_INDEX_XML;
9094
56.1k
        } else {
9095
56.1k
            haprefix.name = aprefix;
9096
56.1k
            haprefix.hashValue = (size_t) atts[i+2];
9097
56.1k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9098
9099
56.1k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9100
36.6k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9101
36.6k
        "Namespace prefix %s for %s on %s is not defined\n",
9102
36.6k
        aprefix, attname, localname);
9103
36.6k
                nsIndex = NS_INDEX_EMPTY;
9104
36.6k
            }
9105
56.1k
        }
9106
9107
131k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9108
131k
    }
9109
9110
    /*
9111
     * Maximum number of attributes including default attributes.
9112
     */
9113
777k
    maxAtts = nratts + nbTotalDef;
9114
9115
    /*
9116
     * Verify that attribute names are unique.
9117
     */
9118
777k
    if (maxAtts > 1) {
9119
61.4k
        attrHashSize = 4;
9120
82.9k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9121
21.5k
            attrHashSize *= 2;
9122
9123
61.4k
        if (attrHashSize > ctxt->attrHashMax) {
9124
2.59k
            xmlAttrHashBucket *tmp;
9125
9126
2.59k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9127
2.59k
            if (tmp == NULL) {
9128
12
                xmlErrMemory(ctxt);
9129
12
                goto done;
9130
12
            }
9131
9132
2.57k
            ctxt->attrHash = tmp;
9133
2.57k
            ctxt->attrHashMax = attrHashSize;
9134
2.57k
        }
9135
9136
61.4k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9137
9138
136k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9139
74.8k
            const xmlChar *nsuri;
9140
74.8k
            unsigned hashValue, nameHashValue, uriHashValue;
9141
74.8k
            int res;
9142
9143
74.8k
            attname = atts[i];
9144
74.8k
            aprefix = atts[i+1];
9145
74.8k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9146
            /* Hash values always have bit 31 set, see dict.c */
9147
74.8k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9148
9149
74.8k
            if (nsIndex == NS_INDEX_EMPTY) {
9150
                /*
9151
                 * Prefix with empty namespace means an undeclared
9152
                 * prefix which was already reported above.
9153
                 */
9154
56.8k
                if (aprefix != NULL)
9155
26.9k
                    continue;
9156
29.8k
                nsuri = NULL;
9157
29.8k
                uriHashValue = URI_HASH_EMPTY;
9158
29.8k
            } else if (nsIndex == NS_INDEX_XML) {
9159
1.58k
                nsuri = ctxt->str_xml_ns;
9160
1.58k
                uriHashValue = URI_HASH_XML;
9161
16.4k
            } else {
9162
16.4k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9163
16.4k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9164
16.4k
            }
9165
9166
47.8k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9167
47.8k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9168
47.8k
                                    hashValue, i);
9169
47.8k
            if (res < 0)
9170
0
                continue;
9171
9172
            /*
9173
             * [ WFC: Unique Att Spec ]
9174
             * No attribute name may appear more than once in the same
9175
             * start-tag or empty-element tag.
9176
             * As extended by the Namespace in XML REC.
9177
             */
9178
47.8k
            if (res < INT_MAX) {
9179
19.3k
                if (aprefix == atts[res+1]) {
9180
16.9k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9181
16.9k
                    numDupErr += 1;
9182
16.9k
                } else {
9183
2.39k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9184
2.39k
                             "Namespaced Attribute %s in '%s' redefined\n",
9185
2.39k
                             attname, nsuri, NULL);
9186
2.39k
                    numNsErr += 1;
9187
2.39k
                }
9188
19.3k
            }
9189
47.8k
        }
9190
61.4k
    }
9191
9192
    /*
9193
     * Default attributes
9194
     */
9195
777k
    if (ctxt->attsDefault != NULL) {
9196
304k
        xmlDefAttrsPtr defaults;
9197
9198
304k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9199
304k
  if (defaults != NULL) {
9200
781k
      for (i = 0; i < defaults->nbAttrs; i++) {
9201
596k
                xmlDefAttr *attr = &defaults->attrs[i];
9202
596k
                const xmlChar *nsuri = NULL;
9203
596k
                unsigned hashValue, uriHashValue = 0;
9204
596k
                int res;
9205
9206
596k
          attname = attr->name.name;
9207
596k
    aprefix = attr->prefix.name;
9208
9209
596k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9210
17.8k
                    continue;
9211
578k
    if (aprefix == ctxt->str_xmlns)
9212
142k
                    continue;
9213
9214
436k
                if (aprefix == NULL) {
9215
386k
                    nsIndex = NS_INDEX_EMPTY;
9216
386k
                    nsuri = NULL;
9217
386k
                    uriHashValue = URI_HASH_EMPTY;
9218
386k
                } else if (aprefix == ctxt->str_xml) {
9219
38.0k
                    nsIndex = NS_INDEX_XML;
9220
38.0k
                    nsuri = ctxt->str_xml_ns;
9221
38.0k
                    uriHashValue = URI_HASH_XML;
9222
38.0k
                } else {
9223
11.5k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9224
11.5k
                    if ((nsIndex == INT_MAX) ||
9225
11.5k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9226
6.59k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9227
6.59k
                                 "Namespace prefix %s for %s on %s is not "
9228
6.59k
                                 "defined\n",
9229
6.59k
                                 aprefix, attname, localname);
9230
6.59k
                        nsIndex = NS_INDEX_EMPTY;
9231
6.59k
                        nsuri = NULL;
9232
6.59k
                        uriHashValue = URI_HASH_EMPTY;
9233
6.59k
                    } else {
9234
5.00k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9235
5.00k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9236
5.00k
                    }
9237
11.5k
                }
9238
9239
                /*
9240
                 * Check whether the attribute exists
9241
                 */
9242
436k
                if (maxAtts > 1) {
9243
406k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9244
406k
                                                   uriHashValue);
9245
406k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9246
406k
                                            hashValue, nbatts);
9247
406k
                    if (res < 0)
9248
0
                        continue;
9249
406k
                    if (res < INT_MAX) {
9250
5.94k
                        if (aprefix == atts[res+1])
9251
5.55k
                            continue;
9252
395
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9253
395
                                 "Namespaced Attribute %s in '%s' redefined\n",
9254
395
                                 attname, nsuri, NULL);
9255
395
                    }
9256
406k
                }
9257
9258
431k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9259
9260
431k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9261
3.68k
                    res = xmlCtxtGrowAttrs(ctxt);
9262
9263
3.68k
                    maxatts = ctxt->maxatts;
9264
3.68k
                    atts = ctxt->atts;
9265
9266
3.68k
                    if (res < 0) {
9267
30
                        localname = NULL;
9268
30
                        goto done;
9269
30
                    }
9270
3.68k
                }
9271
9272
431k
                atts[nbatts++] = attname;
9273
431k
                atts[nbatts++] = aprefix;
9274
431k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9275
431k
                atts[nbatts++] = attr->value.name;
9276
431k
                atts[nbatts++] = attr->valueEnd;
9277
9278
431k
#ifdef LIBXML_VALID_ENABLED
9279
                /*
9280
                 * This should be moved to valid.c, but we don't keep track
9281
                 * whether an attribute was defaulted.
9282
                 */
9283
431k
                if ((ctxt->validate) &&
9284
431k
                    (ctxt->standalone == 1) &&
9285
431k
                    (attr->external != 0)) {
9286
297
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9287
297
                            "standalone: attribute %s on %s defaulted "
9288
297
                            "from external subset\n",
9289
297
                            attname, localname);
9290
297
                }
9291
431k
#endif
9292
431k
                nbdef++;
9293
431k
      }
9294
185k
  }
9295
304k
    }
9296
9297
    /*
9298
     * Using a single hash table for nsUri/localName pairs cannot
9299
     * detect duplicate QNames reliably. The following example will
9300
     * only result in two namespace errors.
9301
     *
9302
     * <doc xmlns:a="a" xmlns:b="a">
9303
     *   <elem a:a="" b:a="" b:a=""/>
9304
     * </doc>
9305
     *
9306
     * If we saw more than one namespace error but no duplicate QNames
9307
     * were found, we have to scan for duplicate QNames.
9308
     */
9309
777k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9310
920
        memset(ctxt->attrHash, -1,
9311
920
               attrHashSize * sizeof(ctxt->attrHash[0]));
9312
9313
5.69k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9314
4.77k
            unsigned hashValue, nameHashValue, prefixHashValue;
9315
4.77k
            int res;
9316
9317
4.77k
            aprefix = atts[i+1];
9318
4.77k
            if (aprefix == NULL)
9319
487
                continue;
9320
9321
4.28k
            attname = atts[i];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
4.28k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
4.28k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9325
9326
4.28k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9327
4.28k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9328
4.28k
                                         aprefix, hashValue, i);
9329
4.28k
            if (res < INT_MAX)
9330
1.42k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9331
4.28k
        }
9332
920
    }
9333
9334
    /*
9335
     * Reconstruct attribute pointers
9336
     */
9337
1.34M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9338
        /* namespace URI */
9339
562k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9340
562k
        if (nsIndex == INT_MAX)
9341
486k
            atts[i+2] = NULL;
9342
75.9k
        else if (nsIndex == INT_MAX - 1)
9343
52.3k
            atts[i+2] = ctxt->str_xml_ns;
9344
23.6k
        else
9345
23.6k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9346
9347
562k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9348
105k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9349
105k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9350
105k
        }
9351
562k
    }
9352
9353
777k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9354
777k
    if ((prefix != NULL) && (uri == NULL)) {
9355
72.0k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9356
72.0k
           "Namespace prefix %s on %s is not defined\n",
9357
72.0k
     prefix, localname, NULL);
9358
72.0k
    }
9359
777k
    *pref = prefix;
9360
777k
    *URI = uri;
9361
9362
    /*
9363
     * SAX callback
9364
     */
9365
777k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9366
777k
  (!ctxt->disableSAX)) {
9367
556k
  if (nbNs > 0)
9368
168k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9369
168k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9370
168k
        nbatts / 5, nbdef, atts);
9371
388k
  else
9372
388k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9373
388k
                          0, NULL, nbatts / 5, nbdef, atts);
9374
556k
    }
9375
9376
777k
done:
9377
    /*
9378
     * Free allocated attribute values
9379
     */
9380
777k
    if (attval != 0) {
9381
55.7k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9382
30.7k
      if (ctxt->attallocs[j] & 0x80000000)
9383
26.1k
          xmlFree((xmlChar *) atts[i+3]);
9384
24.9k
    }
9385
9386
777k
    *nbNsPtr = nbNs;
9387
777k
    return(localname);
9388
777k
}
9389
9390
/**
9391
 * Parse an end tag. Always consumes '</'.
9392
 *
9393
 *     [42] ETag ::= '</' Name S? '>'
9394
 *
9395
 * With namespace
9396
 *
9397
 *     [NS 9] ETag ::= '</' QName S? '>'
9398
 * @param ctxt  an XML parser context
9399
 * @param tag  the corresponding start tag
9400
 */
9401
9402
static void
9403
101k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9404
101k
    const xmlChar *name;
9405
9406
101k
    GROW;
9407
101k
    if ((RAW != '<') || (NXT(1) != '/')) {
9408
1.95k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9409
1.95k
  return;
9410
1.95k
    }
9411
99.5k
    SKIP(2);
9412
9413
99.5k
    if (tag->prefix == NULL)
9414
91.2k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9415
8.29k
    else
9416
8.29k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9417
9418
    /*
9419
     * We should definitely be at the ending "S? '>'" part
9420
     */
9421
99.5k
    GROW;
9422
99.5k
    SKIP_BLANKS;
9423
99.5k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9424
11.6k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9425
11.6k
    } else
9426
87.9k
  NEXT1;
9427
9428
    /*
9429
     * [ WFC: Element Type Match ]
9430
     * The Name in an element's end-tag must match the element type in the
9431
     * start-tag.
9432
     *
9433
     */
9434
99.5k
    if (name != (xmlChar*)1) {
9435
14.7k
        if (name == NULL) name = BAD_CAST "unparsable";
9436
14.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9437
14.7k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9438
14.7k
                    ctxt->name, tag->line, name);
9439
14.7k
    }
9440
9441
    /*
9442
     * SAX: End of Tag
9443
     */
9444
99.5k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9445
99.5k
  (!ctxt->disableSAX))
9446
66.3k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9447
66.3k
                                tag->URI);
9448
9449
99.5k
    spacePop(ctxt);
9450
99.5k
    if (tag->nsNr != 0)
9451
5.50k
  xmlParserNsPop(ctxt, tag->nsNr);
9452
99.5k
}
9453
9454
/**
9455
 * Parse escaped pure raw content. Always consumes '<!['.
9456
 *
9457
 * @deprecated Internal function, don't use.
9458
 *
9459
 *     [18] CDSect ::= CDStart CData CDEnd
9460
 *
9461
 *     [19] CDStart ::= '<![CDATA['
9462
 *
9463
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9464
 *
9465
 *     [21] CDEnd ::= ']]>'
9466
 * @param ctxt  an XML parser context
9467
 */
9468
void
9469
34.2k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9470
34.2k
    xmlChar *buf = NULL;
9471
34.2k
    int len = 0;
9472
34.2k
    int size = XML_PARSER_BUFFER_SIZE;
9473
34.2k
    int r, rl;
9474
34.2k
    int s, sl;
9475
34.2k
    int cur, l;
9476
34.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9477
24.8k
                    XML_MAX_HUGE_LENGTH :
9478
34.2k
                    XML_MAX_TEXT_LENGTH;
9479
9480
34.2k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9481
0
        return;
9482
34.2k
    SKIP(3);
9483
9484
34.2k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9485
0
        return;
9486
34.2k
    SKIP(6);
9487
9488
34.2k
    r = xmlCurrentCharRecover(ctxt, &rl);
9489
34.2k
    if (!IS_CHAR(r)) {
9490
698
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9491
698
        goto out;
9492
698
    }
9493
33.5k
    NEXTL(rl);
9494
33.5k
    s = xmlCurrentCharRecover(ctxt, &sl);
9495
33.5k
    if (!IS_CHAR(s)) {
9496
1.03k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9497
1.03k
        goto out;
9498
1.03k
    }
9499
32.5k
    NEXTL(sl);
9500
32.5k
    cur = xmlCurrentCharRecover(ctxt, &l);
9501
32.5k
    buf = xmlMalloc(size);
9502
32.5k
    if (buf == NULL) {
9503
36
  xmlErrMemory(ctxt);
9504
36
        goto out;
9505
36
    }
9506
3.95M
    while (IS_CHAR(cur) &&
9507
3.95M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9508
3.92M
  if (len + 5 >= size) {
9509
2.72k
      xmlChar *tmp;
9510
2.72k
            int newSize;
9511
9512
2.72k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9513
2.72k
            if (newSize < 0) {
9514
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9515
0
                               "CData section too big found\n");
9516
0
                goto out;
9517
0
            }
9518
2.72k
      tmp = xmlRealloc(buf, newSize);
9519
2.72k
      if (tmp == NULL) {
9520
7
    xmlErrMemory(ctxt);
9521
7
                goto out;
9522
7
      }
9523
2.71k
      buf = tmp;
9524
2.71k
      size = newSize;
9525
2.71k
  }
9526
3.92M
  COPY_BUF(buf, len, r);
9527
3.92M
  r = s;
9528
3.92M
  rl = sl;
9529
3.92M
  s = cur;
9530
3.92M
  sl = l;
9531
3.92M
  NEXTL(l);
9532
3.92M
  cur = xmlCurrentCharRecover(ctxt, &l);
9533
3.92M
    }
9534
32.4k
    buf[len] = 0;
9535
32.4k
    if (cur != '>') {
9536
15.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9537
15.6k
                       "CData section not finished\n%.50s\n", buf);
9538
15.6k
        goto out;
9539
15.6k
    }
9540
16.8k
    NEXTL(l);
9541
9542
    /*
9543
     * OK the buffer is to be consumed as cdata.
9544
     */
9545
16.8k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9546
15.8k
        if ((ctxt->sax->cdataBlock != NULL) &&
9547
15.8k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9548
4.52k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9549
11.2k
        } else if (ctxt->sax->characters != NULL) {
9550
11.2k
            ctxt->sax->characters(ctxt->userData, buf, len);
9551
11.2k
        }
9552
15.8k
    }
9553
9554
34.2k
out:
9555
34.2k
    xmlFree(buf);
9556
34.2k
}
9557
9558
/**
9559
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9560
 * unexpected EOF to the caller.
9561
 *
9562
 * @param ctxt  an XML parser context
9563
 */
9564
9565
static void
9566
26.9k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9567
26.9k
    int oldNameNr = ctxt->nameNr;
9568
26.9k
    int oldSpaceNr = ctxt->spaceNr;
9569
26.9k
    int oldNodeNr = ctxt->nodeNr;
9570
9571
26.9k
    GROW;
9572
10.3M
    while ((ctxt->input->cur < ctxt->input->end) &&
9573
10.3M
     (PARSER_STOPPED(ctxt) == 0)) {
9574
10.3M
  const xmlChar *cur = ctxt->input->cur;
9575
9576
  /*
9577
   * First case : a Processing Instruction.
9578
   */
9579
10.3M
  if ((*cur == '<') && (cur[1] == '?')) {
9580
25.7k
      xmlParsePI(ctxt);
9581
25.7k
  }
9582
9583
  /*
9584
   * Second case : a CDSection
9585
   */
9586
  /* 2.6.0 test was *cur not RAW */
9587
10.2M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9588
29.1k
      xmlParseCDSect(ctxt);
9589
29.1k
  }
9590
9591
  /*
9592
   * Third case :  a comment
9593
   */
9594
10.2M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9595
10.2M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9596
91.4k
      xmlParseComment(ctxt);
9597
91.4k
  }
9598
9599
  /*
9600
   * Fourth case :  a sub-element.
9601
   */
9602
10.1M
  else if (*cur == '<') {
9603
6.57M
            if (NXT(1) == '/') {
9604
110k
                if (ctxt->nameNr <= oldNameNr)
9605
1.22k
                    break;
9606
109k
          xmlParseElementEnd(ctxt);
9607
6.46M
            } else {
9608
6.46M
          xmlParseElementStart(ctxt);
9609
6.46M
            }
9610
6.57M
  }
9611
9612
  /*
9613
   * Fifth case : a reference. If if has not been resolved,
9614
   *    parsing returns it's Name, create the node
9615
   */
9616
9617
3.59M
  else if (*cur == '&') {
9618
652k
      xmlParseReference(ctxt);
9619
652k
  }
9620
9621
  /*
9622
   * Last case, text. Note that References are handled directly.
9623
   */
9624
2.93M
  else {
9625
2.93M
      xmlParseCharDataInternal(ctxt, 0);
9626
2.93M
  }
9627
9628
10.3M
  SHRINK;
9629
10.3M
  GROW;
9630
10.3M
    }
9631
9632
26.9k
    if ((ctxt->nameNr > oldNameNr) &&
9633
26.9k
        (ctxt->input->cur >= ctxt->input->end) &&
9634
26.9k
        (ctxt->wellFormed)) {
9635
561
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9636
561
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9637
561
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9638
561
                "Premature end of data in tag %s line %d\n",
9639
561
                name, line, NULL);
9640
561
    }
9641
9642
    /*
9643
     * Clean up in error case
9644
     */
9645
9646
364k
    while (ctxt->nodeNr > oldNodeNr)
9647
337k
        nodePop(ctxt);
9648
9649
446k
    while (ctxt->nameNr > oldNameNr) {
9650
419k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9651
9652
419k
        if (tag->nsNr != 0)
9653
123k
            xmlParserNsPop(ctxt, tag->nsNr);
9654
9655
419k
        namePop(ctxt);
9656
419k
    }
9657
9658
446k
    while (ctxt->spaceNr > oldSpaceNr)
9659
419k
        spacePop(ctxt);
9660
26.9k
}
9661
9662
/**
9663
 * Parse XML element content. This is useful if you're only interested
9664
 * in custom SAX callbacks. If you want a node list, use
9665
 * #xmlCtxtParseContent.
9666
 *
9667
 * @param ctxt  an XML parser context
9668
 */
9669
void
9670
0
xmlParseContent(xmlParserCtxt *ctxt) {
9671
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9672
0
        return;
9673
9674
0
    xmlCtxtInitializeLate(ctxt);
9675
9676
0
    xmlParseContentInternal(ctxt);
9677
9678
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9679
0
}
9680
9681
/**
9682
 * Parse an XML element
9683
 *
9684
 * @deprecated Internal function, don't use.
9685
 *
9686
 *     [39] element ::= EmptyElemTag | STag content ETag
9687
 *
9688
 * [ WFC: Element Type Match ]
9689
 * The Name in an element's end-tag must match the element type in the
9690
 * start-tag.
9691
 *
9692
 * @param ctxt  an XML parser context
9693
 */
9694
9695
void
9696
32.8k
xmlParseElement(xmlParserCtxt *ctxt) {
9697
32.8k
    if (xmlParseElementStart(ctxt) != 0)
9698
12.2k
        return;
9699
9700
20.5k
    xmlParseContentInternal(ctxt);
9701
9702
20.5k
    if (ctxt->input->cur >= ctxt->input->end) {
9703
15.8k
        if (ctxt->wellFormed) {
9704
693
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9705
693
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9706
693
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9707
693
                    "Premature end of data in tag %s line %d\n",
9708
693
                    name, line, NULL);
9709
693
        }
9710
15.8k
        return;
9711
15.8k
    }
9712
9713
4.75k
    xmlParseElementEnd(ctxt);
9714
4.75k
}
9715
9716
/**
9717
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9718
 * opening tag was parsed, 1 if an empty element was parsed.
9719
 *
9720
 * Always consumes '<'.
9721
 *
9722
 * @param ctxt  an XML parser context
9723
 */
9724
static int
9725
6.49M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9726
6.49M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9727
6.49M
    const xmlChar *name;
9728
6.49M
    const xmlChar *prefix = NULL;
9729
6.49M
    const xmlChar *URI = NULL;
9730
6.49M
    xmlParserNodeInfo node_info;
9731
6.49M
    int line;
9732
6.49M
    xmlNodePtr cur;
9733
6.49M
    int nbNs = 0;
9734
9735
6.49M
    if (ctxt->nameNr > maxDepth) {
9736
6
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9737
6
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9738
6
                ctxt->nameNr);
9739
6
  xmlHaltParser(ctxt);
9740
6
  return(-1);
9741
6
    }
9742
9743
    /* Capture start position */
9744
6.49M
    if (ctxt->record_info) {
9745
0
        node_info.begin_pos = ctxt->input->consumed +
9746
0
                          (CUR_PTR - ctxt->input->base);
9747
0
  node_info.begin_line = ctxt->input->line;
9748
0
    }
9749
9750
6.49M
    if (ctxt->spaceNr == 0)
9751
32.8k
  spacePush(ctxt, -1);
9752
6.46M
    else if (*ctxt->space == -2)
9753
159k
  spacePush(ctxt, -1);
9754
6.30M
    else
9755
6.30M
  spacePush(ctxt, *ctxt->space);
9756
9757
6.49M
    line = ctxt->input->line;
9758
6.49M
#ifdef LIBXML_SAX1_ENABLED
9759
6.49M
    if (ctxt->sax2)
9760
6.20M
#endif /* LIBXML_SAX1_ENABLED */
9761
6.20M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9762
291k
#ifdef LIBXML_SAX1_ENABLED
9763
291k
    else
9764
291k
  name = xmlParseStartTag(ctxt);
9765
6.49M
#endif /* LIBXML_SAX1_ENABLED */
9766
6.49M
    if (name == NULL) {
9767
5.54M
  spacePop(ctxt);
9768
5.54M
        return(-1);
9769
5.54M
    }
9770
950k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9771
950k
    cur = ctxt->node;
9772
9773
950k
#ifdef LIBXML_VALID_ENABLED
9774
    /*
9775
     * [ VC: Root Element Type ]
9776
     * The Name in the document type declaration must match the element
9777
     * type of the root element.
9778
     */
9779
950k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9780
950k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9781
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9782
950k
#endif /* LIBXML_VALID_ENABLED */
9783
9784
    /*
9785
     * Check for an Empty Element.
9786
     */
9787
950k
    if ((RAW == '/') && (NXT(1) == '>')) {
9788
81.4k
        SKIP(2);
9789
81.4k
  if (ctxt->sax2) {
9790
62.4k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9791
62.4k
    (!ctxt->disableSAX))
9792
24.4k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9793
62.4k
#ifdef LIBXML_SAX1_ENABLED
9794
62.4k
  } else {
9795
18.9k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9796
18.9k
    (!ctxt->disableSAX))
9797
18.2k
    ctxt->sax->endElement(ctxt->userData, name);
9798
18.9k
#endif /* LIBXML_SAX1_ENABLED */
9799
18.9k
  }
9800
81.4k
  namePop(ctxt);
9801
81.4k
  spacePop(ctxt);
9802
81.4k
  if (nbNs > 0)
9803
38.9k
      xmlParserNsPop(ctxt, nbNs);
9804
81.4k
  if (cur != NULL && ctxt->record_info) {
9805
0
            node_info.node = cur;
9806
0
            node_info.end_pos = ctxt->input->consumed +
9807
0
                                (CUR_PTR - ctxt->input->base);
9808
0
            node_info.end_line = ctxt->input->line;
9809
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9810
0
  }
9811
81.4k
  return(1);
9812
81.4k
    }
9813
869k
    if (RAW == '>') {
9814
549k
        NEXT1;
9815
549k
        if (cur != NULL && ctxt->record_info) {
9816
0
            node_info.node = cur;
9817
0
            node_info.end_pos = 0;
9818
0
            node_info.end_line = 0;
9819
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9820
0
        }
9821
549k
    } else {
9822
319k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9823
319k
         "Couldn't find end of Start Tag %s line %d\n",
9824
319k
                    name, line, NULL);
9825
9826
  /*
9827
   * end of parsing of this node.
9828
   */
9829
319k
  nodePop(ctxt);
9830
319k
  namePop(ctxt);
9831
319k
  spacePop(ctxt);
9832
319k
  if (nbNs > 0)
9833
52.3k
      xmlParserNsPop(ctxt, nbNs);
9834
319k
  return(-1);
9835
319k
    }
9836
9837
549k
    return(0);
9838
869k
}
9839
9840
/**
9841
 * Parse the end of an XML element. Always consumes '</'.
9842
 *
9843
 * @param ctxt  an XML parser context
9844
 */
9845
static void
9846
114k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9847
114k
    xmlNodePtr cur = ctxt->node;
9848
9849
114k
    if (ctxt->nameNr <= 0) {
9850
136
        if ((RAW == '<') && (NXT(1) == '/'))
9851
30
            SKIP(2);
9852
136
        return;
9853
136
    }
9854
9855
    /*
9856
     * parse the end of tag: '</' should be here.
9857
     */
9858
114k
    if (ctxt->sax2) {
9859
85.8k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9860
85.8k
  namePop(ctxt);
9861
85.8k
    }
9862
28.3k
#ifdef LIBXML_SAX1_ENABLED
9863
28.3k
    else
9864
28.3k
  xmlParseEndTag1(ctxt, 0);
9865
114k
#endif /* LIBXML_SAX1_ENABLED */
9866
9867
    /*
9868
     * Capture end position
9869
     */
9870
114k
    if (cur != NULL && ctxt->record_info) {
9871
0
        xmlParserNodeInfoPtr node_info;
9872
9873
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9874
0
        if (node_info != NULL) {
9875
0
            node_info->end_pos = ctxt->input->consumed +
9876
0
                                 (CUR_PTR - ctxt->input->base);
9877
0
            node_info->end_line = ctxt->input->line;
9878
0
        }
9879
0
    }
9880
114k
}
9881
9882
/**
9883
 * Parse the XML version value.
9884
 *
9885
 * @deprecated Internal function, don't use.
9886
 *
9887
 *     [26] VersionNum ::= '1.' [0-9]+
9888
 *
9889
 * In practice allow [0-9].[0-9]+ at that level
9890
 *
9891
 * @param ctxt  an XML parser context
9892
 * @returns the string giving the XML version number, or NULL
9893
 */
9894
xmlChar *
9895
17.6k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9896
17.6k
    xmlChar *buf = NULL;
9897
17.6k
    int len = 0;
9898
17.6k
    int size = 10;
9899
17.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9900
4.40k
                    XML_MAX_TEXT_LENGTH :
9901
17.6k
                    XML_MAX_NAME_LENGTH;
9902
17.6k
    xmlChar cur;
9903
9904
17.6k
    buf = xmlMalloc(size);
9905
17.6k
    if (buf == NULL) {
9906
97
  xmlErrMemory(ctxt);
9907
97
  return(NULL);
9908
97
    }
9909
17.5k
    cur = CUR;
9910
17.5k
    if (!((cur >= '0') && (cur <= '9'))) {
9911
1.11k
  xmlFree(buf);
9912
1.11k
  return(NULL);
9913
1.11k
    }
9914
16.4k
    buf[len++] = cur;
9915
16.4k
    NEXT;
9916
16.4k
    cur=CUR;
9917
16.4k
    if (cur != '.') {
9918
979
  xmlFree(buf);
9919
979
  return(NULL);
9920
979
    }
9921
15.4k
    buf[len++] = cur;
9922
15.4k
    NEXT;
9923
15.4k
    cur=CUR;
9924
82.9k
    while ((cur >= '0') && (cur <= '9')) {
9925
67.4k
  if (len + 1 >= size) {
9926
5.55k
      xmlChar *tmp;
9927
5.55k
            int newSize;
9928
9929
5.55k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9930
5.55k
            if (newSize < 0) {
9931
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9932
0
                xmlFree(buf);
9933
0
                return(NULL);
9934
0
            }
9935
5.55k
      tmp = xmlRealloc(buf, newSize);
9936
5.55k
      if (tmp == NULL) {
9937
12
    xmlErrMemory(ctxt);
9938
12
          xmlFree(buf);
9939
12
    return(NULL);
9940
12
      }
9941
5.54k
      buf = tmp;
9942
5.54k
            size = newSize;
9943
5.54k
  }
9944
67.4k
  buf[len++] = cur;
9945
67.4k
  NEXT;
9946
67.4k
  cur=CUR;
9947
67.4k
    }
9948
15.4k
    buf[len] = 0;
9949
15.4k
    return(buf);
9950
15.4k
}
9951
9952
/**
9953
 * Parse the XML version.
9954
 *
9955
 * @deprecated Internal function, don't use.
9956
 *
9957
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9958
 *
9959
 *     [25] Eq ::= S? '=' S?
9960
 *
9961
 * @param ctxt  an XML parser context
9962
 * @returns the version string, e.g. "1.0"
9963
 */
9964
9965
xmlChar *
9966
31.0k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9967
31.0k
    xmlChar *version = NULL;
9968
9969
31.0k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9970
18.9k
  SKIP(7);
9971
18.9k
  SKIP_BLANKS;
9972
18.9k
  if (RAW != '=') {
9973
833
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9974
833
      return(NULL);
9975
833
        }
9976
18.1k
  NEXT;
9977
18.1k
  SKIP_BLANKS;
9978
18.1k
  if (RAW == '"') {
9979
13.1k
      NEXT;
9980
13.1k
      version = xmlParseVersionNum(ctxt);
9981
13.1k
      if (RAW != '"') {
9982
1.85k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9983
1.85k
      } else
9984
11.2k
          NEXT;
9985
13.1k
  } else if (RAW == '\''){
9986
4.56k
      NEXT;
9987
4.56k
      version = xmlParseVersionNum(ctxt);
9988
4.56k
      if (RAW != '\'') {
9989
742
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9990
742
      } else
9991
3.82k
          NEXT;
9992
4.56k
  } else {
9993
474
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9994
474
  }
9995
18.1k
    }
9996
30.2k
    return(version);
9997
31.0k
}
9998
9999
/**
10000
 * Parse the XML encoding name
10001
 *
10002
 * @deprecated Internal function, don't use.
10003
 *
10004
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10005
 *
10006
 * @param ctxt  an XML parser context
10007
 * @returns the encoding name value or NULL
10008
 */
10009
xmlChar *
10010
14.7k
xmlParseEncName(xmlParserCtxt *ctxt) {
10011
14.7k
    xmlChar *buf = NULL;
10012
14.7k
    int len = 0;
10013
14.7k
    int size = 10;
10014
14.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10015
3.37k
                    XML_MAX_TEXT_LENGTH :
10016
14.7k
                    XML_MAX_NAME_LENGTH;
10017
14.7k
    xmlChar cur;
10018
10019
14.7k
    cur = CUR;
10020
14.7k
    if (((cur >= 'a') && (cur <= 'z')) ||
10021
14.7k
        ((cur >= 'A') && (cur <= 'Z'))) {
10022
14.2k
  buf = xmlMalloc(size);
10023
14.2k
  if (buf == NULL) {
10024
55
      xmlErrMemory(ctxt);
10025
55
      return(NULL);
10026
55
  }
10027
10028
14.2k
  buf[len++] = cur;
10029
14.2k
  NEXT;
10030
14.2k
  cur = CUR;
10031
33.7M
  while (((cur >= 'a') && (cur <= 'z')) ||
10032
33.7M
         ((cur >= 'A') && (cur <= 'Z')) ||
10033
33.7M
         ((cur >= '0') && (cur <= '9')) ||
10034
33.7M
         (cur == '.') || (cur == '_') ||
10035
33.7M
         (cur == '-')) {
10036
33.6M
      if (len + 1 >= size) {
10037
23.8k
          xmlChar *tmp;
10038
23.8k
                int newSize;
10039
10040
23.8k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10041
23.8k
                if (newSize < 0) {
10042
313
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10043
313
                    xmlFree(buf);
10044
313
                    return(NULL);
10045
313
                }
10046
23.5k
    tmp = xmlRealloc(buf, newSize);
10047
23.5k
    if (tmp == NULL) {
10048
17
        xmlErrMemory(ctxt);
10049
17
        xmlFree(buf);
10050
17
        return(NULL);
10051
17
    }
10052
23.5k
    buf = tmp;
10053
23.5k
                size = newSize;
10054
23.5k
      }
10055
33.6M
      buf[len++] = cur;
10056
33.6M
      NEXT;
10057
33.6M
      cur = CUR;
10058
33.6M
        }
10059
13.9k
  buf[len] = 0;
10060
13.9k
    } else {
10061
499
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10062
499
    }
10063
14.4k
    return(buf);
10064
14.7k
}
10065
10066
/**
10067
 * Parse the XML encoding declaration
10068
 *
10069
 * @deprecated Internal function, don't use.
10070
 *
10071
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10072
 *                           "'" EncName "'")
10073
 *
10074
 * this setups the conversion filters.
10075
 *
10076
 * @param ctxt  an XML parser context
10077
 * @returns the encoding value or NULL
10078
 */
10079
10080
const xmlChar *
10081
29.6k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10082
29.6k
    xmlChar *encoding = NULL;
10083
10084
29.6k
    SKIP_BLANKS;
10085
29.6k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10086
13.8k
        return(NULL);
10087
10088
15.8k
    SKIP(8);
10089
15.8k
    SKIP_BLANKS;
10090
15.8k
    if (RAW != '=') {
10091
719
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10092
719
        return(NULL);
10093
719
    }
10094
15.1k
    NEXT;
10095
15.1k
    SKIP_BLANKS;
10096
15.1k
    if (RAW == '"') {
10097
10.3k
        NEXT;
10098
10.3k
        encoding = xmlParseEncName(ctxt);
10099
10.3k
        if (RAW != '"') {
10100
1.79k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10101
1.79k
            xmlFree(encoding);
10102
1.79k
            return(NULL);
10103
1.79k
        } else
10104
8.57k
            NEXT;
10105
10.3k
    } else if (RAW == '\''){
10106
4.43k
        NEXT;
10107
4.43k
        encoding = xmlParseEncName(ctxt);
10108
4.43k
        if (RAW != '\'') {
10109
434
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
434
            xmlFree(encoding);
10111
434
            return(NULL);
10112
434
        } else
10113
3.99k
            NEXT;
10114
4.43k
    } else {
10115
315
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10116
315
    }
10117
10118
12.8k
    if (encoding == NULL)
10119
324
        return(NULL);
10120
10121
12.5k
    xmlSetDeclaredEncoding(ctxt, encoding);
10122
10123
12.5k
    return(ctxt->encoding);
10124
12.8k
}
10125
10126
/**
10127
 * Parse the XML standalone declaration
10128
 *
10129
 * @deprecated Internal function, don't use.
10130
 *
10131
 *     [32] SDDecl ::= S 'standalone' Eq
10132
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10133
 *
10134
 * [ VC: Standalone Document Declaration ]
10135
 * TODO The standalone document declaration must have the value "no"
10136
 * if any external markup declarations contain declarations of:
10137
 *  - attributes with default values, if elements to which these
10138
 *    attributes apply appear in the document without specifications
10139
 *    of values for these attributes, or
10140
 *  - entities (other than amp, lt, gt, apos, quot), if references
10141
 *    to those entities appear in the document, or
10142
 *  - attributes with values subject to normalization, where the
10143
 *    attribute appears in the document with a value which will change
10144
 *    as a result of normalization, or
10145
 *  - element types with element content, if white space occurs directly
10146
 *    within any instance of those types.
10147
 *
10148
 * @param ctxt  an XML parser context
10149
 * @returns
10150
 *   1 if standalone="yes"
10151
 *   0 if standalone="no"
10152
 *  -2 if standalone attribute is missing or invalid
10153
 *    (A standalone value of -2 means that the XML declaration was found,
10154
 *     but no value was specified for the standalone attribute).
10155
 */
10156
10157
int
10158
3.94k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10159
3.94k
    int standalone = -2;
10160
10161
3.94k
    SKIP_BLANKS;
10162
3.94k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10163
866
  SKIP(10);
10164
866
        SKIP_BLANKS;
10165
866
  if (RAW != '=') {
10166
12
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10167
12
      return(standalone);
10168
12
        }
10169
854
  NEXT;
10170
854
  SKIP_BLANKS;
10171
854
        if (RAW == '\''){
10172
108
      NEXT;
10173
108
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10174
63
          standalone = 0;
10175
63
                SKIP(2);
10176
63
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10177
45
                 (NXT(2) == 's')) {
10178
12
          standalone = 1;
10179
12
    SKIP(3);
10180
33
            } else {
10181
33
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10182
33
      }
10183
108
      if (RAW != '\'') {
10184
43
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10185
43
      } else
10186
65
          NEXT;
10187
746
  } else if (RAW == '"'){
10188
737
      NEXT;
10189
737
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10190
54
          standalone = 0;
10191
54
    SKIP(2);
10192
683
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10193
683
                 (NXT(2) == 's')) {
10194
659
          standalone = 1;
10195
659
                SKIP(3);
10196
659
            } else {
10197
24
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10198
24
      }
10199
737
      if (RAW != '"') {
10200
58
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10201
58
      } else
10202
679
          NEXT;
10203
737
  } else {
10204
9
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10205
9
        }
10206
854
    }
10207
3.93k
    return(standalone);
10208
3.94k
}
10209
10210
/**
10211
 * Parse an XML declaration header
10212
 *
10213
 * @deprecated Internal function, don't use.
10214
 *
10215
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10216
 * @param ctxt  an XML parser context
10217
 */
10218
10219
void
10220
7.55k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10221
7.55k
    xmlChar *version;
10222
10223
    /*
10224
     * This value for standalone indicates that the document has an
10225
     * XML declaration but it does not have a standalone attribute.
10226
     * It will be overwritten later if a standalone attribute is found.
10227
     */
10228
10229
7.55k
    ctxt->standalone = -2;
10230
10231
    /*
10232
     * We know that '<?xml' is here.
10233
     */
10234
7.55k
    SKIP(5);
10235
10236
7.55k
    if (!IS_BLANK_CH(RAW)) {
10237
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10238
0
                 "Blank needed after '<?xml'\n");
10239
0
    }
10240
7.55k
    SKIP_BLANKS;
10241
10242
    /*
10243
     * We must have the VersionInfo here.
10244
     */
10245
7.55k
    version = xmlParseVersionInfo(ctxt);
10246
7.55k
    if (version == NULL) {
10247
2.98k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10248
4.56k
    } else {
10249
4.56k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10250
      /*
10251
       * Changed here for XML-1.0 5th edition
10252
       */
10253
981
      if (ctxt->options & XML_PARSE_OLD10) {
10254
200
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10255
200
                "Unsupported version '%s'\n",
10256
200
                version);
10257
781
      } else {
10258
781
          if ((version[0] == '1') && ((version[1] == '.'))) {
10259
377
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10260
377
                      "Unsupported version '%s'\n",
10261
377
          version, NULL);
10262
404
    } else {
10263
404
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10264
404
              "Unsupported version '%s'\n",
10265
404
              version);
10266
404
    }
10267
781
      }
10268
981
  }
10269
4.56k
  if (ctxt->version != NULL)
10270
0
      xmlFree(ctxt->version);
10271
4.56k
  ctxt->version = version;
10272
4.56k
    }
10273
10274
    /*
10275
     * We may have the encoding declaration
10276
     */
10277
7.55k
    if (!IS_BLANK_CH(RAW)) {
10278
4.43k
        if ((RAW == '?') && (NXT(1) == '>')) {
10279
1.37k
      SKIP(2);
10280
1.37k
      return;
10281
1.37k
  }
10282
3.05k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10283
3.05k
    }
10284
6.17k
    xmlParseEncodingDecl(ctxt);
10285
10286
    /*
10287
     * We may have the standalone status.
10288
     */
10289
6.17k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10290
2.61k
        if ((RAW == '?') && (NXT(1) == '>')) {
10291
2.23k
      SKIP(2);
10292
2.23k
      return;
10293
2.23k
  }
10294
385
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10295
385
    }
10296
10297
    /*
10298
     * We can grow the input buffer freely at that point
10299
     */
10300
3.94k
    GROW;
10301
10302
3.94k
    SKIP_BLANKS;
10303
3.94k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10304
10305
3.94k
    SKIP_BLANKS;
10306
3.94k
    if ((RAW == '?') && (NXT(1) == '>')) {
10307
825
        SKIP(2);
10308
3.11k
    } else if (RAW == '>') {
10309
        /* Deprecated old WD ... */
10310
113
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10311
113
  NEXT;
10312
3.00k
    } else {
10313
3.00k
        int c;
10314
10315
3.00k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10316
310k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10317
310k
               ((c = CUR) != 0)) {
10318
308k
            NEXT;
10319
308k
            if (c == '>')
10320
1.52k
                break;
10321
308k
        }
10322
3.00k
    }
10323
3.94k
}
10324
10325
/**
10326
 * @since 2.14.0
10327
 *
10328
 * @param ctxt  parser context
10329
 * @returns the version from the XML declaration.
10330
 */
10331
const xmlChar *
10332
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10333
0
    if (ctxt == NULL)
10334
0
        return(NULL);
10335
10336
0
    return(ctxt->version);
10337
0
}
10338
10339
/**
10340
 * @since 2.14.0
10341
 *
10342
 * @param ctxt  parser context
10343
 * @returns the value from the standalone document declaration.
10344
 */
10345
int
10346
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10347
0
    if (ctxt == NULL)
10348
0
        return(0);
10349
10350
0
    return(ctxt->standalone);
10351
0
}
10352
10353
/**
10354
 * Parse an XML Misc* optional field.
10355
 *
10356
 * @deprecated Internal function, don't use.
10357
 *
10358
 *     [27] Misc ::= Comment | PI |  S
10359
 * @param ctxt  an XML parser context
10360
 */
10361
10362
void
10363
112k
xmlParseMisc(xmlParserCtxt *ctxt) {
10364
229k
    while (PARSER_STOPPED(ctxt) == 0) {
10365
214k
        SKIP_BLANKS;
10366
214k
        GROW;
10367
214k
        if ((RAW == '<') && (NXT(1) == '?')) {
10368
9.62k
      xmlParsePI(ctxt);
10369
204k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10370
107k
      xmlParseComment(ctxt);
10371
107k
        } else {
10372
97.5k
            break;
10373
97.5k
        }
10374
214k
    }
10375
112k
}
10376
10377
static void
10378
64.0k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10379
64.0k
    xmlDocPtr doc;
10380
10381
    /*
10382
     * SAX: end of the document processing.
10383
     */
10384
64.0k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10385
64.0k
        ctxt->sax->endDocument(ctxt->userData);
10386
10387
    /*
10388
     * Remove locally kept entity definitions if the tree was not built
10389
     */
10390
64.0k
    doc = ctxt->myDoc;
10391
64.0k
    if ((doc != NULL) &&
10392
64.0k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10393
289
        xmlFreeDoc(doc);
10394
289
        ctxt->myDoc = NULL;
10395
289
    }
10396
64.0k
}
10397
10398
/**
10399
 * Parse an XML document and invoke the SAX handlers. This is useful
10400
 * if you're only interested in custom SAX callbacks. If you want a
10401
 * document tree, use #xmlCtxtParseDocument.
10402
 *
10403
 * @param ctxt  an XML parser context
10404
 * @returns 0, -1 in case of error.
10405
 */
10406
10407
int
10408
50.5k
xmlParseDocument(xmlParserCtxt *ctxt) {
10409
50.5k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10410
0
        return(-1);
10411
10412
50.5k
    GROW;
10413
10414
    /*
10415
     * SAX: detecting the level.
10416
     */
10417
50.5k
    xmlCtxtInitializeLate(ctxt);
10418
10419
50.5k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10420
50.5k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10421
50.5k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10422
50.5k
    }
10423
10424
50.5k
    xmlDetectEncoding(ctxt);
10425
10426
50.5k
    if (CUR == 0) {
10427
454
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10428
454
  return(-1);
10429
454
    }
10430
10431
50.1k
    GROW;
10432
50.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10433
10434
  /*
10435
   * Note that we will switch encoding on the fly.
10436
   */
10437
5.07k
  xmlParseXMLDecl(ctxt);
10438
5.07k
  SKIP_BLANKS;
10439
45.0k
    } else {
10440
45.0k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10441
45.0k
        if (ctxt->version == NULL) {
10442
34
            xmlErrMemory(ctxt);
10443
34
            return(-1);
10444
34
        }
10445
45.0k
    }
10446
50.0k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10447
48.3k
        ctxt->sax->startDocument(ctxt->userData);
10448
50.0k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10449
50.0k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10450
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10451
0
    }
10452
10453
    /*
10454
     * The Misc part of the Prolog
10455
     */
10456
50.0k
    xmlParseMisc(ctxt);
10457
10458
    /*
10459
     * Then possibly doc type declaration(s) and more Misc
10460
     * (doctypedecl Misc*)?
10461
     */
10462
50.0k
    GROW;
10463
50.0k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10464
10465
29.9k
  ctxt->inSubset = 1;
10466
29.9k
  xmlParseDocTypeDecl(ctxt);
10467
29.9k
  if (RAW == '[') {
10468
23.8k
      xmlParseInternalSubset(ctxt);
10469
23.8k
  } else if (RAW == '>') {
10470
4.08k
            NEXT;
10471
4.08k
        }
10472
10473
  /*
10474
   * Create and update the external subset.
10475
   */
10476
29.9k
  ctxt->inSubset = 2;
10477
29.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10478
29.9k
      (!ctxt->disableSAX))
10479
23.6k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10480
23.6k
                                ctxt->extSubSystem, ctxt->extSubURI);
10481
29.9k
  ctxt->inSubset = 0;
10482
10483
29.9k
        xmlCleanSpecialAttr(ctxt);
10484
10485
29.9k
  xmlParseMisc(ctxt);
10486
29.9k
    }
10487
10488
    /*
10489
     * Time to start parsing the tree itself
10490
     */
10491
50.0k
    GROW;
10492
50.0k
    if (RAW != '<') {
10493
17.2k
        if (ctxt->wellFormed)
10494
2.51k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10495
2.51k
                           "Start tag expected, '<' not found\n");
10496
32.8k
    } else {
10497
32.8k
  xmlParseElement(ctxt);
10498
10499
  /*
10500
   * The Misc part at the end
10501
   */
10502
32.8k
  xmlParseMisc(ctxt);
10503
10504
32.8k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10505
32.8k
    }
10506
10507
50.0k
    ctxt->instate = XML_PARSER_EOF;
10508
50.0k
    xmlFinishDocument(ctxt);
10509
10510
50.0k
    if (! ctxt->wellFormed) {
10511
49.5k
  ctxt->valid = 0;
10512
49.5k
  return(-1);
10513
49.5k
    }
10514
10515
526
    return(0);
10516
50.0k
}
10517
10518
/**
10519
 * Parse a general parsed entity
10520
 * An external general parsed entity is well-formed if it matches the
10521
 * production labeled extParsedEnt.
10522
 *
10523
 * @deprecated Internal function, don't use.
10524
 *
10525
 *     [78] extParsedEnt ::= TextDecl? content
10526
 *
10527
 * @param ctxt  an XML parser context
10528
 * @returns 0, -1 in case of error. the parser context is augmented
10529
 *                as a result of the parsing.
10530
 */
10531
10532
int
10533
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10534
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10535
0
        return(-1);
10536
10537
0
    xmlCtxtInitializeLate(ctxt);
10538
10539
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10540
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10541
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10542
0
    }
10543
10544
0
    xmlDetectEncoding(ctxt);
10545
10546
0
    if (CUR == 0) {
10547
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10548
0
    }
10549
10550
    /*
10551
     * Check for the XMLDecl in the Prolog.
10552
     */
10553
0
    GROW;
10554
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10555
10556
  /*
10557
   * Note that we will switch encoding on the fly.
10558
   */
10559
0
  xmlParseXMLDecl(ctxt);
10560
0
  SKIP_BLANKS;
10561
0
    } else {
10562
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10563
0
    }
10564
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10565
0
        ctxt->sax->startDocument(ctxt->userData);
10566
10567
    /*
10568
     * Doing validity checking on chunk doesn't make sense
10569
     */
10570
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10571
0
    ctxt->validate = 0;
10572
0
    ctxt->depth = 0;
10573
10574
0
    xmlParseContentInternal(ctxt);
10575
10576
0
    if (ctxt->input->cur < ctxt->input->end)
10577
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10578
10579
    /*
10580
     * SAX: end of the document processing.
10581
     */
10582
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10583
0
        ctxt->sax->endDocument(ctxt->userData);
10584
10585
0
    if (! ctxt->wellFormed) return(-1);
10586
0
    return(0);
10587
0
}
10588
10589
#ifdef LIBXML_PUSH_ENABLED
10590
/************************************************************************
10591
 *                  *
10592
 *    Progressive parsing interfaces        *
10593
 *                  *
10594
 ************************************************************************/
10595
10596
/**
10597
 * Check whether the input buffer contains a character.
10598
 *
10599
 * @param ctxt  an XML parser context
10600
 * @param c  character
10601
 */
10602
static int
10603
57.7k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10604
57.7k
    const xmlChar *cur;
10605
10606
57.7k
    if (ctxt->checkIndex == 0) {
10607
26.1k
        cur = ctxt->input->cur + 1;
10608
31.6k
    } else {
10609
31.6k
        cur = ctxt->input->cur + ctxt->checkIndex;
10610
31.6k
    }
10611
10612
57.7k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10613
32.3k
        size_t index = ctxt->input->end - ctxt->input->cur;
10614
10615
32.3k
        if (index > LONG_MAX) {
10616
0
            ctxt->checkIndex = 0;
10617
0
            return(1);
10618
0
        }
10619
32.3k
        ctxt->checkIndex = index;
10620
32.3k
        return(0);
10621
32.3k
    } else {
10622
25.4k
        ctxt->checkIndex = 0;
10623
25.4k
        return(1);
10624
25.4k
    }
10625
57.7k
}
10626
10627
/**
10628
 * Check whether the input buffer contains a string.
10629
 *
10630
 * @param ctxt  an XML parser context
10631
 * @param startDelta  delta to apply at the start
10632
 * @param str  string
10633
 * @param strLen  length of string
10634
 */
10635
static const xmlChar *
10636
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10637
239k
                     const char *str, size_t strLen) {
10638
239k
    const xmlChar *cur, *term;
10639
10640
239k
    if (ctxt->checkIndex == 0) {
10641
110k
        cur = ctxt->input->cur + startDelta;
10642
129k
    } else {
10643
129k
        cur = ctxt->input->cur + ctxt->checkIndex;
10644
129k
    }
10645
10646
239k
    term = BAD_CAST strstr((const char *) cur, str);
10647
239k
    if (term == NULL) {
10648
131k
        const xmlChar *end = ctxt->input->end;
10649
131k
        size_t index;
10650
10651
        /* Rescan (strLen - 1) characters. */
10652
131k
        if ((size_t) (end - cur) < strLen)
10653
2.56k
            end = cur;
10654
128k
        else
10655
128k
            end -= strLen - 1;
10656
131k
        index = end - ctxt->input->cur;
10657
131k
        if (index > LONG_MAX) {
10658
0
            ctxt->checkIndex = 0;
10659
0
            return(ctxt->input->end - strLen);
10660
0
        }
10661
131k
        ctxt->checkIndex = index;
10662
131k
    } else {
10663
108k
        ctxt->checkIndex = 0;
10664
108k
    }
10665
10666
239k
    return(term);
10667
239k
}
10668
10669
/**
10670
 * Check whether the input buffer contains terminated char data.
10671
 *
10672
 * @param ctxt  an XML parser context
10673
 */
10674
static int
10675
53.4k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10676
53.4k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10677
53.4k
    const xmlChar *end = ctxt->input->end;
10678
53.4k
    size_t index;
10679
10680
1.87M
    while (cur < end) {
10681
1.86M
        if ((*cur == '<') || (*cur == '&')) {
10682
41.3k
            ctxt->checkIndex = 0;
10683
41.3k
            return(1);
10684
41.3k
        }
10685
1.82M
        cur++;
10686
1.82M
    }
10687
10688
12.1k
    index = cur - ctxt->input->cur;
10689
12.1k
    if (index > LONG_MAX) {
10690
0
        ctxt->checkIndex = 0;
10691
0
        return(1);
10692
0
    }
10693
12.1k
    ctxt->checkIndex = index;
10694
12.1k
    return(0);
10695
12.1k
}
10696
10697
/**
10698
 * Check whether there's enough data in the input buffer to finish parsing
10699
 * a start tag. This has to take quotes into account.
10700
 *
10701
 * @param ctxt  an XML parser context
10702
 */
10703
static int
10704
920k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10705
920k
    const xmlChar *cur;
10706
920k
    const xmlChar *end = ctxt->input->end;
10707
920k
    int state = ctxt->endCheckState;
10708
920k
    size_t index;
10709
10710
920k
    if (ctxt->checkIndex == 0)
10711
124k
        cur = ctxt->input->cur + 1;
10712
796k
    else
10713
796k
        cur = ctxt->input->cur + ctxt->checkIndex;
10714
10715
119M
    while (cur < end) {
10716
118M
        if (state) {
10717
100M
            if (*cur == state)
10718
102k
                state = 0;
10719
100M
        } else if (*cur == '\'' || *cur == '"') {
10720
105k
            state = *cur;
10721
17.5M
        } else if (*cur == '>') {
10722
114k
            ctxt->checkIndex = 0;
10723
114k
            ctxt->endCheckState = 0;
10724
114k
            return(1);
10725
114k
        }
10726
118M
        cur++;
10727
118M
    }
10728
10729
805k
    index = cur - ctxt->input->cur;
10730
805k
    if (index > LONG_MAX) {
10731
0
        ctxt->checkIndex = 0;
10732
0
        ctxt->endCheckState = 0;
10733
0
        return(1);
10734
0
    }
10735
805k
    ctxt->checkIndex = index;
10736
805k
    ctxt->endCheckState = state;
10737
805k
    return(0);
10738
805k
}
10739
10740
/**
10741
 * Check whether there's enough data in the input buffer to finish parsing
10742
 * the internal subset.
10743
 *
10744
 * @param ctxt  an XML parser context
10745
 */
10746
static int
10747
67.5k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10748
    /*
10749
     * Sorry, but progressive parsing of the internal subset is not
10750
     * supported. We first check that the full content of the internal
10751
     * subset is available and parsing is launched only at that point.
10752
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10753
     * not in a ']]>' sequence which are conditional sections.
10754
     */
10755
67.5k
    const xmlChar *cur, *start;
10756
67.5k
    const xmlChar *end = ctxt->input->end;
10757
67.5k
    int state = ctxt->endCheckState;
10758
67.5k
    size_t index;
10759
10760
67.5k
    if (ctxt->checkIndex == 0) {
10761
9.78k
        cur = ctxt->input->cur + 1;
10762
57.7k
    } else {
10763
57.7k
        cur = ctxt->input->cur + ctxt->checkIndex;
10764
57.7k
    }
10765
67.5k
    start = cur;
10766
10767
39.6M
    while (cur < end) {
10768
39.5M
        if (state == '-') {
10769
44.0k
            if ((*cur == '-') &&
10770
44.0k
                (cur[1] == '-') &&
10771
44.0k
                (cur[2] == '>')) {
10772
1.20k
                state = 0;
10773
1.20k
                cur += 3;
10774
1.20k
                start = cur;
10775
1.20k
                continue;
10776
1.20k
            }
10777
44.0k
        }
10778
39.5M
        else if (state == ']') {
10779
9.12k
            if (*cur == '>') {
10780
6.54k
                ctxt->checkIndex = 0;
10781
6.54k
                ctxt->endCheckState = 0;
10782
6.54k
                return(1);
10783
6.54k
            }
10784
2.58k
            if (IS_BLANK_CH(*cur)) {
10785
1.22k
                state = ' ';
10786
1.36k
            } else if (*cur != ']') {
10787
678
                state = 0;
10788
678
                start = cur;
10789
678
                continue;
10790
678
            }
10791
2.58k
        }
10792
39.4M
        else if (state == ' ') {
10793
2.05k
            if (*cur == '>') {
10794
212
                ctxt->checkIndex = 0;
10795
212
                ctxt->endCheckState = 0;
10796
212
                return(1);
10797
212
            }
10798
1.83k
            if (!IS_BLANK_CH(*cur)) {
10799
1.00k
                state = 0;
10800
1.00k
                start = cur;
10801
1.00k
                continue;
10802
1.00k
            }
10803
1.83k
        }
10804
39.4M
        else if (state != 0) {
10805
28.1M
            if (*cur == state) {
10806
33.0k
                state = 0;
10807
33.0k
                start = cur + 1;
10808
33.0k
            }
10809
28.1M
        }
10810
11.3M
        else if (*cur == '<') {
10811
38.0k
            if ((cur[1] == '!') &&
10812
38.0k
                (cur[2] == '-') &&
10813
38.0k
                (cur[3] == '-')) {
10814
1.25k
                state = '-';
10815
1.25k
                cur += 4;
10816
                /* Don't treat <!--> as comment */
10817
1.25k
                start = cur;
10818
1.25k
                continue;
10819
1.25k
            }
10820
38.0k
        }
10821
11.3M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10822
41.7k
            state = *cur;
10823
41.7k
        }
10824
10825
39.5M
        cur++;
10826
39.5M
    }
10827
10828
    /*
10829
     * Rescan the three last characters to detect "<!--" and "-->"
10830
     * split across chunks.
10831
     */
10832
60.7k
    if ((state == 0) || (state == '-')) {
10833
9.32k
        if (cur - start < 3)
10834
746
            cur = start;
10835
8.57k
        else
10836
8.57k
            cur -= 3;
10837
9.32k
    }
10838
60.7k
    index = cur - ctxt->input->cur;
10839
60.7k
    if (index > LONG_MAX) {
10840
0
        ctxt->checkIndex = 0;
10841
0
        ctxt->endCheckState = 0;
10842
0
        return(1);
10843
0
    }
10844
60.7k
    ctxt->checkIndex = index;
10845
60.7k
    ctxt->endCheckState = state;
10846
60.7k
    return(0);
10847
60.7k
}
10848
10849
/**
10850
 * Try to progress on parsing
10851
 *
10852
 * @param ctxt  an XML parser context
10853
 * @param terminate  last chunk indicator
10854
 * @returns zero if no parsing was possible
10855
 */
10856
static int
10857
1.37M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10858
1.37M
    int ret = 0;
10859
1.37M
    size_t avail;
10860
1.37M
    xmlChar cur, next;
10861
10862
1.37M
    if (ctxt->input == NULL)
10863
0
        return(0);
10864
10865
1.37M
    if ((ctxt->input != NULL) &&
10866
1.37M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10867
1.30k
        xmlParserShrink(ctxt);
10868
1.30k
    }
10869
10870
2.24M
    while (ctxt->disableSAX == 0) {
10871
2.23M
        avail = ctxt->input->end - ctxt->input->cur;
10872
2.23M
        if (avail < 1)
10873
15.5k
      goto done;
10874
2.21M
        switch (ctxt->instate) {
10875
291k
            case XML_PARSER_EOF:
10876
          /*
10877
     * Document parsing is done !
10878
     */
10879
291k
          goto done;
10880
25.9k
            case XML_PARSER_START:
10881
                /*
10882
                 * Very first chars read from the document flow.
10883
                 */
10884
25.9k
                if ((!terminate) && (avail < 4))
10885
647
                    goto done;
10886
10887
                /*
10888
                 * We need more bytes to detect EBCDIC code pages.
10889
                 * See xmlDetectEBCDIC.
10890
                 */
10891
25.3k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10892
25.3k
                    (!terminate) && (avail < 200))
10893
343
                    goto done;
10894
10895
24.9k
                xmlDetectEncoding(ctxt);
10896
24.9k
                ctxt->instate = XML_PARSER_XML_DECL;
10897
24.9k
    break;
10898
10899
71.1k
            case XML_PARSER_XML_DECL:
10900
71.1k
    if ((!terminate) && (avail < 2))
10901
20
        goto done;
10902
71.1k
    cur = ctxt->input->cur[0];
10903
71.1k
    next = ctxt->input->cur[1];
10904
71.1k
          if ((cur == '<') && (next == '?')) {
10905
        /* PI or XML decl */
10906
50.2k
        if ((!terminate) &&
10907
50.2k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10908
46.2k
      goto done;
10909
3.98k
        if ((ctxt->input->cur[2] == 'x') &&
10910
3.98k
      (ctxt->input->cur[3] == 'm') &&
10911
3.98k
      (ctxt->input->cur[4] == 'l') &&
10912
3.98k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10913
2.47k
      ret += 5;
10914
2.47k
      xmlParseXMLDecl(ctxt);
10915
2.47k
        } else {
10916
1.50k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917
1.50k
                        if (ctxt->version == NULL) {
10918
3
                            xmlErrMemory(ctxt);
10919
3
                            break;
10920
3
                        }
10921
1.50k
        }
10922
20.9k
    } else {
10923
20.9k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924
20.9k
        if (ctxt->version == NULL) {
10925
36
            xmlErrMemory(ctxt);
10926
36
      break;
10927
36
        }
10928
20.9k
    }
10929
24.8k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10930
24.8k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10931
24.8k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10932
24.8k
                }
10933
24.8k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10934
24.8k
                    (!ctxt->disableSAX))
10935
24.2k
                    ctxt->sax->startDocument(ctxt->userData);
10936
24.8k
                ctxt->instate = XML_PARSER_MISC;
10937
24.8k
    break;
10938
638k
            case XML_PARSER_START_TAG: {
10939
638k
          const xmlChar *name;
10940
638k
    const xmlChar *prefix = NULL;
10941
638k
    const xmlChar *URI = NULL;
10942
638k
                int line = ctxt->input->line;
10943
638k
    int nbNs = 0;
10944
10945
638k
    if ((!terminate) && (avail < 2))
10946
181
        goto done;
10947
637k
    cur = ctxt->input->cur[0];
10948
637k
          if (cur != '<') {
10949
2.60k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950
2.60k
                                   "Start tag expected, '<' not found");
10951
2.60k
                    ctxt->instate = XML_PARSER_EOF;
10952
2.60k
                    xmlFinishDocument(ctxt);
10953
2.60k
        goto done;
10954
2.60k
    }
10955
635k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10956
506k
                    goto done;
10957
128k
    if (ctxt->spaceNr == 0)
10958
0
        spacePush(ctxt, -1);
10959
128k
    else if (*ctxt->space == -2)
10960
16.7k
        spacePush(ctxt, -1);
10961
112k
    else
10962
112k
        spacePush(ctxt, *ctxt->space);
10963
128k
#ifdef LIBXML_SAX1_ENABLED
10964
128k
    if (ctxt->sax2)
10965
87.2k
#endif /* LIBXML_SAX1_ENABLED */
10966
87.2k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10967
41.6k
#ifdef LIBXML_SAX1_ENABLED
10968
41.6k
    else
10969
41.6k
        name = xmlParseStartTag(ctxt);
10970
128k
#endif /* LIBXML_SAX1_ENABLED */
10971
128k
    if (name == NULL) {
10972
3.40k
        spacePop(ctxt);
10973
3.40k
                    ctxt->instate = XML_PARSER_EOF;
10974
3.40k
                    xmlFinishDocument(ctxt);
10975
3.40k
        goto done;
10976
3.40k
    }
10977
125k
#ifdef LIBXML_VALID_ENABLED
10978
    /*
10979
     * [ VC: Root Element Type ]
10980
     * The Name in the document type declaration must match
10981
     * the element type of the root element.
10982
     */
10983
125k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984
125k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986
125k
#endif /* LIBXML_VALID_ENABLED */
10987
10988
    /*
10989
     * Check for an Empty Element.
10990
     */
10991
125k
    if ((RAW == '/') && (NXT(1) == '>')) {
10992
6.78k
        SKIP(2);
10993
10994
6.78k
        if (ctxt->sax2) {
10995
5.13k
      if ((ctxt->sax != NULL) &&
10996
5.13k
          (ctxt->sax->endElementNs != NULL) &&
10997
5.13k
          (!ctxt->disableSAX))
10998
5.08k
          ctxt->sax->endElementNs(ctxt->userData, name,
10999
5.08k
                                  prefix, URI);
11000
5.13k
      if (nbNs > 0)
11001
2.69k
          xmlParserNsPop(ctxt, nbNs);
11002
5.13k
#ifdef LIBXML_SAX1_ENABLED
11003
5.13k
        } else {
11004
1.65k
      if ((ctxt->sax != NULL) &&
11005
1.65k
          (ctxt->sax->endElement != NULL) &&
11006
1.65k
          (!ctxt->disableSAX))
11007
1.62k
          ctxt->sax->endElement(ctxt->userData, name);
11008
1.65k
#endif /* LIBXML_SAX1_ENABLED */
11009
1.65k
        }
11010
6.78k
        spacePop(ctxt);
11011
118k
    } else if (RAW == '>') {
11012
86.7k
        NEXT;
11013
86.7k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11014
86.7k
    } else {
11015
32.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11016
32.0k
           "Couldn't find end of Start Tag %s\n",
11017
32.0k
           name);
11018
32.0k
        nodePop(ctxt);
11019
32.0k
        spacePop(ctxt);
11020
32.0k
                    if (nbNs > 0)
11021
4.14k
                        xmlParserNsPop(ctxt, nbNs);
11022
32.0k
    }
11023
11024
125k
                if (ctxt->nameNr == 0)
11025
4.70k
                    ctxt->instate = XML_PARSER_EPILOG;
11026
120k
                else
11027
120k
                    ctxt->instate = XML_PARSER_CONTENT;
11028
125k
                break;
11029
128k
      }
11030
686k
            case XML_PARSER_CONTENT: {
11031
686k
    cur = ctxt->input->cur[0];
11032
11033
686k
    if (cur == '<') {
11034
259k
                    if ((!terminate) && (avail < 2))
11035
1.14k
                        goto done;
11036
258k
        next = ctxt->input->cur[1];
11037
11038
258k
                    if (next == '/') {
11039
18.8k
                        ctxt->instate = XML_PARSER_END_TAG;
11040
18.8k
                        break;
11041
239k
                    } else if (next == '?') {
11042
16.0k
                        if ((!terminate) &&
11043
16.0k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11044
6.40k
                            goto done;
11045
9.62k
                        xmlParsePI(ctxt);
11046
9.62k
                        ctxt->instate = XML_PARSER_CONTENT;
11047
9.62k
                        break;
11048
223k
                    } else if (next == '!') {
11049
110k
                        if ((!terminate) && (avail < 3))
11050
405
                            goto done;
11051
110k
                        next = ctxt->input->cur[2];
11052
11053
110k
                        if (next == '-') {
11054
51.1k
                            if ((!terminate) && (avail < 4))
11055
362
                                goto done;
11056
50.8k
                            if (ctxt->input->cur[3] == '-') {
11057
50.8k
                                if ((!terminate) &&
11058
50.8k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11059
14.0k
                                    goto done;
11060
36.7k
                                xmlParseComment(ctxt);
11061
36.7k
                                ctxt->instate = XML_PARSER_CONTENT;
11062
36.7k
                                break;
11063
50.8k
                            }
11064
59.1k
                        } else if (next == '[') {
11065
58.4k
                            if ((!terminate) && (avail < 9))
11066
233
                                goto done;
11067
58.2k
                            if ((ctxt->input->cur[2] == '[') &&
11068
58.2k
                                (ctxt->input->cur[3] == 'C') &&
11069
58.2k
                                (ctxt->input->cur[4] == 'D') &&
11070
58.2k
                                (ctxt->input->cur[5] == 'A') &&
11071
58.2k
                                (ctxt->input->cur[6] == 'T') &&
11072
58.2k
                                (ctxt->input->cur[7] == 'A') &&
11073
58.2k
                                (ctxt->input->cur[8] == '[')) {
11074
58.0k
                                if ((!terminate) &&
11075
58.0k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11076
52.9k
                                    goto done;
11077
5.13k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11078
5.13k
                                xmlParseCDSect(ctxt);
11079
5.13k
                                ctxt->instate = XML_PARSER_CONTENT;
11080
5.13k
                                break;
11081
58.0k
                            }
11082
58.2k
                        }
11083
110k
                    }
11084
426k
    } else if (cur == '&') {
11085
38.4k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11086
25.9k
      goto done;
11087
12.5k
        xmlParseReference(ctxt);
11088
12.5k
                    break;
11089
387k
    } else {
11090
        /* TODO Avoid the extra copy, handle directly !!! */
11091
        /*
11092
         * Goal of the following test is:
11093
         *  - minimize calls to the SAX 'character' callback
11094
         *    when they are mergeable
11095
         *  - handle a problem for isBlank when we only parse
11096
         *    a sequence of blank chars and the next one is
11097
         *    not available to check against '<' presence.
11098
         *  - tries to homogenize the differences in SAX
11099
         *    callbacks between the push and pull versions
11100
         *    of the parser.
11101
         */
11102
387k
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11103
65.2k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11104
12.1k
          goto done;
11105
65.2k
                    }
11106
375k
                    ctxt->checkIndex = 0;
11107
375k
        xmlParseCharDataInternal(ctxt, !terminate);
11108
375k
                    break;
11109
387k
    }
11110
11111
113k
                ctxt->instate = XML_PARSER_START_TAG;
11112
113k
    break;
11113
686k
      }
11114
25.3k
            case XML_PARSER_END_TAG:
11115
25.3k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11116
6.42k
        goto done;
11117
18.8k
    if (ctxt->sax2) {
11118
15.7k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11119
15.7k
        nameNsPop(ctxt);
11120
15.7k
    }
11121
3.19k
#ifdef LIBXML_SAX1_ENABLED
11122
3.19k
      else
11123
3.19k
        xmlParseEndTag1(ctxt, 0);
11124
18.8k
#endif /* LIBXML_SAX1_ENABLED */
11125
18.8k
    if (ctxt->nameNr == 0) {
11126
459
        ctxt->instate = XML_PARSER_EPILOG;
11127
18.4k
    } else {
11128
18.4k
        ctxt->instate = XML_PARSER_CONTENT;
11129
18.4k
    }
11130
18.8k
    break;
11131
390k
            case XML_PARSER_MISC:
11132
402k
            case XML_PARSER_PROLOG:
11133
404k
            case XML_PARSER_EPILOG:
11134
404k
    SKIP_BLANKS;
11135
404k
                avail = ctxt->input->end - ctxt->input->cur;
11136
404k
    if (avail < 1)
11137
527
        goto done;
11138
404k
    if (ctxt->input->cur[0] == '<') {
11139
400k
                    if ((!terminate) && (avail < 2))
11140
602
                        goto done;
11141
400k
                    next = ctxt->input->cur[1];
11142
400k
                    if (next == '?') {
11143
8.12k
                        if ((!terminate) &&
11144
8.12k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11145
3.30k
                            goto done;
11146
4.81k
                        xmlParsePI(ctxt);
11147
4.81k
                        break;
11148
392k
                    } else if (next == '!') {
11149
376k
                        if ((!terminate) && (avail < 3))
11150
473
                            goto done;
11151
11152
376k
                        if (ctxt->input->cur[2] == '-') {
11153
62.0k
                            if ((!terminate) && (avail < 4))
11154
402
                                goto done;
11155
61.6k
                            if (ctxt->input->cur[3] == '-') {
11156
61.6k
                                if ((!terminate) &&
11157
61.6k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11158
8.20k
                                    goto done;
11159
53.4k
                                xmlParseComment(ctxt);
11160
53.4k
                                break;
11161
61.6k
                            }
11162
314k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11163
314k
                            if ((!terminate) && (avail < 9))
11164
30
                                goto done;
11165
314k
                            if ((ctxt->input->cur[2] == 'D') &&
11166
314k
                                (ctxt->input->cur[3] == 'O') &&
11167
314k
                                (ctxt->input->cur[4] == 'C') &&
11168
314k
                                (ctxt->input->cur[5] == 'T') &&
11169
314k
                                (ctxt->input->cur[6] == 'Y') &&
11170
314k
                                (ctxt->input->cur[7] == 'P') &&
11171
314k
                                (ctxt->input->cur[8] == 'E')) {
11172
314k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11173
299k
                                    goto done;
11174
14.7k
                                ctxt->inSubset = 1;
11175
14.7k
                                xmlParseDocTypeDecl(ctxt);
11176
14.7k
                                if (RAW == '[') {
11177
11.7k
                                    ctxt->instate = XML_PARSER_DTD;
11178
11.7k
                                } else {
11179
3.04k
                                    if (RAW == '>')
11180
2.04k
                                        NEXT;
11181
                                    /*
11182
                                     * Create and update the external subset.
11183
                                     */
11184
3.04k
                                    ctxt->inSubset = 2;
11185
3.04k
                                    if ((ctxt->sax != NULL) &&
11186
3.04k
                                        (!ctxt->disableSAX) &&
11187
3.04k
                                        (ctxt->sax->externalSubset != NULL))
11188
2.90k
                                        ctxt->sax->externalSubset(
11189
2.90k
                                                ctxt->userData,
11190
2.90k
                                                ctxt->intSubName,
11191
2.90k
                                                ctxt->extSubSystem,
11192
2.90k
                                                ctxt->extSubURI);
11193
3.04k
                                    ctxt->inSubset = 0;
11194
3.04k
                                    xmlCleanSpecialAttr(ctxt);
11195
3.04k
                                    ctxt->instate = XML_PARSER_PROLOG;
11196
3.04k
                                }
11197
14.7k
                                break;
11198
314k
                            }
11199
314k
                        }
11200
376k
                    }
11201
400k
                }
11202
11203
18.7k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11204
892
                    if (ctxt->errNo == XML_ERR_OK)
11205
21
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11206
892
        ctxt->instate = XML_PARSER_EOF;
11207
892
                    xmlFinishDocument(ctxt);
11208
17.8k
                } else {
11209
17.8k
        ctxt->instate = XML_PARSER_START_TAG;
11210
17.8k
    }
11211
18.7k
    break;
11212
71.9k
            case XML_PARSER_DTD: {
11213
71.9k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11214
60.7k
                    goto done;
11215
11.1k
    xmlParseInternalSubset(ctxt);
11216
11.1k
    ctxt->inSubset = 2;
11217
11.1k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11218
11.1k
        (ctxt->sax->externalSubset != NULL))
11219
8.88k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11220
8.88k
          ctxt->extSubSystem, ctxt->extSubURI);
11221
11.1k
    ctxt->inSubset = 0;
11222
11.1k
    xmlCleanSpecialAttr(ctxt);
11223
11.1k
    ctxt->instate = XML_PARSER_PROLOG;
11224
11.1k
                break;
11225
71.9k
      }
11226
0
            default:
11227
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11228
0
      "PP: internal error\n");
11229
0
    ctxt->instate = XML_PARSER_EOF;
11230
0
    break;
11231
2.21M
  }
11232
2.21M
    }
11233
1.37M
done:
11234
1.37M
    return(ret);
11235
1.37M
}
11236
11237
/**
11238
 * Parse a chunk of memory in push parser mode.
11239
 *
11240
 * Assumes that the parser context was initialized with
11241
 * #xmlCreatePushParserCtxt.
11242
 *
11243
 * The last chunk, which will often be empty, must be marked with
11244
 * the `terminate` flag. With the default SAX callbacks, the resulting
11245
 * document will be available in ctxt->myDoc. This pointer will not
11246
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11247
 * caller. If the document isn't well-formed, it will still be returned
11248
 * in ctxt->myDoc.
11249
 *
11250
 * As an exception, #xmlCtxtResetPush will free the document in
11251
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11252
 * the document.
11253
 *
11254
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11255
 * result document.
11256
 *
11257
 * @param ctxt  an XML parser context
11258
 * @param chunk  chunk of memory
11259
 * @param size  size of chunk in bytes
11260
 * @param terminate  last chunk indicator
11261
 * @returns an xmlParserErrors code (0 on success).
11262
 */
11263
int
11264
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11265
1.63M
              int terminate) {
11266
1.63M
    size_t curBase;
11267
1.63M
    size_t maxLength;
11268
1.63M
    size_t pos;
11269
1.63M
    int end_in_lf = 0;
11270
1.63M
    int res;
11271
11272
1.63M
    if ((ctxt == NULL) || (size < 0))
11273
0
        return(XML_ERR_ARGUMENT);
11274
1.63M
    if ((chunk == NULL) && (size > 0))
11275
0
        return(XML_ERR_ARGUMENT);
11276
1.63M
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11277
0
        return(XML_ERR_ARGUMENT);
11278
1.63M
    if (ctxt->disableSAX != 0)
11279
263k
        return(ctxt->errNo);
11280
11281
1.37M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11282
1.37M
    if (ctxt->instate == XML_PARSER_START)
11283
26.2k
        xmlCtxtInitializeLate(ctxt);
11284
1.37M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11285
1.37M
        (chunk[size - 1] == '\r')) {
11286
800
  end_in_lf = 1;
11287
800
  size--;
11288
800
    }
11289
11290
    /*
11291
     * Also push an empty chunk to make sure that the raw buffer
11292
     * will be flushed if there is an encoder.
11293
     */
11294
1.37M
    pos = ctxt->input->cur - ctxt->input->base;
11295
1.37M
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11296
1.37M
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11297
1.37M
    if (res < 0) {
11298
33
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11299
33
        xmlHaltParser(ctxt);
11300
33
        return(ctxt->errNo);
11301
33
    }
11302
11303
1.37M
    xmlParseTryOrFinish(ctxt, terminate);
11304
11305
1.37M
    curBase = ctxt->input->cur - ctxt->input->base;
11306
1.37M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11307
705k
                XML_MAX_HUGE_LENGTH :
11308
1.37M
                XML_MAX_LOOKUP_LIMIT;
11309
1.37M
    if (curBase > maxLength) {
11310
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11311
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11312
0
        xmlHaltParser(ctxt);
11313
0
    }
11314
11315
1.37M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11316
11.9k
        return(ctxt->errNo);
11317
11318
1.35M
    if (end_in_lf == 1) {
11319
784
  pos = ctxt->input->cur - ctxt->input->base;
11320
784
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11321
784
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11322
784
        if (res < 0) {
11323
4
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11324
4
            xmlHaltParser(ctxt);
11325
4
            return(ctxt->errNo);
11326
4
        }
11327
784
    }
11328
1.35M
    if (terminate) {
11329
  /*
11330
   * Check for termination
11331
   */
11332
13.3k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11333
13.3k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11334
5.69k
            if (ctxt->nameNr > 0) {
11335
3.54k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11336
3.54k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11337
3.54k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11338
3.54k
                        "Premature end of data in tag %s line %d\n",
11339
3.54k
                        name, line, NULL);
11340
3.54k
            } else if (ctxt->instate == XML_PARSER_START) {
11341
124
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11342
2.02k
            } else {
11343
2.02k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11344
2.02k
                               "Start tag expected, '<' not found\n");
11345
2.02k
            }
11346
7.60k
        } else {
11347
7.60k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11348
7.60k
        }
11349
13.3k
  if (ctxt->instate != XML_PARSER_EOF) {
11350
7.08k
            ctxt->instate = XML_PARSER_EOF;
11351
7.08k
            xmlFinishDocument(ctxt);
11352
7.08k
  }
11353
13.3k
    }
11354
1.35M
    if (ctxt->wellFormed == 0)
11355
919k
  return((xmlParserErrors) ctxt->errNo);
11356
440k
    else
11357
440k
        return(0);
11358
1.35M
}
11359
11360
/************************************************************************
11361
 *                  *
11362
 *    I/O front end functions to the parser     *
11363
 *                  *
11364
 ************************************************************************/
11365
11366
/**
11367
 * Create a parser context for using the XML parser in push mode.
11368
 * See #xmlParseChunk.
11369
 *
11370
 * Passing an initial chunk is useless and deprecated.
11371
 *
11372
 * The push parser doesn't support recovery mode or the
11373
 * XML_PARSE_NOBLANKS option.
11374
 *
11375
 * `filename` is used as base URI to fetch external entities and for
11376
 * error reports.
11377
 *
11378
 * @param sax  a SAX handler (optional)
11379
 * @param user_data  user data for SAX callbacks (optional)
11380
 * @param chunk  initial chunk (optional, deprecated)
11381
 * @param size  size of initial chunk in bytes
11382
 * @param filename  file name or URI (optional)
11383
 * @returns the new parser context or NULL if a memory allocation
11384
 * failed.
11385
 */
11386
11387
xmlParserCtxt *
11388
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11389
25.3k
                        const char *chunk, int size, const char *filename) {
11390
25.3k
    xmlParserCtxtPtr ctxt;
11391
25.3k
    xmlParserInputPtr input;
11392
11393
25.3k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11394
25.3k
    if (ctxt == NULL)
11395
18
  return(NULL);
11396
11397
25.3k
    ctxt->options &= ~XML_PARSE_NODICT;
11398
25.3k
    ctxt->dictNames = 1;
11399
11400
25.3k
    input = xmlNewPushInput(filename, chunk, size);
11401
25.3k
    if (input == NULL) {
11402
12
  xmlFreeParserCtxt(ctxt);
11403
12
  return(NULL);
11404
12
    }
11405
25.2k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11406
7
        xmlFreeInputStream(input);
11407
7
        xmlFreeParserCtxt(ctxt);
11408
7
        return(NULL);
11409
7
    }
11410
11411
25.2k
    return(ctxt);
11412
25.2k
}
11413
#endif /* LIBXML_PUSH_ENABLED */
11414
11415
/**
11416
 * Blocks further parser processing
11417
 *
11418
 * @param ctxt  an XML parser context
11419
 */
11420
void
11421
0
xmlStopParser(xmlParserCtxt *ctxt) {
11422
0
    if (ctxt == NULL)
11423
0
        return;
11424
0
    xmlHaltParser(ctxt);
11425
    /*
11426
     * TODO: Update ctxt->lastError and ctxt->wellFormed?
11427
     */
11428
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11429
0
        ctxt->errNo = XML_ERR_USER_STOP;
11430
0
}
11431
11432
/**
11433
 * Create a parser context for using the XML parser with an existing
11434
 * I/O stream
11435
 *
11436
 * @param sax  a SAX handler (optional)
11437
 * @param user_data  user data for SAX callbacks (optional)
11438
 * @param ioread  an I/O read function
11439
 * @param ioclose  an I/O close function (optional)
11440
 * @param ioctx  an I/O handler
11441
 * @param enc  the charset encoding if known (deprecated)
11442
 * @returns the new parser context or NULL
11443
 */
11444
xmlParserCtxt *
11445
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11446
                      xmlInputReadCallback ioread,
11447
                      xmlInputCloseCallback ioclose,
11448
0
                      void *ioctx, xmlCharEncoding enc) {
11449
0
    xmlParserCtxtPtr ctxt;
11450
0
    xmlParserInputPtr input;
11451
0
    const char *encoding;
11452
11453
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11454
0
    if (ctxt == NULL)
11455
0
  return(NULL);
11456
11457
0
    encoding = xmlGetCharEncodingName(enc);
11458
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11459
0
                                  encoding, 0);
11460
0
    if (input == NULL) {
11461
0
  xmlFreeParserCtxt(ctxt);
11462
0
        return (NULL);
11463
0
    }
11464
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11465
0
        xmlFreeInputStream(input);
11466
0
        xmlFreeParserCtxt(ctxt);
11467
0
        return(NULL);
11468
0
    }
11469
11470
0
    return(ctxt);
11471
0
}
11472
11473
#ifdef LIBXML_VALID_ENABLED
11474
/************************************************************************
11475
 *                  *
11476
 *    Front ends when parsing a DTD       *
11477
 *                  *
11478
 ************************************************************************/
11479
11480
/**
11481
 * Parse a DTD.
11482
 *
11483
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11484
 * to make external entities work.
11485
 *
11486
 * @since 2.14.0
11487
 *
11488
 * @param ctxt  a parser context
11489
 * @param input  a parser input
11490
 * @param publicId  public ID of the DTD (optional)
11491
 * @param systemId  system ID of the DTD (optional)
11492
 * @returns the resulting xmlDtd or NULL in case of error.
11493
 * `input` will be freed by the function in any case.
11494
 */
11495
xmlDtd *
11496
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11497
643
                const xmlChar *publicId, const xmlChar *systemId) {
11498
643
    xmlDtdPtr ret = NULL;
11499
11500
643
    if ((ctxt == NULL) || (input == NULL)) {
11501
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11502
0
        xmlFreeInputStream(input);
11503
0
        return(NULL);
11504
0
    }
11505
11506
643
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11507
2
        xmlFreeInputStream(input);
11508
2
        return(NULL);
11509
2
    }
11510
11511
641
    if (publicId == NULL)
11512
568
        publicId = BAD_CAST "none";
11513
641
    if (systemId == NULL)
11514
0
        systemId = BAD_CAST "none";
11515
11516
641
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11517
641
    if (ctxt->myDoc == NULL) {
11518
2
        xmlErrMemory(ctxt);
11519
2
        goto error;
11520
2
    }
11521
639
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11522
639
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11523
639
                                       publicId, systemId);
11524
639
    if (ctxt->myDoc->extSubset == NULL) {
11525
9
        xmlErrMemory(ctxt);
11526
9
        xmlFreeDoc(ctxt->myDoc);
11527
9
        goto error;
11528
9
    }
11529
11530
630
    xmlParseExternalSubset(ctxt, publicId, systemId);
11531
11532
630
    if (ctxt->wellFormed) {
11533
106
        ret = ctxt->myDoc->extSubset;
11534
106
        ctxt->myDoc->extSubset = NULL;
11535
106
        if (ret != NULL) {
11536
106
            xmlNodePtr tmp;
11537
11538
106
            ret->doc = NULL;
11539
106
            tmp = ret->children;
11540
915
            while (tmp != NULL) {
11541
809
                tmp->doc = NULL;
11542
809
                tmp = tmp->next;
11543
809
            }
11544
106
        }
11545
524
    } else {
11546
524
        ret = NULL;
11547
524
    }
11548
630
    xmlFreeDoc(ctxt->myDoc);
11549
630
    ctxt->myDoc = NULL;
11550
11551
641
error:
11552
641
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11553
11554
641
    return(ret);
11555
630
}
11556
11557
/**
11558
 * Load and parse a DTD
11559
 *
11560
 * @deprecated Use #xmlCtxtParseDtd.
11561
 *
11562
 * @param sax  the SAX handler block or NULL
11563
 * @param input  an Input Buffer
11564
 * @param enc  the charset encoding if known
11565
 * @returns the resulting xmlDtd or NULL in case of error.
11566
 * `input` will be freed by the function in any case.
11567
 */
11568
11569
xmlDtd *
11570
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11571
0
        xmlCharEncoding enc) {
11572
0
    xmlDtdPtr ret = NULL;
11573
0
    xmlParserCtxtPtr ctxt;
11574
0
    xmlParserInputPtr pinput = NULL;
11575
11576
0
    if (input == NULL)
11577
0
  return(NULL);
11578
11579
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11580
0
    if (ctxt == NULL) {
11581
0
        xmlFreeParserInputBuffer(input);
11582
0
  return(NULL);
11583
0
    }
11584
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11585
11586
    /*
11587
     * generate a parser input from the I/O handler
11588
     */
11589
11590
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11591
0
    if (pinput == NULL) {
11592
0
        xmlFreeParserInputBuffer(input);
11593
0
  xmlFreeParserCtxt(ctxt);
11594
0
  return(NULL);
11595
0
    }
11596
11597
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11598
0
        xmlSwitchEncoding(ctxt, enc);
11599
0
    }
11600
11601
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11602
11603
0
    xmlFreeParserCtxt(ctxt);
11604
0
    return(ret);
11605
0
}
11606
11607
/**
11608
 * Load and parse an external subset.
11609
 *
11610
 * @deprecated Use #xmlCtxtParseDtd.
11611
 *
11612
 * @param sax  the SAX handler block
11613
 * @param publicId  public identifier of the DTD (optional)
11614
 * @param systemId  system identifier (URL) of the DTD
11615
 * @returns the resulting xmlDtd or NULL in case of error.
11616
 */
11617
11618
xmlDtd *
11619
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11620
0
               const xmlChar *systemId) {
11621
0
    xmlDtdPtr ret = NULL;
11622
0
    xmlParserCtxtPtr ctxt;
11623
0
    xmlParserInputPtr input = NULL;
11624
0
    xmlChar* systemIdCanonic;
11625
11626
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11627
11628
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11629
0
    if (ctxt == NULL) {
11630
0
  return(NULL);
11631
0
    }
11632
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11633
11634
    /*
11635
     * Canonicalise the system ID
11636
     */
11637
0
    systemIdCanonic = xmlCanonicPath(systemId);
11638
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11639
0
  xmlFreeParserCtxt(ctxt);
11640
0
  return(NULL);
11641
0
    }
11642
11643
    /*
11644
     * Ask the Entity resolver to load the damn thing
11645
     */
11646
11647
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11648
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11649
0
                                   systemIdCanonic);
11650
0
    if (input == NULL) {
11651
0
  xmlFreeParserCtxt(ctxt);
11652
0
  if (systemIdCanonic != NULL)
11653
0
      xmlFree(systemIdCanonic);
11654
0
  return(NULL);
11655
0
    }
11656
11657
0
    if (input->filename == NULL)
11658
0
  input->filename = (char *) systemIdCanonic;
11659
0
    else
11660
0
  xmlFree(systemIdCanonic);
11661
11662
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11663
11664
0
    xmlFreeParserCtxt(ctxt);
11665
0
    return(ret);
11666
0
}
11667
11668
11669
/**
11670
 * Load and parse an external subset.
11671
 *
11672
 * @param publicId  public identifier of the DTD (optional)
11673
 * @param systemId  system identifier (URL) of the DTD
11674
 * @returns the resulting xmlDtd or NULL in case of error.
11675
 */
11676
11677
xmlDtd *
11678
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11679
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11680
0
}
11681
#endif /* LIBXML_VALID_ENABLED */
11682
11683
/************************************************************************
11684
 *                  *
11685
 *    Front ends when parsing an Entity     *
11686
 *                  *
11687
 ************************************************************************/
11688
11689
static xmlNodePtr
11690
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11691
6.44k
                            int hasTextDecl, int buildTree) {
11692
6.44k
    xmlNodePtr root = NULL;
11693
6.44k
    xmlNodePtr list = NULL;
11694
6.44k
    xmlChar *rootName = BAD_CAST "#root";
11695
6.44k
    int result;
11696
11697
6.44k
    if (buildTree) {
11698
6.44k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11699
6.44k
        if (root == NULL) {
11700
19
            xmlErrMemory(ctxt);
11701
19
            goto error;
11702
19
        }
11703
6.44k
    }
11704
11705
6.42k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11706
22
        goto error;
11707
11708
6.40k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11709
6.40k
    spacePush(ctxt, -1);
11710
11711
6.40k
    if (buildTree)
11712
6.40k
        nodePush(ctxt, root);
11713
11714
6.40k
    if (hasTextDecl) {
11715
4.17k
        xmlDetectEncoding(ctxt);
11716
11717
        /*
11718
         * Parse a possible text declaration first
11719
         */
11720
4.17k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11721
4.17k
            (IS_BLANK_CH(NXT(5)))) {
11722
762
            xmlParseTextDecl(ctxt);
11723
            /*
11724
             * An XML-1.0 document can't reference an entity not XML-1.0
11725
             */
11726
762
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11727
762
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11728
9
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11729
9
                               "Version mismatch between document and "
11730
9
                               "entity\n");
11731
9
            }
11732
762
        }
11733
4.17k
    }
11734
11735
6.40k
    xmlParseContentInternal(ctxt);
11736
11737
6.40k
    if (ctxt->input->cur < ctxt->input->end)
11738
886
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11739
11740
6.40k
    if ((ctxt->wellFormed) ||
11741
6.40k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11742
4.75k
        if (root != NULL) {
11743
4.75k
            xmlNodePtr cur;
11744
11745
            /*
11746
             * Unlink newly created node list.
11747
             */
11748
4.75k
            list = root->children;
11749
4.75k
            root->children = NULL;
11750
4.75k
            root->last = NULL;
11751
33.3k
            for (cur = list; cur != NULL; cur = cur->next)
11752
28.6k
                cur->parent = NULL;
11753
4.75k
        }
11754
4.75k
    }
11755
11756
    /*
11757
     * Read the rest of the stream in case of errors. We want
11758
     * to account for the whole entity size.
11759
     */
11760
6.71k
    do {
11761
6.71k
        ctxt->input->cur = ctxt->input->end;
11762
6.71k
        xmlParserShrink(ctxt);
11763
6.71k
        result = xmlParserGrow(ctxt);
11764
6.71k
    } while (result > 0);
11765
11766
6.40k
    if (buildTree)
11767
6.40k
        nodePop(ctxt);
11768
11769
6.40k
    namePop(ctxt);
11770
6.40k
    spacePop(ctxt);
11771
11772
6.40k
    xmlCtxtPopInput(ctxt);
11773
11774
6.44k
error:
11775
6.44k
    xmlFreeNode(root);
11776
11777
6.44k
    return(list);
11778
6.40k
}
11779
11780
static void
11781
7.56k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11782
7.56k
    xmlParserInputPtr input;
11783
7.56k
    xmlNodePtr list;
11784
7.56k
    unsigned long consumed;
11785
7.56k
    int isExternal;
11786
7.56k
    int buildTree;
11787
7.56k
    int oldMinNsIndex;
11788
7.56k
    int oldNodelen, oldNodemem;
11789
11790
7.56k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11791
7.56k
    buildTree = (ctxt->node != NULL);
11792
11793
    /*
11794
     * Recursion check
11795
     */
11796
7.56k
    if (ent->flags & XML_ENT_EXPANDING) {
11797
11
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11798
11
        xmlHaltParser(ctxt);
11799
11
        goto error;
11800
11
    }
11801
11802
    /*
11803
     * Load entity
11804
     */
11805
7.55k
    input = xmlNewEntityInputStream(ctxt, ent);
11806
7.55k
    if (input == NULL)
11807
1.10k
        goto error;
11808
11809
    /*
11810
     * When building a tree, we need to limit the scope of namespace
11811
     * declarations, so that entities don't reference xmlNs structs
11812
     * from the parent of a reference.
11813
     */
11814
6.44k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11815
6.44k
    if (buildTree)
11816
6.44k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11817
11818
6.44k
    oldNodelen = ctxt->nodelen;
11819
6.44k
    oldNodemem = ctxt->nodemem;
11820
6.44k
    ctxt->nodelen = 0;
11821
6.44k
    ctxt->nodemem = 0;
11822
11823
    /*
11824
     * Parse content
11825
     *
11826
     * This initiates a recursive call chain:
11827
     *
11828
     * - xmlCtxtParseContentInternal
11829
     * - xmlParseContentInternal
11830
     * - xmlParseReference
11831
     * - xmlCtxtParseEntity
11832
     *
11833
     * The nesting depth is limited by the maximum number of inputs,
11834
     * see xmlCtxtPushInput.
11835
     *
11836
     * It's possible to make this non-recursive (minNsIndex must be
11837
     * stored in the input struct) at the expense of code readability.
11838
     */
11839
11840
6.44k
    ent->flags |= XML_ENT_EXPANDING;
11841
11842
6.44k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11843
11844
6.44k
    ent->flags &= ~XML_ENT_EXPANDING;
11845
11846
6.44k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11847
6.44k
    ctxt->nodelen = oldNodelen;
11848
6.44k
    ctxt->nodemem = oldNodemem;
11849
11850
    /*
11851
     * Entity size accounting
11852
     */
11853
6.44k
    consumed = input->consumed;
11854
6.44k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11855
11856
6.44k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11857
5.91k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11858
11859
6.44k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11860
5.91k
        if (isExternal)
11861
4.06k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11862
11863
5.91k
        ent->children = list;
11864
11865
34.5k
        while (list != NULL) {
11866
28.6k
            list->parent = (xmlNodePtr) ent;
11867
11868
            /*
11869
             * Downstream code like the nginx xslt module can set
11870
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11871
             * might have a different or a NULL document.
11872
             */
11873
28.6k
            if (list->doc != ent->doc)
11874
0
                xmlSetTreeDoc(list, ent->doc);
11875
11876
28.6k
            if (list->next == NULL)
11877
4.10k
                ent->last = list;
11878
28.6k
            list = list->next;
11879
28.6k
        }
11880
5.91k
    } else {
11881
528
        xmlFreeNodeList(list);
11882
528
    }
11883
11884
6.44k
    xmlFreeInputStream(input);
11885
11886
7.56k
error:
11887
7.56k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11888
7.56k
}
11889
11890
/**
11891
 * Parse an external general entity within an existing parsing context
11892
 * An external general parsed entity is well-formed if it matches the
11893
 * production labeled extParsedEnt.
11894
 *
11895
 *     [78] extParsedEnt ::= TextDecl? content
11896
 *
11897
 * @param ctxt  the existing parsing context
11898
 * @param URL  the URL for the entity to load
11899
 * @param ID  the System ID for the entity to load
11900
 * @param listOut  the return value for the set of parsed nodes
11901
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11902
 *    the parser error code otherwise
11903
 */
11904
11905
int
11906
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11907
0
                           const xmlChar *ID, xmlNode **listOut) {
11908
0
    xmlParserInputPtr input;
11909
0
    xmlNodePtr list;
11910
11911
0
    if (listOut != NULL)
11912
0
        *listOut = NULL;
11913
11914
0
    if (ctxt == NULL)
11915
0
        return(XML_ERR_ARGUMENT);
11916
11917
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11918
0
                            XML_RESOURCE_GENERAL_ENTITY);
11919
0
    if (input == NULL)
11920
0
        return(ctxt->errNo);
11921
11922
0
    xmlCtxtInitializeLate(ctxt);
11923
11924
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11925
0
    if (listOut != NULL)
11926
0
        *listOut = list;
11927
0
    else
11928
0
        xmlFreeNodeList(list);
11929
11930
0
    xmlFreeInputStream(input);
11931
0
    return(ctxt->errNo);
11932
0
}
11933
11934
#ifdef LIBXML_SAX1_ENABLED
11935
/**
11936
 * Parse an external general entity
11937
 * An external general parsed entity is well-formed if it matches the
11938
 * production labeled extParsedEnt.
11939
 *
11940
 * @deprecated Use #xmlParseCtxtExternalEntity.
11941
 *
11942
 *     [78] extParsedEnt ::= TextDecl? content
11943
 *
11944
 * @param doc  the document the chunk pertains to
11945
 * @param sax  the SAX handler block (possibly NULL)
11946
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11947
 * @param depth  Used for loop detection, use 0
11948
 * @param URL  the URL for the entity to load
11949
 * @param ID  the System ID for the entity to load
11950
 * @param list  the return value for the set of parsed nodes
11951
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11952
 *    the parser error code otherwise
11953
 */
11954
11955
int
11956
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11957
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11958
0
    xmlParserCtxtPtr ctxt;
11959
0
    int ret;
11960
11961
0
    if (list != NULL)
11962
0
        *list = NULL;
11963
11964
0
    if (doc == NULL)
11965
0
        return(XML_ERR_ARGUMENT);
11966
11967
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
0
    if (ctxt == NULL)
11969
0
        return(XML_ERR_NO_MEMORY);
11970
11971
0
    ctxt->depth = depth;
11972
0
    ctxt->myDoc = doc;
11973
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11974
11975
0
    xmlFreeParserCtxt(ctxt);
11976
0
    return(ret);
11977
0
}
11978
11979
/**
11980
 * Parse a well-balanced chunk of an XML document
11981
 * called by the parser
11982
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11983
 * the content production in the XML grammar:
11984
 *
11985
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11986
 *                       Comment)*
11987
 *
11988
 * @param doc  the document the chunk pertains to (must not be NULL)
11989
 * @param sax  the SAX handler block (possibly NULL)
11990
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11991
 * @param depth  Used for loop detection, use 0
11992
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11993
 * @param lst  the return value for the set of parsed nodes
11994
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11995
 *    the parser error code otherwise
11996
 */
11997
11998
int
11999
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
12000
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
12001
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12002
0
                                                depth, string, lst, 0 );
12003
0
}
12004
#endif /* LIBXML_SAX1_ENABLED */
12005
12006
/**
12007
 * Parse a well-balanced chunk of XML matching the 'content' production.
12008
 *
12009
 * Namespaces in scope of `node` and entities of `node`'s document are
12010
 * recognized. When validating, the DTD of `node`'s document is used.
12011
 *
12012
 * Always consumes `input` even in error case.
12013
 *
12014
 * @since 2.14.0
12015
 *
12016
 * @param ctxt  parser context
12017
 * @param input  parser input
12018
 * @param node  target node or document
12019
 * @param hasTextDecl  whether to parse text declaration
12020
 * @returns a node list or NULL in case of error.
12021
 */
12022
xmlNode *
12023
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12024
0
                    xmlNode *node, int hasTextDecl) {
12025
0
    xmlDocPtr doc;
12026
0
    xmlNodePtr cur, list = NULL;
12027
0
    int nsnr = 0;
12028
0
    xmlDictPtr oldDict;
12029
0
    int oldOptions, oldDictNames, oldLoadSubset;
12030
12031
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12032
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12033
0
        goto exit;
12034
0
    }
12035
12036
0
    doc = node->doc;
12037
0
    if (doc == NULL) {
12038
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12039
0
        goto exit;
12040
0
    }
12041
12042
0
    switch (node->type) {
12043
0
        case XML_ELEMENT_NODE:
12044
0
        case XML_DOCUMENT_NODE:
12045
0
        case XML_HTML_DOCUMENT_NODE:
12046
0
            break;
12047
12048
0
        case XML_ATTRIBUTE_NODE:
12049
0
        case XML_TEXT_NODE:
12050
0
        case XML_CDATA_SECTION_NODE:
12051
0
        case XML_ENTITY_REF_NODE:
12052
0
        case XML_PI_NODE:
12053
0
        case XML_COMMENT_NODE:
12054
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12055
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12056
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12057
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12058
0
                    node = cur;
12059
0
                    break;
12060
0
                }
12061
0
            }
12062
0
            break;
12063
12064
0
        default:
12065
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12066
0
            goto exit;
12067
0
    }
12068
12069
0
    xmlCtxtReset(ctxt);
12070
12071
0
    oldDict = ctxt->dict;
12072
0
    oldOptions = ctxt->options;
12073
0
    oldDictNames = ctxt->dictNames;
12074
0
    oldLoadSubset = ctxt->loadsubset;
12075
12076
    /*
12077
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12078
     */
12079
0
    if (doc->dict != NULL) {
12080
0
        ctxt->dict = doc->dict;
12081
0
    } else {
12082
0
        ctxt->options |= XML_PARSE_NODICT;
12083
0
        ctxt->dictNames = 0;
12084
0
    }
12085
12086
    /*
12087
     * Disable IDs
12088
     */
12089
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12090
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12091
12092
0
    ctxt->myDoc = doc;
12093
12094
0
#ifdef LIBXML_HTML_ENABLED
12095
0
    if (ctxt->html) {
12096
        /*
12097
         * When parsing in context, it makes no sense to add implied
12098
         * elements like html/body/etc...
12099
         */
12100
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12101
12102
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12103
0
    } else
12104
0
#endif
12105
0
    {
12106
0
        xmlCtxtInitializeLate(ctxt);
12107
12108
        /*
12109
         * initialize the SAX2 namespaces stack
12110
         */
12111
0
        cur = node;
12112
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12113
0
            xmlNsPtr ns = cur->nsDef;
12114
0
            xmlHashedString hprefix, huri;
12115
12116
0
            while (ns != NULL) {
12117
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12118
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12119
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12120
0
                    nsnr++;
12121
0
                ns = ns->next;
12122
0
            }
12123
0
            cur = cur->parent;
12124
0
        }
12125
12126
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12127
12128
0
        if (nsnr > 0)
12129
0
            xmlParserNsPop(ctxt, nsnr);
12130
0
    }
12131
12132
0
    ctxt->dict = oldDict;
12133
0
    ctxt->options = oldOptions;
12134
0
    ctxt->dictNames = oldDictNames;
12135
0
    ctxt->loadsubset = oldLoadSubset;
12136
0
    ctxt->myDoc = NULL;
12137
0
    ctxt->node = NULL;
12138
12139
0
exit:
12140
0
    xmlFreeInputStream(input);
12141
0
    return(list);
12142
0
}
12143
12144
/**
12145
 * Parse a well-balanced chunk of an XML document
12146
 * within the context (DTD, namespaces, etc ...) of the given node.
12147
 *
12148
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12149
 * the content production in the XML grammar:
12150
 *
12151
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12152
 *                       Comment)*
12153
 *
12154
 * This function assumes the encoding of `node`'s document which is
12155
 * typically not what you want. A better alternative is
12156
 * #xmlCtxtParseContent.
12157
 *
12158
 * @param node  the context node
12159
 * @param data  the input string
12160
 * @param datalen  the input string length in bytes
12161
 * @param options  a combination of xmlParserOption
12162
 * @param listOut  the return value for the set of parsed nodes
12163
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12164
 * error code otherwise
12165
 */
12166
xmlParserErrors
12167
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12168
0
                      int options, xmlNode **listOut) {
12169
0
    xmlParserCtxtPtr ctxt;
12170
0
    xmlParserInputPtr input;
12171
0
    xmlDocPtr doc;
12172
0
    xmlNodePtr list;
12173
0
    xmlParserErrors ret;
12174
12175
0
    if (listOut == NULL)
12176
0
        return(XML_ERR_INTERNAL_ERROR);
12177
0
    *listOut = NULL;
12178
12179
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12180
0
        return(XML_ERR_INTERNAL_ERROR);
12181
12182
0
    doc = node->doc;
12183
0
    if (doc == NULL)
12184
0
        return(XML_ERR_INTERNAL_ERROR);
12185
12186
0
#ifdef LIBXML_HTML_ENABLED
12187
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12188
0
        ctxt = htmlNewParserCtxt();
12189
0
    }
12190
0
    else
12191
0
#endif
12192
0
        ctxt = xmlNewParserCtxt();
12193
12194
0
    if (ctxt == NULL)
12195
0
        return(XML_ERR_NO_MEMORY);
12196
12197
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12198
0
                                      (const char *) doc->encoding,
12199
0
                                      XML_INPUT_BUF_STATIC);
12200
0
    if (input == NULL) {
12201
0
        xmlFreeParserCtxt(ctxt);
12202
0
        return(XML_ERR_NO_MEMORY);
12203
0
    }
12204
12205
0
    xmlCtxtUseOptions(ctxt, options);
12206
12207
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12208
12209
0
    if (list == NULL) {
12210
0
        ret = ctxt->errNo;
12211
0
        if (ret == XML_ERR_ARGUMENT)
12212
0
            ret = XML_ERR_INTERNAL_ERROR;
12213
0
    } else {
12214
0
        ret = XML_ERR_OK;
12215
0
        *listOut = list;
12216
0
    }
12217
12218
0
    xmlFreeParserCtxt(ctxt);
12219
12220
0
    return(ret);
12221
0
}
12222
12223
#ifdef LIBXML_SAX1_ENABLED
12224
/**
12225
 * Parse a well-balanced chunk of an XML document
12226
 *
12227
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12228
 * the content production in the XML grammar:
12229
 *
12230
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12231
 *                       Comment)*
12232
 *
12233
 * In case recover is set to 1, the nodelist will not be empty even if
12234
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12235
 * some extent.
12236
 *
12237
 * @param doc  the document the chunk pertains to (must not be NULL)
12238
 * @param sax  the SAX handler block (possibly NULL)
12239
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12240
 * @param depth  Used for loop detection, use 0
12241
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12242
 * @param listOut  the return value for the set of parsed nodes
12243
 * @param recover  return nodes even if the data is broken (use 0)
12244
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12245
 * otherwise.
12246
 */
12247
int
12248
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12249
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12250
0
     int recover) {
12251
0
    xmlParserCtxtPtr ctxt;
12252
0
    xmlParserInputPtr input;
12253
0
    xmlNodePtr list;
12254
0
    int ret;
12255
12256
0
    if (listOut != NULL)
12257
0
        *listOut = NULL;
12258
12259
0
    if (string == NULL)
12260
0
        return(XML_ERR_ARGUMENT);
12261
12262
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12263
0
    if (ctxt == NULL)
12264
0
        return(XML_ERR_NO_MEMORY);
12265
12266
0
    xmlCtxtInitializeLate(ctxt);
12267
12268
0
    ctxt->depth = depth;
12269
0
    ctxt->myDoc = doc;
12270
0
    if (recover) {
12271
0
        ctxt->options |= XML_PARSE_RECOVER;
12272
0
        ctxt->recovery = 1;
12273
0
    }
12274
12275
0
    input = xmlNewStringInputStream(ctxt, string);
12276
0
    if (input == NULL) {
12277
0
        ret = ctxt->errNo;
12278
0
        goto error;
12279
0
    }
12280
12281
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12282
0
    if (listOut != NULL)
12283
0
        *listOut = list;
12284
0
    else
12285
0
        xmlFreeNodeList(list);
12286
12287
0
    if (!ctxt->wellFormed)
12288
0
        ret = ctxt->errNo;
12289
0
    else
12290
0
        ret = XML_ERR_OK;
12291
12292
0
error:
12293
0
    xmlFreeInputStream(input);
12294
0
    xmlFreeParserCtxt(ctxt);
12295
0
    return(ret);
12296
0
}
12297
12298
/**
12299
 * Parse an XML external entity out of context and build a tree.
12300
 * It use the given SAX function block to handle the parsing callback.
12301
 * If sax is NULL, fallback to the default DOM tree building routines.
12302
 *
12303
 * @deprecated Don't use.
12304
 *
12305
 *     [78] extParsedEnt ::= TextDecl? content
12306
 *
12307
 * This correspond to a "Well Balanced" chunk
12308
 *
12309
 * @param sax  the SAX handler block
12310
 * @param filename  the filename
12311
 * @returns the resulting document tree
12312
 */
12313
12314
xmlDoc *
12315
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12316
0
    xmlDocPtr ret;
12317
0
    xmlParserCtxtPtr ctxt;
12318
12319
0
    ctxt = xmlCreateFileParserCtxt(filename);
12320
0
    if (ctxt == NULL) {
12321
0
  return(NULL);
12322
0
    }
12323
0
    if (sax != NULL) {
12324
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12325
0
            *ctxt->sax = *sax;
12326
0
        } else {
12327
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12328
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12329
0
        }
12330
0
        ctxt->userData = NULL;
12331
0
    }
12332
12333
0
    xmlParseExtParsedEnt(ctxt);
12334
12335
0
    if (ctxt->wellFormed) {
12336
0
  ret = ctxt->myDoc;
12337
0
    } else {
12338
0
        ret = NULL;
12339
0
        xmlFreeDoc(ctxt->myDoc);
12340
0
    }
12341
12342
0
    xmlFreeParserCtxt(ctxt);
12343
12344
0
    return(ret);
12345
0
}
12346
12347
/**
12348
 * Parse an XML external entity out of context and build a tree.
12349
 *
12350
 *     [78] extParsedEnt ::= TextDecl? content
12351
 *
12352
 * This correspond to a "Well Balanced" chunk
12353
 *
12354
 * @param filename  the filename
12355
 * @returns the resulting document tree
12356
 */
12357
12358
xmlDoc *
12359
0
xmlParseEntity(const char *filename) {
12360
0
    return(xmlSAXParseEntity(NULL, filename));
12361
0
}
12362
#endif /* LIBXML_SAX1_ENABLED */
12363
12364
/**
12365
 * Create a parser context for an external entity
12366
 * Automatic support for ZLIB/Compress compressed document is provided
12367
 * by default if found at compile-time.
12368
 *
12369
 * @deprecated Don't use.
12370
 *
12371
 * @param URL  the entity URL
12372
 * @param ID  the entity PUBLIC ID
12373
 * @param base  a possible base for the target URI
12374
 * @returns the new parser context or NULL
12375
 */
12376
xmlParserCtxt *
12377
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12378
0
                    const xmlChar *base) {
12379
0
    xmlParserCtxtPtr ctxt;
12380
0
    xmlParserInputPtr input;
12381
0
    xmlChar *uri = NULL;
12382
12383
0
    ctxt = xmlNewParserCtxt();
12384
0
    if (ctxt == NULL)
12385
0
  return(NULL);
12386
12387
0
    if (base != NULL) {
12388
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12389
0
            goto error;
12390
0
        if (uri != NULL)
12391
0
            URL = uri;
12392
0
    }
12393
12394
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12395
0
                            XML_RESOURCE_UNKNOWN);
12396
0
    if (input == NULL)
12397
0
        goto error;
12398
12399
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12400
0
        xmlFreeInputStream(input);
12401
0
        goto error;
12402
0
    }
12403
12404
0
    xmlFree(uri);
12405
0
    return(ctxt);
12406
12407
0
error:
12408
0
    xmlFree(uri);
12409
0
    xmlFreeParserCtxt(ctxt);
12410
0
    return(NULL);
12411
0
}
12412
12413
/************************************************************************
12414
 *                  *
12415
 *    Front ends when parsing from a file     *
12416
 *                  *
12417
 ************************************************************************/
12418
12419
/**
12420
 * Create a parser context for a file or URL content.
12421
 * Automatic support for ZLIB/Compress compressed document is provided
12422
 * by default if found at compile-time and for file accesses
12423
 *
12424
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12425
 *
12426
 * @param filename  the filename or URL
12427
 * @param options  a combination of xmlParserOption
12428
 * @returns the new parser context or NULL
12429
 */
12430
xmlParserCtxt *
12431
xmlCreateURLParserCtxt(const char *filename, int options)
12432
0
{
12433
0
    xmlParserCtxtPtr ctxt;
12434
0
    xmlParserInputPtr input;
12435
12436
0
    ctxt = xmlNewParserCtxt();
12437
0
    if (ctxt == NULL)
12438
0
  return(NULL);
12439
12440
0
    options |= XML_PARSE_UNZIP;
12441
12442
0
    xmlCtxtUseOptions(ctxt, options);
12443
12444
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12445
0
    if (input == NULL) {
12446
0
  xmlFreeParserCtxt(ctxt);
12447
0
  return(NULL);
12448
0
    }
12449
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12450
0
        xmlFreeInputStream(input);
12451
0
        xmlFreeParserCtxt(ctxt);
12452
0
        return(NULL);
12453
0
    }
12454
12455
0
    return(ctxt);
12456
0
}
12457
12458
/**
12459
 * Create a parser context for a file content.
12460
 * Automatic support for ZLIB/Compress compressed document is provided
12461
 * by default if found at compile-time.
12462
 *
12463
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12464
 *
12465
 * @param filename  the filename
12466
 * @returns the new parser context or NULL
12467
 */
12468
xmlParserCtxt *
12469
xmlCreateFileParserCtxt(const char *filename)
12470
0
{
12471
0
    return(xmlCreateURLParserCtxt(filename, 0));
12472
0
}
12473
12474
#ifdef LIBXML_SAX1_ENABLED
12475
/**
12476
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12477
 * compressed document is provided by default if found at compile-time.
12478
 * It use the given SAX function block to handle the parsing callback.
12479
 * If sax is NULL, fallback to the default DOM tree building routines.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It use the given SAX function block to handle the parsing callback.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12532
 *
12533
 * @param sax  the SAX handler block
12534
 * @param filename  the filename
12535
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12536
 *             documents
12537
 * @returns the resulting document tree
12538
 */
12539
12540
xmlDoc *
12541
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12542
0
                          int recovery) {
12543
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12544
0
}
12545
12546
/**
12547
 * Parse an XML in-memory document and build a tree.
12548
 * In the case the document is not Well Formed, a attempt to build a
12549
 * tree is tried anyway
12550
 *
12551
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12552
 *
12553
 * @param cur  a pointer to an array of xmlChar
12554
 * @returns the resulting document tree or NULL in case of failure
12555
 */
12556
12557
xmlDoc *
12558
0
xmlRecoverDoc(const xmlChar *cur) {
12559
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12560
0
}
12561
12562
/**
12563
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12564
 * compressed document is provided by default if found at compile-time.
12565
 *
12566
 * @deprecated Use #xmlReadFile.
12567
 *
12568
 * @param filename  the filename
12569
 * @returns the resulting document tree if the file was wellformed,
12570
 * NULL otherwise.
12571
 */
12572
12573
xmlDoc *
12574
0
xmlParseFile(const char *filename) {
12575
0
    return(xmlSAXParseFile(NULL, filename, 0));
12576
0
}
12577
12578
/**
12579
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12580
 * compressed document is provided by default if found at compile-time.
12581
 * In the case the document is not Well Formed, it attempts to build
12582
 * a tree anyway
12583
 *
12584
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12585
 *
12586
 * @param filename  the filename
12587
 * @returns the resulting document tree or NULL in case of failure
12588
 */
12589
12590
xmlDoc *
12591
0
xmlRecoverFile(const char *filename) {
12592
0
    return(xmlSAXParseFile(NULL, filename, 1));
12593
0
}
12594
12595
12596
/**
12597
 * Setup the parser context to parse a new buffer; Clears any prior
12598
 * contents from the parser context. The buffer parameter must not be
12599
 * NULL, but the filename parameter can be
12600
 *
12601
 * @deprecated Don't use.
12602
 *
12603
 * @param ctxt  an XML parser context
12604
 * @param buffer  a xmlChar * buffer
12605
 * @param filename  a file name
12606
 */
12607
void
12608
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12609
                             const char* filename)
12610
0
{
12611
0
    xmlParserInputPtr input;
12612
12613
0
    if ((ctxt == NULL) || (buffer == NULL))
12614
0
        return;
12615
12616
0
    xmlCtxtReset(ctxt);
12617
12618
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12619
0
                                      NULL, 0);
12620
0
    if (input == NULL)
12621
0
        return;
12622
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12623
0
        xmlFreeInputStream(input);
12624
0
}
12625
12626
/**
12627
 * Parse an XML file and call the given SAX handler routines.
12628
 * Automatic support for ZLIB/Compress compressed document is provided
12629
 *
12630
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12631
 *
12632
 * @param sax  a SAX handler
12633
 * @param user_data  The user data returned on SAX callbacks
12634
 * @param filename  a file name
12635
 * @returns 0 in case of success or a error number otherwise
12636
 */
12637
int
12638
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12639
0
                    const char *filename) {
12640
0
    int ret = 0;
12641
0
    xmlParserCtxtPtr ctxt;
12642
12643
0
    ctxt = xmlCreateFileParserCtxt(filename);
12644
0
    if (ctxt == NULL) return -1;
12645
0
    if (sax != NULL) {
12646
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12647
0
            *ctxt->sax = *sax;
12648
0
        } else {
12649
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12650
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12651
0
        }
12652
0
  ctxt->userData = user_data;
12653
0
    }
12654
12655
0
    xmlParseDocument(ctxt);
12656
12657
0
    if (ctxt->wellFormed)
12658
0
  ret = 0;
12659
0
    else {
12660
0
        if (ctxt->errNo != 0)
12661
0
      ret = ctxt->errNo;
12662
0
  else
12663
0
      ret = -1;
12664
0
    }
12665
0
    if (ctxt->myDoc != NULL) {
12666
0
        xmlFreeDoc(ctxt->myDoc);
12667
0
  ctxt->myDoc = NULL;
12668
0
    }
12669
0
    xmlFreeParserCtxt(ctxt);
12670
12671
0
    return ret;
12672
0
}
12673
#endif /* LIBXML_SAX1_ENABLED */
12674
12675
/************************************************************************
12676
 *                  *
12677
 *    Front ends when parsing from memory     *
12678
 *                  *
12679
 ************************************************************************/
12680
12681
/**
12682
 * Create a parser context for an XML in-memory document. The input buffer
12683
 * must not contain a terminating null byte.
12684
 *
12685
 * @param buffer  a pointer to a char array
12686
 * @param size  the size of the array
12687
 * @returns the new parser context or NULL
12688
 */
12689
xmlParserCtxt *
12690
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12691
0
    xmlParserCtxtPtr ctxt;
12692
0
    xmlParserInputPtr input;
12693
12694
0
    if (size < 0)
12695
0
  return(NULL);
12696
12697
0
    ctxt = xmlNewParserCtxt();
12698
0
    if (ctxt == NULL)
12699
0
  return(NULL);
12700
12701
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12702
0
    if (input == NULL) {
12703
0
  xmlFreeParserCtxt(ctxt);
12704
0
  return(NULL);
12705
0
    }
12706
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12707
0
        xmlFreeInputStream(input);
12708
0
        xmlFreeParserCtxt(ctxt);
12709
0
        return(NULL);
12710
0
    }
12711
12712
0
    return(ctxt);
12713
0
}
12714
12715
#ifdef LIBXML_SAX1_ENABLED
12716
/**
12717
 * Parse an XML in-memory block and use the given SAX function block
12718
 * to handle the parsing callback. If sax is NULL, fallback to the default
12719
 * DOM tree building routines.
12720
 *
12721
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12722
 *
12723
 * User data (void *) is stored within the parser context in the
12724
 * context's _private member, so it is available nearly everywhere in libxml
12725
 *
12726
 * @param sax  the SAX handler block
12727
 * @param buffer  an pointer to a char array
12728
 * @param size  the size of the array
12729
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12730
 *             documents
12731
 * @param data  the userdata
12732
 * @returns the resulting document tree
12733
 */
12734
12735
xmlDoc *
12736
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12737
0
                          int size, int recovery, void *data) {
12738
0
    xmlDocPtr ret = NULL;
12739
0
    xmlParserCtxtPtr ctxt;
12740
0
    xmlParserInputPtr input;
12741
12742
0
    if (size < 0)
12743
0
        return(NULL);
12744
12745
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12746
0
    if (ctxt == NULL)
12747
0
        return(NULL);
12748
12749
0
    if (data != NULL)
12750
0
  ctxt->_private=data;
12751
12752
0
    if (recovery) {
12753
0
        ctxt->options |= XML_PARSE_RECOVER;
12754
0
        ctxt->recovery = 1;
12755
0
    }
12756
12757
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12758
0
                                      XML_INPUT_BUF_STATIC);
12759
12760
0
    if (input != NULL)
12761
0
        ret = xmlCtxtParseDocument(ctxt, input);
12762
12763
0
    xmlFreeParserCtxt(ctxt);
12764
0
    return(ret);
12765
0
}
12766
12767
/**
12768
 * Parse an XML in-memory block and use the given SAX function block
12769
 * to handle the parsing callback. If sax is NULL, fallback to the default
12770
 * DOM tree building routines.
12771
 *
12772
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12773
 *
12774
 * @param sax  the SAX handler block
12775
 * @param buffer  an pointer to a char array
12776
 * @param size  the size of the array
12777
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12778
 *             documents
12779
 * @returns the resulting document tree
12780
 */
12781
xmlDoc *
12782
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12783
0
            int size, int recovery) {
12784
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12785
0
}
12786
12787
/**
12788
 * Parse an XML in-memory block and build a tree.
12789
 *
12790
 * @deprecated Use #xmlReadMemory.
12791
 *
12792
 * @param buffer  an pointer to a char array
12793
 * @param size  the size of the array
12794
 * @returns the resulting document tree
12795
 */
12796
12797
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12798
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12799
0
}
12800
12801
/**
12802
 * Parse an XML in-memory block and build a tree.
12803
 * In the case the document is not Well Formed, an attempt to
12804
 * build a tree is tried anyway
12805
 *
12806
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12807
 *
12808
 * @param buffer  an pointer to a char array
12809
 * @param size  the size of the array
12810
 * @returns the resulting document tree or NULL in case of error
12811
 */
12812
12813
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12814
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12815
0
}
12816
12817
/**
12818
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12819
 *
12820
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12821
 *
12822
 * @param sax  a SAX handler
12823
 * @param user_data  The user data returned on SAX callbacks
12824
 * @param buffer  an in-memory XML document input
12825
 * @param size  the length of the XML document in bytes
12826
 * @returns 0 in case of success or a error number otherwise
12827
 */
12828
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12829
0
        const char *buffer, int size) {
12830
0
    int ret = 0;
12831
0
    xmlParserCtxtPtr ctxt;
12832
12833
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12834
0
    if (ctxt == NULL) return -1;
12835
0
    if (sax != NULL) {
12836
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12837
0
            *ctxt->sax = *sax;
12838
0
        } else {
12839
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12840
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12841
0
        }
12842
0
  ctxt->userData = user_data;
12843
0
    }
12844
12845
0
    xmlParseDocument(ctxt);
12846
12847
0
    if (ctxt->wellFormed)
12848
0
  ret = 0;
12849
0
    else {
12850
0
        if (ctxt->errNo != 0)
12851
0
      ret = ctxt->errNo;
12852
0
  else
12853
0
      ret = -1;
12854
0
    }
12855
0
    if (ctxt->myDoc != NULL) {
12856
0
        xmlFreeDoc(ctxt->myDoc);
12857
0
  ctxt->myDoc = NULL;
12858
0
    }
12859
0
    xmlFreeParserCtxt(ctxt);
12860
12861
0
    return ret;
12862
0
}
12863
#endif /* LIBXML_SAX1_ENABLED */
12864
12865
/**
12866
 * Creates a parser context for an XML in-memory document.
12867
 *
12868
 * @param str  a pointer to an array of xmlChar
12869
 * @returns the new parser context or NULL
12870
 */
12871
xmlParserCtxt *
12872
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12873
0
    xmlParserCtxtPtr ctxt;
12874
0
    xmlParserInputPtr input;
12875
12876
0
    ctxt = xmlNewParserCtxt();
12877
0
    if (ctxt == NULL)
12878
0
  return(NULL);
12879
12880
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12881
0
    if (input == NULL) {
12882
0
  xmlFreeParserCtxt(ctxt);
12883
0
  return(NULL);
12884
0
    }
12885
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12886
0
        xmlFreeInputStream(input);
12887
0
        xmlFreeParserCtxt(ctxt);
12888
0
        return(NULL);
12889
0
    }
12890
12891
0
    return(ctxt);
12892
0
}
12893
12894
#ifdef LIBXML_SAX1_ENABLED
12895
/**
12896
 * Parse an XML in-memory document and build a tree.
12897
 * It use the given SAX function block to handle the parsing callback.
12898
 * If sax is NULL, fallback to the default DOM tree building routines.
12899
 *
12900
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12901
 *
12902
 * @param sax  the SAX handler block
12903
 * @param cur  a pointer to an array of xmlChar
12904
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12905
 *             documents
12906
 * @returns the resulting document tree
12907
 */
12908
12909
xmlDoc *
12910
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12911
0
    xmlDocPtr ret;
12912
0
    xmlParserCtxtPtr ctxt;
12913
0
    xmlSAXHandlerPtr oldsax = NULL;
12914
12915
0
    if (cur == NULL) return(NULL);
12916
12917
12918
0
    ctxt = xmlCreateDocParserCtxt(cur);
12919
0
    if (ctxt == NULL) return(NULL);
12920
0
    if (sax != NULL) {
12921
0
        oldsax = ctxt->sax;
12922
0
        ctxt->sax = sax;
12923
0
        ctxt->userData = NULL;
12924
0
    }
12925
12926
0
    xmlParseDocument(ctxt);
12927
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12928
0
    else {
12929
0
       ret = NULL;
12930
0
       xmlFreeDoc(ctxt->myDoc);
12931
0
       ctxt->myDoc = NULL;
12932
0
    }
12933
0
    if (sax != NULL)
12934
0
  ctxt->sax = oldsax;
12935
0
    xmlFreeParserCtxt(ctxt);
12936
12937
0
    return(ret);
12938
0
}
12939
12940
/**
12941
 * Parse an XML in-memory document and build a tree.
12942
 *
12943
 * @deprecated Use #xmlReadDoc.
12944
 *
12945
 * @param cur  a pointer to an array of xmlChar
12946
 * @returns the resulting document tree
12947
 */
12948
12949
xmlDoc *
12950
0
xmlParseDoc(const xmlChar *cur) {
12951
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12952
0
}
12953
#endif /* LIBXML_SAX1_ENABLED */
12954
12955
/************************************************************************
12956
 *                  *
12957
 *  New set (2.6.0) of simpler and more flexible APIs   *
12958
 *                  *
12959
 ************************************************************************/
12960
12961
/**
12962
 * Reset a parser context
12963
 *
12964
 * @param ctxt  an XML parser context
12965
 */
12966
void
12967
xmlCtxtReset(xmlParserCtxt *ctxt)
12968
64.4k
{
12969
64.4k
    xmlParserInputPtr input;
12970
12971
64.4k
    if (ctxt == NULL)
12972
0
        return;
12973
12974
64.4k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12975
0
        xmlFreeInputStream(input);
12976
0
    }
12977
64.4k
    ctxt->inputNr = 0;
12978
64.4k
    ctxt->input = NULL;
12979
12980
64.4k
    ctxt->spaceNr = 0;
12981
64.4k
    if (ctxt->spaceTab != NULL) {
12982
64.4k
  ctxt->spaceTab[0] = -1;
12983
64.4k
  ctxt->space = &ctxt->spaceTab[0];
12984
64.4k
    } else {
12985
0
        ctxt->space = NULL;
12986
0
    }
12987
12988
12989
64.4k
    ctxt->nodeNr = 0;
12990
64.4k
    ctxt->node = NULL;
12991
12992
64.4k
    ctxt->nameNr = 0;
12993
64.4k
    ctxt->name = NULL;
12994
12995
64.4k
    ctxt->nsNr = 0;
12996
64.4k
    xmlParserNsReset(ctxt->nsdb);
12997
12998
64.4k
    if (ctxt->version != NULL) {
12999
13.3k
        xmlFree(ctxt->version);
13000
13.3k
        ctxt->version = NULL;
13001
13.3k
    }
13002
64.4k
    if (ctxt->encoding != NULL) {
13003
580
        xmlFree(ctxt->encoding);
13004
580
        ctxt->encoding = NULL;
13005
580
    }
13006
64.4k
    if (ctxt->extSubURI != NULL) {
13007
2.24k
        xmlFree(ctxt->extSubURI);
13008
2.24k
        ctxt->extSubURI = NULL;
13009
2.24k
    }
13010
64.4k
    if (ctxt->extSubSystem != NULL) {
13011
207
        xmlFree(ctxt->extSubSystem);
13012
207
        ctxt->extSubSystem = NULL;
13013
207
    }
13014
64.4k
    if (ctxt->directory != NULL) {
13015
13.8k
        xmlFree(ctxt->directory);
13016
13.8k
        ctxt->directory = NULL;
13017
13.8k
    }
13018
13019
64.4k
    if (ctxt->myDoc != NULL)
13020
0
        xmlFreeDoc(ctxt->myDoc);
13021
64.4k
    ctxt->myDoc = NULL;
13022
13023
64.4k
    ctxt->standalone = -1;
13024
64.4k
    ctxt->hasExternalSubset = 0;
13025
64.4k
    ctxt->hasPErefs = 0;
13026
64.4k
    ctxt->html = ctxt->html ? 1 : 0;
13027
64.4k
    ctxt->instate = XML_PARSER_START;
13028
13029
64.4k
    ctxt->wellFormed = 1;
13030
64.4k
    ctxt->nsWellFormed = 1;
13031
64.4k
    ctxt->disableSAX = 0;
13032
64.4k
    ctxt->valid = 1;
13033
64.4k
    ctxt->record_info = 0;
13034
64.4k
    ctxt->checkIndex = 0;
13035
64.4k
    ctxt->endCheckState = 0;
13036
64.4k
    ctxt->inSubset = 0;
13037
64.4k
    ctxt->errNo = XML_ERR_OK;
13038
64.4k
    ctxt->depth = 0;
13039
64.4k
    ctxt->catalogs = NULL;
13040
64.4k
    ctxt->sizeentities = 0;
13041
64.4k
    ctxt->sizeentcopy = 0;
13042
64.4k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13043
13044
64.4k
    if (ctxt->attsDefault != NULL) {
13045
1.33k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13046
1.33k
        ctxt->attsDefault = NULL;
13047
1.33k
    }
13048
64.4k
    if (ctxt->attsSpecial != NULL) {
13049
1.66k
        xmlHashFree(ctxt->attsSpecial, NULL);
13050
1.66k
        ctxt->attsSpecial = NULL;
13051
1.66k
    }
13052
13053
64.4k
#ifdef LIBXML_CATALOG_ENABLED
13054
64.4k
    if (ctxt->catalogs != NULL)
13055
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13056
64.4k
#endif
13057
64.4k
    ctxt->nbErrors = 0;
13058
64.4k
    ctxt->nbWarnings = 0;
13059
64.4k
    if (ctxt->lastError.code != XML_ERR_OK)
13060
13.5k
        xmlResetError(&ctxt->lastError);
13061
64.4k
}
13062
13063
/**
13064
 * Reset a push parser context
13065
 *
13066
 * @param ctxt  an XML parser context
13067
 * @param chunk  a pointer to an array of chars
13068
 * @param size  number of chars in the array
13069
 * @param filename  an optional file name or URI
13070
 * @param encoding  the document encoding, or NULL
13071
 * @returns 0 in case of success and 1 in case of error
13072
 */
13073
int
13074
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13075
                 int size, const char *filename, const char *encoding)
13076
0
{
13077
0
    xmlParserInputPtr input;
13078
13079
0
    if (ctxt == NULL)
13080
0
        return(1);
13081
13082
0
    xmlCtxtReset(ctxt);
13083
13084
0
    input = xmlNewPushInput(filename, chunk, size);
13085
0
    if (input == NULL)
13086
0
        return(1);
13087
13088
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13089
0
        xmlFreeInputStream(input);
13090
0
        return(1);
13091
0
    }
13092
13093
0
    if (encoding != NULL)
13094
0
        xmlSwitchEncodingName(ctxt, encoding);
13095
13096
0
    return(0);
13097
0
}
13098
13099
static int
13100
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13101
75.9k
{
13102
75.9k
    int allMask;
13103
13104
75.9k
    if (ctxt == NULL)
13105
0
        return(-1);
13106
13107
    /*
13108
     * XInclude options aren't handled by the parser.
13109
     *
13110
     * XML_PARSE_XINCLUDE
13111
     * XML_PARSE_NOXINCNODE
13112
     * XML_PARSE_NOBASEFIX
13113
     */
13114
75.9k
    allMask = XML_PARSE_RECOVER |
13115
75.9k
              XML_PARSE_NOENT |
13116
75.9k
              XML_PARSE_DTDLOAD |
13117
75.9k
              XML_PARSE_DTDATTR |
13118
75.9k
              XML_PARSE_DTDVALID |
13119
75.9k
              XML_PARSE_NOERROR |
13120
75.9k
              XML_PARSE_NOWARNING |
13121
75.9k
              XML_PARSE_PEDANTIC |
13122
75.9k
              XML_PARSE_NOBLANKS |
13123
75.9k
#ifdef LIBXML_SAX1_ENABLED
13124
75.9k
              XML_PARSE_SAX1 |
13125
75.9k
#endif
13126
75.9k
              XML_PARSE_NONET |
13127
75.9k
              XML_PARSE_NODICT |
13128
75.9k
              XML_PARSE_NSCLEAN |
13129
75.9k
              XML_PARSE_NOCDATA |
13130
75.9k
              XML_PARSE_COMPACT |
13131
75.9k
              XML_PARSE_OLD10 |
13132
75.9k
              XML_PARSE_HUGE |
13133
75.9k
              XML_PARSE_OLDSAX |
13134
75.9k
              XML_PARSE_IGNORE_ENC |
13135
75.9k
              XML_PARSE_BIG_LINES |
13136
75.9k
              XML_PARSE_NO_XXE |
13137
75.9k
              XML_PARSE_UNZIP |
13138
75.9k
              XML_PARSE_NO_SYS_CATALOG |
13139
75.9k
              XML_PARSE_CATALOG_PI;
13140
13141
75.9k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13142
13143
    /*
13144
     * For some options, struct members are historically the source
13145
     * of truth. The values are initalized from global variables and
13146
     * old code could also modify them directly. Several older API
13147
     * functions that don't take an options argument rely on these
13148
     * deprecated mechanisms.
13149
     *
13150
     * Once public access to struct members and the globals are
13151
     * disabled, we can use the options bitmask as source of
13152
     * truth, making all these struct members obsolete.
13153
     *
13154
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13155
     * loading of the external subset.
13156
     */
13157
75.9k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13158
75.9k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13159
75.9k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13160
75.9k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13161
75.9k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13162
75.9k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13163
75.9k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13164
75.9k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13165
75.9k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13166
13167
75.9k
    return(options & ~allMask);
13168
75.9k
}
13169
13170
/**
13171
 * Applies the options to the parser context. Unset options are
13172
 * cleared.
13173
 *
13174
 * @since 2.13.0
13175
 *
13176
 * With older versions, you can use #xmlCtxtUseOptions.
13177
 *
13178
 * @param ctxt  an XML parser context
13179
 * @param options  a bitmask of xmlParserOption values
13180
 * @returns 0 in case of success, the set of unknown or unimplemented options
13181
 *         in case of error.
13182
 */
13183
int
13184
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13185
0
{
13186
0
#ifdef LIBXML_HTML_ENABLED
13187
0
    if ((ctxt != NULL) && (ctxt->html))
13188
0
        return(htmlCtxtSetOptions(ctxt, options));
13189
0
#endif
13190
13191
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13192
0
}
13193
13194
/**
13195
 * Get the current options of the parser context.
13196
 *
13197
 * @since 2.14.0
13198
 *
13199
 * @param ctxt  an XML parser context
13200
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13201
 */
13202
int
13203
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13204
0
{
13205
0
    if (ctxt == NULL)
13206
0
        return(-1);
13207
13208
0
    return(ctxt->options);
13209
0
}
13210
13211
/**
13212
 * Applies the options to the parser context. The following options
13213
 * are never cleared and can only be enabled:
13214
 *
13215
 * - XML_PARSE_NOERROR
13216
 * - XML_PARSE_NOWARNING
13217
 * - XML_PARSE_NONET
13218
 * - XML_PARSE_NSCLEAN
13219
 * - XML_PARSE_NOCDATA
13220
 * - XML_PARSE_COMPACT
13221
 * - XML_PARSE_OLD10
13222
 * - XML_PARSE_HUGE
13223
 * - XML_PARSE_OLDSAX
13224
 * - XML_PARSE_IGNORE_ENC
13225
 * - XML_PARSE_BIG_LINES
13226
 *
13227
 * @deprecated Use #xmlCtxtSetOptions.
13228
 *
13229
 * @param ctxt  an XML parser context
13230
 * @param options  a combination of xmlParserOption
13231
 * @returns 0 in case of success, the set of unknown or unimplemented options
13232
 *         in case of error.
13233
 */
13234
int
13235
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13236
75.9k
{
13237
75.9k
    int keepMask;
13238
13239
75.9k
#ifdef LIBXML_HTML_ENABLED
13240
75.9k
    if ((ctxt != NULL) && (ctxt->html))
13241
0
        return(htmlCtxtUseOptions(ctxt, options));
13242
75.9k
#endif
13243
13244
    /*
13245
     * For historic reasons, some options can only be enabled.
13246
     */
13247
75.9k
    keepMask = XML_PARSE_NOERROR |
13248
75.9k
               XML_PARSE_NOWARNING |
13249
75.9k
               XML_PARSE_NONET |
13250
75.9k
               XML_PARSE_NSCLEAN |
13251
75.9k
               XML_PARSE_NOCDATA |
13252
75.9k
               XML_PARSE_COMPACT |
13253
75.9k
               XML_PARSE_OLD10 |
13254
75.9k
               XML_PARSE_HUGE |
13255
75.9k
               XML_PARSE_OLDSAX |
13256
75.9k
               XML_PARSE_IGNORE_ENC |
13257
75.9k
               XML_PARSE_BIG_LINES;
13258
13259
75.9k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13260
75.9k
}
13261
13262
/**
13263
 * To protect against exponential entity expansion ("billion laughs"), the
13264
 * size of serialized output is (roughly) limited to the input size
13265
 * multiplied by this factor. The default value is 5.
13266
 *
13267
 * When working with documents making heavy use of entity expansion, it can
13268
 * be necessary to increase the value. For security reasons, this should only
13269
 * be considered when processing trusted input.
13270
 *
13271
 * @param ctxt  an XML parser context
13272
 * @param maxAmpl  maximum amplification factor
13273
 */
13274
void
13275
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13276
0
{
13277
0
    if (ctxt == NULL)
13278
0
        return;
13279
0
    ctxt->maxAmpl = maxAmpl;
13280
0
}
13281
13282
/**
13283
 * Parse an XML document and return the resulting document tree.
13284
 * Takes ownership of the input object.
13285
 *
13286
 * @since 2.13.0
13287
 *
13288
 * @param ctxt  an XML parser context
13289
 * @param input  parser input
13290
 * @returns the resulting document tree or NULL
13291
 */
13292
xmlDoc *
13293
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13294
50.5k
{
13295
50.5k
    xmlDocPtr ret = NULL;
13296
13297
50.5k
    if ((ctxt == NULL) || (input == NULL)) {
13298
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13299
0
        xmlFreeInputStream(input);
13300
0
        return(NULL);
13301
0
    }
13302
13303
    /* assert(ctxt->inputNr == 0); */
13304
50.5k
    while (ctxt->inputNr > 0)
13305
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13306
13307
50.5k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13308
8
        xmlFreeInputStream(input);
13309
8
        return(NULL);
13310
8
    }
13311
13312
50.5k
    xmlParseDocument(ctxt);
13313
13314
50.5k
    ret = xmlCtxtGetDocument(ctxt);
13315
13316
    /* assert(ctxt->inputNr == 1); */
13317
102k
    while (ctxt->inputNr > 0)
13318
51.5k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13319
13320
50.5k
    return(ret);
13321
50.5k
}
13322
13323
/**
13324
 * Convenience function to parse an XML document from a
13325
 * zero-terminated string.
13326
 *
13327
 * See #xmlCtxtReadDoc for details.
13328
 *
13329
 * @param cur  a pointer to a zero terminated string
13330
 * @param URL  base URL (optional)
13331
 * @param encoding  the document encoding (optional)
13332
 * @param options  a combination of xmlParserOption
13333
 * @returns the resulting document tree
13334
 */
13335
xmlDoc *
13336
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13337
           int options)
13338
0
{
13339
0
    xmlParserCtxtPtr ctxt;
13340
0
    xmlParserInputPtr input;
13341
0
    xmlDocPtr doc = NULL;
13342
13343
0
    ctxt = xmlNewParserCtxt();
13344
0
    if (ctxt == NULL)
13345
0
        return(NULL);
13346
13347
0
    xmlCtxtUseOptions(ctxt, options);
13348
13349
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13350
0
                                      XML_INPUT_BUF_STATIC);
13351
13352
0
    if (input != NULL)
13353
0
        doc = xmlCtxtParseDocument(ctxt, input);
13354
13355
0
    xmlFreeParserCtxt(ctxt);
13356
0
    return(doc);
13357
0
}
13358
13359
/**
13360
 * Convenience function to parse an XML file from the filesystem
13361
 * or a global, user-defined resource loader.
13362
 *
13363
 * This function always enables the XML_PARSE_UNZIP option for
13364
 * backward compatibility. If a "-" filename is passed, it will
13365
 * read from stdin. Both of these features are potentially
13366
 * insecure and might be removed from later versions.
13367
 *
13368
 * See #xmlCtxtReadFile for details.
13369
 *
13370
 * @param filename  a file or URL
13371
 * @param encoding  the document encoding (optional)
13372
 * @param options  a combination of xmlParserOption
13373
 * @returns the resulting document tree
13374
 */
13375
xmlDoc *
13376
xmlReadFile(const char *filename, const char *encoding, int options)
13377
0
{
13378
0
    xmlParserCtxtPtr ctxt;
13379
0
    xmlParserInputPtr input;
13380
0
    xmlDocPtr doc = NULL;
13381
13382
0
    ctxt = xmlNewParserCtxt();
13383
0
    if (ctxt == NULL)
13384
0
        return(NULL);
13385
13386
0
    options |= XML_PARSE_UNZIP;
13387
13388
0
    xmlCtxtUseOptions(ctxt, options);
13389
13390
    /*
13391
     * Backward compatibility for users of command line utilities like
13392
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13393
     * should be removed at some point.
13394
     */
13395
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13396
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13397
0
                                      encoding, 0);
13398
0
    else
13399
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13400
13401
0
    if (input != NULL)
13402
0
        doc = xmlCtxtParseDocument(ctxt, input);
13403
13404
0
    xmlFreeParserCtxt(ctxt);
13405
0
    return(doc);
13406
0
}
13407
13408
/**
13409
 * Parse an XML in-memory document and build a tree. The input buffer must
13410
 * not contain a terminating null byte.
13411
 *
13412
 * See #xmlCtxtReadMemory for details.
13413
 *
13414
 * @param buffer  a pointer to a char array
13415
 * @param size  the size of the array
13416
 * @param url  base URL (optional)
13417
 * @param encoding  the document encoding (optional)
13418
 * @param options  a combination of xmlParserOption
13419
 * @returns the resulting document tree
13420
 */
13421
xmlDoc *
13422
xmlReadMemory(const char *buffer, int size, const char *url,
13423
              const char *encoding, int options)
13424
0
{
13425
0
    xmlParserCtxtPtr ctxt;
13426
0
    xmlParserInputPtr input;
13427
0
    xmlDocPtr doc = NULL;
13428
13429
0
    if (size < 0)
13430
0
  return(NULL);
13431
13432
0
    ctxt = xmlNewParserCtxt();
13433
0
    if (ctxt == NULL)
13434
0
        return(NULL);
13435
13436
0
    xmlCtxtUseOptions(ctxt, options);
13437
13438
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13439
0
                                      XML_INPUT_BUF_STATIC);
13440
13441
0
    if (input != NULL)
13442
0
        doc = xmlCtxtParseDocument(ctxt, input);
13443
13444
0
    xmlFreeParserCtxt(ctxt);
13445
0
    return(doc);
13446
0
}
13447
13448
/**
13449
 * Parse an XML from a file descriptor and build a tree.
13450
 *
13451
 * See #xmlCtxtReadFd for details.
13452
 *
13453
 * NOTE that the file descriptor will not be closed when the
13454
 * context is freed or reset.
13455
 *
13456
 * @param fd  an open file descriptor
13457
 * @param URL  base URL (optional)
13458
 * @param encoding  the document encoding (optional)
13459
 * @param options  a combination of xmlParserOption
13460
 * @returns the resulting document tree
13461
 */
13462
xmlDoc *
13463
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13464
0
{
13465
0
    xmlParserCtxtPtr ctxt;
13466
0
    xmlParserInputPtr input;
13467
0
    xmlDocPtr doc = NULL;
13468
13469
0
    ctxt = xmlNewParserCtxt();
13470
0
    if (ctxt == NULL)
13471
0
        return(NULL);
13472
13473
0
    xmlCtxtUseOptions(ctxt, options);
13474
13475
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML document from I/O functions and context and build a tree.
13486
 *
13487
 * See #xmlCtxtReadIO for details.
13488
 *
13489
 * @param ioread  an I/O read function
13490
 * @param ioclose  an I/O close function (optional)
13491
 * @param ioctx  an I/O handler
13492
 * @param URL  base URL (optional)
13493
 * @param encoding  the document encoding (optional)
13494
 * @param options  a combination of xmlParserOption
13495
 * @returns the resulting document tree
13496
 */
13497
xmlDoc *
13498
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13499
          void *ioctx, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13512
0
                                  encoding, 0);
13513
13514
0
    if (input != NULL)
13515
0
        doc = xmlCtxtParseDocument(ctxt, input);
13516
13517
0
    xmlFreeParserCtxt(ctxt);
13518
0
    return(doc);
13519
0
}
13520
13521
/**
13522
 * Parse an XML in-memory document and build a tree.
13523
 *
13524
 * `URL` is used as base to resolve external entities and for error
13525
 * reporting.
13526
 *
13527
 * @param ctxt  an XML parser context
13528
 * @param str  a pointer to a zero terminated string
13529
 * @param URL  base URL (optional)
13530
 * @param encoding  the document encoding (optional)
13531
 * @param options  a combination of xmlParserOption
13532
 * @returns the resulting document tree
13533
 */
13534
xmlDoc *
13535
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13536
               const char *URL, const char *encoding, int options)
13537
0
{
13538
0
    xmlParserInputPtr input;
13539
13540
0
    if (ctxt == NULL)
13541
0
        return(NULL);
13542
13543
0
    xmlCtxtReset(ctxt);
13544
0
    xmlCtxtUseOptions(ctxt, options);
13545
13546
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13547
0
                                      XML_INPUT_BUF_STATIC);
13548
0
    if (input == NULL)
13549
0
        return(NULL);
13550
13551
0
    return(xmlCtxtParseDocument(ctxt, input));
13552
0
}
13553
13554
/**
13555
 * Parse an XML file from the filesystem or a global, user-defined
13556
 * resource loader.
13557
 *
13558
 * This function always enables the XML_PARSE_UNZIP option for
13559
 * backward compatibility. This feature is potentially insecure
13560
 * and might be removed from later versions.
13561
 *
13562
 * @param ctxt  an XML parser context
13563
 * @param filename  a file or URL
13564
 * @param encoding  the document encoding (optional)
13565
 * @param options  a combination of xmlParserOption
13566
 * @returns the resulting document tree
13567
 */
13568
xmlDoc *
13569
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13570
                const char *encoding, int options)
13571
0
{
13572
0
    xmlParserInputPtr input;
13573
13574
0
    if (ctxt == NULL)
13575
0
        return(NULL);
13576
13577
0
    options |= XML_PARSE_UNZIP;
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13583
0
    if (input == NULL)
13584
0
        return(NULL);
13585
13586
0
    return(xmlCtxtParseDocument(ctxt, input));
13587
0
}
13588
13589
/**
13590
 * Parse an XML in-memory document and build a tree. The input buffer must
13591
 * not contain a terminating null byte.
13592
 *
13593
 * `URL` is used as base to resolve external entities and for error
13594
 * reporting.
13595
 *
13596
 * @param ctxt  an XML parser context
13597
 * @param buffer  a pointer to a char array
13598
 * @param size  the size of the array
13599
 * @param URL  base URL (optional)
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13606
                  const char *URL, const char *encoding, int options)
13607
50.6k
{
13608
50.6k
    xmlParserInputPtr input;
13609
13610
50.6k
    if ((ctxt == NULL) || (size < 0))
13611
0
        return(NULL);
13612
13613
50.6k
    xmlCtxtReset(ctxt);
13614
50.6k
    xmlCtxtUseOptions(ctxt, options);
13615
13616
50.6k
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13617
50.6k
                                      XML_INPUT_BUF_STATIC);
13618
50.6k
    if (input == NULL)
13619
38
        return(NULL);
13620
13621
50.5k
    return(xmlCtxtParseDocument(ctxt, input));
13622
50.6k
}
13623
13624
/**
13625
 * Parse an XML document from a file descriptor and build a tree.
13626
 *
13627
 * NOTE that the file descriptor will not be closed when the
13628
 * context is freed or reset.
13629
 *
13630
 * `URL` is used as base to resolve external entities and for error
13631
 * reporting.
13632
 *
13633
 * @param ctxt  an XML parser context
13634
 * @param fd  an open file descriptor
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13642
              const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if (ctxt == NULL)
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13653
0
    if (input == NULL)
13654
0
        return(NULL);
13655
13656
0
    return(xmlCtxtParseDocument(ctxt, input));
13657
0
}
13658
13659
/**
13660
 * Parse an XML document from I/O functions and source and build a tree.
13661
 * This reuses the existing `ctxt` parser context
13662
 *
13663
 * `URL` is used as base to resolve external entities and for error
13664
 * reporting.
13665
 *
13666
 * @param ctxt  an XML parser context
13667
 * @param ioread  an I/O read function
13668
 * @param ioclose  an I/O close function
13669
 * @param ioctx  an I/O handler
13670
 * @param URL  the base URL to use for the document
13671
 * @param encoding  the document encoding, or NULL
13672
 * @param options  a combination of xmlParserOption
13673
 * @returns the resulting document tree
13674
 */
13675
xmlDoc *
13676
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13677
              xmlInputCloseCallback ioclose, void *ioctx,
13678
        const char *URL,
13679
              const char *encoding, int options)
13680
0
{
13681
0
    xmlParserInputPtr input;
13682
13683
0
    if (ctxt == NULL)
13684
0
        return(NULL);
13685
13686
0
    xmlCtxtReset(ctxt);
13687
0
    xmlCtxtUseOptions(ctxt, options);
13688
13689
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13690
0
                                  encoding, 0);
13691
0
    if (input == NULL)
13692
0
        return(NULL);
13693
13694
0
    return(xmlCtxtParseDocument(ctxt, input));
13695
0
}
13696