Coverage Report

Created: 2026-03-21 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
650k
#define NS_INDEX_EMPTY  INT_MAX
81
41.2k
#define NS_INDEX_XML    (INT_MAX - 1)
82
350k
#define URI_HASH_EMPTY  0xD943A04E
83
10.1k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
327k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
451k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
411k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
432k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
1.52M
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Additional per-namespace bookkeeping.
 * NOTE(review): the field descriptions are inferred from the names only;
 * confirm against the namespace table code.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably owned by the SAX user */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the namespace URI */
    unsigned elementId;         /* presumably the id of the declaring element */
    int oldIndex;               /* presumably the index of a shadowed binding */
} xmlParserNsExtra;
115
116
/*
 * One slot of the namespace hash table: a hash value paired with an index.
 * NOTE(review): what the index refers to is not visible here; confirm.
 */
typedef struct {
    unsigned hashValue;
    int index;
} xmlParserNsBucket;
120
121
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
2.55M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
2.56M
#define XML_ENT_FIXED_COST 20
170
171
151M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
665k
#define XML_PARSER_BUFFER_SIZE 100
173
110k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
3.90k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
3.90k
    xmlCtxtErrMemory(ctxt);
226
3.90k
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
44.4k
{
239
44.4k
    if (prefix == NULL)
240
39.6k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
39.6k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
39.6k
                   "Attribute %s redefined\n", localname);
243
4.87k
    else
244
4.87k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
4.87k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
4.87k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
44.4k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
72.1M
{
260
72.1M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
72.1M
               NULL, NULL, NULL, 0, "%s", msg);
262
72.1M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
59.2k
{
277
59.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
59.2k
               str1, str2, NULL, 0, msg, str1, str2);
279
59.2k
}
280
281
#ifdef LIBXML_VALID_ENABLED
/**
 * Report a validity error (level XML_ERR_ERROR, domain XML_FROM_DTD).
 * The context is marked as not valid before the error is raised.
 *
 * @param ctxt  an XML parser context
 * @param error  the error number
 * @param msg  the format string of the error; it receives str1 and str2
 * @param str1  extra data
 * @param str2  extra data
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                 const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    /* Record the failure on the context first. */
    ctxt->valid = 0;

    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
               str1, str2, NULL, 0,
               msg, str1, str2);
}
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
9.78M
{
314
9.78M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
9.78M
               NULL, NULL, NULL, val, msg, val);
316
9.78M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
432k
{
333
432k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
432k
               str1, str2, NULL, val, msg, str1, val, str2);
335
432k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
1.21M
{
349
1.21M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
1.21M
               val, NULL, NULL, 0, msg, val);
351
1.21M
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
10.3k
{
365
10.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
10.3k
               val, NULL, NULL, 0, msg, val);
367
10.3k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
171k
{
385
171k
    ctxt->nsWellFormed = 0;
386
387
171k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
171k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
171k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
13.0k
{
407
13.0k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
13.0k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
13.0k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
2.61M
{
436
2.61M
    unsigned long consumed;
437
2.61M
    unsigned long *expandedSize;
438
2.61M
    xmlParserInputPtr input = ctxt->input;
439
2.61M
    xmlEntityPtr entity = input->entity;
440
441
2.61M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
55.0k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
2.55M
    consumed = input->consumed;
449
2.55M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
2.55M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
2.55M
    if (entity)
453
165k
        expandedSize = &entity->expandedSize;
454
2.38M
    else
455
2.38M
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
2.55M
    xmlSaturatedAdd(expandedSize, extra);
461
2.55M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
2.55M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
231k
        ((*expandedSize >= ULONG_MAX) ||
470
231k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
1.22k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
1.22k
                       "Maximum entity amplification factor exceeded, see "
473
1.22k
                       "xmlCtxtSetMaxAmplification.\n");
474
1.22k
        return(1);
475
1.22k
    }
476
477
2.55M
    return(0);
478
2.55M
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
0
#ifdef LIBXML_ZLIB_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_LZMA:
654
0
            return(0);
655
0
        case XML_WITH_ICU:
656
#ifdef LIBXML_ICU_ENABLED
657
            return(1);
658
#else
659
0
            return(0);
660
0
#endif
661
0
        default:
662
0
      break;
663
0
     }
664
0
     return(0);
665
0
}
666
667
/************************************************************************
668
 *                  *
669
 *      Simple string buffer        *
670
 *                  *
671
 ************************************************************************/
672
673
typedef struct {
674
    xmlChar *mem;
675
    unsigned size;
676
    unsigned cap; /* size < cap */
677
    unsigned max; /* size <= max */
678
    xmlParserErrors code;
679
} xmlSBuf;
680
681
static void
682
500k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
683
500k
    buf->mem = NULL;
684
500k
    buf->size = 0;
685
500k
    buf->cap = 0;
686
500k
    buf->max = max;
687
500k
    buf->code = XML_ERR_OK;
688
500k
}
689
690
static int
691
287k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
692
287k
    xmlChar *mem;
693
287k
    unsigned cap;
694
695
287k
    if (len >= UINT_MAX / 2 - buf->size) {
696
0
        if (buf->code == XML_ERR_OK)
697
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
698
0
        return(-1);
699
0
    }
700
701
287k
    cap = (buf->size + len) * 2;
702
287k
    if (cap < 240)
703
220k
        cap = 240;
704
705
287k
    mem = xmlRealloc(buf->mem, cap);
706
287k
    if (mem == NULL) {
707
645
        buf->code = XML_ERR_NO_MEMORY;
708
645
        return(-1);
709
645
    }
710
711
286k
    buf->mem = mem;
712
286k
    buf->cap = cap;
713
714
286k
    return(0);
715
287k
}
716
717
static void
718
310M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
719
310M
    if (buf->max - buf->size < len) {
720
1.94M
        if (buf->code == XML_ERR_OK)
721
379
            buf->code = XML_ERR_RESOURCE_LIMIT;
722
1.94M
        return;
723
1.94M
    }
724
725
308M
    if (buf->cap - buf->size <= len) {
726
274k
        if (xmlSBufGrow(buf, len) < 0)
727
583
            return;
728
274k
    }
729
730
308M
    if (len > 0)
731
308M
        memcpy(buf->mem + buf->size, str, len);
732
308M
    buf->size += len;
733
308M
}
734
735
static void
736
306M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
737
306M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
738
306M
}
739
740
static void
741
443k
xmlSBufAddChar(xmlSBuf *buf, int c) {
742
443k
    xmlChar *end;
743
744
443k
    if (buf->max - buf->size < 4) {
745
937
        if (buf->code == XML_ERR_OK)
746
6
            buf->code = XML_ERR_RESOURCE_LIMIT;
747
937
        return;
748
937
    }
749
750
442k
    if (buf->cap - buf->size <= 4) {
751
12.4k
        if (xmlSBufGrow(buf, 4) < 0)
752
62
            return;
753
12.4k
    }
754
755
442k
    end = buf->mem + buf->size;
756
757
442k
    if (c < 0x80) {
758
433k
        *end = (xmlChar) c;
759
433k
        buf->size += 1;
760
433k
    } else {
761
8.88k
        buf->size += xmlCopyCharMultiByte(end, c);
762
8.88k
    }
763
442k
}
764
765
static void
766
227M
xmlSBufAddReplChar(xmlSBuf *buf) {
767
227M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
768
227M
}
769
770
static void
771
1.08k
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
772
1.08k
    if (buf->code == XML_ERR_NO_MEMORY)
773
700
        xmlCtxtErrMemory(ctxt);
774
385
    else
775
385
        xmlFatalErr(ctxt, buf->code, errMsg);
776
1.08k
}
777
778
static xmlChar *
779
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
780
241k
              const char *errMsg) {
781
241k
    if (buf->mem == NULL) {
782
25.0k
        buf->mem = xmlMalloc(1);
783
25.0k
        if (buf->mem == NULL) {
784
55
            buf->code = XML_ERR_NO_MEMORY;
785
24.9k
        } else {
786
24.9k
            buf->mem[0] = 0;
787
24.9k
        }
788
216k
    } else {
789
216k
        buf->mem[buf->size] = 0;
790
216k
    }
791
792
241k
    if (buf->code == XML_ERR_OK) {
793
240k
        if (sizeOut != NULL)
794
49.8k
            *sizeOut = buf->size;
795
240k
        return(buf->mem);
796
240k
    }
797
798
604
    xmlSBufReportError(buf, ctxt, errMsg);
799
800
604
    xmlFree(buf->mem);
801
802
604
    if (sizeOut != NULL)
803
144
        *sizeOut = 0;
804
604
    return(NULL);
805
241k
}
806
807
static void
808
241k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809
241k
    if (buf->code != XML_ERR_OK)
810
481
        xmlSBufReportError(buf, ctxt, errMsg);
811
812
241k
    xmlFree(buf->mem);
813
241k
}
814
815
static int
816
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
817
732M
                    const char *errMsg) {
818
732M
    int c = str[0];
819
732M
    int c1 = str[1];
820
821
732M
    if ((c1 & 0xC0) != 0x80)
822
85.8M
        goto encoding_error;
823
824
647M
    if (c < 0xE0) {
825
        /* 2-byte sequence */
826
82.3M
        if (c < 0xC2)
827
70.3M
            goto encoding_error;
828
829
11.9M
        return(2);
830
564M
    } else {
831
564M
        int c2 = str[2];
832
833
564M
        if ((c2 & 0xC0) != 0x80)
834
10.2k
            goto encoding_error;
835
836
564M
        if (c < 0xF0) {
837
            /* 3-byte sequence */
838
564M
            if (c == 0xE0) {
839
                /* overlong */
840
6.43k
                if (c1 < 0xA0)
841
268
                    goto encoding_error;
842
564M
            } else if (c == 0xED) {
843
                /* surrogate */
844
5.89k
                if (c1 >= 0xA0)
845
270
                    goto encoding_error;
846
564M
            } else if (c == 0xEF) {
847
                /* U+FFFE and U+FFFF are invalid Chars */
848
65.7M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
849
575
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
850
65.7M
            }
851
852
564M
            return(3);
853
564M
        } else {
854
            /* 4-byte sequence */
855
46.9k
            if ((str[3] & 0xC0) != 0x80)
856
2.58k
                goto encoding_error;
857
44.3k
            if (c == 0xF0) {
858
                /* overlong */
859
1.54k
                if (c1 < 0x90)
860
771
                    goto encoding_error;
861
42.8k
            } else if (c >= 0xF4) {
862
                /* greater than 0x10FFFF */
863
9.07k
                if ((c > 0xF4) || (c1 >= 0x90))
864
8.29k
                    goto encoding_error;
865
9.07k
            }
866
867
35.3k
            return(4);
868
44.3k
        }
869
564M
    }
870
871
156M
encoding_error:
872
    /* Only report the first error */
873
156M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
874
24.2k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
875
24.2k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
876
24.2k
    }
877
878
156M
    return(0);
879
647M
}
880
881
/************************************************************************
882
 *                  *
883
 *    SAX2 defaulted attributes handling      *
884
 *                  *
885
 ************************************************************************/
886
887
/**
888
 * Final initialization of the parser context before starting to parse.
889
 *
890
 * This accounts for users modifying struct members of parser context
891
 * directly.
892
 *
893
 * @param ctxt  an XML parser context
894
 */
895
static void
896
86.3k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
897
86.3k
    xmlSAXHandlerPtr sax;
898
899
    /* Avoid unused variable warning if features are disabled. */
900
86.3k
    (void) sax;
901
902
    /*
903
     * Changing the SAX struct directly is still widespread practice
904
     * in internal and external code.
905
     */
906
86.3k
    if (ctxt == NULL) return;
907
86.3k
    sax = ctxt->sax;
908
86.3k
#ifdef LIBXML_SAX1_ENABLED
909
    /*
910
     * Only enable SAX2 if there SAX2 element handlers, except when there
911
     * are no element handlers at all.
912
     */
913
86.3k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
914
53.1k
        (sax) &&
915
53.1k
        (sax->initialized == XML_SAX2_MAGIC) &&
916
53.1k
        ((sax->startElementNs != NULL) ||
917
0
         (sax->endElementNs != NULL) ||
918
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
919
53.1k
        ctxt->sax2 = 1;
920
#else
921
    ctxt->sax2 = 1;
922
#endif /* LIBXML_SAX1_ENABLED */
923
924
    /*
925
     * Some users replace the dictionary directly in the context struct.
926
     * We really need an API function to do that cleanly.
927
     */
928
86.3k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
929
86.3k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
930
86.3k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
931
86.3k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
932
85.5k
    (ctxt->str_xml_ns == NULL)) {
933
721
        xmlErrMemory(ctxt);
934
721
    }
935
936
86.3k
    xmlDictSetLimit(ctxt->dict,
937
86.3k
                    (ctxt->options & XML_PARSE_HUGE) ?
938
38.0k
                        0 :
939
86.3k
                        XML_MAX_DICTIONARY_LIMIT);
940
941
86.3k
#ifdef LIBXML_VALID_ENABLED
942
86.3k
    if (ctxt->validate)
943
57.7k
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
944
28.5k
    else
945
28.5k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
946
86.3k
#endif /* LIBXML_VALID_ENABLED */
947
86.3k
}
948
949
typedef struct {
950
    xmlHashedString prefix;
951
    xmlHashedString name;
952
    xmlHashedString value;
953
    const xmlChar *valueEnd;
954
    int external;
955
    int expandedSize;
956
} xmlDefAttr;
957
958
typedef struct _xmlDefAttrs xmlDefAttrs;
959
typedef xmlDefAttrs *xmlDefAttrsPtr;
960
struct _xmlDefAttrs {
961
    int nbAttrs;  /* number of defaulted attributes on that element */
962
    int maxAttrs;       /* the size of the array */
963
#if __STDC_VERSION__ >= 199901L
964
    /* Using a C99 flexible array member avoids UBSan errors. */
965
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
966
#else
967
    xmlDefAttr attrs[1];
968
#endif
969
};
970
971
/**
972
 * Normalize the space in non CDATA attribute values:
973
 * If the attribute type is not CDATA, then the XML processor MUST further
974
 * process the normalized attribute value by discarding any leading and
975
 * trailing space (\#x20) characters, and by replacing sequences of space
976
 * (\#x20) characters by a single space (\#x20) character.
977
 * Note that the size of dst need to be at least src, and if one doesn't need
978
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
979
 * passing src as dst is just fine.
980
 *
981
 * @param src  the source string
982
 * @param dst  the target string
983
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
984
 *         is needed.
985
 */
986
static xmlChar *
987
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
988
51.6k
{
989
51.6k
    if ((src == NULL) || (dst == NULL))
990
0
        return(NULL);
991
992
53.3k
    while (*src == 0x20) src++;
993
104M
    while (*src != 0) {
994
104M
  if (*src == 0x20) {
995
11.4M
      while (*src == 0x20) src++;
996
17.3k
      if (*src != 0)
997
16.9k
    *dst++ = 0x20;
998
104M
  } else {
999
104M
      *dst++ = *src++;
1000
104M
  }
1001
104M
    }
1002
51.6k
    *dst = 0;
1003
51.6k
    if (dst == src)
1004
50.1k
       return(NULL);
1005
1.50k
    return(dst);
1006
51.6k
}
1007
1008
/**
1009
 * Add a defaulted attribute for an element
1010
 *
1011
 * @param ctxt  an XML parser context
1012
 * @param fullname  the element fullname
1013
 * @param fullattr  the attribute fullname
1014
 * @param value  the attribute value
1015
 */
1016
static void
1017
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018
               const xmlChar *fullname,
1019
               const xmlChar *fullattr,
1020
42.2k
               const xmlChar *value) {
1021
42.2k
    xmlDefAttrsPtr defaults;
1022
42.2k
    xmlDefAttr *attr;
1023
42.2k
    int len, expandedSize;
1024
42.2k
    xmlHashedString name;
1025
42.2k
    xmlHashedString prefix;
1026
42.2k
    xmlHashedString hvalue;
1027
42.2k
    const xmlChar *localname;
1028
1029
    /*
1030
     * Allows to detect attribute redefinitions
1031
     */
1032
42.2k
    if (ctxt->attsSpecial != NULL) {
1033
36.3k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034
18.6k
      return;
1035
36.3k
    }
1036
1037
23.5k
    if (ctxt->attsDefault == NULL) {
1038
6.13k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039
6.13k
  if (ctxt->attsDefault == NULL)
1040
24
      goto mem_error;
1041
6.13k
    }
1042
1043
    /*
1044
     * split the element name into prefix:localname , the string found
1045
     * are within the DTD and then not associated to namespace names.
1046
     */
1047
23.5k
    localname = xmlSplitQName3(fullname, &len);
1048
23.5k
    if (localname == NULL) {
1049
22.9k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050
22.9k
  prefix.name = NULL;
1051
22.9k
    } else {
1052
541
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053
541
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054
541
        if (prefix.name == NULL)
1055
3
            goto mem_error;
1056
541
    }
1057
23.5k
    if (name.name == NULL)
1058
7
        goto mem_error;
1059
1060
    /*
1061
     * make sure there is some storage
1062
     */
1063
23.5k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064
23.5k
    if ((defaults == NULL) ||
1065
16.3k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1066
8.37k
        xmlDefAttrsPtr temp;
1067
8.37k
        int newSize;
1068
1069
8.37k
        if (defaults == NULL) {
1070
7.18k
            newSize = 4;
1071
7.18k
        } else {
1072
1.18k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1073
1.18k
                ((size_t) defaults->maxAttrs >
1074
1.18k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1075
0
                goto mem_error;
1076
1077
1.18k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1078
0
                newSize = XML_MAX_ATTRS;
1079
1.18k
            else
1080
1.18k
                newSize = defaults->maxAttrs * 2;
1081
1.18k
        }
1082
8.37k
        temp = xmlRealloc(defaults,
1083
8.37k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1084
8.37k
  if (temp == NULL)
1085
22
      goto mem_error;
1086
8.34k
        if (defaults == NULL)
1087
7.16k
            temp->nbAttrs = 0;
1088
8.34k
  temp->maxAttrs = newSize;
1089
8.34k
        defaults = temp;
1090
8.34k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1091
8.34k
                          defaults, NULL) < 0) {
1092
0
      xmlFree(defaults);
1093
0
      goto mem_error;
1094
0
  }
1095
8.34k
    }
1096
1097
    /*
1098
     * Split the attribute name into prefix:localname , the string found
1099
     * are within the DTD and hen not associated to namespace names.
1100
     */
1101
23.4k
    localname = xmlSplitQName3(fullattr, &len);
1102
23.4k
    if (localname == NULL) {
1103
19.1k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1104
19.1k
  prefix.name = NULL;
1105
19.1k
    } else {
1106
4.33k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1107
4.33k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1108
4.33k
        if (prefix.name == NULL)
1109
6
            goto mem_error;
1110
4.33k
    }
1111
23.4k
    if (name.name == NULL)
1112
6
        goto mem_error;
1113
1114
    /* intern the string and precompute the end */
1115
23.4k
    len = strlen((const char *) value);
1116
23.4k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1117
23.4k
    if (hvalue.name == NULL)
1118
9
        goto mem_error;
1119
1120
23.4k
    expandedSize = strlen((const char *) name.name);
1121
23.4k
    if (prefix.name != NULL)
1122
4.31k
        expandedSize += strlen((const char *) prefix.name);
1123
23.4k
    expandedSize += len;
1124
1125
23.4k
    attr = &defaults->attrs[defaults->nbAttrs++];
1126
23.4k
    attr->name = name;
1127
23.4k
    attr->prefix = prefix;
1128
23.4k
    attr->value = hvalue;
1129
23.4k
    attr->valueEnd = hvalue.name + len;
1130
23.4k
    attr->external = PARSER_EXTERNAL(ctxt);
1131
23.4k
    attr->expandedSize = expandedSize;
1132
1133
23.4k
    return;
1134
1135
77
mem_error:
1136
77
    xmlErrMemory(ctxt);
1137
77
}
1138
1139
/**
1140
 * Register this attribute type
1141
 *
1142
 * @param ctxt  an XML parser context
1143
 * @param fullname  the element fullname
1144
 * @param fullattr  the attribute fullname
1145
 * @param type  the attribute type
1146
 */
1147
static void
1148
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1149
      const xmlChar *fullname,
1150
      const xmlChar *fullattr,
1151
      int type)
1152
77.1k
{
1153
77.1k
    if (ctxt->attsSpecial == NULL) {
1154
8.84k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1155
8.84k
  if (ctxt->attsSpecial == NULL)
1156
38
      goto mem_error;
1157
8.84k
    }
1158
1159
77.0k
    if (PARSER_EXTERNAL(ctxt))
1160
38.5k
        type |= XML_SPECIAL_EXTERNAL;
1161
1162
77.0k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1163
77.0k
                    XML_INT_TO_PTR(type)) < 0)
1164
4
        goto mem_error;
1165
77.0k
    return;
1166
1167
77.0k
mem_error:
1168
42
    xmlErrMemory(ctxt);
1169
42
}
1170
1171
/**
1172
 * Removes CDATA attributes from the special attribute table
1173
 */
1174
static void
1175
xmlCleanSpecialAttrCallback(void *payload, void *data,
1176
                            const xmlChar *fullname, const xmlChar *fullattr,
1177
53.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1178
53.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1179
1180
53.0k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1181
3.72k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1182
3.72k
    }
1183
53.0k
}
1184
1185
/**
1186
 * Trim the list of attributes defined to remove all those of type
1187
 * CDATA as they are not special. This call should be done when finishing
1188
 * to parse the DTD and before starting to parse the document root.
1189
 *
1190
 * @param ctxt  an XML parser context
1191
 */
1192
static void
1193
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1194
46.9k
{
1195
46.9k
    if (ctxt->attsSpecial == NULL)
1196
38.1k
        return;
1197
1198
8.76k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1199
1200
8.76k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1201
500
        xmlHashFree(ctxt->attsSpecial, NULL);
1202
500
        ctxt->attsSpecial = NULL;
1203
500
    }
1204
8.76k
}
1205
1206
/**
1207
 * Checks that the value conforms to the LanguageID production:
1208
 *
1209
 * @deprecated Internal function, do not use.
1210
 *
1211
 * NOTE: this is somewhat deprecated, those productions were removed from
1212
 * the XML Second edition.
1213
 *
1214
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1215
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1216
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1217
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1218
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1219
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1220
 *
1221
 * The current REC reference the successors of RFC 1766, currently 5646
1222
 *
1223
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1224
 *
1225
 *     langtag       = language
1226
 *                     ["-" script]
1227
 *                     ["-" region]
1228
 *                     *("-" variant)
1229
 *                     *("-" extension)
1230
 *                     ["-" privateuse]
1231
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1232
 *                     ["-" extlang]       ; sometimes followed by
1233
 *                                         ; extended language subtags
1234
 *                   / 4ALPHA              ; or reserved for future use
1235
 *                   / 5*8ALPHA            ; or registered language subtag
1236
 *
1237
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1238
 *                     *2("-" 3ALPHA)      ; permanently reserved
1239
 *
1240
 *     script        = 4ALPHA              ; ISO 15924 code
1241
 *
1242
 *     region        = 2ALPHA              ; ISO 3166-1 code
1243
 *                   / 3DIGIT              ; UN M.49 code
1244
 *
1245
 *     variant       = 5*8alphanum         ; registered variants
1246
 *                   / (DIGIT 3alphanum)
1247
 *
1248
 *     extension     = singleton 1*("-" (2*8alphanum))
1249
 *
1250
 *                                         ; Single alphanumerics
1251
 *                                         ; "x" reserved for private use
1252
 *     singleton     = DIGIT               ; 0 - 9
1253
 *                   / %x41-57             ; A - W
1254
 *                   / %x59-5A             ; Y - Z
1255
 *                   / %x61-77             ; a - w
1256
 *                   / %x79-7A             ; y - z
1257
 *
1258
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1259
 * The parser below doesn't try to cope with extension or privateuse
1260
 * that could be added but that's not interoperable anyway
1261
 *
1262
 * @param lang  pointer to the string value
1263
 * @returns 1 if correct 0 otherwise
1264
 **/
1265
int
1266
xmlCheckLanguageID(const xmlChar * lang)
1267
11.0k
{
1268
11.0k
    const xmlChar *cur = lang, *nxt;
1269
1270
11.0k
    if (cur == NULL)
1271
420
        return (0);
1272
10.6k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1273
10.4k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1274
10.2k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1275
9.98k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1276
        /*
1277
         * Still allow IANA code and user code which were coming
1278
         * from the previous version of the XML-1.0 specification
1279
         * it's deprecated but we should not fail
1280
         */
1281
864
        cur += 2;
1282
1.79k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1283
1.33k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1284
935
            cur++;
1285
864
        return(cur[0] == 0);
1286
864
    }
1287
9.77k
    nxt = cur;
1288
36.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1289
20.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1290
26.8k
           nxt++;
1291
9.77k
    if (nxt - cur >= 4) {
1292
        /*
1293
         * Reserved
1294
         */
1295
896
        if ((nxt - cur > 8) || (nxt[0] != 0))
1296
683
            return(0);
1297
213
        return(1);
1298
896
    }
1299
8.88k
    if (nxt - cur < 2)
1300
627
        return(0);
1301
    /* we got an ISO 639 code */
1302
8.25k
    if (nxt[0] == 0)
1303
230
        return(1);
1304
8.02k
    if (nxt[0] != '-')
1305
458
        return(0);
1306
1307
7.56k
    nxt++;
1308
7.56k
    cur = nxt;
1309
    /* now we can have extlang or script or region or variant */
1310
7.56k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1311
526
        goto region_m49;
1312
1313
32.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1314
21.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1315
25.7k
           nxt++;
1316
7.03k
    if (nxt - cur == 4)
1317
1.75k
        goto script;
1318
5.28k
    if (nxt - cur == 2)
1319
1.04k
        goto region;
1320
4.24k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1321
524
        goto variant;
1322
3.72k
    if (nxt - cur != 3)
1323
940
        return(0);
1324
    /* we parsed an extlang */
1325
2.78k
    if (nxt[0] == 0)
1326
83
        return(1);
1327
2.70k
    if (nxt[0] != '-')
1328
577
        return(0);
1329
1330
2.12k
    nxt++;
1331
2.12k
    cur = nxt;
1332
    /* now we can have script or region or variant */
1333
2.12k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1334
228
        goto region_m49;
1335
1336
10.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1337
4.59k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1338
8.99k
           nxt++;
1339
1.89k
    if (nxt - cur == 2)
1340
78
        goto region;
1341
1.81k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
248
        goto variant;
1343
1.56k
    if (nxt - cur != 4)
1344
995
        return(0);
1345
    /* we parsed a script */
1346
2.32k
script:
1347
2.32k
    if (nxt[0] == 0)
1348
240
        return(1);
1349
2.08k
    if (nxt[0] != '-')
1350
260
        return(0);
1351
1352
1.82k
    nxt++;
1353
1.82k
    cur = nxt;
1354
    /* now we can have region or variant */
1355
1.82k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1356
388
        goto region_m49;
1357
1358
6.81k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1359
5.04k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1360
5.38k
           nxt++;
1361
1362
1.43k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
225
        goto variant;
1364
1.21k
    if (nxt - cur != 2)
1365
862
        return(0);
1366
    /* we parsed a region */
1367
1.67k
region:
1368
1.67k
    if (nxt[0] == 0)
1369
249
        return(1);
1370
1.42k
    if (nxt[0] != '-')
1371
484
        return(0);
1372
1373
939
    nxt++;
1374
939
    cur = nxt;
1375
    /* now we can just have a variant */
1376
5.27k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1377
3.53k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1378
4.33k
           nxt++;
1379
1380
939
    if ((nxt - cur < 5) || (nxt - cur > 8))
1381
639
        return(0);
1382
1383
    /* we parsed a variant */
1384
1.29k
variant:
1385
1.29k
    if (nxt[0] == 0)
1386
421
        return(1);
1387
876
    if (nxt[0] != '-')
1388
641
        return(0);
1389
    /* extensions and private use subtags not checked */
1390
235
    return (1);
1391
1392
1.14k
region_m49:
1393
1.14k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1394
505
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1395
203
        nxt += 3;
1396
203
        goto region;
1397
203
    }
1398
939
    return(0);
1399
1.14k
}
1400
1401
/************************************************************************
1402
 *                  *
1403
 *    Parser stacks related functions and macros    *
1404
 *                  *
1405
 ************************************************************************/
1406
1407
static xmlChar *
1408
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1409
1410
/**
1411
 * Create a new namespace database.
1412
 *
1413
 * @returns the new obejct.
1414
 */
1415
xmlParserNsData *
1416
81.6k
xmlParserNsCreate(void) {
1417
81.6k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1418
1419
81.6k
    if (nsdb == NULL)
1420
9
        return(NULL);
1421
81.6k
    memset(nsdb, 0, sizeof(*nsdb));
1422
81.6k
    nsdb->defaultNsIndex = INT_MAX;
1423
1424
81.6k
    return(nsdb);
1425
81.6k
}
1426
1427
/**
1428
 * Free a namespace database.
1429
 *
1430
 * @param nsdb  namespace database
1431
 */
1432
void
1433
81.6k
xmlParserNsFree(xmlParserNsData *nsdb) {
1434
81.6k
    if (nsdb == NULL)
1435
0
        return;
1436
1437
81.6k
    xmlFree(nsdb->extra);
1438
81.6k
    xmlFree(nsdb->hash);
1439
81.6k
    xmlFree(nsdb);
1440
81.6k
}
1441
1442
/**
1443
 * Reset a namespace database.
1444
 *
1445
 * @param nsdb  namespace database
1446
 */
1447
static void
1448
69.6k
xmlParserNsReset(xmlParserNsData *nsdb) {
1449
69.6k
    if (nsdb == NULL)
1450
0
        return;
1451
1452
69.6k
    nsdb->hashElems = 0;
1453
69.6k
    nsdb->elementId = 0;
1454
69.6k
    nsdb->defaultNsIndex = INT_MAX;
1455
1456
69.6k
    if (nsdb->hash)
1457
861
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1458
69.6k
}
1459
1460
/**
1461
 * Signal that a new element has started.
1462
 *
1463
 * @param nsdb  namespace database
1464
 * @returns 0 on success, -1 if the element counter overflowed.
1465
 */
1466
static int
1467
1.10M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1468
1.10M
    if (nsdb->elementId == UINT_MAX)
1469
0
        return(-1);
1470
1.10M
    nsdb->elementId++;
1471
1472
1.10M
    return(0);
1473
1.10M
}
1474
1475
/**
1476
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1477
 * be set to the matching bucket, or the first empty bucket if no match
1478
 * was found.
1479
 *
1480
 * @param ctxt  parser context
1481
 * @param prefix  namespace prefix
1482
 * @param bucketPtr  optional bucket (return value)
1483
 * @returns the namespace index on success, INT_MAX if no namespace was
1484
 * found.
1485
 */
1486
static int
1487
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1488
2.61M
                  xmlParserNsBucket **bucketPtr) {
1489
2.61M
    xmlParserNsBucket *bucket, *tombstone;
1490
2.61M
    unsigned index, hashValue;
1491
1492
2.61M
    if (prefix->name == NULL)
1493
925k
        return(ctxt->nsdb->defaultNsIndex);
1494
1495
1.68M
    if (ctxt->nsdb->hashSize == 0)
1496
67.9k
        return(INT_MAX);
1497
1498
1.62M
    hashValue = prefix->hashValue;
1499
1.62M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1500
1.62M
    bucket = &ctxt->nsdb->hash[index];
1501
1.62M
    tombstone = NULL;
1502
1503
2.04M
    while (bucket->hashValue) {
1504
1.74M
        if (bucket->index == INT_MAX) {
1505
279k
            if (tombstone == NULL)
1506
271k
                tombstone = bucket;
1507
1.46M
        } else if (bucket->hashValue == hashValue) {
1508
1.32M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1509
1.32M
                if (bucketPtr != NULL)
1510
949k
                    *bucketPtr = bucket;
1511
1.32M
                return(bucket->index);
1512
1.32M
            }
1513
1.32M
        }
1514
1515
424k
        index++;
1516
424k
        bucket++;
1517
424k
        if (index == ctxt->nsdb->hashSize) {
1518
15.9k
            index = 0;
1519
15.9k
            bucket = ctxt->nsdb->hash;
1520
15.9k
        }
1521
424k
    }
1522
1523
299k
    if (bucketPtr != NULL)
1524
264k
        *bucketPtr = tombstone ? tombstone : bucket;
1525
299k
    return(INT_MAX);
1526
1.62M
}
1527
1528
/**
1529
 * Lookup namespace URI with given prefix.
1530
 *
1531
 * @param ctxt  parser context
1532
 * @param prefix  namespace prefix
1533
 * @returns the namespace URI on success, NULL if no namespace was found.
1534
 */
1535
static const xmlChar *
1536
846k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1537
846k
    const xmlChar *ret;
1538
846k
    int nsIndex;
1539
1540
846k
    if (prefix->name == ctxt->str_xml)
1541
1.29k
        return(ctxt->str_xml_ns);
1542
1543
    /*
1544
     * minNsIndex is used when building an entity tree. We must
1545
     * ignore namespaces declared outside the entity.
1546
     */
1547
845k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1548
845k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1549
615k
        return(NULL);
1550
1551
230k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1552
230k
    if (ret[0] == 0)
1553
2.73k
        ret = NULL;
1554
230k
    return(ret);
1555
845k
}
1556
1557
/**
1558
 * Lookup extra data for the given prefix. This returns data stored
1559
 * with xmlParserNsUdpateSax.
1560
 *
1561
 * @param ctxt  parser context
1562
 * @param prefix  namespace prefix
1563
 * @returns the data on success, NULL if no namespace was found.
1564
 */
1565
void *
1566
121k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1567
121k
    xmlHashedString hprefix;
1568
121k
    int nsIndex;
1569
1570
121k
    if (prefix == ctxt->str_xml)
1571
12.8k
        return(NULL);
1572
1573
108k
    hprefix.name = prefix;
1574
108k
    if (prefix != NULL)
1575
19.3k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1576
89.4k
    else
1577
89.4k
        hprefix.hashValue = 0;
1578
108k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1579
108k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1580
0
        return(NULL);
1581
1582
108k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1583
108k
}
1584
1585
/**
1586
 * Sets or updates extra data for the given prefix. This value will be
1587
 * returned by xmlParserNsLookupSax as long as the namespace with the
1588
 * given prefix is in scope.
1589
 *
1590
 * @param ctxt  parser context
1591
 * @param prefix  namespace prefix
1592
 * @param saxData  extra data for SAX handler
1593
 * @returns the data on success, NULL if no namespace was found.
1594
 */
1595
int
1596
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1597
339k
                     void *saxData) {
1598
339k
    xmlHashedString hprefix;
1599
339k
    int nsIndex;
1600
1601
339k
    if (prefix == ctxt->str_xml)
1602
0
        return(-1);
1603
1604
339k
    hprefix.name = prefix;
1605
339k
    if (prefix != NULL)
1606
310k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1607
28.6k
    else
1608
28.6k
        hprefix.hashValue = 0;
1609
339k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1610
339k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1611
0
        return(-1);
1612
1613
339k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1614
339k
    return(0);
1615
339k
}
1616
1617
/**
1618
 * Grows the namespace tables.
1619
 *
1620
 * @param ctxt  parser context
1621
 * @returns 0 on success, -1 if a memory allocation failed.
1622
 */
1623
static int
1624
18.5k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1625
18.5k
    const xmlChar **table;
1626
18.5k
    xmlParserNsExtra *extra;
1627
18.5k
    int newSize;
1628
1629
18.5k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1630
18.5k
                              sizeof(table[0]) + sizeof(extra[0]),
1631
18.5k
                              16, XML_MAX_ITEMS);
1632
18.5k
    if (newSize < 0)
1633
0
        goto error;
1634
1635
18.5k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1636
18.5k
    if (table == NULL)
1637
57
        goto error;
1638
18.5k
    ctxt->nsTab = table;
1639
1640
18.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1641
18.5k
    if (extra == NULL)
1642
48
        goto error;
1643
18.4k
    ctxt->nsdb->extra = extra;
1644
1645
18.4k
    ctxt->nsMax = newSize;
1646
18.4k
    return(0);
1647
1648
105
error:
1649
105
    xmlErrMemory(ctxt);
1650
105
    return(-1);
1651
18.5k
}
1652
1653
/**
1654
 * Push a new namespace on the table.
1655
 *
1656
 * @param ctxt  parser context
1657
 * @param prefix  prefix with hash value
1658
 * @param uri  uri with hash value
1659
 * @param saxData  extra data for SAX handler
1660
 * @param defAttr  whether the namespace comes from a default attribute
1661
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1662
 * -1 if a memory allocation failed.
1663
 */
1664
static int
1665
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1666
680k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1667
680k
    xmlParserNsBucket *bucket = NULL;
1668
680k
    xmlParserNsExtra *extra;
1669
680k
    const xmlChar **ns;
1670
680k
    unsigned hashValue, nsIndex, oldIndex;
1671
1672
680k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1673
69
        return(0);
1674
1675
680k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1676
105
        xmlErrMemory(ctxt);
1677
105
        return(-1);
1678
105
    }
1679
1680
    /*
1681
     * Default namespace and 'xml' namespace
1682
     */
1683
680k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1684
50.5k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1685
1686
50.5k
        if (oldIndex != INT_MAX) {
1687
45.0k
            extra = &ctxt->nsdb->extra[oldIndex];
1688
1689
45.0k
            if (extra->elementId == ctxt->nsdb->elementId) {
1690
650
                if (defAttr == 0)
1691
444
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1692
650
                return(0);
1693
650
            }
1694
1695
44.3k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1696
9.14k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1697
4.56k
                return(0);
1698
44.3k
        }
1699
1700
45.3k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1701
45.3k
        goto populate_entry;
1702
50.5k
    }
1703
1704
    /*
1705
     * Hash table lookup
1706
     */
1707
629k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1708
629k
    if (oldIndex != INT_MAX) {
1709
361k
        extra = &ctxt->nsdb->extra[oldIndex];
1710
1711
        /*
1712
         * Check for duplicate definitions on the same element.
1713
         */
1714
361k
        if (extra->elementId == ctxt->nsdb->elementId) {
1715
1.02k
            if (defAttr == 0)
1716
817
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1717
1.02k
            return(0);
1718
1.02k
        }
1719
1720
360k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1721
27.6k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1722
25.2k
            return(0);
1723
1724
334k
        bucket->index = ctxt->nsNr;
1725
334k
        goto populate_entry;
1726
360k
    }
1727
1728
    /*
1729
     * Insert new bucket
1730
     */
1731
1732
268k
    hashValue = prefix->hashValue;
1733
1734
    /*
1735
     * Grow hash table, 50% fill factor
1736
     */
1737
268k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1738
5.53k
        xmlParserNsBucket *newHash;
1739
5.53k
        unsigned newSize, i, index;
1740
1741
5.53k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1742
0
            xmlErrMemory(ctxt);
1743
0
            return(-1);
1744
0
        }
1745
5.53k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1746
5.53k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1747
5.53k
        if (newHash == NULL) {
1748
11
            xmlErrMemory(ctxt);
1749
11
            return(-1);
1750
11
        }
1751
5.52k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1752
1753
869k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1754
864k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1755
864k
            unsigned newIndex;
1756
1757
864k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1758
861k
                continue;
1759
2.44k
            newIndex = hv & (newSize - 1);
1760
1761
3.57k
            while (newHash[newIndex].hashValue != 0) {
1762
1.13k
                newIndex++;
1763
1.13k
                if (newIndex == newSize)
1764
386
                    newIndex = 0;
1765
1.13k
            }
1766
1767
2.44k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1768
2.44k
        }
1769
1770
5.52k
        xmlFree(ctxt->nsdb->hash);
1771
5.52k
        ctxt->nsdb->hash = newHash;
1772
5.52k
        ctxt->nsdb->hashSize = newSize;
1773
1774
        /*
1775
         * Relookup
1776
         */
1777
5.52k
        index = hashValue & (newSize - 1);
1778
1779
6.02k
        while (newHash[index].hashValue != 0) {
1780
499
            index++;
1781
499
            if (index == newSize)
1782
189
                index = 0;
1783
499
        }
1784
1785
5.52k
        bucket = &newHash[index];
1786
5.52k
    }
1787
1788
268k
    bucket->hashValue = hashValue;
1789
268k
    bucket->index = ctxt->nsNr;
1790
268k
    ctxt->nsdb->hashElems++;
1791
268k
    oldIndex = INT_MAX;
1792
1793
648k
populate_entry:
1794
648k
    nsIndex = ctxt->nsNr;
1795
1796
648k
    ns = &ctxt->nsTab[nsIndex * 2];
1797
648k
    ns[0] = prefix ? prefix->name : NULL;
1798
648k
    ns[1] = uri->name;
1799
1800
648k
    extra = &ctxt->nsdb->extra[nsIndex];
1801
648k
    extra->saxData = saxData;
1802
648k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1803
648k
    extra->uriHashValue = uri->hashValue;
1804
648k
    extra->elementId = ctxt->nsdb->elementId;
1805
648k
    extra->oldIndex = oldIndex;
1806
1807
648k
    ctxt->nsNr++;
1808
1809
648k
    return(1);
1810
268k
}
1811
1812
/**
1813
 * Pops the top `nr` namespaces and restores the hash table.
1814
 *
1815
 * @param ctxt  an XML parser context
1816
 * @param nr  the number to pop
1817
 * @returns the number of namespaces popped.
1818
 */
1819
static int
1820
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1821
344k
{
1822
344k
    int i;
1823
1824
    /* assert(nr <= ctxt->nsNr); */
1825
1826
972k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1827
628k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1828
628k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1829
1830
628k
        if (prefix == NULL) {
1831
40.1k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1832
588k
        } else {
1833
588k
            xmlHashedString hprefix;
1834
588k
            xmlParserNsBucket *bucket = NULL;
1835
1836
588k
            hprefix.name = prefix;
1837
588k
            hprefix.hashValue = extra->prefixHashValue;
1838
588k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1839
            /* assert(bucket && bucket->hashValue); */
1840
588k
            bucket->index = extra->oldIndex;
1841
588k
        }
1842
628k
    }
1843
1844
344k
    ctxt->nsNr -= nr;
1845
344k
    return(nr);
1846
344k
}
1847
1848
static int
1849
14.9k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1850
14.9k
    const xmlChar **atts;
1851
14.9k
    unsigned *attallocs;
1852
14.9k
    int newSize;
1853
1854
14.9k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1855
14.9k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1856
14.9k
                              10, XML_MAX_ATTRS);
1857
14.9k
    if (newSize < 0) {
1858
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1859
0
                    "Maximum number of attributes exceeded");
1860
0
        return(-1);
1861
0
    }
1862
1863
14.9k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1864
14.9k
    if (atts == NULL)
1865
48
        goto mem_error;
1866
14.8k
    ctxt->atts = atts;
1867
1868
14.8k
    attallocs = xmlRealloc(ctxt->attallocs,
1869
14.8k
                           newSize * sizeof(attallocs[0]));
1870
14.8k
    if (attallocs == NULL)
1871
46
        goto mem_error;
1872
14.8k
    ctxt->attallocs = attallocs;
1873
1874
14.8k
    ctxt->maxatts = newSize * 5;
1875
1876
14.8k
    return(0);
1877
1878
94
mem_error:
1879
94
    xmlErrMemory(ctxt);
1880
94
    return(-1);
1881
14.8k
}
1882
1883
/**
1884
 * Pushes a new parser input on top of the input stack
1885
 *
1886
 * @param ctxt  an XML parser context
1887
 * @param value  the parser input
1888
 * @returns -1 in case of error, the index in the stack otherwise
1889
 */
1890
int
1891
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1892
220k
{
1893
220k
    char *directory = NULL;
1894
220k
    int maxDepth;
1895
1896
220k
    if ((ctxt == NULL) || (value == NULL))
1897
5.54k
        return(-1);
1898
1899
214k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1900
1901
214k
    if (ctxt->inputNr >= ctxt->inputMax) {
1902
13.7k
        xmlParserInputPtr *tmp;
1903
13.7k
        int newSize;
1904
1905
13.7k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1906
13.7k
                                  5, maxDepth);
1907
13.7k
        if (newSize < 0) {
1908
6
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1909
6
                           "Maximum entity nesting depth exceeded");
1910
6
            return(-1);
1911
6
        }
1912
13.7k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1913
13.7k
        if (tmp == NULL) {
1914
74
            xmlErrMemory(ctxt);
1915
74
            return(-1);
1916
74
        }
1917
13.6k
        ctxt->inputTab = tmp;
1918
13.6k
        ctxt->inputMax = newSize;
1919
13.6k
    }
1920
1921
214k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1922
109k
        directory = xmlParserGetDirectory(value->filename);
1923
109k
        if (directory == NULL) {
1924
50
            xmlErrMemory(ctxt);
1925
50
            return(-1);
1926
50
        }
1927
109k
    }
1928
1929
214k
    if (ctxt->input_id >= INT_MAX) {
1930
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1931
0
        return(-1);
1932
0
    }
1933
1934
214k
    ctxt->inputTab[ctxt->inputNr] = value;
1935
214k
    ctxt->input = value;
1936
1937
214k
    if (ctxt->inputNr == 0) {
1938
108k
        xmlFree(ctxt->directory);
1939
108k
        ctxt->directory = directory;
1940
108k
    }
1941
1942
    /*
1943
     * The input ID is unused internally, but there are entity
1944
     * loaders in downstream code that detect the main document
1945
     * by checking for "input_id == 1".
1946
     */
1947
214k
    value->id = ctxt->input_id++;
1948
1949
214k
    return(ctxt->inputNr++);
1950
214k
}
1951
1952
/**
1953
 * Pops the top parser input from the input stack
1954
 *
1955
 * @param ctxt  an XML parser context
1956
 * @returns the input just removed
1957
 */
1958
xmlParserInput *
1959
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1960
444k
{
1961
444k
    xmlParserInputPtr ret;
1962
1963
444k
    if (ctxt == NULL)
1964
0
        return(NULL);
1965
444k
    if (ctxt->inputNr <= 0)
1966
233k
        return (NULL);
1967
211k
    ctxt->inputNr--;
1968
211k
    if (ctxt->inputNr > 0)
1969
105k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1970
105k
    else
1971
105k
        ctxt->input = NULL;
1972
211k
    ret = ctxt->inputTab[ctxt->inputNr];
1973
211k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1974
211k
    return (ret);
1975
444k
}
1976
1977
/**
1978
 * Pushes a new element node on top of the node stack
1979
 *
1980
 * @deprecated Internal function, do not use.
1981
 *
1982
 * @param ctxt  an XML parser context
1983
 * @param value  the element node
1984
 * @returns -1 in case of error, the index in the stack otherwise
1985
 */
1986
int
1987
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1988
767k
{
1989
767k
    if (ctxt == NULL)
1990
0
        return(0);
1991
1992
767k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1993
67.5k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1994
67.5k
        xmlNodePtr *tmp;
1995
67.5k
        int newSize;
1996
1997
67.5k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
1998
67.5k
                                  10, maxDepth);
1999
67.5k
        if (newSize < 0) {
2000
26
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2001
26
                    "Excessive depth in document: %d,"
2002
26
                    " use XML_PARSE_HUGE option\n",
2003
26
                    ctxt->nodeNr);
2004
26
            return(-1);
2005
26
        }
2006
2007
67.5k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2008
67.5k
        if (tmp == NULL) {
2009
107
            xmlErrMemory(ctxt);
2010
107
            return (-1);
2011
107
        }
2012
67.4k
        ctxt->nodeTab = tmp;
2013
67.4k
  ctxt->nodeMax = newSize;
2014
67.4k
    }
2015
2016
767k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2017
767k
    ctxt->node = value;
2018
767k
    return (ctxt->nodeNr++);
2019
767k
}
2020
2021
/**
2022
 * Pops the top element node from the node stack
2023
 *
2024
 * @deprecated Internal function, do not use.
2025
 *
2026
 * @param ctxt  an XML parser context
2027
 * @returns the node just removed
2028
 */
2029
xmlNode *
2030
nodePop(xmlParserCtxt *ctxt)
2031
895k
{
2032
895k
    xmlNodePtr ret;
2033
2034
895k
    if (ctxt == NULL) return(NULL);
2035
895k
    if (ctxt->nodeNr <= 0)
2036
212k
        return (NULL);
2037
683k
    ctxt->nodeNr--;
2038
683k
    if (ctxt->nodeNr > 0)
2039
667k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2040
15.3k
    else
2041
15.3k
        ctxt->node = NULL;
2042
683k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2043
683k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2044
683k
    return (ret);
2045
895k
}
2046
2047
/**
2048
 * Pushes a new element name/prefix/URL on top of the name stack
2049
 *
2050
 * @param ctxt  an XML parser context
2051
 * @param value  the element name
2052
 * @param prefix  the element prefix
2053
 * @param URI  the element namespace name
2054
 * @param line  the current line number for error messages
2055
 * @param nsNr  the number of namespaces pushed on the namespace table
2056
 * @returns -1 in case of error, the index in the stack otherwise
2057
 */
2058
static int
2059
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2060
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2061
1.02M
{
2062
1.02M
    xmlStartTag *tag;
2063
2064
1.02M
    if (ctxt->nameNr >= ctxt->nameMax) {
2065
71.6k
        const xmlChar **tmp;
2066
71.6k
        xmlStartTag *tmp2;
2067
71.6k
        int newSize;
2068
2069
71.6k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2070
71.6k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2071
71.6k
                                  10, XML_MAX_ITEMS);
2072
71.6k
        if (newSize < 0)
2073
0
            goto mem_error;
2074
2075
71.6k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2076
71.6k
        if (tmp == NULL)
2077
110
      goto mem_error;
2078
71.4k
  ctxt->nameTab = tmp;
2079
2080
71.4k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2081
71.4k
        if (tmp2 == NULL)
2082
79
      goto mem_error;
2083
71.4k
  ctxt->pushTab = tmp2;
2084
2085
71.4k
        ctxt->nameMax = newSize;
2086
951k
    } else if (ctxt->pushTab == NULL) {
2087
43.0k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2088
43.0k
        if (ctxt->pushTab == NULL)
2089
315
            goto mem_error;
2090
43.0k
    }
2091
1.02M
    ctxt->nameTab[ctxt->nameNr] = value;
2092
1.02M
    ctxt->name = value;
2093
1.02M
    tag = &ctxt->pushTab[ctxt->nameNr];
2094
1.02M
    tag->prefix = prefix;
2095
1.02M
    tag->URI = URI;
2096
1.02M
    tag->line = line;
2097
1.02M
    tag->nsNr = nsNr;
2098
1.02M
    return (ctxt->nameNr++);
2099
504
mem_error:
2100
504
    xmlErrMemory(ctxt);
2101
504
    return (-1);
2102
1.02M
}
2103
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost element name from the name stack and makes the
 * name below it, if any, the current one.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = ctxt->nameNr - 1;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->nameNr = top;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2127
2128
/**
2129
 * Pops the top element name from the name stack
2130
 *
2131
 * @deprecated Internal function, do not use.
2132
 *
2133
 * @param ctxt  an XML parser context
2134
 * @returns the name just removed
2135
 */
2136
static const xmlChar *
2137
namePop(xmlParserCtxtPtr ctxt)
2138
919k
{
2139
919k
    const xmlChar *ret;
2140
2141
919k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2142
117
        return (NULL);
2143
919k
    ctxt->nameNr--;
2144
919k
    if (ctxt->nameNr > 0)
2145
904k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2146
15.0k
    else
2147
15.0k
        ctxt->name = NULL;
2148
919k
    ret = ctxt->nameTab[ctxt->nameNr];
2149
919k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2150
919k
    return (ret);
2151
919k
}
2152
2153
1.35M
/**
 * Pushes a value on top of the `spaceTab` stack and points
 * `ctxt->space` at the new entry.
 *
 * @param ctxt  an XML parser context
 * @param val  the value to push
 * @returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;

    return(pos);
}
2178
2179
1.26M
/**
 * Pops the top value from the `spaceTab` stack. The vacated slot is
 * reset to -1 and `ctxt->space` is moved to the entry below, or to
 * the first slot of the table once the stack is empty.
 *
 * @param ctxt  an XML parser context
 * @returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;
    ctxt->spaceNr = top;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2191
2192
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them except the CMPn family expect a variable named `ctxt`
 * (the parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK and GROW expand to an unbraced `if` statement that
 * already ends with a semicolon, NEXT1 expands to a brace block and
 * COPY_BUF to an unbraced `if`/`else`. Be careful when using them as
 * the body of an `if` that has an `else` branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* CMPn: compare the first n bytes at `s` with the given characters. */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do { \
    ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == 0) \
        xmlParserGrow(ctxt); \
  } while (0)

#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserGrow(ctxt); \
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserGrow(ctxt); \
    }

#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
  } while (0)

#define COPY_BUF(b, i, v) \
    if ((v) < 0x80) (b)[(i)++] = (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2298
2299
static int
2300
207M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2301
207M
    int c = xmlCurrentChar(ctxt, len);
2302
2303
207M
    if (c == XML_INVALID_CHAR)
2304
31.6M
        c = 0xFFFD; /* replacement character */
2305
2306
207M
    return(c);
2307
207M
}
2308
2309
/**
2310
 * Skip whitespace in the input stream.
2311
 *
2312
 * @deprecated Internal function, do not use.
2313
 *
2314
 * @param ctxt  the XML parser context
2315
 * @returns the number of space chars skipped
2316
 */
2317
int
2318
6.03M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2319
6.03M
    const xmlChar *cur;
2320
6.03M
    int res = 0;
2321
2322
6.03M
    cur = ctxt->input->cur;
2323
6.03M
    while (IS_BLANK_CH(*cur)) {
2324
2.16M
        if (*cur == '\n') {
2325
732k
            ctxt->input->line++; ctxt->input->col = 1;
2326
1.42M
        } else {
2327
1.42M
            ctxt->input->col++;
2328
1.42M
        }
2329
2.16M
        cur++;
2330
2.16M
        if (res < INT_MAX)
2331
2.16M
            res++;
2332
2.16M
        if (*cur == 0) {
2333
13.0k
            ctxt->input->cur = cur;
2334
13.0k
            xmlParserGrow(ctxt);
2335
13.0k
            cur = ctxt->input->cur;
2336
13.0k
        }
2337
2.16M
    }
2338
6.03M
    ctxt->input->cur = cur;
2339
2340
6.03M
    if (res > 4)
2341
21.2k
        GROW;
2342
2343
6.03M
    return(res);
2344
6.03M
}
2345
2346
static void
2347
86.3k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2348
86.3k
    unsigned long consumed;
2349
86.3k
    xmlEntityPtr ent;
2350
2351
86.3k
    ent = ctxt->input->entity;
2352
2353
86.3k
    ent->flags &= ~XML_ENT_EXPANDING;
2354
2355
86.3k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2356
9.80k
        int result;
2357
2358
        /*
2359
         * Read the rest of the stream in case of errors. We want
2360
         * to account for the whole entity size.
2361
         */
2362
14.5k
        do {
2363
14.5k
            ctxt->input->cur = ctxt->input->end;
2364
14.5k
            xmlParserShrink(ctxt);
2365
14.5k
            result = xmlParserGrow(ctxt);
2366
14.5k
        } while (result > 0);
2367
2368
9.80k
        consumed = ctxt->input->consumed;
2369
9.80k
        xmlSaturatedAddSizeT(&consumed,
2370
9.80k
                             ctxt->input->end - ctxt->input->base);
2371
2372
9.80k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2373
2374
        /*
2375
         * Add to sizeentities when parsing an external entity
2376
         * for the first time.
2377
         */
2378
9.80k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2379
5.21k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2380
5.21k
        }
2381
2382
9.80k
        ent->flags |= XML_ENT_CHECKED;
2383
9.80k
    }
2384
2385
86.3k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2386
2387
86.3k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2388
2389
86.3k
    GROW;
2390
86.3k
}
2391
2392
/**
2393
 * Skip whitespace in the input stream, also handling parameter
2394
 * entities.
2395
 *
2396
 * @param ctxt  the XML parser context
2397
 * @returns the number of space chars skipped
2398
 */
2399
static int
2400
1.01M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2401
1.01M
    int res = 0;
2402
1.01M
    int inParam;
2403
1.01M
    int expandParam;
2404
2405
1.01M
    inParam = PARSER_IN_PE(ctxt);
2406
1.01M
    expandParam = PARSER_EXTERNAL(ctxt);
2407
2408
1.01M
    if (!inParam && !expandParam)
2409
347k
        return(xmlSkipBlankChars(ctxt));
2410
2411
    /*
2412
     * It's Okay to use CUR/NEXT here since all the blanks are on
2413
     * the ASCII range.
2414
     */
2415
1.78M
    while (PARSER_STOPPED(ctxt) == 0) {
2416
1.78M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2417
1.08M
            NEXT;
2418
1.08M
        } else if (CUR == '%') {
2419
44.1k
            if ((expandParam == 0) ||
2420
43.8k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2421
26.6k
                break;
2422
2423
            /*
2424
             * Expand parameter entity. We continue to consume
2425
             * whitespace at the start of the entity and possible
2426
             * even consume the whole entity and pop it. We might
2427
             * even pop multiple PEs in this loop.
2428
             */
2429
17.4k
            xmlParsePERefInternal(ctxt, 0);
2430
2431
17.4k
            inParam = PARSER_IN_PE(ctxt);
2432
17.4k
            expandParam = PARSER_EXTERNAL(ctxt);
2433
656k
        } else if (CUR == 0) {
2434
38.3k
            if (inParam == 0)
2435
315
                break;
2436
2437
            /*
2438
             * Don't pop parameter entities that start a markup
2439
             * declaration to detect Well-formedness constraint:
2440
             * PE Between Declarations.
2441
             */
2442
38.0k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2443
25.6k
                break;
2444
2445
12.3k
            xmlPopPE(ctxt);
2446
2447
12.3k
            inParam = PARSER_IN_PE(ctxt);
2448
12.3k
            expandParam = PARSER_EXTERNAL(ctxt);
2449
617k
        } else {
2450
617k
            break;
2451
617k
        }
2452
2453
        /*
2454
         * Also increase the counter when entering or exiting a PERef.
2455
         * The spec says: "When a parameter-entity reference is recognized
2456
         * in the DTD and included, its replacement text MUST be enlarged
2457
         * by the attachment of one leading and one following space (#x20)
2458
         * character."
2459
         */
2460
1.11M
        if (res < INT_MAX)
2461
1.11M
            res++;
2462
1.11M
    }
2463
2464
671k
    return(res);
2465
1.01M
}
2466
2467
/************************************************************************
2468
 *                  *
2469
 *    Commodity functions to handle entities      *
2470
 *                  *
2471
 ************************************************************************/
2472
2473
/**
2474
 * @deprecated Internal function, don't use.
2475
 *
2476
 * @param ctxt  an XML parser context
2477
 * @returns the current xmlChar in the parser context
2478
 */
2479
xmlChar
2480
0
xmlPopInput(xmlParserCtxt *ctxt) {
2481
0
    xmlParserInputPtr input;
2482
2483
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2484
0
    input = xmlCtxtPopInput(ctxt);
2485
0
    xmlFreeInputStream(input);
2486
0
    if (*ctxt->input->cur == 0)
2487
0
        xmlParserGrow(ctxt);
2488
0
    return(CUR);
2489
0
}
2490
2491
/**
2492
 * Push an input stream onto the stack.
2493
 *
2494
 * @deprecated Internal function, don't use.
2495
 *
2496
 * @param ctxt  an XML parser context
2497
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2498
 * @returns -1 in case of error or the index in the input stack
2499
 */
2500
int
2501
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2502
0
    int ret;
2503
2504
0
    if ((ctxt == NULL) || (input == NULL))
2505
0
        return(-1);
2506
2507
0
    ret = xmlCtxtPushInput(ctxt, input);
2508
0
    if (ret >= 0)
2509
0
        GROW;
2510
0
    return(ret);
2511
0
}
2512
2513
/**
2514
 * Parse a numeric character reference. Always consumes '&'.
2515
 *
2516
 * @deprecated Internal function, don't use.
2517
 *
2518
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2519
 *                      '&#x' [0-9a-fA-F]+ ';'
2520
 *
2521
 * [ WFC: Legal Character ]
2522
 * Characters referred to using character references must match the
2523
 * production for Char.
2524
 *
2525
 * @param ctxt  an XML parser context
2526
 * @returns the value parsed (as an int), 0 in case of error
2527
 */
2528
int
2529
296k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2530
296k
    int val = 0;
2531
296k
    int count = 0;
2532
2533
    /*
2534
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2535
     */
2536
296k
    if ((RAW == '&') && (NXT(1) == '#') &&
2537
296k
        (NXT(2) == 'x')) {
2538
185k
  SKIP(3);
2539
185k
  GROW;
2540
504k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2541
358k
      if (count++ > 20) {
2542
9.49k
    count = 0;
2543
9.49k
    GROW;
2544
9.49k
      }
2545
358k
      if ((RAW >= '0') && (RAW <= '9'))
2546
141k
          val = val * 16 + (CUR - '0');
2547
216k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2548
156k
          val = val * 16 + (CUR - 'a') + 10;
2549
59.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2550
20.7k
          val = val * 16 + (CUR - 'A') + 10;
2551
39.1k
      else {
2552
39.1k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2553
39.1k
    val = 0;
2554
39.1k
    break;
2555
39.1k
      }
2556
319k
      if (val > 0x110000)
2557
111k
          val = 0x110000;
2558
2559
319k
      NEXT;
2560
319k
      count++;
2561
319k
  }
2562
185k
  if (RAW == ';') {
2563
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2564
146k
      ctxt->input->col++;
2565
146k
      ctxt->input->cur++;
2566
146k
  }
2567
185k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2568
111k
  SKIP(2);
2569
111k
  GROW;
2570
290k
  while (RAW != ';') { /* loop blocked by count */
2571
201k
      if (count++ > 20) {
2572
1.35k
    count = 0;
2573
1.35k
    GROW;
2574
1.35k
      }
2575
201k
      if ((RAW >= '0') && (RAW <= '9'))
2576
179k
          val = val * 10 + (CUR - '0');
2577
21.8k
      else {
2578
21.8k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2579
21.8k
    val = 0;
2580
21.8k
    break;
2581
21.8k
      }
2582
179k
      if (val > 0x110000)
2583
14.5k
          val = 0x110000;
2584
2585
179k
      NEXT;
2586
179k
      count++;
2587
179k
  }
2588
111k
  if (RAW == ';') {
2589
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2590
89.2k
      ctxt->input->col++;
2591
89.2k
      ctxt->input->cur++;
2592
89.2k
  }
2593
111k
    } else {
2594
0
        if (RAW == '&')
2595
0
            SKIP(1);
2596
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2597
0
    }
2598
2599
    /*
2600
     * [ WFC: Legal Character ]
2601
     * Characters referred to using character references must match the
2602
     * production for Char.
2603
     */
2604
296k
    if (val >= 0x110000) {
2605
578
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2606
578
                "xmlParseCharRef: character reference out of bounds\n",
2607
578
          val);
2608
578
        val = 0xFFFD;
2609
295k
    } else if (!IS_CHAR(val)) {
2610
86.4k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2611
86.4k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2612
86.4k
                    val);
2613
86.4k
    }
2614
296k
    return(val);
2615
296k
}
2616
2617
/**
2618
 * Parse Reference declarations, variant parsing from a string rather
2619
 * than an an input flow.
2620
 *
2621
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2622
 *                      '&#x' [0-9a-fA-F]+ ';'
2623
 *
2624
 * [ WFC: Legal Character ]
2625
 * Characters referred to using character references must match the
2626
 * production for Char.
2627
 *
2628
 * @param ctxt  an XML parser context
2629
 * @param str  a pointer to an index in the string
2630
 * @returns the value parsed (as an int), 0 in case of error, str will be
2631
 *         updated to the current value of the index
2632
 */
2633
static int
2634
414k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2635
414k
    const xmlChar *ptr;
2636
414k
    xmlChar cur;
2637
414k
    int val = 0;
2638
2639
414k
    if ((str == NULL) || (*str == NULL)) return(0);
2640
414k
    ptr = *str;
2641
414k
    cur = *ptr;
2642
414k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2643
6.41k
  ptr += 3;
2644
6.41k
  cur = *ptr;
2645
20.8k
  while (cur != ';') { /* Non input consuming loop */
2646
15.3k
      if ((cur >= '0') && (cur <= '9'))
2647
9.18k
          val = val * 16 + (cur - '0');
2648
6.19k
      else if ((cur >= 'a') && (cur <= 'f'))
2649
2.51k
          val = val * 16 + (cur - 'a') + 10;
2650
3.68k
      else if ((cur >= 'A') && (cur <= 'F'))
2651
2.70k
          val = val * 16 + (cur - 'A') + 10;
2652
986
      else {
2653
986
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2654
986
    val = 0;
2655
986
    break;
2656
986
      }
2657
14.3k
      if (val > 0x110000)
2658
788
          val = 0x110000;
2659
2660
14.3k
      ptr++;
2661
14.3k
      cur = *ptr;
2662
14.3k
  }
2663
6.41k
  if (cur == ';')
2664
5.43k
      ptr++;
2665
408k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2666
408k
  ptr += 2;
2667
408k
  cur = *ptr;
2668
1.23M
  while (cur != ';') { /* Non input consuming loops */
2669
832k
      if ((cur >= '0') && (cur <= '9'))
2670
830k
          val = val * 10 + (cur - '0');
2671
1.41k
      else {
2672
1.41k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2673
1.41k
    val = 0;
2674
1.41k
    break;
2675
1.41k
      }
2676
830k
      if (val > 0x110000)
2677
3.01k
          val = 0x110000;
2678
2679
830k
      ptr++;
2680
830k
      cur = *ptr;
2681
830k
  }
2682
408k
  if (cur == ';')
2683
407k
      ptr++;
2684
408k
    } else {
2685
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2686
0
  return(0);
2687
0
    }
2688
414k
    *str = ptr;
2689
2690
    /*
2691
     * [ WFC: Legal Character ]
2692
     * Characters referred to using character references must match the
2693
     * production for Char.
2694
     */
2695
414k
    if (val >= 0x110000) {
2696
675
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2697
675
                "xmlParseStringCharRef: character reference out of bounds\n",
2698
675
                val);
2699
414k
    } else if (IS_CHAR(val)) {
2700
410k
        return(val);
2701
410k
    } else {
2702
3.32k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2703
3.32k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2704
3.32k
        val);
2705
3.32k
    }
2706
4.00k
    return(0);
2707
414k
}
2708
2709
/**
2710
 *     [69] PEReference ::= '%' Name ';'
2711
 *
2712
 * @deprecated Internal function, do not use.
2713
 *
2714
 * [ WFC: No Recursion ]
2715
 * A parsed entity must not contain a recursive
2716
 * reference to itself, either directly or indirectly.
2717
 *
2718
 * [ WFC: Entity Declared ]
2719
 * In a document without any DTD, a document with only an internal DTD
2720
 * subset which contains no parameter entity references, or a document
2721
 * with "standalone='yes'", ...  ... The declaration of a parameter
2722
 * entity must precede any reference to it...
2723
 *
2724
 * [ VC: Entity Declared ]
2725
 * In a document with an external subset or external parameter entities
2726
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2727
 * must precede any reference to it...
2728
 *
2729
 * [ WFC: In DTD ]
2730
 * Parameter-entity references may only appear in the DTD.
2731
 * NOTE: misleading but this is handled.
2732
 *
2733
 * A PEReference may have been detected in the current input stream
2734
 * the handling is done accordingly to
2735
 *      http://www.w3.org/TR/REC-xml#entproc
2736
 * i.e.
2737
 *   - Included in literal in entity values
2738
 *   - Included as Parameter Entity reference within DTDs
2739
 * @param ctxt  the parser context
2740
 */
2741
void
2742
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2743
0
    xmlParsePERefInternal(ctxt, 0);
2744
0
}
2745
2746
/**
2747
 * @deprecated Internal function, don't use.
2748
 *
2749
 * @param ctxt  the parser context
2750
 * @param str  the input string
2751
 * @param len  the string length
2752
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2753
 * @param end  an end marker xmlChar, 0 if none
2754
 * @param end2  an end marker xmlChar, 0 if none
2755
 * @param end3  an end marker xmlChar, 0 if none
2756
 * @returns A newly allocated string with the substitution done. The caller
2757
 *      must deallocate it !
2758
 */
2759
xmlChar *
2760
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2761
                           int what ATTRIBUTE_UNUSED,
2762
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2763
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2764
0
        return(NULL);
2765
2766
0
    if ((str[len] != 0) ||
2767
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2768
0
        return(NULL);
2769
2770
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2771
0
}
2772
2773
/**
2774
 * @deprecated Internal function, don't use.
2775
 *
2776
 * @param ctxt  the parser context
2777
 * @param str  the input string
2778
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779
 * @param end  an end marker xmlChar, 0 if none
2780
 * @param end2  an end marker xmlChar, 0 if none
2781
 * @param end3  an end marker xmlChar, 0 if none
2782
 * @returns A newly allocated string with the substitution done. The caller
2783
 *      must deallocate it !
2784
 */
2785
xmlChar *
2786
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2787
                        int what ATTRIBUTE_UNUSED,
2788
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2789
0
    if ((ctxt == NULL) || (str == NULL))
2790
0
        return(NULL);
2791
2792
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2793
0
        return(NULL);
2794
2795
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2796
0
}
2797
2798
/************************************************************************
2799
 *                  *
2800
 *    Commodity functions, cleanup needed ?     *
2801
 *                  *
2802
 ************************************************************************/
2803
2804
/**
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * @param ctxt  an XML parser context
2808
 * @param str  a xmlChar *
2809
 * @param len  the size of `str`
2810
 * @param blank_chars  we know the chars are blanks
2811
 * @returns 1 if ignorable 0 otherwise.
2812
 */
2813
2814
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2815
813k
                     int blank_chars) {
2816
813k
    int i;
2817
813k
    xmlNodePtr lastChild;
2818
2819
    /*
2820
     * Check for xml:space value.
2821
     */
2822
813k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2823
812k
        (*(ctxt->space) == -2))
2824
735k
  return(0);
2825
2826
    /*
2827
     * Check that the string is made of blanks
2828
     */
2829
78.1k
    if (blank_chars == 0) {
2830
218k
  for (i = 0;i < len;i++)
2831
215k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2832
70.4k
    }
2833
2834
    /*
2835
     * Look if the element is mixed content in the DTD if available
2836
     */
2837
10.6k
    if (ctxt->node == NULL) return(0);
2838
10.6k
    if (ctxt->myDoc != NULL) {
2839
10.6k
        xmlElementPtr elemDecl = NULL;
2840
10.6k
        xmlDocPtr doc = ctxt->myDoc;
2841
10.6k
        const xmlChar *prefix = NULL;
2842
2843
10.6k
        if (ctxt->node->ns)
2844
1.40k
            prefix = ctxt->node->ns->prefix;
2845
10.6k
        if (doc->intSubset != NULL)
2846
7.85k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2847
7.85k
                                      prefix);
2848
10.6k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2849
1.93k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2850
1.93k
                                      prefix);
2851
10.6k
        if (elemDecl != NULL) {
2852
3.46k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2853
2.41k
                return(1);
2854
1.05k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2855
856
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2856
586
                return(0);
2857
1.05k
        }
2858
10.6k
    }
2859
2860
    /*
2861
     * Otherwise, heuristic :-\
2862
     *
2863
     * When push parsing, we could be at the end of a chunk.
2864
     * This makes the look-ahead and consequently the NOBLANKS
2865
     * option unreliable.
2866
     */
2867
7.63k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2868
6.34k
    if ((ctxt->node->children == NULL) &&
2869
4.27k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2870
2871
6.09k
    lastChild = xmlGetLastChild(ctxt->node);
2872
6.09k
    if (lastChild == NULL) {
2873
4.02k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2874
0
            (ctxt->node->content != NULL)) return(0);
2875
4.02k
    } else if (xmlNodeIsText(lastChild))
2876
218
        return(0);
2877
1.85k
    else if ((ctxt->node->children != NULL) &&
2878
1.85k
             (xmlNodeIsText(ctxt->node->children)))
2879
205
        return(0);
2880
5.67k
    return(1);
2881
6.09k
}
2882
2883
/************************************************************************
2884
 *                  *
2885
 *    Extra stuff for namespace support     *
2886
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2887
 *                  *
2888
 ************************************************************************/
2889
2890
/**
2891
 * Parse an UTF8 encoded XML qualified name string
2892
 *
2893
 * @deprecated Don't use.
2894
 *
2895
 * @param ctxt  an XML parser context
2896
 * @param name  an XML parser context
2897
 * @param prefixOut  a xmlChar **
2898
 * @returns the local part, and prefix is updated
2899
 *   to get the Prefix if any.
2900
 */
2901
2902
xmlChar *
2903
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2904
0
    xmlChar *ret;
2905
0
    const xmlChar *localname;
2906
2907
0
    localname = xmlSplitQName4(name, prefixOut);
2908
0
    if (localname == NULL) {
2909
0
        xmlCtxtErrMemory(ctxt);
2910
0
        return(NULL);
2911
0
    }
2912
2913
0
    ret = xmlStrdup(localname);
2914
0
    if (ret == NULL) {
2915
0
        xmlCtxtErrMemory(ctxt);
2916
0
        xmlFree(*prefixOut);
2917
0
    }
2918
2919
0
    return(ret);
2920
0
}
2921
2922
/************************************************************************
2923
 *                  *
2924
 *      The parser itself       *
2925
 *  Relates to http://www.w3.org/TR/REC-xml       *
2926
 *                  *
2927
 ************************************************************************/
2928
2929
/************************************************************************
2930
 *                  *
2931
 *  Routines to parse Name, NCName and NmToken      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/*
 * The following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML 1.0.
 * They correspond to the modified production [4] and the new
 * production [4a] introduced in that revision. Also note that the
 * macros used for the productions Letter, Digit, CombiningChar and
 * Extender are not needed anymore.
 * We still keep compatibility with pre-revision-5 parsing semantics
 * if the XML_PARSE_OLD10 option is given to the parser.
 */
2945
2946
/*
 * Check whether a code point may start a Name, using productions
 * [4], [4a] and [5] of the fifth edition of XML 1.0.
 *
 * @param c  the code point to test
 * @returns 1 if `c` is a NameStartChar, 0 otherwise
 */
static int
xmlIsNameStartCharNew(int c) {
    /* accelerators: the most common name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII part of NameStartChar */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        (c == '_') || (c == ':'))
        return(1);

    /* Latin-1 and beyond, up to the end of the first range block */
    if (((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)))
        return(1);

    /* Remaining ranges of the Basic Multilingual Plane */
    if (((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)))
        return(1);

    /* Supplementary planes */
    if ((c >= 0x10000) && (c <= 0xEFFFF))
        return(1);

    return(0);
}
2971
2972
/*
 * Check whether a code point may appear inside a Name, using
 * productions [4], [4a] and [5] of the fifth edition of XML 1.0.
 * Ranges marked "!start" are accepted here but not as the first
 * character of a name.
 *
 * @param c  the code point to test
 * @returns 1 if `c` is a NameChar, 0 otherwise
 */
static int
xmlIsNameCharNew(int c) {
    /* accelerators: the most common name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII letters and digits */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9'))) /* digits: !start */
        return(1);

    /* Punctuation */
    if ((c == '_') || (c == ':') ||
        (c == '-') || (c == '.') || (c == 0xB7)) /* last three: !start */
        return(1);

    /* Latin-1 and beyond, up to the end of the first range block */
    if (((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)))
        return(1);

    /* Remaining ranges of the Basic Multilingual Plane */
    if (((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)))
        return(1);

    /* Supplementary planes */
    if ((c >= 0x10000) && (c <= 0xEFFFF))
        return(1);

    return(0);
}
3001
3002
/*
 * Check whether a code point may start a Name according to the
 * pre-revision-5 rules: a Letter, '_' or ':'.
 *
 * @param c  the code point to test
 * @returns 1 if `c` may start a name, 0 otherwise
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: the most common name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c) || (c == '_') || (c == ':'))
        return(1);

    return(0);
}
3009
3010
/*
 * Check whether a code point may appear inside a Name according to
 * the pre-revision-5 rules: Letter, Digit, '.', '-', '_', ':',
 * CombiningChar or Extender.
 *
 * @param c  the code point to test
 * @returns 1 if `c` is a name character, 0 otherwise
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: the most common name terminators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);

    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);

    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3021
3022
/*
 * Check whether a code point may start a Name, dispatching on the
 * rule set in use.
 *
 * @param c  the code point to test
 * @param old10  non-zero to apply the pre-revision-5 rules
 * @returns 1 if `c` may start a name, 0 otherwise
 */
static int
xmlIsNameStartChar(int c, int old10) {
    if (old10)
        return(xmlIsNameStartCharOld(c));

    return(xmlIsNameStartCharNew(c));
}
3029
3030
/*
 * Check whether a code point may appear inside a Name, dispatching
 * on the rule set in use.
 *
 * @param c  the code point to test
 * @param old10  non-zero to apply the pre-revision-5 rules
 * @returns 1 if `c` is a name character, 0 otherwise
 */
static int
xmlIsNameChar(int c, int old10) {
    if (old10)
        return(xmlIsNameCharOld(c));

    return(xmlIsNameCharNew(c));
}
3037
3038
/*
3039
 * Scan an XML Name, NCName or Nmtoken.
3040
 *
3041
 * Returns a pointer to the end of the name on success. If the
3042
 * name is invalid, returns `ptr`. If the name is longer than
3043
 * `maxSize` bytes, returns NULL.
3044
 *
3045
 * @param ptr  pointer to the start of the name
3046
 * @param maxSize  maximum size in bytes
3047
 * @param flags  XML_SCAN_* flags
3048
 * @returns a pointer to the end of the name or NULL
3049
 */
3050
const xmlChar *
3051
1.94M
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3052
1.94M
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3053
1.94M
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3054
3055
20.0M
    while (1) {
3056
20.0M
        int c, len;
3057
3058
20.0M
        c = *ptr;
3059
20.0M
        if (c < 0x80) {
3060
10.7M
            if (c == stop)
3061
150k
                break;
3062
10.6M
            len = 1;
3063
10.6M
        } else {
3064
9.26M
            len = 4;
3065
9.26M
            c = xmlGetUTF8Char(ptr, &len);
3066
9.26M
            if (c < 0)
3067
4.38k
                break;
3068
9.26M
        }
3069
3070
19.9M
        if (flags & XML_SCAN_NMTOKEN ?
3071
18.0M
                !xmlIsNameChar(c, old10) :
3072
19.9M
                !xmlIsNameStartChar(c, old10))
3073
1.79M
            break;
3074
3075
18.1M
        if ((size_t) len > maxSize)
3076
190
            return(NULL);
3077
18.1M
        ptr += len;
3078
18.1M
        maxSize -= len;
3079
18.1M
        flags |= XML_SCAN_NMTOKEN;
3080
18.1M
    }
3081
3082
1.94M
    return(ptr);
3083
1.94M
}
3084
3085
static const xmlChar *
3086
380k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3087
380k
    const xmlChar *ret;
3088
380k
    int len = 0, l;
3089
380k
    int c;
3090
380k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3091
236k
                    XML_MAX_TEXT_LENGTH :
3092
380k
                    XML_MAX_NAME_LENGTH;
3093
380k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3094
3095
    /*
3096
     * Handler for more complex cases
3097
     */
3098
380k
    c = xmlCurrentChar(ctxt, &l);
3099
380k
    if (!xmlIsNameStartChar(c, old10))
3100
285k
        return(NULL);
3101
94.7k
    len += l;
3102
94.7k
    NEXTL(l);
3103
94.7k
    c = xmlCurrentChar(ctxt, &l);
3104
31.3M
    while (xmlIsNameChar(c, old10)) {
3105
31.2M
        if (len <= INT_MAX - l)
3106
31.2M
            len += l;
3107
31.2M
        NEXTL(l);
3108
31.2M
        c = xmlCurrentChar(ctxt, &l);
3109
31.2M
    }
3110
94.7k
    if (len > maxLength) {
3111
353
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3112
353
        return(NULL);
3113
353
    }
3114
94.4k
    if (ctxt->input->cur - ctxt->input->base < len) {
3115
        /*
3116
         * There were a couple of bugs where PERefs lead to to a change
3117
         * of the buffer. Check the buffer size to avoid passing an invalid
3118
         * pointer to xmlDictLookup.
3119
         */
3120
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3121
0
                    "unexpected change of input buffer");
3122
0
        return (NULL);
3123
0
    }
3124
94.4k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125
284
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3126
94.1k
    else
3127
94.1k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3128
94.4k
    if (ret == NULL)
3129
16
        xmlErrMemory(ctxt);
3130
94.4k
    return(ret);
3131
94.4k
}
3132
3133
/**
3134
 * Parse an XML name.
3135
 *
3136
 * @deprecated Internal function, don't use.
3137
 *
3138
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3139
 *                      CombiningChar | Extender
3140
 *
3141
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3142
 *
3143
 *     [6] Names ::= Name (#x20 Name)*
3144
 *
3145
 * @param ctxt  an XML parser context
3146
 * @returns the Name parsed or NULL
3147
 */
3148
3149
const xmlChar *
3150
2.99M
xmlParseName(xmlParserCtxt *ctxt) {
3151
2.99M
    const xmlChar *in;
3152
2.99M
    const xmlChar *ret;
3153
2.99M
    size_t count = 0;
3154
2.99M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3155
1.76M
                       XML_MAX_TEXT_LENGTH :
3156
2.99M
                       XML_MAX_NAME_LENGTH;
3157
3158
2.99M
    GROW;
3159
3160
    /*
3161
     * Accelerator for simple ASCII names
3162
     */
3163
2.99M
    in = ctxt->input->cur;
3164
2.99M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3165
636k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3166
2.67M
  (*in == '_') || (*in == ':')) {
3167
2.67M
  in++;
3168
20.9M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
4.84M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
3.68M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3171
3.27M
         (*in == '_') || (*in == '-') ||
3172
3.15M
         (*in == ':') || (*in == '.'))
3173
18.2M
      in++;
3174
2.67M
  if ((*in > 0) && (*in < 0x80)) {
3175
2.61M
      count = in - ctxt->input->cur;
3176
2.61M
            if (count > maxLength) {
3177
61
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3178
61
                return(NULL);
3179
61
            }
3180
2.61M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3181
2.61M
      ctxt->input->cur = in;
3182
2.61M
      ctxt->input->col += count;
3183
2.61M
      if (ret == NULL)
3184
14
          xmlErrMemory(ctxt);
3185
2.61M
      return(ret);
3186
2.61M
  }
3187
2.67M
    }
3188
    /* accelerator for special cases */
3189
380k
    return(xmlParseNameComplex(ctxt));
3190
2.99M
}
3191
3192
static xmlHashedString
3193
599k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3194
599k
    xmlHashedString ret;
3195
599k
    int len = 0, l;
3196
599k
    int c;
3197
599k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3198
477k
                    XML_MAX_TEXT_LENGTH :
3199
599k
                    XML_MAX_NAME_LENGTH;
3200
599k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3201
599k
    size_t startPosition = 0;
3202
3203
599k
    ret.name = NULL;
3204
599k
    ret.hashValue = 0;
3205
3206
    /*
3207
     * Handler for more complex cases
3208
     */
3209
599k
    startPosition = CUR_PTR - BASE_PTR;
3210
599k
    c = xmlCurrentChar(ctxt, &l);
3211
599k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3212
584k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3213
551k
  return(ret);
3214
551k
    }
3215
3216
49.3M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3217
49.3M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3218
49.3M
        if (len <= INT_MAX - l)
3219
49.3M
      len += l;
3220
49.3M
  NEXTL(l);
3221
49.3M
  c = xmlCurrentChar(ctxt, &l);
3222
49.3M
    }
3223
48.4k
    if (len > maxLength) {
3224
355
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3225
355
        return(ret);
3226
355
    }
3227
48.1k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3228
48.1k
    if (ret.name == NULL)
3229
8
        xmlErrMemory(ctxt);
3230
48.1k
    return(ret);
3231
48.4k
}
3232
3233
/**
3234
 * Parse an XML name.
3235
 *
3236
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3237
 *                          CombiningChar | Extender
3238
 *
3239
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3240
 *
3241
 * @param ctxt  an XML parser context
3242
 * @returns the Name parsed or NULL
3243
 */
3244
3245
static xmlHashedString
3246
1.89M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3247
1.89M
    const xmlChar *in, *e;
3248
1.89M
    xmlHashedString ret;
3249
1.89M
    size_t count = 0;
3250
1.89M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3251
1.30M
                       XML_MAX_TEXT_LENGTH :
3252
1.89M
                       XML_MAX_NAME_LENGTH;
3253
3254
1.89M
    ret.name = NULL;
3255
3256
    /*
3257
     * Accelerator for simple ASCII names
3258
     */
3259
1.89M
    in = ctxt->input->cur;
3260
1.89M
    e = ctxt->input->end;
3261
1.89M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3262
606k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3263
1.32M
   (*in == '_')) && (in < e)) {
3264
1.32M
  in++;
3265
15.8M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
1.55M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
1.41M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3268
1.34M
          (*in == '_') || (*in == '-') ||
3269
14.5M
          (*in == '.')) && (in < e))
3270
14.5M
      in++;
3271
1.32M
  if (in >= e)
3272
3.52k
      goto complex;
3273
1.31M
  if ((*in > 0) && (*in < 0x80)) {
3274
1.29M
      count = in - ctxt->input->cur;
3275
1.29M
            if (count > maxLength) {
3276
103
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3277
103
                return(ret);
3278
103
            }
3279
1.29M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3280
1.29M
      ctxt->input->cur = in;
3281
1.29M
      ctxt->input->col += count;
3282
1.29M
      if (ret.name == NULL) {
3283
9
          xmlErrMemory(ctxt);
3284
9
      }
3285
1.29M
      return(ret);
3286
1.29M
  }
3287
1.31M
    }
3288
599k
complex:
3289
599k
    return(xmlParseNCNameComplex(ctxt));
3290
1.89M
}
3291
3292
/**
3293
 * Parse an XML name and compares for match
3294
 * (specialized for endtag parsing)
3295
 *
3296
 * @param ctxt  an XML parser context
3297
 * @param other  the name to compare with
3298
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3299
 * and the name for mismatch
3300
 */
3301
3302
static const xmlChar *
3303
88.8k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3304
88.8k
    register const xmlChar *cmp = other;
3305
88.8k
    register const xmlChar *in;
3306
88.8k
    const xmlChar *ret;
3307
3308
88.8k
    GROW;
3309
3310
88.8k
    in = ctxt->input->cur;
3311
258k
    while (*in != 0 && *in == *cmp) {
3312
169k
  ++in;
3313
169k
  ++cmp;
3314
169k
    }
3315
88.8k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3316
  /* success */
3317
74.2k
  ctxt->input->col += in - ctxt->input->cur;
3318
74.2k
  ctxt->input->cur = in;
3319
74.2k
  return (const xmlChar*) 1;
3320
74.2k
    }
3321
    /* failure (or end of input buffer), check with full function */
3322
14.5k
    ret = xmlParseName (ctxt);
3323
    /* strings coming from the dictionary direct compare possible */
3324
14.5k
    if (ret == other) {
3325
981
  return (const xmlChar*) 1;
3326
981
    }
3327
13.5k
    return ret;
3328
14.5k
}
3329
3330
/**
3331
 * Parse an XML name.
3332
 *
3333
 * @param ctxt  an XML parser context
3334
 * @param str  a pointer to the string pointer (IN/OUT)
3335
 * @returns the Name parsed or NULL. The `str` pointer
3336
 * is updated to the current location in the string.
3337
 */
3338
3339
static xmlChar *
3340
1.75M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3341
1.75M
    xmlChar *ret;
3342
1.75M
    const xmlChar *cur = *str;
3343
1.75M
    int flags = 0;
3344
1.75M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3345
1.08M
                    XML_MAX_TEXT_LENGTH :
3346
1.75M
                    XML_MAX_NAME_LENGTH;
3347
3348
1.75M
    if (ctxt->options & XML_PARSE_OLD10)
3349
912k
        flags |= XML_SCAN_OLD10;
3350
3351
1.75M
    cur = xmlScanName(*str, maxLength, flags);
3352
1.75M
    if (cur == NULL) {
3353
190
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354
190
        return(NULL);
3355
190
    }
3356
1.75M
    if (cur == *str)
3357
6.61k
        return(NULL);
3358
3359
1.74M
    ret = xmlStrndup(*str, cur - *str);
3360
1.74M
    if (ret == NULL)
3361
95
        xmlErrMemory(ctxt);
3362
1.74M
    *str = cur;
3363
1.74M
    return(ret);
3364
1.75M
}
3365
3366
/**
3367
 * Parse an XML Nmtoken.
3368
 *
3369
 * @deprecated Internal function, don't use.
3370
 *
3371
 *     [7] Nmtoken ::= (NameChar)+
3372
 *
3373
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3374
 *
3375
 * @param ctxt  an XML parser context
3376
 * @returns the Nmtoken parsed or NULL
3377
 */
3378
3379
xmlChar *
3380
64.5k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3381
64.5k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3382
64.5k
    xmlChar *ret;
3383
64.5k
    int len = 0, l;
3384
64.5k
    int c;
3385
64.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3386
25.8k
                    XML_MAX_TEXT_LENGTH :
3387
64.5k
                    XML_MAX_NAME_LENGTH;
3388
64.5k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3389
3390
64.5k
    c = xmlCurrentChar(ctxt, &l);
3391
3392
264k
    while (xmlIsNameChar(c, old10)) {
3393
201k
  COPY_BUF(buf, len, c);
3394
201k
  NEXTL(l);
3395
201k
  c = xmlCurrentChar(ctxt, &l);
3396
201k
  if (len >= XML_MAX_NAMELEN) {
3397
      /*
3398
       * Okay someone managed to make a huge token, so he's ready to pay
3399
       * for the processing speed.
3400
       */
3401
1.84k
      xmlChar *buffer;
3402
1.84k
      int max = len * 2;
3403
3404
1.84k
      buffer = xmlMalloc(max);
3405
1.84k
      if (buffer == NULL) {
3406
19
          xmlErrMemory(ctxt);
3407
19
    return(NULL);
3408
19
      }
3409
1.82k
      memcpy(buffer, buf, len);
3410
13.3M
      while (xmlIsNameChar(c, old10)) {
3411
13.3M
    if (len + 10 > max) {
3412
10.0k
        xmlChar *tmp;
3413
10.0k
                    int newSize;
3414
3415
10.0k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3416
10.0k
                    if (newSize < 0) {
3417
448
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3418
448
                        xmlFree(buffer);
3419
448
                        return(NULL);
3420
448
                    }
3421
9.57k
        tmp = xmlRealloc(buffer, newSize);
3422
9.57k
        if (tmp == NULL) {
3423
6
      xmlErrMemory(ctxt);
3424
6
      xmlFree(buffer);
3425
6
      return(NULL);
3426
6
        }
3427
9.57k
        buffer = tmp;
3428
9.57k
                    max = newSize;
3429
9.57k
    }
3430
13.3M
    COPY_BUF(buffer, len, c);
3431
13.3M
    NEXTL(l);
3432
13.3M
    c = xmlCurrentChar(ctxt, &l);
3433
13.3M
      }
3434
1.36k
      buffer[len] = 0;
3435
1.36k
      return(buffer);
3436
1.82k
  }
3437
201k
    }
3438
62.6k
    if (len == 0)
3439
11.4k
        return(NULL);
3440
51.2k
    if (len > maxLength) {
3441
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3442
0
        return(NULL);
3443
0
    }
3444
51.2k
    ret = xmlStrndup(buf, len);
3445
51.2k
    if (ret == NULL)
3446
29
        xmlErrMemory(ctxt);
3447
51.2k
    return(ret);
3448
51.2k
}
3449
3450
/**
3451
 * Validate an entity value and expand parameter entities.
3452
 *
3453
 * @param ctxt  parser context
3454
 * @param buf  string buffer
3455
 * @param str  entity value
3456
 * @param length  size of entity value
3457
 * @param depth  nesting depth
3458
 */
3459
static void
3460
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3461
106k
                          const xmlChar *str, int length, int depth) {
3462
106k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3463
106k
    const xmlChar *end, *chunk;
3464
106k
    int c, l;
3465
3466
106k
    if (str == NULL)
3467
27.0k
        return;
3468
3469
79.8k
    depth += 1;
3470
79.8k
    if (depth > maxDepth) {
3471
4
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3472
4
                       "Maximum entity nesting depth exceeded");
3473
4
  return;
3474
4
    }
3475
3476
79.8k
    end = str + length;
3477
79.8k
    chunk = str;
3478
3479
499M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3480
499M
        c = *str;
3481
3482
499M
        if (c >= 0x80) {
3483
432M
            l = xmlUTF8MultibyteLen(ctxt, str,
3484
432M
                    "invalid character in entity value\n");
3485
432M
            if (l == 0) {
3486
48.6M
                if (chunk < str)
3487
64.8k
                    xmlSBufAddString(buf, chunk, str - chunk);
3488
48.6M
                xmlSBufAddReplChar(buf);
3489
48.6M
                str += 1;
3490
48.6M
                chunk = str;
3491
383M
            } else {
3492
383M
                str += l;
3493
383M
            }
3494
432M
        } else if (c == '&') {
3495
67.4k
            if (str[1] == '#') {
3496
18.7k
                if (chunk < str)
3497
11.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3498
3499
18.7k
                c = xmlParseStringCharRef(ctxt, &str);
3500
18.7k
                if (c == 0)
3501
3.95k
                    return;
3502
3503
14.8k
                xmlSBufAddChar(buf, c);
3504
3505
14.8k
                chunk = str;
3506
48.6k
            } else {
3507
48.6k
                xmlChar *name;
3508
3509
                /*
3510
                 * General entity references are checked for
3511
                 * syntactic validity.
3512
                 */
3513
48.6k
                str++;
3514
48.6k
                name = xmlParseStringName(ctxt, &str);
3515
3516
48.6k
                if ((name == NULL) || (*str++ != ';')) {
3517
4.76k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3518
4.76k
                            "EntityValue: '&' forbidden except for entities "
3519
4.76k
                            "references\n");
3520
4.76k
                    xmlFree(name);
3521
4.76k
                    return;
3522
4.76k
                }
3523
3524
43.8k
                xmlFree(name);
3525
43.8k
            }
3526
67.7M
        } else if (c == '%') {
3527
54.2k
            xmlEntityPtr ent;
3528
3529
54.2k
            if (chunk < str)
3530
16.0k
                xmlSBufAddString(buf, chunk, str - chunk);
3531
3532
54.2k
            ent = xmlParseStringPEReference(ctxt, &str);
3533
54.2k
            if (ent == NULL)
3534
9.07k
                return;
3535
3536
45.2k
            if (!PARSER_EXTERNAL(ctxt)) {
3537
224
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3538
224
                return;
3539
224
            }
3540
3541
44.9k
            if (ent->content == NULL) {
3542
                /*
3543
                 * Note: external parsed entities will not be loaded,
3544
                 * it is not required for a non-validating parser to
3545
                 * complete external PEReferences coming from the
3546
                 * internal subset
3547
                 */
3548
27.3k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3549
27.2k
                    ((ctxt->replaceEntities) ||
3550
24.5k
                     (ctxt->validate))) {
3551
24.5k
                    xmlLoadEntityContent(ctxt, ent);
3552
24.5k
                } else {
3553
2.76k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3554
2.76k
                                  "not validating will not read content for "
3555
2.76k
                                  "PE entity %s\n", ent->name, NULL);
3556
2.76k
                }
3557
27.3k
            }
3558
3559
            /*
3560
             * TODO: Skip if ent->content is still NULL.
3561
             */
3562
3563
44.9k
            if (xmlParserEntityCheck(ctxt, ent->length))
3564
16
                return;
3565
3566
44.9k
            if (ent->flags & XML_ENT_EXPANDING) {
3567
190
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3568
190
                return;
3569
190
            }
3570
3571
44.7k
            ent->flags |= XML_ENT_EXPANDING;
3572
44.7k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3573
44.7k
                                      depth);
3574
44.7k
            ent->flags &= ~XML_ENT_EXPANDING;
3575
3576
44.7k
            chunk = str;
3577
67.7M
        } else {
3578
            /* Normal ASCII char */
3579
67.7M
            if (!IS_BYTE_CHAR(c)) {
3580
7.29M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3581
7.29M
                        "invalid character in entity value\n");
3582
7.29M
                if (chunk < str)
3583
23.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3584
7.29M
                xmlSBufAddReplChar(buf);
3585
7.29M
                str += 1;
3586
7.29M
                chunk = str;
3587
60.4M
            } else {
3588
60.4M
                str += 1;
3589
60.4M
            }
3590
67.7M
        }
3591
499M
    }
3592
3593
61.5k
    if (chunk < str)
3594
46.2k
        xmlSBufAddString(buf, chunk, str - chunk);
3595
61.5k
}
3596
3597
/**
3598
 * Parse a value for ENTITY declarations
3599
 *
3600
 * @deprecated Internal function, don't use.
3601
 *
3602
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3603
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3604
 *
3605
 * @param ctxt  an XML parser context
3606
 * @param orig  if non-NULL store a copy of the original entity value
3607
 * @returns the EntityValue parsed with reference substituted or NULL
3608
 */
3609
xmlChar *
3610
63.7k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3611
63.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3612
25.4k
                         XML_MAX_HUGE_LENGTH :
3613
63.7k
                         XML_MAX_TEXT_LENGTH;
3614
63.7k
    xmlSBuf buf;
3615
63.7k
    const xmlChar *start;
3616
63.7k
    int quote, length;
3617
3618
63.7k
    xmlSBufInit(&buf, maxLength);
3619
3620
63.7k
    GROW;
3621
3622
63.7k
    quote = CUR;
3623
63.7k
    if ((quote != '"') && (quote != '\'')) {
3624
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3625
0
  return(NULL);
3626
0
    }
3627
63.7k
    CUR_PTR++;
3628
3629
63.7k
    length = 0;
3630
3631
    /*
3632
     * Copy raw content of the entity into a buffer
3633
     */
3634
1.15G
    while (1) {
3635
1.15G
        int c;
3636
3637
1.15G
        if (PARSER_STOPPED(ctxt))
3638
23
            goto error;
3639
3640
1.15G
        if (CUR_PTR >= ctxt->input->end) {
3641
1.64k
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3642
1.64k
            goto error;
3643
1.64k
        }
3644
3645
1.15G
        c = CUR;
3646
3647
1.15G
        if (c == 0) {
3648
84
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3649
84
                    "invalid character in entity value\n");
3650
84
            goto error;
3651
84
        }
3652
1.15G
        if (c == quote)
3653
62.0k
            break;
3654
1.15G
        NEXTL(1);
3655
1.15G
        length += 1;
3656
3657
        /*
3658
         * TODO: Check growth threshold
3659
         */
3660
1.15G
        if (ctxt->input->end - CUR_PTR < 10)
3661
142k
            GROW;
3662
1.15G
    }
3663
3664
62.0k
    start = CUR_PTR - length;
3665
3666
62.0k
    if (orig != NULL) {
3667
62.0k
        *orig = xmlStrndup(start, length);
3668
62.0k
        if (*orig == NULL)
3669
104
            xmlErrMemory(ctxt);
3670
62.0k
    }
3671
3672
62.0k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3673
3674
62.0k
    NEXTL(1);
3675
3676
62.0k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3677
3678
1.75k
error:
3679
1.75k
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3680
1.75k
    return(NULL);
3681
63.7k
}
3682
3683
/**
3684
 * Check an entity reference in an attribute value for validity
3685
 * without expanding it.
3686
 *
3687
 * @param ctxt  parser context
3688
 * @param pent  entity
3689
 * @param depth  nesting depth
3690
 */
3691
static void
3692
8.43k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3693
8.43k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3694
8.43k
    const xmlChar *str;
3695
8.43k
    unsigned long expandedSize = pent->length;
3696
8.43k
    int c, flags;
3697
3698
8.43k
    depth += 1;
3699
8.43k
    if (depth > maxDepth) {
3700
6
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3701
6
                       "Maximum entity nesting depth exceeded");
3702
6
  return;
3703
6
    }
3704
3705
8.42k
    if (pent->flags & XML_ENT_EXPANDING) {
3706
23
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3707
23
        return;
3708
23
    }
3709
3710
    /*
3711
     * If we're parsing a default attribute value in DTD content,
3712
     * the entity might reference other entities which weren't
3713
     * defined yet, so the check isn't reliable.
3714
     */
3715
8.40k
    if (ctxt->inSubset == 0)
3716
8.22k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3717
180
    else
3718
180
        flags = XML_ENT_VALIDATED;
3719
3720
8.40k
    str = pent->content;
3721
8.40k
    if (str == NULL)
3722
83
        goto done;
3723
3724
    /*
3725
     * Note that entity values are already validated. We only check
3726
     * for illegal less-than signs and compute the expanded size
3727
     * of the entity. No special handling for multi-byte characters
3728
     * is needed.
3729
     */
3730
91.1M
    while (!PARSER_STOPPED(ctxt)) {
3731
91.1M
        c = *str;
3732
3733
91.1M
  if (c != '&') {
3734
91.1M
            if (c == 0)
3735
8.03k
                break;
3736
3737
91.1M
            if (c == '<')
3738
4.94k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3739
4.94k
                        "'<' in entity '%s' is not allowed in attributes "
3740
4.94k
                        "values\n", pent->name);
3741
3742
91.1M
            str += 1;
3743
91.1M
        } else if (str[1] == '#') {
3744
1.10k
            int val;
3745
3746
1.10k
      val = xmlParseStringCharRef(ctxt, &str);
3747
1.10k
      if (val == 0) {
3748
21
                pent->content[0] = 0;
3749
21
                break;
3750
21
            }
3751
15.9k
  } else {
3752
15.9k
            xmlChar *name;
3753
15.9k
            xmlEntityPtr ent;
3754
3755
15.9k
      name = xmlParseStringEntityRef(ctxt, &str);
3756
15.9k
      if (name == NULL) {
3757
38
                pent->content[0] = 0;
3758
38
                break;
3759
38
            }
3760
3761
15.9k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3762
15.9k
            xmlFree(name);
3763
3764
15.9k
            if ((ent != NULL) &&
3765
14.7k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3766
14.1k
                if ((ent->flags & flags) != flags) {
3767
6.50k
                    pent->flags |= XML_ENT_EXPANDING;
3768
6.50k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3769
6.50k
                    pent->flags &= ~XML_ENT_EXPANDING;
3770
6.50k
                }
3771
3772
14.1k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3773
14.1k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3774
14.1k
            }
3775
15.9k
        }
3776
91.1M
    }
3777
3778
8.40k
done:
3779
8.40k
    if (ctxt->inSubset == 0)
3780
8.22k
        pent->expandedSize = expandedSize;
3781
3782
8.40k
    pent->flags |= flags;
3783
8.40k
}
3784
3785
/**
3786
 * Expand general entity references in an entity or attribute value.
3787
 * Perform attribute value normalization.
3788
 *
3789
 * @param ctxt  parser context
3790
 * @param buf  string buffer
3791
 * @param str  entity or attribute value
3792
 * @param pent  entity for entity value, NULL for attribute values
3793
 * @param normalize  whether to collapse whitespace
3794
 * @param inSpace  whitespace state
3795
 * @param depth  nesting depth
3796
 * @param check  whether to check for amplification
3797
 * @returns  whether there was a normalization change
3798
 */
3799
static int
3800
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3801
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3802
1.07M
                          int *inSpace, int depth, int check) {
3803
1.07M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3804
1.07M
    int c, chunkSize;
3805
1.07M
    int normChange = 0;
3806
3807
1.07M
    if (str == NULL)
3808
204
        return(0);
3809
3810
1.07M
    depth += 1;
3811
1.07M
    if (depth > maxDepth) {
3812
6
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3813
6
                       "Maximum entity nesting depth exceeded");
3814
6
  return(0);
3815
6
    }
3816
3817
1.07M
    if (pent != NULL) {
3818
1.04M
        if (pent->flags & XML_ENT_EXPANDING) {
3819
9
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3820
9
            return(0);
3821
9
        }
3822
3823
1.04M
        if (check) {
3824
1.03M
            if (xmlParserEntityCheck(ctxt, pent->length))
3825
547
                return(0);
3826
1.03M
        }
3827
1.04M
    }
3828
3829
1.06M
    chunkSize = 0;
3830
3831
    /*
3832
     * Note that entity values are already validated. No special
3833
     * handling for multi-byte characters is needed.
3834
     */
3835
6.47G
    while (!PARSER_STOPPED(ctxt)) {
3836
6.47G
        c = *str;
3837
3838
6.47G
  if (c != '&') {
3839
6.47G
            if (c == 0)
3840
1.00M
                break;
3841
3842
            /*
3843
             * If this function is called without an entity, it is used to
3844
             * expand entities in an attribute content where less-than was
3845
             * already unscaped and is allowed.
3846
             */
3847
6.47G
            if ((pent != NULL) && (c == '<')) {
3848
64.8k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3849
64.8k
                        "'<' in entity '%s' is not allowed in attributes "
3850
64.8k
                        "values\n", pent->name);
3851
64.8k
                break;
3852
64.8k
            }
3853
3854
6.47G
            if (c <= 0x20) {
3855
81.1M
                if ((normalize) && (*inSpace)) {
3856
                    /* Skip char */
3857
483k
                    if (chunkSize > 0) {
3858
89.3k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3859
89.3k
                        chunkSize = 0;
3860
89.3k
                    }
3861
483k
                    normChange = 1;
3862
80.7M
                } else if (c < 0x20) {
3863
59.5M
                    if (chunkSize > 0) {
3864
249k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3865
249k
                        chunkSize = 0;
3866
249k
                    }
3867
3868
59.5M
                    xmlSBufAddCString(buf, " ", 1);
3869
59.5M
                } else {
3870
21.1M
                    chunkSize += 1;
3871
21.1M
                }
3872
3873
81.1M
                *inSpace = 1;
3874
6.39G
            } else {
3875
6.39G
                chunkSize += 1;
3876
6.39G
                *inSpace = 0;
3877
6.39G
            }
3878
3879
6.47G
            str += 1;
3880
6.47G
        } else if (str[1] == '#') {
3881
394k
            int val;
3882
3883
394k
            if (chunkSize > 0) {
3884
393k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
393k
                chunkSize = 0;
3886
393k
            }
3887
3888
394k
      val = xmlParseStringCharRef(ctxt, &str);
3889
394k
      if (val == 0) {
3890
27
                if (pent != NULL)
3891
27
                    pent->content[0] = 0;
3892
27
                break;
3893
27
            }
3894
3895
394k
            if (val == ' ') {
3896
8.04k
                if ((normalize) && (*inSpace))
3897
237
                    normChange = 1;
3898
7.80k
                else
3899
7.80k
                    xmlSBufAddCString(buf, " ", 1);
3900
8.04k
                *inSpace = 1;
3901
386k
            } else {
3902
386k
                xmlSBufAddChar(buf, val);
3903
386k
                *inSpace = 0;
3904
386k
            }
3905
1.63M
  } else {
3906
1.63M
            xmlChar *name;
3907
1.63M
            xmlEntityPtr ent;
3908
3909
1.63M
            if (chunkSize > 0) {
3910
864k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
864k
                chunkSize = 0;
3912
864k
            }
3913
3914
1.63M
      name = xmlParseStringEntityRef(ctxt, &str);
3915
1.63M
            if (name == NULL) {
3916
88
                if (pent != NULL)
3917
79
                    pent->content[0] = 0;
3918
88
                break;
3919
88
            }
3920
3921
1.63M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3922
1.63M
            xmlFree(name);
3923
3924
1.63M
      if ((ent != NULL) &&
3925
1.44M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3926
558k
    if (ent->content == NULL) {
3927
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3928
0
          "predefined entity has no content\n");
3929
0
                    break;
3930
0
                }
3931
3932
558k
                xmlSBufAddString(buf, ent->content, ent->length);
3933
3934
558k
                *inSpace = 0;
3935
1.07M
      } else if ((ent != NULL) && (ent->content != NULL)) {
3936
883k
                if (pent != NULL)
3937
878k
                    pent->flags |= XML_ENT_EXPANDING;
3938
883k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3939
883k
                        ent->content, ent, normalize, inSpace, depth, check);
3940
883k
                if (pent != NULL)
3941
878k
                    pent->flags &= ~XML_ENT_EXPANDING;
3942
883k
      }
3943
1.63M
        }
3944
6.47G
    }
3945
3946
1.06M
    if (chunkSize > 0)
3947
553k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3948
3949
1.06M
    return(normChange);
3950
1.07M
}
3951
3952
/**
3953
 * Expand general entity references in an entity or attribute value.
3954
 * Perform attribute value normalization.
3955
 *
3956
 * @param ctxt  parser context
3957
 * @param str  entity or attribute value
3958
 * @param normalize  whether to collapse whitespace
3959
 * @returns the expanded attribtue value.
3960
 */
3961
xmlChar *
3962
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3963
25.2k
                            int normalize) {
3964
25.2k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3965
11.8k
                         XML_MAX_HUGE_LENGTH :
3966
25.2k
                         XML_MAX_TEXT_LENGTH;
3967
25.2k
    xmlSBuf buf;
3968
25.2k
    int inSpace = 1;
3969
3970
25.2k
    xmlSBufInit(&buf, maxLength);
3971
3972
25.2k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3973
25.2k
                              ctxt->inputNr, /* check */ 0);
3974
3975
25.2k
    if ((normalize) && (inSpace) && (buf.size > 0))
3976
0
        buf.size--;
3977
3978
25.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3979
25.2k
}
3980
3981
/**
3982
 * Parse a value for an attribute.
3983
 *
3984
 * NOTE: if no normalization is needed, the routine will return pointers
3985
 * directly from the data buffer.
3986
 *
3987
 * 3.3.3 Attribute-Value Normalization:
3988
 *
3989
 * Before the value of an attribute is passed to the application or
3990
 * checked for validity, the XML processor must normalize it as follows:
3991
 *
3992
 * - a character reference is processed by appending the referenced
3993
 *   character to the attribute value
3994
 * - an entity reference is processed by recursively processing the
3995
 *   replacement text of the entity
3996
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
3997
 *   appending \#x20 to the normalized value, except that only a single
3998
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
3999
 *   parsed entity or the literal entity value of an internal parsed entity
4000
 * - other characters are processed by appending them to the normalized value
4001
 *
4002
 * If the declared value is not CDATA, then the XML processor must further
4003
 * process the normalized attribute value by discarding any leading and
4004
 * trailing space (\#x20) characters, and by replacing sequences of space
4005
 * (\#x20) characters by a single space (\#x20) character.
4006
 * All attributes for which no declaration has been read should be treated
4007
 * by a non-validating parser as if declared CDATA.
4008
 *
4009
 * @param ctxt  an XML parser context
4010
 * @param attlen  attribute len result
4011
 * @param outFlags  resulting XML_ATTVAL_* flags
4012
 * @param special  value from attsSpecial
4013
 * @param isNamespace  whether this is a namespace declaration
4014
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4015
 *     caller if it was copied, this can be detected by val[*len] == 0.
4016
 */
4017
static xmlChar *
4018
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4019
411k
                         int special, int isNamespace) {
4020
411k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4021
204k
                         XML_MAX_HUGE_LENGTH :
4022
411k
                         XML_MAX_TEXT_LENGTH;
4023
411k
    xmlSBuf buf;
4024
411k
    xmlChar *ret;
4025
411k
    int c, l, quote, entFlags, chunkSize;
4026
411k
    int inSpace = 1;
4027
411k
    int replaceEntities;
4028
411k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) > XML_ATTRIBUTE_CDATA;
4029
411k
    int attvalFlags = 0;
4030
4031
    /* Always expand namespace URIs */
4032
411k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4033
4034
411k
    xmlSBufInit(&buf, maxLength);
4035
4036
411k
    GROW;
4037
4038
411k
    quote = CUR;
4039
411k
    if ((quote != '"') && (quote != '\'')) {
4040
17.7k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4041
17.7k
  return(NULL);
4042
17.7k
    }
4043
394k
    NEXTL(1);
4044
4045
394k
    if (ctxt->inSubset == 0)
4046
333k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4047
60.7k
    else
4048
60.7k
        entFlags = XML_ENT_VALIDATED;
4049
4050
394k
    inSpace = 1;
4051
394k
    chunkSize = 0;
4052
4053
392M
    while (1) {
4054
392M
        if (PARSER_STOPPED(ctxt))
4055
958
            goto error;
4056
4057
392M
        if (CUR_PTR >= ctxt->input->end) {
4058
8.30k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4059
8.30k
                           "AttValue: ' expected\n");
4060
8.30k
            goto error;
4061
8.30k
        }
4062
4063
        /*
4064
         * TODO: Check growth threshold
4065
         */
4066
392M
        if (ctxt->input->end - CUR_PTR < 10)
4067
193k
            GROW;
4068
4069
392M
        c = CUR;
4070
4071
392M
        if (c >= 0x80) {
4072
300M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4073
300M
                    "invalid character in attribute value\n");
4074
300M
            if (l == 0) {
4075
107M
                if (chunkSize > 0) {
4076
128k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4077
128k
                    chunkSize = 0;
4078
128k
                }
4079
107M
                xmlSBufAddReplChar(&buf);
4080
107M
                NEXTL(1);
4081
193M
            } else {
4082
193M
                chunkSize += l;
4083
193M
                NEXTL(l);
4084
193M
            }
4085
4086
300M
            inSpace = 0;
4087
300M
        } else if (c != '&') {
4088
90.5M
            if (c > 0x20) {
4089
6.06M
                if (c == quote)
4090
382k
                    break;
4091
4092
5.68M
                if (c == '<')
4093
192k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4094
4095
5.68M
                chunkSize += 1;
4096
5.68M
                inSpace = 0;
4097
84.5M
            } else if (!IS_BYTE_CHAR(c)) {
4098
63.8M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4099
63.8M
                        "invalid character in attribute value\n");
4100
63.8M
                if (chunkSize > 0) {
4101
85.2k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4102
85.2k
                    chunkSize = 0;
4103
85.2k
                }
4104
63.8M
                xmlSBufAddReplChar(&buf);
4105
63.8M
                inSpace = 0;
4106
63.8M
            } else {
4107
                /* Whitespace */
4108
20.7M
                if ((normalize) && (inSpace)) {
4109
                    /* Skip char */
4110
1.47M
                    if (chunkSize > 0) {
4111
27.0k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4112
27.0k
                        chunkSize = 0;
4113
27.0k
                    }
4114
1.47M
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4115
19.2M
                } else if (c < 0x20) {
4116
                    /* Convert to space */
4117
18.8M
                    if (chunkSize > 0) {
4118
75.5k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4119
75.5k
                        chunkSize = 0;
4120
75.5k
                    }
4121
4122
18.8M
                    xmlSBufAddCString(&buf, " ", 1);
4123
18.8M
                } else {
4124
367k
                    chunkSize += 1;
4125
367k
                }
4126
4127
20.7M
                inSpace = 1;
4128
4129
20.7M
                if ((c == 0xD) && (NXT(1) == 0xA))
4130
3.40k
                    CUR_PTR++;
4131
20.7M
            }
4132
4133
90.1M
            NEXTL(1);
4134
90.1M
        } else if (NXT(1) == '#') {
4135
59.8k
            int val;
4136
4137
59.8k
            if (chunkSize > 0) {
4138
33.6k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4139
33.6k
                chunkSize = 0;
4140
33.6k
            }
4141
4142
59.8k
            val = xmlParseCharRef(ctxt);
4143
59.8k
            if (val == 0)
4144
2.45k
                goto error;
4145
4146
57.3k
            if ((val == '&') && (!replaceEntities)) {
4147
                /*
4148
                 * The reparsing will be done in xmlNodeParseContent()
4149
                 * called from SAX2.c
4150
                 */
4151
3.62k
                xmlSBufAddCString(&buf, "&#38;", 5);
4152
3.62k
                inSpace = 0;
4153
53.7k
            } else if (val == ' ') {
4154
11.8k
                if ((normalize) && (inSpace))
4155
1.30k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4156
10.5k
                else
4157
10.5k
                    xmlSBufAddCString(&buf, " ", 1);
4158
11.8k
                inSpace = 1;
4159
41.8k
            } else {
4160
41.8k
                xmlSBufAddChar(&buf, val);
4161
41.8k
                inSpace = 0;
4162
41.8k
            }
4163
809k
        } else {
4164
809k
            const xmlChar *name;
4165
809k
            xmlEntityPtr ent;
4166
4167
809k
            if (chunkSize > 0) {
4168
85.2k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4169
85.2k
                chunkSize = 0;
4170
85.2k
            }
4171
4172
809k
            name = xmlParseEntityRefInternal(ctxt);
4173
809k
            if (name == NULL) {
4174
                /*
4175
                 * Probably a literal '&' which wasn't escaped.
4176
                 * TODO: Handle gracefully in recovery mode.
4177
                 */
4178
63.9k
                continue;
4179
63.9k
            }
4180
4181
745k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4182
745k
            if (ent == NULL)
4183
31.9k
                continue;
4184
4185
713k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4186
17.6k
                if ((ent->content[0] == '&') && (!replaceEntities))
4187
2.81k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4188
14.8k
                else
4189
14.8k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4190
17.6k
                inSpace = 0;
4191
696k
            } else if (replaceEntities) {
4192
162k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4193
162k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4194
162k
                        /* check */ 1) > 0)
4195
37.0k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4196
534k
            } else {
4197
534k
                if ((ent->flags & entFlags) != entFlags)
4198
1.92k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4199
4200
534k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4201
265
                    ent->content[0] = 0;
4202
265
                    goto error;
4203
265
                }
4204
4205
                /*
4206
                 * Just output the reference
4207
                 */
4208
533k
                xmlSBufAddCString(&buf, "&", 1);
4209
533k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4210
533k
                xmlSBufAddCString(&buf, ";", 1);
4211
4212
533k
                inSpace = 0;
4213
533k
            }
4214
713k
  }
4215
392M
    }
4216
4217
382k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4218
227k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4219
4220
227k
        if (attlen != NULL)
4221
227k
            *attlen = chunkSize;
4222
227k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4223
1.24k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4224
1.24k
            *attlen -= 1;
4225
1.24k
        }
4226
4227
        /* Report potential error */
4228
227k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4229
227k
    } else {
4230
154k
        if (chunkSize > 0)
4231
113k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4232
4233
154k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4234
3.28k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4235
3.28k
            buf.size--;
4236
3.28k
        }
4237
4238
154k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4239
154k
        attvalFlags |= XML_ATTVAL_ALLOC;
4240
4241
154k
        if (ret != NULL) {
4242
153k
            if (attlen != NULL)
4243
49.8k
                *attlen = buf.size;
4244
153k
        }
4245
154k
    }
4246
4247
382k
    if (outFlags != NULL)
4248
277k
        *outFlags = attvalFlags;
4249
4250
382k
    NEXTL(1);
4251
4252
382k
    return(ret);
4253
4254
11.9k
error:
4255
11.9k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4256
11.9k
    return(NULL);
4257
394k
}
4258
4259
/**
4260
 * Parse a value for an attribute
4261
 * Note: the parser won't do substitution of entities here, this
4262
 * will be handled later in #xmlStringGetNodeList
4263
 *
4264
 * @deprecated Internal function, don't use.
4265
 *
4266
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4267
 *                       "'" ([^<&'] | Reference)* "'"
4268
 *
4269
 * 3.3.3 Attribute-Value Normalization:
4270
 *
4271
 * Before the value of an attribute is passed to the application or
4272
 * checked for validity, the XML processor must normalize it as follows:
4273
 *
4274
 * - a character reference is processed by appending the referenced
4275
 *   character to the attribute value
4276
 * - an entity reference is processed by recursively processing the
4277
 *   replacement text of the entity
4278
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4279
 *   appending \#x20 to the normalized value, except that only a single
4280
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4281
 *   parsed entity or the literal entity value of an internal parsed entity
4282
 * - other characters are processed by appending them to the normalized value
4283
 *
4284
 * If the declared value is not CDATA, then the XML processor must further
4285
 * process the normalized attribute value by discarding any leading and
4286
 * trailing space (\#x20) characters, and by replacing sequences of space
4287
 * (\#x20) characters by a single space (\#x20) character.
4288
 * All attributes for which no declaration has been read should be treated
4289
 * by a non-validating parser as if declared CDATA.
4290
 *
4291
 * @param ctxt  an XML parser context
4292
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4293
 * caller.
4294
 */
4295
xmlChar *
4296
118k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4297
118k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4298
118k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4299
118k
}
4300
4301
/**
4302
 * Parse an XML Literal
4303
 *
4304
 * @deprecated Internal function, don't use.
4305
 *
4306
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4307
 *
4308
 * @param ctxt  an XML parser context
4309
 * @returns the SystemLiteral parsed or NULL
4310
 */
4311
4312
xmlChar *
4313
30.2k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4314
30.2k
    xmlChar *buf = NULL;
4315
30.2k
    int len = 0;
4316
30.2k
    int size = XML_PARSER_BUFFER_SIZE;
4317
30.2k
    int cur, l;
4318
30.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4319
11.0k
                    XML_MAX_TEXT_LENGTH :
4320
30.2k
                    XML_MAX_NAME_LENGTH;
4321
30.2k
    xmlChar stop;
4322
4323
30.2k
    if (RAW == '"') {
4324
19.1k
        NEXT;
4325
19.1k
  stop = '"';
4326
19.1k
    } else if (RAW == '\'') {
4327
10.1k
        NEXT;
4328
10.1k
  stop = '\'';
4329
10.1k
    } else {
4330
900
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4331
900
  return(NULL);
4332
900
    }
4333
4334
29.3k
    buf = xmlMalloc(size);
4335
29.3k
    if (buf == NULL) {
4336
41
        xmlErrMemory(ctxt);
4337
41
  return(NULL);
4338
41
    }
4339
29.2k
    cur = xmlCurrentCharRecover(ctxt, &l);
4340
27.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4341
27.1M
  if (len + 5 >= size) {
4342
9.73k
      xmlChar *tmp;
4343
9.73k
            int newSize;
4344
4345
9.73k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4346
9.73k
            if (newSize < 0) {
4347
6
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4348
6
                xmlFree(buf);
4349
6
                return(NULL);
4350
6
            }
4351
9.72k
      tmp = xmlRealloc(buf, newSize);
4352
9.72k
      if (tmp == NULL) {
4353
8
          xmlFree(buf);
4354
8
    xmlErrMemory(ctxt);
4355
8
    return(NULL);
4356
8
      }
4357
9.71k
      buf = tmp;
4358
9.71k
            size = newSize;
4359
9.71k
  }
4360
27.1M
  COPY_BUF(buf, len, cur);
4361
27.1M
  NEXTL(l);
4362
27.1M
  cur = xmlCurrentCharRecover(ctxt, &l);
4363
27.1M
    }
4364
29.2k
    buf[len] = 0;
4365
29.2k
    if (!IS_CHAR(cur)) {
4366
2.97k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367
26.2k
    } else {
4368
26.2k
  NEXT;
4369
26.2k
    }
4370
29.2k
    return(buf);
4371
29.2k
}
4372
4373
/**
4374
 * Parse an XML public literal
4375
 *
4376
 * @deprecated Internal function, don't use.
4377
 *
4378
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4379
 *
4380
 * @param ctxt  an XML parser context
4381
 * @returns the PubidLiteral parsed or NULL.
4382
 */
4383
4384
xmlChar *
4385
11.3k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4386
11.3k
    xmlChar *buf = NULL;
4387
11.3k
    int len = 0;
4388
11.3k
    int size = XML_PARSER_BUFFER_SIZE;
4389
11.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4390
3.72k
                    XML_MAX_TEXT_LENGTH :
4391
11.3k
                    XML_MAX_NAME_LENGTH;
4392
11.3k
    xmlChar cur;
4393
11.3k
    xmlChar stop;
4394
4395
11.3k
    if (RAW == '"') {
4396
710
        NEXT;
4397
710
  stop = '"';
4398
10.6k
    } else if (RAW == '\'') {
4399
9.88k
        NEXT;
4400
9.88k
  stop = '\'';
4401
9.88k
    } else {
4402
727
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4403
727
  return(NULL);
4404
727
    }
4405
10.5k
    buf = xmlMalloc(size);
4406
10.5k
    if (buf == NULL) {
4407
26
  xmlErrMemory(ctxt);
4408
26
  return(NULL);
4409
26
    }
4410
10.5k
    cur = CUR;
4411
1.07M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4412
1.06M
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4413
1.06M
  if (len + 1 >= size) {
4414
655
      xmlChar *tmp;
4415
655
            int newSize;
4416
4417
655
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4418
655
            if (newSize < 0) {
4419
6
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4420
6
                xmlFree(buf);
4421
6
                return(NULL);
4422
6
            }
4423
649
      tmp = xmlRealloc(buf, newSize);
4424
649
      if (tmp == NULL) {
4425
6
    xmlErrMemory(ctxt);
4426
6
    xmlFree(buf);
4427
6
    return(NULL);
4428
6
      }
4429
643
      buf = tmp;
4430
643
            size = newSize;
4431
643
  }
4432
1.06M
  buf[len++] = cur;
4433
1.06M
  NEXT;
4434
1.06M
  cur = CUR;
4435
1.06M
    }
4436
10.5k
    buf[len] = 0;
4437
10.5k
    if (cur != stop) {
4438
3.49k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4439
7.06k
    } else {
4440
7.06k
  NEXTL(1);
4441
7.06k
    }
4442
10.5k
    return(buf);
4443
10.5k
}
4444
4445
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4446
4447
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing.
 *
 * A byte maps to itself if it is 0x09 or lies in the range 0x20-0x7F,
 * with the exception of '&', '<' and ']'. All other bytes, including
 * CR, LF, the remaining control characters and everything from 0x80
 * upwards, map to zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* no CR/LF */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4484
4485
static void
4486
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4487
2.53M
              int isBlank) {
4488
2.53M
    int checkBlanks;
4489
4490
2.53M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4491
252k
        return;
4492
4493
2.28M
    checkBlanks = (!ctxt->keepBlanks) ||
4494
1.47M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4495
4496
    /*
4497
     * Calling areBlanks with only parts of a text node
4498
     * is fundamentally broken, making the NOBLANKS option
4499
     * essentially unusable.
4500
     */
4501
2.28M
    if ((checkBlanks) &&
4502
813k
        (areBlanks(ctxt, buf, size, isBlank))) {
4503
8.08k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4504
8.08k
            (ctxt->keepBlanks))
4505
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4506
2.27M
    } else {
4507
2.27M
        if (ctxt->sax->characters != NULL)
4508
2.27M
            ctxt->sax->characters(ctxt->userData, buf, size);
4509
4510
        /*
4511
         * The old code used to update this value for "complex" data
4512
         * even if checkBlanks was false. This was probably a bug.
4513
         */
4514
2.27M
        if ((checkBlanks) && (*ctxt->space == -1))
4515
69.4k
            *ctxt->space = -2;
4516
2.27M
    }
4517
2.28M
}
4518
4519
/**
4520
 * Parse character data. Always makes progress if the first char isn't
4521
 * '<' or '&'.
4522
 *
4523
 * The right angle bracket (>) may be represented using the string "&gt;",
4524
 * and must, for compatibility, be escaped using "&gt;" or a character
4525
 * reference when it appears in the string "]]>" in content, when that
4526
 * string is not marking the end of a CDATA section.
4527
 *
4528
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4529
 * @param ctxt  an XML parser context
4530
 * @param partial  buffer may contain partial UTF-8 sequences
4531
 */
4532
static void
4533
10.4M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4534
10.4M
    const xmlChar *in;
4535
10.4M
    int line = ctxt->input->line;
4536
10.4M
    int col = ctxt->input->col;
4537
10.4M
    int ccol;
4538
10.4M
    int terminate = 0;
4539
4540
10.4M
    GROW;
4541
    /*
4542
     * Accelerated common case where input don't need to be
4543
     * modified before passing it to the handler.
4544
     */
4545
10.4M
    in = ctxt->input->cur;
4546
10.4M
    do {
4547
10.5M
get_more_space:
4548
10.5M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4549
10.5M
        if (*in == 0xA) {
4550
11.9M
            do {
4551
11.9M
                ctxt->input->line++; ctxt->input->col = 1;
4552
11.9M
                in++;
4553
11.9M
            } while (*in == 0xA);
4554
76.2k
            goto get_more_space;
4555
76.2k
        }
4556
10.4M
        if (*in == '<') {
4557
77.4k
            while (in > ctxt->input->cur) {
4558
38.7k
                const xmlChar *tmp = ctxt->input->cur;
4559
38.7k
                size_t nbchar = in - tmp;
4560
4561
38.7k
                if (nbchar > XML_MAX_ITEMS)
4562
0
                    nbchar = XML_MAX_ITEMS;
4563
38.7k
                ctxt->input->cur += nbchar;
4564
4565
38.7k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4566
38.7k
            }
4567
38.7k
            return;
4568
38.7k
        }
4569
4570
11.0M
get_more:
4571
11.0M
        ccol = ctxt->input->col;
4572
34.4M
        while (test_char_data[*in]) {
4573
23.4M
            in++;
4574
23.4M
            ccol++;
4575
23.4M
        }
4576
11.0M
        ctxt->input->col = ccol;
4577
11.0M
        if (*in == 0xA) {
4578
5.64M
            do {
4579
5.64M
                ctxt->input->line++; ctxt->input->col = 1;
4580
5.64M
                in++;
4581
5.64M
            } while (*in == 0xA);
4582
82.1k
            goto get_more;
4583
82.1k
        }
4584
10.9M
        if (*in == ']') {
4585
523k
            size_t avail = ctxt->input->end - in;
4586
4587
523k
            if (partial && avail < 2) {
4588
165
                terminate = 1;
4589
165
                goto invoke_callback;
4590
165
            }
4591
523k
            if (in[1] == ']') {
4592
434k
                if (partial && avail < 3) {
4593
422
                    terminate = 1;
4594
422
                    goto invoke_callback;
4595
422
                }
4596
433k
                if (in[2] == '>')
4597
62.5k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4598
433k
            }
4599
4600
523k
            in++;
4601
523k
            ctxt->input->col++;
4602
523k
            goto get_more;
4603
523k
        }
4604
4605
10.3M
invoke_callback:
4606
11.1M
        while (in > ctxt->input->cur) {
4607
733k
            const xmlChar *tmp = ctxt->input->cur;
4608
733k
            size_t nbchar = in - tmp;
4609
4610
733k
            if (nbchar > XML_MAX_ITEMS)
4611
0
                nbchar = XML_MAX_ITEMS;
4612
733k
            ctxt->input->cur += nbchar;
4613
4614
733k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4615
4616
733k
            line = ctxt->input->line;
4617
733k
            col = ctxt->input->col;
4618
733k
        }
4619
10.3M
        ctxt->input->cur = in;
4620
10.3M
        if (*in == 0xD) {
4621
7.78k
            in++;
4622
7.78k
            if (*in == 0xA) {
4623
2.40k
                ctxt->input->cur = in;
4624
2.40k
                in++;
4625
2.40k
                ctxt->input->line++; ctxt->input->col = 1;
4626
2.40k
                continue; /* while */
4627
2.40k
            }
4628
5.38k
            in--;
4629
5.38k
        }
4630
10.3M
        if (*in == '<') {
4631
231k
            return;
4632
231k
        }
4633
10.1M
        if (*in == '&') {
4634
243k
            return;
4635
243k
        }
4636
9.91M
        if (terminate) {
4637
587
            return;
4638
587
        }
4639
9.91M
        SHRINK;
4640
9.91M
        GROW;
4641
9.91M
        in = ctxt->input->cur;
4642
9.92M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4643
9.91M
             (*in == 0x09) || (*in == 0x0a));
4644
9.91M
    ctxt->input->line = line;
4645
9.91M
    ctxt->input->col = col;
4646
9.91M
    xmlParseCharDataComplex(ctxt, partial);
4647
9.91M
}
4648
4649
/**
4650
 * Always makes progress if the first char isn't '<' or '&'.
4651
 *
4652
 * parse a CharData section.this is the fallback function
4653
 * of #xmlParseCharData when the parsing requires handling
4654
 * of non-ASCII characters.
4655
 *
4656
 * @param ctxt  an XML parser context
4657
 * @param partial  whether the input can end with truncated UTF-8
4658
 */
4659
static void
4660
9.91M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4661
9.91M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4662
9.91M
    int nbchar = 0;
4663
9.91M
    int cur, l;
4664
4665
9.91M
    cur = xmlCurrentCharRecover(ctxt, &l);
4666
160M
    while ((cur != '<') && /* checked */
4667
160M
           (cur != '&') &&
4668
160M
     (IS_CHAR(cur))) {
4669
151M
        if (cur == ']') {
4670
912k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4671
4672
912k
            if (partial && avail < 2)
4673
210
                break;
4674
911k
            if (NXT(1) == ']') {
4675
571k
                if (partial && avail < 3)
4676
414
                    break;
4677
571k
                if (NXT(2) == '>')
4678
278k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4679
571k
            }
4680
911k
        }
4681
4682
151M
  COPY_BUF(buf, nbchar, cur);
4683
  /* move current position before possible calling of ctxt->sax->characters */
4684
151M
  NEXTL(l);
4685
151M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4686
1.27M
      buf[nbchar] = 0;
4687
4688
1.27M
            xmlCharacters(ctxt, buf, nbchar, 0);
4689
1.27M
      nbchar = 0;
4690
1.27M
            SHRINK;
4691
1.27M
  }
4692
151M
  cur = xmlCurrentCharRecover(ctxt, &l);
4693
151M
    }
4694
9.91M
    if (nbchar != 0) {
4695
489k
        buf[nbchar] = 0;
4696
4697
489k
        xmlCharacters(ctxt, buf, nbchar, 0);
4698
489k
    }
4699
    /*
4700
     * cur == 0 can mean
4701
     *
4702
     * - End of buffer.
4703
     * - An actual 0 character.
4704
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4705
     */
4706
9.91M
    if (ctxt->input->cur < ctxt->input->end) {
4707
9.89M
        if ((cur == 0) && (CUR != 0)) {
4708
1.71k
            if (partial == 0) {
4709
1.40k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4710
1.40k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4711
1.40k
                NEXTL(1);
4712
1.40k
            }
4713
9.89M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4714
            /* Generate the error and skip the offending character */
4715
9.66M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4716
9.66M
                              "PCDATA invalid Char value %d\n", cur);
4717
9.66M
            NEXTL(l);
4718
9.66M
        }
4719
9.89M
    }
4720
9.91M
}
4721
4722
/**
4723
 * @deprecated Internal function, don't use.
4724
 * @param ctxt  an XML parser context
4725
 * @param cdata  unused
4726
 */
4727
void
4728
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4729
0
    xmlParseCharDataInternal(ctxt, 0);
4730
0
}
4731
4732
/**
4733
 * Parse an External ID or a Public ID
4734
 *
4735
 * @deprecated Internal function, don't use.
4736
 *
4737
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4739
 *
4740
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4742
 *
4743
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744
 *
4745
 * @param ctxt  an XML parser context
4746
 * @param publicId  a xmlChar** receiving PubidLiteral
4747
 * @param strict  indicate whether we should restrict parsing to only
4748
 *          production [75], see NOTE below
4749
 * @returns the function returns SystemLiteral and in the second
4750
 *                case publicID receives PubidLiteral, is strict is off
4751
 *                it is possible to return NULL and have publicID set.
4752
 */
4753
4754
xmlChar *
4755
76.7k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4756
76.7k
    xmlChar *URI = NULL;
4757
4758
76.7k
    *publicId = NULL;
4759
76.7k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4760
23.5k
        SKIP(6);
4761
23.5k
  if (SKIP_BLANKS == 0) {
4762
2.29k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763
2.29k
                     "Space required after 'SYSTEM'\n");
4764
2.29k
  }
4765
23.5k
  URI = xmlParseSystemLiteral(ctxt);
4766
23.5k
  if (URI == NULL) {
4767
370
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4768
370
        }
4769
53.1k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4770
11.3k
        SKIP(6);
4771
11.3k
  if (SKIP_BLANKS == 0) {
4772
1.27k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4773
1.27k
        "Space required after 'PUBLIC'\n");
4774
1.27k
  }
4775
11.3k
  *publicId = xmlParsePubidLiteral(ctxt);
4776
11.3k
  if (*publicId == NULL) {
4777
765
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778
765
  }
4779
11.3k
  if (strict) {
4780
      /*
4781
       * We don't handle [83] so "S SystemLiteral" is required.
4782
       */
4783
6.36k
      if (SKIP_BLANKS == 0) {
4784
1.84k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785
1.84k
      "Space required after the Public Identifier\n");
4786
1.84k
      }
4787
6.36k
  } else {
4788
      /*
4789
       * We handle [83] so we return immediately, if
4790
       * "S SystemLiteral" is not detected. We skip blanks if no
4791
             * system literal was found, but this is harmless since we must
4792
             * be at the end of a NotationDecl.
4793
       */
4794
4.95k
      if (SKIP_BLANKS == 0) return(NULL);
4795
506
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4796
506
  }
4797
6.66k
  URI = xmlParseSystemLiteral(ctxt);
4798
6.66k
  if (URI == NULL) {
4799
585
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800
585
        }
4801
6.66k
    }
4802
72.0k
    return(URI);
4803
76.7k
}
4804
4805
/**
4806
 * Skip an XML (SGML) comment <!-- .... -->
4807
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4808
 *  must not occur within comments. "
4809
 * This is the slow routine in case the accelerator for ascii didn't work
4810
 *
4811
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4812
 * @param ctxt  an XML parser context
4813
 * @param buf  the already parsed part of the buffer
4814
 * @param len  number of bytes in the buffer
4815
 * @param size  allocated size of the buffer
4816
 */
4817
static void
4818
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4819
73.9k
                       size_t len, size_t size) {
4820
73.9k
    int q, ql;
4821
73.9k
    int r, rl;
4822
73.9k
    int cur, l;
4823
73.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4824
15.3k
                    XML_MAX_HUGE_LENGTH :
4825
73.9k
                    XML_MAX_TEXT_LENGTH;
4826
4827
73.9k
    if (buf == NULL) {
4828
12.7k
        len = 0;
4829
12.7k
  size = XML_PARSER_BUFFER_SIZE;
4830
12.7k
  buf = xmlMalloc(size);
4831
12.7k
  if (buf == NULL) {
4832
105
      xmlErrMemory(ctxt);
4833
105
      return;
4834
105
  }
4835
12.7k
    }
4836
73.8k
    q = xmlCurrentCharRecover(ctxt, &ql);
4837
73.8k
    if (q == 0)
4838
6.20k
        goto not_terminated;
4839
67.6k
    if (!IS_CHAR(q)) {
4840
861
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841
861
                          "xmlParseComment: invalid xmlChar value %d\n",
4842
861
                    q);
4843
861
  xmlFree (buf);
4844
861
  return;
4845
861
    }
4846
66.8k
    NEXTL(ql);
4847
66.8k
    r = xmlCurrentCharRecover(ctxt, &rl);
4848
66.8k
    if (r == 0)
4849
641
        goto not_terminated;
4850
66.1k
    if (!IS_CHAR(r)) {
4851
4.56k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4852
4.56k
                          "xmlParseComment: invalid xmlChar value %d\n",
4853
4.56k
                    r);
4854
4.56k
  xmlFree (buf);
4855
4.56k
  return;
4856
4.56k
    }
4857
61.6k
    NEXTL(rl);
4858
61.6k
    cur = xmlCurrentCharRecover(ctxt, &l);
4859
61.6k
    if (cur == 0)
4860
1.38k
        goto not_terminated;
4861
11.0M
    while (IS_CHAR(cur) && /* checked */
4862
11.0M
           ((cur != '>') ||
4863
10.9M
      (r != '-') || (q != '-'))) {
4864
10.9M
  if ((r == '-') && (q == '-')) {
4865
424k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4866
424k
  }
4867
10.9M
  if (len + 5 >= size) {
4868
12.3k
      xmlChar *tmp;
4869
12.3k
            int newSize;
4870
4871
12.3k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4872
12.3k
            if (newSize < 0) {
4873
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4874
0
                             "Comment too big found", NULL);
4875
0
                xmlFree (buf);
4876
0
                return;
4877
0
            }
4878
12.3k
      tmp = xmlRealloc(buf, newSize);
4879
12.3k
      if (tmp == NULL) {
4880
14
    xmlErrMemory(ctxt);
4881
14
    xmlFree(buf);
4882
14
    return;
4883
14
      }
4884
12.3k
      buf = tmp;
4885
12.3k
            size = newSize;
4886
12.3k
  }
4887
10.9M
  COPY_BUF(buf, len, q);
4888
4889
10.9M
  q = r;
4890
10.9M
  ql = rl;
4891
10.9M
  r = cur;
4892
10.9M
  rl = l;
4893
4894
10.9M
  NEXTL(l);
4895
10.9M
  cur = xmlCurrentCharRecover(ctxt, &l);
4896
4897
10.9M
    }
4898
60.2k
    buf[len] = 0;
4899
60.2k
    if (cur == 0) {
4900
11.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4901
11.8k
                       "Comment not terminated \n<!--%.50s\n", buf);
4902
48.3k
    } else if (!IS_CHAR(cur)) {
4903
1.29k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4904
1.29k
                          "xmlParseComment: invalid xmlChar value %d\n",
4905
1.29k
                    cur);
4906
47.0k
    } else {
4907
47.0k
        NEXT;
4908
47.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4909
47.0k
      (!ctxt->disableSAX))
4910
46.7k
      ctxt->sax->comment(ctxt->userData, buf);
4911
47.0k
    }
4912
60.2k
    xmlFree(buf);
4913
60.2k
    return;
4914
8.22k
not_terminated:
4915
8.22k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4916
8.22k
       "Comment not terminated\n", NULL);
4917
8.22k
    xmlFree(buf);
4918
8.22k
}
4919
4920
/**
4921
 * Parse an XML (SGML) comment. Always consumes '<!'.
4922
 *
4923
 * @deprecated Internal function, don't use.
4924
 *
4925
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4926
 *  must not occur within comments. "
4927
 *
4928
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4929
 * @param ctxt  an XML parser context
4930
 */
4931
void
4932
371k
xmlParseComment(xmlParserCtxt *ctxt) {
4933
371k
    xmlChar *buf = NULL;
4934
371k
    size_t size = XML_PARSER_BUFFER_SIZE;
4935
371k
    size_t len = 0;
4936
371k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4937
36.2k
                       XML_MAX_HUGE_LENGTH :
4938
371k
                       XML_MAX_TEXT_LENGTH;
4939
371k
    const xmlChar *in;
4940
371k
    size_t nbchar = 0;
4941
371k
    int ccol;
4942
4943
    /*
4944
     * Check that there is a comment right here.
4945
     */
4946
371k
    if ((RAW != '<') || (NXT(1) != '!'))
4947
0
        return;
4948
371k
    SKIP(2);
4949
371k
    if ((RAW != '-') || (NXT(1) != '-'))
4950
35
        return;
4951
371k
    SKIP(2);
4952
371k
    GROW;
4953
4954
    /*
4955
     * Accelerated common case where input don't need to be
4956
     * modified before passing it to the handler.
4957
     */
4958
371k
    in = ctxt->input->cur;
4959
372k
    do {
4960
372k
  if (*in == 0xA) {
4961
956k
      do {
4962
956k
    ctxt->input->line++; ctxt->input->col = 1;
4963
956k
    in++;
4964
956k
      } while (*in == 0xA);
4965
3.33k
  }
4966
1.78M
get_more:
4967
1.78M
        ccol = ctxt->input->col;
4968
10.7M
  while (((*in > '-') && (*in <= 0x7F)) ||
4969
3.77M
         ((*in >= 0x20) && (*in < '-')) ||
4970
8.99M
         (*in == 0x09)) {
4971
8.99M
        in++;
4972
8.99M
        ccol++;
4973
8.99M
  }
4974
1.78M
  ctxt->input->col = ccol;
4975
1.78M
  if (*in == 0xA) {
4976
1.04M
      do {
4977
1.04M
    ctxt->input->line++; ctxt->input->col = 1;
4978
1.04M
    in++;
4979
1.04M
      } while (*in == 0xA);
4980
19.4k
      goto get_more;
4981
19.4k
  }
4982
1.76M
  nbchar = in - ctxt->input->cur;
4983
  /*
4984
   * save current set of data
4985
   */
4986
1.76M
  if (nbchar > 0) {
4987
1.47M
            if (nbchar > maxLength - len) {
4988
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4989
0
                                  "Comment too big found", NULL);
4990
0
                xmlFree(buf);
4991
0
                return;
4992
0
            }
4993
1.47M
            if (buf == NULL) {
4994
135k
                if ((*in == '-') && (in[1] == '-'))
4995
73.2k
                    size = nbchar + 1;
4996
61.9k
                else
4997
61.9k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
4998
135k
                buf = xmlMalloc(size);
4999
135k
                if (buf == NULL) {
5000
54
                    xmlErrMemory(ctxt);
5001
54
                    return;
5002
54
                }
5003
135k
                len = 0;
5004
1.33M
            } else if (len + nbchar + 1 >= size) {
5005
66.7k
                xmlChar *new_buf;
5006
66.7k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5007
66.7k
                new_buf = xmlRealloc(buf, size);
5008
66.7k
                if (new_buf == NULL) {
5009
11
                    xmlErrMemory(ctxt);
5010
11
                    xmlFree(buf);
5011
11
                    return;
5012
11
                }
5013
66.7k
                buf = new_buf;
5014
66.7k
            }
5015
1.47M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5016
1.47M
            len += nbchar;
5017
1.47M
            buf[len] = 0;
5018
1.47M
  }
5019
1.76M
  ctxt->input->cur = in;
5020
1.76M
  if (*in == 0xA) {
5021
0
      in++;
5022
0
      ctxt->input->line++; ctxt->input->col = 1;
5023
0
  }
5024
1.76M
  if (*in == 0xD) {
5025
10.7k
      in++;
5026
10.7k
      if (*in == 0xA) {
5027
9.14k
    ctxt->input->cur = in;
5028
9.14k
    in++;
5029
9.14k
    ctxt->input->line++; ctxt->input->col = 1;
5030
9.14k
    goto get_more;
5031
9.14k
      }
5032
1.62k
      in--;
5033
1.62k
  }
5034
1.75M
  SHRINK;
5035
1.75M
  GROW;
5036
1.75M
  in = ctxt->input->cur;
5037
1.75M
  if (*in == '-') {
5038
1.68M
      if (in[1] == '-') {
5039
1.00M
          if (in[2] == '>') {
5040
297k
        SKIP(3);
5041
297k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5042
297k
            (!ctxt->disableSAX)) {
5043
289k
      if (buf != NULL)
5044
73.5k
          ctxt->sax->comment(ctxt->userData, buf);
5045
216k
      else
5046
216k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5047
289k
        }
5048
297k
        if (buf != NULL)
5049
73.8k
            xmlFree(buf);
5050
297k
        return;
5051
297k
    }
5052
710k
    if (buf != NULL) {
5053
687k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5054
687k
                          "Double hyphen within comment: "
5055
687k
                                      "<!--%.50s\n",
5056
687k
              buf);
5057
687k
    } else
5058
23.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5059
23.0k
                          "Double hyphen within comment\n", NULL);
5060
710k
    in++;
5061
710k
    ctxt->input->col++;
5062
710k
      }
5063
1.38M
      in++;
5064
1.38M
      ctxt->input->col++;
5065
1.38M
      goto get_more;
5066
1.68M
  }
5067
1.75M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5068
73.9k
    xmlParseCommentComplex(ctxt, buf, len, size);
5069
73.9k
}
5070
5071
5072
/**
5073
 * Parse the name of a PI
5074
 *
5075
 * @deprecated Internal function, don't use.
5076
 *
5077
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5078
 *
5079
 * @param ctxt  an XML parser context
5080
 * @returns the PITarget name or NULL
5081
 */
5082
5083
const xmlChar *
5084
77.7k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5085
77.7k
    const xmlChar *name;
5086
5087
77.7k
    name = xmlParseName(ctxt);
5088
77.7k
    if ((name != NULL) &&
5089
67.3k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5090
58.2k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5091
54.1k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5092
4.03k
  int i;
5093
4.03k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5094
3.56k
      (name[2] == 'l') && (name[3] == 0)) {
5095
2.63k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096
2.63k
     "XML declaration allowed only at the start of the document\n");
5097
2.63k
      return(name);
5098
2.63k
  } else if (name[3] == 0) {
5099
545
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5100
545
      return(name);
5101
545
  }
5102
2.30k
  for (i = 0;;i++) {
5103
2.30k
      if (xmlW3CPIs[i] == NULL) break;
5104
1.67k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5105
225
          return(name);
5106
1.67k
  }
5107
624
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5108
624
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5109
624
          NULL, NULL);
5110
624
    }
5111
74.3k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5112
819
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5113
819
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5114
819
    }
5115
74.3k
    return(name);
5116
77.7k
}
5117
5118
#ifdef LIBXML_CATALOG_ENABLED
/**
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must consist of the pseudo-attribute `catalog`, an '=' and a
 * quoted URL, with optional blanks around each part and nothing else
 * after the closing quote. Syntax errors raise an XML_WAR_CATALOG_PI
 * warning. A failure to duplicate the URL is reported as a memory error.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * NOTE(review): a missing '=' returns silently, without the warning
     * issued for the other syntax errors. Kept as is.
     */
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;

    /* The URL is delimited by a pair of identical quotes. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;

    URL = xmlStrndup(base, tmp - base);
    if (URL == NULL) {
        /*
         * The pointer and length are valid here, so a NULL result is
         * treated as an allocation failure and reported as such instead
         * of being silently dropped or mistaken for a syntax error.
         */
        xmlErrMemory(ctxt);
        return;
    }

    /* Only blanks may follow the closing quote. */
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    /*
     * Unfortunately, the catalog API doesn't report OOM errors.
     * xmlGetLastError isn't very helpful since we don't know
     * where the last error came from. We'd have to reset it
     * before this call and restore it afterwards.
     */
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5184
5185
/**
5186
 * Parse an XML Processing Instruction.
5187
 *
5188
 * @deprecated Internal function, don't use.
5189
 *
5190
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5191
 *
5192
 * The processing is transferred to SAX once parsed.
5193
 *
5194
 * @param ctxt  an XML parser context
5195
 */
5196
5197
void
5198
77.7k
xmlParsePI(xmlParserCtxt *ctxt) {
5199
77.7k
    xmlChar *buf = NULL;
5200
77.7k
    size_t len = 0;
5201
77.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5202
77.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5203
20.8k
                       XML_MAX_HUGE_LENGTH :
5204
77.7k
                       XML_MAX_TEXT_LENGTH;
5205
77.7k
    int cur, l;
5206
77.7k
    const xmlChar *target;
5207
5208
77.7k
    if ((RAW == '<') && (NXT(1) == '?')) {
5209
  /*
5210
   * this is a Processing Instruction.
5211
   */
5212
77.7k
  SKIP(2);
5213
5214
  /*
5215
   * Parse the target name and check for special support like
5216
   * namespace.
5217
   */
5218
77.7k
        target = xmlParsePITarget(ctxt);
5219
77.7k
  if (target != NULL) {
5220
67.3k
      if ((RAW == '?') && (NXT(1) == '>')) {
5221
41.4k
    SKIP(2);
5222
5223
    /*
5224
     * SAX: PI detected.
5225
     */
5226
41.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5227
41.1k
        (ctxt->sax->processingInstruction != NULL))
5228
41.1k
        ctxt->sax->processingInstruction(ctxt->userData,
5229
41.1k
                                         target, NULL);
5230
41.4k
    return;
5231
41.4k
      }
5232
25.9k
      buf = xmlMalloc(size);
5233
25.9k
      if (buf == NULL) {
5234
89
    xmlErrMemory(ctxt);
5235
89
    return;
5236
89
      }
5237
25.8k
      if (SKIP_BLANKS == 0) {
5238
20.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239
20.3k
        "ParsePI: PI %s space expected\n", target);
5240
20.3k
      }
5241
25.8k
      cur = xmlCurrentCharRecover(ctxt, &l);
5242
5.92M
      while (IS_CHAR(cur) && /* checked */
5243
5.92M
       ((cur != '?') || (NXT(1) != '>'))) {
5244
5.90M
    if (len + 5 >= size) {
5245
3.48k
        xmlChar *tmp;
5246
3.48k
                    int newSize;
5247
5248
3.48k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5249
3.48k
                    if (newSize < 0) {
5250
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5251
0
                                          "PI %s too big found", target);
5252
0
                        xmlFree(buf);
5253
0
                        return;
5254
0
                    }
5255
3.48k
        tmp = xmlRealloc(buf, newSize);
5256
3.48k
        if (tmp == NULL) {
5257
14
      xmlErrMemory(ctxt);
5258
14
      xmlFree(buf);
5259
14
      return;
5260
14
        }
5261
3.47k
        buf = tmp;
5262
3.47k
                    size = newSize;
5263
3.47k
    }
5264
5.90M
    COPY_BUF(buf, len, cur);
5265
5.90M
    NEXTL(l);
5266
5.90M
    cur = xmlCurrentCharRecover(ctxt, &l);
5267
5.90M
      }
5268
25.8k
      buf[len] = 0;
5269
25.8k
      if (cur != '?') {
5270
8.13k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5271
8.13k
          "ParsePI: PI %s never end ...\n", target);
5272
17.7k
      } else {
5273
17.7k
    SKIP(2);
5274
5275
17.7k
#ifdef LIBXML_CATALOG_ENABLED
5276
17.7k
    if ((ctxt->inSubset == 0) &&
5277
16.4k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5278
417
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5279
5280
417
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5281
210
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5282
210
       (allow == XML_CATA_ALLOW_ALL)))
5283
0
      xmlParseCatalogPI(ctxt, buf);
5284
417
    }
5285
17.7k
#endif
5286
5287
    /*
5288
     * SAX: PI detected.
5289
     */
5290
17.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291
17.3k
        (ctxt->sax->processingInstruction != NULL))
5292
17.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5293
17.3k
                                         target, buf);
5294
17.7k
      }
5295
25.8k
      xmlFree(buf);
5296
25.8k
  } else {
5297
10.3k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5298
10.3k
  }
5299
77.7k
    }
5300
77.7k
}
5301
5302
/**
5303
 * Parse a notation declaration. Always consumes '<!'.
5304
 *
5305
 * @deprecated Internal function, don't use.
5306
 *
5307
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5308
 *                           S? '>'
5309
 *
5310
 * Hence there is actually 3 choices:
5311
 *
5312
 *     'PUBLIC' S PubidLiteral
5313
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5314
 *     'SYSTEM' S SystemLiteral
5315
 *
5316
 * See the NOTE on #xmlParseExternalID.
5317
 *
5318
 * @param ctxt  an XML parser context
5319
 */
5320
5321
void
5322
7.82k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5323
7.82k
    const xmlChar *name;
5324
7.82k
    xmlChar *Pubid;
5325
7.82k
    xmlChar *Systemid;
5326
5327
7.82k
    if ((CUR != '<') || (NXT(1) != '!'))
5328
0
        return;
5329
7.82k
    SKIP(2);
5330
5331
7.82k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5332
7.71k
#ifdef LIBXML_VALID_ENABLED
5333
7.71k
  int oldInputNr = ctxt->inputNr;
5334
7.71k
#endif
5335
5336
7.71k
  SKIP(8);
5337
7.71k
  if (SKIP_BLANKS_PE == 0) {
5338
234
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5339
234
         "Space required after '<!NOTATION'\n");
5340
234
      return;
5341
234
  }
5342
5343
7.48k
        name = xmlParseName(ctxt);
5344
7.48k
  if (name == NULL) {
5345
221
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5346
221
      return;
5347
221
  }
5348
7.26k
  if (xmlStrchr(name, ':') != NULL) {
5349
380
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5350
380
         "colons are forbidden from notation names '%s'\n",
5351
380
         name, NULL, NULL);
5352
380
  }
5353
7.26k
  if (SKIP_BLANKS_PE == 0) {
5354
593
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5355
593
         "Space required after the NOTATION name'\n");
5356
593
      return;
5357
593
  }
5358
5359
  /*
5360
   * Parse the IDs.
5361
   */
5362
6.66k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363
6.66k
  SKIP_BLANKS_PE;
5364
5365
6.66k
  if (RAW == '>') {
5366
2.92k
#ifdef LIBXML_VALID_ENABLED
5367
2.92k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5368
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5369
0
                           "Notation declaration doesn't start and stop"
5370
0
                                 " in the same entity\n",
5371
0
                                 NULL, NULL);
5372
0
      }
5373
2.92k
#endif
5374
2.92k
      NEXT;
5375
2.92k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376
2.46k
    (ctxt->sax->notationDecl != NULL))
5377
2.46k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378
3.74k
  } else {
5379
3.74k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380
3.74k
  }
5381
6.66k
  if (Systemid != NULL) xmlFree(Systemid);
5382
6.66k
  if (Pubid != NULL) xmlFree(Pubid);
5383
6.66k
    }
5384
7.82k
}
5385
5386
/**
5387
 * Parse an entity declaration. Always consumes '<!'.
5388
 *
5389
 * @deprecated Internal function, don't use.
5390
 *
5391
 *     [70] EntityDecl ::= GEDecl | PEDecl
5392
 *
5393
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5394
 *
5395
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5396
 *
5397
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5398
 *
5399
 *     [74] PEDef ::= EntityValue | ExternalID
5400
 *
5401
 *     [76] NDataDecl ::= S 'NDATA' S Name
5402
 *
5403
 * [ VC: Notation Declared ]
5404
 * The Name must match the declared name of a notation.
5405
 *
5406
 * @param ctxt  an XML parser context
5407
 */
5408
5409
void
5410
87.4k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5411
87.4k
    const xmlChar *name = NULL;
5412
87.4k
    xmlChar *value = NULL;
5413
87.4k
    xmlChar *URI = NULL, *literal = NULL;
5414
87.4k
    const xmlChar *ndata = NULL;
5415
87.4k
    int isParameter = 0;
5416
87.4k
    xmlChar *orig = NULL;
5417
5418
87.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5419
0
        return;
5420
87.4k
    SKIP(2);
5421
5422
    /* GROW; done in the caller */
5423
87.4k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5424
87.3k
#ifdef LIBXML_VALID_ENABLED
5425
87.3k
  int oldInputNr = ctxt->inputNr;
5426
87.3k
#endif
5427
5428
87.3k
  SKIP(6);
5429
87.3k
  if (SKIP_BLANKS_PE == 0) {
5430
6.56k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431
6.56k
         "Space required after '<!ENTITY'\n");
5432
6.56k
  }
5433
5434
87.3k
  if (RAW == '%') {
5435
37.8k
      NEXT;
5436
37.8k
      if (SKIP_BLANKS_PE == 0) {
5437
2.57k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438
2.57k
             "Space required after '%%'\n");
5439
2.57k
      }
5440
37.8k
      isParameter = 1;
5441
37.8k
  }
5442
5443
87.3k
        name = xmlParseName(ctxt);
5444
87.3k
  if (name == NULL) {
5445
1.11k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5446
1.11k
                     "xmlParseEntityDecl: no name\n");
5447
1.11k
            return;
5448
1.11k
  }
5449
86.2k
  if (xmlStrchr(name, ':') != NULL) {
5450
694
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5451
694
         "colons are forbidden from entities names '%s'\n",
5452
694
         name, NULL, NULL);
5453
694
  }
5454
86.2k
  if (SKIP_BLANKS_PE == 0) {
5455
8.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456
8.16k
         "Space required after the entity name\n");
5457
8.16k
  }
5458
5459
  /*
5460
   * handle the various case of definitions...
5461
   */
5462
86.2k
  if (isParameter) {
5463
37.7k
      if ((RAW == '"') || (RAW == '\'')) {
5464
26.4k
          value = xmlParseEntityValue(ctxt, &orig);
5465
26.4k
    if (value) {
5466
26.1k
        if ((ctxt->sax != NULL) &&
5467
26.1k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468
22.1k
      ctxt->sax->entityDecl(ctxt->userData, name,
5469
22.1k
                        XML_INTERNAL_PARAMETER_ENTITY,
5470
22.1k
            NULL, NULL, value);
5471
26.1k
    }
5472
26.4k
      } else {
5473
11.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5474
11.2k
    if ((URI == NULL) && (literal == NULL)) {
5475
441
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476
441
    }
5477
11.2k
    if (URI) {
5478
10.7k
                    if (xmlStrchr(URI, '#')) {
5479
541
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480
10.2k
                    } else {
5481
10.2k
                        if ((ctxt->sax != NULL) &&
5482
10.2k
                            (!ctxt->disableSAX) &&
5483
9.36k
                            (ctxt->sax->entityDecl != NULL))
5484
9.36k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5485
9.36k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5486
9.36k
                                        literal, URI, NULL);
5487
10.2k
                    }
5488
10.7k
    }
5489
11.2k
      }
5490
48.5k
  } else {
5491
48.5k
      if ((RAW == '"') || (RAW == '\'')) {
5492
37.3k
          value = xmlParseEntityValue(ctxt, &orig);
5493
37.3k
    if ((ctxt->sax != NULL) &&
5494
37.3k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5495
30.2k
        ctxt->sax->entityDecl(ctxt->userData, name,
5496
30.2k
        XML_INTERNAL_GENERAL_ENTITY,
5497
30.2k
        NULL, NULL, value);
5498
    /*
5499
     * For expat compatibility in SAX mode.
5500
     */
5501
37.3k
    if ((ctxt->myDoc == NULL) ||
5502
36.8k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5503
5.08k
        if (ctxt->myDoc == NULL) {
5504
492
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5505
492
      if (ctxt->myDoc == NULL) {
5506
3
          xmlErrMemory(ctxt);
5507
3
          goto done;
5508
3
      }
5509
489
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5510
489
        }
5511
5.08k
        if (ctxt->myDoc->intSubset == NULL) {
5512
489
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5513
489
              BAD_CAST "fake", NULL, NULL);
5514
489
                        if (ctxt->myDoc->intSubset == NULL) {
5515
4
                            xmlErrMemory(ctxt);
5516
4
                            goto done;
5517
4
                        }
5518
489
                    }
5519
5520
5.08k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521
5.08k
                    NULL, NULL, value);
5522
5.08k
    }
5523
37.3k
      } else {
5524
11.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5525
11.1k
    if ((URI == NULL) && (literal == NULL)) {
5526
3.17k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5527
3.17k
    }
5528
11.1k
    if (URI) {
5529
7.69k
                    if (xmlStrchr(URI, '#')) {
5530
329
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5531
329
                    }
5532
7.69k
    }
5533
11.1k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5534
2.87k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535
2.87k
           "Space required before 'NDATA'\n");
5536
2.87k
    }
5537
11.1k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5538
1.48k
        SKIP(5);
5539
1.48k
        if (SKIP_BLANKS_PE == 0) {
5540
272
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5541
272
               "Space required after 'NDATA'\n");
5542
272
        }
5543
1.48k
        ndata = xmlParseName(ctxt);
5544
1.48k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5545
1.18k
            (ctxt->sax->unparsedEntityDecl != NULL))
5546
1.18k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5547
1.18k
            literal, URI, ndata);
5548
9.70k
    } else {
5549
9.70k
        if ((ctxt->sax != NULL) &&
5550
9.70k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5551
9.06k
      ctxt->sax->entityDecl(ctxt->userData, name,
5552
9.06k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5553
9.06k
            literal, URI, NULL);
5554
        /*
5555
         * For expat compatibility in SAX mode.
5556
         * assuming the entity replacement was asked for
5557
         */
5558
9.70k
        if ((ctxt->replaceEntities != 0) &&
5559
5.66k
      ((ctxt->myDoc == NULL) ||
5560
5.63k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5561
226
      if (ctxt->myDoc == NULL) {
5562
32
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5563
32
          if (ctxt->myDoc == NULL) {
5564
2
              xmlErrMemory(ctxt);
5565
2
        goto done;
5566
2
          }
5567
30
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5568
30
      }
5569
5570
224
      if (ctxt->myDoc->intSubset == NULL) {
5571
30
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5572
30
            BAD_CAST "fake", NULL, NULL);
5573
30
                            if (ctxt->myDoc->intSubset == NULL) {
5574
2
                                xmlErrMemory(ctxt);
5575
2
                                goto done;
5576
2
                            }
5577
30
                        }
5578
222
      xmlSAX2EntityDecl(ctxt, name,
5579
222
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580
222
                  literal, URI, NULL);
5581
222
        }
5582
9.70k
    }
5583
11.1k
      }
5584
48.5k
  }
5585
86.2k
  SKIP_BLANKS_PE;
5586
86.2k
  if (RAW != '>') {
5587
9.75k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5588
9.75k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5589
76.5k
  } else {
5590
76.5k
#ifdef LIBXML_VALID_ENABLED
5591
76.5k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5592
204
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5593
204
                           "Entity declaration doesn't start and stop in"
5594
204
                                 " the same entity\n",
5595
204
                                 NULL, NULL);
5596
204
      }
5597
76.5k
#endif
5598
76.5k
      NEXT;
5599
76.5k
  }
5600
86.2k
  if (orig != NULL) {
5601
      /*
5602
       * Ugly mechanism to save the raw entity value.
5603
       */
5604
61.9k
      xmlEntityPtr cur = NULL;
5605
5606
61.9k
      if (isParameter) {
5607
26.1k
          if ((ctxt->sax != NULL) &&
5608
26.1k
        (ctxt->sax->getParameterEntity != NULL))
5609
26.1k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5610
35.8k
      } else {
5611
35.8k
          if ((ctxt->sax != NULL) &&
5612
35.8k
        (ctxt->sax->getEntity != NULL))
5613
35.8k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5614
35.8k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5615
1.89k
        cur = xmlSAX2GetEntity(ctxt, name);
5616
1.89k
    }
5617
35.8k
      }
5618
61.9k
            if ((cur != NULL) && (cur->orig == NULL)) {
5619
32.5k
    cur->orig = orig;
5620
32.5k
                orig = NULL;
5621
32.5k
      }
5622
61.9k
  }
5623
5624
86.2k
done:
5625
86.2k
  if (value != NULL) xmlFree(value);
5626
86.2k
  if (URI != NULL) xmlFree(URI);
5627
86.2k
  if (literal != NULL) xmlFree(literal);
5628
86.2k
        if (orig != NULL) xmlFree(orig);
5629
86.2k
    }
5630
87.4k
}
5631
5632
/**
5633
 * Parse an attribute default declaration
5634
 *
5635
 * @deprecated Internal function, don't use.
5636
 *
5637
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5638
 *
5639
 * [ VC: Required Attribute ]
5640
 * if the default declaration is the keyword \#REQUIRED, then the
5641
 * attribute must be specified for all elements of the type in the
5642
 * attribute-list declaration.
5643
 *
5644
 * [ VC: Attribute Default Legal ]
5645
 * The declared default value must meet the lexical constraints of
5646
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5647
 *
5648
 * [ VC: Fixed Attribute Default ]
5649
 * if an attribute has a default value declared with the \#FIXED
5650
 * keyword, instances of that attribute must match the default value.
5651
 *
5652
 * [ WFC: No < in Attribute Values ]
5653
 * handled in #xmlParseAttValue
5654
 *
5655
 * @param ctxt  an XML parser context
5656
 * @param value  Receive a possible fixed default value for the attribute
5657
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5658
 *          or XML_ATTRIBUTE_FIXED.
5659
 */
5660
5661
int
5662
105k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5663
105k
    int val;
5664
105k
    xmlChar *ret;
5665
5666
105k
    *value = NULL;
5667
105k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5668
4.76k
  SKIP(9);
5669
4.76k
  return(XML_ATTRIBUTE_REQUIRED);
5670
4.76k
    }
5671
100k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5672
35.2k
  SKIP(8);
5673
35.2k
  return(XML_ATTRIBUTE_IMPLIED);
5674
35.2k
    }
5675
65.2k
    val = XML_ATTRIBUTE_NONE;
5676
65.2k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5677
3.80k
  SKIP(6);
5678
3.80k
  val = XML_ATTRIBUTE_FIXED;
5679
3.80k
  if (SKIP_BLANKS_PE == 0) {
5680
390
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5681
390
         "Space required after '#FIXED'\n");
5682
390
  }
5683
3.80k
    }
5684
65.2k
    ret = xmlParseAttValue(ctxt);
5685
65.2k
    if (ret == NULL) {
5686
9.83k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5687
9.83k
           "Attribute default value declaration error\n");
5688
9.83k
    } else
5689
55.3k
        *value = ret;
5690
65.2k
    return(val);
5691
100k
}
5692
5693
/**
5694
 * Parse a Notation attribute type.
5695
 *
5696
 * @deprecated Internal function, don't use.
5697
 *
5698
 * Note: the leading 'NOTATION' S part has already been parsed...
5699
 *
5700
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701
 *
5702
 * [ VC: Notation Attributes ]
5703
 * Values of this type must match one of the notation names included
5704
 * in the declaration; all notation names in the declaration must be declared.
5705
 *
5706
 * @param ctxt  an XML parser context
5707
 * @returns the notation attribute tree built while parsing
5708
 */
5709
5710
xmlEnumeration *
5711
1.88k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5712
1.88k
    const xmlChar *name;
5713
1.88k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5714
5715
1.88k
    if (RAW != '(') {
5716
207
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5717
207
  return(NULL);
5718
207
    }
5719
3.55k
    do {
5720
3.55k
        NEXT;
5721
3.55k
  SKIP_BLANKS_PE;
5722
3.55k
        name = xmlParseName(ctxt);
5723
3.55k
  if (name == NULL) {
5724
225
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5725
225
         "Name expected in NOTATION declaration\n");
5726
225
            xmlFreeEnumeration(ret);
5727
225
      return(NULL);
5728
225
  }
5729
3.32k
        tmp = NULL;
5730
3.32k
#ifdef LIBXML_VALID_ENABLED
5731
3.32k
        if (ctxt->validate) {
5732
2.22k
            tmp = ret;
5733
5.79k
            while (tmp != NULL) {
5734
4.32k
                if (xmlStrEqual(name, tmp->name)) {
5735
752
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5736
752
              "standalone: attribute notation value token %s duplicated\n",
5737
752
                                     name, NULL);
5738
752
                    if (!xmlDictOwns(ctxt->dict, name))
5739
0
                        xmlFree((xmlChar *) name);
5740
752
                    break;
5741
752
                }
5742
3.56k
                tmp = tmp->next;
5743
3.56k
            }
5744
2.22k
        }
5745
3.32k
#endif /* LIBXML_VALID_ENABLED */
5746
3.32k
  if (tmp == NULL) {
5747
2.57k
      cur = xmlCreateEnumeration(name);
5748
2.57k
      if (cur == NULL) {
5749
20
                xmlErrMemory(ctxt);
5750
20
                xmlFreeEnumeration(ret);
5751
20
                return(NULL);
5752
20
            }
5753
2.55k
      if (last == NULL) ret = last = cur;
5754
1.11k
      else {
5755
1.11k
    last->next = cur;
5756
1.11k
    last = cur;
5757
1.11k
      }
5758
2.55k
  }
5759
3.30k
  SKIP_BLANKS_PE;
5760
3.30k
    } while (RAW == '|');
5761
1.43k
    if (RAW != ')') {
5762
500
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5763
500
        xmlFreeEnumeration(ret);
5764
500
  return(NULL);
5765
500
    }
5766
931
    NEXT;
5767
931
    return(ret);
5768
1.43k
}
5769
5770
/**
5771
 * Parse an Enumeration attribute type.
5772
 *
5773
 * @deprecated Internal function, don't use.
5774
 *
5775
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5776
 *
5777
 * [ VC: Enumeration ]
5778
 * Values of this type must match one of the Nmtoken tokens in
5779
 * the declaration
5780
 *
5781
 * @param ctxt  an XML parser context
5782
 * @returns the enumeration attribute tree built while parsing
5783
 */
5784
5785
xmlEnumeration *
5786
27.8k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5787
27.8k
    xmlChar *name;
5788
27.8k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5789
5790
27.8k
    if (RAW != '(') {
5791
1.21k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5792
1.21k
  return(NULL);
5793
1.21k
    }
5794
38.3k
    do {
5795
38.3k
        NEXT;
5796
38.3k
  SKIP_BLANKS_PE;
5797
38.3k
        name = xmlParseNmtoken(ctxt);
5798
38.3k
  if (name == NULL) {
5799
294
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5800
294
      return(ret);
5801
294
  }
5802
38.0k
        tmp = NULL;
5803
38.0k
#ifdef LIBXML_VALID_ENABLED
5804
38.0k
        if (ctxt->validate) {
5805
25.6k
            tmp = ret;
5806
45.8k
            while (tmp != NULL) {
5807
20.7k
                if (xmlStrEqual(name, tmp->name)) {
5808
584
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5809
584
              "standalone: attribute enumeration value token %s duplicated\n",
5810
584
                                     name, NULL);
5811
584
                    if (!xmlDictOwns(ctxt->dict, name))
5812
584
                        xmlFree(name);
5813
584
                    break;
5814
584
                }
5815
20.1k
                tmp = tmp->next;
5816
20.1k
            }
5817
25.6k
        }
5818
38.0k
#endif /* LIBXML_VALID_ENABLED */
5819
38.0k
  if (tmp == NULL) {
5820
37.4k
      cur = xmlCreateEnumeration(name);
5821
37.4k
      if (!xmlDictOwns(ctxt->dict, name))
5822
37.4k
    xmlFree(name);
5823
37.4k
      if (cur == NULL) {
5824
33
                xmlErrMemory(ctxt);
5825
33
                xmlFreeEnumeration(ret);
5826
33
                return(NULL);
5827
33
            }
5828
37.4k
      if (last == NULL) ret = last = cur;
5829
11.1k
      else {
5830
11.1k
    last->next = cur;
5831
11.1k
    last = cur;
5832
11.1k
      }
5833
37.4k
  }
5834
38.0k
  SKIP_BLANKS_PE;
5835
38.0k
    } while (RAW == '|');
5836
26.2k
    if (RAW != ')') {
5837
1.14k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5838
1.14k
  return(ret);
5839
1.14k
    }
5840
25.1k
    NEXT;
5841
25.1k
    return(ret);
5842
26.2k
}
5843
5844
/**
5845
 * Parse an Enumerated attribute type.
5846
 *
5847
 * @deprecated Internal function, don't use.
5848
 *
5849
 *     [57] EnumeratedType ::= NotationType | Enumeration
5850
 *
5851
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5852
 *
5853
 * @param ctxt  an XML parser context
5854
 * @param tree  the enumeration tree built while parsing
5855
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5856
 */
5857
5858
int
5859
29.9k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5860
29.9k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5861
2.12k
  SKIP(8);
5862
2.12k
  if (SKIP_BLANKS_PE == 0) {
5863
244
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5864
244
         "Space required after 'NOTATION'\n");
5865
244
      return(0);
5866
244
  }
5867
1.88k
  *tree = xmlParseNotationType(ctxt);
5868
1.88k
  if (*tree == NULL) return(0);
5869
931
  return(XML_ATTRIBUTE_NOTATION);
5870
1.88k
    }
5871
27.8k
    *tree = xmlParseEnumerationType(ctxt);
5872
27.8k
    if (*tree == NULL) return(0);
5873
26.3k
    return(XML_ATTRIBUTE_ENUMERATION);
5874
27.8k
}
5875
5876
/**
5877
 * Parse the Attribute list def for an element
5878
 *
5879
 * @deprecated Internal function, don't use.
5880
 *
5881
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5882
 *
5883
 *     [55] StringType ::= 'CDATA'
5884
 *
5885
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5886
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5887
 *
5888
 * Validity constraints for attribute values syntax are checked in
5889
 * #xmlValidateAttributeValue
5890
 *
5891
 * [ VC: ID ]
5892
 * Values of type ID must match the Name production. A name must not
5893
 * appear more than once in an XML document as a value of this type;
5894
 * i.e., ID values must uniquely identify the elements which bear them.
5895
 *
5896
 * [ VC: One ID per Element Type ]
5897
 * No element type may have more than one ID attribute specified.
5898
 *
5899
 * [ VC: ID Attribute Default ]
5900
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5901
 *
5902
 * [ VC: IDREF ]
5903
 * Values of type IDREF must match the Name production, and values
5904
 * of type IDREFS must match Names; each IDREF Name must match the value
5905
 * of an ID attribute on some element in the XML document; i.e. IDREF
5906
 * values must match the value of some ID attribute.
5907
 *
5908
 * [ VC: Entity Name ]
5909
 * Values of type ENTITY must match the Name production, values
5910
 * of type ENTITIES must match Names; each Entity Name must match the
5911
 * name of an unparsed entity declared in the DTD.
5912
 *
5913
 * [ VC: Name Token ]
5914
 * Values of type NMTOKEN must match the Nmtoken production; values
5915
 * of type NMTOKENS must match Nmtokens.
5916
 *
5917
 * @param ctxt  an XML parser context
5918
 * @param tree  the enumeration tree built while parsing
5919
 * @returns the attribute type
5920
 */
5921
int
5922
110k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5923
110k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5924
16.7k
  SKIP(5);
5925
16.7k
  return(XML_ATTRIBUTE_CDATA);
5926
94.1k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5927
14.7k
  SKIP(6);
5928
14.7k
  return(XML_ATTRIBUTE_IDREFS);
5929
79.3k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5930
1.57k
  SKIP(5);
5931
1.57k
  return(XML_ATTRIBUTE_IDREF);
5932
77.7k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5933
33.0k
        SKIP(2);
5934
33.0k
  return(XML_ATTRIBUTE_ID);
5935
44.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5936
1.90k
  SKIP(6);
5937
1.90k
  return(XML_ATTRIBUTE_ENTITY);
5938
42.8k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5939
2.59k
  SKIP(8);
5940
2.59k
  return(XML_ATTRIBUTE_ENTITIES);
5941
40.2k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5942
3.25k
  SKIP(8);
5943
3.25k
  return(XML_ATTRIBUTE_NMTOKENS);
5944
36.9k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5945
6.98k
  SKIP(7);
5946
6.98k
  return(XML_ATTRIBUTE_NMTOKEN);
5947
6.98k
     }
5948
29.9k
     return(xmlParseEnumeratedType(ctxt, tree));
5949
110k
}
5950
5951
/**
5952
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5953
 *
5954
 * @deprecated Internal function, don't use.
5955
 *
5956
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5957
 *
5958
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5959
 * @param ctxt  an XML parser context
5960
 */
5961
void
5962
81.2k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5963
81.2k
    const xmlChar *elemName;
5964
81.2k
    const xmlChar *attrName;
5965
81.2k
    xmlEnumerationPtr tree;
5966
5967
81.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5968
0
        return;
5969
81.2k
    SKIP(2);
5970
5971
81.2k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972
81.0k
#ifdef LIBXML_VALID_ENABLED
5973
81.0k
  int oldInputNr = ctxt->inputNr;
5974
81.0k
#endif
5975
5976
81.0k
  SKIP(7);
5977
81.0k
  if (SKIP_BLANKS_PE == 0) {
5978
12.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979
12.2k
                     "Space required after '<!ATTLIST'\n");
5980
12.2k
  }
5981
81.0k
        elemName = xmlParseName(ctxt);
5982
81.0k
  if (elemName == NULL) {
5983
2.42k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984
2.42k
         "ATTLIST: no name for Element\n");
5985
2.42k
      return;
5986
2.42k
  }
5987
78.6k
  SKIP_BLANKS_PE;
5988
78.6k
  GROW;
5989
169k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5990
129k
      int type;
5991
129k
      int def;
5992
129k
      xmlChar *defaultValue = NULL;
5993
5994
129k
      GROW;
5995
129k
            tree = NULL;
5996
129k
      attrName = xmlParseName(ctxt);
5997
129k
      if (attrName == NULL) {
5998
12.4k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999
12.4k
             "ATTLIST: no name for Attribute\n");
6000
12.4k
    break;
6001
12.4k
      }
6002
117k
      GROW;
6003
117k
      if (SKIP_BLANKS_PE == 0) {
6004
6.58k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005
6.58k
            "Space required after the attribute name\n");
6006
6.58k
    break;
6007
6.58k
      }
6008
6009
110k
      type = xmlParseAttributeType(ctxt, &tree);
6010
110k
      if (type <= 0) {
6011
2.72k
          break;
6012
2.72k
      }
6013
6014
108k
      GROW;
6015
108k
      if (SKIP_BLANKS_PE == 0) {
6016
2.96k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017
2.96k
             "Space required after the attribute type\n");
6018
2.96k
          if (tree != NULL)
6019
1.21k
        xmlFreeEnumeration(tree);
6020
2.96k
    break;
6021
2.96k
      }
6022
6023
105k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6024
105k
      if (def <= 0) {
6025
0
                if (defaultValue != NULL)
6026
0
        xmlFree(defaultValue);
6027
0
          if (tree != NULL)
6028
0
        xmlFreeEnumeration(tree);
6029
0
          break;
6030
0
      }
6031
105k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6032
51.6k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6033
6034
105k
      GROW;
6035
105k
            if (RAW != '>') {
6036
78.2k
    if (SKIP_BLANKS_PE == 0) {
6037
13.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038
13.9k
      "Space required after the attribute default value\n");
6039
13.9k
        if (defaultValue != NULL)
6040
4.25k
      xmlFree(defaultValue);
6041
13.9k
        if (tree != NULL)
6042
3.64k
      xmlFreeEnumeration(tree);
6043
13.9k
        break;
6044
13.9k
    }
6045
78.2k
      }
6046
91.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6047
87.8k
    (ctxt->sax->attributeDecl != NULL))
6048
87.8k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6049
87.8k
                          type, def, defaultValue, tree);
6050
3.34k
      else if (tree != NULL)
6051
1.21k
    xmlFreeEnumeration(tree);
6052
6053
91.2k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6054
42.2k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6055
42.2k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6056
42.2k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6057
42.2k
      }
6058
91.2k
      if (ctxt->sax2) {
6059
77.1k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6060
77.1k
      }
6061
91.2k
      if (defaultValue != NULL)
6062
51.1k
          xmlFree(defaultValue);
6063
91.2k
      GROW;
6064
91.2k
  }
6065
78.6k
  if (RAW == '>') {
6066
42.2k
#ifdef LIBXML_VALID_ENABLED
6067
42.2k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6068
82
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6069
82
                                 "Attribute list declaration doesn't start and"
6070
82
                                 " stop in the same entity\n",
6071
82
                                 NULL, NULL);
6072
82
      }
6073
42.2k
#endif
6074
42.2k
      NEXT;
6075
42.2k
  }
6076
78.6k
    }
6077
81.2k
}
6078
6079
/**
6080
 * Handle PEs and check that we don't pop the entity that started
6081
 * a balanced group.
6082
 *
6083
 * @param ctxt  parser context
6084
 * @param openInputNr  input nr of the entity with opening '('
6085
 */
6086
static void
6087
1.82M
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6088
1.82M
    SKIP_BLANKS;
6089
1.82M
    GROW;
6090
6091
1.82M
    (void) openInputNr;
6092
6093
1.82M
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6094
1.65M
        return;
6095
6096
185k
    while (!PARSER_STOPPED(ctxt)) {
6097
185k
        if (ctxt->input->cur >= ctxt->input->end) {
6098
6.18k
#ifdef LIBXML_VALID_ENABLED
6099
6.18k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6100
1.86k
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
1.86k
                                 "Element content declaration doesn't start "
6102
1.86k
                                 "and stop in the same entity\n",
6103
1.86k
                                 NULL, NULL);
6104
1.86k
            }
6105
6.18k
#endif
6106
6.18k
            if (PARSER_IN_PE(ctxt))
6107
6.06k
                xmlPopPE(ctxt);
6108
125
            else
6109
125
                break;
6110
179k
        } else if (RAW == '%') {
6111
8.41k
            xmlParsePERefInternal(ctxt, 0);
6112
170k
        } else {
6113
170k
            break;
6114
170k
        }
6115
6116
14.4k
        SKIP_BLANKS;
6117
14.4k
        GROW;
6118
14.4k
    }
6119
171k
}
6120
6121
/**
6122
 * Parse the declaration for a Mixed Element content
6123
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6124
 *
6125
 * @deprecated Internal function, don't use.
6126
 *
6127
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128
 *                    '(' S? '#PCDATA' S? ')'
6129
 *
6130
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131
 *
6132
 * [ VC: No Duplicate Types ]
6133
 * The same name must not appear more than once in a single
6134
 * mixed-content declaration.
6135
 *
6136
 * @param ctxt  an XML parser context
6137
 * @param openInputNr  the input used for the current entity, needed for
6138
 * boundary checks
6139
 * @returns the list of the xmlElementContent describing the element choices
6140
 */
6141
xmlElementContent *
6142
12.9k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6143
12.9k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6144
12.9k
    const xmlChar *elem = NULL;
6145
6146
12.9k
    GROW;
6147
12.9k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6148
12.9k
  SKIP(7);
6149
12.9k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6150
12.9k
  if (RAW == ')') {
6151
7.04k
#ifdef LIBXML_VALID_ENABLED
6152
7.04k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6153
3
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6154
3
                                 "Element content declaration doesn't start "
6155
3
                                 "and stop in the same entity\n",
6156
3
                                 NULL, NULL);
6157
3
      }
6158
7.04k
#endif
6159
7.04k
      NEXT;
6160
7.04k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6161
7.04k
      if (ret == NULL)
6162
9
                goto mem_error;
6163
7.03k
      if (RAW == '*') {
6164
474
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6165
474
    NEXT;
6166
474
      }
6167
7.03k
      return(ret);
6168
7.04k
  }
6169
5.87k
  if ((RAW == '(') || (RAW == '|')) {
6170
5.59k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6171
5.59k
      if (ret == NULL)
6172
9
                goto mem_error;
6173
5.59k
  }
6174
41.6k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6175
36.4k
      NEXT;
6176
36.4k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6177
36.4k
            if (n == NULL)
6178
18
                goto mem_error;
6179
36.4k
      if (elem == NULL) {
6180
5.56k
    n->c1 = cur;
6181
5.56k
    if (cur != NULL)
6182
5.56k
        cur->parent = n;
6183
5.56k
    ret = cur = n;
6184
30.8k
      } else {
6185
30.8k
          cur->c2 = n;
6186
30.8k
    n->parent = cur;
6187
30.8k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6188
30.8k
                if (n->c1 == NULL)
6189
6
                    goto mem_error;
6190
30.8k
    n->c1->parent = n;
6191
30.8k
    cur = n;
6192
30.8k
      }
6193
36.4k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6194
36.4k
      elem = xmlParseName(ctxt);
6195
36.4k
      if (elem == NULL) {
6196
612
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6197
612
      "xmlParseElementMixedContentDecl : Name expected\n");
6198
612
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6199
612
    return(NULL);
6200
612
      }
6201
35.8k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6202
35.8k
  }
6203
5.23k
  if ((RAW == ')') && (NXT(1) == '*')) {
6204
4.79k
      if (elem != NULL) {
6205
4.79k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6206
4.79k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6207
4.79k
    if (cur->c2 == NULL)
6208
12
                    goto mem_error;
6209
4.78k
    cur->c2->parent = cur;
6210
4.78k
            }
6211
4.78k
            if (ret != NULL)
6212
4.78k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6213
4.78k
#ifdef LIBXML_VALID_ENABLED
6214
4.78k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6215
2
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6216
2
                                 "Element content declaration doesn't start "
6217
2
                                 "and stop in the same entity\n",
6218
2
                                 NULL, NULL);
6219
2
      }
6220
4.78k
#endif
6221
4.78k
      SKIP(2);
6222
4.78k
  } else {
6223
440
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6224
440
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6225
440
      return(NULL);
6226
440
  }
6227
6228
5.23k
    } else {
6229
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6230
0
    }
6231
4.78k
    return(ret);
6232
6233
54
mem_error:
6234
54
    xmlErrMemory(ctxt);
6235
54
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6236
54
    return(NULL);
6237
12.9k
}
6238
6239
/**
6240
 * Parse the declaration for an element-only (children) content model
6241
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6242
 *
6243
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6244
 *
6245
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6246
 *
6247
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6248
 *
6249
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6250
 *
6251
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6252
 * TODO Parameter-entity replacement text must be properly nested
6253
 *  with parenthesized groups. That is to say, if either of the
6254
 *  opening or closing parentheses in a choice, seq, or Mixed
6255
 *  construct is contained in the replacement text for a parameter
6256
 *  entity, both must be contained in the same replacement text. For
6257
 *  interoperability, if a parameter-entity reference appears in a
6258
 *  choice, seq, or Mixed construct, its replacement text should not
6259
 *  be empty, and neither the first nor last non-blank character of
6260
 *  the replacement text should be a connector (| or ,).
6261
 *
6262
 * @param ctxt  an XML parser context
6263
 * @param openInputNr  value of ctxt->inputNr when the opening '(' was read, needed for
6264
 * boundary checks
6265
 * @param depth  the level of recursion
6266
 * @returns the tree of xmlElementContent describing the element
6267
 *          hierarchy.
6268
 */
6269
static xmlElementContentPtr
6270
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6271
122k
                                       int depth) {
6272
122k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6273
122k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6274
122k
    const xmlChar *elem;
6275
122k
    xmlChar type = 0;
6276
6277
122k
    if (depth > maxDepth) {
6278
6
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6279
6
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6280
6
                "use XML_PARSE_HUGE\n", depth);
6281
6
  return(NULL);
6282
6
    }
6283
122k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6284
122k
    if (RAW == '(') {
6285
94.1k
        int newInputNr = ctxt->inputNr;
6286
6287
        /* Recurse on first child */
6288
94.1k
  NEXT;
6289
94.1k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6290
94.1k
                                                           depth + 1);
6291
94.1k
        if (cur == NULL)
6292
82.7k
            return(NULL);
6293
94.1k
    } else {
6294
28.1k
  elem = xmlParseName(ctxt);
6295
28.1k
  if (elem == NULL) {
6296
527
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6297
527
      return(NULL);
6298
527
  }
6299
27.6k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6300
27.6k
  if (cur == NULL) {
6301
55
      xmlErrMemory(ctxt);
6302
55
      return(NULL);
6303
55
  }
6304
27.5k
  GROW;
6305
27.5k
  if (RAW == '?') {
6306
2.92k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6307
2.92k
      NEXT;
6308
24.6k
  } else if (RAW == '*') {
6309
2.78k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6310
2.78k
      NEXT;
6311
21.8k
  } else if (RAW == '+') {
6312
1.95k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6313
1.95k
      NEXT;
6314
19.9k
  } else {
6315
19.9k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6316
19.9k
  }
6317
27.5k
  GROW;
6318
27.5k
    }
6319
815k
    while (!PARSER_STOPPED(ctxt)) {
6320
813k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6321
813k
        if (RAW == ')')
6322
33.5k
            break;
6323
        /*
6324
   * Each loop we parse one separator and one element.
6325
   */
6326
780k
        if (RAW == ',') {
6327
612k
      if (type == 0) type = CUR;
6328
6329
      /*
6330
       * Detect "Name | Name , Name" error
6331
       */
6332
603k
      else if (type != CUR) {
6333
6
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6334
6
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6335
6
                      type);
6336
6
    if ((last != NULL) && (last != ret))
6337
6
        xmlFreeDocElementContent(ctxt->myDoc, last);
6338
6
    if (ret != NULL)
6339
6
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6340
6
    return(NULL);
6341
6
      }
6342
612k
      NEXT;
6343
6344
612k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6345
612k
      if (op == NULL) {
6346
20
                xmlErrMemory(ctxt);
6347
20
    if ((last != NULL) && (last != ret))
6348
8
        xmlFreeDocElementContent(ctxt->myDoc, last);
6349
20
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6350
20
    return(NULL);
6351
20
      }
6352
612k
      if (last == NULL) {
6353
8.65k
    op->c1 = ret;
6354
8.65k
    if (ret != NULL)
6355
8.65k
        ret->parent = op;
6356
8.65k
    ret = cur = op;
6357
603k
      } else {
6358
603k
          cur->c2 = op;
6359
603k
    if (op != NULL)
6360
603k
        op->parent = cur;
6361
603k
    op->c1 = last;
6362
603k
    if (last != NULL)
6363
603k
        last->parent = op;
6364
603k
    cur =op;
6365
603k
    last = NULL;
6366
603k
      }
6367
612k
  } else if (RAW == '|') {
6368
165k
      if (type == 0) type = CUR;
6369
6370
      /*
6371
       * Detect "Name , Name | Name" error
6372
       */
6373
152k
      else if (type != CUR) {
6374
10
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6375
10
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6376
10
          type);
6377
10
    if ((last != NULL) && (last != ret))
6378
10
        xmlFreeDocElementContent(ctxt->myDoc, last);
6379
10
    if (ret != NULL)
6380
10
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6381
10
    return(NULL);
6382
10
      }
6383
165k
      NEXT;
6384
6385
165k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6386
165k
      if (op == NULL) {
6387
23
                xmlErrMemory(ctxt);
6388
23
    if ((last != NULL) && (last != ret))
6389
8
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
23
    if (ret != NULL)
6391
23
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
23
    return(NULL);
6393
23
      }
6394
165k
      if (last == NULL) {
6395
12.9k
    op->c1 = ret;
6396
12.9k
    if (ret != NULL)
6397
12.9k
        ret->parent = op;
6398
12.9k
    ret = cur = op;
6399
152k
      } else {
6400
152k
          cur->c2 = op;
6401
152k
    if (op != NULL)
6402
152k
        op->parent = cur;
6403
152k
    op->c1 = last;
6404
152k
    if (last != NULL)
6405
152k
        last->parent = op;
6406
152k
    cur =op;
6407
152k
    last = NULL;
6408
152k
      }
6409
165k
  } else {
6410
2.00k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6411
2.00k
      if ((last != NULL) && (last != ret))
6412
1.26k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6413
2.00k
      if (ret != NULL)
6414
2.00k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6415
2.00k
      return(NULL);
6416
2.00k
  }
6417
778k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6418
778k
        if (RAW == '(') {
6419
12.0k
            int newInputNr = ctxt->inputNr;
6420
6421
      /* Recurse on second child */
6422
12.0k
      NEXT;
6423
12.0k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6424
12.0k
                                                          depth + 1);
6425
12.0k
            if (last == NULL) {
6426
1.57k
    if (ret != NULL)
6427
1.57k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6428
1.57k
    return(NULL);
6429
1.57k
            }
6430
766k
  } else {
6431
766k
      elem = xmlParseName(ctxt);
6432
766k
      if (elem == NULL) {
6433
401
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6434
401
    if (ret != NULL)
6435
401
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6436
401
    return(NULL);
6437
401
      }
6438
765k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6439
765k
      if (last == NULL) {
6440
36
                xmlErrMemory(ctxt);
6441
36
    if (ret != NULL)
6442
36
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
36
    return(NULL);
6444
36
      }
6445
765k
      if (RAW == '?') {
6446
64.2k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6447
64.2k
    NEXT;
6448
701k
      } else if (RAW == '*') {
6449
5.16k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6450
5.16k
    NEXT;
6451
696k
      } else if (RAW == '+') {
6452
5.51k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6453
5.51k
    NEXT;
6454
690k
      } else {
6455
690k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6456
690k
      }
6457
765k
  }
6458
778k
    }
6459
34.9k
    if ((cur != NULL) && (last != NULL)) {
6460
18.3k
        cur->c2 = last;
6461
18.3k
  if (last != NULL)
6462
18.3k
      last->parent = cur;
6463
18.3k
    }
6464
34.9k
#ifdef LIBXML_VALID_ENABLED
6465
34.9k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6466
10
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6467
10
                         "Element content declaration doesn't start "
6468
10
                         "and stop in the same entity\n",
6469
10
                         NULL, NULL);
6470
10
    }
6471
34.9k
#endif
6472
34.9k
    NEXT;
6473
34.9k
    if (RAW == '?') {
6474
6.55k
  if (ret != NULL) {
6475
6.55k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6476
6.32k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477
546
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478
6.00k
      else
6479
6.00k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6480
6.55k
  }
6481
6.55k
  NEXT;
6482
28.3k
    } else if (RAW == '*') {
6483
4.73k
  if (ret != NULL) {
6484
4.73k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6485
4.73k
      cur = ret;
6486
      /*
6487
       * Some normalization:
6488
       * (a | b* | c?)* == (a | b | c)*
6489
       */
6490
16.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6491
11.8k
    if ((cur->c1 != NULL) &&
6492
11.8k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6493
11.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6494
674
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6495
11.8k
    if ((cur->c2 != NULL) &&
6496
11.8k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
11.5k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
661
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
11.8k
    cur = cur->c2;
6500
11.8k
      }
6501
4.73k
  }
6502
4.73k
  NEXT;
6503
23.6k
    } else if (RAW == '+') {
6504
6.30k
  if (ret != NULL) {
6505
6.30k
      int found = 0;
6506
6507
6.30k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6508
5.32k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6509
1.41k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6510
4.88k
      else
6511
4.88k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6512
      /*
6513
       * Some normalization:
6514
       * (a | b*)+ == (a | b)*
6515
       * (a | b?)+ == (a | b)*
6516
       */
6517
10.9k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
4.62k
    if ((cur->c1 != NULL) &&
6519
4.62k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
3.77k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6521
1.47k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
1.47k
        found = 1;
6523
1.47k
    }
6524
4.62k
    if ((cur->c2 != NULL) &&
6525
4.62k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6526
4.38k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6527
1.24k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6528
1.24k
        found = 1;
6529
1.24k
    }
6530
4.62k
    cur = cur->c2;
6531
4.62k
      }
6532
6.30k
      if (found)
6533
1.51k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6534
6.30k
  }
6535
6.30k
  NEXT;
6536
6.30k
    }
6537
34.9k
    return(ret);
6538
38.9k
}
6539
6540
/**
6541
 * Parse the declaration for an element-only (children) content model
6542
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6543
 *
6544
 * @deprecated Internal function, don't use.
6545
 *
6546
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6547
 *
6548
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6549
 *
6550
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6551
 *
6552
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6553
 *
6554
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6555
 * TODO Parameter-entity replacement text must be properly nested
6556
 *  with parenthesized groups. That is to say, if either of the
6557
 *  opening or closing parentheses in a choice, seq, or Mixed
6558
 *  construct is contained in the replacement text for a parameter
6559
 *  entity, both must be contained in the same replacement text. For
6560
 *  interoperability, if a parameter-entity reference appears in a
6561
 *  choice, seq, or Mixed construct, its replacement text should not
6562
 *  be empty, and neither the first nor last non-blank character of
6563
 *  the replacement text should be a connector (| or ,).
6564
 *
6565
 * @param ctxt  an XML parser context
6566
 * @param inputchk  input stack depth (value of ctxt->inputNr) at which the group was opened, needed for boundary checks
6567
 * @returns the tree of xmlElementContent describing the element
6568
 *          hierarchy.
6569
 */
6570
xmlElementContent *
6571
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6572
    /* stub left for API/ABI compat */
6573
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6574
0
}
6575
6576
/**
6577
 * Parse the declaration for an Element content either Mixed or Children,
6578
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6579
 *
6580
 * @deprecated Internal function, don't use.
6581
 *
6582
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6583
 *
6584
 * @param ctxt  an XML parser context
6585
 * @param name  the name of the element being defined.
6586
 * @param result  the Element Content pointer will be stored here if any
6587
 * @returns an xmlElementTypeVal value or -1 on error
6588
 */
6589
6590
int
6591
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6592
29.0k
                           xmlElementContent **result) {
6593
6594
29.0k
    xmlElementContentPtr tree = NULL;
6595
29.0k
    int openInputNr = ctxt->inputNr;
6596
29.0k
    int res;
6597
6598
29.0k
    *result = NULL;
6599
6600
29.0k
    if (RAW != '(') {
6601
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6602
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6603
0
  return(-1);
6604
0
    }
6605
29.0k
    NEXT;
6606
29.0k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6607
29.0k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6608
12.9k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6609
12.9k
  res = XML_ELEMENT_TYPE_MIXED;
6610
16.1k
    } else {
6611
16.1k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6612
16.1k
  res = XML_ELEMENT_TYPE_ELEMENT;
6613
16.1k
    }
6614
29.0k
    if (tree == NULL)
6615
4.19k
        return(-1);
6616
24.8k
    SKIP_BLANKS_PE;
6617
24.8k
    *result = tree;
6618
24.8k
    return(res);
6619
29.0k
}
6620
6621
/**
6622
 * Parse an element declaration. Always consumes '<!'.
6623
 *
6624
 * @deprecated Internal function, don't use.
6625
 *
6626
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6627
 *
6628
 * [ VC: Unique Element Type Declaration ]
6629
 * No element type may be declared more than once
6630
 *
6631
 * @param ctxt  an XML parser context
6632
 * @returns the type of the element, or -1 in case of error
6633
 */
6634
int
6635
37.1k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6636
37.1k
    const xmlChar *name;
6637
37.1k
    int ret = -1;
6638
37.1k
    xmlElementContentPtr content  = NULL;
6639
6640
37.1k
    if ((CUR != '<') || (NXT(1) != '!'))
6641
0
        return(ret);
6642
37.1k
    SKIP(2);
6643
6644
    /* GROW; done in the caller */
6645
37.1k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6646
37.0k
#ifdef LIBXML_VALID_ENABLED
6647
37.0k
  int oldInputNr = ctxt->inputNr;
6648
37.0k
#endif
6649
6650
37.0k
  SKIP(7);
6651
37.0k
  if (SKIP_BLANKS_PE == 0) {
6652
485
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6653
485
               "Space required after 'ELEMENT'\n");
6654
485
      return(-1);
6655
485
  }
6656
36.5k
        name = xmlParseName(ctxt);
6657
36.5k
  if (name == NULL) {
6658
548
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6659
548
         "xmlParseElementDecl: no name for Element\n");
6660
548
      return(-1);
6661
548
  }
6662
35.9k
  if (SKIP_BLANKS_PE == 0) {
6663
5.71k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6664
5.71k
         "Space required after the element name\n");
6665
5.71k
  }
6666
35.9k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6667
4.87k
      SKIP(5);
6668
      /*
6669
       * Element must always be empty.
6670
       */
6671
4.87k
      ret = XML_ELEMENT_TYPE_EMPTY;
6672
31.0k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6673
958
             (NXT(2) == 'Y')) {
6674
948
      SKIP(3);
6675
      /*
6676
       * Element is a generic container.
6677
       */
6678
948
      ret = XML_ELEMENT_TYPE_ANY;
6679
30.1k
  } else if (RAW == '(') {
6680
29.0k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6681
29.0k
            if (ret <= 0)
6682
4.19k
                return(-1);
6683
29.0k
  } else {
6684
      /*
6685
       * [ WFC: PEs in Internal Subset ] error handling.
6686
       */
6687
1.08k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6688
1.08k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6689
1.08k
      return(-1);
6690
1.08k
  }
6691
6692
30.6k
  SKIP_BLANKS_PE;
6693
6694
30.6k
  if (RAW != '>') {
6695
2.69k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6696
2.69k
      if (content != NULL) {
6697
2.39k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6698
2.39k
      }
6699
27.9k
  } else {
6700
27.9k
#ifdef LIBXML_VALID_ENABLED
6701
27.9k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6702
5
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6703
5
                                 "Element declaration doesn't start and stop in"
6704
5
                                 " the same entity\n",
6705
5
                                 NULL, NULL);
6706
5
      }
6707
27.9k
#endif
6708
6709
27.9k
      NEXT;
6710
27.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6711
26.2k
    (ctxt->sax->elementDecl != NULL)) {
6712
26.2k
    if (content != NULL)
6713
21.0k
        content->parent = NULL;
6714
26.2k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6715
26.2k
                           content);
6716
26.2k
    if ((content != NULL) && (content->parent == NULL)) {
6717
        /*
6718
         * this is a trick: if xmlAddElementDecl is called,
6719
         * instead of copying the full tree it is plugged directly
6720
         * if called from the parser. Avoid duplicating the
6721
         * interfaces or change the API/ABI
6722
         */
6723
3.51k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6724
3.51k
    }
6725
26.2k
      } else if (content != NULL) {
6726
1.43k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6727
1.43k
      }
6728
27.9k
  }
6729
30.6k
    }
6730
30.7k
    return(ret);
6731
37.1k
}
6732
6733
/**
6734
 * Parse a conditional section. Always consumes '<!['.
6735
 *
6736
 *     [61] conditionalSect ::= includeSect | ignoreSect
6737
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6738
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6739
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6740
 *                                 Ignore)*
6741
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6742
 * @param ctxt  an XML parser context
6743
 */
6744
6745
static void
6746
5.71k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6747
5.71k
    size_t depth = 0;
6748
5.71k
    int isFreshPE = 0;
6749
5.71k
    int oldInputNr = ctxt->inputNr;
6750
5.71k
    int declInputNr = ctxt->inputNr;
6751
6752
12.6k
    while (!PARSER_STOPPED(ctxt)) {
6753
12.6k
        if (ctxt->input->cur >= ctxt->input->end) {
6754
1.48k
            if (ctxt->inputNr <= oldInputNr) {
6755
771
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756
771
                return;
6757
771
            }
6758
6759
717
            xmlPopPE(ctxt);
6760
717
            declInputNr = ctxt->inputNr;
6761
11.1k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6762
6.87k
            SKIP(3);
6763
6.87k
            SKIP_BLANKS_PE;
6764
6765
6.87k
            isFreshPE = 0;
6766
6767
6.87k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768
4.09k
                SKIP(7);
6769
4.09k
                SKIP_BLANKS_PE;
6770
4.09k
                if (RAW != '[') {
6771
209
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772
209
                    return;
6773
209
                }
6774
3.88k
#ifdef LIBXML_VALID_ENABLED
6775
3.88k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6776
34
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777
34
                                     "All markup of the conditional section is"
6778
34
                                     " not in the same entity\n",
6779
34
                                     NULL, NULL);
6780
34
                }
6781
3.88k
#endif
6782
3.88k
                NEXT;
6783
6784
3.88k
                depth++;
6785
3.88k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6786
2.30k
                size_t ignoreDepth = 0;
6787
6788
2.30k
                SKIP(6);
6789
2.30k
                SKIP_BLANKS_PE;
6790
2.30k
                if (RAW != '[') {
6791
249
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6792
249
                    return;
6793
249
                }
6794
2.05k
#ifdef LIBXML_VALID_ENABLED
6795
2.05k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6796
14
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
14
                                     "All markup of the conditional section is"
6798
14
                                     " not in the same entity\n",
6799
14
                                     NULL, NULL);
6800
14
                }
6801
2.05k
#endif
6802
2.05k
                NEXT;
6803
6804
26.2k
                while (PARSER_STOPPED(ctxt) == 0) {
6805
26.2k
                    if (RAW == 0) {
6806
838
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807
838
                        return;
6808
838
                    }
6809
25.3k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810
775
                        SKIP(3);
6811
775
                        ignoreDepth++;
6812
                        /* Check for integer overflow */
6813
775
                        if (ignoreDepth == 0) {
6814
0
                            xmlErrMemory(ctxt);
6815
0
                            return;
6816
0
                        }
6817
24.6k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6818
2.19k
                               (NXT(2) == '>')) {
6819
1.46k
                        SKIP(3);
6820
1.46k
                        if (ignoreDepth == 0)
6821
1.20k
                            break;
6822
263
                        ignoreDepth--;
6823
23.1k
                    } else {
6824
23.1k
                        NEXT;
6825
23.1k
                    }
6826
25.3k
                }
6827
6828
1.21k
#ifdef LIBXML_VALID_ENABLED
6829
1.21k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6830
12
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831
12
                                     "All markup of the conditional section is"
6832
12
                                     " not in the same entity\n",
6833
12
                                     NULL, NULL);
6834
12
                }
6835
1.21k
#endif
6836
1.21k
            } else {
6837
474
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838
474
                return;
6839
474
            }
6840
6.87k
        } else if ((depth > 0) &&
6841
4.28k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6842
1.88k
            if (isFreshPE) {
6843
6
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6844
6
                               "Parameter entity must match "
6845
6
                               "extSubsetDecl\n");
6846
6
                return;
6847
6
            }
6848
6849
1.87k
            depth--;
6850
1.87k
#ifdef LIBXML_VALID_ENABLED
6851
1.87k
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6852
12
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6853
12
                                 "All markup of the conditional section is not"
6854
12
                                 " in the same entity\n",
6855
12
                                 NULL, NULL);
6856
12
            }
6857
1.87k
#endif
6858
1.87k
            SKIP(3);
6859
2.40k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6860
899
            isFreshPE = 0;
6861
899
            xmlParseMarkupDecl(ctxt);
6862
1.50k
        } else if (RAW == '%') {
6863
1.42k
            xmlParsePERefInternal(ctxt, 1);
6864
1.42k
            if (ctxt->inputNr > declInputNr) {
6865
724
                isFreshPE = 1;
6866
724
                declInputNr = ctxt->inputNr;
6867
724
            }
6868
1.42k
        } else {
6869
80
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6870
80
            return;
6871
80
        }
6872
6873
10.0k
        if (depth == 0)
6874
3.05k
            break;
6875
6876
6.96k
        SKIP_BLANKS;
6877
6.96k
        SHRINK;
6878
6.96k
        GROW;
6879
6.96k
    }
6880
5.71k
}
6881
6882
/**
6883
 * Parse markup declarations. Always consumes '<!' or '<?'.
6884
 *
6885
 * @deprecated Internal function, don't use.
6886
 *
6887
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6888
 *                         NotationDecl | PI | Comment
6889
 *
6890
 * [ VC: Proper Declaration/PE Nesting ]
6891
 * Parameter-entity replacement text must be properly nested with
6892
 * markup declarations. That is to say, if either the first character
6893
 * or the last character of a markup declaration (markupdecl above) is
6894
 * contained in the replacement text for a parameter-entity reference,
6895
 * both must be contained in the same replacement text.
6896
 *
6897
 * [ WFC: PEs in Internal Subset ]
6898
 * In the internal DTD subset, parameter-entity references can occur
6899
 * only where markup declarations can occur, not within markup declarations.
6900
 * (This does not apply to references that occur in external parameter
6901
 * entities or to the external subset.)
6902
 *
6903
 * @param ctxt  an XML parser context
6904
 */
6905
void
6906
350k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6907
350k
    GROW;
6908
350k
    if (CUR == '<') {
6909
350k
        if (NXT(1) == '!') {
6910
342k
      switch (NXT(2)) {
6911
124k
          case 'E':
6912
124k
        if (NXT(3) == 'L')
6913
37.1k
      xmlParseElementDecl(ctxt);
6914
87.5k
        else if (NXT(3) == 'N')
6915
87.4k
      xmlParseEntityDecl(ctxt);
6916
82
                    else
6917
82
                        SKIP(2);
6918
124k
        break;
6919
81.2k
          case 'A':
6920
81.2k
        xmlParseAttributeListDecl(ctxt);
6921
81.2k
        break;
6922
7.82k
          case 'N':
6923
7.82k
        xmlParseNotationDecl(ctxt);
6924
7.82k
        break;
6925
116k
          case '-':
6926
116k
        xmlParseComment(ctxt);
6927
116k
        break;
6928
12.6k
    default:
6929
12.6k
                    xmlFatalErr(ctxt,
6930
12.6k
                                ctxt->inSubset == 2 ?
6931
5.76k
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6932
12.6k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6933
12.6k
                                NULL);
6934
12.6k
                    SKIP(2);
6935
12.6k
        break;
6936
342k
      }
6937
342k
  } else if (NXT(1) == '?') {
6938
7.44k
      xmlParsePI(ctxt);
6939
7.44k
  }
6940
350k
    }
6941
350k
}
6942
6943
/**
6944
 * Parse an XML declaration header for external entities
6945
 *
6946
 * @deprecated Internal function, don't use.
6947
 *
6948
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6949
 * @param ctxt  an XML parser context
6950
 */
6951
6952
void
6953
28.5k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6954
28.5k
    xmlChar *version;
6955
6956
    /*
6957
     * We know that '<?xml' is here.
6958
     */
6959
28.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6960
28.5k
  SKIP(5);
6961
28.5k
    } else {
6962
13
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6963
13
  return;
6964
13
    }
6965
6966
28.5k
    if (SKIP_BLANKS == 0) {
6967
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6968
0
           "Space needed after '<?xml'\n");
6969
0
    }
6970
6971
    /*
6972
     * We may have the VersionInfo here.
6973
     */
6974
28.5k
    version = xmlParseVersionInfo(ctxt);
6975
28.5k
    if (version == NULL) {
6976
14.5k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6977
14.5k
        if (version == NULL) {
6978
15
            xmlErrMemory(ctxt);
6979
15
            return;
6980
15
        }
6981
14.5k
    } else {
6982
13.9k
  if (SKIP_BLANKS == 0) {
6983
391
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6984
391
               "Space needed here\n");
6985
391
  }
6986
13.9k
    }
6987
28.5k
    ctxt->input->version = version;
6988
6989
    /*
6990
     * We must have the encoding declaration
6991
     */
6992
28.5k
    xmlParseEncodingDecl(ctxt);
6993
6994
28.5k
    SKIP_BLANKS;
6995
28.5k
    if ((RAW == '?') && (NXT(1) == '>')) {
6996
1.65k
        SKIP(2);
6997
26.8k
    } else if (RAW == '>') {
6998
        /* Deprecated old WD ... */
6999
479
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7000
479
  NEXT;
7001
26.4k
    } else {
7002
26.4k
        int c;
7003
7004
26.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7005
120M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7006
120M
            NEXT;
7007
120M
            if (c == '>')
7008
12.1k
                break;
7009
120M
        }
7010
26.4k
    }
7011
28.5k
}
7012
7013
/**
7014
 * Parse Markup declarations from an external subset
7015
 *
7016
 * @deprecated Internal function, don't use.
7017
 *
7018
 *     [30] extSubset ::= textDecl? extSubsetDecl
7019
 *
7020
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7021
 *                             PEReference | S) *
7022
 * @param ctxt  an XML parser context
7023
 * @param publicId  the public identifier
7024
 * @param systemId  the system identifier (URL)
7025
 */
7026
void
7027
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7028
3.66k
                       const xmlChar *systemId) {
7029
3.66k
    int oldInputNr;
7030
7031
3.66k
    xmlCtxtInitializeLate(ctxt);
7032
7033
3.66k
    xmlDetectEncoding(ctxt);
7034
7035
3.66k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7036
348
  xmlParseTextDecl(ctxt);
7037
348
    }
7038
3.66k
    if (ctxt->myDoc == NULL) {
7039
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7040
0
  if (ctxt->myDoc == NULL) {
7041
0
      xmlErrMemory(ctxt);
7042
0
      return;
7043
0
  }
7044
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7045
0
    }
7046
3.66k
    if ((ctxt->myDoc->intSubset == NULL) &&
7047
673
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7048
9
        xmlErrMemory(ctxt);
7049
9
    }
7050
7051
3.66k
    ctxt->inSubset = 2;
7052
3.66k
    oldInputNr = ctxt->inputNr;
7053
7054
3.66k
    SKIP_BLANKS;
7055
152k
    while (!PARSER_STOPPED(ctxt)) {
7056
151k
        if (ctxt->input->cur >= ctxt->input->end) {
7057
2.54k
            if (ctxt->inputNr <= oldInputNr) {
7058
1.50k
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7059
1.50k
                break;
7060
1.50k
            }
7061
7062
1.03k
            xmlPopPE(ctxt);
7063
149k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7064
2.06k
            xmlParseConditionalSections(ctxt);
7065
147k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7066
143k
            xmlParseMarkupDecl(ctxt);
7067
143k
        } else if (RAW == '%') {
7068
2.27k
            xmlParsePERefInternal(ctxt, 1);
7069
2.27k
        } else {
7070
1.77k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7071
7072
2.05k
            while (ctxt->inputNr > oldInputNr)
7073
274
                xmlPopPE(ctxt);
7074
1.77k
            break;
7075
1.77k
        }
7076
148k
        SKIP_BLANKS;
7077
148k
        SHRINK;
7078
148k
        GROW;
7079
148k
    }
7080
3.66k
}
7081
7082
/**
7083
 * Parse and handle entity references in content, depending on the SAX
7084
 * interface, this may end-up in a call to character() if this is a
7085
 * CharRef, a predefined entity, if there is no reference() callback.
7086
 * or if the parser was asked to switch to that mode.
7087
 *
7088
 * @deprecated Internal function, don't use.
7089
 *
7090
 * Always consumes '&'.
7091
 *
7092
 *     [67] Reference ::= EntityRef | CharRef
7093
 * @param ctxt  an XML parser context
7094
 */
7095
void
7096
618k
xmlParseReference(xmlParserCtxt *ctxt) {
7097
618k
    xmlEntityPtr ent = NULL;
7098
618k
    const xmlChar *name;
7099
618k
    xmlChar *val;
7100
7101
618k
    if (RAW != '&')
7102
0
        return;
7103
7104
    /*
7105
     * Simple case of a CharRef
7106
     */
7107
618k
    if (NXT(1) == '#') {
7108
236k
  int i = 0;
7109
236k
  xmlChar out[16];
7110
236k
  int value = xmlParseCharRef(ctxt);
7111
7112
236k
  if (value == 0)
7113
77.0k
      return;
7114
7115
        /*
7116
         * Just encode the value in UTF-8
7117
         */
7118
159k
        COPY_BUF(out, i, value);
7119
159k
        out[i] = 0;
7120
159k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7121
159k
            (!ctxt->disableSAX))
7122
138k
            ctxt->sax->characters(ctxt->userData, out, i);
7123
159k
  return;
7124
236k
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
381k
    name = xmlParseEntityRefInternal(ctxt);
7130
381k
    if (name == NULL)
7131
152k
        return;
7132
229k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7133
229k
    if (ent == NULL) {
7134
        /*
7135
         * Create a reference for undeclared entities.
7136
         */
7137
173k
        if ((ctxt->replaceEntities == 0) &&
7138
136k
            (ctxt->sax != NULL) &&
7139
136k
            (ctxt->disableSAX == 0) &&
7140
127k
            (ctxt->sax->reference != NULL)) {
7141
127k
            ctxt->sax->reference(ctxt->userData, name);
7142
127k
        }
7143
173k
        return;
7144
173k
    }
7145
56.3k
    if (!ctxt->wellFormed)
7146
25.6k
  return;
7147
7148
    /* special case of predefined entities */
7149
30.6k
    if ((ent->name == NULL) ||
7150
30.6k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7151
1.23k
  val = ent->content;
7152
1.23k
  if (val == NULL) return;
7153
  /*
7154
   * inline the entity.
7155
   */
7156
1.23k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7157
1.23k
      (!ctxt->disableSAX))
7158
1.23k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7159
1.23k
  return;
7160
1.23k
    }
7161
7162
    /*
7163
     * Some users try to parse entities on their own and used to set
7164
     * the renamed "checked" member. Fix the flags to cover this
7165
     * case.
7166
     */
7167
29.4k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7168
0
        ent->flags |= XML_ENT_PARSED;
7169
7170
    /*
7171
     * The first reference to the entity trigger a parsing phase
7172
     * where the ent->children is filled with the result from
7173
     * the parsing.
7174
     * Note: external parsed entities will not be loaded, it is not
7175
     * required for a non-validating parser, unless the parsing option
7176
     * of validating, or substituting entities were given. Doing so is
7177
     * far more secure as the parser will only process data coming from
7178
     * the document entity by default.
7179
     *
7180
     * FIXME: This doesn't work correctly since entities can be
7181
     * expanded with different namespace declarations in scope.
7182
     * For example:
7183
     *
7184
     * <!DOCTYPE doc [
7185
     *   <!ENTITY ent "<ns:elem/>">
7186
     * ]>
7187
     * <doc>
7188
     *   <decl1 xmlns:ns="urn:ns1">
7189
     *     &ent;
7190
     *   </decl1>
7191
     *   <decl2 xmlns:ns="urn:ns2">
7192
     *     &ent;
7193
     *   </decl2>
7194
     * </doc>
7195
     *
7196
     * Proposed fix:
7197
     *
7198
     * - Ignore current namespace declarations when parsing the
7199
     *   entity. If a prefix can't be resolved, don't report an error
7200
     *   but mark it as unresolved.
7201
     * - Try to resolve these prefixes when expanding the entity.
7202
     *   This will require a specialized version of xmlStaticCopyNode
7203
     *   which can also make use of the namespace hash table to avoid
7204
     *   quadratic behavior.
7205
     *
7206
     * Alternatively, we could simply reparse the entity on each
7207
     * expansion like we already do with custom SAX callbacks.
7208
     * External entity content should be cached in this case.
7209
     */
7210
29.4k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7211
9.54k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7212
9.28k
         ((ctxt->replaceEntities) ||
7213
28.7k
          (ctxt->validate)))) {
7214
28.7k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7215
6.27k
            xmlCtxtParseEntity(ctxt, ent);
7216
22.4k
        } else if (ent->children == NULL) {
7217
            /*
7218
             * Probably running in SAX mode and the callbacks don't
7219
             * build the entity content. Parse the entity again.
7220
             *
7221
             * This will also be triggered in normal tree builder mode
7222
             * if an entity happens to be empty, causing unnecessary
7223
             * reloads. It's hard to come up with a reliable check in
7224
             * which mode we're running.
7225
             */
7226
12.6k
            xmlCtxtParseEntity(ctxt, ent);
7227
12.6k
        }
7228
28.7k
    }
7229
7230
    /*
7231
     * We also check for amplification if entities aren't substituted.
7232
     * They might be expanded later.
7233
     */
7234
29.4k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7235
126
        return;
7236
7237
29.3k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7238
1.71k
        return;
7239
7240
27.6k
    if (ctxt->replaceEntities == 0) {
7241
  /*
7242
   * Create a reference
7243
   */
7244
4.66k
        if (ctxt->sax->reference != NULL)
7245
4.66k
      ctxt->sax->reference(ctxt->userData, ent->name);
7246
22.9k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7247
10.3k
        xmlNodePtr copy, cur;
7248
7249
        /*
7250
         * Seems we are generating the DOM content, copy the tree
7251
   */
7252
10.3k
        cur = ent->children;
7253
7254
        /*
7255
         * Handle first text node with SAX to coalesce text efficiently
7256
         */
7257
10.3k
        if ((cur->type == XML_TEXT_NODE) ||
7258
8.55k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7259
8.55k
            int len = xmlStrlen(cur->content);
7260
7261
8.55k
            if ((cur->type == XML_TEXT_NODE) ||
7262
8.32k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7263
8.32k
                if (ctxt->sax->characters != NULL)
7264
8.32k
                    ctxt->sax->characters(ctxt, cur->content, len);
7265
8.32k
            } else {
7266
224
                if (ctxt->sax->cdataBlock != NULL)
7267
224
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7268
224
            }
7269
7270
8.55k
            cur = cur->next;
7271
8.55k
        }
7272
7273
62.7k
        while (cur != NULL) {
7274
53.9k
            xmlNodePtr last;
7275
7276
            /*
7277
             * Handle last text node with SAX to coalesce text efficiently
7278
             */
7279
53.9k
            if ((cur->next == NULL) &&
7280
5.42k
                ((cur->type == XML_TEXT_NODE) ||
7281
4.30k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7282
1.36k
                int len = xmlStrlen(cur->content);
7283
7284
1.36k
                if ((cur->type == XML_TEXT_NODE) ||
7285
1.12k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7286
1.12k
                    if (ctxt->sax->characters != NULL)
7287
1.12k
                        ctxt->sax->characters(ctxt, cur->content, len);
7288
1.12k
                } else {
7289
240
                    if (ctxt->sax->cdataBlock != NULL)
7290
240
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7291
240
                }
7292
7293
1.36k
                break;
7294
1.36k
            }
7295
7296
            /*
7297
             * Reset coalesce buffer stats only for non-text nodes.
7298
             */
7299
52.6k
            ctxt->nodemem = 0;
7300
52.6k
            ctxt->nodelen = 0;
7301
7302
52.6k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7303
7304
52.6k
            if (copy == NULL) {
7305
301
                xmlErrMemory(ctxt);
7306
301
                break;
7307
301
            }
7308
7309
52.3k
            if (ctxt->parseMode == XML_PARSE_READER) {
7310
                /* Needed for reader */
7311
0
                copy->extra = cur->extra;
7312
                /* Maybe needed for reader */
7313
0
                copy->_private = cur->_private;
7314
0
            }
7315
7316
52.3k
            copy->parent = ctxt->node;
7317
52.3k
            last = ctxt->node->last;
7318
52.3k
            if (last == NULL) {
7319
272
                ctxt->node->children = copy;
7320
52.0k
            } else {
7321
52.0k
                last->next = copy;
7322
52.0k
                copy->prev = last;
7323
52.0k
            }
7324
52.3k
            ctxt->node->last = copy;
7325
7326
52.3k
            cur = cur->next;
7327
52.3k
        }
7328
10.3k
    }
7329
27.6k
}
7330
7331
static void
7332
413k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7333
    /*
7334
     * [ WFC: Entity Declared ]
7335
     * In a document without any DTD, a document with only an
7336
     * internal DTD subset which contains no parameter entity
7337
     * references, or a document with "standalone='yes'", the
7338
     * Name given in the entity reference must match that in an
7339
     * entity declaration, except that well-formed documents
7340
     * need not declare any of the following entities: amp, lt,
7341
     * gt, apos, quot.
7342
     * The declaration of a parameter entity must precede any
7343
     * reference to it.
7344
     * Similarly, the declaration of a general entity must
7345
     * precede any reference to it which appears in a default
7346
     * value in an attribute-list declaration. Note that if
7347
     * entities are declared in the external subset or in
7348
     * external parameter entities, a non-validating processor
7349
     * is not obligated to read and process their declarations;
7350
     * for such documents, the rule that an entity must be
7351
     * declared is a well-formedness constraint only if
7352
     * standalone='yes'.
7353
     */
7354
413k
    if ((ctxt->standalone == 1) ||
7355
413k
        ((ctxt->hasExternalSubset == 0) &&
7356
407k
         (ctxt->hasPErefs == 0))) {
7357
269k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7358
269k
                          "Entity '%s' not defined\n", name);
7359
269k
#ifdef LIBXML_VALID_ENABLED
7360
269k
    } else if (ctxt->validate) {
7361
        /*
7362
         * [ VC: Entity Declared ]
7363
         * In a document with an external subset or external
7364
         * parameter entities with "standalone='no'", ...
7365
         * ... The declaration of a parameter entity must
7366
         * precede any reference to it...
7367
         */
7368
88.2k
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7369
88.2k
                         "Entity '%s' not defined\n", name, NULL);
7370
88.2k
#endif
7371
88.2k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7372
46.0k
               ((ctxt->replaceEntities) &&
7373
10.3k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7374
        /*
7375
         * Also raise a non-fatal error
7376
         *
7377
         * - if the external subset is loaded and all entity declarations
7378
         *   should be available, or
7379
         * - entity substition was requested without restricting
7380
         *   external entity access.
7381
         */
7382
10.3k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7383
10.3k
                     "Entity '%s' not defined\n", name);
7384
45.3k
    } else {
7385
45.3k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7386
45.3k
                      "Entity '%s' not defined\n", name, NULL);
7387
45.3k
    }
7388
7389
413k
    ctxt->valid = 0;
7390
413k
}
7391
7392
static xmlEntityPtr
7393
2.62M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7394
2.62M
    xmlEntityPtr ent = NULL;
7395
7396
    /*
7397
     * Predefined entities override any extra definition
7398
     */
7399
2.62M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7400
1.39M
        ent = xmlGetPredefinedEntity(name);
7401
1.39M
        if (ent != NULL)
7402
355k
            return(ent);
7403
1.39M
    }
7404
7405
    /*
7406
     * Ask first SAX for entity resolution, otherwise try the
7407
     * entities which may have stored in the parser context.
7408
     */
7409
2.27M
    if (ctxt->sax != NULL) {
7410
2.27M
  if (ctxt->sax->getEntity != NULL)
7411
2.27M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7412
2.27M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7413
83.8k
      (ctxt->options & XML_PARSE_OLDSAX))
7414
67.5k
      ent = xmlGetPredefinedEntity(name);
7415
2.27M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7416
83.8k
      (ctxt->userData==ctxt)) {
7417
83.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7418
83.8k
  }
7419
2.27M
    }
7420
7421
2.27M
    if (ent == NULL) {
7422
398k
        xmlHandleUndeclaredEntity(ctxt, name);
7423
398k
    }
7424
7425
    /*
7426
     * [ WFC: Parsed Entity ]
7427
     * An entity reference must not contain the name of an
7428
     * unparsed entity
7429
     */
7430
1.87M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7431
579
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7432
579
     "Entity reference to unparsed entity %s\n", name);
7433
579
        ent = NULL;
7434
579
    }
7435
7436
    /*
7437
     * [ WFC: No External Entity References ]
7438
     * Attribute values cannot contain direct or indirect
7439
     * entity references to external entities.
7440
     */
7441
1.87M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7442
24.0k
        if (inAttr) {
7443
945
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7444
945
                 "Attribute references external entity '%s'\n", name);
7445
945
            ent = NULL;
7446
945
        }
7447
24.0k
    }
7448
7449
2.27M
    return(ent);
7450
2.62M
}
7451
7452
/**
7453
 * Parse an entity reference. Always consumes '&'.
7454
 *
7455
 *     [68] EntityRef ::= '&' Name ';'
7456
 *
7457
 * @param ctxt  an XML parser context
7458
 * @returns the name, or NULL in case of error.
7459
 */
7460
static const xmlChar *
7461
1.19M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7462
1.19M
    const xmlChar *name;
7463
7464
1.19M
    GROW;
7465
7466
1.19M
    if (RAW != '&')
7467
0
        return(NULL);
7468
1.19M
    NEXT;
7469
1.19M
    name = xmlParseName(ctxt);
7470
1.19M
    if (name == NULL) {
7471
148k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7472
148k
           "xmlParseEntityRef: no name\n");
7473
148k
        return(NULL);
7474
148k
    }
7475
1.04M
    if (RAW != ';') {
7476
67.9k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7477
67.9k
  return(NULL);
7478
67.9k
    }
7479
975k
    NEXT;
7480
7481
975k
    return(name);
7482
1.04M
}
7483
7484
/**
7485
 * @deprecated Internal function, don't use.
7486
 *
7487
 * @param ctxt  an XML parser context
7488
 * @returns the xmlEntity if found, or NULL otherwise.
7489
 */
7490
xmlEntity *
7491
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7492
0
    const xmlChar *name;
7493
7494
0
    if (ctxt == NULL)
7495
0
        return(NULL);
7496
7497
0
    name = xmlParseEntityRefInternal(ctxt);
7498
0
    if (name == NULL)
7499
0
        return(NULL);
7500
7501
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7502
0
}
7503
7504
/**
7505
 * Parse ENTITY references declarations, but this version parses it from
7506
 * a string value.
7507
 *
7508
 *     [68] EntityRef ::= '&' Name ';'
7509
 *
7510
 * [ WFC: Entity Declared ]
7511
 * In a document without any DTD, a document with only an internal DTD
7512
 * subset which contains no parameter entity references, or a document
7513
 * with "standalone='yes'", the Name given in the entity reference
7514
 * must match that in an entity declaration, except that well-formed
7515
 * documents need not declare any of the following entities: amp, lt,
7516
 * gt, apos, quot.  The declaration of a parameter entity must precede
7517
 * any reference to it.  Similarly, the declaration of a general entity
7518
 * must precede any reference to it which appears in a default value in an
7519
 * attribute-list declaration. Note that if entities are declared in the
7520
 * external subset or in external parameter entities, a non-validating
7521
 * processor is not obligated to read and process their declarations;
7522
 * for such documents, the rule that an entity must be declared is a
7523
 * well-formedness constraint only if standalone='yes'.
7524
 *
7525
 * [ WFC: Parsed Entity ]
7526
 * An entity reference must not contain the name of an unparsed entity
7527
 *
7528
 * @param ctxt  an XML parser context
7529
 * @param str  a pointer to an index in the string
7530
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7531
 * is updated to the current location in the string.
7532
 */
7533
static xmlChar *
7534
1.65M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7535
1.65M
    xmlChar *name;
7536
1.65M
    const xmlChar *ptr;
7537
1.65M
    xmlChar cur;
7538
7539
1.65M
    if ((str == NULL) || (*str == NULL))
7540
0
        return(NULL);
7541
1.65M
    ptr = *str;
7542
1.65M
    cur = *ptr;
7543
1.65M
    if (cur != '&')
7544
0
  return(NULL);
7545
7546
1.65M
    ptr++;
7547
1.65M
    name = xmlParseStringName(ctxt, &ptr);
7548
1.65M
    if (name == NULL) {
7549
88
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7550
88
           "xmlParseStringEntityRef: no name\n");
7551
88
  *str = ptr;
7552
88
  return(NULL);
7553
88
    }
7554
1.65M
    if (*ptr != ';') {
7555
38
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7556
38
        xmlFree(name);
7557
38
  *str = ptr;
7558
38
  return(NULL);
7559
38
    }
7560
1.65M
    ptr++;
7561
7562
1.65M
    *str = ptr;
7563
1.65M
    return(name);
7564
1.65M
}
7565
7566
/**
7567
 * Parse a parameter entity reference. Always consumes '%'.
7568
 *
7569
 * The entity content is handled directly by pushing it's content as
7570
 * a new input stream.
7571
 *
7572
 *     [69] PEReference ::= '%' Name ';'
7573
 *
7574
 * [ WFC: No Recursion ]
7575
 * A parsed entity must not contain a recursive
7576
 * reference to itself, either directly or indirectly.
7577
 *
7578
 * [ WFC: Entity Declared ]
7579
 * In a document without any DTD, a document with only an internal DTD
7580
 * subset which contains no parameter entity references, or a document
7581
 * with "standalone='yes'", ...  ... The declaration of a parameter
7582
 * entity must precede any reference to it...
7583
 *
7584
 * [ VC: Entity Declared ]
7585
 * In a document with an external subset or external parameter entities
7586
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7587
 * must precede any reference to it...
7588
 *
7589
 * [ WFC: In DTD ]
7590
 * Parameter-entity references may only appear in the DTD.
7591
 * NOTE: misleading but this is handled.
7592
 *
7593
 * @param ctxt  an XML parser context
7594
 * @param markupDecl  whether the PERef starts a markup declaration
7595
 */
7596
static void
7597
135k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7598
135k
    const xmlChar *name;
7599
135k
    xmlEntityPtr entity = NULL;
7600
135k
    xmlParserInputPtr input;
7601
7602
135k
    if (RAW != '%')
7603
0
        return;
7604
135k
    NEXT;
7605
135k
    name = xmlParseName(ctxt);
7606
135k
    if (name == NULL) {
7607
19.3k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7608
19.3k
  return;
7609
19.3k
    }
7610
116k
    if (RAW != ';') {
7611
8.85k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7612
8.85k
        return;
7613
8.85k
    }
7614
7615
107k
    NEXT;
7616
7617
    /* Must be set before xmlHandleUndeclaredEntity */
7618
107k
    ctxt->hasPErefs = 1;
7619
7620
    /*
7621
     * Request the entity from SAX
7622
     */
7623
107k
    if ((ctxt->sax != NULL) &&
7624
107k
  (ctxt->sax->getParameterEntity != NULL))
7625
107k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7626
7627
107k
    if (entity == NULL) {
7628
12.8k
        xmlHandleUndeclaredEntity(ctxt, name);
7629
94.7k
    } else {
7630
  /*
7631
   * Internal checking in case the entity quest barfed
7632
   */
7633
94.7k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7634
57.7k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7635
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7636
0
      "Internal: %%%s; is not a parameter entity\n",
7637
0
        name, NULL);
7638
94.7k
  } else {
7639
94.7k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7640
57.7k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7641
57.5k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7642
9.45k
      (ctxt->replaceEntities == 0) &&
7643
3.91k
      (ctxt->validate == 0))))
7644
1.25k
    return;
7645
7646
93.4k
            if (entity->flags & XML_ENT_EXPANDING) {
7647
23
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7648
23
                return;
7649
23
            }
7650
7651
93.4k
      input = xmlNewEntityInputStream(ctxt, entity);
7652
93.4k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7653
5.60k
                xmlFreeInputStream(input);
7654
5.60k
    return;
7655
5.60k
            }
7656
7657
87.8k
            entity->flags |= XML_ENT_EXPANDING;
7658
7659
87.8k
            if (markupDecl)
7660
69.6k
                input->flags |= XML_INPUT_MARKUP_DECL;
7661
7662
87.8k
            GROW;
7663
7664
87.8k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7665
50.9k
                xmlDetectEncoding(ctxt);
7666
7667
50.9k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7668
13.9k
                    (IS_BLANK_CH(NXT(5)))) {
7669
13.2k
                    xmlParseTextDecl(ctxt);
7670
13.2k
                }
7671
50.9k
            }
7672
87.8k
  }
7673
94.7k
    }
7674
107k
}
7675
7676
/**
7677
 * Parse a parameter entity reference.
7678
 *
7679
 * @deprecated Internal function, don't use.
7680
 *
7681
 * @param ctxt  an XML parser context
7682
 */
7683
void
7684
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7685
0
    xmlParsePERefInternal(ctxt, 0);
7686
0
}
7687
7688
/**
7689
 * Load the content of an entity.
7690
 *
7691
 * @param ctxt  an XML parser context
7692
 * @param entity  an unloaded system entity
7693
 * @returns 0 in case of success and -1 in case of failure
7694
 */
7695
static int
7696
24.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7697
24.5k
    xmlParserInputPtr oldinput, input = NULL;
7698
24.5k
    xmlParserInputPtr *oldinputTab;
7699
24.5k
    xmlChar *oldencoding;
7700
24.5k
    xmlChar *content = NULL;
7701
24.5k
    xmlResourceType rtype;
7702
24.5k
    size_t length, i;
7703
24.5k
    int oldinputNr, oldinputMax;
7704
24.5k
    int ret = -1;
7705
24.5k
    int res;
7706
7707
24.5k
    if ((ctxt == NULL) || (entity == NULL) ||
7708
24.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7709
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7710
24.5k
  (entity->content != NULL)) {
7711
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7712
0
              "xmlLoadEntityContent parameter error");
7713
0
        return(-1);
7714
0
    }
7715
7716
24.5k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7717
24.5k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7718
0
    else
7719
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7720
7721
24.5k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7722
24.5k
                            (char *) entity->ExternalID, rtype);
7723
24.5k
    if (input == NULL)
7724
835
        return(-1);
7725
7726
23.7k
    oldinput = ctxt->input;
7727
23.7k
    oldinputNr = ctxt->inputNr;
7728
23.7k
    oldinputMax = ctxt->inputMax;
7729
23.7k
    oldinputTab = ctxt->inputTab;
7730
23.7k
    oldencoding = ctxt->encoding;
7731
7732
23.7k
    ctxt->input = NULL;
7733
23.7k
    ctxt->inputNr = 0;
7734
23.7k
    ctxt->inputMax = 1;
7735
23.7k
    ctxt->encoding = NULL;
7736
23.7k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7737
23.7k
    if (ctxt->inputTab == NULL) {
7738
11
        xmlErrMemory(ctxt);
7739
11
        xmlFreeInputStream(input);
7740
11
        goto error;
7741
11
    }
7742
7743
23.7k
    xmlBufResetInput(input->buf->buffer, input);
7744
7745
23.7k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7746
19
        xmlFreeInputStream(input);
7747
19
        goto error;
7748
19
    }
7749
7750
23.7k
    xmlDetectEncoding(ctxt);
7751
7752
    /*
7753
     * Parse a possible text declaration first
7754
     */
7755
23.7k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7756
14.2k
  xmlParseTextDecl(ctxt);
7757
        /*
7758
         * An XML-1.0 document can't reference an entity not XML-1.0
7759
         */
7760
14.2k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7761
13.6k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7762
10.8k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7763
10.8k
                           "Version mismatch between document and entity\n");
7764
10.8k
        }
7765
14.2k
    }
7766
7767
23.7k
    length = input->cur - input->base;
7768
23.7k
    xmlBufShrink(input->buf->buffer, length);
7769
23.7k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7770
7771
61.8k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7772
38.1k
        ;
7773
7774
23.7k
    xmlBufResetInput(input->buf->buffer, input);
7775
7776
23.7k
    if (res < 0) {
7777
6.95k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7778
6.95k
        goto error;
7779
6.95k
    }
7780
7781
16.7k
    length = xmlBufUse(input->buf->buffer);
7782
16.7k
    if (length > INT_MAX) {
7783
0
        xmlErrMemory(ctxt);
7784
0
        goto error;
7785
0
    }
7786
7787
16.7k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7788
16.7k
    if (content == NULL) {
7789
17
        xmlErrMemory(ctxt);
7790
17
        goto error;
7791
17
    }
7792
7793
60.8M
    for (i = 0; i < length; ) {
7794
60.8M
        int clen = length - i;
7795
60.8M
        int c = xmlGetUTF8Char(content + i, &clen);
7796
7797
60.8M
        if ((c < 0) || (!IS_CHAR(c))) {
7798
16.5k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7799
16.5k
                              "xmlLoadEntityContent: invalid char value %d\n",
7800
16.5k
                              content[i]);
7801
16.5k
            goto error;
7802
16.5k
        }
7803
60.8M
        i += clen;
7804
60.8M
    }
7805
7806
213
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7807
213
    entity->content = content;
7808
213
    entity->length = length;
7809
213
    content = NULL;
7810
213
    ret = 0;
7811
7812
23.7k
error:
7813
47.4k
    while (ctxt->inputNr > 0)
7814
23.7k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7815
23.7k
    xmlFree(ctxt->inputTab);
7816
23.7k
    xmlFree(ctxt->encoding);
7817
7818
23.7k
    ctxt->input = oldinput;
7819
23.7k
    ctxt->inputNr = oldinputNr;
7820
23.7k
    ctxt->inputMax = oldinputMax;
7821
23.7k
    ctxt->inputTab = oldinputTab;
7822
23.7k
    ctxt->encoding = oldencoding;
7823
7824
23.7k
    xmlFree(content);
7825
7826
23.7k
    return(ret);
7827
213
}
7828
7829
/**
7830
 * Parse PEReference declarations
7831
 *
7832
 *     [69] PEReference ::= '%' Name ';'
7833
 *
7834
 * [ WFC: No Recursion ]
7835
 * A parsed entity must not contain a recursive
7836
 * reference to itself, either directly or indirectly.
7837
 *
7838
 * [ WFC: Entity Declared ]
7839
 * In a document without any DTD, a document with only an internal DTD
7840
 * subset which contains no parameter entity references, or a document
7841
 * with "standalone='yes'", ...  ... The declaration of a parameter
7842
 * entity must precede any reference to it...
7843
 *
7844
 * [ VC: Entity Declared ]
7845
 * In a document with an external subset or external parameter entities
7846
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7847
 * must precede any reference to it...
7848
 *
7849
 * [ WFC: In DTD ]
7850
 * Parameter-entity references may only appear in the DTD.
7851
 * NOTE: misleading but this is handled.
7852
 *
7853
 * @param ctxt  an XML parser context
7854
 * @param str  a pointer to an index in the string
7855
 * @returns the string of the entity content.
7856
 *         str is updated to the current value of the index
7857
 */
7858
static xmlEntityPtr
7859
54.2k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7860
54.2k
    const xmlChar *ptr;
7861
54.2k
    xmlChar cur;
7862
54.2k
    xmlChar *name;
7863
54.2k
    xmlEntityPtr entity = NULL;
7864
7865
54.2k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7866
54.2k
    ptr = *str;
7867
54.2k
    cur = *ptr;
7868
54.2k
    if (cur != '%')
7869
0
        return(NULL);
7870
54.2k
    ptr++;
7871
54.2k
    name = xmlParseStringName(ctxt, &ptr);
7872
54.2k
    if (name == NULL) {
7873
3.28k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7874
3.28k
           "xmlParseStringPEReference: no name\n");
7875
3.28k
  *str = ptr;
7876
3.28k
  return(NULL);
7877
3.28k
    }
7878
51.0k
    cur = *ptr;
7879
51.0k
    if (cur != ';') {
7880
3.57k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7881
3.57k
  xmlFree(name);
7882
3.57k
  *str = ptr;
7883
3.57k
  return(NULL);
7884
3.57k
    }
7885
47.4k
    ptr++;
7886
7887
    /* Must be set before xmlHandleUndeclaredEntity */
7888
47.4k
    ctxt->hasPErefs = 1;
7889
7890
    /*
7891
     * Request the entity from SAX
7892
     */
7893
47.4k
    if ((ctxt->sax != NULL) &&
7894
47.4k
  (ctxt->sax->getParameterEntity != NULL))
7895
47.4k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7896
7897
47.4k
    if (entity == NULL) {
7898
2.22k
        xmlHandleUndeclaredEntity(ctxt, name);
7899
45.2k
    } else {
7900
  /*
7901
   * Internal checking in case the entity quest barfed
7902
   */
7903
45.2k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904
28.4k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906
0
        "%%%s; is not a parameter entity\n",
7907
0
        name, NULL);
7908
0
  }
7909
45.2k
    }
7910
7911
47.4k
    xmlFree(name);
7912
47.4k
    *str = ptr;
7913
47.4k
    return(entity);
7914
51.0k
}
7915
7916
/**
7917
 * Parse a DOCTYPE declaration
7918
 *
7919
 * @deprecated Internal function, don't use.
7920
 *
7921
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7922
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7923
 *
7924
 * [ VC: Root Element Type ]
7925
 * The Name in the document type declaration must match the element
7926
 * type of the root element.
7927
 *
7928
 * @param ctxt  an XML parser context
7929
 */
7930
7931
void
7932
47.5k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7933
47.5k
    const xmlChar *name = NULL;
7934
47.5k
    xmlChar *publicId = NULL;
7935
47.5k
    xmlChar *URI = NULL;
7936
7937
    /*
7938
     * We know that '<!DOCTYPE' has been detected.
7939
     */
7940
47.5k
    SKIP(9);
7941
7942
47.5k
    if (SKIP_BLANKS == 0) {
7943
15.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7944
15.4k
                       "Space required after 'DOCTYPE'\n");
7945
15.4k
    }
7946
7947
    /*
7948
     * Parse the DOCTYPE name.
7949
     */
7950
47.5k
    name = xmlParseName(ctxt);
7951
47.5k
    if (name == NULL) {
7952
13.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7953
13.6k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7954
13.6k
    }
7955
47.5k
    ctxt->intSubName = name;
7956
7957
47.5k
    SKIP_BLANKS;
7958
7959
    /*
7960
     * Check for public and system identifier (URI)
7961
     */
7962
47.5k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7963
7964
47.5k
    if ((URI != NULL) || (publicId != NULL)) {
7965
9.55k
        ctxt->hasExternalSubset = 1;
7966
9.55k
    }
7967
47.5k
    ctxt->extSubURI = URI;
7968
47.5k
    ctxt->extSubSystem = publicId;
7969
7970
47.5k
    SKIP_BLANKS;
7971
7972
    /*
7973
     * Create and update the internal subset.
7974
     */
7975
47.5k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7976
47.5k
  (!ctxt->disableSAX))
7977
44.5k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7978
7979
47.5k
    if ((RAW != '[') && (RAW != '>')) {
7980
3.23k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7981
3.23k
    }
7982
47.5k
}
7983
7984
/**
7985
 * Parse the internal subset declaration
7986
 *
7987
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7988
 * @param ctxt  an XML parser context
7989
 */
7990
7991
static void
7992
37.5k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7993
    /*
7994
     * Is there any DTD definition ?
7995
     */
7996
37.5k
    if (RAW == '[') {
7997
37.5k
        int oldInputNr = ctxt->inputNr;
7998
7999
37.5k
        NEXT;
8000
  /*
8001
   * Parse the succession of Markup declarations and
8002
   * PEReferences.
8003
   * Subsequence (markupdecl | PEReference | S)*
8004
   */
8005
37.5k
  SKIP_BLANKS;
8006
418k
        while (1) {
8007
418k
            if (PARSER_STOPPED(ctxt)) {
8008
3.82k
                return;
8009
414k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8010
69.6k
                if (ctxt->inputNr <= oldInputNr) {
8011
4.65k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8012
4.65k
                    return;
8013
4.65k
                }
8014
64.9k
                xmlPopPE(ctxt);
8015
344k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8016
17.8k
                NEXT;
8017
17.8k
                SKIP_BLANKS;
8018
17.8k
                break;
8019
326k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8020
152k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8021
                /*
8022
                 * Conditional sections are allowed in external entities
8023
                 * included by PE References in the internal subset.
8024
                 */
8025
3.64k
                xmlParseConditionalSections(ctxt);
8026
323k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8027
205k
                xmlParseMarkupDecl(ctxt);
8028
205k
            } else if (RAW == '%') {
8029
106k
                xmlParsePERefInternal(ctxt, 1);
8030
106k
            } else {
8031
11.2k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8032
8033
12.1k
                while (ctxt->inputNr > oldInputNr)
8034
957
                    xmlPopPE(ctxt);
8035
11.2k
                return;
8036
11.2k
            }
8037
380k
            SKIP_BLANKS;
8038
380k
            SHRINK;
8039
380k
            GROW;
8040
380k
        }
8041
37.5k
    }
8042
8043
    /*
8044
     * We should be at the end of the DOCTYPE declaration.
8045
     */
8046
17.8k
    if (RAW != '>') {
8047
1.04k
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8048
1.04k
        return;
8049
1.04k
    }
8050
16.8k
    NEXT;
8051
16.8k
}
8052
8053
#ifdef LIBXML_SAX1_ENABLED
8054
/**
8055
 * Parse an attribute
8056
 *
8057
 * @deprecated Internal function, don't use.
8058
 *
8059
 *     [41] Attribute ::= Name Eq AttValue
8060
 *
8061
 * [ WFC: No External Entity References ]
8062
 * Attribute values cannot contain direct or indirect entity references
8063
 * to external entities.
8064
 *
8065
 * [ WFC: No < in Attribute Values ]
8066
 * The replacement text of any entity referred to directly or indirectly in
8067
 * an attribute value (other than "&lt;") must not contain a <.
8068
 *
8069
 * [ VC: Attribute Value Type ]
8070
 * The attribute must have been declared; the value must be of the type
8071
 * declared for it.
8072
 *
8073
 *     [25] Eq ::= S? '=' S?
8074
 *
8075
 * With namespace:
8076
 *
8077
 *     [NS 11] Attribute ::= QName Eq AttValue
8078
 *
8079
 * Also the case QName == xmlns:??? is handled independently as a namespace
8080
 * definition.
8081
 *
8082
 * @param ctxt  an XML parser context
8083
 * @param value  a xmlChar ** used to store the value of the attribute
8084
 * @returns the attribute name, and the value in *value.
8085
 */
8086
8087
const xmlChar *
8088
115k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8089
115k
    const xmlChar *name;
8090
115k
    xmlChar *val;
8091
8092
115k
    *value = NULL;
8093
115k
    GROW;
8094
115k
    name = xmlParseName(ctxt);
8095
115k
    if (name == NULL) {
8096
44.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8097
44.7k
                 "error parsing attribute name\n");
8098
44.7k
        return(NULL);
8099
44.7k
    }
8100
8101
    /*
8102
     * read the value
8103
     */
8104
70.6k
    SKIP_BLANKS;
8105
70.6k
    if (RAW == '=') {
8106
53.1k
        NEXT;
8107
53.1k
  SKIP_BLANKS;
8108
53.1k
  val = xmlParseAttValue(ctxt);
8109
53.1k
    } else {
8110
17.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8111
17.5k
         "Specification mandates value for attribute %s\n", name);
8112
17.5k
  return(name);
8113
17.5k
    }
8114
8115
    /*
8116
     * Check that xml:lang conforms to the specification
8117
     * No more registered as an error, just generate a warning now
8118
     * since this was deprecated in XML second edition
8119
     */
8120
53.1k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8121
5.97k
  if (!xmlCheckLanguageID(val)) {
8122
4.92k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8123
4.92k
              "Malformed value for xml:lang : %s\n",
8124
4.92k
        val, NULL);
8125
4.92k
  }
8126
5.97k
    }
8127
8128
    /*
8129
     * Check that xml:space conforms to the specification
8130
     */
8131
53.1k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8132
670
  if (xmlStrEqual(val, BAD_CAST "default"))
8133
232
      *(ctxt->space) = 0;
8134
438
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8135
210
      *(ctxt->space) = 1;
8136
228
  else {
8137
228
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8138
228
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8139
228
                                 val, NULL);
8140
228
  }
8141
670
    }
8142
8143
53.1k
    *value = val;
8144
53.1k
    return(name);
8145
70.6k
}
8146
8147
/**
8148
 * Parse a start tag. Always consumes '<'.
8149
 *
8150
 * @deprecated Internal function, don't use.
8151
 *
8152
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8153
 *
8154
 * [ WFC: Unique Att Spec ]
8155
 * No attribute name may appear more than once in the same start-tag or
8156
 * empty-element tag.
8157
 *
8158
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8159
 *
8160
 * [ WFC: Unique Att Spec ]
8161
 * No attribute name may appear more than once in the same start-tag or
8162
 * empty-element tag.
8163
 *
8164
 * With namespace:
8165
 *
8166
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8167
 *
8168
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8169
 *
8170
 * @param ctxt  an XML parser context
8171
 * @returns the element name parsed
8172
 */
8173
8174
const xmlChar *
8175
231k
xmlParseStartTag(xmlParserCtxt *ctxt) {
8176
231k
    const xmlChar *name;
8177
231k
    const xmlChar *attname;
8178
231k
    xmlChar *attvalue;
8179
231k
    const xmlChar **atts = ctxt->atts;
8180
231k
    int nbatts = 0;
8181
231k
    int maxatts = ctxt->maxatts;
8182
231k
    int i;
8183
8184
231k
    if (RAW != '<') return(NULL);
8185
231k
    NEXT1;
8186
8187
231k
    name = xmlParseName(ctxt);
8188
231k
    if (name == NULL) {
8189
26.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8190
26.8k
       "xmlParseStartTag: invalid element name\n");
8191
26.8k
        return(NULL);
8192
26.8k
    }
8193
8194
    /*
8195
     * Now parse the attributes, it ends up with the ending
8196
     *
8197
     * (S Attribute)* S?
8198
     */
8199
204k
    SKIP_BLANKS;
8200
204k
    GROW;
8201
8202
255k
    while (((RAW != '>') &&
8203
135k
     ((RAW != '/') || (NXT(1) != '>')) &&
8204
129k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8205
115k
  attname = xmlParseAttribute(ctxt, &attvalue);
8206
115k
        if (attname == NULL)
8207
44.7k
      break;
8208
70.6k
        if (attvalue != NULL) {
8209
      /*
8210
       * [ WFC: Unique Att Spec ]
8211
       * No attribute name may appear more than once in the same
8212
       * start-tag or empty-element tag.
8213
       */
8214
342k
      for (i = 0; i < nbatts;i += 2) {
8215
296k
          if (xmlStrEqual(atts[i], attname)) {
8216
2.58k
        xmlErrAttributeDup(ctxt, NULL, attname);
8217
2.58k
        goto failed;
8218
2.58k
    }
8219
296k
      }
8220
      /*
8221
       * Add the pair to atts
8222
       */
8223
46.1k
      if (nbatts + 4 > maxatts) {
8224
8.99k
          const xmlChar **n;
8225
8.99k
                int newSize;
8226
8227
8.99k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8228
8.99k
                                          11, XML_MAX_ATTRS);
8229
8.99k
                if (newSize < 0) {
8230
0
        xmlErrMemory(ctxt);
8231
0
        goto failed;
8232
0
    }
8233
8.99k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8234
8.99k
                if (newSize < 2)
8235
7.49k
                    newSize = 2;
8236
8.99k
#endif
8237
8.99k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8238
8.99k
    if (n == NULL) {
8239
11
        xmlErrMemory(ctxt);
8240
11
        goto failed;
8241
11
    }
8242
8.98k
    atts = n;
8243
8.98k
                maxatts = newSize * 2;
8244
8.98k
    ctxt->atts = atts;
8245
8.98k
    ctxt->maxatts = maxatts;
8246
8.98k
      }
8247
8248
46.1k
      atts[nbatts++] = attname;
8249
46.1k
      atts[nbatts++] = attvalue;
8250
46.1k
      atts[nbatts] = NULL;
8251
46.1k
      atts[nbatts + 1] = NULL;
8252
8253
46.1k
            attvalue = NULL;
8254
46.1k
  }
8255
8256
70.6k
failed:
8257
8258
70.6k
        if (attvalue != NULL)
8259
2.60k
            xmlFree(attvalue);
8260
8261
70.6k
  GROW
8262
70.6k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8263
19.7k
      break;
8264
50.9k
  if (SKIP_BLANKS == 0) {
8265
35.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266
35.1k
         "attributes construct error\n");
8267
35.1k
  }
8268
50.9k
  SHRINK;
8269
50.9k
        GROW;
8270
50.9k
    }
8271
8272
    /*
8273
     * SAX: Start of Element !
8274
     */
8275
204k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8276
204k
  (!ctxt->disableSAX)) {
8277
192k
  if (nbatts > 0)
8278
34.0k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8279
158k
  else
8280
158k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8281
192k
    }
8282
8283
204k
    if (atts != NULL) {
8284
        /* Free only the content strings */
8285
115k
        for (i = 1;i < nbatts;i+=2)
8286
46.1k
      if (atts[i] != NULL)
8287
46.1k
         xmlFree((xmlChar *) atts[i]);
8288
69.3k
    }
8289
204k
    return(name);
8290
204k
}
8291
8292
/**
8293
 * Parse an end tag. Always consumes '</'.
8294
 *
8295
 *     [42] ETag ::= '</' Name S? '>'
8296
 *
8297
 * With namespace
8298
 *
8299
 *     [NS 9] ETag ::= '</' QName S? '>'
8300
 * @param ctxt  an XML parser context
8301
 * @param line  line of the start tag
8302
 */
8303
8304
static void
8305
17.7k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8306
17.7k
    const xmlChar *name;
8307
8308
17.7k
    GROW;
8309
17.7k
    if ((RAW != '<') || (NXT(1) != '/')) {
8310
1.12k
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8311
1.12k
           "xmlParseEndTag: '</' not found\n");
8312
1.12k
  return;
8313
1.12k
    }
8314
16.6k
    SKIP(2);
8315
8316
16.6k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8317
8318
    /*
8319
     * We should definitely be at the ending "S? '>'" part
8320
     */
8321
16.6k
    GROW;
8322
16.6k
    SKIP_BLANKS;
8323
16.6k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8324
3.10k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8325
3.10k
    } else
8326
13.5k
  NEXT1;
8327
8328
    /*
8329
     * [ WFC: Element Type Match ]
8330
     * The Name in an element's end-tag must match the element type in the
8331
     * start-tag.
8332
     *
8333
     */
8334
16.6k
    if (name != (xmlChar*)1) {
8335
3.31k
        if (name == NULL) name = BAD_CAST "unparsable";
8336
3.31k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8337
3.31k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8338
3.31k
                    ctxt->name, line, name);
8339
3.31k
    }
8340
8341
    /*
8342
     * SAX: End of Tag
8343
     */
8344
16.6k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8345
16.6k
  (!ctxt->disableSAX))
8346
15.1k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8347
8348
16.6k
    namePop(ctxt);
8349
16.6k
    spacePop(ctxt);
8350
16.6k
}
8351
8352
/**
8353
 * Parse an end of tag
8354
 *
8355
 * @deprecated Internal function, don't use.
8356
 *
8357
 *     [42] ETag ::= '</' Name S? '>'
8358
 *
8359
 * With namespace
8360
 *
8361
 *     [NS 9] ETag ::= '</' QName S? '>'
8362
 * @param ctxt  an XML parser context
8363
 */
8364
8365
void
8366
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8367
0
    xmlParseEndTag1(ctxt, 0);
8368
0
}
8369
#endif /* LIBXML_SAX1_ENABLED */
8370
8371
/************************************************************************
8372
 *                  *
8373
 *          SAX 2 specific operations       *
8374
 *                  *
8375
 ************************************************************************/
8376
8377
/**
8378
 * Parse an XML Namespace QName
8379
 *
8380
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8381
 *     [7]  Prefix  ::= NCName
8382
 *     [8]  LocalPart  ::= NCName
8383
 *
8384
 * @param ctxt  an XML parser context
8385
 * @param prefix  pointer to store the prefix part
8386
 * @returns the Name parsed or NULL
8387
 */
8388
8389
static xmlHashedString
8390
1.70M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8391
1.70M
    xmlHashedString l, p;
8392
1.70M
    int start, isNCName = 0;
8393
8394
1.70M
    l.name = NULL;
8395
1.70M
    p.name = NULL;
8396
8397
1.70M
    GROW;
8398
1.70M
    start = CUR_PTR - BASE_PTR;
8399
8400
1.70M
    l = xmlParseNCName(ctxt);
8401
1.70M
    if (l.name != NULL) {
8402
1.16M
        isNCName = 1;
8403
1.16M
        if (CUR == ':') {
8404
182k
            NEXT;
8405
182k
            p = l;
8406
182k
            l = xmlParseNCName(ctxt);
8407
182k
        }
8408
1.16M
    }
8409
1.70M
    if ((l.name == NULL) || (CUR == ':')) {
8410
556k
        xmlChar *tmp;
8411
8412
556k
        l.name = NULL;
8413
556k
        p.name = NULL;
8414
556k
        if ((isNCName == 0) && (CUR != ':'))
8415
530k
            return(l);
8416
26.1k
        tmp = xmlParseNmtoken(ctxt);
8417
26.1k
        if (tmp != NULL)
8418
14.4k
            xmlFree(tmp);
8419
26.1k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8420
26.1k
                                CUR_PTR - (BASE_PTR + start));
8421
26.1k
        if (l.name == NULL) {
8422
6
            xmlErrMemory(ctxt);
8423
6
            return(l);
8424
6
        }
8425
26.1k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8426
26.1k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8427
26.1k
    }
8428
8429
1.17M
    *prefix = p;
8430
1.17M
    return(l);
8431
1.70M
}
8432
8433
/**
8434
 * Parse an XML Namespace QName
8435
 *
8436
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8437
 *     [7]  Prefix  ::= NCName
8438
 *     [8]  LocalPart  ::= NCName
8439
 *
8440
 * @param ctxt  an XML parser context
8441
 * @param prefix  pointer to store the prefix part
8442
 * @returns the Name parsed or NULL
8443
 */
8444
8445
static const xmlChar *
8446
5.25k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8447
5.25k
    xmlHashedString n, p;
8448
8449
5.25k
    n = xmlParseQNameHashed(ctxt, &p);
8450
5.25k
    if (n.name == NULL)
8451
2.41k
        return(NULL);
8452
2.83k
    *prefix = p.name;
8453
2.83k
    return(n.name);
8454
5.25k
}
8455
8456
/**
8457
 * Parse an XML name and compares for match
8458
 * (specialized for endtag parsing)
8459
 *
8460
 * @param ctxt  an XML parser context
8461
 * @param name  the localname
8462
 * @param prefix  the prefix, if any.
8463
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8464
 * and the name for mismatch
8465
 */
8466
8467
static const xmlChar *
8468
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8469
7.87k
                        xmlChar const *prefix) {
8470
7.87k
    const xmlChar *cmp;
8471
7.87k
    const xmlChar *in;
8472
7.87k
    const xmlChar *ret;
8473
7.87k
    const xmlChar *prefix2;
8474
8475
7.87k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8476
8477
7.87k
    GROW;
8478
7.87k
    in = ctxt->input->cur;
8479
8480
7.87k
    cmp = prefix;
8481
14.5k
    while (*in != 0 && *in == *cmp) {
8482
6.67k
  ++in;
8483
6.67k
  ++cmp;
8484
6.67k
    }
8485
7.87k
    if ((*cmp == 0) && (*in == ':')) {
8486
3.74k
        in++;
8487
3.74k
  cmp = name;
8488
8.70k
  while (*in != 0 && *in == *cmp) {
8489
4.95k
      ++in;
8490
4.95k
      ++cmp;
8491
4.95k
  }
8492
3.74k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8493
      /* success */
8494
2.62k
            ctxt->input->col += in - ctxt->input->cur;
8495
2.62k
      ctxt->input->cur = in;
8496
2.62k
      return((const xmlChar*) 1);
8497
2.62k
  }
8498
3.74k
    }
8499
    /*
8500
     * all strings coms from the dictionary, equality can be done directly
8501
     */
8502
5.25k
    ret = xmlParseQName (ctxt, &prefix2);
8503
5.25k
    if (ret == NULL)
8504
2.41k
        return(NULL);
8505
2.83k
    if ((ret == name) && (prefix == prefix2))
8506
684
  return((const xmlChar*) 1);
8507
2.15k
    return ret;
8508
2.83k
}
8509
8510
/**
8511
 * Parse an attribute in the new SAX2 framework.
8512
 *
8513
 * @param ctxt  an XML parser context
8514
 * @param pref  the element prefix
8515
 * @param elem  the element name
8516
 * @param hprefix  resulting attribute prefix
8517
 * @param value  resulting value of the attribute
8518
 * @param len  resulting length of the attribute
8519
 * @param alloc  resulting indicator if the attribute was allocated
8520
 * @returns the attribute name, and the value in *value, .
8521
 */
8522
8523
static xmlHashedString
8524
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8525
                   const xmlChar * pref, const xmlChar * elem,
8526
                   xmlHashedString * hprefix, xmlChar ** value,
8527
                   int *len, int *alloc)
8528
599k
{
8529
599k
    xmlHashedString hname;
8530
599k
    const xmlChar *prefix, *name;
8531
599k
    xmlChar *val = NULL, *internal_val = NULL;
8532
599k
    int special = 0;
8533
599k
    int isNamespace;
8534
599k
    int flags;
8535
8536
599k
    *value = NULL;
8537
599k
    GROW;
8538
599k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8539
599k
    if (hname.name == NULL) {
8540
270k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8541
270k
                       "error parsing attribute name\n");
8542
270k
        return(hname);
8543
270k
    }
8544
328k
    name = hname.name;
8545
328k
    prefix = hprefix->name;
8546
8547
    /*
8548
     * get the type if needed
8549
     */
8550
328k
    if (ctxt->attsSpecial != NULL) {
8551
163k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8552
163k
                                              prefix, name));
8553
163k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
328k
    SKIP_BLANKS;
8559
328k
    if (RAW != '=') {
8560
35.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8561
35.0k
                          "Specification mandates value for attribute %s\n",
8562
35.0k
                          name);
8563
35.0k
        goto error;
8564
35.0k
    }
8565
8566
8567
293k
    NEXT;
8568
293k
    SKIP_BLANKS;
8569
293k
    flags = 0;
8570
293k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8571
246k
                   (prefix == ctxt->str_xmlns));
8572
293k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8573
293k
                                   isNamespace);
8574
293k
    if (val == NULL)
8575
15.7k
        goto error;
8576
8577
277k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8578
8579
277k
#ifdef LIBXML_VALID_ENABLED
8580
277k
    if ((ctxt->validate) &&
8581
118k
        (ctxt->standalone == 1) &&
8582
622
        (special & XML_SPECIAL_EXTERNAL) &&
8583
418
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8584
196
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8585
196
                         "standalone: normalization of attribute %s on %s "
8586
196
                         "by external subset declaration\n",
8587
196
                         name, elem);
8588
196
    }
8589
277k
#endif
8590
8591
277k
    if (prefix == ctxt->str_xml) {
8592
        /*
8593
         * Check that xml:lang conforms to the specification
8594
         * No more registered as an error, just generate a warning now
8595
         * since this was deprecated in XML second edition
8596
         */
8597
9.16k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8598
5.10k
            internal_val = xmlStrndup(val, *len);
8599
5.10k
            if (internal_val == NULL)
8600
14
                goto mem_error;
8601
5.08k
            if (!xmlCheckLanguageID(internal_val)) {
8602
4.30k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8603
4.30k
                              "Malformed value for xml:lang : %s\n",
8604
4.30k
                              internal_val, NULL);
8605
4.30k
            }
8606
5.08k
        }
8607
8608
        /*
8609
         * Check that xml:space conforms to the specification
8610
         */
8611
9.14k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8612
674
            internal_val = xmlStrndup(val, *len);
8613
674
            if (internal_val == NULL)
8614
8
                goto mem_error;
8615
666
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8616
216
                *(ctxt->space) = 0;
8617
450
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8618
217
                *(ctxt->space) = 1;
8619
233
            else {
8620
233
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8621
233
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8622
233
                              internal_val, NULL);
8623
233
            }
8624
666
        }
8625
9.13k
        if (internal_val) {
8626
5.75k
            xmlFree(internal_val);
8627
5.75k
        }
8628
9.13k
    }
8629
8630
277k
    *value = val;
8631
277k
    return (hname);
8632
8633
22
mem_error:
8634
22
    xmlErrMemory(ctxt);
8635
50.7k
error:
8636
50.7k
    if ((val != NULL) && (*alloc != 0))
8637
6
        xmlFree(val);
8638
50.7k
    return(hname);
8639
22
}
8640
8641
/**
8642
 * Inserts a new attribute into the hash table.
8643
 *
8644
 * @param ctxt  parser context
8645
 * @param size  size of the hash table
8646
 * @param name  attribute name
8647
 * @param uri  namespace uri
8648
 * @param hashValue  combined hash value of name and uri
8649
 * @param aindex  attribute index (this is a multiple of 5)
8650
 * @returns INT_MAX if no existing attribute was found, the attribute
8651
 * index if an attribute was found, -1 if a memory allocation failed.
8652
 */
8653
static int
8654
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8655
336k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8656
336k
    xmlAttrHashBucket *table = ctxt->attrHash;
8657
336k
    xmlAttrHashBucket *bucket;
8658
336k
    unsigned hindex;
8659
8660
336k
    hindex = hashValue & (size - 1);
8661
336k
    bucket = &table[hindex];
8662
8663
390k
    while (bucket->index >= 0) {
8664
119k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8665
8666
119k
        if (name == atts[0]) {
8667
66.1k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8668
8669
66.1k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8670
66.1k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8671
7.71k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8672
65.2k
                return(bucket->index);
8673
66.1k
        }
8674
8675
54.0k
        hindex++;
8676
54.0k
        bucket++;
8677
54.0k
        if (hindex >= size) {
8678
4.88k
            hindex = 0;
8679
4.88k
            bucket = table;
8680
4.88k
        }
8681
54.0k
    }
8682
8683
271k
    bucket->index = aindex;
8684
8685
271k
    return(INT_MAX);
8686
336k
}
8687
8688
static int
8689
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8690
                       const xmlChar *name, const xmlChar *prefix,
8691
3.38k
                       unsigned hashValue, int aindex) {
8692
3.38k
    xmlAttrHashBucket *table = ctxt->attrHash;
8693
3.38k
    xmlAttrHashBucket *bucket;
8694
3.38k
    unsigned hindex;
8695
8696
3.38k
    hindex = hashValue & (size - 1);
8697
3.38k
    bucket = &table[hindex];
8698
8699
5.41k
    while (bucket->index >= 0) {
8700
3.07k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8701
8702
3.07k
        if ((name == atts[0]) && (prefix == atts[1]))
8703
1.04k
            return(bucket->index);
8704
8705
2.02k
        hindex++;
8706
2.02k
        bucket++;
8707
2.02k
        if (hindex >= size) {
8708
203
            hindex = 0;
8709
203
            bucket = table;
8710
203
        }
8711
2.02k
    }
8712
8713
2.34k
    bucket->index = aindex;
8714
8715
2.34k
    return(INT_MAX);
8716
3.38k
}
8717
/**
8718
 * Parse a start tag. Always consumes '<'.
8719
 *
8720
 * This routine is called when running SAX2 parsing
8721
 *
8722
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8723
 *
8724
 * [ WFC: Unique Att Spec ]
8725
 * No attribute name may appear more than once in the same start-tag or
8726
 * empty-element tag.
8727
 *
8728
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8729
 *
8730
 * [ WFC: Unique Att Spec ]
8731
 * No attribute name may appear more than once in the same start-tag or
8732
 * empty-element tag.
8733
 *
8734
 * With namespace:
8735
 *
8736
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8737
 *
8738
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8739
 *
8740
 * @param ctxt  an XML parser context
8741
 * @param pref  resulting namespace prefix
8742
 * @param URI  resulting namespace URI
8743
 * @param nbNsPtr  resulting number of namespace declarations
8744
 * @returns the element name parsed
8745
 */
8746
8747
static const xmlChar *
8748
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8749
1.10M
                  const xmlChar **URI, int *nbNsPtr) {
8750
1.10M
    xmlHashedString hlocalname;
8751
1.10M
    xmlHashedString hprefix;
8752
1.10M
    xmlHashedString hattname;
8753
1.10M
    xmlHashedString haprefix;
8754
1.10M
    const xmlChar *localname;
8755
1.10M
    const xmlChar *prefix;
8756
1.10M
    const xmlChar *attname;
8757
1.10M
    const xmlChar *aprefix;
8758
1.10M
    const xmlChar *uri;
8759
1.10M
    xmlChar *attvalue = NULL;
8760
1.10M
    const xmlChar **atts = ctxt->atts;
8761
1.10M
    unsigned attrHashSize = 0;
8762
1.10M
    int maxatts = ctxt->maxatts;
8763
1.10M
    int nratts, nbatts, nbdef;
8764
1.10M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8765
1.10M
    int alloc = 0;
8766
1.10M
    int numNsErr = 0;
8767
1.10M
    int numDupErr = 0;
8768
8769
1.10M
    if (RAW != '<') return(NULL);
8770
1.10M
    NEXT1;
8771
8772
1.10M
    nbatts = 0;
8773
1.10M
    nratts = 0;
8774
1.10M
    nbdef = 0;
8775
1.10M
    nbNs = 0;
8776
1.10M
    nbTotalDef = 0;
8777
1.10M
    attval = 0;
8778
8779
1.10M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8780
0
        xmlErrMemory(ctxt);
8781
0
        return(NULL);
8782
0
    }
8783
8784
1.10M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8785
1.10M
    if (hlocalname.name == NULL) {
8786
257k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8787
257k
           "StartTag: invalid element name\n");
8788
257k
        return(NULL);
8789
257k
    }
8790
846k
    localname = hlocalname.name;
8791
846k
    prefix = hprefix.name;
8792
8793
    /*
8794
     * Now parse the attributes, it ends up with the ending
8795
     *
8796
     * (S Attribute)* S?
8797
     */
8798
846k
    SKIP_BLANKS;
8799
846k
    GROW;
8800
8801
    /*
8802
     * The ctxt->atts array will be ultimately passed to the SAX callback
8803
     * containing five xmlChar pointers for each attribute:
8804
     *
8805
     * [0] attribute name
8806
     * [1] attribute prefix
8807
     * [2] namespace URI
8808
     * [3] attribute value
8809
     * [4] end of attribute value
8810
     *
8811
     * To save memory, we reuse this array temporarily and store integers
8812
     * in these pointer variables.
8813
     *
8814
     * [0] attribute name
8815
     * [1] attribute prefix
8816
     * [2] hash value of attribute prefix, and later namespace index
8817
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8818
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8819
     *
8820
     * The ctxt->attallocs array contains an additional unsigned int for
8821
     * each attribute, containing the hash value of the attribute name
8822
     * and the alloc flag in bit 31.
8823
     */
8824
8825
1.00M
    while (((RAW != '>') &&
8826
692k
     ((RAW != '/') || (NXT(1) != '>')) &&
8827
619k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8828
599k
  int len = -1;
8829
8830
599k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8831
599k
                                          &haprefix, &attvalue, &len,
8832
599k
                                          &alloc);
8833
599k
        if (hattname.name == NULL)
8834
270k
      break;
8835
328k
        if (attvalue == NULL)
8836
50.7k
            goto next_attr;
8837
277k
        attname = hattname.name;
8838
277k
        aprefix = haprefix.name;
8839
277k
  if (len < 0) len = xmlStrlen(attvalue);
8840
8841
277k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8842
44.4k
            xmlHashedString huri;
8843
44.4k
            xmlURIPtr parsedUri;
8844
8845
44.4k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8846
44.4k
            uri = huri.name;
8847
44.4k
            if (uri == NULL) {
8848
12
                xmlErrMemory(ctxt);
8849
12
                goto next_attr;
8850
12
            }
8851
44.4k
            if (*uri != 0) {
8852
43.3k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8853
99
                    xmlErrMemory(ctxt);
8854
99
                    goto next_attr;
8855
99
                }
8856
43.2k
                if (parsedUri == NULL) {
8857
32.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8858
32.8k
                             "xmlns: '%s' is not a valid URI\n",
8859
32.8k
                                       uri, NULL, NULL);
8860
32.8k
                } else {
8861
10.4k
                    if (parsedUri->scheme == NULL) {
8862
6.36k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8863
6.36k
                                  "xmlns: URI %s is not absolute\n",
8864
6.36k
                                  uri, NULL, NULL);
8865
6.36k
                    }
8866
10.4k
                    xmlFreeURI(parsedUri);
8867
10.4k
                }
8868
43.2k
                if (uri == ctxt->str_xml_ns) {
8869
201
                    if (attname != ctxt->str_xml) {
8870
201
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8871
201
                     "xml namespace URI cannot be the default namespace\n",
8872
201
                                 NULL, NULL, NULL);
8873
201
                    }
8874
201
                    goto next_attr;
8875
201
                }
8876
43.0k
                if ((len == 29) &&
8877
820
                    (xmlStrEqual(uri,
8878
820
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8879
219
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8880
219
                         "reuse of the xmlns namespace name is forbidden\n",
8881
219
                             NULL, NULL, NULL);
8882
219
                    goto next_attr;
8883
219
                }
8884
43.0k
            }
8885
8886
43.8k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8887
39.9k
                nbNs++;
8888
233k
        } else if (aprefix == ctxt->str_xmlns) {
8889
40.1k
            xmlHashedString huri;
8890
40.1k
            xmlURIPtr parsedUri;
8891
8892
40.1k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8893
40.1k
            uri = huri.name;
8894
40.1k
            if (uri == NULL) {
8895
6
                xmlErrMemory(ctxt);
8896
6
                goto next_attr;
8897
6
            }
8898
8899
40.1k
            if (attname == ctxt->str_xml) {
8900
604
                if (uri != ctxt->str_xml_ns) {
8901
313
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8902
313
                             "xml namespace prefix mapped to wrong URI\n",
8903
313
                             NULL, NULL, NULL);
8904
313
                }
8905
                /*
8906
                 * Do not keep a namespace definition node
8907
                 */
8908
604
                goto next_attr;
8909
604
            }
8910
39.5k
            if (uri == ctxt->str_xml_ns) {
8911
69
                if (attname != ctxt->str_xml) {
8912
69
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8913
69
                             "xml namespace URI mapped to wrong prefix\n",
8914
69
                             NULL, NULL, NULL);
8915
69
                }
8916
69
                goto next_attr;
8917
69
            }
8918
39.5k
            if (attname == ctxt->str_xmlns) {
8919
249
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8920
249
                         "redefinition of the xmlns prefix is forbidden\n",
8921
249
                         NULL, NULL, NULL);
8922
249
                goto next_attr;
8923
249
            }
8924
39.2k
            if ((len == 29) &&
8925
782
                (xmlStrEqual(uri,
8926
782
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8927
276
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8928
276
                         "reuse of the xmlns namespace name is forbidden\n",
8929
276
                         NULL, NULL, NULL);
8930
276
                goto next_attr;
8931
276
            }
8932
38.9k
            if ((uri == NULL) || (uri[0] == 0)) {
8933
279
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8934
279
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8935
279
                              attname, NULL, NULL);
8936
279
                goto next_attr;
8937
38.7k
            } else {
8938
38.7k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8939
30
                    xmlErrMemory(ctxt);
8940
30
                    goto next_attr;
8941
30
                }
8942
38.6k
                if (parsedUri == NULL) {
8943
7.50k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8944
7.50k
                         "xmlns:%s: '%s' is not a valid URI\n",
8945
7.50k
                                       attname, uri, NULL);
8946
31.1k
                } else {
8947
31.1k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8948
6.64k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8949
6.64k
                                  "xmlns:%s: URI %s is not absolute\n",
8950
6.64k
                                  attname, uri, NULL);
8951
6.64k
                    }
8952
31.1k
                    xmlFreeURI(parsedUri);
8953
31.1k
                }
8954
38.6k
            }
8955
8956
38.6k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8957
32.2k
                nbNs++;
8958
193k
        } else {
8959
            /*
8960
             * Populate attributes array, see above for repurposing
8961
             * of xmlChar pointers.
8962
             */
8963
193k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8964
11.3k
                int res = xmlCtxtGrowAttrs(ctxt);
8965
8966
11.3k
                maxatts = ctxt->maxatts;
8967
11.3k
                atts = ctxt->atts;
8968
8969
11.3k
                if (res < 0)
8970
71
                    goto next_attr;
8971
11.3k
            }
8972
193k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8973
193k
                                        ((unsigned) alloc << 31);
8974
193k
            atts[nbatts++] = attname;
8975
193k
            atts[nbatts++] = aprefix;
8976
193k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8977
193k
            if (alloc) {
8978
37.3k
                atts[nbatts++] = attvalue;
8979
37.3k
                attvalue += len;
8980
37.3k
                atts[nbatts++] = attvalue;
8981
155k
            } else {
8982
                /*
8983
                 * attvalue points into the input buffer which can be
8984
                 * reallocated. Store differences to input->base instead.
8985
                 * The pointers will be reconstructed later.
8986
                 */
8987
155k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8988
155k
                attvalue += len;
8989
155k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8990
155k
            }
8991
            /*
8992
             * tag if some deallocation is needed
8993
             */
8994
193k
            if (alloc != 0) attval = 1;
8995
193k
            attvalue = NULL; /* moved into atts */
8996
193k
        }
8997
8998
328k
next_attr:
8999
328k
        if ((attvalue != NULL) && (alloc != 0)) {
9000
12.4k
            xmlFree(attvalue);
9001
12.4k
            attvalue = NULL;
9002
12.4k
        }
9003
9004
328k
  GROW
9005
328k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9006
85.7k
      break;
9007
242k
  if (SKIP_BLANKS == 0) {
9008
89.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9009
89.3k
         "attributes construct error\n");
9010
89.3k
      break;
9011
89.3k
  }
9012
153k
        GROW;
9013
153k
    }
9014
9015
    /*
9016
     * Namespaces from default attributes
9017
     */
9018
846k
    if (ctxt->attsDefault != NULL) {
9019
480k
        xmlDefAttrsPtr defaults;
9020
9021
480k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9022
480k
  if (defaults != NULL) {
9023
1.27M
      for (i = 0; i < defaults->nbAttrs; i++) {
9024
899k
                xmlDefAttr *attr = &defaults->attrs[i];
9025
9026
899k
          attname = attr->name.name;
9027
899k
    aprefix = attr->prefix.name;
9028
9029
899k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9030
6.76k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9031
9032
6.76k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9033
5.47k
                        nbNs++;
9034
892k
    } else if (aprefix == ctxt->str_xmlns) {
9035
591k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9036
9037
591k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9038
591k
                                      NULL, 1) > 0)
9039
571k
                        nbNs++;
9040
591k
    } else {
9041
301k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9042
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9043
0
                                    "Maximum number of attributes exceeded");
9044
0
                        break;
9045
0
                    }
9046
301k
                    nbTotalDef += 1;
9047
301k
                }
9048
899k
      }
9049
377k
  }
9050
480k
    }
9051
9052
    /*
9053
     * Resolve attribute namespaces
9054
     */
9055
1.03M
    for (i = 0; i < nbatts; i += 5) {
9056
193k
        attname = atts[i];
9057
193k
        aprefix = atts[i+1];
9058
9059
        /*
9060
  * The default namespace does not apply to attribute names.
9061
  */
9062
193k
  if (aprefix == NULL) {
9063
115k
            nsIndex = NS_INDEX_EMPTY;
9064
115k
        } else if (aprefix == ctxt->str_xml) {
9065
9.12k
            nsIndex = NS_INDEX_XML;
9066
68.0k
        } else {
9067
68.0k
            haprefix.name = aprefix;
9068
68.0k
            haprefix.hashValue = (size_t) atts[i+2];
9069
68.0k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9070
9071
68.0k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9072
52.3k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9073
52.3k
        "Namespace prefix %s for %s on %s is not defined\n",
9074
52.3k
        aprefix, attname, localname);
9075
52.3k
                nsIndex = NS_INDEX_EMPTY;
9076
52.3k
            }
9077
68.0k
        }
9078
9079
193k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9080
193k
    }
9081
9082
    /*
9083
     * Maximum number of attributes including default attributes.
9084
     */
9085
846k
    maxAtts = nratts + nbTotalDef;
9086
9087
    /*
9088
     * Verify that attribute names are unique.
9089
     */
9090
846k
    if (maxAtts > 1) {
9091
51.9k
        attrHashSize = 4;
9092
70.9k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9093
19.0k
            attrHashSize *= 2;
9094
9095
51.9k
        if (attrHashSize > ctxt->attrHashMax) {
9096
2.62k
            xmlAttrHashBucket *tmp;
9097
9098
2.62k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9099
2.62k
            if (tmp == NULL) {
9100
17
                xmlErrMemory(ctxt);
9101
17
                goto done;
9102
17
            }
9103
9104
2.60k
            ctxt->attrHash = tmp;
9105
2.60k
            ctxt->attrHashMax = attrHashSize;
9106
2.60k
        }
9107
9108
51.9k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9109
9110
186k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9111
134k
            const xmlChar *nsuri;
9112
134k
            unsigned hashValue, nameHashValue, uriHashValue;
9113
134k
            int res;
9114
9115
134k
            attname = atts[i];
9116
134k
            aprefix = atts[i+1];
9117
134k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9118
            /* Hash values always have bit 31 set, see dict.c */
9119
134k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9120
9121
134k
            if (nsIndex == NS_INDEX_EMPTY) {
9122
                /*
9123
                 * Prefix with empty namespace means an undeclared
9124
                 * prefix which was already reported above.
9125
                 */
9126
118k
                if (aprefix != NULL)
9127
49.1k
                    continue;
9128
69.7k
                nsuri = NULL;
9129
69.7k
                uriHashValue = URI_HASH_EMPTY;
9130
69.7k
            } else if (nsIndex == NS_INDEX_XML) {
9131
1.77k
                nsuri = ctxt->str_xml_ns;
9132
1.77k
                uriHashValue = URI_HASH_XML;
9133
14.2k
            } else {
9134
14.2k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9135
14.2k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9136
14.2k
            }
9137
9138
85.8k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9139
85.8k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9140
85.8k
                                    hashValue, i);
9141
85.8k
            if (res < 0)
9142
0
                continue;
9143
9144
            /*
9145
             * [ WFC: Unique Att Spec ]
9146
             * No attribute name may appear more than once in the same
9147
             * start-tag or empty-element tag.
9148
             * As extended by the Namespace in XML REC.
9149
             */
9150
85.8k
            if (res < INT_MAX) {
9151
41.2k
                if (aprefix == atts[res+1]) {
9152
39.5k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9153
39.5k
                    numDupErr += 1;
9154
39.5k
                } else {
9155
1.68k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9156
1.68k
                             "Namespaced Attribute %s in '%s' redefined\n",
9157
1.68k
                             attname, nsuri, NULL);
9158
1.68k
                    numNsErr += 1;
9159
1.68k
                }
9160
41.2k
            }
9161
85.8k
        }
9162
51.9k
    }
9163
9164
    /*
9165
     * Default attributes
9166
     */
9167
846k
    if (ctxt->attsDefault != NULL) {
9168
480k
        xmlDefAttrsPtr defaults;
9169
9170
480k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9171
480k
  if (defaults != NULL) {
9172
1.27M
      for (i = 0; i < defaults->nbAttrs; i++) {
9173
899k
                xmlDefAttr *attr = &defaults->attrs[i];
9174
899k
                const xmlChar *nsuri = NULL;
9175
899k
                unsigned hashValue, uriHashValue = 0;
9176
899k
                int res;
9177
9178
899k
          attname = attr->name.name;
9179
899k
    aprefix = attr->prefix.name;
9180
9181
899k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9182
6.76k
                    continue;
9183
892k
    if (aprefix == ctxt->str_xmlns)
9184
591k
                    continue;
9185
9186
301k
                if (aprefix == NULL) {
9187
258k
                    nsIndex = NS_INDEX_EMPTY;
9188
258k
                    nsuri = NULL;
9189
258k
                    uriHashValue = URI_HASH_EMPTY;
9190
258k
                } else if (aprefix == ctxt->str_xml) {
9191
8.38k
                    nsIndex = NS_INDEX_XML;
9192
8.38k
                    nsuri = ctxt->str_xml_ns;
9193
8.38k
                    uriHashValue = URI_HASH_XML;
9194
34.6k
                } else {
9195
34.6k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9196
34.6k
                    if ((nsIndex == INT_MAX) ||
9197
22.9k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9198
22.9k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9199
22.9k
                                 "Namespace prefix %s for %s on %s is not "
9200
22.9k
                                 "defined\n",
9201
22.9k
                                 aprefix, attname, localname);
9202
22.9k
                        nsIndex = NS_INDEX_EMPTY;
9203
22.9k
                        nsuri = NULL;
9204
22.9k
                        uriHashValue = URI_HASH_EMPTY;
9205
22.9k
                    } else {
9206
11.7k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9207
11.7k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9208
11.7k
                    }
9209
34.6k
                }
9210
9211
                /*
9212
                 * Check whether the attribute exists
9213
                 */
9214
301k
                if (maxAtts > 1) {
9215
250k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9216
250k
                                                   uriHashValue);
9217
250k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9218
250k
                                            hashValue, nbatts);
9219
250k
                    if (res < 0)
9220
0
                        continue;
9221
250k
                    if (res < INT_MAX) {
9222
23.9k
                        if (aprefix == atts[res+1])
9223
23.7k
                            continue;
9224
198
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9225
198
                                 "Namespaced Attribute %s in '%s' redefined\n",
9226
198
                                 attname, nsuri, NULL);
9227
198
                    }
9228
250k
                }
9229
9230
277k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9231
9232
277k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9233
3.54k
                    res = xmlCtxtGrowAttrs(ctxt);
9234
9235
3.54k
                    maxatts = ctxt->maxatts;
9236
3.54k
                    atts = ctxt->atts;
9237
9238
3.54k
                    if (res < 0) {
9239
23
                        localname = NULL;
9240
23
                        goto done;
9241
23
                    }
9242
3.54k
                }
9243
9244
277k
                atts[nbatts++] = attname;
9245
277k
                atts[nbatts++] = aprefix;
9246
277k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9247
277k
                atts[nbatts++] = attr->value.name;
9248
277k
                atts[nbatts++] = attr->valueEnd;
9249
9250
277k
#ifdef LIBXML_VALID_ENABLED
9251
                /*
9252
                 * This should be moved to valid.c, but we don't keep track
9253
                 * whether an attribute was defaulted.
9254
                 */
9255
277k
                if ((ctxt->validate) &&
9256
174k
                    (ctxt->standalone == 1) &&
9257
582
                    (attr->external != 0)) {
9258
369
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9259
369
                            "standalone: attribute %s on %s defaulted "
9260
369
                            "from external subset\n",
9261
369
                            attname, localname);
9262
369
                }
9263
277k
#endif
9264
277k
                nbdef++;
9265
277k
      }
9266
377k
  }
9267
480k
    }
9268
9269
    /*
9270
     * Using a single hash table for nsUri/localName pairs cannot
9271
     * detect duplicate QNames reliably. The following example will
9272
     * only result in two namespace errors.
9273
     *
9274
     * <doc xmlns:a="a" xmlns:b="a">
9275
     *   <elem a:a="" b:a="" b:a=""/>
9276
     * </doc>
9277
     *
9278
     * If we saw more than one namespace error but no duplicate QNames
9279
     * were found, we have to scan for duplicate QNames.
9280
     */
9281
846k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9282
630
        memset(ctxt->attrHash, -1,
9283
630
               attrHashSize * sizeof(ctxt->attrHash[0]));
9284
9285
4.29k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9286
3.66k
            unsigned hashValue, nameHashValue, prefixHashValue;
9287
3.66k
            int res;
9288
9289
3.66k
            aprefix = atts[i+1];
9290
3.66k
            if (aprefix == NULL)
9291
280
                continue;
9292
9293
3.38k
            attname = atts[i];
9294
            /* Hash values always have bit 31 set, see dict.c */
9295
3.38k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9296
3.38k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9297
9298
3.38k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9299
3.38k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9300
3.38k
                                         aprefix, hashValue, i);
9301
3.38k
            if (res < INT_MAX)
9302
1.04k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9303
3.38k
        }
9304
630
    }
9305
9306
    /*
9307
     * Reconstruct attribute pointers
9308
     */
9309
1.31M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9310
        /* namespace URI */
9311
470k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9312
470k
        if (nsIndex == INT_MAX)
9313
428k
            atts[i+2] = NULL;
9314
42.4k
        else if (nsIndex == INT_MAX - 1)
9315
17.3k
            atts[i+2] = ctxt->str_xml_ns;
9316
25.1k
        else
9317
25.1k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9318
9319
470k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9320
155k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9321
155k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9322
155k
        }
9323
470k
    }
9324
9325
846k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9326
846k
    if ((prefix != NULL) && (uri == NULL)) {
9327
23.9k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9328
23.9k
           "Namespace prefix %s on %s is not defined\n",
9329
23.9k
     prefix, localname, NULL);
9330
23.9k
    }
9331
846k
    *pref = prefix;
9332
846k
    *URI = uri;
9333
9334
    /*
9335
     * SAX callback
9336
     */
9337
846k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9338
846k
  (!ctxt->disableSAX)) {
9339
558k
  if (nbNs > 0)
9340
156k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9341
156k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9342
156k
        nbatts / 5, nbdef, atts);
9343
401k
  else
9344
401k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
401k
                          0, NULL, nbatts / 5, nbdef, atts);
9346
558k
    }
9347
9348
846k
done:
9349
    /*
9350
     * Free allocated attribute values
9351
     */
9352
846k
    if (attval != 0) {
9353
99.9k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9354
63.8k
      if (ctxt->attallocs[j] & 0x80000000)
9355
37.3k
          xmlFree((xmlChar *) atts[i+3]);
9356
36.1k
    }
9357
9358
846k
    *nbNsPtr = nbNs;
9359
846k
    return(localname);
9360
846k
}
9361
9362
/**
9363
 * Parse an end tag. Always consumes '</'.
9364
 *
9365
 *     [42] ETag ::= '</' Name S? '>'
9366
 *
9367
 * With namespace
9368
 *
9369
 *     [NS 9] ETag ::= '</' QName S? '>'
9370
 * @param ctxt  an XML parser context
9371
 * @param tag  the corresponding start tag
9372
 */
9373
9374
static void
9375
82.1k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9376
82.1k
    const xmlChar *name;
9377
9378
82.1k
    GROW;
9379
82.1k
    if ((RAW != '<') || (NXT(1) != '/')) {
9380
2.08k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9381
2.08k
  return;
9382
2.08k
    }
9383
80.0k
    SKIP(2);
9384
9385
80.0k
    if (tag->prefix == NULL)
9386
72.2k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9387
7.87k
    else
9388
7.87k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9389
9390
    /*
9391
     * We should definitely be at the ending "S? '>'" part
9392
     */
9393
80.0k
    GROW;
9394
80.0k
    SKIP_BLANKS;
9395
80.0k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9396
10.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9397
10.5k
    } else
9398
69.5k
  NEXT1;
9399
9400
    /*
9401
     * [ WFC: Element Type Match ]
9402
     * The Name in an element's end-tag must match the element type in the
9403
     * start-tag.
9404
     *
9405
     */
9406
80.0k
    if (name != (xmlChar*)1) {
9407
14.8k
        if (name == NULL) name = BAD_CAST "unparsable";
9408
14.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9409
14.8k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9410
14.8k
                    ctxt->name, tag->line, name);
9411
14.8k
    }
9412
9413
    /*
9414
     * SAX: End of Tag
9415
     */
9416
80.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9417
80.0k
  (!ctxt->disableSAX))
9418
74.5k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9419
74.5k
                                tag->URI);
9420
9421
80.0k
    spacePop(ctxt);
9422
80.0k
    if (tag->nsNr != 0)
9423
2.76k
  xmlParserNsPop(ctxt, tag->nsNr);
9424
80.0k
}
9425
9426
/**
9427
 * Parse escaped pure raw content. Always consumes '<!['.
9428
 *
9429
 * @deprecated Internal function, don't use.
9430
 *
9431
 *     [18] CDSect ::= CDStart CData CDEnd
9432
 *
9433
 *     [19] CDStart ::= '<![CDATA['
9434
 *
9435
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9436
 *
9437
 *     [21] CDEnd ::= ']]>'
9438
 * @param ctxt  an XML parser context
9439
 */
9440
void
9441
33.9k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9442
33.9k
    xmlChar *buf = NULL;
9443
33.9k
    int len = 0;
9444
33.9k
    int size = XML_PARSER_BUFFER_SIZE;
9445
33.9k
    int r, rl;
9446
33.9k
    int s, sl;
9447
33.9k
    int cur, l;
9448
33.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9449
12.8k
                    XML_MAX_HUGE_LENGTH :
9450
33.9k
                    XML_MAX_TEXT_LENGTH;
9451
9452
33.9k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9453
0
        return;
9454
33.9k
    SKIP(3);
9455
9456
33.9k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9457
0
        return;
9458
33.9k
    SKIP(6);
9459
9460
33.9k
    r = xmlCurrentCharRecover(ctxt, &rl);
9461
33.9k
    if (!IS_CHAR(r)) {
9462
776
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9463
776
        goto out;
9464
776
    }
9465
33.2k
    NEXTL(rl);
9466
33.2k
    s = xmlCurrentCharRecover(ctxt, &sl);
9467
33.2k
    if (!IS_CHAR(s)) {
9468
671
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9469
671
        goto out;
9470
671
    }
9471
32.5k
    NEXTL(sl);
9472
32.5k
    cur = xmlCurrentCharRecover(ctxt, &l);
9473
32.5k
    buf = xmlMalloc(size);
9474
32.5k
    if (buf == NULL) {
9475
22
  xmlErrMemory(ctxt);
9476
22
        goto out;
9477
22
    }
9478
2.26M
    while (IS_CHAR(cur) &&
9479
2.24M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9480
2.23M
  if (len + 5 >= size) {
9481
5.96k
      xmlChar *tmp;
9482
5.96k
            int newSize;
9483
9484
5.96k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9485
5.96k
            if (newSize < 0) {
9486
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9487
0
                               "CData section too big found\n");
9488
0
                goto out;
9489
0
            }
9490
5.96k
      tmp = xmlRealloc(buf, newSize);
9491
5.96k
      if (tmp == NULL) {
9492
8
    xmlErrMemory(ctxt);
9493
8
                goto out;
9494
8
      }
9495
5.95k
      buf = tmp;
9496
5.95k
      size = newSize;
9497
5.95k
  }
9498
2.23M
  COPY_BUF(buf, len, r);
9499
2.23M
  r = s;
9500
2.23M
  rl = sl;
9501
2.23M
  s = cur;
9502
2.23M
  sl = l;
9503
2.23M
  NEXTL(l);
9504
2.23M
  cur = xmlCurrentCharRecover(ctxt, &l);
9505
2.23M
    }
9506
32.5k
    buf[len] = 0;
9507
32.5k
    if (cur != '>') {
9508
22.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9509
22.6k
                       "CData section not finished\n%.50s\n", buf);
9510
22.6k
        goto out;
9511
22.6k
    }
9512
9.89k
    NEXTL(l);
9513
9514
    /*
9515
     * OK the buffer is to be consumed as cdata.
9516
     */
9517
9.89k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9518
9.30k
        if ((ctxt->sax->cdataBlock != NULL) &&
9519
9.30k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9520
3.00k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9521
6.29k
        } else if (ctxt->sax->characters != NULL) {
9522
6.29k
            ctxt->sax->characters(ctxt->userData, buf, len);
9523
6.29k
        }
9524
9.30k
    }
9525
9526
33.9k
out:
9527
33.9k
    xmlFree(buf);
9528
33.9k
}
9529
9530
/**
9531
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9532
 * unexpected EOF to the caller.
9533
 *
9534
 * @param ctxt  an XML parser context
9535
 */
9536
9537
static void
9538
39.2k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9539
39.2k
    int oldNameNr = ctxt->nameNr;
9540
39.2k
    int oldSpaceNr = ctxt->spaceNr;
9541
39.2k
    int oldNodeNr = ctxt->nodeNr;
9542
9543
39.2k
    GROW;
9544
12.3M
    while ((ctxt->input->cur < ctxt->input->end) &&
9545
12.2M
     (PARSER_STOPPED(ctxt) == 0)) {
9546
12.2M
  const xmlChar *cur = ctxt->input->cur;
9547
9548
  /*
9549
   * First case : a Processing Instruction.
9550
   */
9551
12.2M
  if ((*cur == '<') && (cur[1] == '?')) {
9552
18.3k
      xmlParsePI(ctxt);
9553
18.3k
  }
9554
9555
  /*
9556
   * Second case : a CDSection
9557
   */
9558
  /* 2.6.0 test was *cur not RAW */
9559
12.2M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9560
31.5k
      xmlParseCDSect(ctxt);
9561
31.5k
  }
9562
9563
  /*
9564
   * Third case :  a comment
9565
   */
9566
12.2M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9567
132k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9568
82.3k
      xmlParseComment(ctxt);
9569
82.3k
  }
9570
9571
  /*
9572
   * Fourth case :  a sub-element.
9573
   */
9574
12.1M
  else if (*cur == '<') {
9575
1.23M
            if (NXT(1) == '/') {
9576
76.0k
                if (ctxt->nameNr <= oldNameNr)
9577
1.28k
                    break;
9578
74.7k
          xmlParseElementEnd(ctxt);
9579
1.16M
            } else {
9580
1.16M
          xmlParseElementStart(ctxt);
9581
1.16M
            }
9582
1.23M
  }
9583
9584
  /*
9585
   * Fifth case : a reference. If if has not been resolved,
9586
   *    parsing returns it's Name, create the node
9587
   */
9588
9589
10.9M
  else if (*cur == '&') {
9590
604k
      xmlParseReference(ctxt);
9591
604k
  }
9592
9593
  /*
9594
   * Last case, text. Note that References are handled directly.
9595
   */
9596
10.3M
  else {
9597
10.3M
      xmlParseCharDataInternal(ctxt, 0);
9598
10.3M
  }
9599
9600
12.2M
  SHRINK;
9601
12.2M
  GROW;
9602
12.2M
    }
9603
9604
39.2k
    if ((ctxt->nameNr > oldNameNr) &&
9605
12.1k
        (ctxt->input->cur >= ctxt->input->end) &&
9606
10.8k
        (ctxt->wellFormed)) {
9607
568
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9608
568
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9609
568
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9610
568
                "Premature end of data in tag %s line %d\n",
9611
568
                name, line, NULL);
9612
568
    }
9613
9614
    /*
9615
     * Clean up in error case
9616
     */
9617
9618
302k
    while (ctxt->nodeNr > oldNodeNr)
9619
263k
        nodePop(ctxt);
9620
9621
376k
    while (ctxt->nameNr > oldNameNr) {
9622
337k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9623
9624
337k
        if (tag->nsNr != 0)
9625
76.6k
            xmlParserNsPop(ctxt, tag->nsNr);
9626
9627
337k
        namePop(ctxt);
9628
337k
    }
9629
9630
376k
    while (ctxt->spaceNr > oldSpaceNr)
9631
337k
        spacePop(ctxt);
9632
39.2k
}
9633
9634
/**
9635
 * Parse XML element content. This is useful if you're only interested
9636
 * in custom SAX callbacks. If you want a node list, use
9637
 * #xmlCtxtParseContent.
9638
 *
9639
 * @param ctxt  an XML parser context
9640
 */
9641
void
9642
0
xmlParseContent(xmlParserCtxt *ctxt) {
9643
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9644
0
        return;
9645
9646
0
    xmlCtxtInitializeLate(ctxt);
9647
9648
0
    xmlParseContentInternal(ctxt);
9649
9650
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9651
0
}
9652
9653
/**
9654
 * Parse an XML element
9655
 *
9656
 * @deprecated Internal function, don't use.
9657
 *
9658
 *     [39] element ::= EmptyElemTag | STag content ETag
9659
 *
9660
 * [ WFC: Element Type Match ]
9661
 * The Name in an element's end-tag must match the element type in the
9662
 * start-tag.
9663
 *
9664
 * @param ctxt  an XML parser context
9665
 */
9666
9667
void
9668
35.0k
xmlParseElement(xmlParserCtxt *ctxt) {
9669
35.0k
    if (xmlParseElementStart(ctxt) != 0)
9670
13.6k
        return;
9671
9672
21.3k
    xmlParseContentInternal(ctxt);
9673
9674
21.3k
    if (ctxt->input->cur >= ctxt->input->end) {
9675
16.5k
        if (ctxt->wellFormed) {
9676
935
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9677
935
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9678
935
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9679
935
                    "Premature end of data in tag %s line %d\n",
9680
935
                    name, line, NULL);
9681
935
        }
9682
16.5k
        return;
9683
16.5k
    }
9684
9685
4.79k
    xmlParseElementEnd(ctxt);
9686
4.79k
}
9687
9688
/**
9689
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9690
 * opening tag was parsed, 1 if an empty element was parsed.
9691
 *
9692
 * Always consumes '<'.
9693
 *
9694
 * @param ctxt  an XML parser context
9695
 */
9696
static int
9697
1.19M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9698
1.19M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9699
1.19M
    const xmlChar *name;
9700
1.19M
    const xmlChar *prefix = NULL;
9701
1.19M
    const xmlChar *URI = NULL;
9702
1.19M
    xmlParserNodeInfo node_info;
9703
1.19M
    int line;
9704
1.19M
    xmlNodePtr cur;
9705
1.19M
    int nbNs = 0;
9706
9707
1.19M
    if (ctxt->nameNr > maxDepth) {
9708
19
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9709
19
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9710
19
                ctxt->nameNr);
9711
19
  return(-1);
9712
19
    }
9713
9714
    /* Capture start position */
9715
1.19M
    if (ctxt->record_info) {
9716
0
        node_info.begin_pos = ctxt->input->consumed +
9717
0
                          (CUR_PTR - ctxt->input->base);
9718
0
  node_info.begin_line = ctxt->input->line;
9719
0
    }
9720
9721
1.19M
    if (ctxt->spaceNr == 0)
9722
35.0k
  spacePush(ctxt, -1);
9723
1.16M
    else if (*ctxt->space == -2)
9724
356k
  spacePush(ctxt, -1);
9725
806k
    else
9726
806k
  spacePush(ctxt, *ctxt->space);
9727
9728
1.19M
    line = ctxt->input->line;
9729
1.19M
#ifdef LIBXML_SAX1_ENABLED
9730
1.19M
    if (ctxt->sax2)
9731
1.01M
#endif /* LIBXML_SAX1_ENABLED */
9732
1.01M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9733
185k
#ifdef LIBXML_SAX1_ENABLED
9734
185k
    else
9735
185k
  name = xmlParseStartTag(ctxt);
9736
1.19M
#endif /* LIBXML_SAX1_ENABLED */
9737
1.19M
    if (name == NULL) {
9738
281k
  spacePop(ctxt);
9739
281k
        return(-1);
9740
281k
    }
9741
916k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9742
916k
    cur = ctxt->node;
9743
9744
916k
#ifdef LIBXML_VALID_ENABLED
9745
    /*
9746
     * [ VC: Root Element Type ]
9747
     * The Name in the document type declaration must match the element
9748
     * type of the root element.
9749
     */
9750
916k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9751
29.8k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9752
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9753
916k
#endif /* LIBXML_VALID_ENABLED */
9754
9755
    /*
9756
     * Check for an Empty Element.
9757
     */
9758
916k
    if ((RAW == '/') && (NXT(1) == '>')) {
9759
73.7k
        SKIP(2);
9760
73.7k
  if (ctxt->sax2) {
9761
67.1k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9762
67.1k
    (!ctxt->disableSAX))
9763
62.4k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9764
67.1k
#ifdef LIBXML_SAX1_ENABLED
9765
67.1k
  } else {
9766
6.62k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9767
6.62k
    (!ctxt->disableSAX))
9768
6.26k
    ctxt->sax->endElement(ctxt->userData, name);
9769
6.62k
#endif /* LIBXML_SAX1_ENABLED */
9770
6.62k
  }
9771
73.7k
  namePop(ctxt);
9772
73.7k
  spacePop(ctxt);
9773
73.7k
  if (nbNs > 0)
9774
34.6k
      xmlParserNsPop(ctxt, nbNs);
9775
73.7k
  if (cur != NULL && ctxt->record_info) {
9776
0
            node_info.node = cur;
9777
0
            node_info.end_pos = ctxt->input->consumed +
9778
0
                                (CUR_PTR - ctxt->input->base);
9779
0
            node_info.end_line = ctxt->input->line;
9780
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9781
0
  }
9782
73.7k
  return(1);
9783
73.7k
    }
9784
842k
    if (RAW == '>') {
9785
433k
        NEXT1;
9786
433k
        if (cur != NULL && ctxt->record_info) {
9787
0
            node_info.node = cur;
9788
0
            node_info.end_pos = 0;
9789
0
            node_info.end_line = 0;
9790
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9791
0
        }
9792
433k
    } else {
9793
409k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9794
409k
         "Couldn't find end of Start Tag %s line %d\n",
9795
409k
                    name, line, NULL);
9796
9797
  /*
9798
   * end of parsing of this node.
9799
   */
9800
409k
  nodePop(ctxt);
9801
409k
  namePop(ctxt);
9802
409k
  spacePop(ctxt);
9803
409k
  if (nbNs > 0)
9804
213k
      xmlParserNsPop(ctxt, nbNs);
9805
409k
  return(-1);
9806
409k
    }
9807
9808
433k
    return(0);
9809
842k
}
9810
9811
/**
9812
 * Parse the end of an XML element. Always consumes '</'.
9813
 *
9814
 * @param ctxt  an XML parser context
9815
 */
9816
static void
9817
79.5k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9818
79.5k
    xmlNodePtr cur = ctxt->node;
9819
9820
79.5k
    if (ctxt->nameNr <= 0) {
9821
133
        if ((RAW == '<') && (NXT(1) == '/'))
9822
35
            SKIP(2);
9823
133
        return;
9824
133
    }
9825
9826
    /*
9827
     * parse the end of tag: '</' should be here.
9828
     */
9829
79.4k
    if (ctxt->sax2) {
9830
65.3k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9831
65.3k
  namePop(ctxt);
9832
65.3k
    }
9833
14.1k
#ifdef LIBXML_SAX1_ENABLED
9834
14.1k
    else
9835
14.1k
  xmlParseEndTag1(ctxt, 0);
9836
79.4k
#endif /* LIBXML_SAX1_ENABLED */
9837
9838
    /*
9839
     * Capture end position
9840
     */
9841
79.4k
    if (cur != NULL && ctxt->record_info) {
9842
0
        xmlParserNodeInfoPtr node_info;
9843
9844
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9845
0
        if (node_info != NULL) {
9846
0
            node_info->end_pos = ctxt->input->consumed +
9847
0
                                 (CUR_PTR - ctxt->input->base);
9848
0
            node_info->end_line = ctxt->input->line;
9849
0
        }
9850
0
    }
9851
79.4k
}
9852
9853
/**
9854
 * Parse the XML version value.
9855
 *
9856
 * @deprecated Internal function, don't use.
9857
 *
9858
 *     [26] VersionNum ::= '1.' [0-9]+
9859
 *
9860
 * In practice allow [0-9].[0-9]+ at that level
9861
 *
9862
 * @param ctxt  an XML parser context
9863
 * @returns the string giving the XML version number, or NULL
9864
 */
9865
xmlChar *
9866
20.5k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9867
20.5k
    xmlChar *buf = NULL;
9868
20.5k
    int len = 0;
9869
20.5k
    int size = 10;
9870
20.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
7.37k
                    XML_MAX_TEXT_LENGTH :
9872
20.5k
                    XML_MAX_NAME_LENGTH;
9873
20.5k
    xmlChar cur;
9874
9875
20.5k
    buf = xmlMalloc(size);
9876
20.5k
    if (buf == NULL) {
9877
101
  xmlErrMemory(ctxt);
9878
101
  return(NULL);
9879
101
    }
9880
20.4k
    cur = CUR;
9881
20.4k
    if (!((cur >= '0') && (cur <= '9'))) {
9882
1.04k
  xmlFree(buf);
9883
1.04k
  return(NULL);
9884
1.04k
    }
9885
19.4k
    buf[len++] = cur;
9886
19.4k
    NEXT;
9887
19.4k
    cur=CUR;
9888
19.4k
    if (cur != '.') {
9889
378
  xmlFree(buf);
9890
378
  return(NULL);
9891
378
    }
9892
19.0k
    buf[len++] = cur;
9893
19.0k
    NEXT;
9894
19.0k
    cur=CUR;
9895
3.40M
    while ((cur >= '0') && (cur <= '9')) {
9896
3.38M
  if (len + 1 >= size) {
9897
3.76k
      xmlChar *tmp;
9898
3.76k
            int newSize;
9899
9900
3.76k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9901
3.76k
            if (newSize < 0) {
9902
20
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9903
20
                xmlFree(buf);
9904
20
                return(NULL);
9905
20
            }
9906
3.74k
      tmp = xmlRealloc(buf, newSize);
9907
3.74k
      if (tmp == NULL) {
9908
6
    xmlErrMemory(ctxt);
9909
6
          xmlFree(buf);
9910
6
    return(NULL);
9911
6
      }
9912
3.73k
      buf = tmp;
9913
3.73k
            size = newSize;
9914
3.73k
  }
9915
3.38M
  buf[len++] = cur;
9916
3.38M
  NEXT;
9917
3.38M
  cur=CUR;
9918
3.38M
    }
9919
19.0k
    buf[len] = 0;
9920
19.0k
    return(buf);
9921
19.0k
}
9922
9923
/**
9924
 * Parse the XML version.
9925
 *
9926
 * @deprecated Internal function, don't use.
9927
 *
9928
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9929
 *
9930
 *     [25] Eq ::= S? '=' S?
9931
 *
9932
 * @param ctxt  an XML parser context
9933
 * @returns the version string, e.g. "1.0"
9934
 */
9935
9936
xmlChar *
9937
37.0k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9938
37.0k
    xmlChar *version = NULL;
9939
9940
37.0k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9941
21.4k
  SKIP(7);
9942
21.4k
  SKIP_BLANKS;
9943
21.4k
  if (RAW != '=') {
9944
624
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9945
624
      return(NULL);
9946
624
        }
9947
20.8k
  NEXT;
9948
20.8k
  SKIP_BLANKS;
9949
20.8k
  if (RAW == '"') {
9950
16.3k
      NEXT;
9951
16.3k
      version = xmlParseVersionNum(ctxt);
9952
16.3k
      if (RAW != '"') {
9953
791
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9954
791
      } else
9955
15.5k
          NEXT;
9956
16.3k
  } else if (RAW == '\''){
9957
4.24k
      NEXT;
9958
4.24k
      version = xmlParseVersionNum(ctxt);
9959
4.24k
      if (RAW != '\'') {
9960
949
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9961
949
      } else
9962
3.29k
          NEXT;
9963
4.24k
  } else {
9964
297
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9965
297
  }
9966
20.8k
    }
9967
36.4k
    return(version);
9968
37.0k
}
9969
9970
/**
9971
 * Parse the XML encoding name
9972
 *
9973
 * @deprecated Internal function, don't use.
9974
 *
9975
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9976
 *
9977
 * @param ctxt  an XML parser context
9978
 * @returns the encoding name value or NULL
9979
 */
9980
xmlChar *
9981
18.9k
xmlParseEncName(xmlParserCtxt *ctxt) {
9982
18.9k
    xmlChar *buf = NULL;
9983
18.9k
    int len = 0;
9984
18.9k
    int size = 10;
9985
18.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9986
6.53k
                    XML_MAX_TEXT_LENGTH :
9987
18.9k
                    XML_MAX_NAME_LENGTH;
9988
18.9k
    xmlChar cur;
9989
9990
18.9k
    cur = CUR;
9991
18.9k
    if (((cur >= 'a') && (cur <= 'z')) ||
9992
18.4k
        ((cur >= 'A') && (cur <= 'Z'))) {
9993
18.4k
  buf = xmlMalloc(size);
9994
18.4k
  if (buf == NULL) {
9995
70
      xmlErrMemory(ctxt);
9996
70
      return(NULL);
9997
70
  }
9998
9999
18.4k
  buf[len++] = cur;
10000
18.4k
  NEXT;
10001
18.4k
  cur = CUR;
10002
41.5M
  while (((cur >= 'a') && (cur <= 'z')) ||
10003
108k
         ((cur >= 'A') && (cur <= 'Z')) ||
10004
67.5k
         ((cur >= '0') && (cur <= '9')) ||
10005
32.4k
         (cur == '.') || (cur == '_') ||
10006
41.5M
         (cur == '-')) {
10007
41.5M
      if (len + 1 >= size) {
10008
20.8k
          xmlChar *tmp;
10009
20.8k
                int newSize;
10010
10011
20.8k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10012
20.8k
                if (newSize < 0) {
10013
319
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10014
319
                    xmlFree(buf);
10015
319
                    return(NULL);
10016
319
                }
10017
20.4k
    tmp = xmlRealloc(buf, newSize);
10018
20.4k
    if (tmp == NULL) {
10019
23
        xmlErrMemory(ctxt);
10020
23
        xmlFree(buf);
10021
23
        return(NULL);
10022
23
    }
10023
20.4k
    buf = tmp;
10024
20.4k
                size = newSize;
10025
20.4k
      }
10026
41.5M
      buf[len++] = cur;
10027
41.5M
      NEXT;
10028
41.5M
      cur = CUR;
10029
41.5M
        }
10030
18.0k
  buf[len] = 0;
10031
18.0k
    } else {
10032
489
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10033
489
    }
10034
18.5k
    return(buf);
10035
18.9k
}
10036
10037
/**
10038
 * Parse the XML encoding declaration
10039
 *
10040
 * @deprecated Internal function, don't use.
10041
 *
10042
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10043
 *                           "'" EncName "'")
10044
 *
10045
 * this setups the conversion filters.
10046
 *
10047
 * @param ctxt  an XML parser context
10048
 * @returns the encoding value or NULL
10049
 */
10050
10051
const xmlChar *
10052
36.0k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10053
36.0k
    xmlChar *encoding = NULL;
10054
10055
36.0k
    SKIP_BLANKS;
10056
36.0k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10057
15.6k
        return(NULL);
10058
10059
20.4k
    SKIP(8);
10060
20.4k
    SKIP_BLANKS;
10061
20.4k
    if (RAW != '=') {
10062
877
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10063
877
        return(NULL);
10064
877
    }
10065
19.5k
    NEXT;
10066
19.5k
    SKIP_BLANKS;
10067
19.5k
    if (RAW == '"') {
10068
14.6k
        NEXT;
10069
14.6k
        encoding = xmlParseEncName(ctxt);
10070
14.6k
        if (RAW != '"') {
10071
1.18k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10072
1.18k
            xmlFree(encoding);
10073
1.18k
            return(NULL);
10074
1.18k
        } else
10075
13.4k
            NEXT;
10076
14.6k
    } else if (RAW == '\''){
10077
4.34k
        NEXT;
10078
4.34k
        encoding = xmlParseEncName(ctxt);
10079
4.34k
        if (RAW != '\'') {
10080
396
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10081
396
            xmlFree(encoding);
10082
396
            return(NULL);
10083
396
        } else
10084
3.95k
            NEXT;
10085
4.34k
    } else {
10086
604
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10087
604
    }
10088
10089
17.9k
    if (encoding == NULL)
10090
616
        return(NULL);
10091
10092
17.3k
    xmlSetDeclaredEncoding(ctxt, encoding);
10093
10094
17.3k
    return(ctxt->encoding);
10095
17.9k
}
10096
10097
/**
10098
 * Parse the XML standalone declaration
10099
 *
10100
 * @deprecated Internal function, don't use.
10101
 *
10102
 *     [32] SDDecl ::= S 'standalone' Eq
10103
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10104
 *
10105
 * [ VC: Standalone Document Declaration ]
10106
 * TODO The standalone document declaration must have the value "no"
10107
 * if any external markup declarations contain declarations of:
10108
 *  - attributes with default values, if elements to which these
10109
 *    attributes apply appear in the document without specifications
10110
 *    of values for these attributes, or
10111
 *  - entities (other than amp, lt, gt, apos, quot), if references
10112
 *    to those entities appear in the document, or
10113
 *  - attributes with values subject to normalization, where the
10114
 *    attribute appears in the document with a value which will change
10115
 *    as a result of normalization, or
10116
 *  - element types with element content, if white space occurs directly
10117
 *    within any instance of those types.
10118
 *
10119
 * @param ctxt  an XML parser context
10120
 * @returns
10121
 *   1 if standalone="yes"
10122
 *   0 if standalone="no"
10123
 *  -2 if standalone attribute is missing or invalid
10124
 *    (A standalone value of -2 means that the XML declaration was found,
10125
 *     but no value was specified for the standalone attribute).
10126
 */
10127
10128
int
10129
4.12k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10130
4.12k
    int standalone = -2;
10131
10132
4.12k
    SKIP_BLANKS;
10133
4.12k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10134
840
  SKIP(10);
10135
840
        SKIP_BLANKS;
10136
840
  if (RAW != '=') {
10137
15
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10138
15
      return(standalone);
10139
15
        }
10140
825
  NEXT;
10141
825
  SKIP_BLANKS;
10142
825
        if (RAW == '\''){
10143
120
      NEXT;
10144
120
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10145
72
          standalone = 0;
10146
72
                SKIP(2);
10147
72
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10148
18
                 (NXT(2) == 's')) {
10149
12
          standalone = 1;
10150
12
    SKIP(3);
10151
36
            } else {
10152
36
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10153
36
      }
10154
120
      if (RAW != '\'') {
10155
49
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10156
49
      } else
10157
71
          NEXT;
10158
705
  } else if (RAW == '"'){
10159
696
      NEXT;
10160
696
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10161
12
          standalone = 0;
10162
12
    SKIP(2);
10163
684
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10164
666
                 (NXT(2) == 's')) {
10165
660
          standalone = 1;
10166
660
                SKIP(3);
10167
660
            } else {
10168
24
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10169
24
      }
10170
696
      if (RAW != '"') {
10171
63
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10172
63
      } else
10173
633
          NEXT;
10174
696
  } else {
10175
9
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10176
9
        }
10177
825
    }
10178
4.10k
    return(standalone);
10179
4.12k
}
10180
10181
/**
10182
 * Parse an XML declaration header
10183
 *
10184
 * @deprecated Internal function, don't use.
10185
 *
10186
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10187
 * @param ctxt  an XML parser context
10188
 */
10189
10190
void
10191
8.51k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10192
8.51k
    xmlChar *version;
10193
10194
    /*
10195
     * This value for standalone indicates that the document has an
10196
     * XML declaration but it does not have a standalone attribute.
10197
     * It will be overwritten later if a standalone attribute is found.
10198
     */
10199
10200
8.51k
    ctxt->standalone = -2;
10201
10202
    /*
10203
     * We know that '<?xml' is here.
10204
     */
10205
8.51k
    SKIP(5);
10206
10207
8.51k
    if (!IS_BLANK_CH(RAW)) {
10208
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10209
0
                 "Blank needed after '<?xml'\n");
10210
0
    }
10211
8.51k
    SKIP_BLANKS;
10212
10213
    /*
10214
     * We must have the VersionInfo here.
10215
     */
10216
8.51k
    version = xmlParseVersionInfo(ctxt);
10217
8.51k
    if (version == NULL) {
10218
3.49k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10219
5.02k
    } else {
10220
5.02k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10221
      /*
10222
       * Changed here for XML-1.0 5th edition
10223
       */
10224
1.72k
      if (ctxt->options & XML_PARSE_OLD10) {
10225
347
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10226
347
                "Unsupported version '%s'\n",
10227
347
                version);
10228
1.38k
      } else {
10229
1.38k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10230
622
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10231
622
                      "Unsupported version '%s'\n",
10232
622
          version, NULL);
10233
759
    } else {
10234
759
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10235
759
              "Unsupported version '%s'\n",
10236
759
              version);
10237
759
    }
10238
1.38k
      }
10239
1.72k
  }
10240
5.02k
  if (ctxt->version != NULL)
10241
0
      xmlFree(ctxt->version);
10242
5.02k
  ctxt->version = version;
10243
5.02k
    }
10244
10245
    /*
10246
     * We may have the encoding declaration
10247
     */
10248
8.51k
    if (!IS_BLANK_CH(RAW)) {
10249
4.64k
        if ((RAW == '?') && (NXT(1) == '>')) {
10250
1.01k
      SKIP(2);
10251
1.01k
      return;
10252
1.01k
  }
10253
3.63k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10254
3.63k
    }
10255
7.50k
    xmlParseEncodingDecl(ctxt);
10256
10257
    /*
10258
     * We may have the standalone status.
10259
     */
10260
7.50k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10261
3.91k
        if ((RAW == '?') && (NXT(1) == '>')) {
10262
3.38k
      SKIP(2);
10263
3.38k
      return;
10264
3.38k
  }
10265
530
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10266
530
    }
10267
10268
    /*
10269
     * We can grow the input buffer freely at that point
10270
     */
10271
4.12k
    GROW;
10272
10273
4.12k
    SKIP_BLANKS;
10274
4.12k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10275
10276
4.12k
    SKIP_BLANKS;
10277
4.12k
    if ((RAW == '?') && (NXT(1) == '>')) {
10278
723
        SKIP(2);
10279
3.39k
    } else if (RAW == '>') {
10280
        /* Deprecated old WD ... */
10281
258
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10282
258
  NEXT;
10283
3.13k
    } else {
10284
3.13k
        int c;
10285
10286
3.13k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10287
4.02M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10288
4.02M
               ((c = CUR) != 0)) {
10289
4.02M
            NEXT;
10290
4.02M
            if (c == '>')
10291
1.59k
                break;
10292
4.02M
        }
10293
3.13k
    }
10294
4.12k
}
10295
10296
/**
10297
 * @since 2.14.0
10298
 *
10299
 * @param ctxt  parser context
10300
 * @returns the version from the XML declaration.
10301
 */
10302
const xmlChar *
10303
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10304
0
    if (ctxt == NULL)
10305
0
        return(NULL);
10306
10307
0
    return(ctxt->version);
10308
0
}
10309
10310
/**
10311
 * @since 2.14.0
10312
 *
10313
 * @param ctxt  parser context
10314
 * @returns the value from the standalone document declaration.
10315
 */
10316
int
10317
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10318
0
    if (ctxt == NULL)
10319
0
        return(0);
10320
10321
0
    return(ctxt->standalone);
10322
0
}
10323
10324
/**
10325
 * Parse an XML Misc* optional field.
10326
 *
10327
 * @deprecated Internal function, don't use.
10328
 *
10329
 *     [27] Misc ::= Comment | PI |  S
10330
 * @param ctxt  an XML parser context
10331
 */
10332
10333
void
10334
120k
xmlParseMisc(xmlParserCtxt *ctxt) {
10335
242k
    while (PARSER_STOPPED(ctxt) == 0) {
10336
227k
        SKIP_BLANKS;
10337
227k
        GROW;
10338
227k
        if ((RAW == '<') && (NXT(1) == '?')) {
10339
30.2k
      xmlParsePI(ctxt);
10340
197k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10341
91.7k
      xmlParseComment(ctxt);
10342
105k
        } else {
10343
105k
            break;
10344
105k
        }
10345
227k
    }
10346
120k
}
10347
10348
static void
10349
69.5k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10350
69.5k
    xmlDocPtr doc;
10351
10352
    /*
10353
     * SAX: end of the document processing.
10354
     */
10355
69.5k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10356
69.5k
        ctxt->sax->endDocument(ctxt->userData);
10357
10358
    /*
10359
     * Remove locally kept entity definitions if the tree was not built
10360
     */
10361
69.5k
    doc = ctxt->myDoc;
10362
69.5k
    if ((doc != NULL) &&
10363
67.6k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10364
519
        xmlFreeDoc(doc);
10365
519
        ctxt->myDoc = NULL;
10366
519
    }
10367
69.5k
}
10368
10369
/**
10370
 * Parse an XML document and invoke the SAX handlers. This is useful
10371
 * if you're only interested in custom SAX callbacks. If you want a
10372
 * document tree, use #xmlCtxtParseDocument.
10373
 *
10374
 * @param ctxt  an XML parser context
10375
 * @returns 0, -1 in case of error.
10376
 */
10377
10378
int
10379
54.3k
xmlParseDocument(xmlParserCtxt *ctxt) {
10380
54.3k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10381
0
        return(-1);
10382
10383
54.3k
    GROW;
10384
10385
    /*
10386
     * SAX: detecting the level.
10387
     */
10388
54.3k
    xmlCtxtInitializeLate(ctxt);
10389
10390
54.3k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10391
54.3k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10392
54.3k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10393
54.3k
    }
10394
10395
54.3k
    xmlDetectEncoding(ctxt);
10396
10397
54.3k
    if (CUR == 0) {
10398
496
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10399
496
  return(-1);
10400
496
    }
10401
10402
53.8k
    GROW;
10403
53.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10404
10405
  /*
10406
   * Note that we will switch encoding on the fly.
10407
   */
10408
5.72k
  xmlParseXMLDecl(ctxt);
10409
5.72k
  SKIP_BLANKS;
10410
48.1k
    } else {
10411
48.1k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10412
48.1k
        if (ctxt->version == NULL) {
10413
32
            xmlErrMemory(ctxt);
10414
32
            return(-1);
10415
32
        }
10416
48.1k
    }
10417
53.8k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10418
51.7k
        ctxt->sax->startDocument(ctxt->userData);
10419
53.8k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10420
51.5k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10421
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10422
0
    }
10423
10424
    /*
10425
     * The Misc part of the Prolog
10426
     */
10427
53.8k
    xmlParseMisc(ctxt);
10428
10429
    /*
10430
     * Then possibly doc type declaration(s) and more Misc
10431
     * (doctypedecl Misc*)?
10432
     */
10433
53.8k
    GROW;
10434
53.8k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10435
10436
31.9k
  ctxt->inSubset = 1;
10437
31.9k
  xmlParseDocTypeDecl(ctxt);
10438
31.9k
  if (RAW == '[') {
10439
25.6k
      xmlParseInternalSubset(ctxt);
10440
25.6k
  } else if (RAW == '>') {
10441
4.07k
            NEXT;
10442
4.07k
        }
10443
10444
  /*
10445
   * Create and update the external subset.
10446
   */
10447
31.9k
  ctxt->inSubset = 2;
10448
31.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10449
31.9k
      (!ctxt->disableSAX))
10450
25.9k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10451
25.9k
                                ctxt->extSubSystem, ctxt->extSubURI);
10452
31.9k
  ctxt->inSubset = 0;
10453
10454
31.9k
        xmlCleanSpecialAttr(ctxt);
10455
10456
31.9k
  xmlParseMisc(ctxt);
10457
31.9k
    }
10458
10459
    /*
10460
     * Time to start parsing the tree itself
10461
     */
10462
53.8k
    GROW;
10463
53.8k
    if (RAW != '<') {
10464
18.8k
        if (ctxt->wellFormed)
10465
2.74k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10466
2.74k
                           "Start tag expected, '<' not found\n");
10467
35.0k
    } else {
10468
35.0k
  xmlParseElement(ctxt);
10469
10470
  /*
10471
   * The Misc part at the end
10472
   */
10473
35.0k
  xmlParseMisc(ctxt);
10474
10475
35.0k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10476
35.0k
    }
10477
10478
53.8k
    ctxt->instate = XML_PARSER_EOF;
10479
53.8k
    xmlFinishDocument(ctxt);
10480
10481
53.8k
    if (! ctxt->wellFormed) {
10482
53.3k
  ctxt->valid = 0;
10483
53.3k
  return(-1);
10484
53.3k
    }
10485
10486
513
    return(0);
10487
53.8k
}
10488
10489
/**
10490
 * Parse a general parsed entity
10491
 * An external general parsed entity is well-formed if it matches the
10492
 * production labeled extParsedEnt.
10493
 *
10494
 * @deprecated Internal function, don't use.
10495
 *
10496
 *     [78] extParsedEnt ::= TextDecl? content
10497
 *
10498
 * @param ctxt  an XML parser context
10499
 * @returns 0, -1 in case of error. the parser context is augmented
10500
 *                as a result of the parsing.
10501
 */
10502
10503
int
10504
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10505
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10506
0
        return(-1);
10507
10508
0
    xmlCtxtInitializeLate(ctxt);
10509
10510
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10511
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10512
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10513
0
    }
10514
10515
0
    xmlDetectEncoding(ctxt);
10516
10517
0
    if (CUR == 0) {
10518
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10519
0
    }
10520
10521
    /*
10522
     * Check for the XMLDecl in the Prolog.
10523
     */
10524
0
    GROW;
10525
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10526
10527
  /*
10528
   * Note that we will switch encoding on the fly.
10529
   */
10530
0
  xmlParseXMLDecl(ctxt);
10531
0
  SKIP_BLANKS;
10532
0
    } else {
10533
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10534
0
    }
10535
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10536
0
        ctxt->sax->startDocument(ctxt->userData);
10537
10538
    /*
10539
     * Doing validity checking on chunk doesn't make sense
10540
     */
10541
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10542
0
    ctxt->validate = 0;
10543
0
    ctxt->depth = 0;
10544
10545
0
    xmlParseContentInternal(ctxt);
10546
10547
0
    if (ctxt->input->cur < ctxt->input->end)
10548
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10549
10550
    /*
10551
     * SAX: end of the document processing.
10552
     */
10553
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10554
0
        ctxt->sax->endDocument(ctxt->userData);
10555
10556
0
    if (! ctxt->wellFormed) return(-1);
10557
0
    return(0);
10558
0
}
10559
10560
#ifdef LIBXML_PUSH_ENABLED
10561
/************************************************************************
10562
 *                  *
10563
 *    Progressive parsing interfaces        *
10564
 *                  *
10565
 ************************************************************************/
10566
10567
/**
10568
 * Check whether the input buffer contains a character.
10569
 *
10570
 * @param ctxt  an XML parser context
10571
 * @param c  character
10572
 */
10573
static int
10574
62.6k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10575
62.6k
    const xmlChar *cur;
10576
10577
62.6k
    if (ctxt->checkIndex == 0) {
10578
28.9k
        cur = ctxt->input->cur + 1;
10579
33.7k
    } else {
10580
33.7k
        cur = ctxt->input->cur + ctxt->checkIndex;
10581
33.7k
    }
10582
10583
62.6k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10584
34.3k
        size_t index = ctxt->input->end - ctxt->input->cur;
10585
10586
34.3k
        if (index > LONG_MAX) {
10587
0
            ctxt->checkIndex = 0;
10588
0
            return(1);
10589
0
        }
10590
34.3k
        ctxt->checkIndex = index;
10591
34.3k
        return(0);
10592
34.3k
    } else {
10593
28.2k
        ctxt->checkIndex = 0;
10594
28.2k
        return(1);
10595
28.2k
    }
10596
62.6k
}
10597
10598
/**
10599
 * Check whether the input buffer contains a string.
10600
 *
10601
 * @param ctxt  an XML parser context
10602
 * @param startDelta  delta to apply at the start
10603
 * @param str  string
10604
 * @param strLen  length of string
10605
 */
10606
static const xmlChar *
10607
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10608
344k
                     const char *str, size_t strLen) {
10609
344k
    const xmlChar *cur, *term;
10610
10611
344k
    if (ctxt->checkIndex == 0) {
10612
105k
        cur = ctxt->input->cur + startDelta;
10613
238k
    } else {
10614
238k
        cur = ctxt->input->cur + ctxt->checkIndex;
10615
238k
    }
10616
10617
344k
    term = BAD_CAST strstr((const char *) cur, str);
10618
344k
    if (term == NULL) {
10619
240k
        const xmlChar *end = ctxt->input->end;
10620
240k
        size_t index;
10621
10622
        /* Rescan (strLen - 1) characters. */
10623
240k
        if ((size_t) (end - cur) < strLen)
10624
2.38k
            end = cur;
10625
238k
        else
10626
238k
            end -= strLen - 1;
10627
240k
        index = end - ctxt->input->cur;
10628
240k
        if (index > LONG_MAX) {
10629
0
            ctxt->checkIndex = 0;
10630
0
            return(ctxt->input->end - strLen);
10631
0
        }
10632
240k
        ctxt->checkIndex = index;
10633
240k
    } else {
10634
103k
        ctxt->checkIndex = 0;
10635
103k
    }
10636
10637
344k
    return(term);
10638
344k
}
10639
10640
/**
10641
 * Check whether the input buffer contains terminated char data.
10642
 *
10643
 * @param ctxt  an XML parser context
10644
 */
10645
static int
10646
61.2k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10647
61.2k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10648
61.2k
    const xmlChar *end = ctxt->input->end;
10649
61.2k
    size_t index;
10650
10651
2.25M
    while (cur < end) {
10652
2.23M
        if ((*cur == '<') || (*cur == '&')) {
10653
44.2k
            ctxt->checkIndex = 0;
10654
44.2k
            return(1);
10655
44.2k
        }
10656
2.19M
        cur++;
10657
2.19M
    }
10658
10659
16.9k
    index = cur - ctxt->input->cur;
10660
16.9k
    if (index > LONG_MAX) {
10661
0
        ctxt->checkIndex = 0;
10662
0
        return(1);
10663
0
    }
10664
16.9k
    ctxt->checkIndex = index;
10665
16.9k
    return(0);
10666
16.9k
}
10667
10668
/**
10669
 * Check whether there's enough data in the input buffer to finish parsing
10670
 * a start tag. This has to take quotes into account.
10671
 *
10672
 * @param ctxt  an XML parser context
10673
 */
10674
static int
10675
1.16M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10676
1.16M
    const xmlChar *cur;
10677
1.16M
    const xmlChar *end = ctxt->input->end;
10678
1.16M
    int state = ctxt->endCheckState;
10679
1.16M
    size_t index;
10680
10681
1.16M
    if (ctxt->checkIndex == 0)
10682
119k
        cur = ctxt->input->cur + 1;
10683
1.04M
    else
10684
1.04M
        cur = ctxt->input->cur + ctxt->checkIndex;
10685
10686
240M
    while (cur < end) {
10687
239M
        if (state) {
10688
207M
            if (*cur == state)
10689
78.9k
                state = 0;
10690
207M
        } else if (*cur == '\'' || *cur == '"') {
10691
81.3k
            state = *cur;
10692
31.5M
        } else if (*cur == '>') {
10693
109k
            ctxt->checkIndex = 0;
10694
109k
            ctxt->endCheckState = 0;
10695
109k
            return(1);
10696
109k
        }
10697
239M
        cur++;
10698
239M
    }
10699
10700
1.05M
    index = cur - ctxt->input->cur;
10701
1.05M
    if (index > LONG_MAX) {
10702
0
        ctxt->checkIndex = 0;
10703
0
        ctxt->endCheckState = 0;
10704
0
        return(1);
10705
0
    }
10706
1.05M
    ctxt->checkIndex = index;
10707
1.05M
    ctxt->endCheckState = state;
10708
1.05M
    return(0);
10709
1.05M
}
10710
10711
/**
10712
 * Check whether there's enough data in the input buffer to finish parsing
10713
 * the internal subset.
10714
 *
10715
 * @param ctxt  an XML parser context
10716
 */
10717
static int
10718
801k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10719
    /*
10720
     * Sorry, but progressive parsing of the internal subset is not
10721
     * supported. We first check that the full content of the internal
10722
     * subset is available and parsing is launched only at that point.
10723
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10724
     * not in a ']]>' sequence which are conditional sections.
10725
     */
10726
801k
    const xmlChar *cur, *start;
10727
801k
    const xmlChar *end = ctxt->input->end;
10728
801k
    int state = ctxt->endCheckState;
10729
801k
    size_t index;
10730
10731
801k
    if (ctxt->checkIndex == 0) {
10732
9.92k
        cur = ctxt->input->cur + 1;
10733
791k
    } else {
10734
791k
        cur = ctxt->input->cur + ctxt->checkIndex;
10735
791k
    }
10736
801k
    start = cur;
10737
10738
314M
    while (cur < end) {
10739
313M
        if (state == '-') {
10740
1.00M
            if ((*cur == '-') &&
10741
1.57k
                (cur[1] == '-') &&
10742
1.01k
                (cur[2] == '>')) {
10743
493
                state = 0;
10744
493
                cur += 3;
10745
493
                start = cur;
10746
493
                continue;
10747
493
            }
10748
1.00M
        }
10749
312M
        else if (state == ']') {
10750
7.95k
            if (*cur == '>') {
10751
5.57k
                ctxt->checkIndex = 0;
10752
5.57k
                ctxt->endCheckState = 0;
10753
5.57k
                return(1);
10754
5.57k
            }
10755
2.38k
            if (IS_BLANK_CH(*cur)) {
10756
1.28k
                state = ' ';
10757
1.28k
            } else if (*cur != ']') {
10758
550
                state = 0;
10759
550
                start = cur;
10760
550
                continue;
10761
550
            }
10762
2.38k
        }
10763
312M
        else if (state == ' ') {
10764
2.06k
            if (*cur == '>') {
10765
158
                ctxt->checkIndex = 0;
10766
158
                ctxt->endCheckState = 0;
10767
158
                return(1);
10768
158
            }
10769
1.90k
            if (!IS_BLANK_CH(*cur)) {
10770
1.12k
                state = 0;
10771
1.12k
                start = cur;
10772
1.12k
                continue;
10773
1.12k
            }
10774
1.90k
        }
10775
312M
        else if (state != 0) {
10776
304M
            if (*cur == state) {
10777
33.7k
                state = 0;
10778
33.7k
                start = cur + 1;
10779
33.7k
            }
10780
304M
        }
10781
7.72M
        else if (*cur == '<') {
10782
50.2k
            if ((cur[1] == '!') &&
10783
22.5k
                (cur[2] == '-') &&
10784
784
                (cur[3] == '-')) {
10785
538
                state = '-';
10786
538
                cur += 4;
10787
                /* Don't treat <!--> as comment */
10788
538
                start = cur;
10789
538
                continue;
10790
538
            }
10791
50.2k
        }
10792
7.67M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10793
41.6k
            state = *cur;
10794
41.6k
        }
10795
10796
313M
        cur++;
10797
313M
    }
10798
10799
    /*
10800
     * Rescan the three last characters to detect "<!--" and "-->"
10801
     * split across chunks.
10802
     */
10803
795k
    if ((state == 0) || (state == '-')) {
10804
17.0k
        if (cur - start < 3)
10805
888
            cur = start;
10806
16.1k
        else
10807
16.1k
            cur -= 3;
10808
17.0k
    }
10809
795k
    index = cur - ctxt->input->cur;
10810
795k
    if (index > LONG_MAX) {
10811
0
        ctxt->checkIndex = 0;
10812
0
        ctxt->endCheckState = 0;
10813
0
        return(1);
10814
0
    }
10815
795k
    ctxt->checkIndex = index;
10816
795k
    ctxt->endCheckState = state;
10817
795k
    return(0);
10818
795k
}
10819
10820
/**
10821
 * Try to progress on parsing
10822
 *
10823
 * @param ctxt  an XML parser context
10824
 * @param terminate  last chunk indicator
10825
 * @returns zero if no parsing was possible
10826
 */
10827
static int
10828
2.51M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10829
2.51M
    int ret = 0;
10830
2.51M
    size_t avail;
10831
2.51M
    xmlChar cur, next;
10832
10833
2.51M
    if (ctxt->input == NULL)
10834
0
        return(0);
10835
10836
2.51M
    if ((ctxt->input != NULL) &&
10837
2.51M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10838
1.97k
        xmlParserShrink(ctxt);
10839
1.97k
    }
10840
10841
3.14M
    while (ctxt->disableSAX == 0) {
10842
3.13M
        avail = ctxt->input->end - ctxt->input->cur;
10843
3.13M
        if (avail < 1)
10844
20.3k
      goto done;
10845
3.11M
        switch (ctxt->instate) {
10846
329k
            case XML_PARSER_EOF:
10847
          /*
10848
     * Document parsing is done !
10849
     */
10850
329k
          goto done;
10851
27.9k
            case XML_PARSER_START:
10852
                /*
10853
                 * Very first chars read from the document flow.
10854
                 */
10855
27.9k
                if ((!terminate) && (avail < 4))
10856
712
                    goto done;
10857
10858
                /*
10859
                 * We need more bytes to detect EBCDIC code pages.
10860
                 * See xmlDetectEBCDIC.
10861
                 */
10862
27.1k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10863
678
                    (!terminate) && (avail < 200))
10864
340
                    goto done;
10865
10866
26.8k
                xmlDetectEncoding(ctxt);
10867
26.8k
                ctxt->instate = XML_PARSER_XML_DECL;
10868
26.8k
    break;
10869
10870
129k
            case XML_PARSER_XML_DECL:
10871
129k
    if ((!terminate) && (avail < 2))
10872
24
        goto done;
10873
129k
    cur = ctxt->input->cur[0];
10874
129k
    next = ctxt->input->cur[1];
10875
129k
          if ((cur == '<') && (next == '?')) {
10876
        /* PI or XML decl */
10877
107k
        if ((!terminate) &&
10878
105k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10879
102k
      goto done;
10880
4.24k
        if ((ctxt->input->cur[2] == 'x') &&
10881
3.64k
      (ctxt->input->cur[3] == 'm') &&
10882
3.41k
      (ctxt->input->cur[4] == 'l') &&
10883
2.91k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10884
2.79k
      ret += 5;
10885
2.79k
      xmlParseXMLDecl(ctxt);
10886
2.79k
        } else {
10887
1.45k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10888
1.45k
                        if (ctxt->version == NULL) {
10889
8
                            xmlErrMemory(ctxt);
10890
8
                            break;
10891
8
                        }
10892
1.45k
        }
10893
22.5k
    } else {
10894
22.5k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10895
22.5k
        if (ctxt->version == NULL) {
10896
46
            xmlErrMemory(ctxt);
10897
46
      break;
10898
46
        }
10899
22.5k
    }
10900
26.6k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10901
26.6k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10902
26.6k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10903
26.6k
                }
10904
26.6k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10905
26.6k
                    (!ctxt->disableSAX))
10906
25.9k
                    ctxt->sax->startDocument(ctxt->userData);
10907
26.6k
                ctxt->instate = XML_PARSER_MISC;
10908
26.6k
    break;
10909
933k
            case XML_PARSER_START_TAG: {
10910
933k
          const xmlChar *name;
10911
933k
    const xmlChar *prefix = NULL;
10912
933k
    const xmlChar *URI = NULL;
10913
933k
                int line = ctxt->input->line;
10914
933k
    int nbNs = 0;
10915
10916
933k
    if ((!terminate) && (avail < 2))
10917
120
        goto done;
10918
933k
    cur = ctxt->input->cur[0];
10919
933k
          if (cur != '<') {
10920
2.66k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10921
2.66k
                                   "Start tag expected, '<' not found");
10922
2.66k
                    ctxt->instate = XML_PARSER_EOF;
10923
2.66k
                    xmlFinishDocument(ctxt);
10924
2.66k
        goto done;
10925
2.66k
    }
10926
931k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10927
792k
                    goto done;
10928
138k
    if (ctxt->spaceNr == 0)
10929
0
        spacePush(ctxt, -1);
10930
138k
    else if (*ctxt->space == -2)
10931
14.3k
        spacePush(ctxt, -1);
10932
124k
    else
10933
124k
        spacePush(ctxt, *ctxt->space);
10934
138k
#ifdef LIBXML_SAX1_ENABLED
10935
138k
    if (ctxt->sax2)
10936
92.6k
#endif /* LIBXML_SAX1_ENABLED */
10937
92.6k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10938
45.8k
#ifdef LIBXML_SAX1_ENABLED
10939
45.8k
    else
10940
45.8k
        name = xmlParseStartTag(ctxt);
10941
138k
#endif /* LIBXML_SAX1_ENABLED */
10942
138k
    if (name == NULL) {
10943
3.13k
        spacePop(ctxt);
10944
3.13k
                    ctxt->instate = XML_PARSER_EOF;
10945
3.13k
                    xmlFinishDocument(ctxt);
10946
3.13k
        goto done;
10947
3.13k
    }
10948
135k
#ifdef LIBXML_VALID_ENABLED
10949
    /*
10950
     * [ VC: Root Element Type ]
10951
     * The Name in the document type declaration must match
10952
     * the element type of the root element.
10953
     */
10954
135k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10955
28.9k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10956
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10957
135k
#endif /* LIBXML_VALID_ENABLED */
10958
10959
    /*
10960
     * Check for an Empty Element.
10961
     */
10962
135k
    if ((RAW == '/') && (NXT(1) == '>')) {
10963
16.5k
        SKIP(2);
10964
10965
16.5k
        if (ctxt->sax2) {
10966
14.8k
      if ((ctxt->sax != NULL) &&
10967
14.8k
          (ctxt->sax->endElementNs != NULL) &&
10968
14.8k
          (!ctxt->disableSAX))
10969
14.8k
          ctxt->sax->endElementNs(ctxt->userData, name,
10970
14.8k
                                  prefix, URI);
10971
14.8k
      if (nbNs > 0)
10972
11.5k
          xmlParserNsPop(ctxt, nbNs);
10973
14.8k
#ifdef LIBXML_SAX1_ENABLED
10974
14.8k
        } else {
10975
1.71k
      if ((ctxt->sax != NULL) &&
10976
1.71k
          (ctxt->sax->endElement != NULL) &&
10977
1.71k
          (!ctxt->disableSAX))
10978
1.69k
          ctxt->sax->endElement(ctxt->userData, name);
10979
1.71k
#endif /* LIBXML_SAX1_ENABLED */
10980
1.71k
        }
10981
16.5k
        spacePop(ctxt);
10982
118k
    } else if (RAW == '>') {
10983
88.7k
        NEXT;
10984
88.7k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10985
88.7k
    } else {
10986
30.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10987
30.0k
           "Couldn't find end of Start Tag %s\n",
10988
30.0k
           name);
10989
30.0k
        nodePop(ctxt);
10990
30.0k
        spacePop(ctxt);
10991
30.0k
                    if (nbNs > 0)
10992
5.25k
                        xmlParserNsPop(ctxt, nbNs);
10993
30.0k
    }
10994
10995
135k
                if (ctxt->nameNr == 0)
10996
5.37k
                    ctxt->instate = XML_PARSER_EPILOG;
10997
129k
                else
10998
129k
                    ctxt->instate = XML_PARSER_CONTENT;
10999
135k
                break;
11000
138k
      }
11001
437k
            case XML_PARSER_CONTENT: {
11002
437k
    cur = ctxt->input->cur[0];
11003
11004
437k
    if (cur == '<') {
11005
282k
                    if ((!terminate) && (avail < 2))
11006
1.26k
                        goto done;
11007
281k
        next = ctxt->input->cur[1];
11008
11009
281k
                    if (next == '/') {
11010
20.4k
                        ctxt->instate = XML_PARSER_END_TAG;
11011
20.4k
                        break;
11012
260k
                    } else if (next == '?') {
11013
17.4k
                        if ((!terminate) &&
11014
16.3k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11015
10.7k
                            goto done;
11016
6.69k
                        xmlParsePI(ctxt);
11017
6.69k
                        ctxt->instate = XML_PARSER_CONTENT;
11018
6.69k
                        break;
11019
243k
                    } else if (next == '!') {
11020
121k
                        if ((!terminate) && (avail < 3))
11021
354
                            goto done;
11022
121k
                        next = ctxt->input->cur[2];
11023
11024
121k
                        if (next == '-') {
11025
47.8k
                            if ((!terminate) && (avail < 4))
11026
355
                                goto done;
11027
47.5k
                            if (ctxt->input->cur[3] == '-') {
11028
47.5k
                                if ((!terminate) &&
11029
46.9k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11030
12.2k
                                    goto done;
11031
35.2k
                                xmlParseComment(ctxt);
11032
35.2k
                                ctxt->instate = XML_PARSER_CONTENT;
11033
35.2k
                                break;
11034
47.5k
                            }
11035
73.3k
                        } else if (next == '[') {
11036
72.7k
                            if ((!terminate) && (avail < 9))
11037
121
                                goto done;
11038
72.6k
                            if ((ctxt->input->cur[2] == '[') &&
11039
72.6k
                                (ctxt->input->cur[3] == 'C') &&
11040
72.6k
                                (ctxt->input->cur[4] == 'D') &&
11041
72.6k
                                (ctxt->input->cur[5] == 'A') &&
11042
72.6k
                                (ctxt->input->cur[6] == 'T') &&
11043
72.5k
                                (ctxt->input->cur[7] == 'A') &&
11044
72.5k
                                (ctxt->input->cur[8] == '[')) {
11045
72.5k
                                if ((!terminate) &&
11046
71.6k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11047
70.0k
                                    goto done;
11048
2.43k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11049
2.43k
                                xmlParseCDSect(ctxt);
11050
2.43k
                                ctxt->instate = XML_PARSER_CONTENT;
11051
2.43k
                                break;
11052
72.5k
                            }
11053
72.6k
                        }
11054
121k
                    }
11055
281k
    } else if (cur == '&') {
11056
24.5k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11057
10.4k
      goto done;
11058
14.1k
        xmlParseReference(ctxt);
11059
14.1k
                    break;
11060
130k
    } else {
11061
        /* TODO Avoid the extra copy, handle directly !!! */
11062
        /*
11063
         * Goal of the following test is:
11064
         *  - minimize calls to the SAX 'character' callback
11065
         *    when they are mergeable
11066
         *  - handle an problem for isBlank when we only parse
11067
         *    a sequence of blank chars and the next one is
11068
         *    not available to check against '<' presence.
11069
         *  - tries to homogenize the differences in SAX
11070
         *    callbacks between the push and pull versions
11071
         *    of the parser.
11072
         */
11073
130k
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11074
72.2k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11075
16.9k
          goto done;
11076
72.2k
                    }
11077
113k
                    ctxt->checkIndex = 0;
11078
113k
        xmlParseCharDataInternal(ctxt, !terminate);
11079
113k
                    break;
11080
130k
    }
11081
11082
122k
                ctxt->instate = XML_PARSER_START_TAG;
11083
122k
    break;
11084
437k
      }
11085
44.4k
            case XML_PARSER_END_TAG:
11086
44.4k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11087
23.9k
        goto done;
11088
20.4k
    if (ctxt->sax2) {
11089
16.8k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11090
16.8k
        nameNsPop(ctxt);
11091
16.8k
    }
11092
3.67k
#ifdef LIBXML_SAX1_ENABLED
11093
3.67k
      else
11094
3.67k
        xmlParseEndTag1(ctxt, 0);
11095
20.4k
#endif /* LIBXML_SAX1_ENABLED */
11096
20.4k
    if (ctxt->nameNr == 0) {
11097
476
        ctxt->instate = XML_PARSER_EPILOG;
11098
20.0k
    } else {
11099
20.0k
        ctxt->instate = XML_PARSER_CONTENT;
11100
20.0k
    }
11101
20.4k
    break;
11102
378k
            case XML_PARSER_MISC:
11103
402k
            case XML_PARSER_PROLOG:
11104
404k
            case XML_PARSER_EPILOG:
11105
404k
    SKIP_BLANKS;
11106
404k
                avail = ctxt->input->end - ctxt->input->cur;
11107
404k
    if (avail < 1)
11108
441
        goto done;
11109
403k
    if (ctxt->input->cur[0] == '<') {
11110
400k
                    if ((!terminate) && (avail < 2))
11111
540
                        goto done;
11112
400k
                    next = ctxt->input->cur[1];
11113
400k
                    if (next == '?') {
11114
37.4k
                        if ((!terminate) &&
11115
36.1k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11116
22.4k
                            goto done;
11117
15.0k
                        xmlParsePI(ctxt);
11118
15.0k
                        break;
11119
362k
                    } else if (next == '!') {
11120
346k
                        if ((!terminate) && (avail < 3))
11121
445
                            goto done;
11122
11123
345k
                        if (ctxt->input->cur[2] == '-') {
11124
68.4k
                            if ((!terminate) && (avail < 4))
11125
425
                                goto done;
11126
67.9k
                            if (ctxt->input->cur[3] == '-') {
11127
67.9k
                                if ((!terminate) &&
11128
67.3k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11129
22.1k
                                    goto done;
11130
45.7k
                                xmlParseComment(ctxt);
11131
45.7k
                                break;
11132
67.9k
                            }
11133
277k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11134
277k
                            if ((!terminate) && (avail < 9))
11135
43
                                goto done;
11136
277k
                            if ((ctxt->input->cur[2] == 'D') &&
11137
277k
                                (ctxt->input->cur[3] == 'O') &&
11138
277k
                                (ctxt->input->cur[4] == 'C') &&
11139
277k
                                (ctxt->input->cur[5] == 'T') &&
11140
277k
                                (ctxt->input->cur[6] == 'Y') &&
11141
277k
                                (ctxt->input->cur[7] == 'P') &&
11142
277k
                                (ctxt->input->cur[8] == 'E')) {
11143
277k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11144
261k
                                    goto done;
11145
15.6k
                                ctxt->inSubset = 1;
11146
15.6k
                                xmlParseDocTypeDecl(ctxt);
11147
15.6k
                                if (RAW == '[') {
11148
12.5k
                                    ctxt->instate = XML_PARSER_DTD;
11149
12.5k
                                } else {
11150
3.09k
                                    if (RAW == '>')
11151
2.03k
                                        NEXT;
11152
                                    /*
11153
                                     * Create and update the external subset.
11154
                                     */
11155
3.09k
                                    ctxt->inSubset = 2;
11156
3.09k
                                    if ((ctxt->sax != NULL) &&
11157
3.09k
                                        (!ctxt->disableSAX) &&
11158
2.94k
                                        (ctxt->sax->externalSubset != NULL))
11159
2.94k
                                        ctxt->sax->externalSubset(
11160
2.94k
                                                ctxt->userData,
11161
2.94k
                                                ctxt->intSubName,
11162
2.94k
                                                ctxt->extSubSystem,
11163
2.94k
                                                ctxt->extSubURI);
11164
3.09k
                                    ctxt->inSubset = 0;
11165
3.09k
                                    xmlCleanSpecialAttr(ctxt);
11166
3.09k
                                    ctxt->instate = XML_PARSER_PROLOG;
11167
3.09k
                                }
11168
15.6k
                                break;
11169
277k
                            }
11170
277k
                        }
11171
345k
                    }
11172
400k
                }
11173
11174
19.7k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11175
886
                    if (ctxt->errNo == XML_ERR_OK)
11176
22
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11177
886
        ctxt->instate = XML_PARSER_EOF;
11178
886
                    xmlFinishDocument(ctxt);
11179
18.8k
                } else {
11180
18.8k
        ctxt->instate = XML_PARSER_START_TAG;
11181
18.8k
    }
11182
19.7k
    break;
11183
807k
            case XML_PARSER_DTD: {
11184
807k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11185
795k
                    goto done;
11186
11.8k
    xmlParseInternalSubset(ctxt);
11187
11.8k
    ctxt->inSubset = 2;
11188
11.8k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11189
9.96k
        (ctxt->sax->externalSubset != NULL))
11190
9.96k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11191
9.96k
          ctxt->extSubSystem, ctxt->extSubURI);
11192
11.8k
    ctxt->inSubset = 0;
11193
11.8k
    xmlCleanSpecialAttr(ctxt);
11194
11.8k
    ctxt->instate = XML_PARSER_PROLOG;
11195
11.8k
                break;
11196
807k
      }
11197
0
            default:
11198
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11199
0
      "PP: internal error\n");
11200
0
    ctxt->instate = XML_PARSER_EOF;
11201
0
    break;
11202
3.11M
  }
11203
3.11M
    }
11204
2.51M
done:
11205
2.51M
    return(ret);
11206
2.51M
}
11207
11208
/**
11209
 * Parse a chunk of memory in push parser mode.
11210
 *
11211
 * Assumes that the parser context was initialized with
11212
 * #xmlCreatePushParserCtxt.
11213
 *
11214
 * The last chunk, which will often be empty, must be marked with
11215
 * the `terminate` flag. With the default SAX callbacks, the resulting
11216
 * document will be available in ctxt->myDoc. This pointer will not
11217
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11218
 * caller. If the document isn't well-formed, it will still be returned
11219
 * in ctxt->myDoc.
11220
 *
11221
 * As an exception, #xmlCtxtResetPush will free the document in
11222
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11223
 * the document.
11224
 *
11225
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11226
 * result document.
11227
 *
11228
 * @param ctxt  an XML parser context
11229
 * @param chunk  chunk of memory
11230
 * @param size  size of chunk in bytes
11231
 * @param terminate  last chunk indicator
11232
 * @returns an xmlParserErrors code (0 on success).
11233
 */
11234
int
11235
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11236
3.15M
              int terminate) {
11237
3.15M
    size_t curBase;
11238
3.15M
    size_t maxLength;
11239
3.15M
    size_t pos;
11240
3.15M
    int end_in_lf = 0;
11241
3.15M
    int res;
11242
11243
3.15M
    if ((ctxt == NULL) || (size < 0))
11244
0
        return(XML_ERR_ARGUMENT);
11245
3.15M
    if ((chunk == NULL) && (size > 0))
11246
0
        return(XML_ERR_ARGUMENT);
11247
3.15M
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11248
0
        return(XML_ERR_ARGUMENT);
11249
3.15M
    if (ctxt->disableSAX != 0)
11250
643k
        return(ctxt->errNo);
11251
11252
2.51M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11253
2.51M
    if (ctxt->instate == XML_PARSER_START)
11254
28.2k
        xmlCtxtInitializeLate(ctxt);
11255
2.51M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11256
2.49M
        (chunk[size - 1] == '\r')) {
11257
652
  end_in_lf = 1;
11258
652
  size--;
11259
652
    }
11260
11261
    /*
11262
     * Also push an empty chunk to make sure that the raw buffer
11263
     * will be flushed if there is an encoder.
11264
     */
11265
2.51M
    pos = ctxt->input->cur - ctxt->input->base;
11266
2.51M
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11267
2.51M
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11268
2.51M
    if (res < 0) {
11269
341
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11270
341
        return(ctxt->errNo);
11271
341
    }
11272
11273
2.51M
    xmlParseTryOrFinish(ctxt, terminate);
11274
11275
2.51M
    curBase = ctxt->input->cur - ctxt->input->base;
11276
2.51M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11277
701k
                XML_MAX_HUGE_LENGTH :
11278
2.51M
                XML_MAX_LOOKUP_LIMIT;
11279
2.51M
    if (curBase > maxLength) {
11280
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11281
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11282
0
    }
11283
11284
2.51M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11285
12.1k
        return(ctxt->errNo);
11286
11287
2.50M
    if (end_in_lf == 1) {
11288
620
  pos = ctxt->input->cur - ctxt->input->base;
11289
620
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11290
620
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11291
620
        if (res < 0) {
11292
8
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11293
8
            return(ctxt->errNo);
11294
8
        }
11295
620
    }
11296
2.50M
    if (terminate) {
11297
  /*
11298
   * Check for termination
11299
   */
11300
14.9k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11301
9.22k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11302
7.17k
            if (ctxt->nameNr > 0) {
11303
4.09k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11304
4.09k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11305
4.09k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11306
4.09k
                        "Premature end of data in tag %s line %d\n",
11307
4.09k
                        name, line, NULL);
11308
4.09k
            } else if (ctxt->instate == XML_PARSER_START) {
11309
145
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11310
2.93k
            } else {
11311
2.93k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11312
2.93k
                               "Start tag expected, '<' not found\n");
11313
2.93k
            }
11314
7.80k
        } else {
11315
7.80k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11316
7.80k
        }
11317
14.9k
  if (ctxt->instate != XML_PARSER_EOF) {
11318
8.99k
            ctxt->instate = XML_PARSER_EOF;
11319
8.99k
            xmlFinishDocument(ctxt);
11320
8.99k
  }
11321
14.9k
    }
11322
2.50M
    if (ctxt->wellFormed == 0)
11323
1.30M
  return((xmlParserErrors) ctxt->errNo);
11324
1.19M
    else
11325
1.19M
        return(0);
11326
2.50M
}
11327
11328
/************************************************************************
11329
 *                  *
11330
 *    I/O front end functions to the parser     *
11331
 *                  *
11332
 ************************************************************************/
11333
11334
/**
11335
 * Create a parser context for using the XML parser in push mode.
11336
 * See #xmlParseChunk.
11337
 *
11338
 * Passing an initial chunk is useless and deprecated.
11339
 *
11340
 * The push parser doesn't support recovery mode or the
11341
 * XML_PARSE_NOBLANKS option.
11342
 *
11343
 * `filename` is used as base URI to fetch external entities and for
11344
 * error reports.
11345
 *
11346
 * @param sax  a SAX handler (optional)
11347
 * @param user_data  user data for SAX callbacks (optional)
11348
 * @param chunk  initial chunk (optional, deprecated)
11349
 * @param size  size of initial chunk in bytes
11350
 * @param filename  file name or URI (optional)
11351
 * @returns the new parser context or NULL if a memory allocation
11352
 * failed.
11353
 */
11354
11355
xmlParserCtxt *
11356
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11357
27.2k
                        const char *chunk, int size, const char *filename) {
11358
27.2k
    xmlParserCtxtPtr ctxt;
11359
27.2k
    xmlParserInputPtr input;
11360
11361
27.2k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11362
27.2k
    if (ctxt == NULL)
11363
20
  return(NULL);
11364
11365
27.2k
    ctxt->options &= ~XML_PARSE_NODICT;
11366
27.2k
    ctxt->dictNames = 1;
11367
11368
27.2k
    input = xmlNewPushInput(filename, chunk, size);
11369
27.2k
    if (input == NULL) {
11370
18
  xmlFreeParserCtxt(ctxt);
11371
18
  return(NULL);
11372
18
    }
11373
27.2k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11374
6
        xmlFreeInputStream(input);
11375
6
        xmlFreeParserCtxt(ctxt);
11376
6
        return(NULL);
11377
6
    }
11378
11379
27.2k
    return(ctxt);
11380
27.2k
}
11381
#endif /* LIBXML_PUSH_ENABLED */
11382
11383
/**
11384
 * Blocks further parser processing
11385
 *
11386
 * @param ctxt  an XML parser context
11387
 */
11388
void
11389
0
xmlStopParser(xmlParserCtxt *ctxt) {
11390
0
    if (ctxt == NULL)
11391
0
        return;
11392
11393
    /* This stops the parser */
11394
0
    ctxt->disableSAX = 2;
11395
11396
    /*
11397
     * xmlStopParser is often called from error handlers,
11398
     * so we can't raise an error here to avoid infinite
11399
     * loops. Just make sure that an error condition is
11400
     * reported.
11401
     */
11402
0
    if (ctxt->errNo == XML_ERR_OK) {
11403
0
        ctxt->errNo = XML_ERR_USER_STOP;
11404
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11405
0
        ctxt->wellFormed = 0;
11406
0
    }
11407
0
}
11408
11409
/**
11410
 * Create a parser context for using the XML parser with an existing
11411
 * I/O stream
11412
 *
11413
 * @param sax  a SAX handler (optional)
11414
 * @param user_data  user data for SAX callbacks (optional)
11415
 * @param ioread  an I/O read function
11416
 * @param ioclose  an I/O close function (optional)
11417
 * @param ioctx  an I/O handler
11418
 * @param enc  the charset encoding if known (deprecated)
11419
 * @returns the new parser context or NULL
11420
 */
11421
xmlParserCtxt *
11422
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11423
                      xmlInputReadCallback ioread,
11424
                      xmlInputCloseCallback ioclose,
11425
0
                      void *ioctx, xmlCharEncoding enc) {
11426
0
    xmlParserCtxtPtr ctxt;
11427
0
    xmlParserInputPtr input;
11428
0
    const char *encoding;
11429
11430
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11431
0
    if (ctxt == NULL)
11432
0
  return(NULL);
11433
11434
0
    encoding = xmlGetCharEncodingName(enc);
11435
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11436
0
                                  encoding, 0);
11437
0
    if (input == NULL) {
11438
0
  xmlFreeParserCtxt(ctxt);
11439
0
        return (NULL);
11440
0
    }
11441
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11442
0
        xmlFreeInputStream(input);
11443
0
        xmlFreeParserCtxt(ctxt);
11444
0
        return(NULL);
11445
0
    }
11446
11447
0
    return(ctxt);
11448
0
}
11449
11450
#ifdef LIBXML_VALID_ENABLED
11451
/************************************************************************
11452
 *                  *
11453
 *    Front ends when parsing a DTD       *
11454
 *                  *
11455
 ************************************************************************/
11456
11457
/**
11458
 * Parse a DTD.
11459
 *
11460
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11461
 * to make external entities work.
11462
 *
11463
 * @since 2.14.0
11464
 *
11465
 * @param ctxt  a parser context
11466
 * @param input  a parser input
11467
 * @param publicId  public ID of the DTD (optional)
11468
 * @param systemId  system ID of the DTD (optional)
11469
 * @returns the resulting xmlDtd or NULL in case of error.
11470
 * `input` will be freed by the function in any case.
11471
 */
11472
xmlDtd *
11473
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11474
691
                const xmlChar *publicId, const xmlChar *systemId) {
11475
691
    xmlDtdPtr ret = NULL;
11476
11477
691
    if ((ctxt == NULL) || (input == NULL)) {
11478
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11479
0
        xmlFreeInputStream(input);
11480
0
        return(NULL);
11481
0
    }
11482
11483
691
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11484
4
        xmlFreeInputStream(input);
11485
4
        return(NULL);
11486
4
    }
11487
11488
687
    if (publicId == NULL)
11489
602
        publicId = BAD_CAST "none";
11490
687
    if (systemId == NULL)
11491
0
        systemId = BAD_CAST "none";
11492
11493
687
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11494
687
    if (ctxt->myDoc == NULL) {
11495
2
        xmlErrMemory(ctxt);
11496
2
        goto error;
11497
2
    }
11498
685
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11499
685
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11500
685
                                       publicId, systemId);
11501
685
    if (ctxt->myDoc->extSubset == NULL) {
11502
12
        xmlErrMemory(ctxt);
11503
12
        xmlFreeDoc(ctxt->myDoc);
11504
12
        goto error;
11505
12
    }
11506
11507
673
    xmlParseExternalSubset(ctxt, publicId, systemId);
11508
11509
673
    if (ctxt->wellFormed) {
11510
84
        ret = ctxt->myDoc->extSubset;
11511
84
        ctxt->myDoc->extSubset = NULL;
11512
84
        if (ret != NULL) {
11513
84
            xmlNodePtr tmp;
11514
11515
84
            ret->doc = NULL;
11516
84
            tmp = ret->children;
11517
1.17k
            while (tmp != NULL) {
11518
1.09k
                tmp->doc = NULL;
11519
1.09k
                tmp = tmp->next;
11520
1.09k
            }
11521
84
        }
11522
589
    } else {
11523
589
        ret = NULL;
11524
589
    }
11525
673
    xmlFreeDoc(ctxt->myDoc);
11526
673
    ctxt->myDoc = NULL;
11527
11528
687
error:
11529
687
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11530
11531
687
    return(ret);
11532
673
}
11533
11534
/**
11535
 * Load and parse a DTD
11536
 *
11537
 * @deprecated Use #xmlCtxtParseDtd.
11538
 *
11539
 * @param sax  the SAX handler block or NULL
11540
 * @param input  an Input Buffer
11541
 * @param enc  the charset encoding if known
11542
 * @returns the resulting xmlDtd or NULL in case of error.
11543
 * `input` will be freed by the function in any case.
11544
 */
11545
11546
xmlDtd *
11547
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11548
0
        xmlCharEncoding enc) {
11549
0
    xmlDtdPtr ret = NULL;
11550
0
    xmlParserCtxtPtr ctxt;
11551
0
    xmlParserInputPtr pinput = NULL;
11552
11553
0
    if (input == NULL)
11554
0
  return(NULL);
11555
11556
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11557
0
    if (ctxt == NULL) {
11558
0
        xmlFreeParserInputBuffer(input);
11559
0
  return(NULL);
11560
0
    }
11561
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11562
11563
    /*
11564
     * generate a parser input from the I/O handler
11565
     */
11566
11567
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11568
0
    if (pinput == NULL) {
11569
0
        xmlFreeParserInputBuffer(input);
11570
0
  xmlFreeParserCtxt(ctxt);
11571
0
  return(NULL);
11572
0
    }
11573
11574
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11575
0
        xmlSwitchEncoding(ctxt, enc);
11576
0
    }
11577
11578
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11579
11580
0
    xmlFreeParserCtxt(ctxt);
11581
0
    return(ret);
11582
0
}
11583
11584
/**
11585
 * Load and parse an external subset.
11586
 *
11587
 * @deprecated Use #xmlCtxtParseDtd.
11588
 *
11589
 * @param sax  the SAX handler block
11590
 * @param publicId  public identifier of the DTD (optional)
11591
 * @param systemId  system identifier (URL) of the DTD
11592
 * @returns the resulting xmlDtd or NULL in case of error.
11593
 */
11594
11595
xmlDtd *
11596
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11597
0
               const xmlChar *systemId) {
11598
0
    xmlDtdPtr ret = NULL;
11599
0
    xmlParserCtxtPtr ctxt;
11600
0
    xmlParserInputPtr input = NULL;
11601
0
    xmlChar* systemIdCanonic;
11602
11603
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11604
11605
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11606
0
    if (ctxt == NULL) {
11607
0
  return(NULL);
11608
0
    }
11609
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11610
11611
    /*
11612
     * Canonicalise the system ID
11613
     */
11614
0
    systemIdCanonic = xmlCanonicPath(systemId);
11615
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11616
0
  xmlFreeParserCtxt(ctxt);
11617
0
  return(NULL);
11618
0
    }
11619
11620
    /*
11621
     * Ask the Entity resolver to load the damn thing
11622
     */
11623
11624
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11625
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11626
0
                                   systemIdCanonic);
11627
0
    if (input == NULL) {
11628
0
  xmlFreeParserCtxt(ctxt);
11629
0
  if (systemIdCanonic != NULL)
11630
0
      xmlFree(systemIdCanonic);
11631
0
  return(NULL);
11632
0
    }
11633
11634
0
    if (input->filename == NULL)
11635
0
  input->filename = (char *) systemIdCanonic;
11636
0
    else
11637
0
  xmlFree(systemIdCanonic);
11638
11639
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11640
11641
0
    xmlFreeParserCtxt(ctxt);
11642
0
    return(ret);
11643
0
}
11644
11645
11646
/**
11647
 * Load and parse an external subset.
11648
 *
11649
 * @param publicId  public identifier of the DTD (optional)
11650
 * @param systemId  system identifier (URL) of the DTD
11651
 * @returns the resulting xmlDtd or NULL in case of error.
11652
 */
11653
11654
xmlDtd *
11655
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11656
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11657
0
}
11658
#endif /* LIBXML_VALID_ENABLED */
11659
11660
/************************************************************************
11661
 *                  *
11662
 *    Front ends when parsing an Entity     *
11663
 *                  *
11664
 ************************************************************************/
11665
11666
static xmlNodePtr
11667
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11668
17.9k
                            int hasTextDecl, int buildTree) {
11669
17.9k
    xmlNodePtr root = NULL;
11670
17.9k
    xmlNodePtr list = NULL;
11671
17.9k
    xmlChar *rootName = BAD_CAST "#root";
11672
17.9k
    int result;
11673
11674
17.9k
    if (buildTree) {
11675
17.9k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11676
17.9k
        if (root == NULL) {
11677
15
            xmlErrMemory(ctxt);
11678
15
            goto error;
11679
15
        }
11680
17.9k
    }
11681
11682
17.9k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11683
21
        goto error;
11684
11685
17.8k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11686
17.8k
    spacePush(ctxt, -1);
11687
11688
17.8k
    if (buildTree)
11689
17.8k
        nodePush(ctxt, root);
11690
11691
17.8k
    if (hasTextDecl) {
11692
3.79k
        xmlDetectEncoding(ctxt);
11693
11694
        /*
11695
         * Parse a possible text declaration first
11696
         */
11697
3.79k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11698
664
            (IS_BLANK_CH(NXT(5)))) {
11699
649
            xmlParseTextDecl(ctxt);
11700
            /*
11701
             * An XML-1.0 document can't reference an entity not XML-1.0
11702
             */
11703
649
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11704
582
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11705
9
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11706
9
                               "Version mismatch between document and "
11707
9
                               "entity\n");
11708
9
            }
11709
649
        }
11710
3.79k
    }
11711
11712
17.8k
    xmlParseContentInternal(ctxt);
11713
11714
17.8k
    if (ctxt->input->cur < ctxt->input->end)
11715
875
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11716
11717
17.8k
    if ((ctxt->wellFormed) ||
11718
16.2k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11719
16.2k
        if (root != NULL) {
11720
16.2k
            xmlNodePtr cur;
11721
11722
            /*
11723
             * Unlink newly created node list.
11724
             */
11725
16.2k
            list = root->children;
11726
16.2k
            root->children = NULL;
11727
16.2k
            root->last = NULL;
11728
39.0k
            for (cur = list; cur != NULL; cur = cur->next)
11729
22.7k
                cur->parent = NULL;
11730
16.2k
        }
11731
16.2k
    }
11732
11733
    /*
11734
     * Read the rest of the stream in case of errors. We want
11735
     * to account for the whole entity size.
11736
     */
11737
18.1k
    do {
11738
18.1k
        ctxt->input->cur = ctxt->input->end;
11739
18.1k
        xmlParserShrink(ctxt);
11740
18.1k
        result = xmlParserGrow(ctxt);
11741
18.1k
    } while (result > 0);
11742
11743
17.8k
    if (buildTree)
11744
17.8k
        nodePop(ctxt);
11745
11746
17.8k
    namePop(ctxt);
11747
17.8k
    spacePop(ctxt);
11748
11749
17.8k
    xmlCtxtPopInput(ctxt);
11750
11751
17.9k
error:
11752
17.9k
    xmlFreeNode(root);
11753
11754
17.9k
    return(list);
11755
17.8k
}
11756
11757
static void
11758
18.9k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11759
18.9k
    xmlParserInputPtr input;
11760
18.9k
    xmlNodePtr list;
11761
18.9k
    unsigned long consumed;
11762
18.9k
    int isExternal;
11763
18.9k
    int buildTree;
11764
18.9k
    int oldMinNsIndex;
11765
18.9k
    int oldNodelen, oldNodemem;
11766
11767
18.9k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11768
18.9k
    buildTree = (ctxt->node != NULL);
11769
11770
    /*
11771
     * Recursion check
11772
     */
11773
18.9k
    if (ent->flags & XML_ENT_EXPANDING) {
11774
9
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11775
9
        goto error;
11776
9
    }
11777
11778
    /*
11779
     * Load entity
11780
     */
11781
18.9k
    input = xmlNewEntityInputStream(ctxt, ent);
11782
18.9k
    if (input == NULL)
11783
979
        goto error;
11784
11785
    /*
11786
     * When building a tree, we need to limit the scope of namespace
11787
     * declarations, so that entities don't reference xmlNs structs
11788
     * from the parent of a reference.
11789
     */
11790
17.9k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11791
17.9k
    if (buildTree)
11792
17.9k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11793
11794
17.9k
    oldNodelen = ctxt->nodelen;
11795
17.9k
    oldNodemem = ctxt->nodemem;
11796
17.9k
    ctxt->nodelen = 0;
11797
17.9k
    ctxt->nodemem = 0;
11798
11799
    /*
11800
     * Parse content
11801
     *
11802
     * This initiates a recursive call chain:
11803
     *
11804
     * - xmlCtxtParseContentInternal
11805
     * - xmlParseContentInternal
11806
     * - xmlParseReference
11807
     * - xmlCtxtParseEntity
11808
     *
11809
     * The nesting depth is limited by the maximum number of inputs,
11810
     * see xmlCtxtPushInput.
11811
     *
11812
     * It's possible to make this non-recursive (minNsIndex must be
11813
     * stored in the input struct) at the expense of code readability.
11814
     */
11815
11816
17.9k
    ent->flags |= XML_ENT_EXPANDING;
11817
11818
17.9k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11819
11820
17.9k
    ent->flags &= ~XML_ENT_EXPANDING;
11821
11822
17.9k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11823
17.9k
    ctxt->nodelen = oldNodelen;
11824
17.9k
    ctxt->nodemem = oldNodemem;
11825
11826
    /*
11827
     * Entity size accounting
11828
     */
11829
17.9k
    consumed = input->consumed;
11830
17.9k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11831
11832
17.9k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11833
6.00k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11834
11835
17.9k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11836
6.00k
        if (isExternal)
11837
3.67k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11838
11839
6.00k
        ent->children = list;
11840
11841
28.7k
        while (list != NULL) {
11842
22.7k
            list->parent = (xmlNodePtr) ent;
11843
11844
            /*
11845
             * Downstream code like the nginx xslt module can set
11846
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11847
             * might have a different or a NULL document.
11848
             */
11849
22.7k
            if (list->doc != ent->doc)
11850
0
                xmlSetTreeDoc(list, ent->doc);
11851
11852
22.7k
            if (list->next == NULL)
11853
4.19k
                ent->last = list;
11854
22.7k
            list = list->next;
11855
22.7k
        }
11856
11.9k
    } else {
11857
11.9k
        xmlFreeNodeList(list);
11858
11.9k
    }
11859
11860
17.9k
    xmlFreeInputStream(input);
11861
11862
18.9k
error:
11863
18.9k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11864
18.9k
}
11865
11866
/**
11867
 * Parse an external general entity within an existing parsing context
11868
 * An external general parsed entity is well-formed if it matches the
11869
 * production labeled extParsedEnt.
11870
 *
11871
 *     [78] extParsedEnt ::= TextDecl? content
11872
 *
11873
 * @param ctxt  the existing parsing context
11874
 * @param URL  the URL for the entity to load
11875
 * @param ID  the System ID for the entity to load
11876
 * @param listOut  the return value for the set of parsed nodes
11877
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11878
 *    the parser error code otherwise
11879
 */
11880
11881
int
11882
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11883
0
                           const xmlChar *ID, xmlNode **listOut) {
11884
0
    xmlParserInputPtr input;
11885
0
    xmlNodePtr list;
11886
11887
0
    if (listOut != NULL)
11888
0
        *listOut = NULL;
11889
11890
0
    if (ctxt == NULL)
11891
0
        return(XML_ERR_ARGUMENT);
11892
11893
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11894
0
                            XML_RESOURCE_GENERAL_ENTITY);
11895
0
    if (input == NULL)
11896
0
        return(ctxt->errNo);
11897
11898
0
    xmlCtxtInitializeLate(ctxt);
11899
11900
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11901
0
    if (listOut != NULL)
11902
0
        *listOut = list;
11903
0
    else
11904
0
        xmlFreeNodeList(list);
11905
11906
0
    xmlFreeInputStream(input);
11907
0
    return(ctxt->errNo);
11908
0
}
11909
11910
#ifdef LIBXML_SAX1_ENABLED
11911
/**
11912
 * Parse an external general entity
11913
 * An external general parsed entity is well-formed if it matches the
11914
 * production labeled extParsedEnt.
11915
 *
11916
 * This function uses deprecated global variables to set parser options
11917
 * which default to XML_PARSE_NODICT.
11918
 *
11919
 * @deprecated Use #xmlParseCtxtExternalEntity.
11920
 *
11921
 *     [78] extParsedEnt ::= TextDecl? content
11922
 *
11923
 * @param doc  the document the chunk pertains to
11924
 * @param sax  the SAX handler block (possibly NULL)
11925
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11926
 * @param depth  Used for loop detection, use 0
11927
 * @param URL  the URL for the entity to load
11928
 * @param ID  the System ID for the entity to load
11929
 * @param list  the return value for the set of parsed nodes
11930
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11931
 *    the parser error code otherwise
11932
 */
11933
11934
int
11935
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11936
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11937
0
    xmlParserCtxtPtr ctxt;
11938
0
    int ret;
11939
11940
0
    if (list != NULL)
11941
0
        *list = NULL;
11942
11943
0
    if (doc == NULL)
11944
0
        return(XML_ERR_ARGUMENT);
11945
11946
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11947
0
    if (ctxt == NULL)
11948
0
        return(XML_ERR_NO_MEMORY);
11949
11950
0
    ctxt->depth = depth;
11951
0
    ctxt->myDoc = doc;
11952
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11953
11954
0
    xmlFreeParserCtxt(ctxt);
11955
0
    return(ret);
11956
0
}
11957
11958
/**
11959
 * Parse a well-balanced chunk of an XML document
11960
 * called by the parser
11961
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11962
 * the content production in the XML grammar:
11963
 *
11964
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11965
 *                       Comment)*
11966
 *
11967
 * This function uses deprecated global variables to set parser options
11968
 * which default to XML_PARSE_NODICT.
11969
 *
11970
 * @param doc  the document the chunk pertains to (must not be NULL)
11971
 * @param sax  the SAX handler block (possibly NULL)
11972
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11973
 * @param depth  Used for loop detection, use 0
11974
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11975
 * @param lst  the return value for the set of parsed nodes
11976
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11977
 *    the parser error code otherwise
11978
 */
11979
11980
int
11981
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11982
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11983
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11984
0
                                                depth, string, lst, 0 );
11985
0
}
11986
#endif /* LIBXML_SAX1_ENABLED */
11987
11988
/**
11989
 * Parse a well-balanced chunk of XML matching the 'content' production.
11990
 *
11991
 * Namespaces in scope of `node` and entities of `node`'s document are
11992
 * recognized. When validating, the DTD of `node`'s document is used.
11993
 *
11994
 * Always consumes `input` even in error case.
11995
 *
11996
 * @since 2.14.0
11997
 *
11998
 * @param ctxt  parser context
11999
 * @param input  parser input
12000
 * @param node  target node or document
12001
 * @param hasTextDecl  whether to parse text declaration
12002
 * @returns a node list or NULL in case of error.
12003
 */
12004
xmlNode *
12005
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12006
0
                    xmlNode *node, int hasTextDecl) {
12007
0
    xmlDocPtr doc;
12008
0
    xmlNodePtr cur, list = NULL;
12009
0
    int nsnr = 0;
12010
0
    xmlDictPtr oldDict;
12011
0
    int oldOptions, oldDictNames, oldLoadSubset;
12012
12013
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12014
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12015
0
        goto exit;
12016
0
    }
12017
12018
0
    doc = node->doc;
12019
0
    if (doc == NULL) {
12020
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12021
0
        goto exit;
12022
0
    }
12023
12024
0
    switch (node->type) {
12025
0
        case XML_ELEMENT_NODE:
12026
0
        case XML_DOCUMENT_NODE:
12027
0
        case XML_HTML_DOCUMENT_NODE:
12028
0
            break;
12029
12030
0
        case XML_ATTRIBUTE_NODE:
12031
0
        case XML_TEXT_NODE:
12032
0
        case XML_CDATA_SECTION_NODE:
12033
0
        case XML_ENTITY_REF_NODE:
12034
0
        case XML_PI_NODE:
12035
0
        case XML_COMMENT_NODE:
12036
0
            for (cur = node->parent; cur != NULL; cur = cur->parent) {
12037
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12038
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12039
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12040
0
                    node = cur;
12041
0
                    break;
12042
0
                }
12043
0
            }
12044
0
            break;
12045
12046
0
        default:
12047
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12048
0
            goto exit;
12049
0
    }
12050
12051
0
    xmlCtxtReset(ctxt);
12052
12053
0
    oldDict = ctxt->dict;
12054
0
    oldOptions = ctxt->options;
12055
0
    oldDictNames = ctxt->dictNames;
12056
0
    oldLoadSubset = ctxt->loadsubset;
12057
12058
    /*
12059
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12060
     */
12061
0
    if (doc->dict != NULL) {
12062
0
        ctxt->dict = doc->dict;
12063
0
    } else {
12064
0
        ctxt->options |= XML_PARSE_NODICT;
12065
0
        ctxt->dictNames = 0;
12066
0
    }
12067
12068
    /*
12069
     * Disable IDs
12070
     */
12071
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12072
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12073
12074
0
    ctxt->myDoc = doc;
12075
12076
0
#ifdef LIBXML_HTML_ENABLED
12077
0
    if (ctxt->html) {
12078
        /*
12079
         * When parsing in context, it makes no sense to add implied
12080
         * elements like html/body/etc...
12081
         */
12082
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12083
12084
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12085
0
    } else
12086
0
#endif
12087
0
    {
12088
0
        xmlCtxtInitializeLate(ctxt);
12089
12090
        /*
12091
         * initialize the SAX2 namespaces stack
12092
         */
12093
0
        cur = node;
12094
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12095
0
            xmlNsPtr ns = cur->nsDef;
12096
0
            xmlHashedString hprefix, huri;
12097
12098
0
            while (ns != NULL) {
12099
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12100
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12101
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12102
0
                    nsnr++;
12103
0
                ns = ns->next;
12104
0
            }
12105
0
            cur = cur->parent;
12106
0
        }
12107
12108
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12109
12110
0
        if (nsnr > 0)
12111
0
            xmlParserNsPop(ctxt, nsnr);
12112
0
    }
12113
12114
0
    ctxt->dict = oldDict;
12115
0
    ctxt->options = oldOptions;
12116
0
    ctxt->dictNames = oldDictNames;
12117
0
    ctxt->loadsubset = oldLoadSubset;
12118
0
    ctxt->myDoc = NULL;
12119
0
    ctxt->node = NULL;
12120
12121
0
exit:
12122
0
    xmlFreeInputStream(input);
12123
0
    return(list);
12124
0
}
12125
12126
/**
12127
 * Parse a well-balanced chunk of an XML document
12128
 * within the context (DTD, namespaces, etc ...) of the given node.
12129
 *
12130
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12131
 * the content production in the XML grammar:
12132
 *
12133
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12134
 *                       Comment)*
12135
 *
12136
 * This function assumes the encoding of `node`'s document which is
12137
 * typically not what you want. A better alternative is
12138
 * #xmlCtxtParseContent.
12139
 *
12140
 * @param node  the context node
12141
 * @param data  the input string
12142
 * @param datalen  the input string length in bytes
12143
 * @param options  a combination of xmlParserOption
12144
 * @param listOut  the return value for the set of parsed nodes
12145
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12146
 * error code otherwise
12147
 */
12148
xmlParserErrors
12149
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12150
0
                      int options, xmlNode **listOut) {
12151
0
    xmlParserCtxtPtr ctxt;
12152
0
    xmlParserInputPtr input;
12153
0
    xmlDocPtr doc;
12154
0
    xmlNodePtr list;
12155
0
    xmlParserErrors ret;
12156
12157
0
    if (listOut == NULL)
12158
0
        return(XML_ERR_INTERNAL_ERROR);
12159
0
    *listOut = NULL;
12160
12161
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
12164
0
    doc = node->doc;
12165
0
    if (doc == NULL)
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
#ifdef LIBXML_HTML_ENABLED
12169
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12170
0
        ctxt = htmlNewParserCtxt();
12171
0
    }
12172
0
    else
12173
0
#endif
12174
0
        ctxt = xmlNewParserCtxt();
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_NO_MEMORY);
12178
12179
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12180
0
                                      (const char *) doc->encoding,
12181
0
                                      XML_INPUT_BUF_STATIC);
12182
0
    if (input == NULL) {
12183
0
        xmlFreeParserCtxt(ctxt);
12184
0
        return(XML_ERR_NO_MEMORY);
12185
0
    }
12186
12187
0
    xmlCtxtUseOptions(ctxt, options);
12188
12189
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12190
12191
0
    if (list == NULL) {
12192
0
        ret = ctxt->errNo;
12193
0
        if (ret == XML_ERR_ARGUMENT)
12194
0
            ret = XML_ERR_INTERNAL_ERROR;
12195
0
    } else {
12196
0
        ret = XML_ERR_OK;
12197
0
        *listOut = list;
12198
0
    }
12199
12200
0
    xmlFreeParserCtxt(ctxt);
12201
12202
0
    return(ret);
12203
0
}
12204
12205
#ifdef LIBXML_SAX1_ENABLED
12206
/**
12207
 * Parse a well-balanced chunk of an XML document
12208
 *
12209
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12210
 * the content production in the XML grammar:
12211
 *
12212
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12213
 *                       Comment)*
12214
 *
12215
 * In case recover is set to 1, the nodelist will not be empty even if
12216
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12217
 * some extent.
12218
 *
12219
 * This function uses deprecated global variables to set parser options
12220
 * which default to XML_PARSE_NODICT.
12221
 *
12222
 * @param doc  the document the chunk pertains to (must not be NULL)
12223
 * @param sax  the SAX handler block (possibly NULL)
12224
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12225
 * @param depth  Used for loop detection, use 0
12226
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12227
 * @param listOut  the return value for the set of parsed nodes
12228
 * @param recover  return nodes even if the data is broken (use 0)
12229
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12230
 * otherwise.
12231
 */
12232
int
12233
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12234
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12235
0
     int recover) {
12236
0
    xmlParserCtxtPtr ctxt;
12237
0
    xmlParserInputPtr input;
12238
0
    xmlNodePtr list;
12239
0
    int ret;
12240
12241
0
    if (listOut != NULL)
12242
0
        *listOut = NULL;
12243
12244
0
    if (string == NULL)
12245
0
        return(XML_ERR_ARGUMENT);
12246
12247
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12248
0
    if (ctxt == NULL)
12249
0
        return(XML_ERR_NO_MEMORY);
12250
12251
0
    xmlCtxtInitializeLate(ctxt);
12252
12253
0
    ctxt->depth = depth;
12254
0
    ctxt->myDoc = doc;
12255
0
    if (recover) {
12256
0
        ctxt->options |= XML_PARSE_RECOVER;
12257
0
        ctxt->recovery = 1;
12258
0
    }
12259
12260
0
    input = xmlNewStringInputStream(ctxt, string);
12261
0
    if (input == NULL) {
12262
0
        ret = ctxt->errNo;
12263
0
        goto error;
12264
0
    }
12265
12266
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12267
0
    if (listOut != NULL)
12268
0
        *listOut = list;
12269
0
    else
12270
0
        xmlFreeNodeList(list);
12271
12272
0
    if (!ctxt->wellFormed)
12273
0
        ret = ctxt->errNo;
12274
0
    else
12275
0
        ret = XML_ERR_OK;
12276
12277
0
error:
12278
0
    xmlFreeInputStream(input);
12279
0
    xmlFreeParserCtxt(ctxt);
12280
0
    return(ret);
12281
0
}
12282
12283
/**
12284
 * Parse an XML external entity out of context and build a tree.
12285
 * It use the given SAX function block to handle the parsing callback.
12286
 * If sax is NULL, fallback to the default DOM tree building routines.
12287
 *
12288
 * @deprecated Don't use.
12289
 *
12290
 *     [78] extParsedEnt ::= TextDecl? content
12291
 *
12292
 * This correspond to a "Well Balanced" chunk
12293
 *
12294
 * This function uses deprecated global variables to set parser options
12295
 * which default to XML_PARSE_NODICT.
12296
 *
12297
 * @param sax  the SAX handler block
12298
 * @param filename  the filename
12299
 * @returns the resulting document tree
12300
 */
12301
12302
xmlDoc *
12303
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12304
0
    xmlDocPtr ret;
12305
0
    xmlParserCtxtPtr ctxt;
12306
12307
0
    ctxt = xmlCreateFileParserCtxt(filename);
12308
0
    if (ctxt == NULL) {
12309
0
  return(NULL);
12310
0
    }
12311
0
    if (sax != NULL) {
12312
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12313
0
            *ctxt->sax = *sax;
12314
0
        } else {
12315
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12316
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12317
0
        }
12318
0
        ctxt->userData = NULL;
12319
0
    }
12320
12321
0
    xmlParseExtParsedEnt(ctxt);
12322
12323
0
    if (ctxt->wellFormed) {
12324
0
  ret = ctxt->myDoc;
12325
0
    } else {
12326
0
        ret = NULL;
12327
0
        xmlFreeDoc(ctxt->myDoc);
12328
0
    }
12329
12330
0
    xmlFreeParserCtxt(ctxt);
12331
12332
0
    return(ret);
12333
0
}
12334
12335
/**
12336
 * Parse an XML external entity out of context and build a tree.
12337
 *
12338
 *     [78] extParsedEnt ::= TextDecl? content
12339
 *
12340
 * This correspond to a "Well Balanced" chunk
12341
 *
12342
 * This function uses deprecated global variables to set parser options
12343
 * which default to XML_PARSE_NODICT.
12344
 *
12345
 * @deprecated Don't use.
12346
 *
12347
 * @param filename  the filename
12348
 * @returns the resulting document tree
12349
 */
12350
12351
xmlDoc *
12352
0
xmlParseEntity(const char *filename) {
12353
0
    return(xmlSAXParseEntity(NULL, filename));
12354
0
}
12355
#endif /* LIBXML_SAX1_ENABLED */
12356
12357
/**
12358
 * Create a parser context for an external entity
12359
 * Automatic support for ZLIB/Compress compressed document is provided
12360
 * by default if found at compile-time.
12361
 *
12362
 * @deprecated Don't use.
12363
 *
12364
 * @param URL  the entity URL
12365
 * @param ID  the entity PUBLIC ID
12366
 * @param base  a possible base for the target URI
12367
 * @returns the new parser context or NULL
12368
 */
12369
xmlParserCtxt *
12370
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12371
0
                    const xmlChar *base) {
12372
0
    xmlParserCtxtPtr ctxt;
12373
0
    xmlParserInputPtr input;
12374
0
    xmlChar *uri = NULL;
12375
12376
0
    ctxt = xmlNewParserCtxt();
12377
0
    if (ctxt == NULL)
12378
0
  return(NULL);
12379
12380
0
    if (base != NULL) {
12381
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12382
0
            goto error;
12383
0
        if (uri != NULL)
12384
0
            URL = uri;
12385
0
    }
12386
12387
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12388
0
                            XML_RESOURCE_UNKNOWN);
12389
0
    if (input == NULL)
12390
0
        goto error;
12391
12392
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12393
0
        xmlFreeInputStream(input);
12394
0
        goto error;
12395
0
    }
12396
12397
0
    xmlFree(uri);
12398
0
    return(ctxt);
12399
12400
0
error:
12401
0
    xmlFree(uri);
12402
0
    xmlFreeParserCtxt(ctxt);
12403
0
    return(NULL);
12404
0
}
12405
12406
/************************************************************************
12407
 *                  *
12408
 *    Front ends when parsing from a file     *
12409
 *                  *
12410
 ************************************************************************/
12411
12412
/**
12413
 * Create a parser context for a file or URL content.
12414
 * Automatic support for ZLIB/Compress compressed document is provided
12415
 * by default if found at compile-time and for file accesses
12416
 *
12417
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12418
 *
12419
 * @param filename  the filename or URL
12420
 * @param options  a combination of xmlParserOption
12421
 * @returns the new parser context or NULL
12422
 */
12423
xmlParserCtxt *
12424
xmlCreateURLParserCtxt(const char *filename, int options)
12425
0
{
12426
0
    xmlParserCtxtPtr ctxt;
12427
0
    xmlParserInputPtr input;
12428
12429
0
    ctxt = xmlNewParserCtxt();
12430
0
    if (ctxt == NULL)
12431
0
  return(NULL);
12432
12433
0
    xmlCtxtUseOptions(ctxt, options);
12434
12435
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12436
0
    if (input == NULL) {
12437
0
  xmlFreeParserCtxt(ctxt);
12438
0
  return(NULL);
12439
0
    }
12440
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12441
0
        xmlFreeInputStream(input);
12442
0
        xmlFreeParserCtxt(ctxt);
12443
0
        return(NULL);
12444
0
    }
12445
12446
0
    return(ctxt);
12447
0
}
12448
12449
/**
12450
 * Create a parser context for a file content.
12451
 * Automatic support for ZLIB/Compress compressed document is provided
12452
 * by default if found at compile-time.
12453
 *
12454
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12455
 *
12456
 * @param filename  the filename
12457
 * @returns the new parser context or NULL
12458
 */
12459
xmlParserCtxt *
12460
xmlCreateFileParserCtxt(const char *filename)
12461
0
{
12462
0
    return(xmlCreateURLParserCtxt(filename, 0));
12463
0
}
12464
12465
#ifdef LIBXML_SAX1_ENABLED
12466
/**
12467
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12468
 * compressed document is provided by default if found at compile-time.
12469
 * It use the given SAX function block to handle the parsing callback.
12470
 * If sax is NULL, fallback to the default DOM tree building routines.
12471
 *
12472
 * This function uses deprecated global variables to set parser options
12473
 * which default to XML_PARSE_NODICT.
12474
 *
12475
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12476
 *
12477
 * User data (void *) is stored within the parser context in the
12478
 * context's _private member, so it is available nearly everywhere in libxml
12479
 *
12480
 * @param sax  the SAX handler block
12481
 * @param filename  the filename
12482
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12483
 *             documents
12484
 * @param data  the userdata
12485
 * @returns the resulting document tree
12486
 */
12487
12488
xmlDoc *
12489
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12490
0
                        int recovery, void *data) {
12491
0
    xmlDocPtr ret = NULL;
12492
0
    xmlParserCtxtPtr ctxt;
12493
0
    xmlParserInputPtr input;
12494
12495
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12496
0
    if (ctxt == NULL)
12497
0
  return(NULL);
12498
12499
0
    if (data != NULL)
12500
0
  ctxt->_private = data;
12501
12502
0
    if (recovery) {
12503
0
        ctxt->options |= XML_PARSE_RECOVER;
12504
0
        ctxt->recovery = 1;
12505
0
    }
12506
12507
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12508
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12509
0
    else
12510
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12511
12512
0
    if (input != NULL)
12513
0
        ret = xmlCtxtParseDocument(ctxt, input);
12514
12515
0
    xmlFreeParserCtxt(ctxt);
12516
0
    return(ret);
12517
0
}
12518
12519
/**
12520
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12521
 * compressed document is provided by default if found at compile-time.
12522
 * It use the given SAX function block to handle the parsing callback.
12523
 * If sax is NULL, fallback to the default DOM tree building routines.
12524
 *
12525
 * This function uses deprecated global variables to set parser options
12526
 * which default to XML_PARSE_NODICT.
12527
 *
12528
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12529
 *
12530
 * @param sax  the SAX handler block
12531
 * @param filename  the filename
12532
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12533
 *             documents
12534
 * @returns the resulting document tree
12535
 */
12536
12537
xmlDoc *
12538
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12539
0
                          int recovery) {
12540
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12541
0
}
12542
12543
/**
12544
 * Parse an XML in-memory document and build a tree.
12545
 * In the case the document is not Well Formed, a attempt to build a
12546
 * tree is tried anyway
12547
 *
12548
 * This function uses deprecated global variables to set parser options
12549
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12550
 *
12551
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12552
 *
12553
 * @param cur  a pointer to an array of xmlChar
12554
 * @returns the resulting document tree or NULL in case of failure
12555
 */
12556
12557
xmlDoc *
12558
0
xmlRecoverDoc(const xmlChar *cur) {
12559
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12560
0
}
12561
12562
/**
12563
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12564
 * compressed document is provided by default if found at compile-time.
12565
 *
12566
 * This function uses deprecated global variables to set parser options
12567
 * which default to XML_PARSE_NODICT.
12568
 *
12569
 * @deprecated Use #xmlReadFile.
12570
 *
12571
 * @param filename  the filename
12572
 * @returns the resulting document tree if the file was wellformed,
12573
 * NULL otherwise.
12574
 */
12575
12576
xmlDoc *
12577
0
xmlParseFile(const char *filename) {
12578
0
    return(xmlSAXParseFile(NULL, filename, 0));
12579
0
}
12580
12581
/**
12582
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12583
 * compressed document is provided by default if found at compile-time.
12584
 * In the case the document is not Well Formed, it attempts to build
12585
 * a tree anyway
12586
 *
12587
 * This function uses deprecated global variables to set parser options
12588
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12589
 *
12590
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12591
 *
12592
 * @param filename  the filename
12593
 * @returns the resulting document tree or NULL in case of failure
12594
 */
12595
12596
xmlDoc *
12597
0
xmlRecoverFile(const char *filename) {
12598
0
    return(xmlSAXParseFile(NULL, filename, 1));
12599
0
}
12600
12601
12602
/**
12603
 * Setup the parser context to parse a new buffer; Clears any prior
12604
 * contents from the parser context. The buffer parameter must not be
12605
 * NULL, but the filename parameter can be
12606
 *
12607
 * @deprecated Don't use.
12608
 *
12609
 * @param ctxt  an XML parser context
12610
 * @param buffer  a xmlChar * buffer
12611
 * @param filename  a file name
12612
 */
12613
void
12614
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12615
                             const char* filename)
12616
0
{
12617
0
    xmlParserInputPtr input;
12618
12619
0
    if ((ctxt == NULL) || (buffer == NULL))
12620
0
        return;
12621
12622
0
    xmlCtxtReset(ctxt);
12623
12624
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12625
0
                                      NULL, 0);
12626
0
    if (input == NULL)
12627
0
        return;
12628
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12629
0
        xmlFreeInputStream(input);
12630
0
}
12631
12632
/**
12633
 * Parse an XML file and call the given SAX handler routines.
12634
 * Automatic support for ZLIB/Compress compressed document is provided
12635
 *
12636
 * This function uses deprecated global variables to set parser options
12637
 * which default to XML_PARSE_NODICT.
12638
 *
12639
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12640
 *
12641
 * @param sax  a SAX handler
12642
 * @param user_data  The user data returned on SAX callbacks
12643
 * @param filename  a file name
12644
 * @returns 0 in case of success or a error number otherwise
12645
 */
12646
int
12647
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12648
0
                    const char *filename) {
12649
0
    int ret = 0;
12650
0
    xmlParserCtxtPtr ctxt;
12651
12652
0
    ctxt = xmlCreateFileParserCtxt(filename);
12653
0
    if (ctxt == NULL) return -1;
12654
0
    if (sax != NULL) {
12655
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12656
0
            *ctxt->sax = *sax;
12657
0
        } else {
12658
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12659
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12660
0
        }
12661
0
  ctxt->userData = user_data;
12662
0
    }
12663
12664
0
    xmlParseDocument(ctxt);
12665
12666
0
    if (ctxt->wellFormed)
12667
0
  ret = 0;
12668
0
    else {
12669
0
        if (ctxt->errNo != 0)
12670
0
      ret = ctxt->errNo;
12671
0
  else
12672
0
      ret = -1;
12673
0
    }
12674
0
    if (ctxt->myDoc != NULL) {
12675
0
        xmlFreeDoc(ctxt->myDoc);
12676
0
  ctxt->myDoc = NULL;
12677
0
    }
12678
0
    xmlFreeParserCtxt(ctxt);
12679
12680
0
    return ret;
12681
0
}
12682
#endif /* LIBXML_SAX1_ENABLED */
12683
12684
/************************************************************************
12685
 *                  *
12686
 *    Front ends when parsing from memory     *
12687
 *                  *
12688
 ************************************************************************/
12689
12690
/**
12691
 * Create a parser context for an XML in-memory document. The input buffer
12692
 * must not contain a terminating null byte.
12693
 *
12694
 * @param buffer  a pointer to a char array
12695
 * @param size  the size of the array
12696
 * @returns the new parser context or NULL
12697
 */
12698
xmlParserCtxt *
12699
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12700
0
    xmlParserCtxtPtr ctxt;
12701
0
    xmlParserInputPtr input;
12702
12703
0
    if (size < 0)
12704
0
  return(NULL);
12705
12706
0
    ctxt = xmlNewParserCtxt();
12707
0
    if (ctxt == NULL)
12708
0
  return(NULL);
12709
12710
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12711
0
    if (input == NULL) {
12712
0
  xmlFreeParserCtxt(ctxt);
12713
0
  return(NULL);
12714
0
    }
12715
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12716
0
        xmlFreeInputStream(input);
12717
0
        xmlFreeParserCtxt(ctxt);
12718
0
        return(NULL);
12719
0
    }
12720
12721
0
    return(ctxt);
12722
0
}
12723
12724
#ifdef LIBXML_SAX1_ENABLED
12725
/**
12726
 * Parse an XML in-memory block and use the given SAX function block
12727
 * to handle the parsing callback. If sax is NULL, fallback to the default
12728
 * DOM tree building routines.
12729
 *
12730
 * This function uses deprecated global variables to set parser options
12731
 * which default to XML_PARSE_NODICT.
12732
 *
12733
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12734
 *
12735
 * User data (void *) is stored within the parser context in the
12736
 * context's _private member, so it is available nearly everywhere in libxml
12737
 *
12738
 * @param sax  the SAX handler block
12739
 * @param buffer  an pointer to a char array
12740
 * @param size  the size of the array
12741
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12742
 *             documents
12743
 * @param data  the userdata
12744
 * @returns the resulting document tree
12745
 */
12746
12747
xmlDoc *
12748
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12749
0
                          int size, int recovery, void *data) {
12750
0
    xmlDocPtr ret = NULL;
12751
0
    xmlParserCtxtPtr ctxt;
12752
0
    xmlParserInputPtr input;
12753
12754
0
    if (size < 0)
12755
0
        return(NULL);
12756
12757
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12758
0
    if (ctxt == NULL)
12759
0
        return(NULL);
12760
12761
0
    if (data != NULL)
12762
0
  ctxt->_private=data;
12763
12764
0
    if (recovery) {
12765
0
        ctxt->options |= XML_PARSE_RECOVER;
12766
0
        ctxt->recovery = 1;
12767
0
    }
12768
12769
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12770
0
                                      XML_INPUT_BUF_STATIC);
12771
12772
0
    if (input != NULL)
12773
0
        ret = xmlCtxtParseDocument(ctxt, input);
12774
12775
0
    xmlFreeParserCtxt(ctxt);
12776
0
    return(ret);
12777
0
}
12778
12779
/**
12780
 * Parse an XML in-memory block and use the given SAX function block
12781
 * to handle the parsing callback. If sax is NULL, fallback to the default
12782
 * DOM tree building routines.
12783
 *
12784
 * This function uses deprecated global variables to set parser options
12785
 * which default to XML_PARSE_NODICT.
12786
 *
12787
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12788
 *
12789
 * @param sax  the SAX handler block
12790
 * @param buffer  an pointer to a char array
12791
 * @param size  the size of the array
12792
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12793
 *             documents
12794
 * @returns the resulting document tree
12795
 */
12796
xmlDoc *
12797
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12798
0
            int size, int recovery) {
12799
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12800
0
}
12801
12802
/**
12803
 * Parse an XML in-memory block and build a tree.
12804
 *
12805
 * This function uses deprecated global variables to set parser options
12806
 * which default to XML_PARSE_NODICT.
12807
 *
12808
 * @deprecated Use #xmlReadMemory.
12809
 *
12810
 * @param buffer  an pointer to a char array
12811
 * @param size  the size of the array
12812
 * @returns the resulting document tree
12813
 */
12814
12815
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12816
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12817
0
}
12818
12819
/**
12820
 * Parse an XML in-memory block and build a tree.
12821
 * In the case the document is not Well Formed, an attempt to
12822
 * build a tree is tried anyway
12823
 *
12824
 * This function uses deprecated global variables to set parser options
12825
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12826
 *
12827
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12828
 *
12829
 * @param buffer  an pointer to a char array
12830
 * @param size  the size of the array
12831
 * @returns the resulting document tree or NULL in case of error
12832
 */
12833
12834
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12835
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12836
0
}
12837
12838
/**
12839
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12840
 *
12841
 * This function uses deprecated global variables to set parser options
12842
 * which default to XML_PARSE_NODICT.
12843
 *
12844
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12845
 *
12846
 * @param sax  a SAX handler
12847
 * @param user_data  The user data returned on SAX callbacks
12848
 * @param buffer  an in-memory XML document input
12849
 * @param size  the length of the XML document in bytes
12850
 * @returns 0 in case of success or a error number otherwise
12851
 */
12852
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12853
0
        const char *buffer, int size) {
12854
0
    int ret = 0;
12855
0
    xmlParserCtxtPtr ctxt;
12856
12857
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12858
0
    if (ctxt == NULL) return -1;
12859
0
    if (sax != NULL) {
12860
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12861
0
            *ctxt->sax = *sax;
12862
0
        } else {
12863
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12864
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12865
0
        }
12866
0
  ctxt->userData = user_data;
12867
0
    }
12868
12869
0
    xmlParseDocument(ctxt);
12870
12871
0
    if (ctxt->wellFormed)
12872
0
  ret = 0;
12873
0
    else {
12874
0
        if (ctxt->errNo != 0)
12875
0
      ret = ctxt->errNo;
12876
0
  else
12877
0
      ret = -1;
12878
0
    }
12879
0
    if (ctxt->myDoc != NULL) {
12880
0
        xmlFreeDoc(ctxt->myDoc);
12881
0
  ctxt->myDoc = NULL;
12882
0
    }
12883
0
    xmlFreeParserCtxt(ctxt);
12884
12885
0
    return ret;
12886
0
}
12887
#endif /* LIBXML_SAX1_ENABLED */
12888
12889
/**
12890
 * Creates a parser context for an XML in-memory document.
12891
 *
12892
 * @param str  a pointer to an array of xmlChar
12893
 * @returns the new parser context or NULL
12894
 */
12895
xmlParserCtxt *
12896
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12897
0
    xmlParserCtxtPtr ctxt;
12898
0
    xmlParserInputPtr input;
12899
12900
0
    ctxt = xmlNewParserCtxt();
12901
0
    if (ctxt == NULL)
12902
0
  return(NULL);
12903
12904
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12905
0
    if (input == NULL) {
12906
0
  xmlFreeParserCtxt(ctxt);
12907
0
  return(NULL);
12908
0
    }
12909
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12910
0
        xmlFreeInputStream(input);
12911
0
        xmlFreeParserCtxt(ctxt);
12912
0
        return(NULL);
12913
0
    }
12914
12915
0
    return(ctxt);
12916
0
}
12917
12918
#ifdef LIBXML_SAX1_ENABLED
12919
/**
12920
 * Parse an XML in-memory document and build a tree.
12921
 * It use the given SAX function block to handle the parsing callback.
12922
 * If sax is NULL, fallback to the default DOM tree building routines.
12923
 *
12924
 * This function uses deprecated global variables to set parser options
12925
 * which default to XML_PARSE_NODICT.
12926
 *
12927
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12928
 *
12929
 * @param sax  the SAX handler block
12930
 * @param cur  a pointer to an array of xmlChar
12931
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12932
 *             documents
12933
 * @returns the resulting document tree
12934
 */
12935
12936
xmlDoc *
12937
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12938
0
    xmlDocPtr ret;
12939
0
    xmlParserCtxtPtr ctxt;
12940
0
    xmlSAXHandlerPtr oldsax = NULL;
12941
12942
0
    if (cur == NULL) return(NULL);
12943
12944
12945
0
    ctxt = xmlCreateDocParserCtxt(cur);
12946
0
    if (ctxt == NULL) return(NULL);
12947
0
    if (sax != NULL) {
12948
0
        oldsax = ctxt->sax;
12949
0
        ctxt->sax = sax;
12950
0
        ctxt->userData = NULL;
12951
0
    }
12952
12953
0
    xmlParseDocument(ctxt);
12954
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12955
0
    else {
12956
0
       ret = NULL;
12957
0
       xmlFreeDoc(ctxt->myDoc);
12958
0
       ctxt->myDoc = NULL;
12959
0
    }
12960
0
    if (sax != NULL)
12961
0
  ctxt->sax = oldsax;
12962
0
    xmlFreeParserCtxt(ctxt);
12963
12964
0
    return(ret);
12965
0
}
12966
12967
/**
12968
 * Parse an XML in-memory document and build a tree.
12969
 *
12970
 * This function uses deprecated global variables to set parser options
12971
 * which default to XML_PARSE_NODICT.
12972
 *
12973
 * @deprecated Use #xmlReadDoc.
12974
 *
12975
 * @param cur  a pointer to an array of xmlChar
12976
 * @returns the resulting document tree
12977
 */
12978
12979
xmlDoc *
12980
0
xmlParseDoc(const xmlChar *cur) {
12981
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12982
0
}
12983
#endif /* LIBXML_SAX1_ENABLED */
12984
12985
/************************************************************************
12986
 *                  *
12987
 *  New set (2.6.0) of simpler and more flexible APIs   *
12988
 *                  *
12989
 ************************************************************************/
12990
12991
/**
12992
 * Reset a parser context
12993
 *
12994
 * @param ctxt  an XML parser context
12995
 */
12996
void
12997
xmlCtxtReset(xmlParserCtxt *ctxt)
12998
69.6k
{
12999
69.6k
    xmlParserInputPtr input;
13000
13001
69.6k
    if (ctxt == NULL)
13002
0
        return;
13003
13004
69.6k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13005
0
        xmlFreeInputStream(input);
13006
0
    }
13007
69.6k
    ctxt->inputNr = 0;
13008
69.6k
    ctxt->input = NULL;
13009
13010
69.6k
    ctxt->spaceNr = 0;
13011
69.6k
    if (ctxt->spaceTab != NULL) {
13012
69.6k
  ctxt->spaceTab[0] = -1;
13013
69.6k
  ctxt->space = &ctxt->spaceTab[0];
13014
69.6k
    } else {
13015
0
        ctxt->space = NULL;
13016
0
    }
13017
13018
13019
69.6k
    ctxt->nodeNr = 0;
13020
69.6k
    ctxt->node = NULL;
13021
13022
69.6k
    ctxt->nameNr = 0;
13023
69.6k
    ctxt->name = NULL;
13024
13025
69.6k
    ctxt->nsNr = 0;
13026
69.6k
    xmlParserNsReset(ctxt->nsdb);
13027
13028
69.6k
    if (ctxt->version != NULL) {
13029
14.6k
        xmlFree(ctxt->version);
13030
14.6k
        ctxt->version = NULL;
13031
14.6k
    }
13032
69.6k
    if (ctxt->encoding != NULL) {
13033
812
        xmlFree(ctxt->encoding);
13034
812
        ctxt->encoding = NULL;
13035
812
    }
13036
69.6k
    if (ctxt->extSubURI != NULL) {
13037
2.28k
        xmlFree(ctxt->extSubURI);
13038
2.28k
        ctxt->extSubURI = NULL;
13039
2.28k
    }
13040
69.6k
    if (ctxt->extSubSystem != NULL) {
13041
254
        xmlFree(ctxt->extSubSystem);
13042
254
        ctxt->extSubSystem = NULL;
13043
254
    }
13044
69.6k
    if (ctxt->directory != NULL) {
13045
15.2k
        xmlFree(ctxt->directory);
13046
15.2k
        ctxt->directory = NULL;
13047
15.2k
    }
13048
13049
69.6k
    if (ctxt->myDoc != NULL)
13050
0
        xmlFreeDoc(ctxt->myDoc);
13051
69.6k
    ctxt->myDoc = NULL;
13052
13053
69.6k
    ctxt->standalone = -1;
13054
69.6k
    ctxt->hasExternalSubset = 0;
13055
69.6k
    ctxt->hasPErefs = 0;
13056
69.6k
    ctxt->html = ctxt->html ? 1 : 0;
13057
69.6k
    ctxt->instate = XML_PARSER_START;
13058
13059
69.6k
    ctxt->wellFormed = 1;
13060
69.6k
    ctxt->nsWellFormed = 1;
13061
69.6k
    ctxt->disableSAX = 0;
13062
69.6k
    ctxt->valid = 1;
13063
69.6k
    ctxt->record_info = 0;
13064
69.6k
    ctxt->checkIndex = 0;
13065
69.6k
    ctxt->endCheckState = 0;
13066
69.6k
    ctxt->inSubset = 0;
13067
69.6k
    ctxt->errNo = XML_ERR_OK;
13068
69.6k
    ctxt->depth = 0;
13069
69.6k
    ctxt->catalogs = NULL;
13070
69.6k
    ctxt->sizeentities = 0;
13071
69.6k
    ctxt->sizeentcopy = 0;
13072
69.6k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13073
13074
69.6k
    if (ctxt->attsDefault != NULL) {
13075
1.37k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13076
1.37k
        ctxt->attsDefault = NULL;
13077
1.37k
    }
13078
69.6k
    if (ctxt->attsSpecial != NULL) {
13079
1.83k
        xmlHashFree(ctxt->attsSpecial, NULL);
13080
1.83k
        ctxt->attsSpecial = NULL;
13081
1.83k
    }
13082
13083
69.6k
#ifdef LIBXML_CATALOG_ENABLED
13084
69.6k
    if (ctxt->catalogs != NULL)
13085
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13086
69.6k
#endif
13087
69.6k
    ctxt->nbErrors = 0;
13088
69.6k
    ctxt->nbWarnings = 0;
13089
69.6k
    if (ctxt->lastError.code != XML_ERR_OK)
13090
15.0k
        xmlResetError(&ctxt->lastError);
13091
69.6k
}
13092
13093
/**
13094
 * Reset a push parser context
13095
 *
13096
 * @param ctxt  an XML parser context
13097
 * @param chunk  a pointer to an array of chars
13098
 * @param size  number of chars in the array
13099
 * @param filename  an optional file name or URI
13100
 * @param encoding  the document encoding, or NULL
13101
 * @returns 0 in case of success and 1 in case of error
13102
 */
13103
int
13104
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13105
                 int size, const char *filename, const char *encoding)
13106
0
{
13107
0
    xmlParserInputPtr input;
13108
13109
0
    if (ctxt == NULL)
13110
0
        return(1);
13111
13112
0
    xmlCtxtReset(ctxt);
13113
13114
0
    input = xmlNewPushInput(filename, chunk, size);
13115
0
    if (input == NULL)
13116
0
        return(1);
13117
13118
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13119
0
        xmlFreeInputStream(input);
13120
0
        return(1);
13121
0
    }
13122
13123
0
    if (encoding != NULL)
13124
0
        xmlSwitchEncodingName(ctxt, encoding);
13125
13126
0
    return(0);
13127
0
}
13128
13129
static int
13130
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13131
81.6k
{
13132
81.6k
    int allMask;
13133
13134
81.6k
    if (ctxt == NULL)
13135
0
        return(-1);
13136
13137
    /*
13138
     * XInclude options aren't handled by the parser.
13139
     *
13140
     * XML_PARSE_XINCLUDE
13141
     * XML_PARSE_NOXINCNODE
13142
     * XML_PARSE_NOBASEFIX
13143
     */
13144
81.6k
    allMask = XML_PARSE_RECOVER |
13145
81.6k
              XML_PARSE_NOENT |
13146
81.6k
              XML_PARSE_DTDLOAD |
13147
81.6k
              XML_PARSE_DTDATTR |
13148
81.6k
              XML_PARSE_DTDVALID |
13149
81.6k
              XML_PARSE_NOERROR |
13150
81.6k
              XML_PARSE_NOWARNING |
13151
81.6k
              XML_PARSE_PEDANTIC |
13152
81.6k
              XML_PARSE_NOBLANKS |
13153
81.6k
#ifdef LIBXML_SAX1_ENABLED
13154
81.6k
              XML_PARSE_SAX1 |
13155
81.6k
#endif
13156
81.6k
              XML_PARSE_NONET |
13157
81.6k
              XML_PARSE_NODICT |
13158
81.6k
              XML_PARSE_NSCLEAN |
13159
81.6k
              XML_PARSE_NOCDATA |
13160
81.6k
              XML_PARSE_COMPACT |
13161
81.6k
              XML_PARSE_OLD10 |
13162
81.6k
              XML_PARSE_HUGE |
13163
81.6k
              XML_PARSE_OLDSAX |
13164
81.6k
              XML_PARSE_IGNORE_ENC |
13165
81.6k
              XML_PARSE_BIG_LINES |
13166
81.6k
              XML_PARSE_NO_XXE |
13167
81.6k
              XML_PARSE_UNZIP |
13168
81.6k
              XML_PARSE_NO_SYS_CATALOG |
13169
81.6k
              XML_PARSE_CATALOG_PI;
13170
13171
81.6k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13172
13173
    /*
13174
     * For some options, struct members are historically the source
13175
     * of truth. The values are initalized from global variables and
13176
     * old code could also modify them directly. Several older API
13177
     * functions that don't take an options argument rely on these
13178
     * deprecated mechanisms.
13179
     *
13180
     * Once public access to struct members and the globals are
13181
     * disabled, we can use the options bitmask as source of
13182
     * truth, making all these struct members obsolete.
13183
     *
13184
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13185
     * loading of the external subset.
13186
     */
13187
81.6k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13188
81.6k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13189
81.6k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13190
81.6k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13191
81.6k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13192
81.6k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13193
81.6k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13194
81.6k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13195
81.6k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13196
13197
81.6k
    return(options & ~allMask);
13198
81.6k
}
13199
13200
/**
13201
 * Applies the options to the parser context. Unset options are
13202
 * cleared.
13203
 *
13204
 * @since 2.13.0
13205
 *
13206
 * With older versions, you can use #xmlCtxtUseOptions.
13207
 *
13208
 * @param ctxt  an XML parser context
13209
 * @param options  a bitmask of xmlParserOption values
13210
 * @returns 0 in case of success, the set of unknown or unimplemented options
13211
 *         in case of error.
13212
 */
13213
int
13214
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13215
0
{
13216
0
#ifdef LIBXML_HTML_ENABLED
13217
0
    if ((ctxt != NULL) && (ctxt->html))
13218
0
        return(htmlCtxtSetOptions(ctxt, options));
13219
0
#endif
13220
13221
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13222
0
}
13223
13224
/**
13225
 * Get the current options of the parser context.
13226
 *
13227
 * @since 2.14.0
13228
 *
13229
 * @param ctxt  an XML parser context
13230
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13231
 */
13232
int
13233
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13234
0
{
13235
0
    if (ctxt == NULL)
13236
0
        return(-1);
13237
13238
0
    return(ctxt->options);
13239
0
}
13240
13241
/**
13242
 * Applies the options to the parser context. The following options
13243
 * are never cleared and can only be enabled:
13244
 *
13245
 * - XML_PARSE_NOERROR
13246
 * - XML_PARSE_NOWARNING
13247
 * - XML_PARSE_NONET
13248
 * - XML_PARSE_NSCLEAN
13249
 * - XML_PARSE_NOCDATA
13250
 * - XML_PARSE_COMPACT
13251
 * - XML_PARSE_OLD10
13252
 * - XML_PARSE_HUGE
13253
 * - XML_PARSE_OLDSAX
13254
 * - XML_PARSE_IGNORE_ENC
13255
 * - XML_PARSE_BIG_LINES
13256
 *
13257
 * @deprecated Use #xmlCtxtSetOptions.
13258
 *
13259
 * @param ctxt  an XML parser context
13260
 * @param options  a combination of xmlParserOption
13261
 * @returns 0 in case of success, the set of unknown or unimplemented options
13262
 *         in case of error.
13263
 */
13264
int
13265
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13266
81.6k
{
13267
81.6k
    int keepMask;
13268
13269
81.6k
#ifdef LIBXML_HTML_ENABLED
13270
81.6k
    if ((ctxt != NULL) && (ctxt->html))
13271
0
        return(htmlCtxtUseOptions(ctxt, options));
13272
81.6k
#endif
13273
13274
    /*
13275
     * For historic reasons, some options can only be enabled.
13276
     */
13277
81.6k
    keepMask = XML_PARSE_NOERROR |
13278
81.6k
               XML_PARSE_NOWARNING |
13279
81.6k
               XML_PARSE_NONET |
13280
81.6k
               XML_PARSE_NSCLEAN |
13281
81.6k
               XML_PARSE_NOCDATA |
13282
81.6k
               XML_PARSE_COMPACT |
13283
81.6k
               XML_PARSE_OLD10 |
13284
81.6k
               XML_PARSE_HUGE |
13285
81.6k
               XML_PARSE_OLDSAX |
13286
81.6k
               XML_PARSE_IGNORE_ENC |
13287
81.6k
               XML_PARSE_BIG_LINES;
13288
13289
81.6k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13290
81.6k
}
13291
13292
/**
13293
 * To protect against exponential entity expansion ("billion laughs"), the
13294
 * size of serialized output is (roughly) limited to the input size
13295
 * multiplied by this factor. The default value is 5.
13296
 *
13297
 * When working with documents making heavy use of entity expansion, it can
13298
 * be necessary to increase the value. For security reasons, this should only
13299
 * be considered when processing trusted input.
13300
 *
13301
 * @param ctxt  an XML parser context
13302
 * @param maxAmpl  maximum amplification factor
13303
 */
13304
void
13305
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13306
0
{
13307
0
    if (ctxt == NULL)
13308
0
        return;
13309
0
    ctxt->maxAmpl = maxAmpl;
13310
0
}
13311
13312
/**
13313
 * Parse an XML document and return the resulting document tree.
13314
 * Takes ownership of the input object.
13315
 *
13316
 * @since 2.13.0
13317
 *
13318
 * @param ctxt  an XML parser context
13319
 * @param input  parser input
13320
 * @returns the resulting document tree or NULL
13321
 */
13322
xmlDoc *
13323
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13324
54.4k
{
13325
54.4k
    xmlDocPtr ret = NULL;
13326
13327
54.4k
    if ((ctxt == NULL) || (input == NULL)) {
13328
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13329
0
        xmlFreeInputStream(input);
13330
0
        return(NULL);
13331
0
    }
13332
13333
    /* assert(ctxt->inputNr == 0); */
13334
54.4k
    while (ctxt->inputNr > 0)
13335
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13336
13337
54.4k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13338
10
        xmlFreeInputStream(input);
13339
10
        return(NULL);
13340
10
    }
13341
13342
54.3k
    xmlParseDocument(ctxt);
13343
13344
54.3k
    ret = xmlCtxtGetDocument(ctxt);
13345
13346
    /* assert(ctxt->inputNr == 1); */
13347
109k
    while (ctxt->inputNr > 0)
13348
55.2k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13349
13350
54.3k
    return(ret);
13351
54.4k
}
13352
13353
/**
13354
 * Convenience function to parse an XML document from a
13355
 * zero-terminated string.
13356
 *
13357
 * See #xmlCtxtReadDoc for details.
13358
 *
13359
 * @param cur  a pointer to a zero terminated string
13360
 * @param URL  base URL (optional)
13361
 * @param encoding  the document encoding (optional)
13362
 * @param options  a combination of xmlParserOption
13363
 * @returns the resulting document tree
13364
 */
13365
xmlDoc *
13366
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13367
           int options)
13368
0
{
13369
0
    xmlParserCtxtPtr ctxt;
13370
0
    xmlParserInputPtr input;
13371
0
    xmlDocPtr doc = NULL;
13372
13373
0
    ctxt = xmlNewParserCtxt();
13374
0
    if (ctxt == NULL)
13375
0
        return(NULL);
13376
13377
0
    xmlCtxtUseOptions(ctxt, options);
13378
13379
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13380
0
                                      XML_INPUT_BUF_STATIC);
13381
13382
0
    if (input != NULL)
13383
0
        doc = xmlCtxtParseDocument(ctxt, input);
13384
13385
0
    xmlFreeParserCtxt(ctxt);
13386
0
    return(doc);
13387
0
}
13388
13389
/**
13390
 * Convenience function to parse an XML file from the filesystem
13391
 * or a global, user-defined resource loader.
13392
 *
13393
 * If a "-" filename is passed, the function will read from stdin.
13394
 * This feature is potentially insecure and might be removed from
13395
 * later versions.
13396
 *
13397
 * See #xmlCtxtReadFile for details.
13398
 *
13399
 * @param filename  a file or URL
13400
 * @param encoding  the document encoding (optional)
13401
 * @param options  a combination of xmlParserOption
13402
 * @returns the resulting document tree
13403
 */
13404
xmlDoc *
13405
xmlReadFile(const char *filename, const char *encoding, int options)
13406
0
{
13407
0
    xmlParserCtxtPtr ctxt;
13408
0
    xmlParserInputPtr input;
13409
0
    xmlDocPtr doc = NULL;
13410
13411
0
    ctxt = xmlNewParserCtxt();
13412
0
    if (ctxt == NULL)
13413
0
        return(NULL);
13414
13415
0
    xmlCtxtUseOptions(ctxt, options);
13416
13417
    /*
13418
     * Backward compatibility for users of command line utilities like
13419
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13420
     * should be removed at some point.
13421
     */
13422
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13423
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13424
0
                                      encoding, 0);
13425
0
    else
13426
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13427
13428
0
    if (input != NULL)
13429
0
        doc = xmlCtxtParseDocument(ctxt, input);
13430
13431
0
    xmlFreeParserCtxt(ctxt);
13432
0
    return(doc);
13433
0
}
13434
13435
/**
13436
 * Parse an XML in-memory document and build a tree. The input buffer must
13437
 * not contain a terminating null byte.
13438
 *
13439
 * See #xmlCtxtReadMemory for details.
13440
 *
13441
 * @param buffer  a pointer to a char array
13442
 * @param size  the size of the array
13443
 * @param url  base URL (optional)
13444
 * @param encoding  the document encoding (optional)
13445
 * @param options  a combination of xmlParserOption
13446
 * @returns the resulting document tree
13447
 */
13448
xmlDoc *
13449
xmlReadMemory(const char *buffer, int size, const char *url,
13450
              const char *encoding, int options)
13451
0
{
13452
0
    xmlParserCtxtPtr ctxt;
13453
0
    xmlParserInputPtr input;
13454
0
    xmlDocPtr doc = NULL;
13455
13456
0
    if (size < 0)
13457
0
  return(NULL);
13458
13459
0
    ctxt = xmlNewParserCtxt();
13460
0
    if (ctxt == NULL)
13461
0
        return(NULL);
13462
13463
0
    xmlCtxtUseOptions(ctxt, options);
13464
13465
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13466
0
                                      XML_INPUT_BUF_STATIC);
13467
13468
0
    if (input != NULL)
13469
0
        doc = xmlCtxtParseDocument(ctxt, input);
13470
13471
0
    xmlFreeParserCtxt(ctxt);
13472
0
    return(doc);
13473
0
}
13474
13475
/**
13476
 * Parse an XML from a file descriptor and build a tree.
13477
 *
13478
 * See #xmlCtxtReadFd for details.
13479
 *
13480
 * NOTE that the file descriptor will not be closed when the
13481
 * context is freed or reset.
13482
 *
13483
 * @param fd  an open file descriptor
13484
 * @param URL  base URL (optional)
13485
 * @param encoding  the document encoding (optional)
13486
 * @param options  a combination of xmlParserOption
13487
 * @returns the resulting document tree
13488
 */
13489
xmlDoc *
13490
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13491
0
{
13492
0
    xmlParserCtxtPtr ctxt;
13493
0
    xmlParserInputPtr input;
13494
0
    xmlDocPtr doc = NULL;
13495
13496
0
    ctxt = xmlNewParserCtxt();
13497
0
    if (ctxt == NULL)
13498
0
        return(NULL);
13499
13500
0
    xmlCtxtUseOptions(ctxt, options);
13501
13502
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13503
13504
0
    if (input != NULL)
13505
0
        doc = xmlCtxtParseDocument(ctxt, input);
13506
13507
0
    xmlFreeParserCtxt(ctxt);
13508
0
    return(doc);
13509
0
}
13510
13511
/**
13512
 * Parse an XML document from I/O functions and context and build a tree.
13513
 *
13514
 * See #xmlCtxtReadIO for details.
13515
 *
13516
 * @param ioread  an I/O read function
13517
 * @param ioclose  an I/O close function (optional)
13518
 * @param ioctx  an I/O handler
13519
 * @param URL  base URL (optional)
13520
 * @param encoding  the document encoding (optional)
13521
 * @param options  a combination of xmlParserOption
13522
 * @returns the resulting document tree
13523
 */
13524
xmlDoc *
13525
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13526
          void *ioctx, const char *URL, const char *encoding, int options)
13527
0
{
13528
0
    xmlParserCtxtPtr ctxt;
13529
0
    xmlParserInputPtr input;
13530
0
    xmlDocPtr doc = NULL;
13531
13532
0
    ctxt = xmlNewParserCtxt();
13533
0
    if (ctxt == NULL)
13534
0
        return(NULL);
13535
13536
0
    xmlCtxtUseOptions(ctxt, options);
13537
13538
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13539
0
                                  encoding, 0);
13540
13541
0
    if (input != NULL)
13542
0
        doc = xmlCtxtParseDocument(ctxt, input);
13543
13544
0
    xmlFreeParserCtxt(ctxt);
13545
0
    return(doc);
13546
0
}
13547
13548
/**
13549
 * Parse an XML in-memory document and build a tree.
13550
 *
13551
 * `URL` is used as base to resolve external entities and for error
13552
 * reporting.
13553
 *
13554
 * @param ctxt  an XML parser context
13555
 * @param str  a pointer to a zero terminated string
13556
 * @param URL  base URL (optional)
13557
 * @param encoding  the document encoding (optional)
13558
 * @param options  a combination of xmlParserOption
13559
 * @returns the resulting document tree
13560
 */
13561
xmlDoc *
13562
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13563
               const char *URL, const char *encoding, int options)
13564
0
{
13565
0
    xmlParserInputPtr input;
13566
13567
0
    if (ctxt == NULL)
13568
0
        return(NULL);
13569
13570
0
    xmlCtxtReset(ctxt);
13571
0
    xmlCtxtUseOptions(ctxt, options);
13572
13573
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13574
0
                                      XML_INPUT_BUF_STATIC);
13575
0
    if (input == NULL)
13576
0
        return(NULL);
13577
13578
0
    return(xmlCtxtParseDocument(ctxt, input));
13579
0
}
13580
13581
/**
13582
 * Parse an XML file from the filesystem or a global, user-defined
13583
 * resource loader.
13584
 *
13585
 * @param ctxt  an XML parser context
13586
 * @param filename  a file or URL
13587
 * @param encoding  the document encoding (optional)
13588
 * @param options  a combination of xmlParserOption
13589
 * @returns the resulting document tree
13590
 */
13591
xmlDoc *
13592
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13593
                const char *encoding, int options)
13594
0
{
13595
0
    xmlParserInputPtr input;
13596
13597
0
    if (ctxt == NULL)
13598
0
        return(NULL);
13599
13600
0
    xmlCtxtReset(ctxt);
13601
0
    xmlCtxtUseOptions(ctxt, options);
13602
13603
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13604
0
    if (input == NULL)
13605
0
        return(NULL);
13606
13607
0
    return(xmlCtxtParseDocument(ctxt, input));
13608
0
}
13609
13610
/**
13611
 * Parse an XML in-memory document and build a tree. The input buffer must
13612
 * not contain a terminating null byte.
13613
 *
13614
 * `URL` is used as base to resolve external entities and for error
13615
 * reporting.
13616
 *
13617
 * @param ctxt  an XML parser context
13618
 * @param buffer  a pointer to a char array
13619
 * @param size  the size of the array
13620
 * @param URL  base URL (optional)
13621
 * @param encoding  the document encoding (optional)
13622
 * @param options  a combination of xmlParserOption
13623
 * @returns the resulting document tree
13624
 */
13625
xmlDoc *
13626
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13627
                  const char *URL, const char *encoding, int options)
13628
54.4k
{
13629
54.4k
    xmlParserInputPtr input;
13630
13631
54.4k
    if ((ctxt == NULL) || (size < 0))
13632
0
        return(NULL);
13633
13634
54.4k
    xmlCtxtReset(ctxt);
13635
54.4k
    xmlCtxtUseOptions(ctxt, options);
13636
13637
54.4k
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13638
54.4k
                                      XML_INPUT_BUF_STATIC);
13639
54.4k
    if (input == NULL)
13640
48
        return(NULL);
13641
13642
54.4k
    return(xmlCtxtParseDocument(ctxt, input));
13643
54.4k
}
13644
13645
/**
13646
 * Parse an XML document from a file descriptor and build a tree.
13647
 *
13648
 * NOTE that the file descriptor will not be closed when the
13649
 * context is freed or reset.
13650
 *
13651
 * `URL` is used as base to resolve external entities and for error
13652
 * reporting.
13653
 *
13654
 * @param ctxt  an XML parser context
13655
 * @param fd  an open file descriptor
13656
 * @param URL  base URL (optional)
13657
 * @param encoding  the document encoding (optional)
13658
 * @param options  a combination of xmlParserOption
13659
 * @returns the resulting document tree
13660
 */
13661
xmlDoc *
13662
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13663
              const char *URL, const char *encoding, int options)
13664
0
{
13665
0
    xmlParserInputPtr input;
13666
13667
0
    if (ctxt == NULL)
13668
0
        return(NULL);
13669
13670
0
    xmlCtxtReset(ctxt);
13671
0
    xmlCtxtUseOptions(ctxt, options);
13672
13673
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13674
0
    if (input == NULL)
13675
0
        return(NULL);
13676
13677
0
    return(xmlCtxtParseDocument(ctxt, input));
13678
0
}
13679
13680
/**
13681
 * Parse an XML document from I/O functions and source and build a tree.
13682
 * This reuses the existing `ctxt` parser context
13683
 *
13684
 * `URL` is used as base to resolve external entities and for error
13685
 * reporting.
13686
 *
13687
 * @param ctxt  an XML parser context
13688
 * @param ioread  an I/O read function
13689
 * @param ioclose  an I/O close function
13690
 * @param ioctx  an I/O handler
13691
 * @param URL  the base URL to use for the document
13692
 * @param encoding  the document encoding, or NULL
13693
 * @param options  a combination of xmlParserOption
13694
 * @returns the resulting document tree
13695
 */
13696
xmlDoc *
13697
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13698
              xmlInputCloseCallback ioclose, void *ioctx,
13699
        const char *URL,
13700
              const char *encoding, int options)
13701
0
{
13702
0
    xmlParserInputPtr input;
13703
13704
0
    if (ctxt == NULL)
13705
0
        return(NULL);
13706
13707
0
    xmlCtxtReset(ctxt);
13708
0
    xmlCtxtUseOptions(ctxt, options);
13709
13710
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13711
0
                                  encoding, 0);
13712
0
    if (input == NULL)
13713
0
        return(NULL);
13714
13715
0
    return(xmlCtxtParseDocument(ctxt, input));
13716
0
}
13717