Coverage Report

Created: 2025-08-26 06:42

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
404k
#define NS_INDEX_EMPTY  INT_MAX
81
66.7k
#define NS_INDEX_XML    (INT_MAX - 1)
82
194k
#define URI_HASH_EMPTY  0xD943A04E
83
24.6k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
200k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
374k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
331k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
374k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
977k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record describing one start tag: a namespace prefix, a namespace URI,
 * a line number and a namespace count.
 * NOTE(review): presumably one record is kept per open element so that
 * the matching end tag can be processed; confirm against the code that
 * fills and reads these records.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Additional per-namespace data: an opaque SAX pointer, two hash values
 * (prefix and URI), an element id and an index.
 * NOTE(review): the exact meaning of elementId and oldIndex is not
 * established by the code visible here; verify against the namespace
 * push/pop routines.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * One bucket of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData): a hash value paired with an index.
 * NOTE(review): presumably the index refers to an entry of the parser's
 * namespace table; confirm against the lookup code.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping data: an array of xmlParserNsExtra records,
 * a hash table of xmlParserNsBucket entries with its size and element
 * count, an element id counter and two namespace indices.
 * NOTE(review): ownership and the precise role of defaultNsIndex and
 * minNsIndex are not visible from this definition; confirm with the
 * functions that maintain this struct.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
1.91M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
1.92M
#define XML_ENT_FIXED_COST 20
170
171
68.6M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
515k
#define XML_PARSER_BUFFER_SIZE 100
173
74.9k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW, that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names.
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level.
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
2.12k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
2.12k
    xmlCtxtErrMemory(ctxt);
226
2.12k
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
17.7k
{
239
17.7k
    if (prefix == NULL)
240
11.8k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
11.8k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
11.8k
                   "Attribute %s redefined\n", localname);
243
5.90k
    else
244
5.90k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
5.90k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
5.90k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
17.7k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
45.4M
{
260
45.4M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
45.4M
               NULL, NULL, NULL, 0, "%s", msg);
262
45.4M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
16.2k
{
277
16.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
16.2k
               str1, str2, NULL, 0, msg, str1, str2);
279
16.2k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
3.03M
{
314
3.03M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
3.03M
               NULL, NULL, NULL, val, msg, val);
316
3.03M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
228k
{
333
228k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
228k
               str1, str2, NULL, val, msg, str1, val, str2);
335
228k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
2.32M
{
349
2.32M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
2.32M
               val, NULL, NULL, 0, msg, val);
351
2.32M
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
73.9k
{
365
73.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
73.9k
               val, NULL, NULL, 0, msg, val);
367
73.9k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
173k
{
385
173k
    ctxt->nsWellFormed = 0;
386
387
173k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
173k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
173k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
11.1k
{
407
11.1k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
11.1k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
11.1k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
2.03M
{
436
2.03M
    unsigned long consumed;
437
2.03M
    unsigned long *expandedSize;
438
2.03M
    xmlParserInputPtr input = ctxt->input;
439
2.03M
    xmlEntityPtr entity = input->entity;
440
441
2.03M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
111k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
1.91M
    consumed = input->consumed;
449
1.91M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
1.91M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
1.91M
    if (entity)
453
36.1k
        expandedSize = &entity->expandedSize;
454
1.88M
    else
455
1.88M
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
1.91M
    xmlSaturatedAdd(expandedSize, extra);
461
1.91M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
1.91M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
1.91M
        ((*expandedSize >= ULONG_MAX) ||
470
89.9k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
951
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
951
                       "Maximum entity amplification factor exceeded, see "
473
951
                       "xmlCtxtSetMaxAmplification.\n");
474
951
        return(1);
475
951
    }
476
477
1.91M
    return(0);
478
1.91M
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
0
#ifdef LIBXML_ZLIB_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_LZMA:
654
0
#ifdef LIBXML_LZMA_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_ICU:
660
#ifdef LIBXML_ICU_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        default:
666
0
      break;
667
0
     }
668
0
     return(0);
669
0
}
670
671
/************************************************************************
672
 *                  *
673
 *      Simple string buffer        *
674
 *                  *
675
 ************************************************************************/
676
677
/*
 * Simple growable byte buffer with a hard size limit and a sticky
 * error code.
 *
 * mem is NULL until the first allocation; size is the number of bytes
 * stored; cap is the allocated size of mem; max is the limit passed to
 * xmlSBufInit(). code stays XML_ERR_OK until an append exceeds max
 * (XML_ERR_RESOURCE_LIMIT) or an allocation fails (XML_ERR_NO_MEMORY).
 */
typedef struct {
678
    xmlChar *mem;
679
    unsigned size;
680
    unsigned cap; /* size < cap */
681
    unsigned max; /* size <= max */
682
    xmlParserErrors code;
683
} xmlSBuf;
684
685
static void
686
377k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
687
377k
    buf->mem = NULL;
688
377k
    buf->size = 0;
689
377k
    buf->cap = 0;
690
377k
    buf->max = max;
691
377k
    buf->code = XML_ERR_OK;
692
377k
}
693
694
static int
695
188k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
696
188k
    xmlChar *mem;
697
188k
    unsigned cap;
698
699
188k
    if (len >= UINT_MAX / 2 - buf->size) {
700
0
        if (buf->code == XML_ERR_OK)
701
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
702
0
        return(-1);
703
0
    }
704
705
188k
    cap = (buf->size + len) * 2;
706
188k
    if (cap < 240)
707
145k
        cap = 240;
708
709
188k
    mem = xmlRealloc(buf->mem, cap);
710
188k
    if (mem == NULL) {
711
341
        buf->code = XML_ERR_NO_MEMORY;
712
341
        return(-1);
713
341
    }
714
715
188k
    buf->mem = mem;
716
188k
    buf->cap = cap;
717
718
188k
    return(0);
719
188k
}
720
721
static void
722
125M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
723
125M
    if (buf->max - buf->size < len) {
724
600k
        if (buf->code == XML_ERR_OK)
725
201
            buf->code = XML_ERR_RESOURCE_LIMIT;
726
600k
        return;
727
600k
    }
728
729
124M
    if (buf->cap - buf->size <= len) {
730
179k
        if (xmlSBufGrow(buf, len) < 0)
731
312
            return;
732
179k
    }
733
734
124M
    if (len > 0)
735
124M
        memcpy(buf->mem + buf->size, str, len);
736
124M
    buf->size += len;
737
124M
}
738
739
static void
740
122M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
741
122M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
742
122M
}
743
744
static void
745
332k
xmlSBufAddChar(xmlSBuf *buf, int c) {
746
332k
    xmlChar *end;
747
748
332k
    if (buf->max - buf->size < 4) {
749
422
        if (buf->code == XML_ERR_OK)
750
6
            buf->code = XML_ERR_RESOURCE_LIMIT;
751
422
        return;
752
422
    }
753
754
332k
    if (buf->cap - buf->size <= 4) {
755
9.53k
        if (xmlSBufGrow(buf, 4) < 0)
756
29
            return;
757
9.53k
    }
758
759
332k
    end = buf->mem + buf->size;
760
761
332k
    if (c < 0x80) {
762
132k
        *end = (xmlChar) c;
763
132k
        buf->size += 1;
764
200k
    } else {
765
200k
        buf->size += xmlCopyCharMultiByte(end, c);
766
200k
    }
767
332k
}
768
769
static void
770
106M
xmlSBufAddReplChar(xmlSBuf *buf) {
771
106M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
772
106M
}
773
774
static void
775
570
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
776
570
    if (buf->code == XML_ERR_NO_MEMORY)
777
363
        xmlCtxtErrMemory(ctxt);
778
207
    else
779
207
        xmlFatalErr(ctxt, buf->code, errMsg);
780
570
}
781
782
static xmlChar *
783
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
784
155k
              const char *errMsg) {
785
155k
    if (buf->mem == NULL) {
786
16.6k
        buf->mem = xmlMalloc(1);
787
16.6k
        if (buf->mem == NULL) {
788
22
            buf->code = XML_ERR_NO_MEMORY;
789
16.6k
        } else {
790
16.6k
            buf->mem[0] = 0;
791
16.6k
        }
792
139k
    } else {
793
139k
        buf->mem[buf->size] = 0;
794
139k
    }
795
796
155k
    if (buf->code == XML_ERR_OK) {
797
155k
        if (sizeOut != NULL)
798
62.2k
            *sizeOut = buf->size;
799
155k
        return(buf->mem);
800
155k
    }
801
802
345
    xmlSBufReportError(buf, ctxt, errMsg);
803
804
345
    xmlFree(buf->mem);
805
806
345
    if (sizeOut != NULL)
807
166
        *sizeOut = 0;
808
345
    return(NULL);
809
155k
}
810
811
static void
812
214k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
813
214k
    if (buf->code != XML_ERR_OK)
814
225
        xmlSBufReportError(buf, ctxt, errMsg);
815
816
214k
    xmlFree(buf->mem);
817
214k
}
818
819
static int
820
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
821
442M
                    const char *errMsg) {
822
442M
    int c = str[0];
823
442M
    int c1 = str[1];
824
825
442M
    if ((c1 & 0xC0) != 0x80)
826
48.8M
        goto encoding_error;
827
828
393M
    if (c < 0xE0) {
829
        /* 2-byte sequence */
830
18.8M
        if (c < 0xC2)
831
13.2M
            goto encoding_error;
832
833
5.54M
        return(2);
834
374M
    } else {
835
374M
        int c2 = str[2];
836
837
374M
        if ((c2 & 0xC0) != 0x80)
838
13.2k
            goto encoding_error;
839
840
374M
        if (c < 0xF0) {
841
            /* 3-byte sequence */
842
374M
            if (c == 0xE0) {
843
                /* overlong */
844
4.21k
                if (c1 < 0xA0)
845
260
                    goto encoding_error;
846
374M
            } else if (c == 0xED) {
847
                /* surrogate */
848
780
                if (c1 >= 0xA0)
849
274
                    goto encoding_error;
850
374M
            } else if (c == 0xEF) {
851
                /* U+FFFE and U+FFFF are invalid Chars */
852
39.3M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
853
332
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
854
39.3M
            }
855
856
374M
            return(3);
857
374M
        } else {
858
            /* 4-byte sequence */
859
14.5k
            if ((str[3] & 0xC0) != 0x80)
860
2.07k
                goto encoding_error;
861
12.4k
            if (c == 0xF0) {
862
                /* overlong */
863
1.11k
                if (c1 < 0x90)
864
323
                    goto encoding_error;
865
11.3k
            } else if (c >= 0xF4) {
866
                /* greater than 0x10FFFF */
867
5.28k
                if ((c > 0xF4) || (c1 >= 0x90))
868
4.13k
                    goto encoding_error;
869
5.28k
            }
870
871
8.02k
            return(4);
872
12.4k
        }
873
374M
    }
874
875
62.1M
encoding_error:
876
    /* Only report the first error */
877
62.1M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
878
13.3k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
879
13.3k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
880
13.3k
    }
881
882
62.1M
    return(0);
883
393M
}
884
885
/************************************************************************
886
 *                  *
887
 *    SAX2 defaulted attributes handling      *
888
 *                  *
889
 ************************************************************************/
890
891
/**
892
 * Final initialization of the parser context before starting to parse.
893
 *
894
 * This accounts for users modifying struct members of parser context
895
 * directly.
896
 *
897
 * @param ctxt  an XML parser context
898
 */
899
static void
900
60.0k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
901
60.0k
    xmlSAXHandlerPtr sax;
902
903
    /* Avoid unused variable warning if features are disabled. */
904
60.0k
    (void) sax;
905
906
    /*
907
     * Changing the SAX struct directly is still widespread practice
908
     * in internal and external code.
909
     */
910
60.0k
    if (ctxt == NULL) return;
911
60.0k
    sax = ctxt->sax;
912
60.0k
#ifdef LIBXML_SAX1_ENABLED
913
    /*
914
     * Only enable SAX2 if there SAX2 element handlers, except when there
915
     * are no element handlers at all.
916
     */
917
60.0k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
918
60.0k
        (sax) &&
919
60.0k
        (sax->initialized == XML_SAX2_MAGIC) &&
920
60.0k
        ((sax->startElementNs != NULL) ||
921
60.0k
         (sax->endElementNs != NULL) ||
922
60.0k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
923
60.0k
        ctxt->sax2 = 1;
924
#else
925
    ctxt->sax2 = 1;
926
#endif /* LIBXML_SAX1_ENABLED */
927
928
    /*
929
     * Some users replace the dictionary directly in the context struct.
930
     * We really need an API function to do that cleanly.
931
     */
932
60.0k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
933
60.0k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
934
60.0k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
935
60.0k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
936
60.0k
    (ctxt->str_xml_ns == NULL)) {
937
491
        xmlErrMemory(ctxt);
938
491
    }
939
940
60.0k
    xmlDictSetLimit(ctxt->dict,
941
60.0k
                    (ctxt->options & XML_PARSE_HUGE) ?
942
26.4k
                        0 :
943
60.0k
                        XML_MAX_DICTIONARY_LIMIT);
944
945
60.0k
#ifdef LIBXML_VALID_ENABLED
946
60.0k
    if (ctxt->validate)
947
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
948
60.0k
    else
949
60.0k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
950
60.0k
#endif /* LIBXML_VALID_ENABLED */
951
60.0k
}
952
953
/*
 * One defaulted attribute: hashed prefix, name and value strings, a
 * pointer associated with the end of the value, and two integers.
 * NOTE(review): presumably "external" marks defaults declared in an
 * external subset and "expandedSize" carries the entity expansion cost
 * of the value; confirm against the code that fills this struct.
 */
typedef struct {
954
    xmlHashedString prefix;
955
    xmlHashedString name;
956
    xmlHashedString value;
957
    const xmlChar *valueEnd;
958
    int external;
959
    int expandedSize;
960
} xmlDefAttr;
961
962
/*
 * Variable-length list of the defaulted attributes of one element.
 * The attrs array is a trailing member: a C99 flexible array member
 * when available, a one-element array otherwise.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
963
typedef xmlDefAttrs *xmlDefAttrsPtr;
964
struct _xmlDefAttrs {
965
    int nbAttrs;  /* number of defaulted attributes on that element */
966
    int maxAttrs;       /* the size of the array */
967
#if __STDC_VERSION__ >= 199901L
968
    /* Using a C99 flexible array member avoids UBSan errors. */
969
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
970
#else
971
    xmlDefAttr attrs[1];
972
#endif
973
};
974
975
/**
976
 * Normalize the space in non CDATA attribute values:
977
 * If the attribute type is not CDATA, then the XML processor MUST further
978
 * process the normalized attribute value by discarding any leading and
979
 * trailing space (\#x20) characters, and by replacing sequences of space
980
 * (\#x20) characters by a single space (\#x20) character.
981
 * Note that the size of dst need to be at least src, and if one doesn't need
982
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
983
 * passing src as dst is just fine.
984
 *
985
 * @param src  the source string
986
 * @param dst  the target string
987
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
988
 *         is needed.
989
 */
990
static xmlChar *
991
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
992
47.4k
{
993
47.4k
    if ((src == NULL) || (dst == NULL))
994
0
        return(NULL);
995
996
48.5k
    while (*src == 0x20) src++;
997
51.8M
    while (*src != 0) {
998
51.8M
  if (*src == 0x20) {
999
307k
      while (*src == 0x20) src++;
1000
12.1k
      if (*src != 0)
1001
11.5k
    *dst++ = 0x20;
1002
51.8M
  } else {
1003
51.8M
      *dst++ = *src++;
1004
51.8M
  }
1005
51.8M
    }
1006
47.4k
    *dst = 0;
1007
47.4k
    if (dst == src)
1008
45.3k
       return(NULL);
1009
2.06k
    return(dst);
1010
47.4k
}
1011
1012
/**
1013
 * Add a defaulted attribute for an element
1014
 *
1015
 * @param ctxt  an XML parser context
1016
 * @param fullname  the element fullname
1017
 * @param fullattr  the attribute fullname
1018
 * @param value  the attribute value
1019
 */
1020
static void
1021
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1022
               const xmlChar *fullname,
1023
               const xmlChar *fullattr,
1024
43.7k
               const xmlChar *value) {
1025
43.7k
    xmlDefAttrsPtr defaults;
1026
43.7k
    xmlDefAttr *attr;
1027
43.7k
    int len, expandedSize;
1028
43.7k
    xmlHashedString name;
1029
43.7k
    xmlHashedString prefix;
1030
43.7k
    xmlHashedString hvalue;
1031
43.7k
    const xmlChar *localname;
1032
1033
    /*
1034
     * Allows to detect attribute redefinitions
1035
     */
1036
43.7k
    if (ctxt->attsSpecial != NULL) {
1037
39.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1038
25.9k
      return;
1039
39.8k
    }
1040
1041
17.8k
    if (ctxt->attsDefault == NULL) {
1042
4.10k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1043
4.10k
  if (ctxt->attsDefault == NULL)
1044
11
      goto mem_error;
1045
4.10k
    }
1046
1047
    /*
1048
     * split the element name into prefix:localname , the string found
1049
     * are within the DTD and then not associated to namespace names.
1050
     */
1051
17.8k
    localname = xmlSplitQName3(fullname, &len);
1052
17.8k
    if (localname == NULL) {
1053
17.2k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1054
17.2k
  prefix.name = NULL;
1055
17.2k
    } else {
1056
556
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1057
556
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1058
556
        if (prefix.name == NULL)
1059
1
            goto mem_error;
1060
556
    }
1061
17.8k
    if (name.name == NULL)
1062
3
        goto mem_error;
1063
1064
    /*
1065
     * make sure there is some storage
1066
     */
1067
17.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1068
17.8k
    if ((defaults == NULL) ||
1069
17.8k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1070
5.92k
        xmlDefAttrsPtr temp;
1071
5.92k
        int newSize;
1072
1073
5.92k
        if (defaults == NULL) {
1074
4.89k
            newSize = 4;
1075
4.89k
        } else {
1076
1.03k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1077
1.03k
                ((size_t) defaults->maxAttrs >
1078
1.03k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1079
0
                goto mem_error;
1080
1081
1.03k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1082
0
                newSize = XML_MAX_ATTRS;
1083
1.03k
            else
1084
1.03k
                newSize = defaults->maxAttrs * 2;
1085
1.03k
        }
1086
5.92k
        temp = xmlRealloc(defaults,
1087
5.92k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1088
5.92k
  if (temp == NULL)
1089
11
      goto mem_error;
1090
5.91k
        if (defaults == NULL)
1091
4.88k
            temp->nbAttrs = 0;
1092
5.91k
  temp->maxAttrs = newSize;
1093
5.91k
        defaults = temp;
1094
5.91k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1095
5.91k
                          defaults, NULL) < 0) {
1096
1
      xmlFree(defaults);
1097
1
      goto mem_error;
1098
1
  }
1099
5.91k
    }
1100
1101
    /*
1102
     * Split the attribute name into prefix:localname , the string found
1103
     * are within the DTD and hen not associated to namespace names.
1104
     */
1105
17.8k
    localname = xmlSplitQName3(fullattr, &len);
1106
17.8k
    if (localname == NULL) {
1107
13.7k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1108
13.7k
  prefix.name = NULL;
1109
13.7k
    } else {
1110
4.07k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1111
4.07k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1112
4.07k
        if (prefix.name == NULL)
1113
3
            goto mem_error;
1114
4.07k
    }
1115
17.8k
    if (name.name == NULL)
1116
3
        goto mem_error;
1117
1118
    /* intern the string and precompute the end */
1119
17.8k
    len = strlen((const char *) value);
1120
17.8k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1121
17.8k
    if (hvalue.name == NULL)
1122
5
        goto mem_error;
1123
1124
17.8k
    expandedSize = strlen((const char *) name.name);
1125
17.8k
    if (prefix.name != NULL)
1126
4.06k
        expandedSize += strlen((const char *) prefix.name);
1127
17.8k
    expandedSize += len;
1128
1129
17.8k
    attr = &defaults->attrs[defaults->nbAttrs++];
1130
17.8k
    attr->name = name;
1131
17.8k
    attr->prefix = prefix;
1132
17.8k
    attr->value = hvalue;
1133
17.8k
    attr->valueEnd = hvalue.name + len;
1134
17.8k
    attr->external = PARSER_EXTERNAL(ctxt);
1135
17.8k
    attr->expandedSize = expandedSize;
1136
1137
17.8k
    return;
1138
1139
38
mem_error:
1140
38
    xmlErrMemory(ctxt);
1141
38
}
1142
1143
/**
1144
 * Register this attribute type
1145
 *
1146
 * @param ctxt  an XML parser context
1147
 * @param fullname  the element fullname
1148
 * @param fullattr  the attribute fullname
1149
 * @param type  the attribute type
1150
 */
1151
static void
1152
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1153
      const xmlChar *fullname,
1154
      const xmlChar *fullattr,
1155
      int type)
1156
81.5k
{
1157
81.5k
    if (ctxt->attsSpecial == NULL) {
1158
5.93k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1159
5.93k
  if (ctxt->attsSpecial == NULL)
1160
13
      goto mem_error;
1161
5.93k
    }
1162
1163
81.5k
    if (PARSER_EXTERNAL(ctxt))
1164
42.4k
        type |= XML_SPECIAL_EXTERNAL;
1165
1166
81.5k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1167
81.5k
                    XML_INT_TO_PTR(type)) < 0)
1168
5
        goto mem_error;
1169
81.5k
    return;
1170
1171
81.5k
mem_error:
1172
18
    xmlErrMemory(ctxt);
1173
18
}
1174
1175
/**
1176
 * Removes CDATA attributes from the special attribute table
1177
 */
1178
static void
1179
xmlCleanSpecialAttrCallback(void *payload, void *data,
1180
                            const xmlChar *fullname, const xmlChar *fullattr,
1181
46.3k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1182
46.3k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1183
1184
46.3k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1185
4.79k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1186
4.79k
    }
1187
46.3k
}
1188
1189
/**
1190
 * Trim the list of attributes defined to remove all those of type
1191
 * CDATA as they are not special. This call should be done when finishing
1192
 * to parse the DTD and before starting to parse the document root.
1193
 *
1194
 * @param ctxt  an XML parser context
1195
 */
1196
static void
1197
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1198
26.4k
{
1199
26.4k
    if (ctxt->attsSpecial == NULL)
1200
20.5k
        return;
1201
1202
5.91k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1203
1204
5.91k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1205
166
        xmlHashFree(ctxt->attsSpecial, NULL);
1206
166
        ctxt->attsSpecial = NULL;
1207
166
    }
1208
5.91k
}
1209
1210
/**
1211
 * Checks that the value conforms to the LanguageID production:
1212
 *
1213
 * @deprecated Internal function, do not use.
1214
 *
1215
 * NOTE: this is somewhat deprecated, those productions were removed from
1216
 * the XML Second edition.
1217
 *
1218
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1219
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1220
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1221
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1222
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1223
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1224
 *
1225
 * The current REC reference the successors of RFC 1766, currently 5646
1226
 *
1227
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1228
 *
1229
 *     langtag       = language
1230
 *                     ["-" script]
1231
 *                     ["-" region]
1232
 *                     *("-" variant)
1233
 *                     *("-" extension)
1234
 *                     ["-" privateuse]
1235
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1236
 *                     ["-" extlang]       ; sometimes followed by
1237
 *                                         ; extended language subtags
1238
 *                   / 4ALPHA              ; or reserved for future use
1239
 *                   / 5*8ALPHA            ; or registered language subtag
1240
 *
1241
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1242
 *                     *2("-" 3ALPHA)      ; permanently reserved
1243
 *
1244
 *     script        = 4ALPHA              ; ISO 15924 code
1245
 *
1246
 *     region        = 2ALPHA              ; ISO 3166-1 code
1247
 *                   / 3DIGIT              ; UN M.49 code
1248
 *
1249
 *     variant       = 5*8alphanum         ; registered variants
1250
 *                   / (DIGIT 3alphanum)
1251
 *
1252
 *     extension     = singleton 1*("-" (2*8alphanum))
1253
 *
1254
 *                                         ; Single alphanumerics
1255
 *                                         ; "x" reserved for private use
1256
 *     singleton     = DIGIT               ; 0 - 9
1257
 *                   / %x41-57             ; A - W
1258
 *                   / %x59-5A             ; Y - Z
1259
 *                   / %x61-77             ; a - w
1260
 *                   / %x79-7A             ; y - z
1261
 *
1262
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1263
 * The parser below doesn't try to cope with extension or privateuse
1264
 * that could be added but that's not interoperable anyway
1265
 *
1266
 * @param lang  pointer to the string value
1267
 * @returns 1 if correct 0 otherwise
1268
 **/
1269
int
1270
xmlCheckLanguageID(const xmlChar * lang)
1271
11.5k
{
1272
11.5k
    const xmlChar *cur = lang, *nxt;
1273
1274
11.5k
    if (cur == NULL)
1275
0
        return (0);
1276
11.5k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1277
11.5k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1278
11.5k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1279
11.5k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1280
        /*
1281
         * Still allow IANA code and user code which were coming
1282
         * from the previous version of the XML-1.0 specification
1283
         * it's deprecated but we should not fail
1284
         */
1285
1.35k
        cur += 2;
1286
3.73k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1287
3.73k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1288
2.37k
            cur++;
1289
1.35k
        return(cur[0] == 0);
1290
1.35k
    }
1291
10.2k
    nxt = cur;
1292
35.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1293
35.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1294
25.2k
           nxt++;
1295
10.2k
    if (nxt - cur >= 4) {
1296
        /*
1297
         * Reserved
1298
         */
1299
1.06k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1300
856
            return(0);
1301
211
        return(1);
1302
1.06k
    }
1303
9.15k
    if (nxt - cur < 2)
1304
770
        return(0);
1305
    /* we got an ISO 639 code */
1306
8.38k
    if (nxt[0] == 0)
1307
431
        return(1);
1308
7.95k
    if (nxt[0] != '-')
1309
484
        return(0);
1310
1311
7.46k
    nxt++;
1312
7.46k
    cur = nxt;
1313
    /* now we can have extlang or script or region or variant */
1314
7.46k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1315
810
        goto region_m49;
1316
1317
37.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
37.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
30.7k
           nxt++;
1320
6.65k
    if (nxt - cur == 4)
1321
1.88k
        goto script;
1322
4.76k
    if (nxt - cur == 2)
1323
984
        goto region;
1324
3.78k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1325
911
        goto variant;
1326
2.87k
    if (nxt - cur != 3)
1327
457
        return(0);
1328
    /* we parsed an extlang */
1329
2.41k
    if (nxt[0] == 0)
1330
294
        return(1);
1331
2.12k
    if (nxt[0] != '-')
1332
253
        return(0);
1333
1334
1.87k
    nxt++;
1335
1.87k
    cur = nxt;
1336
    /* now we can have script or region or variant */
1337
1.87k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
219
        goto region_m49;
1339
1340
9.26k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
9.26k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
7.61k
           nxt++;
1343
1.65k
    if (nxt - cur == 2)
1344
275
        goto region;
1345
1.37k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1346
340
        goto variant;
1347
1.03k
    if (nxt - cur != 4)
1348
486
        return(0);
1349
    /* we parsed a script */
1350
2.43k
script:
1351
2.43k
    if (nxt[0] == 0)
1352
477
        return(1);
1353
1.96k
    if (nxt[0] != '-')
1354
266
        return(0);
1355
1356
1.69k
    nxt++;
1357
1.69k
    cur = nxt;
1358
    /* now we can have region or variant */
1359
1.69k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1360
407
        goto region_m49;
1361
1362
8.10k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1363
8.10k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1364
6.81k
           nxt++;
1365
1366
1.28k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1367
340
        goto variant;
1368
948
    if (nxt - cur != 2)
1369
617
        return(0);
1370
    /* we parsed a region */
1371
1.82k
region:
1372
1.82k
    if (nxt[0] == 0)
1373
328
        return(1);
1374
1.49k
    if (nxt[0] != '-')
1375
848
        return(0);
1376
1377
647
    nxt++;
1378
647
    cur = nxt;
1379
    /* now we can just have a variant */
1380
4.43k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1381
4.43k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1382
3.78k
           nxt++;
1383
1384
647
    if ((nxt - cur < 5) || (nxt - cur > 8))
1385
439
        return(0);
1386
1387
    /* we parsed a variant */
1388
1.79k
variant:
1389
1.79k
    if (nxt[0] == 0)
1390
256
        return(1);
1391
1.54k
    if (nxt[0] != '-')
1392
1.08k
        return(0);
1393
    /* extensions and private use subtags not checked */
1394
462
    return (1);
1395
1396
1.43k
region_m49:
1397
1.43k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1398
1.43k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1399
233
        nxt += 3;
1400
233
        goto region;
1401
233
    }
1402
1.20k
    return(0);
1403
1.43k
}
1404
1405
/************************************************************************
1406
 *                  *
1407
 *    Parser stacks related functions and macros    *
1408
 *                  *
1409
 ************************************************************************/
1410
1411
static xmlChar *
1412
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1413
1414
/**
1415
 * Create a new namespace database.
1416
 *
1417
 * @returns the new obejct.
1418
 */
1419
xmlParserNsData *
1420
49.6k
xmlParserNsCreate(void) {
1421
49.6k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1422
1423
49.6k
    if (nsdb == NULL)
1424
10
        return(NULL);
1425
49.6k
    memset(nsdb, 0, sizeof(*nsdb));
1426
49.6k
    nsdb->defaultNsIndex = INT_MAX;
1427
1428
49.6k
    return(nsdb);
1429
49.6k
}
1430
1431
/**
1432
 * Free a namespace database.
1433
 *
1434
 * @param nsdb  namespace database
1435
 */
1436
void
1437
49.6k
xmlParserNsFree(xmlParserNsData *nsdb) {
1438
49.6k
    if (nsdb == NULL)
1439
0
        return;
1440
1441
49.6k
    xmlFree(nsdb->extra);
1442
49.6k
    xmlFree(nsdb->hash);
1443
49.6k
    xmlFree(nsdb);
1444
49.6k
}
1445
1446
/**
1447
 * Reset a namespace database.
1448
 *
1449
 * @param nsdb  namespace database
1450
 */
1451
static void
1452
24.8k
xmlParserNsReset(xmlParserNsData *nsdb) {
1453
24.8k
    if (nsdb == NULL)
1454
0
        return;
1455
1456
24.8k
    nsdb->hashElems = 0;
1457
24.8k
    nsdb->elementId = 0;
1458
24.8k
    nsdb->defaultNsIndex = INT_MAX;
1459
1460
24.8k
    if (nsdb->hash)
1461
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1462
24.8k
}
1463
1464
/**
1465
 * Signal that a new element has started.
1466
 *
1467
 * @param nsdb  namespace database
1468
 * @returns 0 on success, -1 if the element counter overflowed.
1469
 */
1470
static int
1471
1.02M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1472
1.02M
    if (nsdb->elementId == UINT_MAX)
1473
0
        return(-1);
1474
1.02M
    nsdb->elementId++;
1475
1476
1.02M
    return(0);
1477
1.02M
}
1478
1479
/**
1480
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1481
 * be set to the matching bucket, or the first empty bucket if no match
1482
 * was found.
1483
 *
1484
 * @param ctxt  parser context
1485
 * @param prefix  namespace prefix
1486
 * @param bucketPtr  optional bucket (return value)
1487
 * @returns the namespace index on success, INT_MAX if no namespace was
1488
 * found.
1489
 */
1490
static int
1491
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1492
1.70M
                  xmlParserNsBucket **bucketPtr) {
1493
1.70M
    xmlParserNsBucket *bucket, *tombstone;
1494
1.70M
    unsigned index, hashValue;
1495
1496
1.70M
    if (prefix->name == NULL)
1497
781k
        return(ctxt->nsdb->defaultNsIndex);
1498
1499
927k
    if (ctxt->nsdb->hashSize == 0)
1500
23.4k
        return(INT_MAX);
1501
1502
903k
    hashValue = prefix->hashValue;
1503
903k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1504
903k
    bucket = &ctxt->nsdb->hash[index];
1505
903k
    tombstone = NULL;
1506
1507
1.16M
    while (bucket->hashValue) {
1508
1.06M
        if (bucket->index == INT_MAX) {
1509
55.2k
            if (tombstone == NULL)
1510
43.9k
                tombstone = bucket;
1511
1.01M
        } else if (bucket->hashValue == hashValue) {
1512
804k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1513
804k
                if (bucketPtr != NULL)
1514
530k
                    *bucketPtr = bucket;
1515
804k
                return(bucket->index);
1516
804k
            }
1517
804k
        }
1518
1519
262k
        index++;
1520
262k
        bucket++;
1521
262k
        if (index == ctxt->nsdb->hashSize) {
1522
16.8k
            index = 0;
1523
16.8k
            bucket = ctxt->nsdb->hash;
1524
16.8k
        }
1525
262k
    }
1526
1527
99.3k
    if (bucketPtr != NULL)
1528
16.9k
        *bucketPtr = tombstone ? tombstone : bucket;
1529
99.3k
    return(INT_MAX);
1530
903k
}
1531
1532
/**
1533
 * Lookup namespace URI with given prefix.
1534
 *
1535
 * @param ctxt  parser context
1536
 * @param prefix  namespace prefix
1537
 * @returns the namespace URI on success, NULL if no namespace was found.
1538
 */
1539
static const xmlChar *
1540
673k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1541
673k
    const xmlChar *ret;
1542
673k
    int nsIndex;
1543
1544
673k
    if (prefix->name == ctxt->str_xml)
1545
680
        return(ctxt->str_xml_ns);
1546
1547
    /*
1548
     * minNsIndex is used when building an entity tree. We must
1549
     * ignore namespaces declared outside the entity.
1550
     */
1551
672k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1552
672k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1553
460k
        return(NULL);
1554
1555
212k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1556
212k
    if (ret[0] == 0)
1557
13.2k
        ret = NULL;
1558
212k
    return(ret);
1559
672k
}
1560
1561
/**
1562
 * Lookup extra data for the given prefix. This returns data stored
1563
 * with xmlParserNsUdpateSax.
1564
 *
1565
 * @param ctxt  parser context
1566
 * @param prefix  namespace prefix
1567
 * @returns the data on success, NULL if no namespace was found.
1568
 */
1569
void *
1570
197k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1571
197k
    xmlHashedString hprefix;
1572
197k
    int nsIndex;
1573
1574
197k
    if (prefix == ctxt->str_xml)
1575
39.0k
        return(NULL);
1576
1577
158k
    hprefix.name = prefix;
1578
158k
    if (prefix != NULL)
1579
31.6k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1580
126k
    else
1581
126k
        hprefix.hashValue = 0;
1582
158k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1583
158k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584
0
        return(NULL);
1585
1586
158k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1587
158k
}
1588
1589
/**
1590
 * Sets or updates extra data for the given prefix. This value will be
1591
 * returned by xmlParserNsLookupSax as long as the namespace with the
1592
 * given prefix is in scope.
1593
 *
1594
 * @param ctxt  parser context
1595
 * @param prefix  namespace prefix
1596
 * @param saxData  extra data for SAX handler
1597
 * @returns the data on success, NULL if no namespace was found.
1598
 */
1599
int
1600
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1601
242k
                     void *saxData) {
1602
242k
    xmlHashedString hprefix;
1603
242k
    int nsIndex;
1604
1605
242k
    if (prefix == ctxt->str_xml)
1606
0
        return(-1);
1607
1608
242k
    hprefix.name = prefix;
1609
242k
    if (prefix != NULL)
1610
197k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1611
45.0k
    else
1612
45.0k
        hprefix.hashValue = 0;
1613
242k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1614
242k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1615
0
        return(-1);
1616
1617
242k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1618
242k
    return(0);
1619
242k
}
1620
1621
/**
1622
 * Grows the namespace tables.
1623
 *
1624
 * @param ctxt  parser context
1625
 * @returns 0 on success, -1 if a memory allocation failed.
1626
 */
1627
static int
1628
20.4k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1629
20.4k
    const xmlChar **table;
1630
20.4k
    xmlParserNsExtra *extra;
1631
20.4k
    int newSize;
1632
1633
20.4k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1634
20.4k
                              sizeof(table[0]) + sizeof(extra[0]),
1635
20.4k
                              16, XML_MAX_ITEMS);
1636
20.4k
    if (newSize < 0)
1637
0
        goto error;
1638
1639
20.4k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1640
20.4k
    if (table == NULL)
1641
38
        goto error;
1642
20.4k
    ctxt->nsTab = table;
1643
1644
20.4k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1645
20.4k
    if (extra == NULL)
1646
35
        goto error;
1647
20.3k
    ctxt->nsdb->extra = extra;
1648
1649
20.3k
    ctxt->nsMax = newSize;
1650
20.3k
    return(0);
1651
1652
73
error:
1653
73
    xmlErrMemory(ctxt);
1654
73
    return(-1);
1655
20.4k
}
1656
1657
/**
1658
 * Push a new namespace on the table.
1659
 *
1660
 * @param ctxt  parser context
1661
 * @param prefix  prefix with hash value
1662
 * @param uri  uri with hash value
1663
 * @param saxData  extra data for SAX handler
1664
 * @param defAttr  whether the namespace comes from a default attribute
1665
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1666
 * -1 if a memory allocation failed.
1667
 */
1668
static int
1669
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1670
355k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1671
355k
    xmlParserNsBucket *bucket = NULL;
1672
355k
    xmlParserNsExtra *extra;
1673
355k
    const xmlChar **ns;
1674
355k
    unsigned hashValue, nsIndex, oldIndex;
1675
1676
355k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1677
194
        return(0);
1678
1679
355k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1680
73
        xmlErrMemory(ctxt);
1681
73
        return(-1);
1682
73
    }
1683
1684
    /*
1685
     * Default namespace and 'xml' namespace
1686
     */
1687
354k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1688
63.0k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1689
1690
63.0k
        if (oldIndex != INT_MAX) {
1691
53.1k
            extra = &ctxt->nsdb->extra[oldIndex];
1692
1693
53.1k
            if (extra->elementId == ctxt->nsdb->elementId) {
1694
981
                if (defAttr == 0)
1695
763
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1696
981
                return(0);
1697
981
            }
1698
1699
52.1k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1700
52.1k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1701
10.0k
                return(0);
1702
52.1k
        }
1703
1704
51.9k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1705
51.9k
        goto populate_entry;
1706
63.0k
    }
1707
1708
    /*
1709
     * Hash table lookup
1710
     */
1711
291k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1712
291k
    if (oldIndex != INT_MAX) {
1713
271k
        extra = &ctxt->nsdb->extra[oldIndex];
1714
1715
        /*
1716
         * Check for duplicate definitions on the same element.
1717
         */
1718
271k
        if (extra->elementId == ctxt->nsdb->elementId) {
1719
755
            if (defAttr == 0)
1720
515
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1721
755
            return(0);
1722
755
        }
1723
1724
271k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1725
271k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1726
12.1k
            return(0);
1727
1728
259k
        bucket->index = ctxt->nsNr;
1729
259k
        goto populate_entry;
1730
271k
    }
1731
1732
    /*
1733
     * Insert new bucket
1734
     */
1735
1736
20.0k
    hashValue = prefix->hashValue;
1737
1738
    /*
1739
     * Grow hash table, 50% fill factor
1740
     */
1741
20.0k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1742
3.91k
        xmlParserNsBucket *newHash;
1743
3.91k
        unsigned newSize, i, index;
1744
1745
3.91k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1746
0
            xmlErrMemory(ctxt);
1747
0
            return(-1);
1748
0
        }
1749
3.91k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1750
3.91k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1751
3.91k
        if (newHash == NULL) {
1752
7
            xmlErrMemory(ctxt);
1753
7
            return(-1);
1754
7
        }
1755
3.91k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1756
1757
44.8k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1758
40.9k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1759
40.9k
            unsigned newIndex;
1760
1761
40.9k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1762
38.5k
                continue;
1763
2.39k
            newIndex = hv & (newSize - 1);
1764
1765
3.46k
            while (newHash[newIndex].hashValue != 0) {
1766
1.07k
                newIndex++;
1767
1.07k
                if (newIndex == newSize)
1768
46
                    newIndex = 0;
1769
1.07k
            }
1770
1771
2.39k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1772
2.39k
        }
1773
1774
3.91k
        xmlFree(ctxt->nsdb->hash);
1775
3.91k
        ctxt->nsdb->hash = newHash;
1776
3.91k
        ctxt->nsdb->hashSize = newSize;
1777
1778
        /*
1779
         * Relookup
1780
         */
1781
3.91k
        index = hashValue & (newSize - 1);
1782
1783
4.29k
        while (newHash[index].hashValue != 0) {
1784
384
            index++;
1785
384
            if (index == newSize)
1786
26
                index = 0;
1787
384
        }
1788
1789
3.91k
        bucket = &newHash[index];
1790
3.91k
    }
1791
1792
19.9k
    bucket->hashValue = hashValue;
1793
19.9k
    bucket->index = ctxt->nsNr;
1794
19.9k
    ctxt->nsdb->hashElems++;
1795
19.9k
    oldIndex = INT_MAX;
1796
1797
330k
populate_entry:
1798
330k
    nsIndex = ctxt->nsNr;
1799
1800
330k
    ns = &ctxt->nsTab[nsIndex * 2];
1801
330k
    ns[0] = prefix ? prefix->name : NULL;
1802
330k
    ns[1] = uri->name;
1803
1804
330k
    extra = &ctxt->nsdb->extra[nsIndex];
1805
330k
    extra->saxData = saxData;
1806
330k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1807
330k
    extra->uriHashValue = uri->hashValue;
1808
330k
    extra->elementId = ctxt->nsdb->elementId;
1809
330k
    extra->oldIndex = oldIndex;
1810
1811
330k
    ctxt->nsNr++;
1812
1813
330k
    return(1);
1814
19.9k
}
1815
1816
/**
1817
 * Pops the top `nr` namespaces and restores the hash table.
1818
 *
1819
 * @param ctxt  an XML parser context
1820
 * @param nr  the number to pop
1821
 * @returns the number of namespaces popped.
1822
 */
1823
static int
1824
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1825
97.3k
{
1826
97.3k
    int i;
1827
1828
    /* assert(nr <= ctxt->nsNr); */
1829
1830
401k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1831
304k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1832
304k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1833
1834
304k
        if (prefix == NULL) {
1835
45.5k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1836
258k
        } else {
1837
258k
            xmlHashedString hprefix;
1838
258k
            xmlParserNsBucket *bucket = NULL;
1839
1840
258k
            hprefix.name = prefix;
1841
258k
            hprefix.hashValue = extra->prefixHashValue;
1842
258k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1843
            /* assert(bucket && bucket->hashValue); */
1844
258k
            bucket->index = extra->oldIndex;
1845
258k
        }
1846
304k
    }
1847
1848
97.3k
    ctxt->nsNr -= nr;
1849
97.3k
    return(nr);
1850
97.3k
}
1851
1852
static int
1853
13.6k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1854
13.6k
    const xmlChar **atts;
1855
13.6k
    unsigned *attallocs;
1856
13.6k
    int newSize;
1857
1858
13.6k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1859
13.6k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1860
13.6k
                              10, XML_MAX_ATTRS);
1861
13.6k
    if (newSize < 0) {
1862
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1863
0
                    "Maximum number of attributes exceeded");
1864
0
        return(-1);
1865
0
    }
1866
1867
13.6k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1868
13.6k
    if (atts == NULL)
1869
13
        goto mem_error;
1870
13.6k
    ctxt->atts = atts;
1871
1872
13.6k
    attallocs = xmlRealloc(ctxt->attallocs,
1873
13.6k
                           newSize * sizeof(attallocs[0]));
1874
13.6k
    if (attallocs == NULL)
1875
8
        goto mem_error;
1876
13.6k
    ctxt->attallocs = attallocs;
1877
1878
13.6k
    ctxt->maxatts = newSize * 5;
1879
1880
13.6k
    return(0);
1881
1882
21
mem_error:
1883
21
    xmlErrMemory(ctxt);
1884
21
    return(-1);
1885
13.6k
}
1886
1887
/**
1888
 * Pushes a new parser input on top of the input stack
1889
 *
1890
 * @param ctxt  an XML parser context
1891
 * @param value  the parser input
1892
 * @returns -1 in case of error, the index in the stack otherwise
1893
 */
1894
int
1895
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1896
226k
{
1897
226k
    char *directory = NULL;
1898
226k
    int maxDepth;
1899
1900
226k
    if ((ctxt == NULL) || (value == NULL))
1901
4.73k
        return(-1);
1902
1903
221k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1904
1905
221k
    if (ctxt->inputNr >= ctxt->inputMax) {
1906
11.1k
        xmlParserInputPtr *tmp;
1907
11.1k
        int newSize;
1908
1909
11.1k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1910
11.1k
                                  5, maxDepth);
1911
11.1k
        if (newSize < 0) {
1912
3
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1913
3
                           "Maximum entity nesting depth exceeded");
1914
3
            return(-1);
1915
3
        }
1916
11.1k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1917
11.1k
        if (tmp == NULL) {
1918
51
            xmlErrMemory(ctxt);
1919
51
            return(-1);
1920
51
        }
1921
11.1k
        ctxt->inputTab = tmp;
1922
11.1k
        ctxt->inputMax = newSize;
1923
11.1k
    }
1924
1925
221k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1926
70.1k
        directory = xmlParserGetDirectory(value->filename);
1927
70.1k
        if (directory == NULL) {
1928
92
            xmlErrMemory(ctxt);
1929
92
            return(-1);
1930
92
        }
1931
70.1k
    }
1932
1933
221k
    if (ctxt->input_id >= INT_MAX) {
1934
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1935
0
        return(-1);
1936
0
    }
1937
1938
221k
    ctxt->inputTab[ctxt->inputNr] = value;
1939
221k
    ctxt->input = value;
1940
1941
221k
    if (ctxt->inputNr == 0) {
1942
70.0k
        xmlFree(ctxt->directory);
1943
70.0k
        ctxt->directory = directory;
1944
70.0k
    }
1945
1946
    /*
1947
     * The input ID is unused internally, but there are entity
1948
     * loaders in downstream code that detect the main document
1949
     * by checking for "input_id == 1".
1950
     */
1951
221k
    value->id = ctxt->input_id++;
1952
1953
221k
    return(ctxt->inputNr++);
1954
221k
}
1955
1956
/**
1957
 * Pops the top parser input from the input stack
1958
 *
1959
 * @param ctxt  an XML parser context
1960
 * @returns the input just removed
1961
 */
1962
xmlParserInput *
1963
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1964
343k
{
1965
343k
    xmlParserInputPtr ret;
1966
1967
343k
    if (ctxt == NULL)
1968
0
        return(NULL);
1969
343k
    if (ctxt->inputNr <= 0)
1970
124k
        return (NULL);
1971
219k
    ctxt->inputNr--;
1972
219k
    if (ctxt->inputNr > 0)
1973
151k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1974
68.5k
    else
1975
68.5k
        ctxt->input = NULL;
1976
219k
    ret = ctxt->inputTab[ctxt->inputNr];
1977
219k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1978
219k
    return (ret);
1979
343k
}
1980
1981
/**
1982
 * Pushes a new element node on top of the node stack
1983
 *
1984
 * @deprecated Internal function, do not use.
1985
 *
1986
 * @param ctxt  an XML parser context
1987
 * @param value  the element node
1988
 * @returns -1 in case of error, the index in the stack otherwise
1989
 */
1990
int
1991
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1992
674k
{
1993
674k
    if (ctxt == NULL)
1994
0
        return(0);
1995
1996
674k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1997
41.5k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998
41.5k
        xmlNodePtr *tmp;
1999
41.5k
        int newSize;
2000
2001
41.5k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2002
41.5k
                                  10, maxDepth);
2003
41.5k
        if (newSize < 0) {
2004
20
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2005
20
                    "Excessive depth in document: %d,"
2006
20
                    " use XML_PARSE_HUGE option\n",
2007
20
                    ctxt->nodeNr);
2008
20
            return(-1);
2009
20
        }
2010
2011
41.4k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2012
41.4k
        if (tmp == NULL) {
2013
39
            xmlErrMemory(ctxt);
2014
39
            return (-1);
2015
39
        }
2016
41.4k
        ctxt->nodeTab = tmp;
2017
41.4k
  ctxt->nodeMax = newSize;
2018
41.4k
    }
2019
2020
674k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2021
674k
    ctxt->node = value;
2022
674k
    return (ctxt->nodeNr++);
2023
674k
}
2024
2025
/**
2026
 * Pops the top element node from the node stack
2027
 *
2028
 * @deprecated Internal function, do not use.
2029
 *
2030
 * @param ctxt  an XML parser context
2031
 * @returns the node just removed
2032
 */
2033
xmlNode *
2034
nodePop(xmlParserCtxt *ctxt)
2035
612k
{
2036
612k
    xmlNodePtr ret;
2037
2038
612k
    if (ctxt == NULL) return(NULL);
2039
612k
    if (ctxt->nodeNr <= 0)
2040
14.4k
        return (NULL);
2041
598k
    ctxt->nodeNr--;
2042
598k
    if (ctxt->nodeNr > 0)
2043
591k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044
7.22k
    else
2045
7.22k
        ctxt->node = NULL;
2046
598k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2047
598k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048
598k
    return (ret);
2049
612k
}
2050
2051
/**
2052
 * Pushes a new element name/prefix/URL on top of the name stack
2053
 *
2054
 * @param ctxt  an XML parser context
2055
 * @param value  the element name
2056
 * @param prefix  the element prefix
2057
 * @param URI  the element namespace name
2058
 * @param line  the current line number for error messages
2059
 * @param nsNr  the number of namespaces pushed on the namespace table
2060
 * @returns -1 in case of error, the index in the stack otherwise
2061
 */
2062
static int
2063
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2064
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2065
708k
{
2066
708k
    xmlStartTag *tag;
2067
2068
708k
    if (ctxt->nameNr >= ctxt->nameMax) {
2069
42.6k
        const xmlChar **tmp;
2070
42.6k
        xmlStartTag *tmp2;
2071
42.6k
        int newSize;
2072
2073
42.6k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2074
42.6k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2075
42.6k
                                  10, XML_MAX_ITEMS);
2076
42.6k
        if (newSize < 0)
2077
0
            goto mem_error;
2078
2079
42.6k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2080
42.6k
        if (tmp == NULL)
2081
25
      goto mem_error;
2082
42.6k
  ctxt->nameTab = tmp;
2083
2084
42.6k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2085
42.6k
        if (tmp2 == NULL)
2086
35
      goto mem_error;
2087
42.6k
  ctxt->pushTab = tmp2;
2088
2089
42.6k
        ctxt->nameMax = newSize;
2090
665k
    } else if (ctxt->pushTab == NULL) {
2091
23.5k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2092
23.5k
        if (ctxt->pushTab == NULL)
2093
44
            goto mem_error;
2094
23.5k
    }
2095
708k
    ctxt->nameTab[ctxt->nameNr] = value;
2096
708k
    ctxt->name = value;
2097
708k
    tag = &ctxt->pushTab[ctxt->nameNr];
2098
708k
    tag->prefix = prefix;
2099
708k
    tag->URI = URI;
2100
708k
    tag->line = line;
2101
708k
    tag->nsNr = nsNr;
2102
708k
    return (ctxt->nameNr++);
2103
104
mem_error:
2104
104
    xmlErrMemory(ctxt);
2105
104
    return (-1);
2106
708k
}
2107
#ifdef LIBXML_PUSH_ENABLED
2108
/**
2109
 * Pops the top element/prefix/URI name from the name stack
2110
 *
2111
 * @param ctxt  an XML parser context
2112
 * @returns the name just removed
2113
 */
2114
static const xmlChar *
2115
nameNsPop(xmlParserCtxtPtr ctxt)
2116
6.18k
{
2117
6.18k
    const xmlChar *ret;
2118
2119
6.18k
    if (ctxt->nameNr <= 0)
2120
0
        return (NULL);
2121
6.18k
    ctxt->nameNr--;
2122
6.18k
    if (ctxt->nameNr > 0)
2123
5.90k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2124
282
    else
2125
282
        ctxt->name = NULL;
2126
6.18k
    ret = ctxt->nameTab[ctxt->nameNr];
2127
6.18k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2128
6.18k
    return (ret);
2129
6.18k
}
2130
#endif /* LIBXML_PUSH_ENABLED */
2131
2132
/**
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * @deprecated Internal function, do not use.
2136
 *
2137
 * @param ctxt  an XML parser context
2138
 * @returns the name just removed
2139
 */
2140
static const xmlChar *
2141
namePop(xmlParserCtxtPtr ctxt)
2142
624k
{
2143
624k
    const xmlChar *ret;
2144
2145
624k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2146
26
        return (NULL);
2147
624k
    ctxt->nameNr--;
2148
624k
    if (ctxt->nameNr > 0)
2149
618k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2150
6.25k
    else
2151
6.25k
        ctxt->name = NULL;
2152
624k
    ret = ctxt->nameTab[ctxt->nameNr];
2153
624k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2154
624k
    return (ret);
2155
624k
}
2156
2157
1.09M
/**
 * Pushes a value on the `spaceTab` stack and points `ctxt->space` at
 * the new top entry.
 *
 * @param ctxt  an XML parser context
 * @param val  the value to push
 * @returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2182
2183
1.01M
/**
 * Pops the top value from the `spaceTab` stack. `ctxt->space` is moved
 * to the entry below, or to slot 0 once the stack is empty, and the
 * vacated slot is overwritten with -1.
 *
 * @param ctxt  an XML parser context
 * @returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2195
2196
/*
 * Macros for accessing the content. These are meant for the parser only
 * and are not exported. All of them implicitly use a variable named
 * `ctxt` from the enclosing scope.
 *
 * "Dirty" macros, i.e. the caller often needs to make assumptions about
 * the context to use them safely:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location!
 *   CUR      the current xmlChar value. Use it only to compare against
 *            ASCII values, otherwise it would break with UTF-8 input.
 *   RAW      same expansion as CUR.
 *   NXT(n)   the n'th next xmlChar. Like CUR, only for comparing against
 *            ASCII based substrings.
 *   SKIP(n)  skip n xmlChars; only for ASCII strings without newlines
 *            (the column is advanced, the line is not).
 *   SKIPL(n) skip n xmlChars, updating line and column on newlines.
 *   NEXT1    skip 1 xmlChar; only for a single non-newline ASCII char.
 *   CMPn     compare the first n bytes at `s` against n characters.
 *
 * "Clean" macros, not dependent on an ASCII context:
 *
 *   NEXT      skip to the next character via xmlNextChar.
 *   NEXTL(l)  skip the current character, which is l xmlChars long.
 *   COPY_BUF  copy the current character to the target buffer and
 *             advance the index.
 *   GROW, SHRINK  handling of input buffers.
 *
 * SHRINK, GROW and COPY_BUF expand to bare `if` statements and NEXT1 to
 * a bare block, so they are not safe as the body of an unbraced
 * if/else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<val; skipl++) {                                  \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

#define COPY_BUF(b, i, v)                                               \
    if (v < 0x80) b[i++] = v;                                           \
    else i += xmlCopyCharMultiByte(&b[i],v)
2302
2303
static int
2304
120M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2305
120M
    int c = xmlCurrentChar(ctxt, len);
2306
2307
120M
    if (c == XML_INVALID_CHAR)
2308
21.3M
        c = 0xFFFD; /* replacement character */
2309
2310
120M
    return(c);
2311
120M
}
2312
2313
/**
2314
 * Skip whitespace in the input stream.
2315
 *
2316
 * @deprecated Internal function, do not use.
2317
 *
2318
 * @param ctxt  the XML parser context
2319
 * @returns the number of space chars skipped
2320
 */
2321
int
2322
3.33M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2323
3.33M
    const xmlChar *cur;
2324
3.33M
    int res = 0;
2325
2326
3.33M
    cur = ctxt->input->cur;
2327
3.98M
    while (IS_BLANK_CH(*cur)) {
2328
3.98M
        if (*cur == '\n') {
2329
3.02M
            ctxt->input->line++; ctxt->input->col = 1;
2330
3.02M
        } else {
2331
953k
            ctxt->input->col++;
2332
953k
        }
2333
3.98M
        cur++;
2334
3.98M
        if (res < INT_MAX)
2335
3.98M
            res++;
2336
3.98M
        if (*cur == 0) {
2337
12.1k
            ctxt->input->cur = cur;
2338
12.1k
            xmlParserGrow(ctxt);
2339
12.1k
            cur = ctxt->input->cur;
2340
12.1k
        }
2341
3.98M
    }
2342
3.33M
    ctxt->input->cur = cur;
2343
2344
3.33M
    if (res > 4)
2345
31.7k
        GROW;
2346
2347
3.33M
    return(res);
2348
3.33M
}
2349
2350
static void
2351
76.4k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2352
76.4k
    unsigned long consumed;
2353
76.4k
    xmlEntityPtr ent;
2354
2355
76.4k
    ent = ctxt->input->entity;
2356
2357
76.4k
    ent->flags &= ~XML_ENT_EXPANDING;
2358
2359
76.4k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2360
7.40k
        int result;
2361
2362
        /*
2363
         * Read the rest of the stream in case of errors. We want
2364
         * to account for the whole entity size.
2365
         */
2366
7.65k
        do {
2367
7.65k
            ctxt->input->cur = ctxt->input->end;
2368
7.65k
            xmlParserShrink(ctxt);
2369
7.65k
            result = xmlParserGrow(ctxt);
2370
7.65k
        } while (result > 0);
2371
2372
7.40k
        consumed = ctxt->input->consumed;
2373
7.40k
        xmlSaturatedAddSizeT(&consumed,
2374
7.40k
                             ctxt->input->end - ctxt->input->base);
2375
2376
7.40k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2377
2378
        /*
2379
         * Add to sizeentities when parsing an external entity
2380
         * for the first time.
2381
         */
2382
7.40k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2383
4.48k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2384
4.48k
        }
2385
2386
7.40k
        ent->flags |= XML_ENT_CHECKED;
2387
7.40k
    }
2388
2389
76.4k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2390
2391
76.4k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2392
2393
76.4k
    GROW;
2394
76.4k
}
2395
2396
/**
2397
 * Skip whitespace in the input stream, also handling parameter
2398
 * entities.
2399
 *
2400
 * @param ctxt  the XML parser context
2401
 * @returns the number of space chars skipped
2402
 */
2403
static int
2404
842k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2405
842k
    int res = 0;
2406
842k
    int inParam;
2407
842k
    int expandParam;
2408
2409
842k
    inParam = PARSER_IN_PE(ctxt);
2410
842k
    expandParam = PARSER_EXTERNAL(ctxt);
2411
2412
842k
    if (!inParam && !expandParam)
2413
234k
        return(xmlSkipBlankChars(ctxt));
2414
2415
    /*
2416
     * It's Okay to use CUR/NEXT here since all the blanks are on
2417
     * the ASCII range.
2418
     */
2419
1.72M
    while (PARSER_STOPPED(ctxt) == 0) {
2420
1.72M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2421
1.09M
            NEXT;
2422
1.09M
        } else if (CUR == '%') {
2423
26.4k
            if ((expandParam == 0) ||
2424
26.4k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2425
15.4k
                break;
2426
2427
            /*
2428
             * Expand parameter entity. We continue to consume
2429
             * whitespace at the start of the entity and possible
2430
             * even consume the whole entity and pop it. We might
2431
             * even pop multiple PEs in this loop.
2432
             */
2433
11.0k
            xmlParsePERefInternal(ctxt, 0);
2434
2435
11.0k
            inParam = PARSER_IN_PE(ctxt);
2436
11.0k
            expandParam = PARSER_EXTERNAL(ctxt);
2437
600k
        } else if (CUR == 0) {
2438
31.3k
            if (inParam == 0)
2439
123
                break;
2440
2441
            /*
2442
             * Don't pop parameter entities that start a markup
2443
             * declaration to detect Well-formedness constraint:
2444
             * PE Between Declarations.
2445
             */
2446
31.2k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2447
22.6k
                break;
2448
2449
8.60k
            xmlPopPE(ctxt);
2450
2451
8.60k
            inParam = PARSER_IN_PE(ctxt);
2452
8.60k
            expandParam = PARSER_EXTERNAL(ctxt);
2453
569k
        } else {
2454
569k
            break;
2455
569k
        }
2456
2457
        /*
2458
         * Also increase the counter when entering or exiting a PERef.
2459
         * The spec says: "When a parameter-entity reference is recognized
2460
         * in the DTD and included, its replacement text MUST be enlarged
2461
         * by the attachment of one leading and one following space (#x20)
2462
         * character."
2463
         */
2464
1.11M
        if (res < INT_MAX)
2465
1.11M
            res++;
2466
1.11M
    }
2467
2468
608k
    return(res);
2469
842k
}
2470
2471
/************************************************************************
2472
 *                  *
2473
 *    Commodity functions to handle entities      *
2474
 *                  *
2475
 ************************************************************************/
2476
2477
/**
2478
 * @deprecated Internal function, don't use.
2479
 *
2480
 * @param ctxt  an XML parser context
2481
 * @returns the current xmlChar in the parser context
2482
 */
2483
xmlChar
2484
0
xmlPopInput(xmlParserCtxt *ctxt) {
2485
0
    xmlParserInputPtr input;
2486
2487
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2488
0
    input = xmlCtxtPopInput(ctxt);
2489
0
    xmlFreeInputStream(input);
2490
0
    if (*ctxt->input->cur == 0)
2491
0
        xmlParserGrow(ctxt);
2492
0
    return(CUR);
2493
0
}
2494
2495
/**
2496
 * Push an input stream onto the stack.
2497
 *
2498
 * @deprecated Internal function, don't use.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2502
 * @returns -1 in case of error or the index in the input stack
2503
 */
2504
int
2505
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2506
0
    int ret;
2507
2508
0
    if ((ctxt == NULL) || (input == NULL))
2509
0
        return(-1);
2510
2511
0
    ret = xmlCtxtPushInput(ctxt, input);
2512
0
    if (ret >= 0)
2513
0
        GROW;
2514
0
    return(ret);
2515
0
}
2516
2517
/**
2518
 * Parse a numeric character reference. Always consumes '&'.
2519
 *
2520
 * @deprecated Internal function, don't use.
2521
 *
2522
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2523
 *                      '&#x' [0-9a-fA-F]+ ';'
2524
 *
2525
 * [ WFC: Legal Character ]
2526
 * Characters referred to using character references must match the
2527
 * production for Char.
2528
 *
2529
 * @param ctxt  an XML parser context
2530
 * @returns the value parsed (as an int), 0 in case of error
2531
 */
2532
int
2533
63.5k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2534
63.5k
    int val = 0;
2535
63.5k
    int count = 0;
2536
2537
    /*
2538
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2539
     */
2540
63.5k
    if ((RAW == '&') && (NXT(1) == '#') &&
2541
63.5k
        (NXT(2) == 'x')) {
2542
31.1k
  SKIP(3);
2543
31.1k
  GROW;
2544
127k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2545
107k
      if (count++ > 20) {
2546
4.21k
    count = 0;
2547
4.21k
    GROW;
2548
4.21k
      }
2549
107k
      if ((RAW >= '0') && (RAW <= '9'))
2550
64.7k
          val = val * 16 + (CUR - '0');
2551
42.5k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2552
21.0k
          val = val * 16 + (CUR - 'a') + 10;
2553
21.5k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2554
11.0k
          val = val * 16 + (CUR - 'A') + 10;
2555
10.4k
      else {
2556
10.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2557
10.4k
    val = 0;
2558
10.4k
    break;
2559
10.4k
      }
2560
96.8k
      if (val > 0x110000)
2561
49.7k
          val = 0x110000;
2562
2563
96.8k
      NEXT;
2564
96.8k
      count++;
2565
96.8k
  }
2566
31.1k
  if (RAW == ';') {
2567
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2568
20.6k
      ctxt->input->col++;
2569
20.6k
      ctxt->input->cur++;
2570
20.6k
  }
2571
32.4k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2572
32.4k
  SKIP(2);
2573
32.4k
  GROW;
2574
127k
  while (RAW != ';') { /* loop blocked by count */
2575
99.9k
      if (count++ > 20) {
2576
2.77k
    count = 0;
2577
2.77k
    GROW;
2578
2.77k
      }
2579
99.9k
      if ((RAW >= '0') && (RAW <= '9'))
2580
94.8k
          val = val * 10 + (CUR - '0');
2581
5.06k
      else {
2582
5.06k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2583
5.06k
    val = 0;
2584
5.06k
    break;
2585
5.06k
      }
2586
94.8k
      if (val > 0x110000)
2587
30.0k
          val = 0x110000;
2588
2589
94.8k
      NEXT;
2590
94.8k
      count++;
2591
94.8k
  }
2592
32.4k
  if (RAW == ';') {
2593
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2594
27.4k
      ctxt->input->col++;
2595
27.4k
      ctxt->input->cur++;
2596
27.4k
  }
2597
32.4k
    } else {
2598
0
        if (RAW == '&')
2599
0
            SKIP(1);
2600
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2601
0
    }
2602
2603
    /*
2604
     * [ WFC: Legal Character ]
2605
     * Characters referred to using character references must match the
2606
     * production for Char.
2607
     */
2608
63.5k
    if (val >= 0x110000) {
2609
664
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2610
664
                "xmlParseCharRef: character reference out of bounds\n",
2611
664
          val);
2612
664
        val = 0xFFFD;
2613
62.9k
    } else if (!IS_CHAR(val)) {
2614
17.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2615
17.8k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2616
17.8k
                    val);
2617
17.8k
    }
2618
63.5k
    return(val);
2619
63.5k
}
2620
2621
/**
2622
 * Parse Reference declarations, variant parsing from a string rather
2623
 * than an an input flow.
2624
 *
2625
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2626
 *                      '&#x' [0-9a-fA-F]+ ';'
2627
 *
2628
 * [ WFC: Legal Character ]
2629
 * Characters referred to using character references must match the
2630
 * production for Char.
2631
 *
2632
 * @param ctxt  an XML parser context
2633
 * @param str  a pointer to an index in the string
2634
 * @returns the value parsed (as an int), 0 in case of error, str will be
2635
 *         updated to the current value of the index
2636
 */
2637
static int
2638
327k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2639
327k
    const xmlChar *ptr;
2640
327k
    xmlChar cur;
2641
327k
    int val = 0;
2642
2643
327k
    if ((str == NULL) || (*str == NULL)) return(0);
2644
327k
    ptr = *str;
2645
327k
    cur = *ptr;
2646
327k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2647
197k
  ptr += 3;
2648
197k
  cur = *ptr;
2649
1.17M
  while (cur != ';') { /* Non input consuming loop */
2650
979k
      if ((cur >= '0') && (cur <= '9'))
2651
199k
          val = val * 16 + (cur - '0');
2652
779k
      else if ((cur >= 'a') && (cur <= 'f'))
2653
389k
          val = val * 16 + (cur - 'a') + 10;
2654
390k
      else if ((cur >= 'A') && (cur <= 'F'))
2655
389k
          val = val * 16 + (cur - 'A') + 10;
2656
499
      else {
2657
499
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2658
499
    val = 0;
2659
499
    break;
2660
499
      }
2661
979k
      if (val > 0x110000)
2662
760
          val = 0x110000;
2663
2664
979k
      ptr++;
2665
979k
      cur = *ptr;
2666
979k
  }
2667
197k
  if (cur == ';')
2668
197k
      ptr++;
2669
197k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2670
129k
  ptr += 2;
2671
129k
  cur = *ptr;
2672
410k
  while (cur != ';') { /* Non input consuming loops */
2673
281k
      if ((cur >= '0') && (cur <= '9'))
2674
280k
          val = val * 10 + (cur - '0');
2675
1.18k
      else {
2676
1.18k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2677
1.18k
    val = 0;
2678
1.18k
    break;
2679
1.18k
      }
2680
280k
      if (val > 0x110000)
2681
5.77k
          val = 0x110000;
2682
2683
280k
      ptr++;
2684
280k
      cur = *ptr;
2685
280k
  }
2686
129k
  if (cur == ';')
2687
128k
      ptr++;
2688
129k
    } else {
2689
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2690
0
  return(0);
2691
0
    }
2692
327k
    *str = ptr;
2693
2694
    /*
2695
     * [ WFC: Legal Character ]
2696
     * Characters referred to using character references must match the
2697
     * production for Char.
2698
     */
2699
327k
    if (val >= 0x110000) {
2700
236
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2701
236
                "xmlParseStringCharRef: character reference out of bounds\n",
2702
236
                val);
2703
327k
    } else if (IS_CHAR(val)) {
2704
324k
        return(val);
2705
324k
    } else {
2706
2.63k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2707
2.63k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2708
2.63k
        val);
2709
2.63k
    }
2710
2.87k
    return(0);
2711
327k
}
2712
2713
/**
2714
 *     [69] PEReference ::= '%' Name ';'
2715
 *
2716
 * @deprecated Internal function, do not use.
2717
 *
2718
 * [ WFC: No Recursion ]
2719
 * A parsed entity must not contain a recursive
2720
 * reference to itself, either directly or indirectly.
2721
 *
2722
 * [ WFC: Entity Declared ]
2723
 * In a document without any DTD, a document with only an internal DTD
2724
 * subset which contains no parameter entity references, or a document
2725
 * with "standalone='yes'", ...  ... The declaration of a parameter
2726
 * entity must precede any reference to it...
2727
 *
2728
 * [ VC: Entity Declared ]
2729
 * In a document with an external subset or external parameter entities
2730
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2731
 * must precede any reference to it...
2732
 *
2733
 * [ WFC: In DTD ]
2734
 * Parameter-entity references may only appear in the DTD.
2735
 * NOTE: misleading but this is handled.
2736
 *
2737
 * A PEReference may have been detected in the current input stream
2738
 * the handling is done accordingly to
2739
 *      http://www.w3.org/TR/REC-xml#entproc
2740
 * i.e.
2741
 *   - Included in literal in entity values
2742
 *   - Included as Parameter Entity reference within DTDs
2743
 * @param ctxt  the parser context
2744
 */
2745
void
2746
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2747
0
    xmlParsePERefInternal(ctxt, 0);
2748
0
}
2749
2750
/**
2751
 * @deprecated Internal function, don't use.
2752
 *
2753
 * @param ctxt  the parser context
2754
 * @param str  the input string
2755
 * @param len  the string length
2756
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2757
 * @param end  an end marker xmlChar, 0 if none
2758
 * @param end2  an end marker xmlChar, 0 if none
2759
 * @param end3  an end marker xmlChar, 0 if none
2760
 * @returns A newly allocated string with the substitution done. The caller
2761
 *      must deallocate it !
2762
 */
2763
xmlChar *
2764
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2765
                           int what ATTRIBUTE_UNUSED,
2766
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2767
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2768
0
        return(NULL);
2769
2770
0
    if ((str[len] != 0) ||
2771
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2772
0
        return(NULL);
2773
2774
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2775
0
}
2776
2777
/**
2778
 * @deprecated Internal function, don't use.
2779
 *
2780
 * @param ctxt  the parser context
2781
 * @param str  the input string
2782
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2783
 * @param end  an end marker xmlChar, 0 if none
2784
 * @param end2  an end marker xmlChar, 0 if none
2785
 * @param end3  an end marker xmlChar, 0 if none
2786
 * @returns A newly allocated string with the substitution done. The caller
2787
 *      must deallocate it !
2788
 */
2789
xmlChar *
2790
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2791
                        int what ATTRIBUTE_UNUSED,
2792
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2793
0
    if ((ctxt == NULL) || (str == NULL))
2794
0
        return(NULL);
2795
2796
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2797
0
        return(NULL);
2798
2799
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2800
0
}
2801
2802
/************************************************************************
2803
 *                  *
2804
 *    Commodity functions, cleanup needed ?     *
2805
 *                  *
2806
 ************************************************************************/
2807
2808
/**
2809
 * Is this a sequence of blank chars that one can ignore ?
2810
 *
2811
 * @param ctxt  an XML parser context
2812
 * @param str  a xmlChar *
2813
 * @param len  the size of `str`
2814
 * @param blank_chars  we know the chars are blanks
2815
 * @returns 1 if ignorable 0 otherwise.
2816
 */
2817
2818
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2819
659k
                     int blank_chars) {
2820
659k
    int i;
2821
659k
    xmlNodePtr lastChild;
2822
2823
    /*
2824
     * Check for xml:space value.
2825
     */
2826
659k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2827
659k
        (*(ctxt->space) == -2))
2828
578k
  return(0);
2829
2830
    /*
2831
     * Check that the string is made of blanks
2832
     */
2833
81.1k
    if (blank_chars == 0) {
2834
145k
  for (i = 0;i < len;i++)
2835
141k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2836
67.2k
    }
2837
2838
    /*
2839
     * Look if the element is mixed content in the DTD if available
2840
     */
2841
18.0k
    if (ctxt->node == NULL) return(0);
2842
18.0k
    if (ctxt->myDoc != NULL) {
2843
18.0k
        xmlElementPtr elemDecl = NULL;
2844
18.0k
        xmlDocPtr doc = ctxt->myDoc;
2845
18.0k
        const xmlChar *prefix = NULL;
2846
2847
18.0k
        if (ctxt->node->ns)
2848
8.60k
            prefix = ctxt->node->ns->prefix;
2849
18.0k
        if (doc->intSubset != NULL)
2850
12.9k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2851
12.9k
                                      prefix);
2852
18.0k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2853
787
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2854
787
                                      prefix);
2855
18.0k
        if (elemDecl != NULL) {
2856
1.71k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2857
743
                return(1);
2858
973
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2859
973
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2860
392
                return(0);
2861
973
        }
2862
18.0k
    }
2863
2864
    /*
2865
     * Otherwise, heuristic :-\
2866
     *
2867
     * When push parsing, we could be at the end of a chunk.
2868
     * This makes the look-ahead and consequently the NOBLANKS
2869
     * option unreliable.
2870
     */
2871
16.9k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2872
15.0k
    if ((ctxt->node->children == NULL) &&
2873
15.0k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2874
2875
14.6k
    lastChild = xmlGetLastChild(ctxt->node);
2876
14.6k
    if (lastChild == NULL) {
2877
7.85k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2878
7.85k
            (ctxt->node->content != NULL)) return(0);
2879
7.85k
    } else if (xmlNodeIsText(lastChild))
2880
366
        return(0);
2881
6.40k
    else if ((ctxt->node->children != NULL) &&
2882
6.40k
             (xmlNodeIsText(ctxt->node->children)))
2883
272
        return(0);
2884
13.9k
    return(1);
2885
14.6k
}
2886
2887
/************************************************************************
2888
 *                  *
2889
 *    Extra stuff for namespace support     *
2890
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2891
 *                  *
2892
 ************************************************************************/
2893
2894
/**
2895
 * Parse an UTF8 encoded XML qualified name string
2896
 *
2897
 * @deprecated Don't use.
2898
 *
2899
 * @param ctxt  an XML parser context
2900
 * @param name  an XML parser context
2901
 * @param prefixOut  a xmlChar **
2902
 * @returns the local part, and prefix is updated
2903
 *   to get the Prefix if any.
2904
 */
2905
2906
xmlChar *
2907
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2908
0
    xmlChar *ret;
2909
0
    const xmlChar *localname;
2910
2911
0
    localname = xmlSplitQName4(name, prefixOut);
2912
0
    if (localname == NULL) {
2913
0
        xmlCtxtErrMemory(ctxt);
2914
0
        return(NULL);
2915
0
    }
2916
2917
0
    ret = xmlStrdup(localname);
2918
0
    if (ret == NULL) {
2919
0
        xmlCtxtErrMemory(ctxt);
2920
0
        xmlFree(*prefixOut);
2921
0
    }
2922
2923
0
    return(ret);
2924
0
}
2925
2926
/************************************************************************
2927
 *                  *
2928
 *      The parser itself       *
2929
 *  Relates to http://www.w3.org/TR/REC-xml       *
2930
 *                  *
2931
 ************************************************************************/
2932
2933
/************************************************************************
2934
 *                  *
2935
 *  Routines to parse Name, NCName and NmToken      *
2936
 *                  *
2937
 ************************************************************************/
2938
2939
/*
 * The following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new
 * production [4a] of that revision. Also note that the macros used
 * for the productions Letter, Digit, CombiningChar and Extender are
 * not needed anymore.
 * We still keep compatibility with pre-revision-5 parsing semantics
 * if the XML_PARSE_OLD10 option is given to the parser.
 */
2949
2950
static int
xmlIsNameStartCharNew(int c) {
    /*
     * NameStartChar test following productions [4], [4a] and [5] of
     * Update 5 of XML-1.0. Returns 1 if `c` may start a name, 0
     * otherwise.
     */

    /* ASCII (and negative values): letters, '_' and ':' only */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* 0xC0-0x2FF without the two holes 0xD7 and 0xF7 */
    if (c < 0xC0)
        return(0);
    if (c <= 0x2FF)
        return((c != 0xD7) && (c != 0xF7));

    /* 0x370-0x1FFF without the hole 0x37E */
    if (c < 0x370)
        return(0);
    if (c <= 0x1FFF)
        return(c != 0x37E);

    /* remaining isolated ranges */
    if ((c >= 0x200C) && (c <= 0x200D))
        return(1);
    if ((c >= 0x2070) && (c <= 0x218F))
        return(1);
    if ((c >= 0x2C00) && (c <= 0x2FEF))
        return(1);
    if ((c >= 0x3001) && (c <= 0xD7FF))
        return(1);
    if ((c >= 0xF900) && (c <= 0xFDCF))
        return(1);
    if ((c >= 0xFDF0) && (c <= 0xFFFD))
        return(1);
    return((c >= 0x10000) && (c <= 0xEFFFF));
}
2975
2976
static int
xmlIsNameCharNew(int c) {
    /*
     * NameChar test following productions [4], [4a] and [5] of
     * Update 5 of XML-1.0. Returns 1 if `c` may appear inside a
     * name, 0 otherwise.
     */

    /* ASCII (and negative values) */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
            return(1);
        if ((c >= '0') && (c <= '9'))
            return(1);
        return((c == '_') || (c == ':') || (c == '-') || (c == '.'));
    }

    /* 0x80-0xBF: only the middle dot */
    if (c < 0xC0)
        return(c == 0xB7);

    /*
     * 0xC0-0x1FFF is contiguous for name chars (it includes the
     * combining range 0x300-0x36F) except for 0xD7, 0xF7 and 0x37E.
     */
    if (c <= 0x1FFF)
        return((c != 0xD7) && (c != 0xF7) && (c != 0x37E));

    /* remaining isolated ranges */
    if ((c >= 0x200C) && (c <= 0x200D))
        return(1);
    if ((c >= 0x203F) && (c <= 0x2040))
        return(1);
    if ((c >= 0x2070) && (c <= 0x218F))
        return(1);
    if ((c >= 0x2C00) && (c <= 0x2FEF))
        return(1);
    if ((c >= 0x3001) && (c <= 0xD7FF))
        return(1);
    if ((c >= 0xF900) && (c <= 0xFDCF))
        return(1);
    if ((c >= 0xFDF0) && (c <= 0xFFFD))
        return(1);
    return((c >= 0x10000) && (c <= 0xEFFFF));
}
3005
3006
static int
xmlIsNameStartCharOld(int c) {
    /*
     * Name start test built on the IS_LETTER macro. Returns 1 if `c`
     * may start a name, 0 otherwise.
     */

    /* accelerators: frequent delimiters never start a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);
    if ((c == '_') || (c == ':'))
        return(1);
    return(0);
}
3013
3014
static int
xmlIsNameCharOld(int c) {
    /*
     * Name char test built on the IS_LETTER, IS_DIGIT, IS_COMBINING
     * and IS_EXTENDER macros. Returns 1 if `c` may appear inside a
     * name, 0 otherwise.
     */

    /* accelerators: frequent delimiters are never part of a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);
    return(0);
}
3025
3026
static int
xmlIsNameStartChar(int c, int old10) {
    /* Dispatch on `old10`: non-zero selects the "Old" rule set. */
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3033
3034
static int
xmlIsNameChar(int c, int old10) {
    /* Dispatch on `old10`: non-zero selects the "Old" rule set. */
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3041
3042
/*
3043
 * Scan an XML Name, NCName or Nmtoken.
3044
 *
3045
 * Returns a pointer to the end of the name on success. If the
3046
 * name is invalid, returns `ptr`. If the name is longer than
3047
 * `maxSize` bytes, returns NULL.
3048
 *
3049
 * @param ptr  pointer to the start of the name
3050
 * @param maxSize  maximum size in bytes
3051
 * @param flags  XML_SCAN_* flags
3052
 * @returns a pointer to the end of the name or NULL
3053
 */
3054
const xmlChar *
3055
814k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3056
814k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3057
814k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3058
3059
8.30M
    while (1) {
3060
8.30M
        int c, len;
3061
3062
8.30M
        c = *ptr;
3063
8.30M
        if (c < 0x80) {
3064
3.75M
            if (c == stop)
3065
1.12k
                break;
3066
3.75M
            len = 1;
3067
4.55M
        } else {
3068
4.55M
            len = 4;
3069
4.55M
            c = xmlGetUTF8Char(ptr, &len);
3070
4.55M
            if (c < 0)
3071
2.81k
                break;
3072
4.55M
        }
3073
3074
8.30M
        if (flags & XML_SCAN_NMTOKEN ?
3075
7.48M
                !xmlIsNameChar(c, old10) :
3076
8.30M
                !xmlIsNameStartChar(c, old10))
3077
810k
            break;
3078
3079
7.49M
        if ((size_t) len > maxSize)
3080
106
            return(NULL);
3081
7.49M
        ptr += len;
3082
7.49M
        maxSize -= len;
3083
7.49M
        flags |= XML_SCAN_NMTOKEN;
3084
7.49M
    }
3085
3086
814k
    return(ptr);
3087
814k
}
3088
3089
static const xmlChar *
3090
149k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3091
149k
    const xmlChar *ret;
3092
149k
    int len = 0, l;
3093
149k
    int c;
3094
149k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3095
74.6k
                    XML_MAX_TEXT_LENGTH :
3096
149k
                    XML_MAX_NAME_LENGTH;
3097
149k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3098
3099
    /*
3100
     * Handler for more complex cases
3101
     */
3102
149k
    c = xmlCurrentChar(ctxt, &l);
3103
149k
    if (!xmlIsNameStartChar(c, old10))
3104
99.9k
        return(NULL);
3105
50.0k
    len += l;
3106
50.0k
    NEXTL(l);
3107
50.0k
    c = xmlCurrentChar(ctxt, &l);
3108
3.46M
    while (xmlIsNameChar(c, old10)) {
3109
3.41M
        if (len <= INT_MAX - l)
3110
3.41M
            len += l;
3111
3.41M
        NEXTL(l);
3112
3.41M
        c = xmlCurrentChar(ctxt, &l);
3113
3.41M
    }
3114
50.0k
    if (len > maxLength) {
3115
63
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3116
63
        return(NULL);
3117
63
    }
3118
49.9k
    if (ctxt->input->cur - ctxt->input->base < len) {
3119
        /*
3120
         * There were a couple of bugs where PERefs lead to to a change
3121
         * of the buffer. Check the buffer size to avoid passing an invalid
3122
         * pointer to xmlDictLookup.
3123
         */
3124
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3125
0
                    "unexpected change of input buffer");
3126
0
        return (NULL);
3127
0
    }
3128
49.9k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3129
464
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3130
49.4k
    else
3131
49.4k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3132
49.9k
    if (ret == NULL)
3133
6
        xmlErrMemory(ctxt);
3134
49.9k
    return(ret);
3135
49.9k
}
3136
3137
/**
3138
 * Parse an XML name.
3139
 *
3140
 * @deprecated Internal function, don't use.
3141
 *
3142
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3143
 *                      CombiningChar | Extender
3144
 *
3145
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3146
 *
3147
 *     [6] Names ::= Name (#x20 Name)*
3148
 *
3149
 * @param ctxt  an XML parser context
3150
 * @returns the Name parsed or NULL
3151
 */
3152
3153
const xmlChar *
3154
1.97M
xmlParseName(xmlParserCtxt *ctxt) {
3155
1.97M
    const xmlChar *in;
3156
1.97M
    const xmlChar *ret;
3157
1.97M
    size_t count = 0;
3158
1.97M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3159
1.26M
                       XML_MAX_TEXT_LENGTH :
3160
1.97M
                       XML_MAX_NAME_LENGTH;
3161
3162
1.97M
    GROW;
3163
3164
    /*
3165
     * Accelerator for simple ASCII names
3166
     */
3167
1.97M
    in = ctxt->input->cur;
3168
1.97M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
1.97M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
1.97M
  (*in == '_') || (*in == ':')) {
3171
1.86M
  in++;
3172
10.1M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3173
10.1M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3174
10.1M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3175
10.1M
         (*in == '_') || (*in == '-') ||
3176
10.1M
         (*in == ':') || (*in == '.'))
3177
8.30M
      in++;
3178
1.86M
  if ((*in > 0) && (*in < 0x80)) {
3179
1.82M
      count = in - ctxt->input->cur;
3180
1.82M
            if (count > maxLength) {
3181
93
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3182
93
                return(NULL);
3183
93
            }
3184
1.82M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3185
1.82M
      ctxt->input->cur = in;
3186
1.82M
      ctxt->input->col += count;
3187
1.82M
      if (ret == NULL)
3188
11
          xmlErrMemory(ctxt);
3189
1.82M
      return(ret);
3190
1.82M
  }
3191
1.86M
    }
3192
    /* accelerator for special cases */
3193
149k
    return(xmlParseNameComplex(ctxt));
3194
1.97M
}
3195
3196
static xmlHashedString
3197
534k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3198
534k
    xmlHashedString ret;
3199
534k
    int len = 0, l;
3200
534k
    int c;
3201
534k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3202
433k
                    XML_MAX_TEXT_LENGTH :
3203
534k
                    XML_MAX_NAME_LENGTH;
3204
534k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3205
534k
    size_t startPosition = 0;
3206
3207
534k
    ret.name = NULL;
3208
534k
    ret.hashValue = 0;
3209
3210
    /*
3211
     * Handler for more complex cases
3212
     */
3213
534k
    startPosition = CUR_PTR - BASE_PTR;
3214
534k
    c = xmlCurrentChar(ctxt, &l);
3215
534k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3216
534k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3217
460k
  return(ret);
3218
460k
    }
3219
3220
12.0M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3221
12.0M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3222
11.9M
        if (len <= INT_MAX - l)
3223
11.9M
      len += l;
3224
11.9M
  NEXTL(l);
3225
11.9M
  c = xmlCurrentChar(ctxt, &l);
3226
11.9M
    }
3227
74.1k
    if (len > maxLength) {
3228
156
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3229
156
        return(ret);
3230
156
    }
3231
73.9k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3232
73.9k
    if (ret.name == NULL)
3233
3
        xmlErrMemory(ctxt);
3234
73.9k
    return(ret);
3235
74.1k
}
3236
3237
/**
3238
 * Parse an XML name.
3239
 *
3240
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3241
 *                          CombiningChar | Extender
3242
 *
3243
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3244
 *
3245
 * @param ctxt  an XML parser context
3246
 * @returns the Name parsed or NULL
3247
 */
3248
3249
static xmlHashedString
3250
1.60M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3251
1.60M
    const xmlChar *in, *e;
3252
1.60M
    xmlHashedString ret;
3253
1.60M
    size_t count = 0;
3254
1.60M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3255
1.10M
                       XML_MAX_TEXT_LENGTH :
3256
1.60M
                       XML_MAX_NAME_LENGTH;
3257
3258
1.60M
    ret.name = NULL;
3259
3260
    /*
3261
     * Accelerator for simple ASCII names
3262
     */
3263
1.60M
    in = ctxt->input->cur;
3264
1.60M
    e = ctxt->input->end;
3265
1.60M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
1.60M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
1.60M
   (*in == '_')) && (in < e)) {
3268
1.12M
  in++;
3269
10.0M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3270
10.0M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3271
10.0M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3272
10.0M
          (*in == '_') || (*in == '-') ||
3273
10.0M
          (*in == '.')) && (in < e))
3274
8.91M
      in++;
3275
1.12M
  if (in >= e)
3276
2.25k
      goto complex;
3277
1.12M
  if ((*in > 0) && (*in < 0x80)) {
3278
1.07M
      count = in - ctxt->input->cur;
3279
1.07M
            if (count > maxLength) {
3280
81
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3281
81
                return(ret);
3282
81
            }
3283
1.07M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3284
1.07M
      ctxt->input->cur = in;
3285
1.07M
      ctxt->input->col += count;
3286
1.07M
      if (ret.name == NULL) {
3287
6
          xmlErrMemory(ctxt);
3288
6
      }
3289
1.07M
      return(ret);
3290
1.07M
  }
3291
1.12M
    }
3292
534k
complex:
3293
534k
    return(xmlParseNCNameComplex(ctxt));
3294
1.60M
}
3295
3296
/**
3297
 * Parse an XML name and compares for match
3298
 * (specialized for endtag parsing)
3299
 *
3300
 * @param ctxt  an XML parser context
3301
 * @param other  the name to compare with
3302
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3303
 * and the name for mismatch
3304
 */
3305
3306
static const xmlChar *
3307
68.3k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3308
68.3k
    register const xmlChar *cmp = other;
3309
68.3k
    register const xmlChar *in;
3310
68.3k
    const xmlChar *ret;
3311
3312
68.3k
    GROW;
3313
3314
68.3k
    in = ctxt->input->cur;
3315
169k
    while (*in != 0 && *in == *cmp) {
3316
100k
  ++in;
3317
100k
  ++cmp;
3318
100k
    }
3319
68.3k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3320
  /* success */
3321
36.9k
  ctxt->input->col += in - ctxt->input->cur;
3322
36.9k
  ctxt->input->cur = in;
3323
36.9k
  return (const xmlChar*) 1;
3324
36.9k
    }
3325
    /* failure (or end of input buffer), check with full function */
3326
31.4k
    ret = xmlParseName (ctxt);
3327
    /* strings coming from the dictionary direct compare possible */
3328
31.4k
    if (ret == other) {
3329
2.36k
  return (const xmlChar*) 1;
3330
2.36k
    }
3331
29.0k
    return ret;
3332
31.4k
}
3333
3334
/**
3335
 * Parse an XML name.
3336
 *
3337
 * @param ctxt  an XML parser context
3338
 * @param str  a pointer to the string pointer (IN/OUT)
3339
 * @returns the Name parsed or NULL. The `str` pointer
3340
 * is updated to the current location in the string.
3341
 */
3342
3343
static xmlChar *
3344
810k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3345
810k
    xmlChar *ret;
3346
810k
    const xmlChar *cur = *str;
3347
810k
    int flags = 0;
3348
810k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349
256k
                    XML_MAX_TEXT_LENGTH :
3350
810k
                    XML_MAX_NAME_LENGTH;
3351
3352
810k
    if (ctxt->options & XML_PARSE_OLD10)
3353
255k
        flags |= XML_SCAN_OLD10;
3354
3355
810k
    cur = xmlScanName(*str, maxLength, flags);
3356
810k
    if (cur == NULL) {
3357
106
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3358
106
        return(NULL);
3359
106
    }
3360
810k
    if (cur == *str)
3361
5.36k
        return(NULL);
3362
3363
804k
    ret = xmlStrndup(*str, cur - *str);
3364
804k
    if (ret == NULL)
3365
92
        xmlErrMemory(ctxt);
3366
804k
    *str = cur;
3367
804k
    return(ret);
3368
810k
}
3369
3370
/**
3371
 * Parse an XML Nmtoken.
3372
 *
3373
 * @deprecated Internal function, don't use.
3374
 *
3375
 *     [7] Nmtoken ::= (NameChar)+
3376
 *
3377
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378
 *
3379
 * @param ctxt  an XML parser context
3380
 * @returns the Nmtoken parsed or NULL
3381
 */
3382
3383
xmlChar *
3384
42.8k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3385
42.8k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3386
42.8k
    xmlChar *ret;
3387
42.8k
    int len = 0, l;
3388
42.8k
    int c;
3389
42.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3390
19.2k
                    XML_MAX_TEXT_LENGTH :
3391
42.8k
                    XML_MAX_NAME_LENGTH;
3392
42.8k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3393
3394
42.8k
    c = xmlCurrentChar(ctxt, &l);
3395
3396
208k
    while (xmlIsNameChar(c, old10)) {
3397
167k
  COPY_BUF(buf, len, c);
3398
167k
  NEXTL(l);
3399
167k
  c = xmlCurrentChar(ctxt, &l);
3400
167k
  if (len >= XML_MAX_NAMELEN) {
3401
      /*
3402
       * Okay someone managed to make a huge token, so he's ready to pay
3403
       * for the processing speed.
3404
       */
3405
1.27k
      xmlChar *buffer;
3406
1.27k
      int max = len * 2;
3407
3408
1.27k
      buffer = xmlMalloc(max);
3409
1.27k
      if (buffer == NULL) {
3410
5
          xmlErrMemory(ctxt);
3411
5
    return(NULL);
3412
5
      }
3413
1.26k
      memcpy(buffer, buf, len);
3414
5.63M
      while (xmlIsNameChar(c, old10)) {
3415
5.63M
    if (len + 10 > max) {
3416
3.23k
        xmlChar *tmp;
3417
3.23k
                    int newSize;
3418
3419
3.23k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3420
3.23k
                    if (newSize < 0) {
3421
82
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3422
82
                        xmlFree(buffer);
3423
82
                        return(NULL);
3424
82
                    }
3425
3.15k
        tmp = xmlRealloc(buffer, newSize);
3426
3.15k
        if (tmp == NULL) {
3427
5
      xmlErrMemory(ctxt);
3428
5
      xmlFree(buffer);
3429
5
      return(NULL);
3430
5
        }
3431
3.15k
        buffer = tmp;
3432
3.15k
                    max = newSize;
3433
3.15k
    }
3434
5.63M
    COPY_BUF(buffer, len, c);
3435
5.63M
    NEXTL(l);
3436
5.63M
    c = xmlCurrentChar(ctxt, &l);
3437
5.63M
      }
3438
1.18k
      buffer[len] = 0;
3439
1.18k
      return(buffer);
3440
1.26k
  }
3441
167k
    }
3442
41.5k
    if (len == 0)
3443
7.90k
        return(NULL);
3444
33.6k
    if (len > maxLength) {
3445
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3446
0
        return(NULL);
3447
0
    }
3448
33.6k
    ret = xmlStrndup(buf, len);
3449
33.6k
    if (ret == NULL)
3450
17
        xmlErrMemory(ctxt);
3451
33.6k
    return(ret);
3452
33.6k
}
3453
3454
/**
3455
 * Validate an entity value and expand parameter entities.
3456
 *
3457
 * @param ctxt  parser context
3458
 * @param buf  string buffer
3459
 * @param str  entity value
3460
 * @param length  size of entity value
3461
 * @param depth  nesting depth
3462
 */
3463
static void
3464
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3465
79.5k
                          const xmlChar *str, int length, int depth) {
3466
79.5k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3467
79.5k
    const xmlChar *end, *chunk;
3468
79.5k
    int c, l;
3469
3470
79.5k
    if (str == NULL)
3471
19.6k
        return;
3472
3473
59.9k
    depth += 1;
3474
59.9k
    if (depth > maxDepth) {
3475
5
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3476
5
                       "Maximum entity nesting depth exceeded");
3477
5
  return;
3478
5
    }
3479
3480
59.9k
    end = str + length;
3481
59.9k
    chunk = str;
3482
3483
433M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3484
433M
        c = *str;
3485
3486
433M
        if (c >= 0x80) {
3487
362M
            l = xmlUTF8MultibyteLen(ctxt, str,
3488
362M
                    "invalid character in entity value\n");
3489
362M
            if (l == 0) {
3490
28.1M
                if (chunk < str)
3491
62.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3492
28.1M
                xmlSBufAddReplChar(buf);
3493
28.1M
                str += 1;
3494
28.1M
                chunk = str;
3495
334M
            } else {
3496
334M
                str += l;
3497
334M
            }
3498
362M
        } else if (c == '&') {
3499
53.2k
            if (str[1] == '#') {
3500
15.7k
                if (chunk < str)
3501
7.22k
                    xmlSBufAddString(buf, chunk, str - chunk);
3502
3503
15.7k
                c = xmlParseStringCharRef(ctxt, &str);
3504
15.7k
                if (c == 0)
3505
2.83k
                    return;
3506
3507
12.9k
                xmlSBufAddChar(buf, c);
3508
3509
12.9k
                chunk = str;
3510
37.4k
            } else {
3511
37.4k
                xmlChar *name;
3512
3513
                /*
3514
                 * General entity references are checked for
3515
                 * syntactic validity.
3516
                 */
3517
37.4k
                str++;
3518
37.4k
                name = xmlParseStringName(ctxt, &str);
3519
3520
37.4k
                if ((name == NULL) || (*str++ != ';')) {
3521
1.55k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3522
1.55k
                            "EntityValue: '&' forbidden except for entities "
3523
1.55k
                            "references\n");
3524
1.55k
                    xmlFree(name);
3525
1.55k
                    return;
3526
1.55k
                }
3527
3528
35.9k
                xmlFree(name);
3529
35.9k
            }
3530
70.8M
        } else if (c == '%') {
3531
46.4k
            xmlEntityPtr ent;
3532
3533
46.4k
            if (chunk < str)
3534
7.65k
                xmlSBufAddString(buf, chunk, str - chunk);
3535
3536
46.4k
            ent = xmlParseStringPEReference(ctxt, &str);
3537
46.4k
            if (ent == NULL)
3538
11.5k
                return;
3539
3540
34.8k
            if (!PARSER_EXTERNAL(ctxt)) {
3541
326
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3542
326
                return;
3543
326
            }
3544
3545
34.5k
            if (ent->content == NULL) {
3546
                /*
3547
                 * Note: external parsed entities will not be loaded,
3548
                 * it is not required for a non-validating parser to
3549
                 * complete external PEReferences coming from the
3550
                 * internal subset
3551
                 */
3552
19.9k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3553
19.9k
                    ((ctxt->replaceEntities) ||
3554
19.9k
                     (ctxt->validate))) {
3555
19.5k
                    xmlLoadEntityContent(ctxt, ent);
3556
19.5k
                } else {
3557
344
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3558
344
                                  "not validating will not read content for "
3559
344
                                  "PE entity %s\n", ent->name, NULL);
3560
344
                }
3561
19.9k
            }
3562
3563
            /*
3564
             * TODO: Skip if ent->content is still NULL.
3565
             */
3566
3567
34.5k
            if (xmlParserEntityCheck(ctxt, ent->length))
3568
11
                return;
3569
3570
34.5k
            if (ent->flags & XML_ENT_EXPANDING) {
3571
171
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3572
171
                return;
3573
171
            }
3574
3575
34.3k
            ent->flags |= XML_ENT_EXPANDING;
3576
34.3k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3577
34.3k
                                      depth);
3578
34.3k
            ent->flags &= ~XML_ENT_EXPANDING;
3579
3580
34.3k
            chunk = str;
3581
70.7M
        } else {
3582
            /* Normal ASCII char */
3583
70.7M
            if (!IS_BYTE_CHAR(c)) {
3584
7.41M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3585
7.41M
                        "invalid character in entity value\n");
3586
7.41M
                if (chunk < str)
3587
19.0k
                    xmlSBufAddString(buf, chunk, str - chunk);
3588
7.41M
                xmlSBufAddReplChar(buf);
3589
7.41M
                str += 1;
3590
7.41M
                chunk = str;
3591
63.3M
            } else {
3592
63.3M
                str += 1;
3593
63.3M
            }
3594
70.7M
        }
3595
433M
    }
3596
3597
43.4k
    if (chunk < str)
3598
36.0k
        xmlSBufAddString(buf, chunk, str - chunk);
3599
43.4k
}
3600
3601
/**
3602
 * Parse a value for ENTITY declarations
3603
 *
3604
 * @deprecated Internal function, don't use.
3605
 *
3606
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3607
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3608
 *
3609
 * @param ctxt  an XML parser context
3610
 * @param orig  if non-NULL store a copy of the original entity value
3611
 * @returns the EntityValue parsed with reference substituted or NULL
3612
 */
3613
xmlChar *
3614
46.1k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3615
46.1k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3616
21.2k
                         XML_MAX_HUGE_LENGTH :
3617
46.1k
                         XML_MAX_TEXT_LENGTH;
3618
46.1k
    xmlSBuf buf;
3619
46.1k
    const xmlChar *start;
3620
46.1k
    int quote, length;
3621
3622
46.1k
    xmlSBufInit(&buf, maxLength);
3623
3624
46.1k
    GROW;
3625
3626
46.1k
    quote = CUR;
3627
46.1k
    if ((quote != '"') && (quote != '\'')) {
3628
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3629
0
  return(NULL);
3630
0
    }
3631
46.1k
    CUR_PTR++;
3632
3633
46.1k
    length = 0;
3634
3635
    /*
3636
     * Copy raw content of the entity into a buffer
3637
     */
3638
1.02G
    while (1) {
3639
1.02G
        int c;
3640
3641
1.02G
        if (PARSER_STOPPED(ctxt))
3642
18
            goto error;
3643
3644
1.02G
        if (CUR_PTR >= ctxt->input->end) {
3645
786
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3646
786
            goto error;
3647
786
        }
3648
3649
1.02G
        c = CUR;
3650
3651
1.02G
        if (c == 0) {
3652
87
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3653
87
                    "invalid character in entity value\n");
3654
87
            goto error;
3655
87
        }
3656
1.02G
        if (c == quote)
3657
45.2k
            break;
3658
1.02G
        NEXTL(1);
3659
1.02G
        length += 1;
3660
3661
        /*
3662
         * TODO: Check growth threshold
3663
         */
3664
1.02G
        if (ctxt->input->end - CUR_PTR < 10)
3665
87.3k
            GROW;
3666
1.02G
    }
3667
3668
45.2k
    start = CUR_PTR - length;
3669
3670
45.2k
    if (orig != NULL) {
3671
45.2k
        *orig = xmlStrndup(start, length);
3672
45.2k
        if (*orig == NULL)
3673
48
            xmlErrMemory(ctxt);
3674
45.2k
    }
3675
3676
45.2k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3677
3678
45.2k
    NEXTL(1);
3679
3680
45.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3681
3682
891
error:
3683
891
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3684
891
    return(NULL);
3685
46.1k
}
3686
3687
/**
3688
 * Check an entity reference in an attribute value for validity
3689
 * without expanding it.
3690
 *
3691
 * @param ctxt  parser context
3692
 * @param pent  entity
3693
 * @param depth  nesting depth
3694
 */
3695
static void
3696
6.08k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3697
6.08k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3698
6.08k
    const xmlChar *str;
3699
6.08k
    unsigned long expandedSize = pent->length;
3700
6.08k
    int c, flags;
3701
3702
6.08k
    depth += 1;
3703
6.08k
    if (depth > maxDepth) {
3704
3
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3705
3
                       "Maximum entity nesting depth exceeded");
3706
3
  return;
3707
3
    }
3708
3709
6.08k
    if (pent->flags & XML_ENT_EXPANDING) {
3710
28
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3711
28
        return;
3712
28
    }
3713
3714
    /*
3715
     * If we're parsing a default attribute value in DTD content,
3716
     * the entity might reference other entities which weren't
3717
     * defined yet, so the check isn't reliable.
3718
     */
3719
6.05k
    if (ctxt->inSubset == 0)
3720
5.92k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3721
133
    else
3722
133
        flags = XML_ENT_VALIDATED;
3723
3724
6.05k
    str = pent->content;
3725
6.05k
    if (str == NULL)
3726
3
        goto done;
3727
3728
    /*
3729
     * Note that entity values are already validated. We only check
3730
     * for illegal less-than signs and compute the expanded size
3731
     * of the entity. No special handling for multi-byte characters
3732
     * is needed.
3733
     */
3734
91.3M
    while (!PARSER_STOPPED(ctxt)) {
3735
91.3M
        c = *str;
3736
3737
91.3M
  if (c != '&') {
3738
91.2M
            if (c == 0)
3739
5.83k
                break;
3740
3741
91.2M
            if (c == '<')
3742
1.53k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3743
1.53k
                        "'<' in entity '%s' is not allowed in attributes "
3744
1.53k
                        "values\n", pent->name);
3745
3746
91.2M
            str += 1;
3747
91.2M
        } else if (str[1] == '#') {
3748
1.87k
            int val;
3749
3750
1.87k
      val = xmlParseStringCharRef(ctxt, &str);
3751
1.87k
      if (val == 0) {
3752
18
                pent->content[0] = 0;
3753
18
                break;
3754
18
            }
3755
13.2k
  } else {
3756
13.2k
            xmlChar *name;
3757
13.2k
            xmlEntityPtr ent;
3758
3759
13.2k
      name = xmlParseStringEntityRef(ctxt, &str);
3760
13.2k
      if (name == NULL) {
3761
31
                pent->content[0] = 0;
3762
31
                break;
3763
31
            }
3764
3765
13.2k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3766
13.2k
            xmlFree(name);
3767
3768
13.2k
            if ((ent != NULL) &&
3769
13.2k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3770
10.2k
                if ((ent->flags & flags) != flags) {
3771
4.92k
                    pent->flags |= XML_ENT_EXPANDING;
3772
4.92k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3773
4.92k
                    pent->flags &= ~XML_ENT_EXPANDING;
3774
4.92k
                }
3775
3776
10.2k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3777
10.2k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3778
10.2k
            }
3779
13.2k
        }
3780
91.3M
    }
3781
3782
6.05k
done:
3783
6.05k
    if (ctxt->inSubset == 0)
3784
5.92k
        pent->expandedSize = expandedSize;
3785
3786
6.05k
    pent->flags |= flags;
3787
6.05k
}
3788
3789
/**
3790
 * Expand general entity references in an entity or attribute value.
3791
 * Perform attribute value normalization.
3792
 *
3793
 * @param ctxt  parser context
3794
 * @param buf  string buffer
3795
 * @param str  entity or attribute value
3796
 * @param pent  entity for entity value, NULL for attribute values
3797
 * @param normalize  whether to collapse whitespace
3798
 * @param inSpace  whitespace state
3799
 * @param depth  nesting depth
3800
 * @param check  whether to check for amplification
3801
 * @returns  whether there was a normalization change
3802
 */
3803
static int
3804
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3805
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3806
687k
                          int *inSpace, int depth, int check) {
3807
687k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3808
687k
    int c, chunkSize;
3809
687k
    int normChange = 0;
3810
3811
687k
    if (str == NULL)
3812
36
        return(0);
3813
3814
687k
    depth += 1;
3815
687k
    if (depth > maxDepth) {
3816
3
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3817
3
                       "Maximum entity nesting depth exceeded");
3818
3
  return(0);
3819
3
    }
3820
3821
687k
    if (pent != NULL) {
3822
687k
        if (pent->flags & XML_ENT_EXPANDING) {
3823
11
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3824
11
            return(0);
3825
11
        }
3826
3827
687k
        if (check) {
3828
687k
            if (xmlParserEntityCheck(ctxt, pent->length))
3829
357
                return(0);
3830
687k
        }
3831
687k
    }
3832
3833
687k
    chunkSize = 0;
3834
3835
    /*
3836
     * Note that entity values are already validated. No special
3837
     * handling for multi-byte characters is needed.
3838
     */
3839
4.10G
    while (!PARSER_STOPPED(ctxt)) {
3840
4.10G
        c = *str;
3841
3842
4.10G
  if (c != '&') {
3843
4.10G
            if (c == 0)
3844
599k
                break;
3845
3846
            /*
3847
             * If this function is called without an entity, it is used to
3848
             * expand entities in an attribute content where less-than was
3849
             * already unscaped and is allowed.
3850
             */
3851
4.10G
            if ((pent != NULL) && (c == '<')) {
3852
87.0k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3853
87.0k
                        "'<' in entity '%s' is not allowed in attributes "
3854
87.0k
                        "values\n", pent->name);
3855
87.0k
                break;
3856
87.0k
            }
3857
3858
4.10G
            if (c <= 0x20) {
3859
13.5M
                if ((normalize) && (*inSpace)) {
3860
                    /* Skip char */
3861
835k
                    if (chunkSize > 0) {
3862
149k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3863
149k
                        chunkSize = 0;
3864
149k
                    }
3865
835k
                    normChange = 1;
3866
12.7M
                } else if (c < 0x20) {
3867
11.7M
                    if (chunkSize > 0) {
3868
98.3k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3869
98.3k
                        chunkSize = 0;
3870
98.3k
                    }
3871
3872
11.7M
                    xmlSBufAddCString(buf, " ", 1);
3873
11.7M
                } else {
3874
968k
                    chunkSize += 1;
3875
968k
                }
3876
3877
13.5M
                *inSpace = 1;
3878
4.09G
            } else {
3879
4.09G
                chunkSize += 1;
3880
4.09G
                *inSpace = 0;
3881
4.09G
            }
3882
3883
4.10G
            str += 1;
3884
4.10G
        } else if (str[1] == '#') {
3885
309k
            int val;
3886
3887
309k
            if (chunkSize > 0) {
3888
306k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3889
306k
                chunkSize = 0;
3890
306k
            }
3891
3892
309k
      val = xmlParseStringCharRef(ctxt, &str);
3893
309k
      if (val == 0) {
3894
17
                if (pent != NULL)
3895
17
                    pent->content[0] = 0;
3896
17
                break;
3897
17
            }
3898
3899
309k
            if (val == ' ') {
3900
4.43k
                if ((normalize) && (*inSpace))
3901
221
                    normChange = 1;
3902
4.21k
                else
3903
4.21k
                    xmlSBufAddCString(buf, " ", 1);
3904
4.43k
                *inSpace = 1;
3905
305k
            } else {
3906
305k
                xmlSBufAddChar(buf, val);
3907
305k
                *inSpace = 0;
3908
305k
            }
3909
713k
  } else {
3910
713k
            xmlChar *name;
3911
713k
            xmlEntityPtr ent;
3912
3913
713k
            if (chunkSize > 0) {
3914
186k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3915
186k
                chunkSize = 0;
3916
186k
            }
3917
3918
713k
      name = xmlParseStringEntityRef(ctxt, &str);
3919
713k
            if (name == NULL) {
3920
43
                if (pent != NULL)
3921
43
                    pent->content[0] = 0;
3922
43
                break;
3923
43
            }
3924
3925
713k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3926
713k
            xmlFree(name);
3927
3928
713k
      if ((ent != NULL) &&
3929
713k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3930
164k
    if (ent->content == NULL) {
3931
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3932
0
          "predefined entity has no content\n");
3933
0
                    break;
3934
0
                }
3935
3936
164k
                xmlSBufAddString(buf, ent->content, ent->length);
3937
3938
164k
                *inSpace = 0;
3939
548k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3940
397k
                if (pent != NULL)
3941
397k
                    pent->flags |= XML_ENT_EXPANDING;
3942
397k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3943
397k
                        ent->content, ent, normalize, inSpace, depth, check);
3944
397k
                if (pent != NULL)
3945
397k
                    pent->flags &= ~XML_ENT_EXPANDING;
3946
397k
      }
3947
713k
        }
3948
4.10G
    }
3949
3950
687k
    if (chunkSize > 0)
3951
242k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3952
3953
687k
    return(normChange);
3954
687k
}
3955
3956
/**
3957
 * Expand general entity references in an entity or attribute value.
3958
 * Perform attribute value normalization.
3959
 *
3960
 * @param ctxt  parser context
3961
 * @param str  entity or attribute value
3962
 * @param normalize  whether to collapse whitespace
3963
 * @returns the expanded attribtue value.
3964
 */
3965
xmlChar *
3966
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3967
0
                            int normalize) {
3968
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3969
0
                         XML_MAX_HUGE_LENGTH :
3970
0
                         XML_MAX_TEXT_LENGTH;
3971
0
    xmlSBuf buf;
3972
0
    int inSpace = 1;
3973
3974
0
    xmlSBufInit(&buf, maxLength);
3975
3976
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3977
0
                              ctxt->inputNr, /* check */ 0);
3978
3979
0
    if ((normalize) && (inSpace) && (buf.size > 0))
3980
0
        buf.size--;
3981
3982
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3983
0
}
3984
3985
/**
3986
 * Parse a value for an attribute.
3987
 *
3988
 * NOTE: if no normalization is needed, the routine will return pointers
3989
 * directly from the data buffer.
3990
 *
3991
 * 3.3.3 Attribute-Value Normalization:
3992
 *
3993
 * Before the value of an attribute is passed to the application or
3994
 * checked for validity, the XML processor must normalize it as follows:
3995
 *
3996
 * - a character reference is processed by appending the referenced
3997
 *   character to the attribute value
3998
 * - an entity reference is processed by recursively processing the
3999
 *   replacement text of the entity
4000
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4001
 *   appending \#x20 to the normalized value, except that only a single
4002
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4003
 *   parsed entity or the literal entity value of an internal parsed entity
4004
 * - other characters are processed by appending them to the normalized value
4005
 *
4006
 * If the declared value is not CDATA, then the XML processor must further
4007
 * process the normalized attribute value by discarding any leading and
4008
 * trailing space (\#x20) characters, and by replacing sequences of space
4009
 * (\#x20) characters by a single space (\#x20) character.
4010
 * All attributes for which no declaration has been read should be treated
4011
 * by a non-validating parser as if declared CDATA.
4012
 *
4013
 * @param ctxt  an XML parser context
4014
 * @param attlen  attribute len result
4015
 * @param outFlags  resulting XML_ATTVAL_* flags
4016
 * @param special  value from attsSpecial
4017
 * @param isNamespace  whether this is a namespace declaration
4018
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4019
 *     caller if it was copied, this can be detected by val[*len] == 0.
4020
 */
4021
static xmlChar *
4022
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4023
331k
                         int special, int isNamespace) {
4024
331k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4025
174k
                         XML_MAX_HUGE_LENGTH :
4026
331k
                         XML_MAX_TEXT_LENGTH;
4027
331k
    xmlSBuf buf;
4028
331k
    xmlChar *ret;
4029
331k
    int c, l, quote, entFlags, chunkSize;
4030
331k
    int inSpace = 1;
4031
331k
    int replaceEntities;
4032
331k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4033
331k
    int attvalFlags = 0;
4034
4035
    /* Always expand namespace URIs */
4036
331k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4037
4038
331k
    xmlSBufInit(&buf, maxLength);
4039
4040
331k
    GROW;
4041
4042
331k
    quote = CUR;
4043
331k
    if ((quote != '"') && (quote != '\'')) {
4044
6.90k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4045
6.90k
  return(NULL);
4046
6.90k
    }
4047
324k
    NEXTL(1);
4048
4049
324k
    if (ctxt->inSubset == 0)
4050
269k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4051
55.1k
    else
4052
55.1k
        entFlags = XML_ENT_VALIDATED;
4053
4054
324k
    inSpace = 1;
4055
324k
    chunkSize = 0;
4056
4057
130M
    while (1) {
4058
130M
        if (PARSER_STOPPED(ctxt))
4059
540
            goto error;
4060
4061
130M
        if (CUR_PTR >= ctxt->input->end) {
4062
8.69k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4063
8.69k
                           "AttValue: ' expected\n");
4064
8.69k
            goto error;
4065
8.69k
        }
4066
4067
        /*
4068
         * TODO: Check growth threshold
4069
         */
4070
130M
        if (ctxt->input->end - CUR_PTR < 10)
4071
130k
            GROW;
4072
4073
130M
        c = CUR;
4074
4075
130M
        if (c >= 0x80) {
4076
79.4M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4077
79.4M
                    "invalid character in attribute value\n");
4078
79.4M
            if (l == 0) {
4079
33.9M
                if (chunkSize > 0) {
4080
82.5k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4081
82.5k
                    chunkSize = 0;
4082
82.5k
                }
4083
33.9M
                xmlSBufAddReplChar(&buf);
4084
33.9M
                NEXTL(1);
4085
45.4M
            } else {
4086
45.4M
                chunkSize += l;
4087
45.4M
                NEXTL(l);
4088
45.4M
            }
4089
4090
79.4M
            inSpace = 0;
4091
79.4M
        } else if (c != '&') {
4092
50.0M
            if (c > 0x20) {
4093
8.91M
                if (c == quote)
4094
312k
                    break;
4095
4096
8.60M
                if (c == '<')
4097
142k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4098
4099
8.60M
                chunkSize += 1;
4100
8.60M
                inSpace = 0;
4101
41.0M
            } else if (!IS_BYTE_CHAR(c)) {
4102
37.3M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4103
37.3M
                        "invalid character in attribute value\n");
4104
37.3M
                if (chunkSize > 0) {
4105
62.7k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4106
62.7k
                    chunkSize = 0;
4107
62.7k
                }
4108
37.3M
                xmlSBufAddReplChar(&buf);
4109
37.3M
                inSpace = 0;
4110
37.3M
            } else {
4111
                /* Whitespace */
4112
3.75M
                if ((normalize) && (inSpace)) {
4113
                    /* Skip char */
4114
923k
                    if (chunkSize > 0) {
4115
5.54k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4116
5.54k
                        chunkSize = 0;
4117
5.54k
                    }
4118
923k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4119
2.83M
                } else if (c < 0x20) {
4120
                    /* Convert to space */
4121
2.65M
                    if (chunkSize > 0) {
4122
44.2k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4123
44.2k
                        chunkSize = 0;
4124
44.2k
                    }
4125
4126
2.65M
                    xmlSBufAddCString(&buf, " ", 1);
4127
2.65M
                } else {
4128
173k
                    chunkSize += 1;
4129
173k
                }
4130
4131
3.75M
                inSpace = 1;
4132
4133
3.75M
                if ((c == 0xD) && (NXT(1) == 0xA))
4134
2.23k
                    CUR_PTR++;
4135
3.75M
            }
4136
4137
49.7M
            NEXTL(1);
4138
49.7M
        } else if (NXT(1) == '#') {
4139
25.3k
            int val;
4140
4141
25.3k
            if (chunkSize > 0) {
4142
10.4k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4143
10.4k
                chunkSize = 0;
4144
10.4k
            }
4145
4146
25.3k
            val = xmlParseCharRef(ctxt);
4147
25.3k
            if (val == 0)
4148
2.93k
                goto error;
4149
4150
22.4k
            if ((val == '&') && (!replaceEntities)) {
4151
                /*
4152
                 * The reparsing will be done in xmlNodeParseContent()
4153
                 * called from SAX2.c
4154
                 */
4155
3.11k
                xmlSBufAddCString(&buf, "&#38;", 5);
4156
3.11k
                inSpace = 0;
4157
19.3k
            } else if (val == ' ') {
4158
4.63k
                if ((normalize) && (inSpace))
4159
237
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4160
4.40k
                else
4161
4.40k
                    xmlSBufAddCString(&buf, " ", 1);
4162
4.63k
                inSpace = 1;
4163
14.7k
            } else {
4164
14.7k
                xmlSBufAddChar(&buf, val);
4165
14.7k
                inSpace = 0;
4166
14.7k
            }
4167
1.12M
        } else {
4168
1.12M
            const xmlChar *name;
4169
1.12M
            xmlEntityPtr ent;
4170
4171
1.12M
            if (chunkSize > 0) {
4172
83.4k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4173
83.4k
                chunkSize = 0;
4174
83.4k
            }
4175
4176
1.12M
            name = xmlParseEntityRefInternal(ctxt);
4177
1.12M
            if (name == NULL) {
4178
                /*
4179
                 * Probably a literal '&' which wasn't escaped.
4180
                 * TODO: Handle gracefully in recovery mode.
4181
                 */
4182
50.6k
                continue;
4183
50.6k
            }
4184
4185
1.07M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4186
1.07M
            if (ent == NULL)
4187
88.9k
                continue;
4188
4189
982k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4190
7.52k
                if ((ent->content[0] == '&') && (!replaceEntities))
4191
1.72k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4192
5.79k
                else
4193
5.79k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4194
7.52k
                inSpace = 0;
4195
974k
            } else if (replaceEntities) {
4196
289k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4197
289k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4198
289k
                        /* check */ 1) > 0)
4199
51.0k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4200
684k
            } else {
4201
684k
                if ((ent->flags & entFlags) != entFlags)
4202
1.16k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4203
4204
684k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4205
167
                    ent->content[0] = 0;
4206
167
                    goto error;
4207
167
                }
4208
4209
                /*
4210
                 * Just output the reference
4211
                 */
4212
684k
                xmlSBufAddCString(&buf, "&", 1);
4213
684k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4214
684k
                xmlSBufAddCString(&buf, ";", 1);
4215
4216
684k
                inSpace = 0;
4217
684k
            }
4218
982k
  }
4219
130M
    }
4220
4221
312k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4222
201k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4223
4224
201k
        if (attlen != NULL)
4225
201k
            *attlen = chunkSize;
4226
201k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4227
275
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4228
275
            *attlen -= 1;
4229
275
        }
4230
4231
        /* Report potential error */
4232
201k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4233
201k
    } else {
4234
110k
        if (chunkSize > 0)
4235
82.1k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4236
4237
110k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4238
2.20k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4239
2.20k
            buf.size--;
4240
2.20k
        }
4241
4242
110k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4243
110k
        attvalFlags |= XML_ATTVAL_ALLOC;
4244
4245
110k
        if (ret != NULL) {
4246
110k
            if (attlen != NULL)
4247
62.2k
                *attlen = buf.size;
4248
110k
        }
4249
110k
    }
4250
4251
312k
    if (outFlags != NULL)
4252
264k
        *outFlags = attvalFlags;
4253
4254
312k
    NEXTL(1);
4255
4256
312k
    return(ret);
4257
4258
12.3k
error:
4259
12.3k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4260
12.3k
    return(NULL);
4261
324k
}
4262
4263
/**
4264
 * Parse a value for an attribute
4265
 * Note: the parser won't do substitution of entities here, this
4266
 * will be handled later in #xmlStringGetNodeList
4267
 *
4268
 * @deprecated Internal function, don't use.
4269
 *
4270
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4271
 *                       "'" ([^<&'] | Reference)* "'"
4272
 *
4273
 * 3.3.3 Attribute-Value Normalization:
4274
 *
4275
 * Before the value of an attribute is passed to the application or
4276
 * checked for validity, the XML processor must normalize it as follows:
4277
 *
4278
 * - a character reference is processed by appending the referenced
4279
 *   character to the attribute value
4280
 * - an entity reference is processed by recursively processing the
4281
 *   replacement text of the entity
4282
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4283
 *   appending \#x20 to the normalized value, except that only a single
4284
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4285
 *   parsed entity or the literal entity value of an internal parsed entity
4286
 * - other characters are processed by appending them to the normalized value
4287
 *
4288
 * If the declared value is not CDATA, then the XML processor must further
4289
 * process the normalized attribute value by discarding any leading and
4290
 * trailing space (\#x20) characters, and by replacing sequences of space
4291
 * (\#x20) characters by a single space (\#x20) character.
4292
 * All attributes for which no declaration has been read should be treated
4293
 * by a non-validating parser as if declared CDATA.
4294
 *
4295
 * @param ctxt  an XML parser context
4296
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4297
 * caller.
4298
 */
4299
xmlChar *
4300
56.1k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4301
56.1k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4302
56.1k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4303
56.1k
}
4304
4305
/**
4306
 * Parse an XML Literal
4307
 *
4308
 * @deprecated Internal function, don't use.
4309
 *
4310
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4311
 *
4312
 * @param ctxt  an XML parser context
4313
 * @returns the SystemLiteral parsed or NULL
4314
 */
4315
4316
xmlChar *
4317
29.3k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4318
29.3k
    xmlChar *buf = NULL;
4319
29.3k
    int len = 0;
4320
29.3k
    int size = XML_PARSER_BUFFER_SIZE;
4321
29.3k
    int cur, l;
4322
29.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4323
12.1k
                    XML_MAX_TEXT_LENGTH :
4324
29.3k
                    XML_MAX_NAME_LENGTH;
4325
29.3k
    xmlChar stop;
4326
4327
29.3k
    if (RAW == '"') {
4328
14.6k
        NEXT;
4329
14.6k
  stop = '"';
4330
14.6k
    } else if (RAW == '\'') {
4331
12.0k
        NEXT;
4332
12.0k
  stop = '\'';
4333
12.0k
    } else {
4334
2.59k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4335
2.59k
  return(NULL);
4336
2.59k
    }
4337
4338
26.7k
    buf = xmlMalloc(size);
4339
26.7k
    if (buf == NULL) {
4340
23
        xmlErrMemory(ctxt);
4341
23
  return(NULL);
4342
23
    }
4343
26.6k
    cur = xmlCurrentCharRecover(ctxt, &l);
4344
12.9M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4345
12.8M
  if (len + 5 >= size) {
4346
6.09k
      xmlChar *tmp;
4347
6.09k
            int newSize;
4348
4349
6.09k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4350
6.09k
            if (newSize < 0) {
4351
3
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4352
3
                xmlFree(buf);
4353
3
                return(NULL);
4354
3
            }
4355
6.09k
      tmp = xmlRealloc(buf, newSize);
4356
6.09k
      if (tmp == NULL) {
4357
9
          xmlFree(buf);
4358
9
    xmlErrMemory(ctxt);
4359
9
    return(NULL);
4360
9
      }
4361
6.08k
      buf = tmp;
4362
6.08k
            size = newSize;
4363
6.08k
  }
4364
12.8M
  COPY_BUF(buf, len, cur);
4365
12.8M
  NEXTL(l);
4366
12.8M
  cur = xmlCurrentCharRecover(ctxt, &l);
4367
12.8M
    }
4368
26.6k
    buf[len] = 0;
4369
26.6k
    if (!IS_CHAR(cur)) {
4370
1.22k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371
25.4k
    } else {
4372
25.4k
  NEXT;
4373
25.4k
    }
4374
26.6k
    return(buf);
4375
26.6k
}
4376
4377
/**
4378
 * Parse an XML public literal
4379
 *
4380
 * @deprecated Internal function, don't use.
4381
 *
4382
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4383
 *
4384
 * @param ctxt  an XML parser context
4385
 * @returns the PubidLiteral parsed or NULL.
4386
 */
4387
4388
xmlChar *
4389
9.24k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4390
9.24k
    xmlChar *buf = NULL;
4391
9.24k
    int len = 0;
4392
9.24k
    int size = XML_PARSER_BUFFER_SIZE;
4393
9.24k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4394
4.42k
                    XML_MAX_TEXT_LENGTH :
4395
9.24k
                    XML_MAX_NAME_LENGTH;
4396
9.24k
    xmlChar cur;
4397
9.24k
    xmlChar stop;
4398
4399
9.24k
    if (RAW == '"') {
4400
5.61k
        NEXT;
4401
5.61k
  stop = '"';
4402
5.61k
    } else if (RAW == '\'') {
4403
3.36k
        NEXT;
4404
3.36k
  stop = '\'';
4405
3.36k
    } else {
4406
261
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4407
261
  return(NULL);
4408
261
    }
4409
8.98k
    buf = xmlMalloc(size);
4410
8.98k
    if (buf == NULL) {
4411
18
  xmlErrMemory(ctxt);
4412
18
  return(NULL);
4413
18
    }
4414
8.96k
    cur = CUR;
4415
777k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4416
777k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4417
768k
  if (len + 1 >= size) {
4418
1.27k
      xmlChar *tmp;
4419
1.27k
            int newSize;
4420
4421
1.27k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4422
1.27k
            if (newSize < 0) {
4423
3
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424
3
                xmlFree(buf);
4425
3
                return(NULL);
4426
3
            }
4427
1.27k
      tmp = xmlRealloc(buf, newSize);
4428
1.27k
      if (tmp == NULL) {
4429
5
    xmlErrMemory(ctxt);
4430
5
    xmlFree(buf);
4431
5
    return(NULL);
4432
5
      }
4433
1.27k
      buf = tmp;
4434
1.27k
            size = newSize;
4435
1.27k
  }
4436
768k
  buf[len++] = cur;
4437
768k
  NEXT;
4438
768k
  cur = CUR;
4439
768k
    }
4440
8.95k
    buf[len] = 0;
4441
8.95k
    if (cur != stop) {
4442
1.22k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4443
7.73k
    } else {
4444
7.73k
  NEXTL(1);
4445
7.73k
    }
4446
8.95k
    return(buf);
4447
8.96k
}
4448
4449
/* Forward declaration of a static helper defined elsewhere in this file. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4450
4451
/*
4452
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry equals its own index for
 * bytes the table marks as plain character data and is 0x00 for all
 * other bytes: every control character below 0x20 except tab (0x09),
 * the characters '&', '<' and ']', and every byte >= 0x80.
 * NOTE(review): the consumer of this table is outside this part of the
 * file; presumably a zero entry ends its fast scanning loop - confirm.
4453
 */
4454
static const unsigned char test_char_data[256] = {
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* tab (0x9) kept; LF (0xA) and CR (0xD) are zero, CR/LF separated */
4457
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4458
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4459
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' (0x26) is zero */
4460
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4461
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4462
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' (0x3C) is zero */
4463
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4464
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4465
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4466
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' (0x5D) is zero */
4467
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4468
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4469
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4470
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4471
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii: 0x80-0xFF are all zero */
4472
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4475
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4476
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4482
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4483
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4484
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4485
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4486
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4487
};
4488
4489
static void
4490
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4491
1.38M
              int isBlank) {
4492
1.38M
    int checkBlanks;
4493
4494
1.38M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4495
64.5k
        return;
4496
4497
1.31M
    checkBlanks = (!ctxt->keepBlanks) ||
4498
1.31M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4499
4500
    /*
4501
     * Calling areBlanks with only parts of a text node
4502
     * is fundamentally broken, making the NOBLANKS option
4503
     * essentially unusable.
4504
     */
4505
1.31M
    if ((checkBlanks) &&
4506
1.31M
        (areBlanks(ctxt, buf, size, isBlank))) {
4507
14.7k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4508
14.7k
            (ctxt->keepBlanks))
4509
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4510
1.30M
    } else {
4511
1.30M
        if (ctxt->sax->characters != NULL)
4512
1.30M
            ctxt->sax->characters(ctxt->userData, buf, size);
4513
4514
        /*
4515
         * The old code used to update this value for "complex" data
4516
         * even if checkBlanks was false. This was probably a bug.
4517
         */
4518
1.30M
        if ((checkBlanks) && (*ctxt->space == -1))
4519
65.0k
            *ctxt->space = -2;
4520
1.30M
    }
4521
1.31M
}
4522
4523
/**
4524
 * Parse character data. Always makes progress if the first char isn't
4525
 * '<' or '&'.
4526
 *
4527
 * The right angle bracket (>) may be represented using the string "&gt;",
4528
 * and must, for compatibility, be escaped using "&gt;" or a character
4529
 * reference when it appears in the string "]]>" in content, when that
4530
 * string is not marking the end of a CDATA section.
4531
 *
4532
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4533
 * @param ctxt  an XML parser context
4534
 * @param partial  buffer may contain partial UTF-8 sequences
4535
 */
4536
static void
4537
3.43M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4538
3.43M
    const xmlChar *in;
4539
3.43M
    int line = ctxt->input->line;
4540
3.43M
    int col = ctxt->input->col;
4541
3.43M
    int ccol;
4542
3.43M
    int terminate = 0;
4543
4544
3.43M
    GROW;
4545
    /*
4546
     * Accelerated common case where the input doesn't need to be
4547
     * modified before passing it to the handler.
4548
     */
4549
3.43M
    in = ctxt->input->cur;
4550
3.46M
    do {
4551
3.53M
get_more_space:
4552
3.64M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4553
3.53M
        if (*in == 0xA) {
4554
7.24M
            do {
4555
7.24M
                ctxt->input->line++; ctxt->input->col = 1;
4556
7.24M
                in++;
4557
7.24M
            } while (*in == 0xA);
4558
74.7k
            goto get_more_space;
4559
74.7k
        }
4560
3.46M
        if (*in == '<') {
4561
120k
            while (in > ctxt->input->cur) {
4562
60.3k
                const xmlChar *tmp = ctxt->input->cur;
4563
60.3k
                size_t nbchar = in - tmp;
4564
4565
60.3k
                if (nbchar > XML_MAX_ITEMS)
4566
0
                    nbchar = XML_MAX_ITEMS;
4567
60.3k
                ctxt->input->cur += nbchar;
4568
4569
60.3k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4570
60.3k
            }
4571
60.3k
            return;
4572
60.3k
        }
4573
4574
5.46M
get_more:
4575
5.46M
        ccol = ctxt->input->col;
4576
11.4M
        while (test_char_data[*in]) {
4577
5.98M
            in++;
4578
5.98M
            ccol++;
4579
5.98M
        }
4580
5.46M
        ctxt->input->col = ccol;
4581
5.46M
        if (*in == 0xA) {
4582
4.37M
            do {
4583
4.37M
                ctxt->input->line++; ctxt->input->col = 1;
4584
4.37M
                in++;
4585
4.37M
            } while (*in == 0xA);
4586
80.8k
            goto get_more;
4587
80.8k
        }
4588
5.38M
        if (*in == ']') {
4589
1.98M
            size_t avail = ctxt->input->end - in;
4590
4591
1.98M
            if (partial && avail < 2) {
4592
363
                terminate = 1;
4593
363
                goto invoke_callback;
4594
363
            }
4595
1.98M
            if (in[1] == ']') {
4596
1.70M
                if (partial && avail < 3) {
4597
1.86k
                    terminate = 1;
4598
1.86k
                    goto invoke_callback;
4599
1.86k
                }
4600
1.70M
                if (in[2] == '>')
4601
5.31k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4602
1.70M
            }
4603
4604
1.98M
            in++;
4605
1.98M
            ctxt->input->col++;
4606
1.98M
            goto get_more;
4607
1.98M
        }
4608
4609
3.40M
invoke_callback:
4610
3.84M
        while (in > ctxt->input->cur) {
4611
441k
            const xmlChar *tmp = ctxt->input->cur;
4612
441k
            size_t nbchar = in - tmp;
4613
4614
441k
            if (nbchar > XML_MAX_ITEMS)
4615
0
                nbchar = XML_MAX_ITEMS;
4616
441k
            ctxt->input->cur += nbchar;
4617
4618
441k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4619
4620
441k
            line = ctxt->input->line;
4621
441k
            col = ctxt->input->col;
4622
441k
        }
4623
3.40M
        ctxt->input->cur = in;
4624
3.40M
        if (*in == 0xD) {
4625
26.4k
            in++;
4626
26.4k
            if (*in == 0xA) {
4627
20.2k
                ctxt->input->cur = in;
4628
20.2k
                in++;
4629
20.2k
                ctxt->input->line++; ctxt->input->col = 1;
4630
20.2k
                continue; /* while */
4631
20.2k
            }
4632
6.21k
            in--;
4633
6.21k
        }
4634
3.37M
        if (*in == '<') {
4635
191k
            return;
4636
191k
        }
4637
3.18M
        if (*in == '&') {
4638
58.9k
            return;
4639
58.9k
        }
4640
3.12M
        if (terminate) {
4641
2.23k
            return;
4642
2.23k
        }
4643
3.12M
        SHRINK;
4644
3.12M
        GROW;
4645
3.12M
        in = ctxt->input->cur;
4646
3.14M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4647
3.14M
             (*in == 0x09) || (*in == 0x0a));
4648
3.12M
    ctxt->input->line = line;
4649
3.12M
    ctxt->input->col = col;
4650
3.12M
    xmlParseCharDataComplex(ctxt, partial);
4651
3.12M
}
4652
4653
/**
4654
 * Always makes progress if the first char isn't '<' or '&'.
4655
 *
4656
 * Parse a CharData section. This is the fallback function
4657
 * of #xmlParseCharData when the parsing requires handling
4658
 * of non-ASCII characters.
4659
 *
4660
 * @param ctxt  an XML parser context
4661
 * @param partial  whether the input can end with truncated UTF-8
4662
 */
4663
static void
4664
3.12M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4665
3.12M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4666
3.12M
    int nbchar = 0;
4667
3.12M
    int cur, l;
4668
4669
3.12M
    cur = xmlCurrentCharRecover(ctxt, &l);
4670
71.5M
    while ((cur != '<') && /* checked */
4671
71.5M
           (cur != '&') &&
4672
71.5M
     (IS_CHAR(cur))) {
4673
68.4M
        if (cur == ']') {
4674
2.38M
            size_t avail = ctxt->input->end - ctxt->input->cur;
4675
4676
2.38M
            if (partial && avail < 2)
4677
139
                break;
4678
2.38M
            if (NXT(1) == ']') {
4679
2.13M
                if (partial && avail < 3)
4680
742
                    break;
4681
2.13M
                if (NXT(2) == '>')
4682
1.82k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4683
2.13M
            }
4684
2.38M
        }
4685
4686
68.4M
  COPY_BUF(buf, nbchar, cur);
4687
  /* move current position before possibly calling ctxt->sax->characters */
4688
68.4M
  NEXTL(l);
4689
68.4M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4690
580k
      buf[nbchar] = 0;
4691
4692
580k
            xmlCharacters(ctxt, buf, nbchar, 0);
4693
580k
      nbchar = 0;
4694
580k
            SHRINK;
4695
580k
  }
4696
68.4M
  cur = xmlCurrentCharRecover(ctxt, &l);
4697
68.4M
    }
4698
3.12M
    if (nbchar != 0) {
4699
299k
        buf[nbchar] = 0;
4700
4701
299k
        xmlCharacters(ctxt, buf, nbchar, 0);
4702
299k
    }
4703
    /*
4704
     * cur == 0 can mean
4705
     *
4706
     * - End of buffer.
4707
     * - An actual 0 character.
4708
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4709
     */
4710
3.12M
    if (ctxt->input->cur < ctxt->input->end) {
4711
3.11M
        if ((cur == 0) && (CUR != 0)) {
4712
1.52k
            if (partial == 0) {
4713
1.31k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714
1.31k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4715
1.31k
                NEXTL(1);
4716
1.31k
            }
4717
3.11M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4718
            /* Generate the error and skip the offending character */
4719
2.99M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4720
2.99M
                              "PCDATA invalid Char value %d\n", cur);
4721
2.99M
            NEXTL(l);
4722
2.99M
        }
4723
3.11M
    }
4724
3.12M
}
4725
4726
/**
4727
 * @deprecated Internal function, don't use.
4728
 * @param ctxt  an XML parser context
4729
 * @param cdata  unused
4730
 */
4731
void
4732
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4733
0
    xmlParseCharDataInternal(ctxt, 0);
4734
0
}
4735
4736
/**
4737
 * Parse an External ID or a Public ID
4738
 *
4739
 * @deprecated Internal function, don't use.
4740
 *
4741
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4742
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4743
 *
4744
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4745
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4746
 *
4747
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4748
 *
4749
 * @param ctxt  an XML parser context
4750
 * @param publicId  a xmlChar** receiving PubidLiteral
4751
 * @param strict  indicate whether we should restrict parsing to only
4752
 *          production [75], see NOTE above
4753
 * @returns the function returns SystemLiteral and in the second
4754
 *                case publicID receives PubidLiteral; if strict is off
4755
 *                it is possible to return NULL and have publicID set.
4756
 */
4757
4758
xmlChar *
4759
56.1k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4760
56.1k
    xmlChar *URI = NULL;
4761
4762
56.1k
    *publicId = NULL;
4763
56.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4764
22.3k
        SKIP(6);
4765
22.3k
  if (SKIP_BLANKS == 0) {
4766
823
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4767
823
                     "Space required after 'SYSTEM'\n");
4768
823
  }
4769
22.3k
  URI = xmlParseSystemLiteral(ctxt);
4770
22.3k
  if (URI == NULL) {
4771
760
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4772
760
        }
4773
33.7k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4774
9.24k
        SKIP(6);
4775
9.24k
  if (SKIP_BLANKS == 0) {
4776
782
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4777
782
        "Space required after 'PUBLIC'\n");
4778
782
  }
4779
9.24k
  *publicId = xmlParsePubidLiteral(ctxt);
4780
9.24k
  if (*publicId == NULL) {
4781
287
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4782
287
  }
4783
9.24k
  if (strict) {
4784
      /*
4785
       * We don't handle [83] so "S SystemLiteral" is required.
4786
       */
4787
6.50k
      if (SKIP_BLANKS == 0) {
4788
2.17k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
2.17k
      "Space required after the Public Identifier\n");
4790
2.17k
      }
4791
6.50k
  } else {
4792
      /*
4793
       * We handle [83] so we return immediately, if
4794
       * "S SystemLiteral" is not detected. We skip blanks if no
4795
             * system literal was found, but this is harmless since we must
4796
             * be at the end of a NotationDecl.
4797
       */
4798
2.73k
      if (SKIP_BLANKS == 0) return(NULL);
4799
791
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4800
791
  }
4801
6.93k
  URI = xmlParseSystemLiteral(ctxt);
4802
6.93k
  if (URI == NULL) {
4803
1.86k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4804
1.86k
        }
4805
6.93k
    }
4806
53.8k
    return(URI);
4807
56.1k
}
4808
4809
/**
4810
 * Parse an XML (SGML) comment <!-- .... -->
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 * This is the slow routine in case the accelerator for ascii didn't work
4814
 *
4815
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4816
 * @param ctxt  an XML parser context
4817
 * @param buf  the already parsed part of the buffer
4818
 * @param len  number of bytes in the buffer
4819
 * @param size  allocated size of the buffer
4820
 */
4821
static void
4822
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4823
16.9k
                       size_t len, size_t size) {
4824
16.9k
    int q, ql;
4825
16.9k
    int r, rl;
4826
16.9k
    int cur, l;
4827
16.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4828
6.90k
                    XML_MAX_HUGE_LENGTH :
4829
16.9k
                    XML_MAX_TEXT_LENGTH;
4830
4831
16.9k
    if (buf == NULL) {
4832
4.42k
        len = 0;
4833
4.42k
  size = XML_PARSER_BUFFER_SIZE;
4834
4.42k
  buf = xmlMalloc(size);
4835
4.42k
  if (buf == NULL) {
4836
25
      xmlErrMemory(ctxt);
4837
25
      return;
4838
25
  }
4839
4.42k
    }
4840
16.9k
    q = xmlCurrentCharRecover(ctxt, &ql);
4841
16.9k
    if (q == 0)
4842
3.63k
        goto not_terminated;
4843
13.2k
    if (!IS_CHAR(q)) {
4844
761
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845
761
                          "xmlParseComment: invalid xmlChar value %d\n",
4846
761
                    q);
4847
761
  xmlFree (buf);
4848
761
  return;
4849
761
    }
4850
12.5k
    NEXTL(ql);
4851
12.5k
    r = xmlCurrentCharRecover(ctxt, &rl);
4852
12.5k
    if (r == 0)
4853
376
        goto not_terminated;
4854
12.1k
    if (!IS_CHAR(r)) {
4855
763
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4856
763
                          "xmlParseComment: invalid xmlChar value %d\n",
4857
763
                    r);
4858
763
  xmlFree (buf);
4859
763
  return;
4860
763
    }
4861
11.3k
    NEXTL(rl);
4862
11.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
4863
11.3k
    if (cur == 0)
4864
487
        goto not_terminated;
4865
19.4M
    while (IS_CHAR(cur) && /* checked */
4866
19.4M
           ((cur != '>') ||
4867
19.4M
      (r != '-') || (q != '-'))) {
4868
19.3M
  if ((r == '-') && (q == '-')) {
4869
2.07M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4870
2.07M
  }
4871
19.3M
  if (len + 5 >= size) {
4872
5.44k
      xmlChar *tmp;
4873
5.44k
            int newSize;
4874
4875
5.44k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4876
5.44k
            if (newSize < 0) {
4877
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878
0
                             "Comment too big found", NULL);
4879
0
                xmlFree (buf);
4880
0
                return;
4881
0
            }
4882
5.44k
      tmp = xmlRealloc(buf, newSize);
4883
5.44k
      if (tmp == NULL) {
4884
10
    xmlErrMemory(ctxt);
4885
10
    xmlFree(buf);
4886
10
    return;
4887
10
      }
4888
5.43k
      buf = tmp;
4889
5.43k
            size = newSize;
4890
5.43k
  }
4891
19.3M
  COPY_BUF(buf, len, q);
4892
4893
19.3M
  q = r;
4894
19.3M
  ql = rl;
4895
19.3M
  r = cur;
4896
19.3M
  rl = l;
4897
4898
19.3M
  NEXTL(l);
4899
19.3M
  cur = xmlCurrentCharRecover(ctxt, &l);
4900
4901
19.3M
    }
4902
10.8k
    buf[len] = 0;
4903
10.8k
    if (cur == 0) {
4904
2.84k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4905
2.84k
                       "Comment not terminated \n<!--%.50s\n", buf);
4906
8.05k
    } else if (!IS_CHAR(cur)) {
4907
751
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4908
751
                          "xmlParseComment: invalid xmlChar value %d\n",
4909
751
                    cur);
4910
7.30k
    } else {
4911
7.30k
        NEXT;
4912
7.30k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4913
7.30k
      (!ctxt->disableSAX))
4914
6.00k
      ctxt->sax->comment(ctxt->userData, buf);
4915
7.30k
    }
4916
10.8k
    xmlFree(buf);
4917
10.8k
    return;
4918
4.50k
not_terminated:
4919
4.50k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4920
4.50k
       "Comment not terminated\n", NULL);
4921
4.50k
    xmlFree(buf);
4922
4.50k
}
4923
4924
/**
4925
 * Parse an XML (SGML) comment. Always consumes '<!'.
4926
 *
4927
 * @deprecated Internal function, don't use.
4928
 *
4929
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4930
 *  must not occur within comments. "
4931
 *
4932
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4933
 * @param ctxt  an XML parser context
4934
 */
4935
void
4936
352k
xmlParseComment(xmlParserCtxt *ctxt) {
4937
352k
    xmlChar *buf = NULL;
4938
352k
    size_t size = XML_PARSER_BUFFER_SIZE;
4939
352k
    size_t len = 0;
4940
352k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4941
88.3k
                       XML_MAX_HUGE_LENGTH :
4942
352k
                       XML_MAX_TEXT_LENGTH;
4943
352k
    const xmlChar *in;
4944
352k
    size_t nbchar = 0;
4945
352k
    int ccol;
4946
4947
    /*
4948
     * Check that there is a comment right here.
4949
     */
4950
352k
    if ((RAW != '<') || (NXT(1) != '!'))
4951
0
        return;
4952
352k
    SKIP(2);
4953
352k
    if ((RAW != '-') || (NXT(1) != '-'))
4954
35
        return;
4955
352k
    SKIP(2);
4956
352k
    GROW;
4957
4958
    /*
4959
     * Accelerated common case where the input doesn't need to be
4960
     * modified before passing it to the handler.
4961
     */
4962
352k
    in = ctxt->input->cur;
4963
353k
    do {
4964
353k
  if (*in == 0xA) {
4965
4.17k
      do {
4966
4.17k
    ctxt->input->line++; ctxt->input->col = 1;
4967
4.17k
    in++;
4968
4.17k
      } while (*in == 0xA);
4969
3.93k
  }
4970
3.64M
get_more:
4971
3.64M
        ccol = ctxt->input->col;
4972
11.9M
  while (((*in > '-') && (*in <= 0x7F)) ||
4973
11.9M
         ((*in >= 0x20) && (*in < '-')) ||
4974
11.9M
         (*in == 0x09)) {
4975
8.34M
        in++;
4976
8.34M
        ccol++;
4977
8.34M
  }
4978
3.64M
  ctxt->input->col = ccol;
4979
3.64M
  if (*in == 0xA) {
4980
86.4k
      do {
4981
86.4k
    ctxt->input->line++; ctxt->input->col = 1;
4982
86.4k
    in++;
4983
86.4k
      } while (*in == 0xA);
4984
25.0k
      goto get_more;
4985
25.0k
  }
4986
3.62M
  nbchar = in - ctxt->input->cur;
4987
  /*
4988
   * save current set of data
4989
   */
4990
3.62M
  if (nbchar > 0) {
4991
3.29M
            if (nbchar > maxLength - len) {
4992
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4993
0
                                  "Comment too big found", NULL);
4994
0
                xmlFree(buf);
4995
0
                return;
4996
0
            }
4997
3.29M
            if (buf == NULL) {
4998
37.4k
                if ((*in == '-') && (in[1] == '-'))
4999
17.4k
                    size = nbchar + 1;
5000
19.9k
                else
5001
19.9k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5002
37.4k
                buf = xmlMalloc(size);
5003
37.4k
                if (buf == NULL) {
5004
26
                    xmlErrMemory(ctxt);
5005
26
                    return;
5006
26
                }
5007
37.3k
                len = 0;
5008
3.25M
            } else if (len + nbchar + 1 >= size) {
5009
12.5k
                xmlChar *new_buf;
5010
12.5k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5011
12.5k
                new_buf = xmlRealloc(buf, size);
5012
12.5k
                if (new_buf == NULL) {
5013
7
                    xmlErrMemory(ctxt);
5014
7
                    xmlFree(buf);
5015
7
                    return;
5016
7
                }
5017
12.5k
                buf = new_buf;
5018
12.5k
            }
5019
3.29M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5020
3.29M
            len += nbchar;
5021
3.29M
            buf[len] = 0;
5022
3.29M
  }
5023
3.62M
  ctxt->input->cur = in;
5024
3.62M
  if (*in == 0xA) {
5025
0
      in++;
5026
0
      ctxt->input->line++; ctxt->input->col = 1;
5027
0
  }
5028
3.62M
  if (*in == 0xD) {
5029
6.93k
      in++;
5030
6.93k
      if (*in == 0xA) {
5031
6.21k
    ctxt->input->cur = in;
5032
6.21k
    in++;
5033
6.21k
    ctxt->input->line++; ctxt->input->col = 1;
5034
6.21k
    goto get_more;
5035
6.21k
      }
5036
724
      in--;
5037
724
  }
5038
3.61M
  SHRINK;
5039
3.61M
  GROW;
5040
3.61M
  in = ctxt->input->cur;
5041
3.61M
  if (*in == '-') {
5042
3.59M
      if (in[1] == '-') {
5043
2.16M
          if (in[2] == '>') {
5044
335k
        SKIP(3);
5045
335k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5046
335k
            (!ctxt->disableSAX)) {
5047
317k
      if (buf != NULL)
5048
20.2k
          ctxt->sax->comment(ctxt->userData, buf);
5049
297k
      else
5050
297k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5051
317k
        }
5052
335k
        if (buf != NULL)
5053
24.8k
            xmlFree(buf);
5054
335k
        return;
5055
335k
    }
5056
1.83M
    if (buf != NULL) {
5057
1.82M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5058
1.82M
                          "Double hyphen within comment: "
5059
1.82M
                                      "<!--%.50s\n",
5060
1.82M
              buf);
5061
1.82M
    } else
5062
5.32k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5063
5.32k
                          "Double hyphen within comment\n", NULL);
5064
1.83M
    in++;
5065
1.83M
    ctxt->input->col++;
5066
1.83M
      }
5067
3.26M
      in++;
5068
3.26M
      ctxt->input->col++;
5069
3.26M
      goto get_more;
5070
3.59M
  }
5071
3.61M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5072
16.9k
    xmlParseCommentComplex(ctxt, buf, len, size);
5073
16.9k
}
5074
5075
5076
/**
5077
 * Parse the name of a PI
5078
 *
5079
 * @deprecated Internal function, don't use.
5080
 *
5081
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5082
 *
5083
 * @param ctxt  an XML parser context
5084
 * @returns the PITarget name or NULL
5085
 */
5086
5087
const xmlChar *
5088
60.6k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5089
60.6k
    const xmlChar *name;
5090
5091
60.6k
    name = xmlParseName(ctxt);
5092
60.6k
    if ((name != NULL) &&
5093
60.6k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5094
60.6k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5095
60.6k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5096
6.83k
  int i;
5097
6.83k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5098
6.83k
      (name[2] == 'l') && (name[3] == 0)) {
5099
5.19k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100
5.19k
     "XML declaration allowed only at the start of the document\n");
5101
5.19k
      return(name);
5102
5.19k
  } else if (name[3] == 0) {
5103
600
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5104
600
      return(name);
5105
600
  }
5106
2.75k
  for (i = 0;;i++) {
5107
2.75k
      if (xmlW3CPIs[i] == NULL) break;
5108
1.93k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5109
226
          return(name);
5110
1.93k
  }
5111
821
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5112
821
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5113
821
          NULL, NULL);
5114
821
    }
5115
54.5k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5116
512
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5117
512
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5118
512
    }
5119
54.5k
    return(name);
5120
60.6k
}
5121
5122
#ifdef LIBXML_CATALOG_ENABLED
5123
/**
5124
 * Parse an XML Catalog Processing Instruction.
5125
 *
5126
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5127
 *
5128
 * Occurs only if allowed by the user and if happening in the Misc
5129
 * part of the document before any doctype information.
5130
 * This will add the given catalog to the parsing context in order
5131
 * to be used if resolution is needed further down in the document
5132
 *
5133
 * @param ctxt  an XML parser context
5134
 * @param catalog  the PI value string
5135
 */
5136
5137
static void
5138
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5139
0
    xmlChar *URL = NULL;
5140
0
    const xmlChar *tmp, *base;
5141
0
    xmlChar marker;
5142
5143
0
    tmp = catalog;
5144
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5145
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5146
0
  goto error;
5147
0
    tmp += 7;
5148
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5149
0
    if (*tmp != '=') {
5150
0
  return;
5151
0
    }
5152
0
    tmp++;
5153
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5154
0
    marker = *tmp;
5155
0
    if ((marker != '\'') && (marker != '"'))
5156
0
  goto error;
5157
0
    tmp++;
5158
0
    base = tmp;
5159
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5160
0
    if (*tmp == 0)
5161
0
  goto error;
5162
0
    URL = xmlStrndup(base, tmp - base);
5163
0
    tmp++;
5164
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5165
0
    if (*tmp != 0)
5166
0
  goto error;
5167
5168
0
    if (URL != NULL) {
5169
        /*
5170
         * Unfortunately, the catalog API doesn't report OOM errors.
5171
         * xmlGetLastError isn't very helpful since we don't know
5172
         * where the last error came from. We'd have to reset it
5173
         * before this call and restore it afterwards.
5174
         */
5175
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5176
0
  xmlFree(URL);
5177
0
    }
5178
0
    return;
5179
5180
0
error:
5181
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5182
0
            "Catalog PI syntax error: %s\n",
5183
0
      catalog, NULL);
5184
0
    if (URL != NULL)
5185
0
  xmlFree(URL);
5186
0
}
5187
#endif
5188
5189
/**
5190
 * Parse an XML Processing Instruction.
5191
 *
5192
 * @deprecated Internal function, don't use.
5193
 *
5194
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5195
 *
5196
 * The processing is transferred to SAX once parsed.
5197
 *
5198
 * @param ctxt  an XML parser context
5199
 */
5200
5201
void
5202
60.6k
xmlParsePI(xmlParserCtxt *ctxt) {
5203
60.6k
    xmlChar *buf = NULL;
5204
60.6k
    size_t len = 0;
5205
60.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5206
60.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5207
23.7k
                       XML_MAX_HUGE_LENGTH :
5208
60.6k
                       XML_MAX_TEXT_LENGTH;
5209
60.6k
    int cur, l;
5210
60.6k
    const xmlChar *target;
5211
5212
60.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5213
  /*
5214
   * this is a Processing Instruction.
5215
   */
5216
60.6k
  SKIP(2);
5217
5218
  /*
5219
   * Parse the target name and check for special support like
5220
   * namespace.
5221
   */
5222
60.6k
        target = xmlParsePITarget(ctxt);
5223
60.6k
  if (target != NULL) {
5224
54.7k
      if ((RAW == '?') && (NXT(1) == '>')) {
5225
11.6k
    SKIP(2);
5226
5227
    /*
5228
     * SAX: PI detected.
5229
     */
5230
11.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5231
11.6k
        (ctxt->sax->processingInstruction != NULL))
5232
11.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5233
11.3k
                                         target, NULL);
5234
11.6k
    return;
5235
11.6k
      }
5236
43.1k
      buf = xmlMalloc(size);
5237
43.1k
      if (buf == NULL) {
5238
48
    xmlErrMemory(ctxt);
5239
48
    return;
5240
48
      }
5241
43.0k
      if (SKIP_BLANKS == 0) {
5242
24.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5243
24.7k
        "ParsePI: PI %s space expected\n", target);
5244
24.7k
      }
5245
43.0k
      cur = xmlCurrentCharRecover(ctxt, &l);
5246
11.8M
      while (IS_CHAR(cur) && /* checked */
5247
11.8M
       ((cur != '?') || (NXT(1) != '>'))) {
5248
11.8M
    if (len + 5 >= size) {
5249
14.6k
        xmlChar *tmp;
5250
14.6k
                    int newSize;
5251
5252
14.6k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5253
14.6k
                    if (newSize < 0) {
5254
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5255
0
                                          "PI %s too big found", target);
5256
0
                        xmlFree(buf);
5257
0
                        return;
5258
0
                    }
5259
14.6k
        tmp = xmlRealloc(buf, newSize);
5260
14.6k
        if (tmp == NULL) {
5261
6
      xmlErrMemory(ctxt);
5262
6
      xmlFree(buf);
5263
6
      return;
5264
6
        }
5265
14.6k
        buf = tmp;
5266
14.6k
                    size = newSize;
5267
14.6k
    }
5268
11.8M
    COPY_BUF(buf, len, cur);
5269
11.8M
    NEXTL(l);
5270
11.8M
    cur = xmlCurrentCharRecover(ctxt, &l);
5271
11.8M
      }
5272
43.0k
      buf[len] = 0;
5273
43.0k
      if (cur != '?') {
5274
11.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5275
11.5k
          "ParsePI: PI %s never end ...\n", target);
5276
31.5k
      } else {
5277
31.5k
    SKIP(2);
5278
5279
31.5k
#ifdef LIBXML_CATALOG_ENABLED
5280
31.5k
    if ((ctxt->inSubset == 0) &&
5281
31.5k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5282
3.60k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5283
5284
3.60k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5285
3.60k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5286
1.02k
       (allow == XML_CATA_ALLOW_ALL)))
5287
0
      xmlParseCatalogPI(ctxt, buf);
5288
3.60k
    }
5289
31.5k
#endif
5290
5291
    /*
5292
     * SAX: PI detected.
5293
     */
5294
31.5k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295
31.5k
        (ctxt->sax->processingInstruction != NULL))
5296
28.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5297
28.2k
                                         target, buf);
5298
31.5k
      }
5299
43.0k
      xmlFree(buf);
5300
43.0k
  } else {
5301
5.81k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302
5.81k
  }
5303
60.6k
    }
5304
60.6k
}
5305
5306
/**
5307
 * Parse a notation declaration. Always consumes '<!'.
5308
 *
5309
 * @deprecated Internal function, don't use.
5310
 *
5311
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5312
 *                           S? '>'
5313
 *
5314
 * Hence there are actually 3 choices:
5315
 *
5316
 *     'PUBLIC' S PubidLiteral
5317
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5318
 *     'SYSTEM' S SystemLiteral
5319
 *
5320
 * See the NOTE on #xmlParseExternalID.
5321
 *
5322
 * @param ctxt  an XML parser context
5323
 */
5324
5325
void
5326
10.0k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5327
10.0k
    const xmlChar *name;
5328
10.0k
    xmlChar *Pubid;
5329
10.0k
    xmlChar *Systemid;
5330
5331
10.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5332
0
        return;
5333
10.0k
    SKIP(2);
5334
5335
10.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5336
10.0k
#ifdef LIBXML_VALID_ENABLED
5337
10.0k
  int oldInputNr = ctxt->inputNr;
5338
10.0k
#endif
5339
5340
10.0k
  SKIP(8);
5341
10.0k
  if (SKIP_BLANKS_PE == 0) {
5342
410
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5343
410
         "Space required after '<!NOTATION'\n");
5344
410
      return;
5345
410
  }
5346
5347
9.61k
        name = xmlParseName(ctxt);
5348
9.61k
  if (name == NULL) {
5349
267
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5350
267
      return;
5351
267
  }
5352
9.35k
  if (xmlStrchr(name, ':') != NULL) {
5353
231
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5354
231
         "colons are forbidden from notation names '%s'\n",
5355
231
         name, NULL, NULL);
5356
231
  }
5357
9.35k
  if (SKIP_BLANKS_PE == 0) {
5358
227
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
227
         "Space required after the NOTATION name'\n");
5360
227
      return;
5361
227
  }
5362
5363
  /*
5364
   * Parse the IDs.
5365
   */
5366
9.12k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5367
9.12k
  SKIP_BLANKS_PE;
5368
5369
9.12k
  if (RAW == '>') {
5370
7.00k
#ifdef LIBXML_VALID_ENABLED
5371
7.00k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5372
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5373
0
                           "Notation declaration doesn't start and stop"
5374
0
                                 " in the same entity\n",
5375
0
                                 NULL, NULL);
5376
0
      }
5377
7.00k
#endif
5378
7.00k
      NEXT;
5379
7.00k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5380
7.00k
    (ctxt->sax->notationDecl != NULL))
5381
6.44k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5382
7.00k
  } else {
5383
2.12k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5384
2.12k
  }
5385
9.12k
  if (Systemid != NULL) xmlFree(Systemid);
5386
9.12k
  if (Pubid != NULL) xmlFree(Pubid);
5387
9.12k
    }
5388
10.0k
}
5389
5390
/**
5391
 * Parse an entity declaration. Always consumes '<!'.
5392
 *
5393
 * @deprecated Internal function, don't use.
5394
 *
5395
 *     [70] EntityDecl ::= GEDecl | PEDecl
5396
 *
5397
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5398
 *
5399
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5400
 *
5401
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5402
 *
5403
 *     [74] PEDef ::= EntityValue | ExternalID
5404
 *
5405
 *     [76] NDataDecl ::= S 'NDATA' S Name
5406
 *
5407
 * [ VC: Notation Declared ]
5408
 * The Name must match the declared name of a notation.
5409
 *
5410
 * @param ctxt  an XML parser context
5411
 */
5412
5413
void
5414
67.3k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5415
67.3k
    const xmlChar *name = NULL;
5416
67.3k
    xmlChar *value = NULL;
5417
67.3k
    xmlChar *URI = NULL, *literal = NULL;
5418
67.3k
    const xmlChar *ndata = NULL;
5419
67.3k
    int isParameter = 0;
5420
67.3k
    xmlChar *orig = NULL;
5421
5422
67.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5423
0
        return;
5424
67.3k
    SKIP(2);
5425
5426
    /* GROW; done in the caller */
5427
67.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5428
67.2k
#ifdef LIBXML_VALID_ENABLED
5429
67.2k
  int oldInputNr = ctxt->inputNr;
5430
67.2k
#endif
5431
5432
67.2k
  SKIP(6);
5433
67.2k
  if (SKIP_BLANKS_PE == 0) {
5434
4.89k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5435
4.89k
         "Space required after '<!ENTITY'\n");
5436
4.89k
  }
5437
5438
67.2k
  if (RAW == '%') {
5439
24.8k
      NEXT;
5440
24.8k
      if (SKIP_BLANKS_PE == 0) {
5441
632
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442
632
             "Space required after '%%'\n");
5443
632
      }
5444
24.8k
      isParameter = 1;
5445
24.8k
  }
5446
5447
67.2k
        name = xmlParseName(ctxt);
5448
67.2k
  if (name == NULL) {
5449
1.42k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5450
1.42k
                     "xmlParseEntityDecl: no name\n");
5451
1.42k
            return;
5452
1.42k
  }
5453
65.8k
  if (xmlStrchr(name, ':') != NULL) {
5454
472
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5455
472
         "colons are forbidden from entities names '%s'\n",
5456
472
         name, NULL, NULL);
5457
472
  }
5458
65.8k
  if (SKIP_BLANKS_PE == 0) {
5459
3.66k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
3.66k
         "Space required after the entity name\n");
5461
3.66k
  }
5462
5463
  /*
5464
   * handle the various case of definitions...
5465
   */
5466
65.8k
  if (isParameter) {
5467
24.3k
      if ((RAW == '"') || (RAW == '\'')) {
5468
15.0k
          value = xmlParseEntityValue(ctxt, &orig);
5469
15.0k
    if (value) {
5470
14.7k
        if ((ctxt->sax != NULL) &&
5471
14.7k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472
13.2k
      ctxt->sax->entityDecl(ctxt->userData, name,
5473
13.2k
                        XML_INTERNAL_PARAMETER_ENTITY,
5474
13.2k
            NULL, NULL, value);
5475
14.7k
    }
5476
15.0k
      } else {
5477
9.25k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5478
9.25k
    if ((URI == NULL) && (literal == NULL)) {
5479
251
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480
251
    }
5481
9.25k
    if (URI) {
5482
8.80k
                    if (xmlStrchr(URI, '#')) {
5483
289
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484
8.51k
                    } else {
5485
8.51k
                        if ((ctxt->sax != NULL) &&
5486
8.51k
                            (!ctxt->disableSAX) &&
5487
8.51k
                            (ctxt->sax->entityDecl != NULL))
5488
7.06k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5489
7.06k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5490
7.06k
                                        literal, URI, NULL);
5491
8.51k
                    }
5492
8.80k
    }
5493
9.25k
      }
5494
41.5k
  } else {
5495
41.5k
      if ((RAW == '"') || (RAW == '\'')) {
5496
31.0k
          value = xmlParseEntityValue(ctxt, &orig);
5497
31.0k
    if ((ctxt->sax != NULL) &&
5498
31.0k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499
28.2k
        ctxt->sax->entityDecl(ctxt->userData, name,
5500
28.2k
        XML_INTERNAL_GENERAL_ENTITY,
5501
28.2k
        NULL, NULL, value);
5502
    /*
5503
     * For expat compatibility in SAX mode.
5504
     */
5505
31.0k
    if ((ctxt->myDoc == NULL) ||
5506
31.0k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5507
1.27k
        if (ctxt->myDoc == NULL) {
5508
214
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5509
214
      if (ctxt->myDoc == NULL) {
5510
1
          xmlErrMemory(ctxt);
5511
1
          goto done;
5512
1
      }
5513
213
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5514
213
        }
5515
1.27k
        if (ctxt->myDoc->intSubset == NULL) {
5516
213
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5517
213
              BAD_CAST "fake", NULL, NULL);
5518
213
                        if (ctxt->myDoc->intSubset == NULL) {
5519
1
                            xmlErrMemory(ctxt);
5520
1
                            goto done;
5521
1
                        }
5522
213
                    }
5523
5524
1.27k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5525
1.27k
                    NULL, NULL, value);
5526
1.27k
    }
5527
31.0k
      } else {
5528
10.4k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5529
10.4k
    if ((URI == NULL) && (literal == NULL)) {
5530
1.98k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531
1.98k
    }
5532
10.4k
    if (URI) {
5533
8.07k
                    if (xmlStrchr(URI, '#')) {
5534
363
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5535
363
                    }
5536
8.07k
    }
5537
10.4k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5538
1.23k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5539
1.23k
           "Space required before 'NDATA'\n");
5540
1.23k
    }
5541
10.4k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5542
851
        SKIP(5);
5543
851
        if (SKIP_BLANKS_PE == 0) {
5544
344
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5545
344
               "Space required after 'NDATA'\n");
5546
344
        }
5547
851
        ndata = xmlParseName(ctxt);
5548
851
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5549
851
            (ctxt->sax->unparsedEntityDecl != NULL))
5550
581
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5551
581
            literal, URI, ndata);
5552
9.60k
    } else {
5553
9.60k
        if ((ctxt->sax != NULL) &&
5554
9.60k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555
9.06k
      ctxt->sax->entityDecl(ctxt->userData, name,
5556
9.06k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5557
9.06k
            literal, URI, NULL);
5558
        /*
5559
         * For expat compatibility in SAX mode.
5560
         * assuming the entity replacement was asked for
5561
         */
5562
9.60k
        if ((ctxt->replaceEntities != 0) &&
5563
9.60k
      ((ctxt->myDoc == NULL) ||
5564
5.86k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5565
78
      if (ctxt->myDoc == NULL) {
5566
12
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5567
12
          if (ctxt->myDoc == NULL) {
5568
1
              xmlErrMemory(ctxt);
5569
1
        goto done;
5570
1
          }
5571
11
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5572
11
      }
5573
5574
77
      if (ctxt->myDoc->intSubset == NULL) {
5575
11
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5576
11
            BAD_CAST "fake", NULL, NULL);
5577
11
                            if (ctxt->myDoc->intSubset == NULL) {
5578
1
                                xmlErrMemory(ctxt);
5579
1
                                goto done;
5580
1
                            }
5581
11
                        }
5582
76
      xmlSAX2EntityDecl(ctxt, name,
5583
76
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584
76
                  literal, URI, NULL);
5585
76
        }
5586
9.60k
    }
5587
10.4k
      }
5588
41.5k
  }
5589
65.8k
  SKIP_BLANKS_PE;
5590
65.8k
  if (RAW != '>') {
5591
3.78k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5592
3.78k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5593
62.0k
  } else {
5594
62.0k
#ifdef LIBXML_VALID_ENABLED
5595
62.0k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5596
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5597
0
                           "Entity declaration doesn't start and stop in"
5598
0
                                 " the same entity\n",
5599
0
                                 NULL, NULL);
5600
0
      }
5601
62.0k
#endif
5602
62.0k
      NEXT;
5603
62.0k
  }
5604
65.8k
  if (orig != NULL) {
5605
      /*
5606
       * Ugly mechanism to save the raw entity value.
5607
       */
5608
45.2k
      xmlEntityPtr cur = NULL;
5609
5610
45.2k
      if (isParameter) {
5611
14.7k
          if ((ctxt->sax != NULL) &&
5612
14.7k
        (ctxt->sax->getParameterEntity != NULL))
5613
14.7k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5614
30.4k
      } else {
5615
30.4k
          if ((ctxt->sax != NULL) &&
5616
30.4k
        (ctxt->sax->getEntity != NULL))
5617
30.4k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5618
30.4k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5619
1.16k
        cur = xmlSAX2GetEntity(ctxt, name);
5620
1.16k
    }
5621
30.4k
      }
5622
45.2k
            if ((cur != NULL) && (cur->orig == NULL)) {
5623
23.2k
    cur->orig = orig;
5624
23.2k
                orig = NULL;
5625
23.2k
      }
5626
45.2k
  }
5627
5628
65.8k
done:
5629
65.8k
  if (value != NULL) xmlFree(value);
5630
65.8k
  if (URI != NULL) xmlFree(URI);
5631
65.8k
  if (literal != NULL) xmlFree(literal);
5632
65.8k
        if (orig != NULL) xmlFree(orig);
5633
65.8k
    }
5634
67.3k
}
5635
5636
/**
5637
 * Parse an attribute default declaration
5638
 *
5639
 * @deprecated Internal function, don't use.
5640
 *
5641
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5642
 *
5643
 * [ VC: Required Attribute ]
5644
 * if the default declaration is the keyword \#REQUIRED, then the
5645
 * attribute must be specified for all elements of the type in the
5646
 * attribute-list declaration.
5647
 *
5648
 * [ VC: Attribute Default Legal ]
5649
 * The declared default value must meet the lexical constraints of
5650
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5651
 *
5652
 * [ VC: Fixed Attribute Default ]
5653
 * if an attribute has a default value declared with the \#FIXED
5654
 * keyword, instances of that attribute must match the default value.
5655
 *
5656
 * [ WFC: No < in Attribute Values ]
5657
 * handled in #xmlParseAttValue
5658
 *
5659
 * @param ctxt  an XML parser context
5660
 * @param value  Receive a possible fixed default value for the attribute
5661
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5662
 *          or XML_ATTRIBUTE_FIXED.
5663
 */
5664
5665
int
5666
93.9k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5667
93.9k
    int val;
5668
93.9k
    xmlChar *ret;
5669
5670
93.9k
    *value = NULL;
5671
93.9k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5672
7.11k
  SKIP(9);
5673
7.11k
  return(XML_ATTRIBUTE_REQUIRED);
5674
7.11k
    }
5675
86.8k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5676
30.6k
  SKIP(8);
5677
30.6k
  return(XML_ATTRIBUTE_IMPLIED);
5678
30.6k
    }
5679
56.1k
    val = XML_ATTRIBUTE_NONE;
5680
56.1k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5681
6.44k
  SKIP(6);
5682
6.44k
  val = XML_ATTRIBUTE_FIXED;
5683
6.44k
  if (SKIP_BLANKS_PE == 0) {
5684
946
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5685
946
         "Space required after '#FIXED'\n");
5686
946
  }
5687
6.44k
    }
5688
56.1k
    ret = xmlParseAttValue(ctxt);
5689
56.1k
    if (ret == NULL) {
5690
7.93k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5691
7.93k
           "Attribute default value declaration error\n");
5692
7.93k
    } else
5693
48.2k
        *value = ret;
5694
56.1k
    return(val);
5695
86.8k
}
5696
5697
/**
5698
 * Parse an Notation attribute type.
5699
 *
5700
 * @deprecated Internal function, don't use.
5701
 *
5702
 * Note: the leading 'NOTATION' S part has already being parsed...
5703
 *
5704
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5705
 *
5706
 * [ VC: Notation Attributes ]
5707
 * Values of this type must match one of the notation names included
5708
 * in the declaration; all notation names in the declaration must be declared.
5709
 *
5710
 * @param ctxt  an XML parser context
5711
 * @returns the notation attribute tree built while parsing
5712
 */
5713
5714
xmlEnumeration *
5715
1.93k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5716
1.93k
    const xmlChar *name;
5717
1.93k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5718
5719
1.93k
    if (RAW != '(') {
5720
278
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5721
278
  return(NULL);
5722
278
    }
5723
2.00k
    do {
5724
2.00k
        NEXT;
5725
2.00k
  SKIP_BLANKS_PE;
5726
2.00k
        name = xmlParseName(ctxt);
5727
2.00k
  if (name == NULL) {
5728
226
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5729
226
         "Name expected in NOTATION declaration\n");
5730
226
            xmlFreeEnumeration(ret);
5731
226
      return(NULL);
5732
226
  }
5733
1.78k
        tmp = NULL;
5734
1.78k
#ifdef LIBXML_VALID_ENABLED
5735
1.78k
        if (ctxt->validate) {
5736
0
            tmp = ret;
5737
0
            while (tmp != NULL) {
5738
0
                if (xmlStrEqual(name, tmp->name)) {
5739
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5740
0
              "standalone: attribute notation value token %s duplicated\n",
5741
0
                                     name, NULL);
5742
0
                    if (!xmlDictOwns(ctxt->dict, name))
5743
0
                        xmlFree((xmlChar *) name);
5744
0
                    break;
5745
0
                }
5746
0
                tmp = tmp->next;
5747
0
            }
5748
0
        }
5749
1.78k
#endif /* LIBXML_VALID_ENABLED */
5750
1.78k
  if (tmp == NULL) {
5751
1.78k
      cur = xmlCreateEnumeration(name);
5752
1.78k
      if (cur == NULL) {
5753
6
                xmlErrMemory(ctxt);
5754
6
                xmlFreeEnumeration(ret);
5755
6
                return(NULL);
5756
6
            }
5757
1.77k
      if (last == NULL) ret = last = cur;
5758
352
      else {
5759
352
    last->next = cur;
5760
352
    last = cur;
5761
352
      }
5762
1.77k
  }
5763
1.77k
  SKIP_BLANKS_PE;
5764
1.77k
    } while (RAW == '|');
5765
1.42k
    if (RAW != ')') {
5766
270
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5767
270
        xmlFreeEnumeration(ret);
5768
270
  return(NULL);
5769
270
    }
5770
1.15k
    NEXT;
5771
1.15k
    return(ret);
5772
1.42k
}
5773
5774
/**
5775
 * Parse an Enumeration attribute type.
5776
 *
5777
 * @deprecated Internal function, don't use.
5778
 *
5779
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5780
 *
5781
 * [ VC: Enumeration ]
5782
 * Values of this type must match one of the Nmtoken tokens in
5783
 * the declaration
5784
 *
5785
 * @param ctxt  an XML parser context
5786
 * @returns the enumeration attribute tree built while parsing
5787
 */
5788
5789
xmlEnumeration *
5790
17.9k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5791
17.9k
    xmlChar *name;
5792
17.9k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5793
5794
17.9k
    if (RAW != '(') {
5795
2.16k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5796
2.16k
  return(NULL);
5797
2.16k
    }
5798
25.6k
    do {
5799
25.6k
        NEXT;
5800
25.6k
  SKIP_BLANKS_PE;
5801
25.6k
        name = xmlParseNmtoken(ctxt);
5802
25.6k
  if (name == NULL) {
5803
232
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5804
232
      return(ret);
5805
232
  }
5806
25.4k
        tmp = NULL;
5807
25.4k
#ifdef LIBXML_VALID_ENABLED
5808
25.4k
        if (ctxt->validate) {
5809
0
            tmp = ret;
5810
0
            while (tmp != NULL) {
5811
0
                if (xmlStrEqual(name, tmp->name)) {
5812
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813
0
              "standalone: attribute enumeration value token %s duplicated\n",
5814
0
                                     name, NULL);
5815
0
                    if (!xmlDictOwns(ctxt->dict, name))
5816
0
                        xmlFree(name);
5817
0
                    break;
5818
0
                }
5819
0
                tmp = tmp->next;
5820
0
            }
5821
0
        }
5822
25.4k
#endif /* LIBXML_VALID_ENABLED */
5823
25.4k
  if (tmp == NULL) {
5824
25.4k
      cur = xmlCreateEnumeration(name);
5825
25.4k
      if (!xmlDictOwns(ctxt->dict, name))
5826
25.4k
    xmlFree(name);
5827
25.4k
      if (cur == NULL) {
5828
25
                xmlErrMemory(ctxt);
5829
25
                xmlFreeEnumeration(ret);
5830
25
                return(NULL);
5831
25
            }
5832
25.3k
      if (last == NULL) ret = last = cur;
5833
9.82k
      else {
5834
9.82k
    last->next = cur;
5835
9.82k
    last = cur;
5836
9.82k
      }
5837
25.3k
  }
5838
25.3k
  SKIP_BLANKS_PE;
5839
25.3k
    } while (RAW == '|');
5840
15.5k
    if (RAW != ')') {
5841
440
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5842
440
  return(ret);
5843
440
    }
5844
15.1k
    NEXT;
5845
15.1k
    return(ret);
5846
15.5k
}
5847
5848
/**
5849
 * Parse an Enumerated attribute type.
5850
 *
5851
 * @deprecated Internal function, don't use.
5852
 *
5853
 *     [57] EnumeratedType ::= NotationType | Enumeration
5854
 *
5855
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5856
 *
5857
 * @param ctxt  an XML parser context
5858
 * @param tree  the enumeration tree built while parsing
5859
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5860
 */
5861
5862
int
5863
20.1k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5864
20.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5865
2.16k
  SKIP(8);
5866
2.16k
  if (SKIP_BLANKS_PE == 0) {
5867
232
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5868
232
         "Space required after 'NOTATION'\n");
5869
232
      return(0);
5870
232
  }
5871
1.93k
  *tree = xmlParseNotationType(ctxt);
5872
1.93k
  if (*tree == NULL) return(0);
5873
1.15k
  return(XML_ATTRIBUTE_NOTATION);
5874
1.93k
    }
5875
17.9k
    *tree = xmlParseEnumerationType(ctxt);
5876
17.9k
    if (*tree == NULL) return(0);
5877
15.5k
    return(XML_ATTRIBUTE_ENUMERATION);
5878
17.9k
}
5879
5880
/**
5881
 * Parse the Attribute list def for an element
5882
 *
5883
 * @deprecated Internal function, don't use.
5884
 *
5885
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886
 *
5887
 *     [55] StringType ::= 'CDATA'
5888
 *
5889
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891
 *
5892
 * Validity constraints for attribute values syntax are checked in
5893
 * #xmlValidateAttributeValue
5894
 *
5895
 * [ VC: ID ]
5896
 * Values of type ID must match the Name production. A name must not
5897
 * appear more than once in an XML document as a value of this type;
5898
 * i.e., ID values must uniquely identify the elements which bear them.
5899
 *
5900
 * [ VC: One ID per Element Type ]
5901
 * No element type may have more than one ID attribute specified.
5902
 *
5903
 * [ VC: ID Attribute Default ]
5904
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5905
 *
5906
 * [ VC: IDREF ]
5907
 * Values of type IDREF must match the Name production, and values
5908
 * of type IDREFS must match Names; each IDREF Name must match the value
5909
 * of an ID attribute on some element in the XML document; i.e. IDREF
5910
 * values must match the value of some ID attribute.
5911
 *
5912
 * [ VC: Entity Name ]
5913
 * Values of type ENTITY must match the Name production, values
5914
 * of type ENTITIES must match Names; each Entity Name must match the
5915
 * name of an unparsed entity declared in the DTD.
5916
 *
5917
 * [ VC: Name Token ]
5918
 * Values of type NMTOKEN must match the Nmtoken production; values
5919
 * of type NMTOKENS must match Nmtokens.
5920
 *
5921
 * @param ctxt  an XML parser context
5922
 * @param tree  the enumeration tree built while parsing
5923
 * @returns the attribute type
5924
 */
5925
int
5926
99.9k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5927
99.9k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5928
15.5k
  SKIP(5);
5929
15.5k
  return(XML_ATTRIBUTE_CDATA);
5930
84.3k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5931
1.94k
  SKIP(6);
5932
1.94k
  return(XML_ATTRIBUTE_IDREFS);
5933
82.3k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5934
2.13k
  SKIP(5);
5935
2.13k
  return(XML_ATTRIBUTE_IDREF);
5936
80.2k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5937
22.4k
        SKIP(2);
5938
22.4k
  return(XML_ATTRIBUTE_ID);
5939
57.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5940
14.0k
  SKIP(6);
5941
14.0k
  return(XML_ATTRIBUTE_ENTITY);
5942
43.7k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5943
6.14k
  SKIP(8);
5944
6.14k
  return(XML_ATTRIBUTE_ENTITIES);
5945
37.6k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5946
7.65k
  SKIP(8);
5947
7.65k
  return(XML_ATTRIBUTE_NMTOKENS);
5948
29.9k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5949
9.83k
  SKIP(7);
5950
9.83k
  return(XML_ATTRIBUTE_NMTOKEN);
5951
9.83k
     }
5952
20.1k
     return(xmlParseEnumeratedType(ctxt, tree));
5953
99.9k
}
5954
5955
/**
5956
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5957
 *
5958
 * @deprecated Internal function, don't use.
5959
 *
5960
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961
 *
5962
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5963
 * @param ctxt  an XML parser context
5964
 */
5965
void
5966
70.0k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5967
70.0k
    const xmlChar *elemName;
5968
70.0k
    const xmlChar *attrName;
5969
70.0k
    xmlEnumerationPtr tree;
5970
5971
70.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5972
0
        return;
5973
70.0k
    SKIP(2);
5974
5975
70.0k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976
69.9k
#ifdef LIBXML_VALID_ENABLED
5977
69.9k
  int oldInputNr = ctxt->inputNr;
5978
69.9k
#endif
5979
5980
69.9k
  SKIP(7);
5981
69.9k
  if (SKIP_BLANKS_PE == 0) {
5982
13.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5983
13.9k
                     "Space required after '<!ATTLIST'\n");
5984
13.9k
  }
5985
69.9k
        elemName = xmlParseName(ctxt);
5986
69.9k
  if (elemName == NULL) {
5987
7.68k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988
7.68k
         "ATTLIST: no name for Element\n");
5989
7.68k
      return;
5990
7.68k
  }
5991
62.3k
  SKIP_BLANKS_PE;
5992
62.3k
  GROW;
5993
143k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5994
107k
      int type;
5995
107k
      int def;
5996
107k
      xmlChar *defaultValue = NULL;
5997
5998
107k
      GROW;
5999
107k
            tree = NULL;
6000
107k
      attrName = xmlParseName(ctxt);
6001
107k
      if (attrName == NULL) {
6002
3.11k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
3.11k
             "ATTLIST: no name for Attribute\n");
6004
3.11k
    break;
6005
3.11k
      }
6006
104k
      GROW;
6007
104k
      if (SKIP_BLANKS_PE == 0) {
6008
4.87k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
4.87k
            "Space required after the attribute name\n");
6010
4.87k
    break;
6011
4.87k
      }
6012
6013
99.9k
      type = xmlParseAttributeType(ctxt, &tree);
6014
99.9k
      if (type <= 0) {
6015
3.43k
          break;
6016
3.43k
      }
6017
6018
96.4k
      GROW;
6019
96.4k
      if (SKIP_BLANKS_PE == 0) {
6020
2.50k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6021
2.50k
             "Space required after the attribute type\n");
6022
2.50k
          if (tree != NULL)
6023
826
        xmlFreeEnumeration(tree);
6024
2.50k
    break;
6025
2.50k
      }
6026
6027
93.9k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6028
93.9k
      if (def <= 0) {
6029
0
                if (defaultValue != NULL)
6030
0
        xmlFree(defaultValue);
6031
0
          if (tree != NULL)
6032
0
        xmlFreeEnumeration(tree);
6033
0
          break;
6034
0
      }
6035
93.9k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6036
47.4k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6037
6038
93.9k
      GROW;
6039
93.9k
            if (RAW != '>') {
6040
66.9k
    if (SKIP_BLANKS_PE == 0) {
6041
12.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6042
12.4k
      "Space required after the attribute default value\n");
6043
12.4k
        if (defaultValue != NULL)
6044
4.43k
      xmlFree(defaultValue);
6045
12.4k
        if (tree != NULL)
6046
744
      xmlFreeEnumeration(tree);
6047
12.4k
        break;
6048
12.4k
    }
6049
66.9k
      }
6050
81.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6051
81.5k
    (ctxt->sax->attributeDecl != NULL))
6052
77.9k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6053
77.9k
                          type, def, defaultValue, tree);
6054
3.57k
      else if (tree != NULL)
6055
1.42k
    xmlFreeEnumeration(tree);
6056
6057
81.5k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6058
81.5k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6059
81.5k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6060
43.7k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6061
43.7k
      }
6062
81.5k
      if (ctxt->sax2) {
6063
81.5k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6064
81.5k
      }
6065
81.5k
      if (defaultValue != NULL)
6066
43.7k
          xmlFree(defaultValue);
6067
81.5k
      GROW;
6068
81.5k
  }
6069
62.3k
  if (RAW == '>') {
6070
37.0k
#ifdef LIBXML_VALID_ENABLED
6071
37.0k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6072
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6073
0
                                 "Attribute list declaration doesn't start and"
6074
0
                                 " stop in the same entity\n",
6075
0
                                 NULL, NULL);
6076
0
      }
6077
37.0k
#endif
6078
37.0k
      NEXT;
6079
37.0k
  }
6080
62.3k
    }
6081
70.0k
}
6082
6083
/**
6084
 * Handle PEs and check that we don't pop the entity that started
6085
 * a balanced group.
6086
 *
6087
 * @param ctxt  parser context
6088
 * @param openInputNr  input nr of the entity with opening '('
6089
 */
6090
static void
6091
292k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6092
292k
    SKIP_BLANKS;
6093
292k
    GROW;
6094
6095
292k
    (void) openInputNr;
6096
6097
292k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6098
137k
        return;
6099
6100
168k
    while (!PARSER_STOPPED(ctxt)) {
6101
168k
        if (ctxt->input->cur >= ctxt->input->end) {
6102
5.53k
#ifdef LIBXML_VALID_ENABLED
6103
5.53k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6104
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105
0
                                 "Element content declaration doesn't start "
6106
0
                                 "and stop in the same entity\n",
6107
0
                                 NULL, NULL);
6108
0
            }
6109
5.53k
#endif
6110
5.53k
            if (PARSER_IN_PE(ctxt))
6111
5.49k
                xmlPopPE(ctxt);
6112
46
            else
6113
46
                break;
6114
162k
        } else if (RAW == '%') {
6115
8.06k
            xmlParsePERefInternal(ctxt, 0);
6116
154k
        } else {
6117
154k
            break;
6118
154k
        }
6119
6120
13.5k
        SKIP_BLANKS;
6121
13.5k
        GROW;
6122
13.5k
    }
6123
154k
}
6124
6125
/**
6126
 * Parse the declaration for a Mixed Element content
6127
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6128
 *
6129
 * @deprecated Internal function, don't use.
6130
 *
6131
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6132
 *                    '(' S? '#PCDATA' S? ')'
6133
 *
6134
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6135
 *
6136
 * [ VC: No Duplicate Types ]
6137
 * The same name must not appear more than once in a single
6138
 * mixed-content declaration.
6139
 *
6140
 * @param ctxt  an XML parser context
6141
 * @param openInputNr  the input used for the current entity, needed for
6142
 * boundary checks
6143
 * @returns the list of the xmlElementContent describing the element choices
6144
 */
6145
xmlElementContent *
6146
10.5k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6147
10.5k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6148
10.5k
    const xmlChar *elem = NULL;
6149
6150
10.5k
    GROW;
6151
10.5k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6152
10.5k
  SKIP(7);
6153
10.5k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6154
10.5k
  if (RAW == ')') {
6155
7.21k
#ifdef LIBXML_VALID_ENABLED
6156
7.21k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6157
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6158
0
                                 "Element content declaration doesn't start "
6159
0
                                 "and stop in the same entity\n",
6160
0
                                 NULL, NULL);
6161
0
      }
6162
7.21k
#endif
6163
7.21k
      NEXT;
6164
7.21k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6165
7.21k
      if (ret == NULL)
6166
10
                goto mem_error;
6167
7.20k
      if (RAW == '*') {
6168
220
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169
220
    NEXT;
6170
220
      }
6171
7.20k
      return(ret);
6172
7.21k
  }
6173
3.37k
  if ((RAW == '(') || (RAW == '|')) {
6174
2.17k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6175
2.17k
      if (ret == NULL)
6176
8
                goto mem_error;
6177
2.17k
  }
6178
23.9k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6179
20.7k
      NEXT;
6180
20.7k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6181
20.7k
            if (n == NULL)
6182
5
                goto mem_error;
6183
20.7k
      if (elem == NULL) {
6184
2.15k
    n->c1 = cur;
6185
2.15k
    if (cur != NULL)
6186
2.15k
        cur->parent = n;
6187
2.15k
    ret = cur = n;
6188
18.6k
      } else {
6189
18.6k
          cur->c2 = n;
6190
18.6k
    n->parent = cur;
6191
18.6k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6192
18.6k
                if (n->c1 == NULL)
6193
3
                    goto mem_error;
6194
18.6k
    n->c1->parent = n;
6195
18.6k
    cur = n;
6196
18.6k
      }
6197
20.7k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6198
20.7k
      elem = xmlParseName(ctxt);
6199
20.7k
      if (elem == NULL) {
6200
133
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6201
133
      "xmlParseElementMixedContentDecl : Name expected\n");
6202
133
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6203
133
    return(NULL);
6204
133
      }
6205
20.6k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6206
20.6k
  }
6207
3.22k
  if ((RAW == ')') && (NXT(1) == '*')) {
6208
1.85k
      if (elem != NULL) {
6209
1.85k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6210
1.85k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6211
1.85k
    if (cur->c2 == NULL)
6212
5
                    goto mem_error;
6213
1.85k
    cur->c2->parent = cur;
6214
1.85k
            }
6215
1.85k
            if (ret != NULL)
6216
1.85k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6217
1.85k
#ifdef LIBXML_VALID_ENABLED
6218
1.85k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6219
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6220
0
                                 "Element content declaration doesn't start "
6221
0
                                 "and stop in the same entity\n",
6222
0
                                 NULL, NULL);
6223
0
      }
6224
1.85k
#endif
6225
1.85k
      SKIP(2);
6226
1.85k
  } else {
6227
1.36k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6228
1.36k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6229
1.36k
      return(NULL);
6230
1.36k
  }
6231
6232
3.22k
    } else {
6233
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6234
0
    }
6235
1.85k
    return(ret);
6236
6237
31
mem_error:
6238
31
    xmlErrMemory(ctxt);
6239
31
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6240
31
    return(NULL);
6241
10.5k
}
6242
6243
/**
6244
 * Parse the declaration for a Mixed Element content
6245
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6246
 *
6247
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6248
 *
6249
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6250
 *
6251
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6252
 *
6253
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6254
 *
6255
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6256
 * TODO Parameter-entity replacement text must be properly nested
6257
 *  with parenthesized groups. That is to say, if either of the
6258
 *  opening or closing parentheses in a choice, seq, or Mixed
6259
 *  construct is contained in the replacement text for a parameter
6260
 *  entity, both must be contained in the same replacement text. For
6261
 *  interoperability, if a parameter-entity reference appears in a
6262
 *  choice, seq, or Mixed construct, its replacement text should not
6263
 *  be empty, and neither the first nor last non-blank character of
6264
 *  the replacement text should be a connector (| or ,).
6265
 *
6266
 * @param ctxt  an XML parser context
6267
 * @param openInputNr  the input used for the current entity, needed for
6268
 * boundary checks
6269
 * @param depth  the level of recursion
6270
 * @returns the tree of xmlElementContent describing the element
6271
 *          hierarchy.
6272
 */
6273
static xmlElementContentPtr
6274
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6275
95.9k
                                       int depth) {
6276
95.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6277
95.9k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6278
95.9k
    const xmlChar *elem;
6279
95.9k
    xmlChar type = 0;
6280
6281
95.9k
    if (depth > maxDepth) {
6282
3
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6283
3
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6284
3
                "use XML_PARSE_HUGE\n", depth);
6285
3
  return(NULL);
6286
3
    }
6287
95.9k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6288
95.9k
    if (RAW == '(') {
6289
73.5k
        int newInputNr = ctxt->inputNr;
6290
6291
        /* Recurse on first child */
6292
73.5k
  NEXT;
6293
73.5k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6294
73.5k
                                                           depth + 1);
6295
73.5k
        if (cur == NULL)
6296
54.5k
            return(NULL);
6297
73.5k
    } else {
6298
22.4k
  elem = xmlParseName(ctxt);
6299
22.4k
  if (elem == NULL) {
6300
442
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6301
442
      return(NULL);
6302
442
  }
6303
22.0k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6304
22.0k
  if (cur == NULL) {
6305
30
      xmlErrMemory(ctxt);
6306
30
      return(NULL);
6307
30
  }
6308
22.0k
  GROW;
6309
22.0k
  if (RAW == '?') {
6310
3.62k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6311
3.62k
      NEXT;
6312
18.3k
  } else if (RAW == '*') {
6313
1.74k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6314
1.74k
      NEXT;
6315
16.6k
  } else if (RAW == '+') {
6316
2.03k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6317
2.03k
      NEXT;
6318
14.5k
  } else {
6319
14.5k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6320
14.5k
  }
6321
22.0k
  GROW;
6322
22.0k
    }
6323
77.7k
    while (!PARSER_STOPPED(ctxt)) {
6324
76.4k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6325
76.4k
        if (RAW == ')')
6326
30.9k
            break;
6327
        /*
6328
   * Each loop we parse one separator and one element.
6329
   */
6330
45.5k
        if (RAW == ',') {
6331
14.6k
      if (type == 0) type = CUR;
6332
6333
      /*
6334
       * Detect "Name | Name , Name" error
6335
       */
6336
7.47k
      else if (type != CUR) {
6337
5
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338
5
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339
5
                      type);
6340
5
    if ((last != NULL) && (last != ret))
6341
5
        xmlFreeDocElementContent(ctxt->myDoc, last);
6342
5
    if (ret != NULL)
6343
5
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6344
5
    return(NULL);
6345
5
      }
6346
14.6k
      NEXT;
6347
6348
14.6k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6349
14.6k
      if (op == NULL) {
6350
7
                xmlErrMemory(ctxt);
6351
7
    if ((last != NULL) && (last != ret))
6352
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6353
7
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6354
7
    return(NULL);
6355
7
      }
6356
14.6k
      if (last == NULL) {
6357
7.16k
    op->c1 = ret;
6358
7.16k
    if (ret != NULL)
6359
7.16k
        ret->parent = op;
6360
7.16k
    ret = cur = op;
6361
7.46k
      } else {
6362
7.46k
          cur->c2 = op;
6363
7.46k
    if (op != NULL)
6364
7.46k
        op->parent = cur;
6365
7.46k
    op->c1 = last;
6366
7.46k
    if (last != NULL)
6367
7.46k
        last->parent = op;
6368
7.46k
    cur =op;
6369
7.46k
    last = NULL;
6370
7.46k
      }
6371
30.8k
  } else if (RAW == '|') {
6372
26.8k
      if (type == 0) type = CUR;
6373
6374
      /*
6375
       * Detect "Name , Name | Name" error
6376
       */
6377
15.5k
      else if (type != CUR) {
6378
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6379
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6380
3
          type);
6381
3
    if ((last != NULL) && (last != ret))
6382
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6383
3
    if (ret != NULL)
6384
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6385
3
    return(NULL);
6386
3
      }
6387
26.8k
      NEXT;
6388
6389
26.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6390
26.8k
      if (op == NULL) {
6391
11
                xmlErrMemory(ctxt);
6392
11
    if ((last != NULL) && (last != ret))
6393
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
11
    if (ret != NULL)
6395
11
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
11
    return(NULL);
6397
11
      }
6398
26.8k
      if (last == NULL) {
6399
11.2k
    op->c1 = ret;
6400
11.2k
    if (ret != NULL)
6401
11.2k
        ret->parent = op;
6402
11.2k
    ret = cur = op;
6403
15.5k
      } else {
6404
15.5k
          cur->c2 = op;
6405
15.5k
    if (op != NULL)
6406
15.5k
        op->parent = cur;
6407
15.5k
    op->c1 = last;
6408
15.5k
    if (last != NULL)
6409
15.5k
        last->parent = op;
6410
15.5k
    cur =op;
6411
15.5k
    last = NULL;
6412
15.5k
      }
6413
26.8k
  } else {
6414
4.00k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
4.00k
      if ((last != NULL) && (last != ret))
6416
1.15k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
4.00k
      if (ret != NULL)
6418
4.00k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
4.00k
      return(NULL);
6420
4.00k
  }
6421
41.4k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6422
41.4k
        if (RAW == '(') {
6423
6.59k
            int newInputNr = ctxt->inputNr;
6424
6425
      /* Recurse on second child */
6426
6.59k
      NEXT;
6427
6.59k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6428
6.59k
                                                          depth + 1);
6429
6.59k
            if (last == NULL) {
6430
2.63k
    if (ret != NULL)
6431
2.63k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
2.63k
    return(NULL);
6433
2.63k
            }
6434
34.8k
  } else {
6435
34.8k
      elem = xmlParseName(ctxt);
6436
34.8k
      if (elem == NULL) {
6437
2.00k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6438
2.00k
    if (ret != NULL)
6439
2.00k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6440
2.00k
    return(NULL);
6441
2.00k
      }
6442
32.8k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6443
32.8k
      if (last == NULL) {
6444
11
                xmlErrMemory(ctxt);
6445
11
    if (ret != NULL)
6446
11
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6447
11
    return(NULL);
6448
11
      }
6449
32.8k
      if (RAW == '?') {
6450
6.71k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6451
6.71k
    NEXT;
6452
26.1k
      } else if (RAW == '*') {
6453
5.12k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6454
5.12k
    NEXT;
6455
21.0k
      } else if (RAW == '+') {
6456
916
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6457
916
    NEXT;
6458
20.1k
      } else {
6459
20.1k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6460
20.1k
      }
6461
32.8k
  }
6462
41.4k
    }
6463
32.2k
    if ((cur != NULL) && (last != NULL)) {
6464
12.6k
        cur->c2 = last;
6465
12.6k
  if (last != NULL)
6466
12.6k
      last->parent = cur;
6467
12.6k
    }
6468
32.2k
#ifdef LIBXML_VALID_ENABLED
6469
32.2k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6470
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6471
0
                         "Element content declaration doesn't start "
6472
0
                         "and stop in the same entity\n",
6473
0
                         NULL, NULL);
6474
0
    }
6475
32.2k
#endif
6476
32.2k
    NEXT;
6477
32.2k
    if (RAW == '?') {
6478
773
  if (ret != NULL) {
6479
773
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6480
773
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481
106
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482
667
      else
6483
667
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6484
773
  }
6485
773
  NEXT;
6486
31.5k
    } else if (RAW == '*') {
6487
3.05k
  if (ret != NULL) {
6488
3.05k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6489
3.05k
      cur = ret;
6490
      /*
6491
       * Some normalization:
6492
       * (a | b* | c?)* == (a | b | c)*
6493
       */
6494
12.0k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6495
9.03k
    if ((cur->c1 != NULL) &&
6496
9.03k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
9.03k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
1.53k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
9.03k
    if ((cur->c2 != NULL) &&
6500
9.03k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6501
9.03k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6502
767
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
9.03k
    cur = cur->c2;
6504
9.03k
      }
6505
3.05k
  }
6506
3.05k
  NEXT;
6507
28.4k
    } else if (RAW == '+') {
6508
7.60k
  if (ret != NULL) {
6509
7.60k
      int found = 0;
6510
6511
7.60k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6512
7.60k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6513
614
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6514
6.98k
      else
6515
6.98k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6516
      /*
6517
       * Some normalization:
6518
       * (a | b*)+ == (a | b)*
6519
       * (a | b?)+ == (a | b)*
6520
       */
6521
13.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6522
5.93k
    if ((cur->c1 != NULL) &&
6523
5.93k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
5.93k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6525
3.25k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
3.25k
        found = 1;
6527
3.25k
    }
6528
5.93k
    if ((cur->c2 != NULL) &&
6529
5.93k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6530
5.93k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6531
1.78k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6532
1.78k
        found = 1;
6533
1.78k
    }
6534
5.93k
    cur = cur->c2;
6535
5.93k
      }
6536
7.60k
      if (found)
6537
3.35k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6538
7.60k
  }
6539
7.60k
  NEXT;
6540
7.60k
    }
6541
32.2k
    return(ret);
6542
40.9k
}
6543
6544
/**
6545
 * Parse the declaration for a Mixed Element content
6546
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6547
 *
6548
 * @deprecated Internal function, don't use.
6549
 *
6550
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6551
 *
6552
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6553
 *
6554
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6555
 *
6556
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6557
 *
6558
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6559
 * TODO Parameter-entity replacement text must be properly nested
6560
 *  with parenthesized groups. That is to say, if either of the
6561
 *  opening or closing parentheses in a choice, seq, or Mixed
6562
 *  construct is contained in the replacement text for a parameter
6563
 *  entity, both must be contained in the same replacement text. For
6564
 *  interoperability, if a parameter-entity reference appears in a
6565
 *  choice, seq, or Mixed construct, its replacement text should not
6566
 *  be empty, and neither the first nor last non-blank character of
6567
 *  the replacement text should be a connector (| or ,).
6568
 *
6569
 * @param ctxt  an XML parser context
6570
 * @param inputchk  the input used for the current entity, needed for boundary checks
6571
 * @returns the tree of xmlElementContent describing the element
6572
 *          hierarchy.
6573
 */
6574
xmlElementContent *
6575
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6576
    /* stub left for API/ABI compat */
6577
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6578
0
}
6579
6580
/**
6581
 * Parse the declaration for an Element content either Mixed or Children,
6582
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6583
 *
6584
 * @deprecated Internal function, don't use.
6585
 *
6586
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6587
 *
6588
 * @param ctxt  an XML parser context
6589
 * @param name  the name of the element being defined.
6590
 * @param result  the Element Content pointer will be stored here if any
6591
 * @returns an xmlElementTypeVal value or -1 on error
6592
 */
6593
6594
int
6595
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6596
26.4k
                           xmlElementContent **result) {
6597
6598
26.4k
    xmlElementContentPtr tree = NULL;
6599
26.4k
    int openInputNr = ctxt->inputNr;
6600
26.4k
    int res;
6601
6602
26.4k
    *result = NULL;
6603
6604
26.4k
    if (RAW != '(') {
6605
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6606
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6607
0
  return(-1);
6608
0
    }
6609
26.4k
    NEXT;
6610
26.4k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6611
26.4k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6612
10.5k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6613
10.5k
  res = XML_ELEMENT_TYPE_MIXED;
6614
15.8k
    } else {
6615
15.8k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6616
15.8k
  res = XML_ELEMENT_TYPE_ELEMENT;
6617
15.8k
    }
6618
26.4k
    if (tree == NULL)
6619
8.04k
        return(-1);
6620
18.4k
    SKIP_BLANKS_PE;
6621
18.4k
    *result = tree;
6622
18.4k
    return(res);
6623
26.4k
}
6624
6625
/**
6626
 * Parse an element declaration. Always consumes '<!'.
6627
 *
6628
 * @deprecated Internal function, don't use.
6629
 *
6630
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6631
 *
6632
 * [ VC: Unique Element Type Declaration ]
6633
 * No element type may be declared more than once
6634
 *
6635
 * @param ctxt  an XML parser context
6636
 * @returns the type of the element, or -1 in case of error
6637
 */
6638
int
6639
33.5k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6640
33.5k
    const xmlChar *name;
6641
33.5k
    int ret = -1;
6642
33.5k
    xmlElementContentPtr content  = NULL;
6643
6644
33.5k
    if ((CUR != '<') || (NXT(1) != '!'))
6645
0
        return(ret);
6646
33.5k
    SKIP(2);
6647
6648
    /* GROW; done in the caller */
6649
33.5k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6650
33.4k
#ifdef LIBXML_VALID_ENABLED
6651
33.4k
  int oldInputNr = ctxt->inputNr;
6652
33.4k
#endif
6653
6654
33.4k
  SKIP(7);
6655
33.4k
  if (SKIP_BLANKS_PE == 0) {
6656
257
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6657
257
               "Space required after 'ELEMENT'\n");
6658
257
      return(-1);
6659
257
  }
6660
33.2k
        name = xmlParseName(ctxt);
6661
33.2k
  if (name == NULL) {
6662
595
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6663
595
         "xmlParseElementDecl: no name for Element\n");
6664
595
      return(-1);
6665
595
  }
6666
32.6k
  if (SKIP_BLANKS_PE == 0) {
6667
5.73k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6668
5.73k
         "Space required after the element name\n");
6669
5.73k
  }
6670
32.6k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6671
4.40k
      SKIP(5);
6672
      /*
6673
       * Element must always be empty.
6674
       */
6675
4.40k
      ret = XML_ELEMENT_TYPE_EMPTY;
6676
28.2k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6677
28.2k
             (NXT(2) == 'Y')) {
6678
566
      SKIP(3);
6679
      /*
6680
       * Element is a generic container.
6681
       */
6682
566
      ret = XML_ELEMENT_TYPE_ANY;
6683
27.6k
  } else if (RAW == '(') {
6684
26.4k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6685
26.4k
            if (ret <= 0)
6686
8.04k
                return(-1);
6687
26.4k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
1.18k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6692
1.18k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6693
1.18k
      return(-1);
6694
1.18k
  }
6695
6696
23.3k
  SKIP_BLANKS_PE;
6697
6698
23.3k
  if (RAW != '>') {
6699
958
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6700
958
      if (content != NULL) {
6701
632
    xmlFreeDocElementContent(ctxt->myDoc, content);
6702
632
      }
6703
22.4k
  } else {
6704
22.4k
#ifdef LIBXML_VALID_ENABLED
6705
22.4k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6706
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6707
0
                                 "Element declaration doesn't start and stop in"
6708
0
                                 " the same entity\n",
6709
0
                                 NULL, NULL);
6710
0
      }
6711
22.4k
#endif
6712
6713
22.4k
      NEXT;
6714
22.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6715
22.4k
    (ctxt->sax->elementDecl != NULL)) {
6716
17.7k
    if (content != NULL)
6717
13.5k
        content->parent = NULL;
6718
17.7k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6719
17.7k
                           content);
6720
17.7k
    if ((content != NULL) && (content->parent == NULL)) {
6721
        /*
6722
         * this is a trick: if xmlAddElementDecl is called,
6723
         * instead of copying the full tree it is plugged directly
6724
         * if called from the parser. Avoid duplicating the
6725
         * interfaces or change the API/ABI
6726
         */
6727
5.44k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6728
5.44k
    }
6729
17.7k
      } else if (content != NULL) {
6730
4.24k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6731
4.24k
      }
6732
22.4k
  }
6733
23.3k
    }
6734
23.4k
    return(ret);
6735
33.5k
}
6736
6737
/**
6738
 * Parse a conditional section. Always consumes '<!['.
6739
 *
6740
 *     [61] conditionalSect ::= includeSect | ignoreSect
6741
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6742
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6743
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6744
 *                                 Ignore)*
6745
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6746
 * @param ctxt  an XML parser context
6747
 */
6748
6749
static void
6750
8.14k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6751
8.14k
    size_t depth = 0;
6752
8.14k
    int isFreshPE = 0;
6753
8.14k
    int oldInputNr = ctxt->inputNr;
6754
8.14k
    int declInputNr = ctxt->inputNr;
6755
6756
32.9k
    while (!PARSER_STOPPED(ctxt)) {
6757
32.8k
        if (ctxt->input->cur >= ctxt->input->end) {
6758
1.13k
            if (ctxt->inputNr <= oldInputNr) {
6759
430
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6760
430
                return;
6761
430
            }
6762
6763
706
            xmlPopPE(ctxt);
6764
706
            declInputNr = ctxt->inputNr;
6765
31.7k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766
9.05k
            SKIP(3);
6767
9.05k
            SKIP_BLANKS_PE;
6768
6769
9.05k
            isFreshPE = 0;
6770
6771
9.05k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6772
4.40k
                SKIP(7);
6773
4.40k
                SKIP_BLANKS_PE;
6774
4.40k
                if (RAW != '[') {
6775
278
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6776
278
                    return;
6777
278
                }
6778
4.13k
#ifdef LIBXML_VALID_ENABLED
6779
4.13k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6780
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6781
0
                                     "All markup of the conditional section is"
6782
0
                                     " not in the same entity\n",
6783
0
                                     NULL, NULL);
6784
0
                }
6785
4.13k
#endif
6786
4.13k
                NEXT;
6787
6788
4.13k
                depth++;
6789
4.64k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6790
2.11k
                size_t ignoreDepth = 0;
6791
6792
2.11k
                SKIP(6);
6793
2.11k
                SKIP_BLANKS_PE;
6794
2.11k
                if (RAW != '[') {
6795
613
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6796
613
                    return;
6797
613
                }
6798
1.50k
#ifdef LIBXML_VALID_ENABLED
6799
1.50k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6800
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
0
                                     "All markup of the conditional section is"
6802
0
                                     " not in the same entity\n",
6803
0
                                     NULL, NULL);
6804
0
                }
6805
1.50k
#endif
6806
1.50k
                NEXT;
6807
6808
19.4k
                while (PARSER_STOPPED(ctxt) == 0) {
6809
19.4k
                    if (RAW == 0) {
6810
545
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6811
545
                        return;
6812
545
                    }
6813
18.9k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6814
409
                        SKIP(3);
6815
409
                        ignoreDepth++;
6816
                        /* Check for integer overflow */
6817
409
                        if (ignoreDepth == 0) {
6818
0
                            xmlErrMemory(ctxt);
6819
0
                            return;
6820
0
                        }
6821
18.5k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6822
18.5k
                               (NXT(2) == '>')) {
6823
1.16k
                        SKIP(3);
6824
1.16k
                        if (ignoreDepth == 0)
6825
943
                            break;
6826
226
                        ignoreDepth--;
6827
17.3k
                    } else {
6828
17.3k
                        NEXT;
6829
17.3k
                    }
6830
18.9k
                }
6831
6832
956
#ifdef LIBXML_VALID_ENABLED
6833
956
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6834
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835
0
                                     "All markup of the conditional section is"
6836
0
                                     " not in the same entity\n",
6837
0
                                     NULL, NULL);
6838
0
                }
6839
956
#endif
6840
2.52k
            } else {
6841
2.52k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6842
2.52k
                return;
6843
2.52k
            }
6844
22.7k
        } else if ((depth > 0) &&
6845
22.7k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6846
2.73k
            if (isFreshPE) {
6847
5
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6848
5
                               "Parameter entity must match "
6849
5
                               "extSubsetDecl\n");
6850
5
                return;
6851
5
            }
6852
6853
2.72k
            depth--;
6854
2.72k
#ifdef LIBXML_VALID_ENABLED
6855
2.72k
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6856
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                 "All markup of the conditional section is not"
6858
0
                                 " in the same entity\n",
6859
0
                                 NULL, NULL);
6860
0
            }
6861
2.72k
#endif
6862
2.72k
            SKIP(3);
6863
19.9k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6864
18.4k
            isFreshPE = 0;
6865
18.4k
            xmlParseMarkupDecl(ctxt);
6866
18.4k
        } else if (RAW == '%') {
6867
1.47k
            xmlParsePERefInternal(ctxt, 1);
6868
1.47k
            if (ctxt->inputNr > declInputNr) {
6869
738
                isFreshPE = 1;
6870
738
                declInputNr = ctxt->inputNr;
6871
738
            }
6872
1.47k
        } else {
6873
48
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874
48
            return;
6875
48
        }
6876
6877
28.4k
        if (depth == 0)
6878
3.66k
            break;
6879
6880
24.7k
        SKIP_BLANKS;
6881
24.7k
        SHRINK;
6882
24.7k
        GROW;
6883
24.7k
    }
6884
8.14k
}
6885
6886
/**
6887
 * Parse markup declarations. Always consumes '<!' or '<?'.
6888
 *
6889
 * @deprecated Internal function, don't use.
6890
 *
6891
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6892
 *                         NotationDecl | PI | Comment
6893
 *
6894
 * [ VC: Proper Declaration/PE Nesting ]
6895
 * Parameter-entity replacement text must be properly nested with
6896
 * markup declarations. That is to say, if either the first character
6897
 * or the last character of a markup declaration (markupdecl above) is
6898
 * contained in the replacement text for a parameter-entity reference,
6899
 * both must be contained in the same replacement text.
6900
 *
6901
 * [ WFC: PEs in Internal Subset ]
6902
 * In the internal DTD subset, parameter-entity references can occur
6903
 * only where markup declarations can occur, not within markup declarations.
6904
 * (This does not apply to references that occur in external parameter
6905
 * entities or to the external subset.)
6906
 *
6907
 * @param ctxt  an XML parser context
6908
 */
6909
void
6910
524k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6911
524k
    GROW;
6912
524k
    if (CUR == '<') {
6913
524k
        if (NXT(1) == '!') {
6914
502k
      switch (NXT(2)) {
6915
100k
          case 'E':
6916
100k
        if (NXT(3) == 'L')
6917
33.5k
      xmlParseElementDecl(ctxt);
6918
67.3k
        else if (NXT(3) == 'N')
6919
67.3k
      xmlParseEntityDecl(ctxt);
6920
26
                    else
6921
26
                        SKIP(2);
6922
100k
        break;
6923
70.0k
          case 'A':
6924
70.0k
        xmlParseAttributeListDecl(ctxt);
6925
70.0k
        break;
6926
10.0k
          case 'N':
6927
10.0k
        xmlParseNotationDecl(ctxt);
6928
10.0k
        break;
6929
308k
          case '-':
6930
308k
        xmlParseComment(ctxt);
6931
308k
        break;
6932
12.4k
    default:
6933
12.4k
                    xmlFatalErr(ctxt,
6934
12.4k
                                ctxt->inSubset == 2 ?
6935
1.27k
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6936
12.4k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6937
12.4k
                                NULL);
6938
12.4k
                    SKIP(2);
6939
12.4k
        break;
6940
502k
      }
6941
502k
  } else if (NXT(1) == '?') {
6942
21.6k
      xmlParsePI(ctxt);
6943
21.6k
  }
6944
524k
    }
6945
524k
}
6946
6947
/**
6948
 * Parse an XML declaration header for external entities
6949
 *
6950
 * @deprecated Internal function, don't use.
6951
 *
6952
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6953
 * @param ctxt  an XML parser context
6954
 */
6955
6956
void
6957
24.1k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6958
24.1k
    xmlChar *version;
6959
6960
    /*
6961
     * We know that '<?xml' is here.
6962
     */
6963
24.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6964
24.1k
  SKIP(5);
6965
24.1k
    } else {
6966
6
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6967
6
  return;
6968
6
    }
6969
6970
24.1k
    if (SKIP_BLANKS == 0) {
6971
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972
0
           "Space needed after '<?xml'\n");
6973
0
    }
6974
6975
    /*
6976
     * We may have the VersionInfo here.
6977
     */
6978
24.1k
    version = xmlParseVersionInfo(ctxt);
6979
24.1k
    if (version == NULL) {
6980
19.6k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6981
19.6k
        if (version == NULL) {
6982
41
            xmlErrMemory(ctxt);
6983
41
            return;
6984
41
        }
6985
19.6k
    } else {
6986
4.52k
  if (SKIP_BLANKS == 0) {
6987
840
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6988
840
               "Space needed here\n");
6989
840
  }
6990
4.52k
    }
6991
24.1k
    ctxt->input->version = version;
6992
6993
    /*
6994
     * We must have the encoding declaration
6995
     */
6996
24.1k
    xmlParseEncodingDecl(ctxt);
6997
6998
24.1k
    SKIP_BLANKS;
6999
24.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7000
1.86k
        SKIP(2);
7001
22.2k
    } else if (RAW == '>') {
7002
        /* Deprecated old WD ... */
7003
1.04k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7004
1.04k
  NEXT;
7005
21.2k
    } else {
7006
21.2k
        int c;
7007
7008
21.2k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7009
369M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7010
369M
            NEXT;
7011
369M
            if (c == '>')
7012
6.00k
                break;
7013
369M
        }
7014
21.2k
    }
7015
24.1k
}
7016
7017
/**
7018
 * Parse Markup declarations from an external subset
7019
 *
7020
 * @deprecated Internal function, don't use.
7021
 *
7022
 *     [30] extSubset ::= textDecl? extSubsetDecl
7023
 *
7024
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7025
 *                             PEReference | S) *
7026
 * @param ctxt  an XML parser context
7027
 * @param publicId  the public identifier
7028
 * @param systemId  the system identifier (URL)
7029
 */
7030
void
7031
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7032
1.51k
                       const xmlChar *systemId) {
7033
1.51k
    int oldInputNr;
7034
7035
1.51k
    xmlCtxtInitializeLate(ctxt);
7036
7037
1.51k
    xmlDetectEncoding(ctxt);
7038
7039
1.51k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7040
104
  xmlParseTextDecl(ctxt);
7041
104
    }
7042
1.51k
    if (ctxt->myDoc == NULL) {
7043
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7044
0
  if (ctxt->myDoc == NULL) {
7045
0
      xmlErrMemory(ctxt);
7046
0
      return;
7047
0
  }
7048
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7049
0
    }
7050
1.51k
    if ((ctxt->myDoc->intSubset == NULL) &&
7051
1.51k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7052
0
        xmlErrMemory(ctxt);
7053
0
    }
7054
7055
1.51k
    ctxt->inSubset = 2;
7056
1.51k
    oldInputNr = ctxt->inputNr;
7057
7058
1.51k
    SKIP_BLANKS;
7059
308k
    while (!PARSER_STOPPED(ctxt)) {
7060
307k
        if (ctxt->input->cur >= ctxt->input->end) {
7061
1.27k
            if (ctxt->inputNr <= oldInputNr) {
7062
371
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7063
371
                break;
7064
371
            }
7065
7066
899
            xmlPopPE(ctxt);
7067
306k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7068
4.72k
            xmlParseConditionalSections(ctxt);
7069
301k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7070
298k
            xmlParseMarkupDecl(ctxt);
7071
298k
        } else if (RAW == '%') {
7072
2.11k
            xmlParsePERefInternal(ctxt, 1);
7073
2.11k
        } else {
7074
761
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7075
7076
1.01k
            while (ctxt->inputNr > oldInputNr)
7077
256
                xmlPopPE(ctxt);
7078
761
            break;
7079
761
        }
7080
306k
        SKIP_BLANKS;
7081
306k
        SHRINK;
7082
306k
        GROW;
7083
306k
    }
7084
1.51k
}
7085
7086
/**
7087
 * Parse and handle entity references in content, depending on the SAX
7088
 * interface, this may end up in a call to characters() if this is a
7089
 * CharRef, a predefined entity, if there is no reference() callback,
7090
 * or if the parser was asked to switch to that mode.
7091
 *
7092
 * @deprecated Internal function, don't use.
7093
 *
7094
 * Always consumes '&'.
7095
 *
7096
 *     [67] Reference ::= EntityRef | CharRef
7097
 * @param ctxt  an XML parser context
7098
 */
7099
void
7100
244k
xmlParseReference(xmlParserCtxt *ctxt) {
7101
244k
    xmlEntityPtr ent = NULL;
7102
244k
    const xmlChar *name;
7103
244k
    xmlChar *val;
7104
7105
244k
    if (RAW != '&')
7106
0
        return;
7107
7108
    /*
7109
     * Simple case of a CharRef
7110
     */
7111
244k
    if (NXT(1) == '#') {
7112
38.2k
  int i = 0;
7113
38.2k
  xmlChar out[16];
7114
38.2k
  int value = xmlParseCharRef(ctxt);
7115
7116
38.2k
  if (value == 0)
7117
13.0k
      return;
7118
7119
        /*
7120
         * Just encode the value in UTF-8
7121
         */
7122
25.1k
        COPY_BUF(out, i, value);
7123
25.1k
        out[i] = 0;
7124
25.1k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7125
25.1k
            (!ctxt->disableSAX))
7126
24.7k
            ctxt->sax->characters(ctxt->userData, out, i);
7127
25.1k
  return;
7128
38.2k
    }
7129
7130
    /*
7131
     * We are seeing an entity reference
7132
     */
7133
206k
    name = xmlParseEntityRefInternal(ctxt);
7134
206k
    if (name == NULL)
7135
34.0k
        return;
7136
172k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7137
172k
    if (ent == NULL) {
7138
        /*
7139
         * Create a reference for undeclared entities.
7140
         */
7141
64.2k
        if ((ctxt->replaceEntities == 0) &&
7142
64.2k
            (ctxt->sax != NULL) &&
7143
64.2k
            (ctxt->disableSAX == 0) &&
7144
64.2k
            (ctxt->sax->reference != NULL)) {
7145
15.4k
            ctxt->sax->reference(ctxt->userData, name);
7146
15.4k
        }
7147
64.2k
        return;
7148
64.2k
    }
7149
107k
    if (!ctxt->wellFormed)
7150
24.5k
  return;
7151
7152
    /* special case of predefined entities */
7153
83.2k
    if ((ent->name == NULL) ||
7154
83.2k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7155
329
  val = ent->content;
7156
329
  if (val == NULL) return;
7157
  /*
7158
   * inline the entity.
7159
   */
7160
329
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7161
329
      (!ctxt->disableSAX))
7162
329
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7163
329
  return;
7164
329
    }
7165
7166
    /*
7167
     * Some users try to parse entities on their own and used to set
7168
     * the renamed "checked" member. Fix the flags to cover this
7169
     * case.
7170
     */
7171
82.9k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7172
0
        ent->flags |= XML_ENT_PARSED;
7173
7174
    /*
7175
     * The first reference to the entity triggers a parsing phase
7176
     * where the ent->children is filled with the result from
7177
     * the parsing.
7178
     * Note: external parsed entities will not be loaded, it is not
7179
     * required for a non-validating parser, unless the parsing option
7180
     * of validating, or substituting entities were given. Doing so is
7181
     * far more secure as the parser will only process data coming from
7182
     * the document entity by default.
7183
     *
7184
     * FIXME: This doesn't work correctly since entities can be
7185
     * expanded with different namespace declarations in scope.
7186
     * For example:
7187
     *
7188
     * <!DOCTYPE doc [
7189
     *   <!ENTITY ent "<ns:elem/>">
7190
     * ]>
7191
     * <doc>
7192
     *   <decl1 xmlns:ns="urn:ns1">
7193
     *     &ent;
7194
     *   </decl1>
7195
     *   <decl2 xmlns:ns="urn:ns2">
7196
     *     &ent;
7197
     *   </decl2>
7198
     * </doc>
7199
     *
7200
     * Proposed fix:
7201
     *
7202
     * - Ignore current namespace declarations when parsing the
7203
     *   entity. If a prefix can't be resolved, don't report an error
7204
     *   but mark it as unresolved.
7205
     * - Try to resolve these prefixes when expanding the entity.
7206
     *   This will require a specialized version of xmlStaticCopyNode
7207
     *   which can also make use of the namespace hash table to avoid
7208
     *   quadratic behavior.
7209
     *
7210
     * Alternatively, we could simply reparse the entity on each
7211
     * expansion like we already do with custom SAX callbacks.
7212
     * External entity content should be cached in this case.
7213
     */
7214
82.9k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7215
82.9k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7216
9.83k
         ((ctxt->replaceEntities) ||
7217
82.4k
          (ctxt->validate)))) {
7218
82.4k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7219
4.52k
            xmlCtxtParseEntity(ctxt, ent);
7220
77.9k
        } else if (ent->children == NULL) {
7221
            /*
7222
             * Probably running in SAX mode and the callbacks don't
7223
             * build the entity content. Parse the entity again.
7224
             *
7225
             * This will also be triggered in normal tree builder mode
7226
             * if an entity happens to be empty, causing unnecessary
7227
             * reloads. It's hard to come up with a reliable check in
7228
             * which mode we're running.
7229
             */
7230
69.5k
            xmlCtxtParseEntity(ctxt, ent);
7231
69.5k
        }
7232
82.4k
    }
7233
7234
    /*
7235
     * We also check for amplification if entities aren't substituted.
7236
     * They might be expanded later.
7237
     */
7238
82.9k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7239
105
        return;
7240
7241
82.8k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7242
886
        return;
7243
7244
81.9k
    if (ctxt->replaceEntities == 0) {
7245
  /*
7246
   * Create a reference
7247
   */
7248
791
        if (ctxt->sax->reference != NULL)
7249
791
      ctxt->sax->reference(ctxt->userData, ent->name);
7250
81.1k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7251
11.2k
        xmlNodePtr copy, cur;
7252
7253
        /*
7254
         * Seems we are generating the DOM content, copy the tree
7255
   */
7256
11.2k
        cur = ent->children;
7257
7258
        /*
7259
         * Handle first text node with SAX to coalesce text efficiently
7260
         */
7261
11.2k
        if ((cur->type == XML_TEXT_NODE) ||
7262
11.2k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7263
10.7k
            int len = xmlStrlen(cur->content);
7264
7265
10.7k
            if ((cur->type == XML_TEXT_NODE) ||
7266
10.7k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7267
10.2k
                if (ctxt->sax->characters != NULL)
7268
10.2k
                    ctxt->sax->characters(ctxt, cur->content, len);
7269
10.2k
            } else {
7270
532
                if (ctxt->sax->cdataBlock != NULL)
7271
532
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7272
532
            }
7273
7274
10.7k
            cur = cur->next;
7275
10.7k
        }
7276
7277
72.3k
        while (cur != NULL) {
7278
63.5k
            xmlNodePtr last;
7279
7280
            /*
7281
             * Handle last text node with SAX to coalesce text efficiently
7282
             */
7283
63.5k
            if ((cur->next == NULL) &&
7284
63.5k
                ((cur->type == XML_TEXT_NODE) ||
7285
4.60k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7286
2.18k
                int len = xmlStrlen(cur->content);
7287
7288
2.18k
                if ((cur->type == XML_TEXT_NODE) ||
7289
2.18k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7290
2.11k
                    if (ctxt->sax->characters != NULL)
7291
2.11k
                        ctxt->sax->characters(ctxt, cur->content, len);
7292
2.11k
                } else {
7293
76
                    if (ctxt->sax->cdataBlock != NULL)
7294
76
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
76
                }
7296
7297
2.18k
                break;
7298
2.18k
            }
7299
7300
            /*
7301
             * Reset coalesce buffer stats only for non-text nodes.
7302
             */
7303
61.3k
            ctxt->nodemem = 0;
7304
61.3k
            ctxt->nodelen = 0;
7305
7306
61.3k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7307
7308
61.3k
            if (copy == NULL) {
7309
241
                xmlErrMemory(ctxt);
7310
241
                break;
7311
241
            }
7312
7313
61.0k
            if (ctxt->parseMode == XML_PARSE_READER) {
7314
                /* Needed for reader */
7315
0
                copy->extra = cur->extra;
7316
                /* Maybe needed for reader */
7317
0
                copy->_private = cur->_private;
7318
0
            }
7319
7320
61.0k
            copy->parent = ctxt->node;
7321
61.0k
            last = ctxt->node->last;
7322
61.0k
            if (last == NULL) {
7323
138
                ctxt->node->children = copy;
7324
60.9k
            } else {
7325
60.9k
                last->next = copy;
7326
60.9k
                copy->prev = last;
7327
60.9k
            }
7328
61.0k
            ctxt->node->last = copy;
7329
7330
61.0k
            cur = cur->next;
7331
61.0k
        }
7332
11.2k
    }
7333
81.9k
}
7334
7335
static void
7336
359k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7337
    /*
7338
     * [ WFC: Entity Declared ]
7339
     * In a document without any DTD, a document with only an
7340
     * internal DTD subset which contains no parameter entity
7341
     * references, or a document with "standalone='yes'", the
7342
     * Name given in the entity reference must match that in an
7343
     * entity declaration, except that well-formed documents
7344
     * need not declare any of the following entities: amp, lt,
7345
     * gt, apos, quot.
7346
     * The declaration of a parameter entity must precede any
7347
     * reference to it.
7348
     * Similarly, the declaration of a general entity must
7349
     * precede any reference to it which appears in a default
7350
     * value in an attribute-list declaration. Note that if
7351
     * entities are declared in the external subset or in
7352
     * external parameter entities, a non-validating processor
7353
     * is not obligated to read and process their declarations;
7354
     * for such documents, the rule that an entity must be
7355
     * declared is a well-formedness constraint only if
7356
     * standalone='yes'.
7357
     */
7358
359k
    if ((ctxt->standalone == 1) ||
7359
359k
        ((ctxt->hasExternalSubset == 0) &&
7360
358k
         (ctxt->hasPErefs == 0))) {
7361
280k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7362
280k
                          "Entity '%s' not defined\n", name);
7363
280k
#ifdef LIBXML_VALID_ENABLED
7364
280k
    } else if (ctxt->validate) {
7365
        /*
7366
         * [ VC: Entity Declared ]
7367
         * In a document with an external subset or external
7368
         * parameter entities with "standalone='no'", ...
7369
         * ... The declaration of a parameter entity must
7370
         * precede any reference to it...
7371
         */
7372
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7373
0
                         "Entity '%s' not defined\n", name, NULL);
7374
0
#endif
7375
79.0k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7376
79.0k
               ((ctxt->replaceEntities) &&
7377
73.9k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7378
        /*
7379
         * Also raise a non-fatal error
7380
         *
7381
         * - if the external subset is loaded and all entity declarations
7382
         *   should be available, or
7383
         * - entity substitution was requested without restricting
7384
         *   external entity access.
7385
         */
7386
73.9k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387
73.9k
                     "Entity '%s' not defined\n", name);
7388
73.9k
    } else {
7389
5.03k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7390
5.03k
                      "Entity '%s' not defined\n", name, NULL);
7391
5.03k
    }
7392
7393
359k
    ctxt->valid = 0;
7394
359k
}
7395
7396
static xmlEntityPtr
7397
1.96M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7398
1.96M
    xmlEntityPtr ent = NULL;
7399
7400
    /*
7401
     * Predefined entities override any extra definition
7402
     */
7403
1.96M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7404
1.21M
        ent = xmlGetPredefinedEntity(name);
7405
1.21M
        if (ent != NULL)
7406
131k
            return(ent);
7407
1.21M
    }
7408
7409
    /*
7410
     * Ask first SAX for entity resolution, otherwise try the
7411
     * entities which may have stored in the parser context.
7412
     */
7413
1.83M
    if (ctxt->sax != NULL) {
7414
1.83M
  if (ctxt->sax->getEntity != NULL)
7415
1.83M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7416
1.83M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7417
1.83M
      (ctxt->options & XML_PARSE_OLDSAX))
7418
271
      ent = xmlGetPredefinedEntity(name);
7419
1.83M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7420
1.83M
      (ctxt->userData==ctxt)) {
7421
17.0k
      ent = xmlSAX2GetEntity(ctxt, name);
7422
17.0k
  }
7423
1.83M
    }
7424
7425
1.83M
    if (ent == NULL) {
7426
303k
        xmlHandleUndeclaredEntity(ctxt, name);
7427
303k
    }
7428
7429
    /*
7430
     * [ WFC: Parsed Entity ]
7431
     * An entity reference must not contain the name of an
7432
     * unparsed entity
7433
     */
7434
1.53M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7435
216
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7436
216
     "Entity reference to unparsed entity %s\n", name);
7437
216
        ent = NULL;
7438
216
    }
7439
7440
    /*
7441
     * [ WFC: No External Entity References ]
7442
     * Attribute values cannot contain direct or indirect
7443
     * entity references to external entities.
7444
     */
7445
1.53M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7446
21.4k
        if (inAttr) {
7447
1.91k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7448
1.91k
                 "Attribute references external entity '%s'\n", name);
7449
1.91k
            ent = NULL;
7450
1.91k
        }
7451
21.4k
    }
7452
7453
1.83M
    return(ent);
7454
1.96M
}
7455
7456
/**
7457
 * Parse an entity reference. Always consumes '&'.
7458
 *
7459
 *     [68] EntityRef ::= '&' Name ';'
7460
 *
7461
 * @param ctxt  an XML parser context
7462
 * @returns the name, or NULL in case of error.
7463
 */
7464
static const xmlChar *
7465
1.32M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7466
1.32M
    const xmlChar *name;
7467
7468
1.32M
    GROW;
7469
7470
1.32M
    if (RAW != '&')
7471
0
        return(NULL);
7472
1.32M
    NEXT;
7473
1.32M
    name = xmlParseName(ctxt);
7474
1.32M
    if (name == NULL) {
7475
54.5k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7476
54.5k
           "xmlParseEntityRef: no name\n");
7477
54.5k
        return(NULL);
7478
54.5k
    }
7479
1.27M
    if (RAW != ';') {
7480
30.1k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7481
30.1k
  return(NULL);
7482
30.1k
    }
7483
1.24M
    NEXT;
7484
7485
1.24M
    return(name);
7486
1.27M
}
7487
7488
/**
7489
 * @deprecated Internal function, don't use.
7490
 *
7491
 * @param ctxt  an XML parser context
7492
 * @returns the xmlEntity if found, or NULL otherwise.
7493
 */
7494
xmlEntity *
7495
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7496
0
    const xmlChar *name;
7497
7498
0
    if (ctxt == NULL)
7499
0
        return(NULL);
7500
7501
0
    name = xmlParseEntityRefInternal(ctxt);
7502
0
    if (name == NULL)
7503
0
        return(NULL);
7504
7505
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7506
0
}
7507
7508
/**
7509
 * Parse an entity reference, but this version parses it from
7510
 * a string value.
7511
 *
7512
 *     [68] EntityRef ::= '&' Name ';'
7513
 *
7514
 * [ WFC: Entity Declared ]
7515
 * In a document without any DTD, a document with only an internal DTD
7516
 * subset which contains no parameter entity references, or a document
7517
 * with "standalone='yes'", the Name given in the entity reference
7518
 * must match that in an entity declaration, except that well-formed
7519
 * documents need not declare any of the following entities: amp, lt,
7520
 * gt, apos, quot.  The declaration of a parameter entity must precede
7521
 * any reference to it.  Similarly, the declaration of a general entity
7522
 * must precede any reference to it which appears in a default value in an
7523
 * attribute-list declaration. Note that if entities are declared in the
7524
 * external subset or in external parameter entities, a non-validating
7525
 * processor is not obligated to read and process their declarations;
7526
 * for such documents, the rule that an entity must be declared is a
7527
 * well-formedness constraint only if standalone='yes'.
7528
 *
7529
 * [ WFC: Parsed Entity ]
7530
 * An entity reference must not contain the name of an unparsed entity
7531
 *
7532
 * @param ctxt  an XML parser context
7533
 * @param str  a pointer to an index in the string
7534
 * @returns the entity name, or NULL in case of error. The str pointer
7535
 * is updated to the current location in the string.
7536
 */
7537
static xmlChar *
7538
726k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7539
726k
    xmlChar *name;
7540
726k
    const xmlChar *ptr;
7541
726k
    xmlChar cur;
7542
7543
726k
    if ((str == NULL) || (*str == NULL))
7544
0
        return(NULL);
7545
726k
    ptr = *str;
7546
726k
    cur = *ptr;
7547
726k
    if (cur != '&')
7548
0
  return(NULL);
7549
7550
726k
    ptr++;
7551
726k
    name = xmlParseStringName(ctxt, &ptr);
7552
726k
    if (name == NULL) {
7553
55
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554
55
           "xmlParseStringEntityRef: no name\n");
7555
55
  *str = ptr;
7556
55
  return(NULL);
7557
55
    }
7558
726k
    if (*ptr != ';') {
7559
19
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7560
19
        xmlFree(name);
7561
19
  *str = ptr;
7562
19
  return(NULL);
7563
19
    }
7564
726k
    ptr++;
7565
7566
726k
    *str = ptr;
7567
726k
    return(name);
7568
726k
}
7569
7570
/**
7571
 * Parse a parameter entity reference. Always consumes '%'.
7572
 *
7573
 * The entity content is handled directly by pushing its content as
7574
 * a new input stream.
7575
 *
7576
 *     [69] PEReference ::= '%' Name ';'
7577
 *
7578
 * [ WFC: No Recursion ]
7579
 * A parsed entity must not contain a recursive
7580
 * reference to itself, either directly or indirectly.
7581
 *
7582
 * [ WFC: Entity Declared ]
7583
 * In a document without any DTD, a document with only an internal DTD
7584
 * subset which contains no parameter entity references, or a document
7585
 * with "standalone='yes'", ...  ... The declaration of a parameter
7586
 * entity must precede any reference to it...
7587
 *
7588
 * [ VC: Entity Declared ]
7589
 * In a document with an external subset or external parameter entities
7590
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7591
 * must precede any reference to it...
7592
 *
7593
 * [ WFC: In DTD ]
7594
 * Parameter-entity references may only appear in the DTD.
7595
 * NOTE: misleading but this is handled.
7596
 *
7597
 * @param ctxt  an XML parser context
7598
 * @param markupDecl  whether the PERef starts a markup declaration
7599
 */
7600
static void
7601
162k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7602
162k
    const xmlChar *name;
7603
162k
    xmlEntityPtr entity = NULL;
7604
162k
    xmlParserInputPtr input;
7605
7606
162k
    if (RAW != '%')
7607
0
        return;
7608
162k
    NEXT;
7609
162k
    name = xmlParseName(ctxt);
7610
162k
    if (name == NULL) {
7611
10.4k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7612
10.4k
  return;
7613
10.4k
    }
7614
151k
    if (RAW != ';') {
7615
15.1k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7616
15.1k
        return;
7617
15.1k
    }
7618
7619
136k
    NEXT;
7620
7621
    /* Must be set before xmlHandleUndeclaredEntity */
7622
136k
    ctxt->hasPErefs = 1;
7623
7624
    /*
7625
     * Request the entity from SAX
7626
     */
7627
136k
    if ((ctxt->sax != NULL) &&
7628
136k
  (ctxt->sax->getParameterEntity != NULL))
7629
136k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7630
7631
136k
    if (entity == NULL) {
7632
53.2k
        xmlHandleUndeclaredEntity(ctxt, name);
7633
83.2k
    } else {
7634
  /*
7635
   * Internal checking in case the entity quest barfed
7636
   */
7637
83.2k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7638
83.2k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7639
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7640
0
      "Internal: %%%s; is not a parameter entity\n",
7641
0
        name, NULL);
7642
83.2k
  } else {
7643
83.2k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7644
83.2k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7645
58.6k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7646
58.4k
      (ctxt->replaceEntities == 0) &&
7647
58.4k
      (ctxt->validate == 0))))
7648
388
    return;
7649
7650
82.9k
            if (entity->flags & XML_ENT_EXPANDING) {
7651
28
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7652
28
                return;
7653
28
            }
7654
7655
82.8k
      input = xmlNewEntityInputStream(ctxt, entity);
7656
82.8k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7657
4.76k
                xmlFreeInputStream(input);
7658
4.76k
    return;
7659
4.76k
            }
7660
7661
78.1k
            entity->flags |= XML_ENT_EXPANDING;
7662
7663
78.1k
            if (markupDecl)
7664
64.2k
                input->flags |= XML_INPUT_MARKUP_DECL;
7665
7666
78.1k
            GROW;
7667
7668
78.1k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7669
53.5k
                xmlDetectEncoding(ctxt);
7670
7671
53.5k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7672
53.5k
                    (IS_BLANK_CH(NXT(5)))) {
7673
11.5k
                    xmlParseTextDecl(ctxt);
7674
11.5k
                }
7675
53.5k
            }
7676
78.1k
  }
7677
83.2k
    }
7678
136k
}
7679
7680
/**
7681
 * Parse a parameter entity reference.
7682
 *
7683
 * @deprecated Internal function, don't use.
7684
 *
7685
 * @param ctxt  an XML parser context
7686
 */
7687
void
7688
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7689
0
    xmlParsePERefInternal(ctxt, 0);
7690
0
}
7691
7692
/**
7693
 * Load the content of an entity.
7694
 *
7695
 * @param ctxt  an XML parser context
7696
 * @param entity  an unloaded system entity
7697
 * @returns 0 in case of success and -1 in case of failure
7698
 */
7699
static int
7700
19.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7701
19.5k
    xmlParserInputPtr oldinput, input = NULL;
7702
19.5k
    xmlParserInputPtr *oldinputTab;
7703
19.5k
    xmlChar *oldencoding;
7704
19.5k
    xmlChar *content = NULL;
7705
19.5k
    xmlResourceType rtype;
7706
19.5k
    size_t length, i;
7707
19.5k
    int oldinputNr, oldinputMax;
7708
19.5k
    int ret = -1;
7709
19.5k
    int res;
7710
7711
19.5k
    if ((ctxt == NULL) || (entity == NULL) ||
7712
19.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7713
19.5k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7714
19.5k
  (entity->content != NULL)) {
7715
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7716
0
              "xmlLoadEntityContent parameter error");
7717
0
        return(-1);
7718
0
    }
7719
7720
19.5k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7721
19.5k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7722
0
    else
7723
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7724
7725
19.5k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7726
19.5k
                            (char *) entity->ExternalID, rtype);
7727
19.5k
    if (input == NULL)
7728
544
        return(-1);
7729
7730
19.0k
    oldinput = ctxt->input;
7731
19.0k
    oldinputNr = ctxt->inputNr;
7732
19.0k
    oldinputMax = ctxt->inputMax;
7733
19.0k
    oldinputTab = ctxt->inputTab;
7734
19.0k
    oldencoding = ctxt->encoding;
7735
7736
19.0k
    ctxt->input = NULL;
7737
19.0k
    ctxt->inputNr = 0;
7738
19.0k
    ctxt->inputMax = 1;
7739
19.0k
    ctxt->encoding = NULL;
7740
19.0k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7741
19.0k
    if (ctxt->inputTab == NULL) {
7742
3
        xmlErrMemory(ctxt);
7743
3
        xmlFreeInputStream(input);
7744
3
        goto error;
7745
3
    }
7746
7747
19.0k
    xmlBufResetInput(input->buf->buffer, input);
7748
7749
19.0k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7750
19
        xmlFreeInputStream(input);
7751
19
        goto error;
7752
19
    }
7753
7754
19.0k
    xmlDetectEncoding(ctxt);
7755
7756
    /*
7757
     * Parse a possible text declaration first
7758
     */
7759
19.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7760
12.1k
  xmlParseTextDecl(ctxt);
7761
        /*
7762
         * An XML-1.0 document can't reference an entity not XML-1.0
7763
         */
7764
12.1k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7765
12.1k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7766
1.77k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7767
1.77k
                           "Version mismatch between document and entity\n");
7768
1.77k
        }
7769
12.1k
    }
7770
7771
19.0k
    length = input->cur - input->base;
7772
19.0k
    xmlBufShrink(input->buf->buffer, length);
7773
19.0k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7774
7775
24.5k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7776
5.54k
        ;
7777
7778
19.0k
    xmlBufResetInput(input->buf->buffer, input);
7779
7780
19.0k
    if (res < 0) {
7781
1.19k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7782
1.19k
        goto error;
7783
1.19k
    }
7784
7785
17.8k
    length = xmlBufUse(input->buf->buffer);
7786
17.8k
    if (length > INT_MAX) {
7787
0
        xmlErrMemory(ctxt);
7788
0
        goto error;
7789
0
    }
7790
7791
17.8k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7792
17.8k
    if (content == NULL) {
7793
26
        xmlErrMemory(ctxt);
7794
26
        goto error;
7795
26
    }
7796
7797
4.96M
    for (i = 0; i < length; ) {
7798
4.96M
        int clen = length - i;
7799
4.96M
        int c = xmlGetUTF8Char(content + i, &clen);
7800
7801
4.96M
        if ((c < 0) || (!IS_CHAR(c))) {
7802
17.6k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7803
17.6k
                              "xmlLoadEntityContent: invalid char value %d\n",
7804
17.6k
                              content[i]);
7805
17.6k
            goto error;
7806
17.6k
        }
7807
4.94M
        i += clen;
7808
4.94M
    }
7809
7810
175
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7811
175
    entity->content = content;
7812
175
    entity->length = length;
7813
175
    content = NULL;
7814
175
    ret = 0;
7815
7816
19.0k
error:
7817
38.0k
    while (ctxt->inputNr > 0)
7818
19.0k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7819
19.0k
    xmlFree(ctxt->inputTab);
7820
19.0k
    xmlFree(ctxt->encoding);
7821
7822
19.0k
    ctxt->input = oldinput;
7823
19.0k
    ctxt->inputNr = oldinputNr;
7824
19.0k
    ctxt->inputMax = oldinputMax;
7825
19.0k
    ctxt->inputTab = oldinputTab;
7826
19.0k
    ctxt->encoding = oldencoding;
7827
7828
19.0k
    xmlFree(content);
7829
7830
19.0k
    return(ret);
7831
175
}
7832
7833
/**
7834
 * Parse PEReference declarations
7835
 *
7836
 *     [69] PEReference ::= '%' Name ';'
7837
 *
7838
 * [ WFC: No Recursion ]
7839
 * A parsed entity must not contain a recursive
7840
 * reference to itself, either directly or indirectly.
7841
 *
7842
 * [ WFC: Entity Declared ]
7843
 * In a document without any DTD, a document with only an internal DTD
7844
 * subset which contains no parameter entity references, or a document
7845
 * with "standalone='yes'", ...  ... The declaration of a parameter
7846
 * entity must precede any reference to it...
7847
 *
7848
 * [ VC: Entity Declared ]
7849
 * In a document with an external subset or external parameter entities
7850
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7851
 * must precede any reference to it...
7852
 *
7853
 * [ WFC: In DTD ]
7854
 * Parameter-entity references may only appear in the DTD.
7855
 * NOTE: misleading but this is handled.
7856
 *
7857
 * @param ctxt  an XML parser context
7858
 * @param str  a pointer to an index in the string
7859
 * @returns the string of the entity content.
7860
 *         str is updated to the current value of the index
7861
 */
7862
static xmlEntityPtr
7863
46.4k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7864
46.4k
    const xmlChar *ptr;
7865
46.4k
    xmlChar cur;
7866
46.4k
    xmlChar *name;
7867
46.4k
    xmlEntityPtr entity = NULL;
7868
7869
46.4k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7870
46.4k
    ptr = *str;
7871
46.4k
    cur = *ptr;
7872
46.4k
    if (cur != '%')
7873
0
        return(NULL);
7874
46.4k
    ptr++;
7875
46.4k
    name = xmlParseStringName(ctxt, &ptr);
7876
46.4k
    if (name == NULL) {
7877
4.67k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878
4.67k
           "xmlParseStringPEReference: no name\n");
7879
4.67k
  *str = ptr;
7880
4.67k
  return(NULL);
7881
4.67k
    }
7882
41.7k
    cur = *ptr;
7883
41.7k
    if (cur != ';') {
7884
4.30k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7885
4.30k
  xmlFree(name);
7886
4.30k
  *str = ptr;
7887
4.30k
  return(NULL);
7888
4.30k
    }
7889
37.4k
    ptr++;
7890
7891
    /* Must be set before xmlHandleUndeclaredEntity */
7892
37.4k
    ctxt->hasPErefs = 1;
7893
7894
    /*
7895
     * Request the entity from SAX
7896
     */
7897
37.4k
    if ((ctxt->sax != NULL) &&
7898
37.4k
  (ctxt->sax->getParameterEntity != NULL))
7899
37.4k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7900
7901
37.4k
    if (entity == NULL) {
7902
2.59k
        xmlHandleUndeclaredEntity(ctxt, name);
7903
34.8k
    } else {
7904
  /*
7905
   * Internal checking in case the entity quest barfed
7906
   */
7907
34.8k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
34.8k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
        "%%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
0
  }
7913
34.8k
    }
7914
7915
37.4k
    xmlFree(name);
7916
37.4k
    *str = ptr;
7917
37.4k
    return(entity);
7918
41.7k
}
7919
7920
/**
7921
 * Parse a DOCTYPE declaration
7922
 *
7923
 * @deprecated Internal function, don't use.
7924
 *
7925
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7926
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7927
 *
7928
 * [ VC: Root Element Type ]
7929
 * The Name in the document type declaration must match the element
7930
 * type of the root element.
7931
 *
7932
 * @param ctxt  an XML parser context
7933
 */
7934
7935
void
7936
27.3k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7937
27.3k
    const xmlChar *name = NULL;
7938
27.3k
    xmlChar *publicId = NULL;
7939
27.3k
    xmlChar *URI = NULL;
7940
7941
    /*
7942
     * We know that '<!DOCTYPE' has been detected.
7943
     */
7944
27.3k
    SKIP(9);
7945
7946
27.3k
    if (SKIP_BLANKS == 0) {
7947
7.61k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7948
7.61k
                       "Space required after 'DOCTYPE'\n");
7949
7.61k
    }
7950
7951
    /*
7952
     * Parse the DOCTYPE name.
7953
     */
7954
27.3k
    name = xmlParseName(ctxt);
7955
27.3k
    if (name == NULL) {
7956
6.36k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7957
6.36k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7958
6.36k
    }
7959
27.3k
    ctxt->intSubName = name;
7960
7961
27.3k
    SKIP_BLANKS;
7962
7963
    /*
7964
     * Check for public and system identifier (URI)
7965
     */
7966
27.3k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7967
7968
27.3k
    if ((URI != NULL) || (publicId != NULL)) {
7969
5.43k
        ctxt->hasExternalSubset = 1;
7970
5.43k
    }
7971
27.3k
    ctxt->extSubURI = URI;
7972
27.3k
    ctxt->extSubSystem = publicId;
7973
7974
27.3k
    SKIP_BLANKS;
7975
7976
    /*
7977
     * Create and update the internal subset.
7978
     */
7979
27.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7980
27.3k
  (!ctxt->disableSAX))
7981
25.1k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7982
7983
27.3k
    if ((RAW != '[') && (RAW != '>')) {
7984
1.18k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7985
1.18k
    }
7986
27.3k
}
7987
7988
/**
7989
 * Parse the internal subset declaration
7990
 *
7991
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7992
 * @param ctxt  an XML parser context
7993
 */
7994
7995
static void
7996
20.8k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7997
    /*
7998
     * Is there any DTD definition ?
7999
     */
8000
20.8k
    if (RAW == '[') {
8001
20.8k
        int oldInputNr = ctxt->inputNr;
8002
8003
20.8k
        NEXT;
8004
  /*
8005
   * Parse the succession of Markup declarations and
8006
   * PEReferences.
8007
   * Subsequence (markupdecl | PEReference | S)*
8008
   */
8009
20.8k
  SKIP_BLANKS;
8010
430k
        while (1) {
8011
430k
            if (PARSER_STOPPED(ctxt)) {
8012
2.70k
                return;
8013
427k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8014
62.9k
                if (ctxt->inputNr <= oldInputNr) {
8015
3.15k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8016
3.15k
                    return;
8017
3.15k
                }
8018
59.7k
                xmlPopPE(ctxt);
8019
364k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8020
8.83k
                NEXT;
8021
8.83k
                SKIP_BLANKS;
8022
8.83k
                break;
8023
355k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8024
355k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8025
                /*
8026
                 * Conditional sections are allowed in external entities
8027
                 * included by PE References in the internal subset.
8028
                 */
8029
3.42k
                xmlParseConditionalSections(ctxt);
8030
352k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8031
206k
                xmlParseMarkupDecl(ctxt);
8032
206k
            } else if (RAW == '%') {
8033
139k
                xmlParsePERefInternal(ctxt, 1);
8034
139k
            } else {
8035
6.20k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8036
8037
6.93k
                while (ctxt->inputNr > oldInputNr)
8038
730
                    xmlPopPE(ctxt);
8039
6.20k
                return;
8040
6.20k
            }
8041
409k
            SKIP_BLANKS;
8042
409k
            SHRINK;
8043
409k
            GROW;
8044
409k
        }
8045
20.8k
    }
8046
8047
    /*
8048
     * We should be at the end of the DOCTYPE declaration.
8049
     */
8050
8.83k
    if (RAW != '>') {
8051
322
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8052
322
        return;
8053
322
    }
8054
8.51k
    NEXT;
8055
8.51k
}
8056
8057
#ifdef LIBXML_SAX1_ENABLED
8058
/**
8059
 * Parse an attribute
8060
 *
8061
 * @deprecated Internal function, don't use.
8062
 *
8063
 *     [41] Attribute ::= Name Eq AttValue
8064
 *
8065
 * [ WFC: No External Entity References ]
8066
 * Attribute values cannot contain direct or indirect entity references
8067
 * to external entities.
8068
 *
8069
 * [ WFC: No < in Attribute Values ]
8070
 * The replacement text of any entity referred to directly or indirectly in
8071
 * an attribute value (other than "&lt;") must not contain a <.
8072
 *
8073
 * [ VC: Attribute Value Type ]
8074
 * The attribute must have been declared; the value must be of the type
8075
 * declared for it.
8076
 *
8077
 *     [25] Eq ::= S? '=' S?
8078
 *
8079
 * With namespace:
8080
 *
8081
 *     [NS 11] Attribute ::= QName Eq AttValue
8082
 *
8083
 * Also the case QName == xmlns:??? is handled independently as a namespace
8084
 * definition.
8085
 *
8086
 * @param ctxt  an XML parser context
8087
 * @param value  a xmlChar ** used to store the value of the attribute
8088
 * @returns the attribute name, and the value in *value.
8089
 */
8090
8091
const xmlChar *
8092
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8093
0
    const xmlChar *name;
8094
0
    xmlChar *val;
8095
8096
0
    *value = NULL;
8097
0
    GROW;
8098
0
    name = xmlParseName(ctxt);
8099
0
    if (name == NULL) {
8100
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8101
0
                 "error parsing attribute name\n");
8102
0
        return(NULL);
8103
0
    }
8104
8105
    /*
8106
     * read the value
8107
     */
8108
0
    SKIP_BLANKS;
8109
0
    if (RAW == '=') {
8110
0
        NEXT;
8111
0
  SKIP_BLANKS;
8112
0
  val = xmlParseAttValue(ctxt);
8113
0
    } else {
8114
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8115
0
         "Specification mandates value for attribute %s\n", name);
8116
0
  return(name);
8117
0
    }
8118
8119
    /*
8120
     * Check that xml:lang conforms to the specification
8121
     * No more registered as an error, just generate a warning now
8122
     * since this was deprecated in XML second edition
8123
     */
8124
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8125
0
  if (!xmlCheckLanguageID(val)) {
8126
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8127
0
              "Malformed value for xml:lang : %s\n",
8128
0
        val, NULL);
8129
0
  }
8130
0
    }
8131
8132
    /*
8133
     * Check that xml:space conforms to the specification
8134
     */
8135
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8136
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8137
0
      *(ctxt->space) = 0;
8138
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8139
0
      *(ctxt->space) = 1;
8140
0
  else {
8141
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8142
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8143
0
                                 val, NULL);
8144
0
  }
8145
0
    }
8146
8147
0
    *value = val;
8148
0
    return(name);
8149
0
}
8150
8151
/**
8152
 * Parse a start tag. Always consumes '<'.
8153
 *
8154
 * @deprecated Internal function, don't use.
8155
 *
8156
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8157
 *
8158
 * [ WFC: Unique Att Spec ]
8159
 * No attribute name may appear more than once in the same start-tag or
8160
 * empty-element tag.
8161
 *
8162
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8163
 *
8164
 * [ WFC: Unique Att Spec ]
8165
 * No attribute name may appear more than once in the same start-tag or
8166
 * empty-element tag.
8167
 *
8168
 * With namespace:
8169
 *
8170
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8171
 *
8172
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8173
 *
8174
 * @param ctxt  an XML parser context
8175
 * @returns the element name parsed
8176
 */
8177
8178
const xmlChar *
8179
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8180
0
    const xmlChar *name;
8181
0
    const xmlChar *attname;
8182
0
    xmlChar *attvalue;
8183
0
    const xmlChar **atts = ctxt->atts;
8184
0
    int nbatts = 0;
8185
0
    int maxatts = ctxt->maxatts;
8186
0
    int i;
8187
8188
0
    if (RAW != '<') return(NULL);
8189
0
    NEXT1;
8190
8191
0
    name = xmlParseName(ctxt);
8192
0
    if (name == NULL) {
8193
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194
0
       "xmlParseStartTag: invalid element name\n");
8195
0
        return(NULL);
8196
0
    }
8197
8198
    /*
8199
     * Now parse the attributes, it ends up with the ending
8200
     *
8201
     * (S Attribute)* S?
8202
     */
8203
0
    SKIP_BLANKS;
8204
0
    GROW;
8205
8206
0
    while (((RAW != '>') &&
8207
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8208
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8209
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8210
0
        if (attname == NULL)
8211
0
      break;
8212
0
        if (attvalue != NULL) {
8213
      /*
8214
       * [ WFC: Unique Att Spec ]
8215
       * No attribute name may appear more than once in the same
8216
       * start-tag or empty-element tag.
8217
       */
8218
0
      for (i = 0; i < nbatts;i += 2) {
8219
0
          if (xmlStrEqual(atts[i], attname)) {
8220
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8221
0
        goto failed;
8222
0
    }
8223
0
      }
8224
      /*
8225
       * Add the pair to atts
8226
       */
8227
0
      if (nbatts + 4 > maxatts) {
8228
0
          const xmlChar **n;
8229
0
                int newSize;
8230
8231
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8232
0
                                          11, XML_MAX_ATTRS);
8233
0
                if (newSize < 0) {
8234
0
        xmlErrMemory(ctxt);
8235
0
        goto failed;
8236
0
    }
8237
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8238
0
                if (newSize < 2)
8239
0
                    newSize = 2;
8240
0
#endif
8241
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8242
0
    if (n == NULL) {
8243
0
        xmlErrMemory(ctxt);
8244
0
        goto failed;
8245
0
    }
8246
0
    atts = n;
8247
0
                maxatts = newSize * 2;
8248
0
    ctxt->atts = atts;
8249
0
    ctxt->maxatts = maxatts;
8250
0
      }
8251
8252
0
      atts[nbatts++] = attname;
8253
0
      atts[nbatts++] = attvalue;
8254
0
      atts[nbatts] = NULL;
8255
0
      atts[nbatts + 1] = NULL;
8256
8257
0
            attvalue = NULL;
8258
0
  }
8259
8260
0
failed:
8261
8262
0
        if (attvalue != NULL)
8263
0
            xmlFree(attvalue);
8264
8265
0
  GROW
8266
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8267
0
      break;
8268
0
  if (SKIP_BLANKS == 0) {
8269
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8270
0
         "attributes construct error\n");
8271
0
  }
8272
0
  SHRINK;
8273
0
        GROW;
8274
0
    }
8275
8276
    /*
8277
     * SAX: Start of Element !
8278
     */
8279
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8280
0
  (!ctxt->disableSAX)) {
8281
0
  if (nbatts > 0)
8282
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8283
0
  else
8284
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8285
0
    }
8286
8287
0
    if (atts != NULL) {
8288
        /* Free only the content strings */
8289
0
        for (i = 1;i < nbatts;i+=2)
8290
0
      if (atts[i] != NULL)
8291
0
         xmlFree((xmlChar *) atts[i]);
8292
0
    }
8293
0
    return(name);
8294
0
}
8295
8296
/**
8297
 * Parse an end tag. Always consumes '</'.
8298
 *
8299
 *     [42] ETag ::= '</' Name S? '>'
8300
 *
8301
 * With namespace
8302
 *
8303
 *     [NS 9] ETag ::= '</' QName S? '>'
8304
 * @param ctxt  an XML parser context
8305
 * @param line  line of the start tag
8306
 */
8307
8308
static void
8309
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8310
0
    const xmlChar *name;
8311
8312
0
    GROW;
8313
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8314
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8315
0
           "xmlParseEndTag: '</' not found\n");
8316
0
  return;
8317
0
    }
8318
0
    SKIP(2);
8319
8320
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8321
8322
    /*
8323
     * We should definitely be at the ending "S? '>'" part
8324
     */
8325
0
    GROW;
8326
0
    SKIP_BLANKS;
8327
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8328
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8329
0
    } else
8330
0
  NEXT1;
8331
8332
    /*
8333
     * [ WFC: Element Type Match ]
8334
     * The Name in an element's end-tag must match the element type in the
8335
     * start-tag.
8336
     *
8337
     */
8338
0
    if (name != (xmlChar*)1) {
8339
0
        if (name == NULL) name = BAD_CAST "unparsable";
8340
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8341
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8342
0
                    ctxt->name, line, name);
8343
0
    }
8344
8345
    /*
8346
     * SAX: End of Tag
8347
     */
8348
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8349
0
  (!ctxt->disableSAX))
8350
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8351
8352
0
    namePop(ctxt);
8353
0
    spacePop(ctxt);
8354
0
}
8355
8356
/**
8357
 * Parse an end of tag
8358
 *
8359
 * @deprecated Internal function, don't use.
8360
 *
8361
 *     [42] ETag ::= '</' Name S? '>'
8362
 *
8363
 * With namespace
8364
 *
8365
 *     [NS 9] ETag ::= '</' QName S? '>'
8366
 * @param ctxt  an XML parser context
8367
 */
8368
8369
void
8370
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8371
0
    xmlParseEndTag1(ctxt, 0);
8372
0
}
8373
#endif /* LIBXML_SAX1_ENABLED */
8374
8375
/************************************************************************
8376
 *                  *
8377
 *          SAX 2 specific operations       *
8378
 *                  *
8379
 ************************************************************************/
8380
8381
/**
8382
 * Parse an XML Namespace QName
8383
 *
8384
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8385
 *     [7]  Prefix  ::= NCName
8386
 *     [8]  LocalPart  ::= NCName
8387
 *
8388
 * @param ctxt  an XML parser context
8389
 * @param prefix  pointer to store the prefix part
8390
 * @returns the Name parsed or NULL
8391
 */
8392
8393
static xmlHashedString
8394
1.45M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8395
1.45M
    xmlHashedString l, p;
8396
1.45M
    int start, isNCName = 0;
8397
8398
1.45M
    l.name = NULL;
8399
1.45M
    p.name = NULL;
8400
8401
1.45M
    GROW;
8402
1.45M
    start = CUR_PTR - BASE_PTR;
8403
8404
1.45M
    l = xmlParseNCName(ctxt);
8405
1.45M
    if (l.name != NULL) {
8406
1.00M
        isNCName = 1;
8407
1.00M
        if (CUR == ':') {
8408
155k
            NEXT;
8409
155k
            p = l;
8410
155k
            l = xmlParseNCName(ctxt);
8411
155k
        }
8412
1.00M
    }
8413
1.45M
    if ((l.name == NULL) || (CUR == ':')) {
8414
462k
        xmlChar *tmp;
8415
8416
462k
        l.name = NULL;
8417
462k
        p.name = NULL;
8418
462k
        if ((isNCName == 0) && (CUR != ':'))
8419
445k
            return(l);
8420
17.1k
        tmp = xmlParseNmtoken(ctxt);
8421
17.1k
        if (tmp != NULL)
8422
9.40k
            xmlFree(tmp);
8423
17.1k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8424
17.1k
                                CUR_PTR - (BASE_PTR + start));
8425
17.1k
        if (l.name == NULL) {
8426
3
            xmlErrMemory(ctxt);
8427
3
            return(l);
8428
3
        }
8429
17.1k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8430
17.1k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8431
17.1k
    }
8432
8433
1.00M
    *prefix = p;
8434
1.00M
    return(l);
8435
1.45M
}
8436
8437
/**
8438
 * Parse an XML Namespace QName
8439
 *
8440
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8441
 *     [7]  Prefix  ::= NCName
8442
 *     [8]  LocalPart  ::= NCName
8443
 *
8444
 * @param ctxt  an XML parser context
8445
 * @param prefix  pointer to store the prefix part
8446
 * @returns the Name parsed or NULL
8447
 */
8448
8449
static const xmlChar *
8450
29.0k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8451
29.0k
    xmlHashedString n, p;
8452
8453
29.0k
    n = xmlParseQNameHashed(ctxt, &p);
8454
29.0k
    if (n.name == NULL)
8455
2.69k
        return(NULL);
8456
26.3k
    *prefix = p.name;
8457
26.3k
    return(n.name);
8458
29.0k
}
8459
8460
/**
8461
 * Parse an XML name and compares for match
8462
 * (specialized for endtag parsing)
8463
 *
8464
 * @param ctxt  an XML parser context
8465
 * @param name  the localname
8466
 * @param prefix  the prefix, if any.
8467
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8468
 * and the name for mismatch
8469
 */
8470
8471
static const xmlChar *
8472
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8473
31.5k
                        xmlChar const *prefix) {
8474
31.5k
    const xmlChar *cmp;
8475
31.5k
    const xmlChar *in;
8476
31.5k
    const xmlChar *ret;
8477
31.5k
    const xmlChar *prefix2;
8478
8479
31.5k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8480
8481
31.5k
    GROW;
8482
31.5k
    in = ctxt->input->cur;
8483
8484
31.5k
    cmp = prefix;
8485
61.9k
    while (*in != 0 && *in == *cmp) {
8486
30.3k
  ++in;
8487
30.3k
  ++cmp;
8488
30.3k
    }
8489
31.5k
    if ((*cmp == 0) && (*in == ':')) {
8490
4.43k
        in++;
8491
4.43k
  cmp = name;
8492
19.3k
  while (*in != 0 && *in == *cmp) {
8493
14.9k
      ++in;
8494
14.9k
      ++cmp;
8495
14.9k
  }
8496
4.43k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8497
      /* success */
8498
2.54k
            ctxt->input->col += in - ctxt->input->cur;
8499
2.54k
      ctxt->input->cur = in;
8500
2.54k
      return((const xmlChar*) 1);
8501
2.54k
  }
8502
4.43k
    }
8503
    /*
8504
     * all strings coms from the dictionary, equality can be done directly
8505
     */
8506
29.0k
    ret = xmlParseQName (ctxt, &prefix2);
8507
29.0k
    if (ret == NULL)
8508
2.69k
        return(NULL);
8509
26.3k
    if ((ret == name) && (prefix == prefix2))
8510
872
  return((const xmlChar*) 1);
8511
25.4k
    return ret;
8512
26.3k
}
8513
8514
/**
8515
 * Parse an attribute in the new SAX2 framework.
8516
 *
8517
 * @param ctxt  an XML parser context
8518
 * @param pref  the element prefix
8519
 * @param elem  the element name
8520
 * @param hprefix  resulting attribute prefix
8521
 * @param value  resulting value of the attribute
8522
 * @param len  resulting length of the attribute
8523
 * @param alloc  resulting indicator if the attribute was allocated
8524
 * @returns the attribute name, and the value in *value, .
8525
 */
8526
8527
static xmlHashedString
8528
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8529
                   const xmlChar * pref, const xmlChar * elem,
8530
                   xmlHashedString * hprefix, xmlChar ** value,
8531
                   int *len, int *alloc)
8532
399k
{
8533
399k
    xmlHashedString hname;
8534
399k
    const xmlChar *prefix, *name;
8535
399k
    xmlChar *val = NULL, *internal_val = NULL;
8536
399k
    int special = 0;
8537
399k
    int isNamespace;
8538
399k
    int flags;
8539
8540
399k
    *value = NULL;
8541
399k
    GROW;
8542
399k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8543
399k
    if (hname.name == NULL) {
8544
92.8k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8545
92.8k
                       "error parsing attribute name\n");
8546
92.8k
        return(hname);
8547
92.8k
    }
8548
306k
    name = hname.name;
8549
306k
    prefix = hprefix->name;
8550
8551
    /*
8552
     * get the type if needed
8553
     */
8554
306k
    if (ctxt->attsSpecial != NULL) {
8555
100k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8556
100k
                                              prefix, name));
8557
100k
    }
8558
8559
    /*
8560
     * read the value
8561
     */
8562
306k
    SKIP_BLANKS;
8563
306k
    if (RAW != '=') {
8564
31.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8565
31.4k
                          "Specification mandates value for attribute %s\n",
8566
31.4k
                          name);
8567
31.4k
        goto error;
8568
31.4k
    }
8569
8570
8571
275k
    NEXT;
8572
275k
    SKIP_BLANKS;
8573
275k
    flags = 0;
8574
275k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8575
275k
                   (prefix == ctxt->str_xmlns));
8576
275k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8577
275k
                                   isNamespace);
8578
275k
    if (val == NULL)
8579
11.5k
        goto error;
8580
8581
263k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8582
8583
263k
#ifdef LIBXML_VALID_ENABLED
8584
263k
    if ((ctxt->validate) &&
8585
263k
        (ctxt->standalone) &&
8586
263k
        (special & XML_SPECIAL_EXTERNAL) &&
8587
263k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8588
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8589
0
                         "standalone: normalization of attribute %s on %s "
8590
0
                         "by external subset declaration\n",
8591
0
                         name, elem);
8592
0
    }
8593
263k
#endif
8594
8595
263k
    if (prefix == ctxt->str_xml) {
8596
        /*
8597
         * Check that xml:lang conforms to the specification
8598
         * No more registered as an error, just generate a warning now
8599
         * since this was deprecated in XML second edition
8600
         */
8601
20.8k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8602
11.5k
            internal_val = xmlStrndup(val, *len);
8603
11.5k
            if (internal_val == NULL)
8604
5
                goto mem_error;
8605
11.5k
            if (!xmlCheckLanguageID(internal_val)) {
8606
9.04k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8607
9.04k
                              "Malformed value for xml:lang : %s\n",
8608
9.04k
                              internal_val, NULL);
8609
9.04k
            }
8610
11.5k
        }
8611
8612
        /*
8613
         * Check that xml:space conforms to the specification
8614
         */
8615
20.8k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8616
711
            internal_val = xmlStrndup(val, *len);
8617
711
            if (internal_val == NULL)
8618
3
                goto mem_error;
8619
708
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8620
234
                *(ctxt->space) = 0;
8621
474
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8622
212
                *(ctxt->space) = 1;
8623
262
            else {
8624
262
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8625
262
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8626
262
                              internal_val, NULL);
8627
262
            }
8628
708
        }
8629
20.8k
        if (internal_val) {
8630
12.2k
            xmlFree(internal_val);
8631
12.2k
        }
8632
20.8k
    }
8633
8634
263k
    *value = val;
8635
263k
    return (hname);
8636
8637
8
mem_error:
8638
8
    xmlErrMemory(ctxt);
8639
42.9k
error:
8640
42.9k
    if ((val != NULL) && (*alloc != 0))
8641
3
        xmlFree(val);
8642
42.9k
    return(hname);
8643
8
}
8644
8645
/**
8646
 * Inserts a new attribute into the hash table.
8647
 *
8648
 * @param ctxt  parser context
8649
 * @param size  size of the hash table
8650
 * @param name  attribute name
8651
 * @param uri  namespace uri
8652
 * @param hashValue  combined hash value of name and uri
8653
 * @param aindex  attribute index (this is a multiple of 5)
8654
 * @returns INT_MAX if no existing attribute was found, the attribute
8655
 * index if an attribute was found, -1 if a memory allocation failed.
8656
 */
8657
static int
8658
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8659
200k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8660
200k
    xmlAttrHashBucket *table = ctxt->attrHash;
8661
200k
    xmlAttrHashBucket *bucket;
8662
200k
    unsigned hindex;
8663
8664
200k
    hindex = hashValue & (size - 1);
8665
200k
    bucket = &table[hindex];
8666
8667
236k
    while (bucket->index >= 0) {
8668
57.8k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8669
8670
57.8k
        if (name == atts[0]) {
8671
23.8k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8672
8673
23.8k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8674
23.8k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8675
8.31k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8676
21.7k
                return(bucket->index);
8677
23.8k
        }
8678
8679
36.1k
        hindex++;
8680
36.1k
        bucket++;
8681
36.1k
        if (hindex >= size) {
8682
4.39k
            hindex = 0;
8683
4.39k
            bucket = table;
8684
4.39k
        }
8685
36.1k
    }
8686
8687
178k
    bucket->index = aindex;
8688
8689
178k
    return(INT_MAX);
8690
200k
}
8691
8692
static int
8693
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8694
                       const xmlChar *name, const xmlChar *prefix,
8695
3.91k
                       unsigned hashValue, int aindex) {
8696
3.91k
    xmlAttrHashBucket *table = ctxt->attrHash;
8697
3.91k
    xmlAttrHashBucket *bucket;
8698
3.91k
    unsigned hindex;
8699
8700
3.91k
    hindex = hashValue & (size - 1);
8701
3.91k
    bucket = &table[hindex];
8702
8703
6.02k
    while (bucket->index >= 0) {
8704
3.27k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8705
8706
3.27k
        if ((name == atts[0]) && (prefix == atts[1]))
8707
1.16k
            return(bucket->index);
8708
8709
2.11k
        hindex++;
8710
2.11k
        bucket++;
8711
2.11k
        if (hindex >= size) {
8712
194
            hindex = 0;
8713
194
            bucket = table;
8714
194
        }
8715
2.11k
    }
8716
8717
2.75k
    bucket->index = aindex;
8718
8719
2.75k
    return(INT_MAX);
8720
3.91k
}
8721
/**
8722
 * Parse a start tag. Always consumes '<'.
8723
 *
8724
 * This routine is called when running SAX2 parsing
8725
 *
8726
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8727
 *
8728
 * [ WFC: Unique Att Spec ]
8729
 * No attribute name may appear more than once in the same start-tag or
8730
 * empty-element tag.
8731
 *
8732
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8733
 *
8734
 * [ WFC: Unique Att Spec ]
8735
 * No attribute name may appear more than once in the same start-tag or
8736
 * empty-element tag.
8737
 *
8738
 * With namespace:
8739
 *
8740
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8741
 *
8742
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8743
 *
8744
 * @param ctxt  an XML parser context
8745
 * @param pref  resulting namespace prefix
8746
 * @param URI  resulting namespace URI
8747
 * @param nbNsPtr  resulting number of namespace declarations
8748
 * @returns the element name parsed
8749
 */
8750
8751
static const xmlChar *
8752
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8753
1.02M
                  const xmlChar **URI, int *nbNsPtr) {
8754
1.02M
    xmlHashedString hlocalname;
8755
1.02M
    xmlHashedString hprefix;
8756
1.02M
    xmlHashedString hattname;
8757
1.02M
    xmlHashedString haprefix;
8758
1.02M
    const xmlChar *localname;
8759
1.02M
    const xmlChar *prefix;
8760
1.02M
    const xmlChar *attname;
8761
1.02M
    const xmlChar *aprefix;
8762
1.02M
    const xmlChar *uri;
8763
1.02M
    xmlChar *attvalue = NULL;
8764
1.02M
    const xmlChar **atts = ctxt->atts;
8765
1.02M
    unsigned attrHashSize = 0;
8766
1.02M
    int maxatts = ctxt->maxatts;
8767
1.02M
    int nratts, nbatts, nbdef;
8768
1.02M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8769
1.02M
    int alloc = 0;
8770
1.02M
    int numNsErr = 0;
8771
1.02M
    int numDupErr = 0;
8772
8773
1.02M
    if (RAW != '<') return(NULL);
8774
1.02M
    NEXT1;
8775
8776
1.02M
    nbatts = 0;
8777
1.02M
    nratts = 0;
8778
1.02M
    nbdef = 0;
8779
1.02M
    nbNs = 0;
8780
1.02M
    nbTotalDef = 0;
8781
1.02M
    attval = 0;
8782
8783
1.02M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8784
0
        xmlErrMemory(ctxt);
8785
0
        return(NULL);
8786
0
    }
8787
8788
1.02M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8789
1.02M
    if (hlocalname.name == NULL) {
8790
350k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8791
350k
           "StartTag: invalid element name\n");
8792
350k
        return(NULL);
8793
350k
    }
8794
673k
    localname = hlocalname.name;
8795
673k
    prefix = hprefix.name;
8796
8797
    /*
8798
     * Now parse the attributes, it ends up with the ending
8799
     *
8800
     * (S Attribute)* S?
8801
     */
8802
673k
    SKIP_BLANKS;
8803
673k
    GROW;
8804
8805
    /*
8806
     * The ctxt->atts array will be ultimately passed to the SAX callback
8807
     * containing five xmlChar pointers for each attribute:
8808
     *
8809
     * [0] attribute name
8810
     * [1] attribute prefix
8811
     * [2] namespace URI
8812
     * [3] attribute value
8813
     * [4] end of attribute value
8814
     *
8815
     * To save memory, we reuse this array temporarily and store integers
8816
     * in these pointer variables.
8817
     *
8818
     * [0] attribute name
8819
     * [1] attribute prefix
8820
     * [2] hash value of attribute prefix, and later namespace index
8821
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8822
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8823
     *
8824
     * The ctxt->attallocs array contains an additional unsigned int for
8825
     * each attribute, containing the hash value of the attribute name
8826
     * and the alloc flag in bit 31.
8827
     */
8828
8829
796k
    while (((RAW != '>') &&
8830
796k
     ((RAW != '/') || (NXT(1) != '>')) &&
8831
796k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8832
399k
  int len = -1;
8833
8834
399k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8835
399k
                                          &haprefix, &attvalue, &len,
8836
399k
                                          &alloc);
8837
399k
        if (hattname.name == NULL)
8838
92.8k
      break;
8839
306k
        if (attvalue == NULL)
8840
42.9k
            goto next_attr;
8841
263k
        attname = hattname.name;
8842
263k
        aprefix = haprefix.name;
8843
263k
  if (len < 0) len = xmlStrlen(attvalue);
8844
8845
263k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8846
55.3k
            xmlHashedString huri;
8847
55.3k
            xmlURIPtr parsedUri;
8848
8849
55.3k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8850
55.3k
            uri = huri.name;
8851
55.3k
            if (uri == NULL) {
8852
7
                xmlErrMemory(ctxt);
8853
7
                goto next_attr;
8854
7
            }
8855
55.3k
            if (*uri != 0) {
8856
54.0k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8857
81
                    xmlErrMemory(ctxt);
8858
81
                    goto next_attr;
8859
81
                }
8860
53.9k
                if (parsedUri == NULL) {
8861
40.0k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8862
40.0k
                             "xmlns: '%s' is not a valid URI\n",
8863
40.0k
                                       uri, NULL, NULL);
8864
40.0k
                } else {
8865
13.9k
                    if (parsedUri->scheme == NULL) {
8866
9.59k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8867
9.59k
                                  "xmlns: URI %s is not absolute\n",
8868
9.59k
                                  uri, NULL, NULL);
8869
9.59k
                    }
8870
13.9k
                    xmlFreeURI(parsedUri);
8871
13.9k
                }
8872
53.9k
                if (uri == ctxt->str_xml_ns) {
8873
200
                    if (attname != ctxt->str_xml) {
8874
200
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8875
200
                     "xml namespace URI cannot be the default namespace\n",
8876
200
                                 NULL, NULL, NULL);
8877
200
                    }
8878
200
                    goto next_attr;
8879
200
                }
8880
53.7k
                if ((len == 29) &&
8881
53.7k
                    (xmlStrEqual(uri,
8882
775
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8883
406
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8884
406
                         "reuse of the xmlns namespace name is forbidden\n",
8885
406
                             NULL, NULL, NULL);
8886
406
                    goto next_attr;
8887
406
                }
8888
53.7k
            }
8889
8890
54.6k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8891
44.3k
                nbNs++;
8892
208k
        } else if (aprefix == ctxt->str_xmlns) {
8893
19.2k
            xmlHashedString huri;
8894
19.2k
            xmlURIPtr parsedUri;
8895
8896
19.2k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8897
19.2k
            uri = huri.name;
8898
19.2k
            if (uri == NULL) {
8899
3
                xmlErrMemory(ctxt);
8900
3
                goto next_attr;
8901
3
            }
8902
8903
19.2k
            if (attname == ctxt->str_xml) {
8904
198
                if (uri != ctxt->str_xml_ns) {
8905
198
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8906
198
                             "xml namespace prefix mapped to wrong URI\n",
8907
198
                             NULL, NULL, NULL);
8908
198
                }
8909
                /*
8910
                 * Do not keep a namespace definition node
8911
                 */
8912
198
                goto next_attr;
8913
198
            }
8914
19.0k
            if (uri == ctxt->str_xml_ns) {
8915
194
                if (attname != ctxt->str_xml) {
8916
194
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8917
194
                             "xml namespace URI mapped to wrong prefix\n",
8918
194
                             NULL, NULL, NULL);
8919
194
                }
8920
194
                goto next_attr;
8921
194
            }
8922
18.8k
            if (attname == ctxt->str_xmlns) {
8923
195
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8924
195
                         "redefinition of the xmlns prefix is forbidden\n",
8925
195
                         NULL, NULL, NULL);
8926
195
                goto next_attr;
8927
195
            }
8928
18.6k
            if ((len == 29) &&
8929
18.6k
                (xmlStrEqual(uri,
8930
951
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8931
358
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8932
358
                         "reuse of the xmlns namespace name is forbidden\n",
8933
358
                         NULL, NULL, NULL);
8934
358
                goto next_attr;
8935
358
            }
8936
18.3k
            if ((uri == NULL) || (uri[0] == 0)) {
8937
246
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8938
246
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8939
246
                              attname, NULL, NULL);
8940
246
                goto next_attr;
8941
18.0k
            } else {
8942
18.0k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8943
5
                    xmlErrMemory(ctxt);
8944
5
                    goto next_attr;
8945
5
                }
8946
18.0k
                if (parsedUri == NULL) {
8947
6.68k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8948
6.68k
                         "xmlns:%s: '%s' is not a valid URI\n",
8949
6.68k
                                       attname, uri, NULL);
8950
11.3k
                } else {
8951
11.3k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8952
1.54k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8953
1.54k
                                  "xmlns:%s: URI %s is not absolute\n",
8954
1.54k
                                  attname, uri, NULL);
8955
1.54k
                    }
8956
11.3k
                    xmlFreeURI(parsedUri);
8957
11.3k
                }
8958
18.0k
            }
8959
8960
18.0k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8961
16.2k
                nbNs++;
8962
189k
        } else {
8963
            /*
8964
             * Populate attributes array, see above for repurposing
8965
             * of xmlChar pointers.
8966
             */
8967
189k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8968
11.0k
                int res = xmlCtxtGrowAttrs(ctxt);
8969
8970
11.0k
                maxatts = ctxt->maxatts;
8971
11.0k
                atts = ctxt->atts;
8972
8973
11.0k
                if (res < 0)
8974
15
                    goto next_attr;
8975
11.0k
            }
8976
189k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8977
189k
                                        ((unsigned) alloc << 31);
8978
189k
            atts[nbatts++] = attname;
8979
189k
            atts[nbatts++] = aprefix;
8980
189k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8981
189k
            if (alloc) {
8982
34.3k
                atts[nbatts++] = attvalue;
8983
34.3k
                attvalue += len;
8984
34.3k
                atts[nbatts++] = attvalue;
8985
155k
            } else {
8986
                /*
8987
                 * attvalue points into the input buffer which can be
8988
                 * reallocated. Store differences to input->base instead.
8989
                 * The pointers will be reconstructed later.
8990
                 */
8991
155k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
155k
                attvalue += len;
8993
155k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8994
155k
            }
8995
            /*
8996
             * tag if some deallocation is needed
8997
             */
8998
189k
            if (alloc != 0) attval = 1;
8999
189k
            attvalue = NULL; /* moved into atts */
9000
189k
        }
9001
9002
306k
next_attr:
9003
306k
        if ((attvalue != NULL) && (alloc != 0)) {
9004
27.8k
            xmlFree(attvalue);
9005
27.8k
            attvalue = NULL;
9006
27.8k
        }
9007
9008
306k
  GROW
9009
306k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9010
94.6k
      break;
9011
212k
  if (SKIP_BLANKS == 0) {
9012
89.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9013
89.1k
         "attributes construct error\n");
9014
89.1k
      break;
9015
89.1k
  }
9016
123k
        GROW;
9017
123k
    }
9018
9019
    /*
9020
     * Namespaces from default attributes
9021
     */
9022
673k
    if (ctxt->attsDefault != NULL) {
9023
135k
        xmlDefAttrsPtr defaults;
9024
9025
135k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9026
135k
  if (defaults != NULL) {
9027
576k
      for (i = 0; i < defaults->nbAttrs; i++) {
9028
466k
                xmlDefAttr *attr = &defaults->attrs[i];
9029
9030
466k
          attname = attr->name.name;
9031
466k
    aprefix = attr->prefix.name;
9032
9033
466k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9034
8.43k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9035
9036
8.43k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9037
7.61k
                        nbNs++;
9038
458k
    } else if (aprefix == ctxt->str_xmlns) {
9039
274k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9040
9041
274k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9042
274k
                                      NULL, 1) > 0)
9043
262k
                        nbNs++;
9044
274k
    } else {
9045
184k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9046
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9047
0
                                    "Maximum number of attributes exceeded");
9048
0
                        break;
9049
0
                    }
9050
184k
                    nbTotalDef += 1;
9051
184k
                }
9052
466k
      }
9053
109k
  }
9054
135k
    }
9055
9056
    /*
9057
     * Resolve attribute namespaces
9058
     */
9059
862k
    for (i = 0; i < nbatts; i += 5) {
9060
189k
        attname = atts[i];
9061
189k
        aprefix = atts[i+1];
9062
9063
        /*
9064
  * The default namespace does not apply to attribute names.
9065
  */
9066
189k
  if (aprefix == NULL) {
9067
135k
            nsIndex = NS_INDEX_EMPTY;
9068
135k
        } else if (aprefix == ctxt->str_xml) {
9069
20.8k
            nsIndex = NS_INDEX_XML;
9070
32.8k
        } else {
9071
32.8k
            haprefix.name = aprefix;
9072
32.8k
            haprefix.hashValue = (size_t) atts[i+2];
9073
32.8k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9074
9075
32.8k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9076
17.4k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9077
17.4k
        "Namespace prefix %s for %s on %s is not defined\n",
9078
17.4k
        aprefix, attname, localname);
9079
17.4k
                nsIndex = NS_INDEX_EMPTY;
9080
17.4k
            }
9081
32.8k
        }
9082
9083
189k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9084
189k
    }
9085
9086
    /*
9087
     * Maximum number of attributes including default attributes.
9088
     */
9089
673k
    maxAtts = nratts + nbTotalDef;
9090
9091
    /*
9092
     * Verify that attribute names are unique.
9093
     */
9094
673k
    if (maxAtts > 1) {
9095
47.6k
        attrHashSize = 4;
9096
63.7k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9097
16.1k
            attrHashSize *= 2;
9098
9099
47.6k
        if (attrHashSize > ctxt->attrHashMax) {
9100
2.29k
            xmlAttrHashBucket *tmp;
9101
9102
2.29k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9103
2.29k
            if (tmp == NULL) {
9104
3
                xmlErrMemory(ctxt);
9105
3
                goto done;
9106
3
            }
9107
9108
2.29k
            ctxt->attrHash = tmp;
9109
2.29k
            ctxt->attrHashMax = attrHashSize;
9110
2.29k
        }
9111
9112
47.6k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9113
9114
117k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9115
70.2k
            const xmlChar *nsuri;
9116
70.2k
            unsigned hashValue, nameHashValue, uriHashValue;
9117
70.2k
            int res;
9118
9119
70.2k
            attname = atts[i];
9120
70.2k
            aprefix = atts[i+1];
9121
70.2k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9122
            /* Hash values always have bit 31 set, see dict.c */
9123
70.2k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9124
9125
70.2k
            if (nsIndex == NS_INDEX_EMPTY) {
9126
                /*
9127
                 * Prefix with empty namespace means an undeclared
9128
                 * prefix which was already reported above.
9129
                 */
9130
53.6k
                if (aprefix != NULL)
9131
16.0k
                    continue;
9132
37.6k
                nsuri = NULL;
9133
37.6k
                uriHashValue = URI_HASH_EMPTY;
9134
37.6k
            } else if (nsIndex == NS_INDEX_XML) {
9135
3.55k
                nsuri = ctxt->str_xml_ns;
9136
3.55k
                uriHashValue = URI_HASH_XML;
9137
12.9k
            } else {
9138
12.9k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9139
12.9k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9140
12.9k
            }
9141
9142
54.2k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9143
54.2k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9144
54.2k
                                    hashValue, i);
9145
54.2k
            if (res < 0)
9146
0
                continue;
9147
9148
            /*
9149
             * [ WFC: Unique Att Spec ]
9150
             * No attribute name may appear more than once in the same
9151
             * start-tag or empty-element tag.
9152
             * As extended by the Namespace in XML REC.
9153
             */
9154
54.2k
            if (res < INT_MAX) {
9155
17.3k
                if (aprefix == atts[res+1]) {
9156
15.3k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9157
15.3k
                    numDupErr += 1;
9158
15.3k
                } else {
9159
2.08k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9160
2.08k
                             "Namespaced Attribute %s in '%s' redefined\n",
9161
2.08k
                             attname, nsuri, NULL);
9162
2.08k
                    numNsErr += 1;
9163
2.08k
                }
9164
17.3k
            }
9165
54.2k
        }
9166
47.6k
    }
9167
9168
    /*
9169
     * Default attributes
9170
     */
9171
673k
    if (ctxt->attsDefault != NULL) {
9172
135k
        xmlDefAttrsPtr defaults;
9173
9174
135k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9175
135k
  if (defaults != NULL) {
9176
576k
      for (i = 0; i < defaults->nbAttrs; i++) {
9177
466k
                xmlDefAttr *attr = &defaults->attrs[i];
9178
466k
                const xmlChar *nsuri = NULL;
9179
466k
                unsigned hashValue, uriHashValue = 0;
9180
466k
                int res;
9181
9182
466k
          attname = attr->name.name;
9183
466k
    aprefix = attr->prefix.name;
9184
9185
466k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9186
8.43k
                    continue;
9187
458k
    if (aprefix == ctxt->str_xmlns)
9188
274k
                    continue;
9189
9190
184k
                if (aprefix == NULL) {
9191
111k
                    nsIndex = NS_INDEX_EMPTY;
9192
111k
                    nsuri = NULL;
9193
111k
                    uriHashValue = URI_HASH_EMPTY;
9194
111k
                } else if (aprefix == ctxt->str_xml) {
9195
21.1k
                    nsIndex = NS_INDEX_XML;
9196
21.1k
                    nsuri = ctxt->str_xml_ns;
9197
21.1k
                    uriHashValue = URI_HASH_XML;
9198
52.1k
                } else {
9199
52.1k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9200
52.1k
                    if ((nsIndex == INT_MAX) ||
9201
52.1k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9202
46.0k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9203
46.0k
                                 "Namespace prefix %s for %s on %s is not "
9204
46.0k
                                 "defined\n",
9205
46.0k
                                 aprefix, attname, localname);
9206
46.0k
                        nsIndex = NS_INDEX_EMPTY;
9207
46.0k
                        nsuri = NULL;
9208
46.0k
                        uriHashValue = URI_HASH_EMPTY;
9209
46.0k
                    } else {
9210
6.12k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9211
6.12k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9212
6.12k
                    }
9213
52.1k
                }
9214
9215
                /*
9216
                 * Check whether the attribute exists
9217
                 */
9218
184k
                if (maxAtts > 1) {
9219
145k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9220
145k
                                                   uriHashValue);
9221
145k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9222
145k
                                            hashValue, nbatts);
9223
145k
                    if (res < 0)
9224
0
                        continue;
9225
145k
                    if (res < INT_MAX) {
9226
4.33k
                        if (aprefix == atts[res+1])
9227
3.15k
                            continue;
9228
1.18k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9229
1.18k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9230
1.18k
                                 attname, nsuri, NULL);
9231
1.18k
                    }
9232
145k
                }
9233
9234
181k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9235
9236
181k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9237
2.58k
                    res = xmlCtxtGrowAttrs(ctxt);
9238
9239
2.58k
                    maxatts = ctxt->maxatts;
9240
2.58k
                    atts = ctxt->atts;
9241
9242
2.58k
                    if (res < 0) {
9243
6
                        localname = NULL;
9244
6
                        goto done;
9245
6
                    }
9246
2.58k
                }
9247
9248
181k
                atts[nbatts++] = attname;
9249
181k
                atts[nbatts++] = aprefix;
9250
181k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9251
181k
                atts[nbatts++] = attr->value.name;
9252
181k
                atts[nbatts++] = attr->valueEnd;
9253
9254
181k
#ifdef LIBXML_VALID_ENABLED
9255
                /*
9256
                 * This should be moved to valid.c, but we don't keep track
9257
                 * whether an attribute was defaulted.
9258
                 */
9259
181k
                if ((ctxt->validate) &&
9260
181k
                    (ctxt->standalone == 1) &&
9261
181k
                    (attr->external != 0)) {
9262
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9263
0
                            "standalone: attribute %s on %s defaulted "
9264
0
                            "from external subset\n",
9265
0
                            attname, localname);
9266
0
                }
9267
181k
#endif
9268
181k
                nbdef++;
9269
181k
      }
9270
109k
  }
9271
135k
    }
9272
9273
    /*
9274
     * Using a single hash table for nsUri/localName pairs cannot
9275
     * detect duplicate QNames reliably. The following example will
9276
     * only result in two namespace errors.
9277
     *
9278
     * <doc xmlns:a="a" xmlns:b="a">
9279
     *   <elem a:a="" b:a="" b:a=""/>
9280
     * </doc>
9281
     *
9282
     * If we saw more than one namespace error but no duplicate QNames
9283
     * were found, we have to scan for duplicate QNames.
9284
     */
9285
673k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9286
725
        memset(ctxt->attrHash, -1,
9287
725
               attrHashSize * sizeof(ctxt->attrHash[0]));
9288
9289
4.92k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9290
4.19k
            unsigned hashValue, nameHashValue, prefixHashValue;
9291
4.19k
            int res;
9292
9293
4.19k
            aprefix = atts[i+1];
9294
4.19k
            if (aprefix == NULL)
9295
284
                continue;
9296
9297
3.91k
            attname = atts[i];
9298
            /* Hash values always have bit 31 set, see dict.c */
9299
3.91k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9300
3.91k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9301
9302
3.91k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9303
3.91k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9304
3.91k
                                         aprefix, hashValue, i);
9305
3.91k
            if (res < INT_MAX)
9306
1.16k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9307
3.91k
        }
9308
725
    }
9309
9310
    /*
9311
     * Reconstruct attribute pointers
9312
     */
9313
1.04M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9314
        /* namespace URI */
9315
370k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9316
370k
        if (nsIndex == INT_MAX)
9317
307k
            atts[i+2] = NULL;
9318
63.0k
        else if (nsIndex == INT_MAX - 1)
9319
41.7k
            atts[i+2] = ctxt->str_xml_ns;
9320
21.3k
        else
9321
21.3k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9322
9323
370k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9324
155k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9325
155k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9326
155k
        }
9327
370k
    }
9328
9329
673k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9330
673k
    if ((prefix != NULL) && (uri == NULL)) {
9331
39.6k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9332
39.6k
           "Namespace prefix %s on %s is not defined\n",
9333
39.6k
     prefix, localname, NULL);
9334
39.6k
    }
9335
673k
    *pref = prefix;
9336
673k
    *URI = uri;
9337
9338
    /*
9339
     * SAX callback
9340
     */
9341
673k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9342
673k
  (!ctxt->disableSAX)) {
9343
601k
  if (nbNs > 0)
9344
85.8k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
85.8k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9346
85.8k
        nbatts / 5, nbdef, atts);
9347
515k
  else
9348
515k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9349
515k
                          0, NULL, nbatts / 5, nbdef, atts);
9350
601k
    }
9351
9352
673k
done:
9353
    /*
9354
     * Free allocated attribute values
9355
     */
9356
673k
    if (attval != 0) {
9357
77.9k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9358
46.2k
      if (ctxt->attallocs[j] & 0x80000000)
9359
34.3k
          xmlFree((xmlChar *) atts[i+3]);
9360
31.7k
    }
9361
9362
673k
    *nbNsPtr = nbNs;
9363
673k
    return(localname);
9364
673k
}
9365
9366
/**
9367
 * Parse an end tag. Always consumes '</'.
9368
 *
9369
 *     [42] ETag ::= '</' Name S? '>'
9370
 *
9371
 * With namespace
9372
 *
9373
 *     [NS 9] ETag ::= '</' QName S? '>'
9374
 * @param ctxt  an XML parser context
9375
 * @param tag  the corresponding start tag
9376
 */
9377
9378
static void
9379
101k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9380
101k
    const xmlChar *name;
9381
9382
101k
    GROW;
9383
101k
    if ((RAW != '<') || (NXT(1) != '/')) {
9384
1.13k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9385
1.13k
  return;
9386
1.13k
    }
9387
99.9k
    SKIP(2);
9388
9389
99.9k
    if (tag->prefix == NULL)
9390
68.3k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9391
31.5k
    else
9392
31.5k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9393
9394
    /*
9395
     * We should definitely be at the ending "S? '>'" part
9396
     */
9397
99.9k
    GROW;
9398
99.9k
    SKIP_BLANKS;
9399
99.9k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9400
46.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9401
46.4k
    } else
9402
53.5k
  NEXT1;
9403
9404
    /*
9405
     * [ WFC: Element Type Match ]
9406
     * The Name in an element's end-tag must match the element type in the
9407
     * start-tag.
9408
     *
9409
     */
9410
99.9k
    if (name != (xmlChar*)1) {
9411
57.2k
        if (name == NULL) name = BAD_CAST "unparsable";
9412
57.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9413
57.2k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9414
57.2k
                    ctxt->name, tag->line, name);
9415
57.2k
    }
9416
9417
    /*
9418
     * SAX: End of Tag
9419
     */
9420
99.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9421
99.9k
  (!ctxt->disableSAX))
9422
92.6k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9423
92.6k
                                tag->URI);
9424
9425
99.9k
    spacePop(ctxt);
9426
99.9k
    if (tag->nsNr != 0)
9427
4.48k
  xmlParserNsPop(ctxt, tag->nsNr);
9428
99.9k
}
9429
9430
/**
9431
 * Parse escaped pure raw content. Always consumes '<!['.
9432
 *
9433
 * @deprecated Internal function, don't use.
9434
 *
9435
 *     [18] CDSect ::= CDStart CData CDEnd
9436
 *
9437
 *     [19] CDStart ::= '<![CDATA['
9438
 *
9439
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9440
 *
9441
 *     [21] CDEnd ::= ']]>'
9442
 * @param ctxt  an XML parser context
9443
 */
9444
void
9445
26.2k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9446
26.2k
    xmlChar *buf = NULL;
9447
26.2k
    int len = 0;
9448
26.2k
    int size = XML_PARSER_BUFFER_SIZE;
9449
26.2k
    int r, rl;
9450
26.2k
    int s, sl;
9451
26.2k
    int cur, l;
9452
26.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9453
11.8k
                    XML_MAX_HUGE_LENGTH :
9454
26.2k
                    XML_MAX_TEXT_LENGTH;
9455
9456
26.2k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9457
0
        return;
9458
26.2k
    SKIP(3);
9459
9460
26.2k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9461
0
        return;
9462
26.2k
    SKIP(6);
9463
9464
26.2k
    r = xmlCurrentCharRecover(ctxt, &rl);
9465
26.2k
    if (!IS_CHAR(r)) {
9466
575
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9467
575
        goto out;
9468
575
    }
9469
25.6k
    NEXTL(rl);
9470
25.6k
    s = xmlCurrentCharRecover(ctxt, &sl);
9471
25.6k
    if (!IS_CHAR(s)) {
9472
644
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9473
644
        goto out;
9474
644
    }
9475
25.0k
    NEXTL(sl);
9476
25.0k
    cur = xmlCurrentCharRecover(ctxt, &l);
9477
25.0k
    buf = xmlMalloc(size);
9478
25.0k
    if (buf == NULL) {
9479
4
  xmlErrMemory(ctxt);
9480
4
        goto out;
9481
4
    }
9482
4.74M
    while (IS_CHAR(cur) &&
9483
4.74M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9484
4.72M
  if (len + 5 >= size) {
9485
5.24k
      xmlChar *tmp;
9486
5.24k
            int newSize;
9487
9488
5.24k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9489
5.24k
            if (newSize < 0) {
9490
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9491
0
                               "CData section too big found\n");
9492
0
                goto out;
9493
0
            }
9494
5.24k
      tmp = xmlRealloc(buf, newSize);
9495
5.24k
      if (tmp == NULL) {
9496
8
    xmlErrMemory(ctxt);
9497
8
                goto out;
9498
8
      }
9499
5.23k
      buf = tmp;
9500
5.23k
      size = newSize;
9501
5.23k
  }
9502
4.72M
  COPY_BUF(buf, len, r);
9503
4.72M
  r = s;
9504
4.72M
  rl = sl;
9505
4.72M
  s = cur;
9506
4.72M
  sl = l;
9507
4.72M
  NEXTL(l);
9508
4.72M
  cur = xmlCurrentCharRecover(ctxt, &l);
9509
4.72M
    }
9510
25.0k
    buf[len] = 0;
9511
25.0k
    if (cur != '>') {
9512
5.56k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9513
5.56k
                       "CData section not finished\n%.50s\n", buf);
9514
5.56k
        goto out;
9515
5.56k
    }
9516
19.4k
    NEXTL(l);
9517
9518
    /*
9519
     * OK the buffer is to be consumed as cdata.
9520
     */
9521
19.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9522
16.6k
        if ((ctxt->sax->cdataBlock != NULL) &&
9523
16.6k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9524
14.3k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9525
14.3k
        } else if (ctxt->sax->characters != NULL) {
9526
2.29k
            ctxt->sax->characters(ctxt->userData, buf, len);
9527
2.29k
        }
9528
16.6k
    }
9529
9530
26.2k
out:
9531
26.2k
    xmlFree(buf);
9532
26.2k
}
9533
9534
/**
9535
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9536
 * unexpected EOF to the caller.
9537
 *
9538
 * @param ctxt  an XML parser context
9539
 */
9540
9541
static void
9542
82.7k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9543
82.7k
    int oldNameNr = ctxt->nameNr;
9544
82.7k
    int oldSpaceNr = ctxt->spaceNr;
9545
82.7k
    int oldNodeNr = ctxt->nodeNr;
9546
9547
82.7k
    GROW;
9548
4.61M
    while ((ctxt->input->cur < ctxt->input->end) &&
9549
4.61M
     (PARSER_STOPPED(ctxt) == 0)) {
9550
4.53M
  const xmlChar *cur = ctxt->input->cur;
9551
9552
  /*
9553
   * First case : a Processing Instruction.
9554
   */
9555
4.53M
  if ((*cur == '<') && (cur[1] == '?')) {
9556
25.3k
      xmlParsePI(ctxt);
9557
25.3k
  }
9558
9559
  /*
9560
   * Second case : a CDSection
9561
   */
9562
  /* 2.6.0 test was *cur not RAW */
9563
4.50M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9564
24.1k
      xmlParseCDSect(ctxt);
9565
24.1k
  }
9566
9567
  /*
9568
   * Third case :  a comment
9569
   */
9570
4.48M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9571
4.48M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9572
38.0k
      xmlParseComment(ctxt);
9573
38.0k
  }
9574
9575
  /*
9576
   * Fourth case :  a sub-element.
9577
   */
9578
4.44M
  else if (*cur == '<') {
9579
985k
            if (NXT(1) == '/') {
9580
93.7k
                if (ctxt->nameNr <= oldNameNr)
9581
386
                    break;
9582
93.3k
          xmlParseElementEnd(ctxt);
9583
891k
            } else {
9584
891k
          xmlParseElementStart(ctxt);
9585
891k
            }
9586
985k
  }
9587
9588
  /*
9589
   * Fifth case : a reference. If if has not been resolved,
9590
   *    parsing returns it's Name, create the node
9591
   */
9592
9593
3.45M
  else if (*cur == '&') {
9594
225k
      xmlParseReference(ctxt);
9595
225k
  }
9596
9597
  /*
9598
   * Last case, text. Note that References are handled directly.
9599
   */
9600
3.23M
  else {
9601
3.23M
      xmlParseCharDataInternal(ctxt, 0);
9602
3.23M
  }
9603
9604
4.53M
  SHRINK;
9605
4.53M
  GROW;
9606
4.53M
    }
9607
9608
82.7k
    if ((ctxt->nameNr > oldNameNr) &&
9609
82.7k
        (ctxt->input->cur >= ctxt->input->end) &&
9610
82.7k
        (ctxt->wellFormed)) {
9611
123
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9612
123
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9613
123
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9614
123
                "Premature end of data in tag %s line %d\n",
9615
123
                name, line, NULL);
9616
123
    }
9617
9618
    /*
9619
     * Clean up in error case
9620
     */
9621
9622
265k
    while (ctxt->nodeNr > oldNodeNr)
9623
182k
        nodePop(ctxt);
9624
9625
304k
    while (ctxt->nameNr > oldNameNr) {
9626
221k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9627
9628
221k
        if (tag->nsNr != 0)
9629
60.2k
            xmlParserNsPop(ctxt, tag->nsNr);
9630
9631
221k
        namePop(ctxt);
9632
221k
    }
9633
9634
304k
    while (ctxt->spaceNr > oldSpaceNr)
9635
222k
        spacePop(ctxt);
9636
82.7k
}
9637
9638
/**
9639
 * Parse XML element content. This is useful if you're only interested
9640
 * in custom SAX callbacks. If you want a node list, use
9641
 * #xmlCtxtParseContent.
9642
 *
9643
 * @param ctxt  an XML parser context
9644
 */
9645
void
9646
0
xmlParseContent(xmlParserCtxt *ctxt) {
9647
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9648
0
        return;
9649
9650
0
    xmlCtxtInitializeLate(ctxt);
9651
9652
0
    xmlParseContentInternal(ctxt);
9653
9654
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9655
0
}
9656
9657
/**
9658
 * Parse an XML element
9659
 *
9660
 * @deprecated Internal function, don't use.
9661
 *
9662
 *     [39] element ::= EmptyElemTag | STag content ETag
9663
 *
9664
 * [ WFC: Element Type Match ]
9665
 * The Name in an element's end-tag must match the element type in the
9666
 * start-tag.
9667
 *
9668
 * @param ctxt  an XML parser context
9669
 */
9670
9671
void
9672
15.3k
xmlParseElement(xmlParserCtxt *ctxt) {
9673
15.3k
    if (xmlParseElementStart(ctxt) != 0)
9674
5.71k
        return;
9675
9676
9.67k
    xmlParseContentInternal(ctxt);
9677
9678
9.67k
    if (ctxt->input->cur >= ctxt->input->end) {
9679
8.11k
        if (ctxt->wellFormed) {
9680
349
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9681
349
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9682
349
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9683
349
                    "Premature end of data in tag %s line %d\n",
9684
349
                    name, line, NULL);
9685
349
        }
9686
8.11k
        return;
9687
8.11k
    }
9688
9689
1.56k
    xmlParseElementEnd(ctxt);
9690
1.56k
}
9691
9692
/**
9693
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9694
 * opening tag was parsed, 1 if an empty element was parsed.
9695
 *
9696
 * Always consumes '<'.
9697
 *
9698
 * @param ctxt  an XML parser context
9699
 */
9700
static int
9701
906k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9702
906k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9703
906k
    const xmlChar *name;
9704
906k
    const xmlChar *prefix = NULL;
9705
906k
    const xmlChar *URI = NULL;
9706
906k
    xmlParserNodeInfo node_info;
9707
906k
    int line;
9708
906k
    xmlNodePtr cur;
9709
906k
    int nbNs = 0;
9710
9711
906k
    if (ctxt->nameNr > maxDepth) {
9712
12
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9713
12
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9714
12
                ctxt->nameNr);
9715
12
  return(-1);
9716
12
    }
9717
9718
    /* Capture start position */
9719
906k
    if (ctxt->record_info) {
9720
0
        node_info.begin_pos = ctxt->input->consumed +
9721
0
                          (CUR_PTR - ctxt->input->base);
9722
0
  node_info.begin_line = ctxt->input->line;
9723
0
    }
9724
9725
906k
    if (ctxt->spaceNr == 0)
9726
15.3k
  spacePush(ctxt, -1);
9727
891k
    else if (*ctxt->space == -2)
9728
187k
  spacePush(ctxt, -1);
9729
703k
    else
9730
703k
  spacePush(ctxt, *ctxt->space);
9731
9732
906k
    line = ctxt->input->line;
9733
906k
#ifdef LIBXML_SAX1_ENABLED
9734
906k
    if (ctxt->sax2)
9735
906k
#endif /* LIBXML_SAX1_ENABLED */
9736
906k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9737
0
#ifdef LIBXML_SAX1_ENABLED
9738
0
    else
9739
0
  name = xmlParseStartTag(ctxt);
9740
906k
#endif /* LIBXML_SAX1_ENABLED */
9741
906k
    if (name == NULL) {
9742
346k
  spacePop(ctxt);
9743
346k
        return(-1);
9744
346k
    }
9745
559k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9746
559k
    cur = ctxt->node;
9747
9748
559k
#ifdef LIBXML_VALID_ENABLED
9749
    /*
9750
     * [ VC: Root Element Type ]
9751
     * The Name in the document type declaration must match the element
9752
     * type of the root element.
9753
     */
9754
559k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9755
559k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9756
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9757
559k
#endif /* LIBXML_VALID_ENABLED */
9758
9759
    /*
9760
     * Check for an Empty Element.
9761
     */
9762
559k
    if ((RAW == '/') && (NXT(1) == '>')) {
9763
68.2k
        SKIP(2);
9764
68.2k
  if (ctxt->sax2) {
9765
68.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9766
68.2k
    (!ctxt->disableSAX))
9767
60.2k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9768
68.2k
#ifdef LIBXML_SAX1_ENABLED
9769
68.2k
  } else {
9770
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9771
0
    (!ctxt->disableSAX))
9772
0
    ctxt->sax->endElement(ctxt->userData, name);
9773
0
#endif /* LIBXML_SAX1_ENABLED */
9774
0
  }
9775
68.2k
  namePop(ctxt);
9776
68.2k
  spacePop(ctxt);
9777
68.2k
  if (nbNs > 0)
9778
3.59k
      xmlParserNsPop(ctxt, nbNs);
9779
68.2k
  if (cur != NULL && ctxt->record_info) {
9780
0
            node_info.node = cur;
9781
0
            node_info.end_pos = ctxt->input->consumed +
9782
0
                                (CUR_PTR - ctxt->input->base);
9783
0
            node_info.end_line = ctxt->input->line;
9784
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9785
0
  }
9786
68.2k
  return(1);
9787
68.2k
    }
9788
491k
    if (RAW == '>') {
9789
325k
        NEXT1;
9790
325k
        if (cur != NULL && ctxt->record_info) {
9791
0
            node_info.node = cur;
9792
0
            node_info.end_pos = 0;
9793
0
            node_info.end_line = 0;
9794
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9795
0
        }
9796
325k
    } else {
9797
166k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9798
166k
         "Couldn't find end of Start Tag %s line %d\n",
9799
166k
                    name, line, NULL);
9800
9801
  /*
9802
   * end of parsing of this node.
9803
   */
9804
166k
  nodePop(ctxt);
9805
166k
  namePop(ctxt);
9806
166k
  spacePop(ctxt);
9807
166k
  if (nbNs > 0)
9808
20.7k
      xmlParserNsPop(ctxt, nbNs);
9809
166k
  return(-1);
9810
166k
    }
9811
9812
325k
    return(0);
9813
491k
}
9814
9815
/**
9816
 * Parse the end of an XML element. Always consumes '</'.
9817
 *
9818
 * @param ctxt  an XML parser context
9819
 */
9820
static void
9821
94.9k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9822
94.9k
    xmlNodePtr cur = ctxt->node;
9823
9824
94.9k
    if (ctxt->nameNr <= 0) {
9825
18
        if ((RAW == '<') && (NXT(1) == '/'))
9826
2
            SKIP(2);
9827
18
        return;
9828
18
    }
9829
9830
    /*
9831
     * parse the end of tag: '</' should be here.
9832
     */
9833
94.8k
    if (ctxt->sax2) {
9834
94.8k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9835
94.8k
  namePop(ctxt);
9836
94.8k
    }
9837
0
#ifdef LIBXML_SAX1_ENABLED
9838
0
    else
9839
0
  xmlParseEndTag1(ctxt, 0);
9840
94.8k
#endif /* LIBXML_SAX1_ENABLED */
9841
9842
    /*
9843
     * Capture end position
9844
     */
9845
94.8k
    if (cur != NULL && ctxt->record_info) {
9846
0
        xmlParserNodeInfoPtr node_info;
9847
9848
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9849
0
        if (node_info != NULL) {
9850
0
            node_info->end_pos = ctxt->input->consumed +
9851
0
                                 (CUR_PTR - ctxt->input->base);
9852
0
            node_info->end_line = ctxt->input->line;
9853
0
        }
9854
0
    }
9855
94.8k
}
9856
9857
/**
9858
 * Parse the XML version value.
9859
 *
9860
 * @deprecated Internal function, don't use.
9861
 *
9862
 *     [26] VersionNum ::= '1.' [0-9]+
9863
 *
9864
 * In practice allow [0-9].[0-9]+ at that level
9865
 *
9866
 * @param ctxt  an XML parser context
9867
 * @returns the string giving the XML version number, or NULL
9868
 */
9869
xmlChar *
9870
9.31k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9871
9.31k
    xmlChar *buf = NULL;
9872
9.31k
    int len = 0;
9873
9.31k
    int size = 10;
9874
9.31k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9875
4.07k
                    XML_MAX_TEXT_LENGTH :
9876
9.31k
                    XML_MAX_NAME_LENGTH;
9877
9.31k
    xmlChar cur;
9878
9879
9.31k
    buf = xmlMalloc(size);
9880
9.31k
    if (buf == NULL) {
9881
28
  xmlErrMemory(ctxt);
9882
28
  return(NULL);
9883
28
    }
9884
9.28k
    cur = CUR;
9885
9.28k
    if (!((cur >= '0') && (cur <= '9'))) {
9886
917
  xmlFree(buf);
9887
917
  return(NULL);
9888
917
    }
9889
8.37k
    buf[len++] = cur;
9890
8.37k
    NEXT;
9891
8.37k
    cur=CUR;
9892
8.37k
    if (cur != '.') {
9893
448
  xmlFree(buf);
9894
448
  return(NULL);
9895
448
    }
9896
7.92k
    buf[len++] = cur;
9897
7.92k
    NEXT;
9898
7.92k
    cur=CUR;
9899
281k
    while ((cur >= '0') && (cur <= '9')) {
9900
273k
  if (len + 1 >= size) {
9901
475
      xmlChar *tmp;
9902
475
            int newSize;
9903
9904
475
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9905
475
            if (newSize < 0) {
9906
3
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9907
3
                xmlFree(buf);
9908
3
                return(NULL);
9909
3
            }
9910
472
      tmp = xmlRealloc(buf, newSize);
9911
472
      if (tmp == NULL) {
9912
4
    xmlErrMemory(ctxt);
9913
4
          xmlFree(buf);
9914
4
    return(NULL);
9915
4
      }
9916
468
      buf = tmp;
9917
468
            size = newSize;
9918
468
  }
9919
273k
  buf[len++] = cur;
9920
273k
  NEXT;
9921
273k
  cur=CUR;
9922
273k
    }
9923
7.91k
    buf[len] = 0;
9924
7.91k
    return(buf);
9925
7.92k
}
9926
9927
/**
9928
 * Parse the XML version.
9929
 *
9930
 * @deprecated Internal function, don't use.
9931
 *
9932
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9933
 *
9934
 *     [25] Eq ::= S? '=' S?
9935
 *
9936
 * @param ctxt  an XML parser context
9937
 * @returns the version string, e.g. "1.0"
9938
 */
9939
9940
xmlChar *
9941
31.0k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9942
31.0k
    xmlChar *version = NULL;
9943
9944
31.0k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9945
10.4k
  SKIP(7);
9946
10.4k
  SKIP_BLANKS;
9947
10.4k
  if (RAW != '=') {
9948
674
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9949
674
      return(NULL);
9950
674
        }
9951
9.78k
  NEXT;
9952
9.78k
  SKIP_BLANKS;
9953
9.78k
  if (RAW == '"') {
9954
5.44k
      NEXT;
9955
5.44k
      version = xmlParseVersionNum(ctxt);
9956
5.44k
      if (RAW != '"') {
9957
1.08k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9958
1.08k
      } else
9959
4.36k
          NEXT;
9960
5.44k
  } else if (RAW == '\''){
9961
3.86k
      NEXT;
9962
3.86k
      version = xmlParseVersionNum(ctxt);
9963
3.86k
      if (RAW != '\'') {
9964
829
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9965
829
      } else
9966
3.03k
          NEXT;
9967
3.86k
  } else {
9968
468
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9969
468
  }
9970
9.78k
    }
9971
30.3k
    return(version);
9972
31.0k
}
9973
9974
/**
9975
 * Parse the XML encoding name
9976
 *
9977
 * @deprecated Internal function, don't use.
9978
 *
9979
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9980
 *
9981
 * @param ctxt  an XML parser context
9982
 * @returns the encoding name value or NULL
9983
 */
9984
xmlChar *
9985
10.0k
xmlParseEncName(xmlParserCtxt *ctxt) {
9986
10.0k
    xmlChar *buf = NULL;
9987
10.0k
    int len = 0;
9988
10.0k
    int size = 10;
9989
10.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9990
1.99k
                    XML_MAX_TEXT_LENGTH :
9991
10.0k
                    XML_MAX_NAME_LENGTH;
9992
10.0k
    xmlChar cur;
9993
9994
10.0k
    cur = CUR;
9995
10.0k
    if (((cur >= 'a') && (cur <= 'z')) ||
9996
10.0k
        ((cur >= 'A') && (cur <= 'Z'))) {
9997
9.65k
  buf = xmlMalloc(size);
9998
9.65k
  if (buf == NULL) {
9999
30
      xmlErrMemory(ctxt);
10000
30
      return(NULL);
10001
30
  }
10002
10003
9.62k
  buf[len++] = cur;
10004
9.62k
  NEXT;
10005
9.62k
  cur = CUR;
10006
377k
  while (((cur >= 'a') && (cur <= 'z')) ||
10007
377k
         ((cur >= 'A') && (cur <= 'Z')) ||
10008
377k
         ((cur >= '0') && (cur <= '9')) ||
10009
377k
         (cur == '.') || (cur == '_') ||
10010
377k
         (cur == '-')) {
10011
367k
      if (len + 1 >= size) {
10012
5.69k
          xmlChar *tmp;
10013
5.69k
                int newSize;
10014
10015
5.69k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10016
5.69k
                if (newSize < 0) {
10017
3
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10018
3
                    xmlFree(buf);
10019
3
                    return(NULL);
10020
3
                }
10021
5.69k
    tmp = xmlRealloc(buf, newSize);
10022
5.69k
    if (tmp == NULL) {
10023
6
        xmlErrMemory(ctxt);
10024
6
        xmlFree(buf);
10025
6
        return(NULL);
10026
6
    }
10027
5.68k
    buf = tmp;
10028
5.68k
                size = newSize;
10029
5.68k
      }
10030
367k
      buf[len++] = cur;
10031
367k
      NEXT;
10032
367k
      cur = CUR;
10033
367k
        }
10034
9.61k
  buf[len] = 0;
10035
9.61k
    } else {
10036
414
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10037
414
    }
10038
10.0k
    return(buf);
10039
10.0k
}
10040
10041
/**
10042
 * Parse the XML encoding declaration
10043
 *
10044
 * @deprecated Internal function, don't use.
10045
 *
10046
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10047
 *                           "'" EncName "'")
10048
 *
10049
 * this setups the conversion filters.
10050
 *
10051
 * @param ctxt  an XML parser context
10052
 * @returns the encoding value or NULL
10053
 */
10054
10055
const xmlChar *
10056
30.1k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10057
30.1k
    xmlChar *encoding = NULL;
10058
10059
30.1k
    SKIP_BLANKS;
10060
30.1k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10061
19.2k
        return(NULL);
10062
10063
10.9k
    SKIP(8);
10064
10.9k
    SKIP_BLANKS;
10065
10.9k
    if (RAW != '=') {
10066
403
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10067
403
        return(NULL);
10068
403
    }
10069
10.5k
    NEXT;
10070
10.5k
    SKIP_BLANKS;
10071
10.5k
    if (RAW == '"') {
10072
5.61k
        NEXT;
10073
5.61k
        encoding = xmlParseEncName(ctxt);
10074
5.61k
        if (RAW != '"') {
10075
531
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10076
531
            xmlFree(encoding);
10077
531
            return(NULL);
10078
531
        } else
10079
5.08k
            NEXT;
10080
5.61k
    } else if (RAW == '\''){
10081
4.46k
        NEXT;
10082
4.46k
        encoding = xmlParseEncName(ctxt);
10083
4.46k
        if (RAW != '\'') {
10084
790
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10085
790
            xmlFree(encoding);
10086
790
            return(NULL);
10087
790
        } else
10088
3.67k
            NEXT;
10089
4.46k
    } else {
10090
432
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10091
432
    }
10092
10093
9.18k
    if (encoding == NULL)
10094
435
        return(NULL);
10095
10096
8.74k
    xmlSetDeclaredEncoding(ctxt, encoding);
10097
10098
8.74k
    return(ctxt->encoding);
10099
9.18k
}
10100
10101
/**
10102
 * Parse the XML standalone declaration
10103
 *
10104
 * @deprecated Internal function, don't use.
10105
 *
10106
 *     [32] SDDecl ::= S 'standalone' Eq
10107
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10108
 *
10109
 * [ VC: Standalone Document Declaration ]
10110
 * TODO The standalone document declaration must have the value "no"
10111
 * if any external markup declarations contain declarations of:
10112
 *  - attributes with default values, if elements to which these
10113
 *    attributes apply appear in the document without specifications
10114
 *    of values for these attributes, or
10115
 *  - entities (other than amp, lt, gt, apos, quot), if references
10116
 *    to those entities appear in the document, or
10117
 *  - attributes with values subject to normalization, where the
10118
 *    attribute appears in the document with a value which will change
10119
 *    as a result of normalization, or
10120
 *  - element types with element content, if white space occurs directly
10121
 *    within any instance of those types.
10122
 *
10123
 * @param ctxt  an XML parser context
10124
 * @returns
10125
 *   1 if standalone="yes"
10126
 *   0 if standalone="no"
10127
 *  -2 if standalone attribute is missing or invalid
10128
 *    (A standalone value of -2 means that the XML declaration was found,
10129
 *     but no value was specified for the standalone attribute).
10130
 */
10131
10132
int
10133
3.45k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10134
3.45k
    int standalone = -2;
10135
10136
3.45k
    SKIP_BLANKS;
10137
3.45k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10138
176
  SKIP(10);
10139
176
        SKIP_BLANKS;
10140
176
  if (RAW != '=') {
10141
3
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
3
      return(standalone);
10143
3
        }
10144
173
  NEXT;
10145
173
  SKIP_BLANKS;
10146
173
        if (RAW == '\''){
10147
83
      NEXT;
10148
83
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10149
60
          standalone = 0;
10150
60
                SKIP(2);
10151
60
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10152
23
                 (NXT(2) == 's')) {
10153
6
          standalone = 1;
10154
6
    SKIP(3);
10155
17
            } else {
10156
17
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10157
17
      }
10158
83
      if (RAW != '\'') {
10159
25
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10160
25
      } else
10161
58
          NEXT;
10162
90
  } else if (RAW == '"'){
10163
84
      NEXT;
10164
84
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10165
8
          standalone = 0;
10166
8
    SKIP(2);
10167
76
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10168
76
                 (NXT(2) == 's')) {
10169
62
          standalone = 1;
10170
62
                SKIP(3);
10171
62
            } else {
10172
14
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10173
14
      }
10174
84
      if (RAW != '"') {
10175
44
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10176
44
      } else
10177
40
          NEXT;
10178
84
  } else {
10179
6
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10180
6
        }
10181
173
    }
10182
3.45k
    return(standalone);
10183
3.45k
}
10184
10185
/**
10186
 * Parse an XML declaration header
10187
 *
10188
 * @deprecated Internal function, don't use.
10189
 *
10190
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10191
 * @param ctxt  an XML parser context
10192
 */
10193
10194
void
10195
6.88k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10196
6.88k
    xmlChar *version;
10197
10198
    /*
10199
     * This value for standalone indicates that the document has an
10200
     * XML declaration but it does not have a standalone attribute.
10201
     * It will be overwritten later if a standalone attribute is found.
10202
     */
10203
10204
6.88k
    ctxt->standalone = -2;
10205
10206
    /*
10207
     * We know that '<?xml' is here.
10208
     */
10209
6.88k
    SKIP(5);
10210
10211
6.88k
    if (!IS_BLANK_CH(RAW)) {
10212
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10213
0
                 "Blank needed after '<?xml'\n");
10214
0
    }
10215
6.88k
    SKIP_BLANKS;
10216
10217
    /*
10218
     * We must have the VersionInfo here.
10219
     */
10220
6.88k
    version = xmlParseVersionInfo(ctxt);
10221
6.88k
    if (version == NULL) {
10222
3.49k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10223
3.49k
    } else {
10224
3.38k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10225
      /*
10226
       * Changed here for XML-1.0 5th edition
10227
       */
10228
1.49k
      if (ctxt->options & XML_PARSE_OLD10) {
10229
230
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10230
230
                "Unsupported version '%s'\n",
10231
230
                version);
10232
1.26k
      } else {
10233
1.26k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10234
693
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10235
693
                      "Unsupported version '%s'\n",
10236
693
          version, NULL);
10237
693
    } else {
10238
570
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10239
570
              "Unsupported version '%s'\n",
10240
570
              version);
10241
570
    }
10242
1.26k
      }
10243
1.49k
  }
10244
3.38k
  if (ctxt->version != NULL)
10245
0
      xmlFree(ctxt->version);
10246
3.38k
  ctxt->version = version;
10247
3.38k
    }
10248
10249
    /*
10250
     * We may have the encoding declaration
10251
     */
10252
6.88k
    if (!IS_BLANK_CH(RAW)) {
10253
4.49k
        if ((RAW == '?') && (NXT(1) == '>')) {
10254
868
      SKIP(2);
10255
868
      return;
10256
868
  }
10257
3.62k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10258
3.62k
    }
10259
6.01k
    xmlParseEncodingDecl(ctxt);
10260
10261
    /*
10262
     * We may have the standalone status.
10263
     */
10264
6.01k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10265
3.48k
        if ((RAW == '?') && (NXT(1) == '>')) {
10266
2.56k
      SKIP(2);
10267
2.56k
      return;
10268
2.56k
  }
10269
921
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10270
921
    }
10271
10272
    /*
10273
     * We can grow the input buffer freely at that point
10274
     */
10275
3.45k
    GROW;
10276
10277
3.45k
    SKIP_BLANKS;
10278
3.45k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10279
10280
3.45k
    SKIP_BLANKS;
10281
3.45k
    if ((RAW == '?') && (NXT(1) == '>')) {
10282
152
        SKIP(2);
10283
3.30k
    } else if (RAW == '>') {
10284
        /* Deprecated old WD ... */
10285
224
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10286
224
  NEXT;
10287
3.07k
    } else {
10288
3.07k
        int c;
10289
10290
3.07k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10291
319k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10292
319k
               ((c = CUR) != 0)) {
10293
318k
            NEXT;
10294
318k
            if (c == '>')
10295
2.16k
                break;
10296
318k
        }
10297
3.07k
    }
10298
3.45k
}
10299
10300
/**
10301
 * @since 2.14.0
10302
 *
10303
 * @param ctxt  parser context
10304
 * @returns the version from the XML declaration.
10305
 */
10306
const xmlChar *
10307
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10308
0
    if (ctxt == NULL)
10309
0
        return(NULL);
10310
10311
0
    return(ctxt->version);
10312
0
}
10313
10314
/**
10315
 * @since 2.14.0
10316
 *
10317
 * @param ctxt  parser context
10318
 * @returns the value from the standalone document declaration.
10319
 */
10320
int
10321
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10322
0
    if (ctxt == NULL)
10323
0
        return(0);
10324
10325
0
    return(ctxt->standalone);
10326
0
}
10327
10328
/**
10329
 * Parse an XML Misc* optional field.
10330
 *
10331
 * @deprecated Internal function, don't use.
10332
 *
10333
 *     [27] Misc ::= Comment | PI |  S
10334
 * @param ctxt  an XML parser context
10335
 */
10336
10337
void
10338
53.6k
xmlParseMisc(xmlParserCtxt *ctxt) {
10339
59.8k
    while (PARSER_STOPPED(ctxt) == 0) {
10340
54.0k
        SKIP_BLANKS;
10341
54.0k
        GROW;
10342
54.0k
        if ((RAW == '<') && (NXT(1) == '?')) {
10343
4.44k
      xmlParsePI(ctxt);
10344
49.6k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10345
1.74k
      xmlParseComment(ctxt);
10346
47.8k
        } else {
10347
47.8k
            break;
10348
47.8k
        }
10349
54.0k
    }
10350
53.6k
}
10351
10352
static void
10353
39.0k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10354
39.0k
    xmlDocPtr doc;
10355
10356
    /*
10357
     * SAX: end of the document processing.
10358
     */
10359
39.0k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10360
39.0k
        ctxt->sax->endDocument(ctxt->userData);
10361
10362
    /*
10363
     * Remove locally kept entity definitions if the tree was not built
10364
     */
10365
39.0k
    doc = ctxt->myDoc;
10366
39.0k
    if ((doc != NULL) &&
10367
39.0k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10368
224
        xmlFreeDoc(doc);
10369
224
        ctxt->myDoc = NULL;
10370
224
    }
10371
39.0k
}
10372
10373
/**
10374
 * Parse an XML document and invoke the SAX handlers. This is useful
10375
 * if you're only interested in custom SAX callbacks. If you want a
10376
 * document tree, use #xmlCtxtParseDocument.
10377
 *
10378
 * @param ctxt  an XML parser context
10379
 * @returns 0, -1 in case of error.
10380
 */
10381
10382
int
10383
24.7k
xmlParseDocument(xmlParserCtxt *ctxt) {
10384
24.7k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10385
0
        return(-1);
10386
10387
24.7k
    GROW;
10388
10389
    /*
10390
     * SAX: detecting the level.
10391
     */
10392
24.7k
    xmlCtxtInitializeLate(ctxt);
10393
10394
24.7k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10395
24.7k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10396
24.7k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10397
24.7k
    }
10398
10399
24.7k
    xmlDetectEncoding(ctxt);
10400
10401
24.7k
    if (CUR == 0) {
10402
278
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10403
278
  return(-1);
10404
278
    }
10405
10406
24.4k
    GROW;
10407
24.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10408
10409
  /*
10410
   * Note that we will switch encoding on the fly.
10411
   */
10412
3.49k
  xmlParseXMLDecl(ctxt);
10413
3.49k
  SKIP_BLANKS;
10414
21.0k
    } else {
10415
21.0k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10416
21.0k
        if (ctxt->version == NULL) {
10417
12
            xmlErrMemory(ctxt);
10418
12
            return(-1);
10419
12
        }
10420
21.0k
    }
10421
24.4k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10422
23.5k
        ctxt->sax->startDocument(ctxt->userData);
10423
24.4k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10424
24.4k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10425
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10426
0
    }
10427
10428
    /*
10429
     * The Misc part of the Prolog
10430
     */
10431
24.4k
    xmlParseMisc(ctxt);
10432
10433
    /*
10434
     * Then possibly doc type declaration(s) and more Misc
10435
     * (doctypedecl Misc*)?
10436
     */
10437
24.4k
    GROW;
10438
24.4k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10439
10440
13.8k
  ctxt->inSubset = 1;
10441
13.8k
  xmlParseDocTypeDecl(ctxt);
10442
13.8k
  if (RAW == '[') {
10443
10.9k
      xmlParseInternalSubset(ctxt);
10444
10.9k
  } else if (RAW == '>') {
10445
2.21k
            NEXT;
10446
2.21k
        }
10447
10448
  /*
10449
   * Create and update the external subset.
10450
   */
10451
13.8k
  ctxt->inSubset = 2;
10452
13.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10453
13.8k
      (!ctxt->disableSAX))
10454
10.5k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10455
10.5k
                                ctxt->extSubSystem, ctxt->extSubURI);
10456
13.8k
  ctxt->inSubset = 0;
10457
10458
13.8k
        xmlCleanSpecialAttr(ctxt);
10459
10460
13.8k
  xmlParseMisc(ctxt);
10461
13.8k
    }
10462
10463
    /*
10464
     * Time to start parsing the tree itself
10465
     */
10466
24.4k
    GROW;
10467
24.4k
    if (RAW != '<') {
10468
9.09k
        if (ctxt->wellFormed)
10469
931
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10470
931
                           "Start tag expected, '<' not found\n");
10471
15.3k
    } else {
10472
15.3k
  xmlParseElement(ctxt);
10473
10474
  /*
10475
   * The Misc part at the end
10476
   */
10477
15.3k
  xmlParseMisc(ctxt);
10478
10479
15.3k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10480
15.3k
    }
10481
10482
24.4k
    ctxt->instate = XML_PARSER_EOF;
10483
24.4k
    xmlFinishDocument(ctxt);
10484
10485
24.4k
    if (! ctxt->wellFormed) {
10486
24.3k
  ctxt->valid = 0;
10487
24.3k
  return(-1);
10488
24.3k
    }
10489
10490
156
    return(0);
10491
24.4k
}
10492
10493
/**
10494
 * Parse a general parsed entity
10495
 * An external general parsed entity is well-formed if it matches the
10496
 * production labeled extParsedEnt.
10497
 *
10498
 * @deprecated Internal function, don't use.
10499
 *
10500
 *     [78] extParsedEnt ::= TextDecl? content
10501
 *
10502
 * @param ctxt  an XML parser context
10503
 * @returns 0, -1 in case of error. the parser context is augmented
10504
 *                as a result of the parsing.
10505
 */
10506
10507
int
10508
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10509
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10510
0
        return(-1);
10511
10512
0
    xmlCtxtInitializeLate(ctxt);
10513
10514
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10515
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10516
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10517
0
    }
10518
10519
0
    xmlDetectEncoding(ctxt);
10520
10521
0
    if (CUR == 0) {
10522
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10523
0
    }
10524
10525
    /*
10526
     * Check for the XMLDecl in the Prolog.
10527
     */
10528
0
    GROW;
10529
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10530
10531
  /*
10532
   * Note that we will switch encoding on the fly.
10533
   */
10534
0
  xmlParseXMLDecl(ctxt);
10535
0
  SKIP_BLANKS;
10536
0
    } else {
10537
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10538
0
    }
10539
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10540
0
        ctxt->sax->startDocument(ctxt->userData);
10541
10542
    /*
10543
     * Doing validity checking on chunk doesn't make sense
10544
     */
10545
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10546
0
    ctxt->validate = 0;
10547
0
    ctxt->depth = 0;
10548
10549
0
    xmlParseContentInternal(ctxt);
10550
10551
0
    if (ctxt->input->cur < ctxt->input->end)
10552
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10553
10554
    /*
10555
     * SAX: end of the document processing.
10556
     */
10557
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10558
0
        ctxt->sax->endDocument(ctxt->userData);
10559
10560
0
    if (! ctxt->wellFormed) return(-1);
10561
0
    return(0);
10562
0
}
10563
10564
#ifdef LIBXML_PUSH_ENABLED
10565
/************************************************************************
10566
 *                  *
10567
 *    Progressive parsing interfaces        *
10568
 *                  *
10569
 ************************************************************************/
10570
10571
/**
10572
 * Check whether the input buffer contains a character.
10573
 *
10574
 * @param ctxt  an XML parser context
10575
 * @param c  character
10576
 */
10577
static int
10578
13.8k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10579
13.8k
    const xmlChar *cur;
10580
10581
13.8k
    if (ctxt->checkIndex == 0) {
10582
10.7k
        cur = ctxt->input->cur + 1;
10583
10.7k
    } else {
10584
3.08k
        cur = ctxt->input->cur + ctxt->checkIndex;
10585
3.08k
    }
10586
10587
13.8k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10588
3.69k
        size_t index = ctxt->input->end - ctxt->input->cur;
10589
10590
3.69k
        if (index > LONG_MAX) {
10591
0
            ctxt->checkIndex = 0;
10592
0
            return(1);
10593
0
        }
10594
3.69k
        ctxt->checkIndex = index;
10595
3.69k
        return(0);
10596
10.1k
    } else {
10597
10.1k
        ctxt->checkIndex = 0;
10598
10.1k
        return(1);
10599
10.1k
    }
10600
13.8k
}
10601
10602
/**
10603
 * Check whether the input buffer contains a string.
10604
 *
10605
 * @param ctxt  an XML parser context
10606
 * @param startDelta  delta to apply at the start
10607
 * @param str  string
10608
 * @param strLen  length of string
10609
 */
10610
static const xmlChar *
10611
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10612
95.0k
                     const char *str, size_t strLen) {
10613
95.0k
    const xmlChar *cur, *term;
10614
10615
95.0k
    if (ctxt->checkIndex == 0) {
10616
9.00k
        cur = ctxt->input->cur + startDelta;
10617
86.0k
    } else {
10618
86.0k
        cur = ctxt->input->cur + ctxt->checkIndex;
10619
86.0k
    }
10620
10621
95.0k
    term = BAD_CAST strstr((const char *) cur, str);
10622
95.0k
    if (term == NULL) {
10623
88.0k
        const xmlChar *end = ctxt->input->end;
10624
88.0k
        size_t index;
10625
10626
        /* Rescan (strLen - 1) characters. */
10627
88.0k
        if ((size_t) (end - cur) < strLen)
10628
3.54k
            end = cur;
10629
84.4k
        else
10630
84.4k
            end -= strLen - 1;
10631
88.0k
        index = end - ctxt->input->cur;
10632
88.0k
        if (index > LONG_MAX) {
10633
0
            ctxt->checkIndex = 0;
10634
0
            return(ctxt->input->end - strLen);
10635
0
        }
10636
88.0k
        ctxt->checkIndex = index;
10637
88.0k
    } else {
10638
7.01k
        ctxt->checkIndex = 0;
10639
7.01k
    }
10640
10641
95.0k
    return(term);
10642
95.0k
}
10643
10644
/**
10645
 * Check whether the input buffer contains terminated char data.
10646
 *
10647
 * @param ctxt  an XML parser context
10648
 */
10649
static int
10650
30.6k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10651
30.6k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10652
30.6k
    const xmlChar *end = ctxt->input->end;
10653
30.6k
    size_t index;
10654
10655
1.13M
    while (cur < end) {
10656
1.11M
        if ((*cur == '<') || (*cur == '&')) {
10657
14.9k
            ctxt->checkIndex = 0;
10658
14.9k
            return(1);
10659
14.9k
        }
10660
1.10M
        cur++;
10661
1.10M
    }
10662
10663
15.7k
    index = cur - ctxt->input->cur;
10664
15.7k
    if (index > LONG_MAX) {
10665
0
        ctxt->checkIndex = 0;
10666
0
        return(1);
10667
0
    }
10668
15.7k
    ctxt->checkIndex = index;
10669
15.7k
    return(0);
10670
15.7k
}
10671
10672
/**
10673
 * Check whether there's enough data in the input buffer to finish parsing
10674
 * a start tag. This has to take quotes into account.
10675
 *
10676
 * @param ctxt  an XML parser context
10677
 */
10678
static int
10679
133k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10680
133k
    const xmlChar *cur;
10681
133k
    const xmlChar *end = ctxt->input->end;
10682
133k
    int state = ctxt->endCheckState;
10683
133k
    size_t index;
10684
10685
133k
    if (ctxt->checkIndex == 0)
10686
61.0k
        cur = ctxt->input->cur + 1;
10687
72.6k
    else
10688
72.6k
        cur = ctxt->input->cur + ctxt->checkIndex;
10689
10690
58.1M
    while (cur < end) {
10691
58.0M
        if (state) {
10692
51.8M
            if (*cur == state)
10693
60.5k
                state = 0;
10694
51.8M
        } else if (*cur == '\'' || *cur == '"') {
10695
64.1k
            state = *cur;
10696
6.17M
        } else if (*cur == '>') {
10697
53.8k
            ctxt->checkIndex = 0;
10698
53.8k
            ctxt->endCheckState = 0;
10699
53.8k
            return(1);
10700
53.8k
        }
10701
58.0M
        cur++;
10702
58.0M
    }
10703
10704
79.8k
    index = cur - ctxt->input->cur;
10705
79.8k
    if (index > LONG_MAX) {
10706
0
        ctxt->checkIndex = 0;
10707
0
        ctxt->endCheckState = 0;
10708
0
        return(1);
10709
0
    }
10710
79.8k
    ctxt->checkIndex = index;
10711
79.8k
    ctxt->endCheckState = state;
10712
79.8k
    return(0);
10713
79.8k
}
10714
10715
/**
10716
 * Check whether there's enough data in the input buffer to finish parsing
10717
 * the internal subset.
10718
 *
10719
 * @param ctxt  an XML parser context
10720
 */
10721
static int
10722
294k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10723
    /*
10724
     * Sorry, but progressive parsing of the internal subset is not
10725
     * supported. We first check that the full content of the internal
10726
     * subset is available and parsing is launched only at that point.
10727
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10728
     * not in a ']]>' sequence which are conditional sections.
10729
     */
10730
294k
    const xmlChar *cur, *start;
10731
294k
    const xmlChar *end = ctxt->input->end;
10732
294k
    int state = ctxt->endCheckState;
10733
294k
    size_t index;
10734
10735
294k
    if (ctxt->checkIndex == 0) {
10736
5.13k
        cur = ctxt->input->cur + 1;
10737
289k
    } else {
10738
289k
        cur = ctxt->input->cur + ctxt->checkIndex;
10739
289k
    }
10740
294k
    start = cur;
10741
10742
268M
    while (cur < end) {
10743
268M
        if (state == '-') {
10744
23.9k
            if ((*cur == '-') &&
10745
23.9k
                (cur[1] == '-') &&
10746
23.9k
                (cur[2] == '>')) {
10747
1.76k
                state = 0;
10748
1.76k
                cur += 3;
10749
1.76k
                start = cur;
10750
1.76k
                continue;
10751
1.76k
            }
10752
23.9k
        }
10753
268M
        else if (state == ']') {
10754
4.67k
            if (*cur == '>') {
10755
2.61k
                ctxt->checkIndex = 0;
10756
2.61k
                ctxt->endCheckState = 0;
10757
2.61k
                return(1);
10758
2.61k
            }
10759
2.05k
            if (IS_BLANK_CH(*cur)) {
10760
1.05k
                state = ' ';
10761
1.05k
            } else if (*cur != ']') {
10762
554
                state = 0;
10763
554
                start = cur;
10764
554
                continue;
10765
554
            }
10766
2.05k
        }
10767
268M
        else if (state == ' ') {
10768
2.28k
            if (*cur == '>') {
10769
45
                ctxt->checkIndex = 0;
10770
45
                ctxt->endCheckState = 0;
10771
45
                return(1);
10772
45
            }
10773
2.24k
            if (!IS_BLANK_CH(*cur)) {
10774
993
                state = 0;
10775
993
                start = cur;
10776
993
                continue;
10777
993
            }
10778
2.24k
        }
10779
268M
        else if (state != 0) {
10780
267M
            if (*cur == state) {
10781
15.1k
                state = 0;
10782
15.1k
                start = cur + 1;
10783
15.1k
            }
10784
267M
        }
10785
949k
        else if (*cur == '<') {
10786
22.7k
            if ((cur[1] == '!') &&
10787
22.7k
                (cur[2] == '-') &&
10788
22.7k
                (cur[3] == '-')) {
10789
1.79k
                state = '-';
10790
1.79k
                cur += 4;
10791
                /* Don't treat <!--> as comment */
10792
1.79k
                start = cur;
10793
1.79k
                continue;
10794
1.79k
            }
10795
22.7k
        }
10796
926k
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10797
19.9k
            state = *cur;
10798
19.9k
        }
10799
10800
268M
        cur++;
10801
268M
    }
10802
10803
    /*
10804
     * Rescan the three last characters to detect "<!--" and "-->"
10805
     * split across chunks.
10806
     */
10807
292k
    if ((state == 0) || (state == '-')) {
10808
10.7k
        if (cur - start < 3)
10809
1.72k
            cur = start;
10810
9.02k
        else
10811
9.02k
            cur -= 3;
10812
10.7k
    }
10813
292k
    index = cur - ctxt->input->cur;
10814
292k
    if (index > LONG_MAX) {
10815
0
        ctxt->checkIndex = 0;
10816
0
        ctxt->endCheckState = 0;
10817
0
        return(1);
10818
0
    }
10819
292k
    ctxt->checkIndex = index;
10820
292k
    ctxt->endCheckState = state;
10821
292k
    return(0);
10822
292k
}
10823
10824
/**
10825
 * Try to progress on parsing
10826
 *
10827
 * @param ctxt  an XML parser context
10828
 * @param terminate  last chunk indicator
10829
 * @returns zero if no parsing was possible
10830
 */
10831
static int
10832
539k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10833
539k
    int ret = 0;
10834
539k
    size_t avail;
10835
539k
    xmlChar cur, next;
10836
10837
539k
    if (ctxt->input == NULL)
10838
0
        return(0);
10839
10840
539k
    if ((ctxt->input != NULL) &&
10841
539k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10842
825
        xmlParserShrink(ctxt);
10843
825
    }
10844
10845
1.09M
    while (ctxt->disableSAX == 0) {
10846
1.08M
        avail = ctxt->input->end - ctxt->input->cur;
10847
1.08M
        if (avail < 1)
10848
13.3k
      goto done;
10849
1.07M
        switch (ctxt->instate) {
10850
11.1k
            case XML_PARSER_EOF:
10851
          /*
10852
     * Document parsing is done !
10853
     */
10854
11.1k
          goto done;
10855
33.3k
            case XML_PARSER_START:
10856
                /*
10857
                 * Very first chars read from the document flow.
10858
                 */
10859
33.3k
                if ((!terminate) && (avail < 4))
10860
7.56k
                    goto done;
10861
10862
                /*
10863
                 * We need more bytes to detect EBCDIC code pages.
10864
                 * See xmlDetectEBCDIC.
10865
                 */
10866
25.7k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10867
25.7k
                    (!terminate) && (avail < 200))
10868
1.36k
                    goto done;
10869
10870
24.3k
                xmlDetectEncoding(ctxt);
10871
24.3k
                ctxt->instate = XML_PARSER_XML_DECL;
10872
24.3k
    break;
10873
10874
43.3k
            case XML_PARSER_XML_DECL:
10875
43.3k
    if ((!terminate) && (avail < 2))
10876
207
        goto done;
10877
43.1k
    cur = ctxt->input->cur[0];
10878
43.1k
    next = ctxt->input->cur[1];
10879
43.1k
          if ((cur == '<') && (next == '?')) {
10880
        /* PI or XML decl */
10881
23.9k
        if ((!terminate) &&
10882
23.9k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10883
18.8k
      goto done;
10884
5.13k
        if ((ctxt->input->cur[2] == 'x') &&
10885
5.13k
      (ctxt->input->cur[3] == 'm') &&
10886
5.13k
      (ctxt->input->cur[4] == 'l') &&
10887
5.13k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10888
3.39k
      ret += 5;
10889
3.39k
      xmlParseXMLDecl(ctxt);
10890
3.39k
        } else {
10891
1.73k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10892
1.73k
                        if (ctxt->version == NULL) {
10893
4
                            xmlErrMemory(ctxt);
10894
4
                            break;
10895
4
                        }
10896
1.73k
        }
10897
19.1k
    } else {
10898
19.1k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10899
19.1k
        if (ctxt->version == NULL) {
10900
36
            xmlErrMemory(ctxt);
10901
36
      break;
10902
36
        }
10903
19.1k
    }
10904
24.2k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10905
24.2k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10906
24.2k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10907
24.2k
                }
10908
24.2k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10909
24.2k
                    (!ctxt->disableSAX))
10910
23.6k
                    ctxt->sax->startDocument(ctxt->userData);
10911
24.2k
                ctxt->instate = XML_PARSER_MISC;
10912
24.2k
    break;
10913
160k
            case XML_PARSER_START_TAG: {
10914
160k
          const xmlChar *name;
10915
160k
    const xmlChar *prefix = NULL;
10916
160k
    const xmlChar *URI = NULL;
10917
160k
                int line = ctxt->input->line;
10918
160k
    int nbNs = 0;
10919
10920
160k
    if ((!terminate) && (avail < 2))
10921
26
        goto done;
10922
160k
    cur = ctxt->input->cur[0];
10923
160k
          if (cur != '<') {
10924
2.57k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10925
2.57k
                                   "Start tag expected, '<' not found");
10926
2.57k
                    ctxt->instate = XML_PARSER_EOF;
10927
2.57k
                    xmlFinishDocument(ctxt);
10928
2.57k
        goto done;
10929
2.57k
    }
10930
157k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10931
41.3k
                    goto done;
10932
116k
    if (ctxt->spaceNr == 0)
10933
0
        spacePush(ctxt, -1);
10934
116k
    else if (*ctxt->space == -2)
10935
23.6k
        spacePush(ctxt, -1);
10936
92.7k
    else
10937
92.7k
        spacePush(ctxt, *ctxt->space);
10938
116k
#ifdef LIBXML_SAX1_ENABLED
10939
116k
    if (ctxt->sax2)
10940
116k
#endif /* LIBXML_SAX1_ENABLED */
10941
116k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10942
0
#ifdef LIBXML_SAX1_ENABLED
10943
0
    else
10944
0
        name = xmlParseStartTag(ctxt);
10945
116k
#endif /* LIBXML_SAX1_ENABLED */
10946
116k
    if (name == NULL) {
10947
3.14k
        spacePop(ctxt);
10948
3.14k
                    ctxt->instate = XML_PARSER_EOF;
10949
3.14k
                    xmlFinishDocument(ctxt);
10950
3.14k
        goto done;
10951
3.14k
    }
10952
113k
#ifdef LIBXML_VALID_ENABLED
10953
    /*
10954
     * [ VC: Root Element Type ]
10955
     * The Name in the document type declaration must match
10956
     * the element type of the root element.
10957
     */
10958
113k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10959
113k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10960
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10961
113k
#endif /* LIBXML_VALID_ENABLED */
10962
10963
    /*
10964
     * Check for an Empty Element.
10965
     */
10966
113k
    if ((RAW == '/') && (NXT(1) == '>')) {
10967
4.20k
        SKIP(2);
10968
10969
4.20k
        if (ctxt->sax2) {
10970
4.20k
      if ((ctxt->sax != NULL) &&
10971
4.20k
          (ctxt->sax->endElementNs != NULL) &&
10972
4.20k
          (!ctxt->disableSAX))
10973
4.18k
          ctxt->sax->endElementNs(ctxt->userData, name,
10974
4.18k
                                  prefix, URI);
10975
4.20k
      if (nbNs > 0)
10976
1.74k
          xmlParserNsPop(ctxt, nbNs);
10977
4.20k
#ifdef LIBXML_SAX1_ENABLED
10978
4.20k
        } else {
10979
0
      if ((ctxt->sax != NULL) &&
10980
0
          (ctxt->sax->endElement != NULL) &&
10981
0
          (!ctxt->disableSAX))
10982
0
          ctxt->sax->endElement(ctxt->userData, name);
10983
0
#endif /* LIBXML_SAX1_ENABLED */
10984
0
        }
10985
4.20k
        spacePop(ctxt);
10986
109k
    } else if (RAW == '>') {
10987
75.2k
        NEXT;
10988
75.2k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10989
75.2k
    } else {
10990
33.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10991
33.7k
           "Couldn't find end of Start Tag %s\n",
10992
33.7k
           name);
10993
33.7k
        nodePop(ctxt);
10994
33.7k
        spacePop(ctxt);
10995
33.7k
                    if (nbNs > 0)
10996
6.45k
                        xmlParserNsPop(ctxt, nbNs);
10997
33.7k
    }
10998
10999
113k
                if (ctxt->nameNr == 0)
11000
4.09k
                    ctxt->instate = XML_PARSER_EPILOG;
11001
109k
                else
11002
109k
                    ctxt->instate = XML_PARSER_CONTENT;
11003
113k
                break;
11004
116k
      }
11005
427k
            case XML_PARSER_CONTENT: {
11006
427k
    cur = ctxt->input->cur[0];
11007
11008
427k
    if (cur == '<') {
11009
184k
                    if ((!terminate) && (avail < 2))
11010
1.85k
                        goto done;
11011
182k
        next = ctxt->input->cur[1];
11012
11013
182k
                    if (next == '/') {
11014
6.18k
                        ctxt->instate = XML_PARSER_END_TAG;
11015
6.18k
                        break;
11016
175k
                    } else if (next == '?') {
11017
19.7k
                        if ((!terminate) &&
11018
19.7k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11019
14.6k
                            goto done;
11020
5.06k
                        xmlParsePI(ctxt);
11021
5.06k
                        ctxt->instate = XML_PARSER_CONTENT;
11022
5.06k
                        break;
11023
156k
                    } else if (next == '!') {
11024
54.4k
                        if ((!terminate) && (avail < 3))
11025
566
                            goto done;
11026
53.8k
                        next = ctxt->input->cur[2];
11027
11028
53.8k
                        if (next == '-') {
11029
5.23k
                            if ((!terminate) && (avail < 4))
11030
437
                                goto done;
11031
4.79k
                            if (ctxt->input->cur[3] == '-') {
11032
4.77k
                                if ((!terminate) &&
11033
4.77k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11034
2.42k
                                    goto done;
11035
2.34k
                                xmlParseComment(ctxt);
11036
2.34k
                                ctxt->instate = XML_PARSER_CONTENT;
11037
2.34k
                                break;
11038
4.77k
                            }
11039
48.6k
                        } else if (next == '[') {
11040
48.0k
                            if ((!terminate) && (avail < 9))
11041
620
                                goto done;
11042
47.4k
                            if ((ctxt->input->cur[2] == '[') &&
11043
47.4k
                                (ctxt->input->cur[3] == 'C') &&
11044
47.4k
                                (ctxt->input->cur[4] == 'D') &&
11045
47.4k
                                (ctxt->input->cur[5] == 'A') &&
11046
47.4k
                                (ctxt->input->cur[6] == 'T') &&
11047
47.4k
                                (ctxt->input->cur[7] == 'A') &&
11048
47.4k
                                (ctxt->input->cur[8] == '[')) {
11049
47.3k
                                if ((!terminate) &&
11050
47.3k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11051
45.2k
                                    goto done;
11052
2.04k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11053
2.04k
                                xmlParseCDSect(ctxt);
11054
2.04k
                                ctxt->instate = XML_PARSER_CONTENT;
11055
2.04k
                                break;
11056
47.3k
                            }
11057
47.4k
                        }
11058
53.8k
                    }
11059
243k
    } else if (cur == '&') {
11060
21.6k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11061
2.75k
      goto done;
11062
18.8k
        xmlParseReference(ctxt);
11063
18.8k
                    break;
11064
221k
    } else {
11065
        /* TODO Avoid the extra copy, handle directly !!! */
11066
        /*
11067
         * Goal of the following test is:
11068
         *  - minimize calls to the SAX 'character' callback
11069
         *    when they are mergeable
11070
         *  - handle an problem for isBlank when we only parse
11071
         *    a sequence of blank chars and the next one is
11072
         *    not available to check against '<' presence.
11073
         *  - tries to homogenize the differences in SAX
11074
         *    callbacks between the push and pull versions
11075
         *    of the parser.
11076
         */
11077
221k
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11078
52.6k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11079
15.7k
          goto done;
11080
52.6k
                    }
11081
205k
                    ctxt->checkIndex = 0;
11082
205k
        xmlParseCharDataInternal(ctxt, !terminate);
11083
205k
                    break;
11084
221k
    }
11085
11086
102k
                ctxt->instate = XML_PARSER_START_TAG;
11087
102k
    break;
11088
427k
      }
11089
7.12k
            case XML_PARSER_END_TAG:
11090
7.12k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11091
939
        goto done;
11092
6.18k
    if (ctxt->sax2) {
11093
6.18k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11094
6.18k
        nameNsPop(ctxt);
11095
6.18k
    }
11096
0
#ifdef LIBXML_SAX1_ENABLED
11097
0
      else
11098
0
        xmlParseEndTag1(ctxt, 0);
11099
6.18k
#endif /* LIBXML_SAX1_ENABLED */
11100
6.18k
    if (ctxt->nameNr == 0) {
11101
282
        ctxt->instate = XML_PARSER_EPILOG;
11102
5.90k
    } else {
11103
5.90k
        ctxt->instate = XML_PARSER_CONTENT;
11104
5.90k
    }
11105
6.18k
    break;
11106
75.8k
            case XML_PARSER_MISC:
11107
85.5k
            case XML_PARSER_PROLOG:
11108
89.6k
            case XML_PARSER_EPILOG:
11109
89.6k
    SKIP_BLANKS;
11110
89.6k
                avail = ctxt->input->end - ctxt->input->cur;
11111
89.6k
    if (avail < 1)
11112
866
        goto done;
11113
88.7k
    if (ctxt->input->cur[0] == '<') {
11114
85.3k
                    if ((!terminate) && (avail < 2))
11115
727
                        goto done;
11116
84.6k
                    next = ctxt->input->cur[1];
11117
84.6k
                    if (next == '?') {
11118
6.75k
                        if ((!terminate) &&
11119
6.75k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11120
2.66k
                            goto done;
11121
4.09k
                        xmlParsePI(ctxt);
11122
4.09k
                        break;
11123
77.9k
                    } else if (next == '!') {
11124
64.0k
                        if ((!terminate) && (avail < 3))
11125
290
                            goto done;
11126
11127
63.7k
                        if (ctxt->input->cur[2] == '-') {
11128
6.02k
                            if ((!terminate) && (avail < 4))
11129
147
                                goto done;
11130
5.88k
                            if (ctxt->input->cur[3] == '-') {
11131
5.86k
                                if ((!terminate) &&
11132
5.86k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11133
4.17k
                                    goto done;
11134
1.69k
                                xmlParseComment(ctxt);
11135
1.69k
                                break;
11136
5.86k
                            }
11137
57.7k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11138
57.7k
                            if ((!terminate) && (avail < 9))
11139
5.58k
                                goto done;
11140
52.1k
                            if ((ctxt->input->cur[2] == 'D') &&
11141
52.1k
                                (ctxt->input->cur[3] == 'O') &&
11142
52.1k
                                (ctxt->input->cur[4] == 'C') &&
11143
52.1k
                                (ctxt->input->cur[5] == 'T') &&
11144
52.1k
                                (ctxt->input->cur[6] == 'Y') &&
11145
52.1k
                                (ctxt->input->cur[7] == 'P') &&
11146
52.1k
                                (ctxt->input->cur[8] == 'E')) {
11147
52.0k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11148
38.5k
                                    goto done;
11149
13.5k
                                ctxt->inSubset = 1;
11150
13.5k
                                xmlParseDocTypeDecl(ctxt);
11151
13.5k
                                if (RAW == '[') {
11152
10.7k
                                    ctxt->instate = XML_PARSER_DTD;
11153
10.7k
                                } else {
11154
2.77k
                                    if (RAW == '>')
11155
2.20k
                                        NEXT;
11156
                                    /*
11157
                                     * Create and update the external subset.
11158
                                     */
11159
2.77k
                                    ctxt->inSubset = 2;
11160
2.77k
                                    if ((ctxt->sax != NULL) &&
11161
2.77k
                                        (!ctxt->disableSAX) &&
11162
2.77k
                                        (ctxt->sax->externalSubset != NULL))
11163
2.63k
                                        ctxt->sax->externalSubset(
11164
2.63k
                                                ctxt->userData,
11165
2.63k
                                                ctxt->intSubName,
11166
2.63k
                                                ctxt->extSubSystem,
11167
2.63k
                                                ctxt->extSubURI);
11168
2.77k
                                    ctxt->inSubset = 0;
11169
2.77k
                                    xmlCleanSpecialAttr(ctxt);
11170
2.77k
                                    ctxt->instate = XML_PARSER_PROLOG;
11171
2.77k
                                }
11172
13.5k
                                break;
11173
52.0k
                            }
11174
52.1k
                        }
11175
63.7k
                    }
11176
84.6k
                }
11177
11178
17.3k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11179
870
                    if (ctxt->errNo == XML_ERR_OK)
11180
20
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11181
870
        ctxt->instate = XML_PARSER_EOF;
11182
870
                    xmlFinishDocument(ctxt);
11183
16.4k
                } else {
11184
16.4k
        ctxt->instate = XML_PARSER_START_TAG;
11185
16.4k
    }
11186
17.3k
    break;
11187
302k
            case XML_PARSER_DTD: {
11188
302k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11189
292k
                    goto done;
11190
9.90k
    xmlParseInternalSubset(ctxt);
11191
9.90k
    ctxt->inSubset = 2;
11192
9.90k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11193
9.90k
        (ctxt->sax->externalSubset != NULL))
11194
7.90k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11195
7.90k
          ctxt->extSubSystem, ctxt->extSubURI);
11196
9.90k
    ctxt->inSubset = 0;
11197
9.90k
    xmlCleanSpecialAttr(ctxt);
11198
9.90k
    ctxt->instate = XML_PARSER_PROLOG;
11199
9.90k
                break;
11200
302k
      }
11201
0
            default:
11202
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11203
0
      "PP: internal error\n");
11204
0
    ctxt->instate = XML_PARSER_EOF;
11205
0
    break;
11206
1.07M
  }
11207
1.07M
    }
11208
539k
done:
11209
539k
    return(ret);
11210
539k
}
11211
11212
/**
11213
 * Parse a chunk of memory in push parser mode.
11214
 *
11215
 * Assumes that the parser context was initialized with
11216
 * #xmlCreatePushParserCtxt.
11217
 *
11218
 * The last chunk, which will often be empty, must be marked with
11219
 * the `terminate` flag. With the default SAX callbacks, the resulting
11220
 * document will be available in ctxt->myDoc. This pointer will not
11221
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11222
 * caller. If the document isn't well-formed, it will still be returned
11223
 * in ctxt->myDoc.
11224
 *
11225
 * As an exception, #xmlCtxtResetPush will free the document in
11226
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11227
 * the document.
11228
 *
11229
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11230
 * result document.
11231
 *
11232
 * @param ctxt  an XML parser context
11233
 * @param chunk  chunk of memory
11234
 * @param size  size of chunk in bytes
11235
 * @param terminate  last chunk indicator
11236
 * @returns an xmlParserErrors code (0 on success).
11237
 */
11238
int
11239
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11240
728k
              int terminate) {
11241
728k
    size_t curBase;
11242
728k
    size_t maxLength;
11243
728k
    size_t pos;
11244
728k
    int end_in_lf = 0;
11245
728k
    int res;
11246
11247
728k
    if ((ctxt == NULL) || (size < 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
728k
    if ((chunk == NULL) && (size > 0))
11250
0
        return(XML_ERR_ARGUMENT);
11251
728k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11252
0
        return(XML_ERR_ARGUMENT);
11253
728k
    if (ctxt->disableSAX != 0)
11254
187k
        return(ctxt->errNo);
11255
11256
540k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11257
540k
    if (ctxt->instate == XML_PARSER_START)
11258
33.7k
        xmlCtxtInitializeLate(ctxt);
11259
540k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11260
540k
        (chunk[size - 1] == '\r')) {
11261
1.54k
  end_in_lf = 1;
11262
1.54k
  size--;
11263
1.54k
    }
11264
11265
    /*
11266
     * Also push an empty chunk to make sure that the raw buffer
11267
     * will be flushed if there is an encoder.
11268
     */
11269
540k
    pos = ctxt->input->cur - ctxt->input->base;
11270
540k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11271
540k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11272
540k
    if (res < 0) {
11273
298
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11274
298
        return(ctxt->errNo);
11275
298
    }
11276
11277
539k
    xmlParseTryOrFinish(ctxt, terminate);
11278
11279
539k
    curBase = ctxt->input->cur - ctxt->input->base;
11280
539k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11281
145k
                XML_MAX_HUGE_LENGTH :
11282
539k
                XML_MAX_LOOKUP_LIMIT;
11283
539k
    if (curBase > maxLength) {
11284
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11285
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11286
0
    }
11287
11288
539k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11289
10.7k
        return(ctxt->errNo);
11290
11291
529k
    if (end_in_lf == 1) {
11292
1.53k
  pos = ctxt->input->cur - ctxt->input->base;
11293
1.53k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11294
1.53k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11295
1.53k
        if (res < 0) {
11296
1
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11297
1
            return(ctxt->errNo);
11298
1
        }
11299
1.53k
    }
11300
529k
    if (terminate) {
11301
  /*
11302
   * Check for termination
11303
   */
11304
13.9k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11305
13.9k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11306
6.52k
            if (ctxt->nameNr > 0) {
11307
3.97k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11308
3.97k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11309
3.97k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11310
3.97k
                        "Premature end of data in tag %s line %d\n",
11311
3.97k
                        name, line, NULL);
11312
3.97k
            } else if (ctxt->instate == XML_PARSER_START) {
11313
171
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11314
2.38k
            } else {
11315
2.38k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11316
2.38k
                               "Start tag expected, '<' not found\n");
11317
2.38k
            }
11318
7.40k
        } else {
11319
7.40k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11320
7.40k
        }
11321
13.9k
  if (ctxt->instate != XML_PARSER_EOF) {
11322
7.99k
            ctxt->instate = XML_PARSER_EOF;
11323
7.99k
            xmlFinishDocument(ctxt);
11324
7.99k
  }
11325
13.9k
    }
11326
529k
    if (ctxt->wellFormed == 0)
11327
340k
  return((xmlParserErrors) ctxt->errNo);
11328
189k
    else
11329
189k
        return(0);
11330
529k
}
11331
11332
/************************************************************************
11333
 *                  *
11334
 *    I/O front end functions to the parser     *
11335
 *                  *
11336
 ************************************************************************/
11337
11338
/**
11339
 * Create a parser context for using the XML parser in push mode.
11340
 * See #xmlParseChunk.
11341
 *
11342
 * Passing an initial chunk is useless and deprecated.
11343
 *
11344
 * The push parser doesn't support recovery mode or the
11345
 * XML_PARSE_NOBLANKS option.
11346
 *
11347
 * `filename` is used as base URI to fetch external entities and for
11348
 * error reports.
11349
 *
11350
 * @param sax  a SAX handler (optional)
11351
 * @param user_data  user data for SAX callbacks (optional)
11352
 * @param chunk  initial chunk (optional, deprecated)
11353
 * @param size  size of initial chunk in bytes
11354
 * @param filename  file name or URI (optional)
11355
 * @returns the new parser context or NULL if a memory allocation
11356
 * failed.
11357
 */
11358
11359
xmlParserCtxt *
11360
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11361
24.8k
                        const char *chunk, int size, const char *filename) {
11362
24.8k
    xmlParserCtxtPtr ctxt;
11363
24.8k
    xmlParserInputPtr input;
11364
11365
24.8k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11366
24.8k
    if (ctxt == NULL)
11367
30
  return(NULL);
11368
11369
24.8k
    ctxt->options &= ~XML_PARSE_NODICT;
11370
24.8k
    ctxt->dictNames = 1;
11371
11372
24.8k
    input = xmlNewPushInput(filename, chunk, size);
11373
24.8k
    if (input == NULL) {
11374
15
  xmlFreeParserCtxt(ctxt);
11375
15
  return(NULL);
11376
15
    }
11377
24.7k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11378
7
        xmlFreeInputStream(input);
11379
7
        xmlFreeParserCtxt(ctxt);
11380
7
        return(NULL);
11381
7
    }
11382
11383
24.7k
    return(ctxt);
11384
24.7k
}
11385
#endif /* LIBXML_PUSH_ENABLED */
11386
11387
/**
11388
 * Blocks further parser processing
11389
 *
11390
 * @param ctxt  an XML parser context
11391
 */
11392
void
11393
0
xmlStopParser(xmlParserCtxt *ctxt) {
11394
0
    if (ctxt == NULL)
11395
0
        return;
11396
11397
    /* This stops the parser */
11398
0
    ctxt->disableSAX = 2;
11399
11400
    /*
11401
     * xmlStopParser is often called from error handlers,
11402
     * so we can't raise an error here to avoid infinite
11403
     * loops. Just make sure that an error condition is
11404
     * reported.
11405
     */
11406
0
    if (ctxt->errNo == XML_ERR_OK) {
11407
0
        ctxt->errNo = XML_ERR_USER_STOP;
11408
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11409
0
        ctxt->wellFormed = 0;
11410
0
    }
11411
0
}
11412
11413
/**
11414
 * Create a parser context for using the XML parser with an existing
11415
 * I/O stream
11416
 *
11417
 * @param sax  a SAX handler (optional)
11418
 * @param user_data  user data for SAX callbacks (optional)
11419
 * @param ioread  an I/O read function
11420
 * @param ioclose  an I/O close function (optional)
11421
 * @param ioctx  an I/O handler
11422
 * @param enc  the charset encoding if known (deprecated)
11423
 * @returns the new parser context or NULL
11424
 */
11425
xmlParserCtxt *
11426
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11427
                      xmlInputReadCallback ioread,
11428
                      xmlInputCloseCallback ioclose,
11429
0
                      void *ioctx, xmlCharEncoding enc) {
11430
0
    xmlParserCtxtPtr ctxt;
11431
0
    xmlParserInputPtr input;
11432
0
    const char *encoding;
11433
11434
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11435
0
    if (ctxt == NULL)
11436
0
  return(NULL);
11437
11438
0
    encoding = xmlGetCharEncodingName(enc);
11439
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11440
0
                                  encoding, 0);
11441
0
    if (input == NULL) {
11442
0
  xmlFreeParserCtxt(ctxt);
11443
0
        return (NULL);
11444
0
    }
11445
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11446
0
        xmlFreeInputStream(input);
11447
0
        xmlFreeParserCtxt(ctxt);
11448
0
        return(NULL);
11449
0
    }
11450
11451
0
    return(ctxt);
11452
0
}
11453
11454
#ifdef LIBXML_VALID_ENABLED
11455
/************************************************************************
11456
 *                  *
11457
 *    Front ends when parsing a DTD       *
11458
 *                  *
11459
 ************************************************************************/
11460
11461
/**
11462
 * Parse a DTD.
11463
 *
11464
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11465
 * to make external entities work.
11466
 *
11467
 * @since 2.14.0
11468
 *
11469
 * @param ctxt  a parser context
11470
 * @param input  a parser input
11471
 * @param publicId  public ID of the DTD (optional)
11472
 * @param systemId  system ID of the DTD (optional)
11473
 * @returns the resulting xmlDtd or NULL in case of error.
11474
 * `input` will be freed by the function in any case.
11475
 */
11476
xmlDtd *
11477
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11478
0
                const xmlChar *publicId, const xmlChar *systemId) {
11479
0
    xmlDtdPtr ret = NULL;
11480
11481
0
    if ((ctxt == NULL) || (input == NULL)) {
11482
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11483
0
        xmlFreeInputStream(input);
11484
0
        return(NULL);
11485
0
    }
11486
11487
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11488
0
        xmlFreeInputStream(input);
11489
0
        return(NULL);
11490
0
    }
11491
11492
0
    if (publicId == NULL)
11493
0
        publicId = BAD_CAST "none";
11494
0
    if (systemId == NULL)
11495
0
        systemId = BAD_CAST "none";
11496
11497
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11498
0
    if (ctxt->myDoc == NULL) {
11499
0
        xmlErrMemory(ctxt);
11500
0
        goto error;
11501
0
    }
11502
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11503
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11504
0
                                       publicId, systemId);
11505
0
    if (ctxt->myDoc->extSubset == NULL) {
11506
0
        xmlErrMemory(ctxt);
11507
0
        xmlFreeDoc(ctxt->myDoc);
11508
0
        goto error;
11509
0
    }
11510
11511
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11512
11513
0
    if (ctxt->wellFormed) {
11514
0
        ret = ctxt->myDoc->extSubset;
11515
0
        ctxt->myDoc->extSubset = NULL;
11516
0
        if (ret != NULL) {
11517
0
            xmlNodePtr tmp;
11518
11519
0
            ret->doc = NULL;
11520
0
            tmp = ret->children;
11521
0
            while (tmp != NULL) {
11522
0
                tmp->doc = NULL;
11523
0
                tmp = tmp->next;
11524
0
            }
11525
0
        }
11526
0
    } else {
11527
0
        ret = NULL;
11528
0
    }
11529
0
    xmlFreeDoc(ctxt->myDoc);
11530
0
    ctxt->myDoc = NULL;
11531
11532
0
error:
11533
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11534
11535
0
    return(ret);
11536
0
}
11537
11538
/**
11539
 * Load and parse a DTD
11540
 *
11541
 * @deprecated Use #xmlCtxtParseDtd.
11542
 *
11543
 * @param sax  the SAX handler block or NULL
11544
 * @param input  an Input Buffer
11545
 * @param enc  the charset encoding if known
11546
 * @returns the resulting xmlDtd or NULL in case of error.
11547
 * `input` will be freed by the function in any case.
11548
 */
11549
11550
xmlDtd *
11551
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11552
0
        xmlCharEncoding enc) {
11553
0
    xmlDtdPtr ret = NULL;
11554
0
    xmlParserCtxtPtr ctxt;
11555
0
    xmlParserInputPtr pinput = NULL;
11556
11557
0
    if (input == NULL)
11558
0
  return(NULL);
11559
11560
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11561
0
    if (ctxt == NULL) {
11562
0
        xmlFreeParserInputBuffer(input);
11563
0
  return(NULL);
11564
0
    }
11565
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11566
11567
    /*
11568
     * generate a parser input from the I/O handler
11569
     */
11570
11571
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11572
0
    if (pinput == NULL) {
11573
0
        xmlFreeParserInputBuffer(input);
11574
0
  xmlFreeParserCtxt(ctxt);
11575
0
  return(NULL);
11576
0
    }
11577
11578
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11579
0
        xmlSwitchEncoding(ctxt, enc);
11580
0
    }
11581
11582
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11583
11584
0
    xmlFreeParserCtxt(ctxt);
11585
0
    return(ret);
11586
0
}
11587
11588
/**
11589
 * Load and parse an external subset.
11590
 *
11591
 * @deprecated Use #xmlCtxtParseDtd.
11592
 *
11593
 * @param sax  the SAX handler block
11594
 * @param publicId  public identifier of the DTD (optional)
11595
 * @param systemId  system identifier (URL) of the DTD
11596
 * @returns the resulting xmlDtd or NULL in case of error.
11597
 */
11598
11599
xmlDtd *
11600
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11601
0
               const xmlChar *systemId) {
11602
0
    xmlDtdPtr ret = NULL;
11603
0
    xmlParserCtxtPtr ctxt;
11604
0
    xmlParserInputPtr input = NULL;
11605
0
    xmlChar* systemIdCanonic;
11606
11607
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11608
11609
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11610
0
    if (ctxt == NULL) {
11611
0
  return(NULL);
11612
0
    }
11613
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11614
11615
    /*
11616
     * Canonicalise the system ID
11617
     */
11618
0
    systemIdCanonic = xmlCanonicPath(systemId);
11619
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11620
0
  xmlFreeParserCtxt(ctxt);
11621
0
  return(NULL);
11622
0
    }
11623
11624
    /*
11625
     * Ask the Entity resolver to load the damn thing
11626
     */
11627
11628
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11629
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11630
0
                                   systemIdCanonic);
11631
0
    if (input == NULL) {
11632
0
  xmlFreeParserCtxt(ctxt);
11633
0
  if (systemIdCanonic != NULL)
11634
0
      xmlFree(systemIdCanonic);
11635
0
  return(NULL);
11636
0
    }
11637
11638
0
    if (input->filename == NULL)
11639
0
  input->filename = (char *) systemIdCanonic;
11640
0
    else
11641
0
  xmlFree(systemIdCanonic);
11642
11643
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11644
11645
0
    xmlFreeParserCtxt(ctxt);
11646
0
    return(ret);
11647
0
}
11648
11649
11650
/**
11651
 * Load and parse an external subset.
11652
 *
11653
 * @param publicId  public identifier of the DTD (optional)
11654
 * @param systemId  system identifier (URL) of the DTD
11655
 * @returns the resulting xmlDtd or NULL in case of error.
11656
 */
11657
11658
xmlDtd *
11659
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11660
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11661
0
}
11662
#endif /* LIBXML_VALID_ENABLED */
11663
11664
/************************************************************************
11665
 *                  *
11666
 *    Front ends when parsing an Entity     *
11667
 *                  *
11668
 ************************************************************************/
11669
11670
static xmlNodePtr
11671
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11672
73.1k
                            int hasTextDecl, int buildTree) {
11673
73.1k
    xmlNodePtr root = NULL;
11674
73.1k
    xmlNodePtr list = NULL;
11675
73.1k
    xmlChar *rootName = BAD_CAST "#root";
11676
73.1k
    int result;
11677
11678
73.1k
    if (buildTree) {
11679
73.1k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11680
73.1k
        if (root == NULL) {
11681
27
            xmlErrMemory(ctxt);
11682
27
            goto error;
11683
27
        }
11684
73.1k
    }
11685
11686
73.1k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11687
20
        goto error;
11688
11689
73.0k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11690
73.0k
    spacePush(ctxt, -1);
11691
11692
73.0k
    if (buildTree)
11693
73.0k
        nodePush(ctxt, root);
11694
11695
73.0k
    if (hasTextDecl) {
11696
3.02k
        xmlDetectEncoding(ctxt);
11697
11698
        /*
11699
         * Parse a possible text declaration first
11700
         */
11701
3.02k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11702
3.02k
            (IS_BLANK_CH(NXT(5)))) {
11703
347
            xmlParseTextDecl(ctxt);
11704
            /*
11705
             * An XML-1.0 document can't reference an entity not XML-1.0
11706
             */
11707
347
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11708
347
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11709
5
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11710
5
                               "Version mismatch between document and "
11711
5
                               "entity\n");
11712
5
            }
11713
347
        }
11714
3.02k
    }
11715
11716
73.0k
    xmlParseContentInternal(ctxt);
11717
11718
73.0k
    if (ctxt->input->cur < ctxt->input->end)
11719
443
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11720
11721
73.0k
    if ((ctxt->wellFormed) ||
11722
73.0k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11723
72.2k
        if (root != NULL) {
11724
72.2k
            xmlNodePtr cur;
11725
11726
            /*
11727
             * Unlink newly created node list.
11728
             */
11729
72.2k
            list = root->children;
11730
72.2k
            root->children = NULL;
11731
72.2k
            root->last = NULL;
11732
103k
            for (cur = list; cur != NULL; cur = cur->next)
11733
31.1k
                cur->parent = NULL;
11734
72.2k
        }
11735
72.2k
    }
11736
11737
    /*
11738
     * Read the rest of the stream in case of errors. We want
11739
     * to account for the whole entity size.
11740
     */
11741
73.4k
    do {
11742
73.4k
        ctxt->input->cur = ctxt->input->end;
11743
73.4k
        xmlParserShrink(ctxt);
11744
73.4k
        result = xmlParserGrow(ctxt);
11745
73.4k
    } while (result > 0);
11746
11747
73.0k
    if (buildTree)
11748
73.0k
        nodePop(ctxt);
11749
11750
73.0k
    namePop(ctxt);
11751
73.0k
    spacePop(ctxt);
11752
11753
73.0k
    xmlCtxtPopInput(ctxt);
11754
11755
73.1k
error:
11756
73.1k
    xmlFreeNode(root);
11757
11758
73.1k
    return(list);
11759
73.0k
}
11760
11761
static void
11762
74.0k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11763
74.0k
    xmlParserInputPtr input;
11764
74.0k
    xmlNodePtr list;
11765
74.0k
    unsigned long consumed;
11766
74.0k
    int isExternal;
11767
74.0k
    int buildTree;
11768
74.0k
    int oldMinNsIndex;
11769
74.0k
    int oldNodelen, oldNodemem;
11770
11771
74.0k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11772
74.0k
    buildTree = (ctxt->node != NULL);
11773
11774
    /*
11775
     * Recursion check
11776
     */
11777
74.0k
    if (ent->flags & XML_ENT_EXPANDING) {
11778
14
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11779
14
        goto error;
11780
14
    }
11781
11782
    /*
11783
     * Load entity
11784
     */
11785
74.0k
    input = xmlNewEntityInputStream(ctxt, ent);
11786
74.0k
    if (input == NULL)
11787
941
        goto error;
11788
11789
    /*
11790
     * When building a tree, we need to limit the scope of namespace
11791
     * declarations, so that entities don't reference xmlNs structs
11792
     * from the parent of a reference.
11793
     */
11794
73.1k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11795
73.1k
    if (buildTree)
11796
73.1k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11797
11798
73.1k
    oldNodelen = ctxt->nodelen;
11799
73.1k
    oldNodemem = ctxt->nodemem;
11800
73.1k
    ctxt->nodelen = 0;
11801
73.1k
    ctxt->nodemem = 0;
11802
11803
    /*
11804
     * Parse content
11805
     *
11806
     * This initiates a recursive call chain:
11807
     *
11808
     * - xmlCtxtParseContentInternal
11809
     * - xmlParseContentInternal
11810
     * - xmlParseReference
11811
     * - xmlCtxtParseEntity
11812
     *
11813
     * The nesting depth is limited by the maximum number of inputs,
11814
     * see xmlCtxtPushInput.
11815
     *
11816
     * It's possible to make this non-recursive (minNsIndex must be
11817
     * stored in the input struct) at the expense of code readability.
11818
     */
11819
11820
73.1k
    ent->flags |= XML_ENT_EXPANDING;
11821
11822
73.1k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11823
11824
73.1k
    ent->flags &= ~XML_ENT_EXPANDING;
11825
11826
73.1k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11827
73.1k
    ctxt->nodelen = oldNodelen;
11828
73.1k
    ctxt->nodemem = oldNodemem;
11829
11830
    /*
11831
     * Entity size accounting
11832
     */
11833
73.1k
    consumed = input->consumed;
11834
73.1k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11835
11836
73.1k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11837
4.37k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11838
11839
73.1k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11840
4.37k
        if (isExternal)
11841
2.82k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11842
11843
4.37k
        ent->children = list;
11844
11845
35.5k
        while (list != NULL) {
11846
31.1k
            list->parent = (xmlNodePtr) ent;
11847
11848
            /*
11849
             * Downstream code like the nginx xslt module can set
11850
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11851
             * might have a different or a NULL document.
11852
             */
11853
31.1k
            if (list->doc != ent->doc)
11854
0
                xmlSetTreeDoc(list, ent->doc);
11855
11856
31.1k
            if (list->next == NULL)
11857
3.34k
                ent->last = list;
11858
31.1k
            list = list->next;
11859
31.1k
        }
11860
68.7k
    } else {
11861
68.7k
        xmlFreeNodeList(list);
11862
68.7k
    }
11863
11864
73.1k
    xmlFreeInputStream(input);
11865
11866
74.0k
error:
11867
74.0k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11868
74.0k
}
11869
11870
/**
11871
 * Parse an external general entity within an existing parsing context
11872
 * An external general parsed entity is well-formed if it matches the
11873
 * production labeled extParsedEnt.
11874
 *
11875
 *     [78] extParsedEnt ::= TextDecl? content
11876
 *
11877
 * @param ctxt  the existing parsing context
11878
 * @param URL  the URL for the entity to load
11879
 * @param ID  the System ID for the entity to load
11880
 * @param listOut  the return value for the set of parsed nodes
11881
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11882
 *    the parser error code otherwise
11883
 */
11884
11885
int
11886
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11887
0
                           const xmlChar *ID, xmlNode **listOut) {
11888
0
    xmlParserInputPtr input;
11889
0
    xmlNodePtr list;
11890
11891
0
    if (listOut != NULL)
11892
0
        *listOut = NULL;
11893
11894
0
    if (ctxt == NULL)
11895
0
        return(XML_ERR_ARGUMENT);
11896
11897
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11898
0
                            XML_RESOURCE_GENERAL_ENTITY);
11899
0
    if (input == NULL)
11900
0
        return(ctxt->errNo);
11901
11902
0
    xmlCtxtInitializeLate(ctxt);
11903
11904
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11905
0
    if (listOut != NULL)
11906
0
        *listOut = list;
11907
0
    else
11908
0
        xmlFreeNodeList(list);
11909
11910
0
    xmlFreeInputStream(input);
11911
0
    return(ctxt->errNo);
11912
0
}
11913
11914
#ifdef LIBXML_SAX1_ENABLED
11915
/**
11916
 * Parse an external general entity
11917
 * An external general parsed entity is well-formed if it matches the
11918
 * production labeled extParsedEnt.
11919
 *
11920
 * This function uses deprecated global variables to set parser options
11921
 * which default to XML_PARSE_NODICT.
11922
 *
11923
 * @deprecated Use #xmlParseCtxtExternalEntity.
11924
 *
11925
 *     [78] extParsedEnt ::= TextDecl? content
11926
 *
11927
 * @param doc  the document the chunk pertains to
11928
 * @param sax  the SAX handler block (possibly NULL)
11929
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11930
 * @param depth  Used for loop detection, use 0
11931
 * @param URL  the URL for the entity to load
11932
 * @param ID  the System ID for the entity to load
11933
 * @param list  the return value for the set of parsed nodes
11934
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if `doc` is
11935
 *    NULL, and the parser error code otherwise
11936
 */
11937
11938
int
11939
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11940
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11941
0
    xmlParserCtxtPtr ctxt;
11942
0
    int ret;
11943
11944
0
    if (list != NULL)
11945
0
        *list = NULL;
11946
11947
0
    if (doc == NULL)
11948
0
        return(XML_ERR_ARGUMENT);
11949
11950
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11951
0
    if (ctxt == NULL)
11952
0
        return(XML_ERR_NO_MEMORY);
11953
11954
0
    ctxt->depth = depth;
11955
0
    ctxt->myDoc = doc;
11956
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11957
11958
0
    xmlFreeParserCtxt(ctxt);
11959
0
    return(ret);
11960
0
}
11961
11962
/**
11963
 * Parse a well-balanced chunk of an XML document
11964
 * called by the parser
11965
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11966
 * the content production in the XML grammar:
11967
 *
11968
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11969
 *                       Comment)*
11970
 *
11971
 * This function uses deprecated global variables to set parser options
11972
 * which default to XML_PARSE_NODICT.
11973
 *
11974
 * @param doc  the document the chunk pertains to (must not be NULL)
11975
 * @param sax  the SAX handler block (possibly NULL)
11976
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11977
 * @param depth  Used for loop detection, use 0
11978
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11979
 * @param lst  the return value for the set of parsed nodes
11980
 * @returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if `string`
11981
 *    is NULL, and the parser error code otherwise
11982
 */
11983
11984
int
11985
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11986
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11987
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11988
0
                                                depth, string, lst, 0 );
11989
0
}
11990
#endif /* LIBXML_SAX1_ENABLED */
11991
11992
/**
11993
 * Parse a well-balanced chunk of XML matching the 'content' production.
11994
 *
11995
 * Namespaces in scope of `node` and entities of `node`'s document are
11996
 * recognized. When validating, the DTD of `node`'s document is used.
11997
 *
11998
 * Always consumes `input` even in error case.
11999
 *
12000
 * @since 2.14.0
12001
 *
12002
 * @param ctxt  parser context
12003
 * @param input  parser input
12004
 * @param node  target node or document
12005
 * @param hasTextDecl  whether to parse text declaration
12006
 * @returns a node list or NULL in case of error.
12007
 */
12008
xmlNode *
12009
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12010
0
                    xmlNode *node, int hasTextDecl) {
12011
0
    xmlDocPtr doc;
12012
0
    xmlNodePtr cur, list = NULL;
12013
0
    int nsnr = 0;
12014
0
    xmlDictPtr oldDict;
12015
0
    int oldOptions, oldDictNames, oldLoadSubset;
12016
12017
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12018
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12019
0
        goto exit;
12020
0
    }
12021
12022
0
    doc = node->doc;
12023
0
    if (doc == NULL) {
12024
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12025
0
        goto exit;
12026
0
    }
12027
12028
0
    switch (node->type) {
12029
0
        case XML_ELEMENT_NODE:
12030
0
        case XML_DOCUMENT_NODE:
12031
0
        case XML_HTML_DOCUMENT_NODE:
12032
0
            break;
12033
12034
0
        case XML_ATTRIBUTE_NODE:
12035
0
        case XML_TEXT_NODE:
12036
0
        case XML_CDATA_SECTION_NODE:
12037
0
        case XML_ENTITY_REF_NODE:
12038
0
        case XML_PI_NODE:
12039
0
        case XML_COMMENT_NODE:
12040
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12041
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12042
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12043
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12044
0
                    node = cur;
12045
0
                    break;
12046
0
                }
12047
0
            }
12048
0
            break;
12049
12050
0
        default:
12051
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12052
0
            goto exit;
12053
0
    }
12054
12055
0
    xmlCtxtReset(ctxt);
12056
12057
0
    oldDict = ctxt->dict;
12058
0
    oldOptions = ctxt->options;
12059
0
    oldDictNames = ctxt->dictNames;
12060
0
    oldLoadSubset = ctxt->loadsubset;
12061
12062
    /*
12063
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12064
     */
12065
0
    if (doc->dict != NULL) {
12066
0
        ctxt->dict = doc->dict;
12067
0
    } else {
12068
0
        ctxt->options |= XML_PARSE_NODICT;
12069
0
        ctxt->dictNames = 0;
12070
0
    }
12071
12072
    /*
12073
     * Disable IDs
12074
     */
12075
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12076
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12077
12078
0
    ctxt->myDoc = doc;
12079
12080
0
#ifdef LIBXML_HTML_ENABLED
12081
0
    if (ctxt->html) {
12082
        /*
12083
         * When parsing in context, it makes no sense to add implied
12084
         * elements like html/body/etc...
12085
         */
12086
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12087
12088
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12089
0
    } else
12090
0
#endif
12091
0
    {
12092
0
        xmlCtxtInitializeLate(ctxt);
12093
12094
        /*
12095
         * initialize the SAX2 namespaces stack
12096
         */
12097
0
        cur = node;
12098
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12099
0
            xmlNsPtr ns = cur->nsDef;
12100
0
            xmlHashedString hprefix, huri;
12101
12102
0
            while (ns != NULL) {
12103
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12104
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12105
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12106
0
                    nsnr++;
12107
0
                ns = ns->next;
12108
0
            }
12109
0
            cur = cur->parent;
12110
0
        }
12111
12112
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12113
12114
0
        if (nsnr > 0)
12115
0
            xmlParserNsPop(ctxt, nsnr);
12116
0
    }
12117
12118
0
    ctxt->dict = oldDict;
12119
0
    ctxt->options = oldOptions;
12120
0
    ctxt->dictNames = oldDictNames;
12121
0
    ctxt->loadsubset = oldLoadSubset;
12122
0
    ctxt->myDoc = NULL;
12123
0
    ctxt->node = NULL;
12124
12125
0
exit:
12126
0
    xmlFreeInputStream(input);
12127
0
    return(list);
12128
0
}
12129
12130
/**
12131
 * Parse a well-balanced chunk of an XML document
12132
 * within the context (DTD, namespaces, etc ...) of the given node.
12133
 *
12134
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12135
 * the content production in the XML grammar:
12136
 *
12137
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12138
 *                       Comment)*
12139
 *
12140
 * This function assumes the encoding of `node`'s document which is
12141
 * typically not what you want. A better alternative is
12142
 * #xmlCtxtParseContent.
12143
 *
12144
 * @param node  the context node
12145
 * @param data  the input string
12146
 * @param datalen  the input string length in bytes
12147
 * @param options  a combination of xmlParserOption
12148
 * @param listOut  the return value for the set of parsed nodes
12149
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12150
 * error code otherwise
12151
 */
12152
xmlParserErrors
12153
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12154
0
                      int options, xmlNode **listOut) {
12155
0
    xmlParserCtxtPtr ctxt;
12156
0
    xmlParserInputPtr input;
12157
0
    xmlDocPtr doc;
12158
0
    xmlNodePtr list;
12159
0
    xmlParserErrors ret;
12160
12161
0
    if (listOut == NULL)
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
0
    *listOut = NULL;
12164
12165
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
    doc = node->doc;
12169
0
    if (doc == NULL)
12170
0
        return(XML_ERR_INTERNAL_ERROR);
12171
12172
0
#ifdef LIBXML_HTML_ENABLED
12173
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12174
0
        ctxt = htmlNewParserCtxt();
12175
0
    }
12176
0
    else
12177
0
#endif
12178
0
        ctxt = xmlNewParserCtxt();
12179
12180
0
    if (ctxt == NULL)
12181
0
        return(XML_ERR_NO_MEMORY);
12182
12183
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12184
0
                                      (const char *) doc->encoding,
12185
0
                                      XML_INPUT_BUF_STATIC);
12186
0
    if (input == NULL) {
12187
0
        xmlFreeParserCtxt(ctxt);
12188
0
        return(XML_ERR_NO_MEMORY);
12189
0
    }
12190
12191
0
    xmlCtxtUseOptions(ctxt, options);
12192
12193
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12194
12195
0
    if (list == NULL) {
12196
0
        ret = ctxt->errNo;
12197
0
        if (ret == XML_ERR_ARGUMENT)
12198
0
            ret = XML_ERR_INTERNAL_ERROR;
12199
0
    } else {
12200
0
        ret = XML_ERR_OK;
12201
0
        *listOut = list;
12202
0
    }
12203
12204
0
    xmlFreeParserCtxt(ctxt);
12205
12206
0
    return(ret);
12207
0
}
12208
12209
#ifdef LIBXML_SAX1_ENABLED
12210
/**
12211
 * Parse a well-balanced chunk of an XML document
12212
 *
12213
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12214
 * the content production in the XML grammar:
12215
 *
12216
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12217
 *                       Comment)*
12218
 *
12219
 * In case recover is set to 1, the nodelist will not be empty even if
12220
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12221
 * some extent.
12222
 *
12223
 * This function uses deprecated global variables to set parser options
12224
 * which default to XML_PARSE_NODICT.
12225
 *
12226
 * @param doc  the document the chunk pertains to (must not be NULL)
12227
 * @param sax  the SAX handler block (possibly NULL)
12228
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12229
 * @param depth  Used for loop detection, use 0
12230
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12231
 * @param listOut  the return value for the set of parsed nodes
12232
 * @param recover  return nodes even if the data is broken (use 0)
12233
 * @returns 0 if the chunk is well balanced, or the parser error code
12234
 * otherwise.
12235
 */
12236
int
12237
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12238
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12239
0
     int recover) {
12240
0
    xmlParserCtxtPtr ctxt;
12241
0
    xmlParserInputPtr input;
12242
0
    xmlNodePtr list;
12243
0
    int ret;
12244
12245
0
    if (listOut != NULL)
12246
0
        *listOut = NULL;
12247
12248
0
    if (string == NULL)
12249
0
        return(XML_ERR_ARGUMENT);
12250
12251
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12252
0
    if (ctxt == NULL)
12253
0
        return(XML_ERR_NO_MEMORY);
12254
12255
0
    xmlCtxtInitializeLate(ctxt);
12256
12257
0
    ctxt->depth = depth;
12258
0
    ctxt->myDoc = doc;
12259
0
    if (recover) {
12260
0
        ctxt->options |= XML_PARSE_RECOVER;
12261
0
        ctxt->recovery = 1;
12262
0
    }
12263
12264
0
    input = xmlNewStringInputStream(ctxt, string);
12265
0
    if (input == NULL) {
12266
0
        ret = ctxt->errNo;
12267
0
        goto error;
12268
0
    }
12269
12270
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12271
0
    if (listOut != NULL)
12272
0
        *listOut = list;
12273
0
    else
12274
0
        xmlFreeNodeList(list);
12275
12276
0
    if (!ctxt->wellFormed)
12277
0
        ret = ctxt->errNo;
12278
0
    else
12279
0
        ret = XML_ERR_OK;
12280
12281
0
error:
12282
0
    xmlFreeInputStream(input);
12283
0
    xmlFreeParserCtxt(ctxt);
12284
0
    return(ret);
12285
0
}
12286
12287
/**
12288
 * Parse an XML external entity out of context and build a tree.
12289
 * It uses the given SAX function block to handle the parsing callback.
12290
 * If sax is NULL, fallback to the default DOM tree building routines.
12291
 *
12292
 * @deprecated Don't use.
12293
 *
12294
 *     [78] extParsedEnt ::= TextDecl? content
12295
 *
12296
 * This corresponds to a "Well Balanced" chunk
12297
 *
12298
 * This function uses deprecated global variables to set parser options
12299
 * which default to XML_PARSE_NODICT.
12300
 *
12301
 * @param sax  the SAX handler block
12302
 * @param filename  the filename
12303
 * @returns the resulting document tree
12304
 */
12305
12306
xmlDoc *
12307
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12308
0
    xmlDocPtr ret;
12309
0
    xmlParserCtxtPtr ctxt;
12310
12311
0
    ctxt = xmlCreateFileParserCtxt(filename);
12312
0
    if (ctxt == NULL) {
12313
0
  return(NULL);
12314
0
    }
12315
0
    if (sax != NULL) {
12316
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12317
0
            *ctxt->sax = *sax;
12318
0
        } else {
12319
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12320
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12321
0
        }
12322
0
        ctxt->userData = NULL;
12323
0
    }
12324
12325
0
    xmlParseExtParsedEnt(ctxt);
12326
12327
0
    if (ctxt->wellFormed) {
12328
0
  ret = ctxt->myDoc;
12329
0
    } else {
12330
0
        ret = NULL;
12331
0
        xmlFreeDoc(ctxt->myDoc);
12332
0
    }
12333
12334
0
    xmlFreeParserCtxt(ctxt);
12335
12336
0
    return(ret);
12337
0
}
12338
12339
/**
12340
 * Parse an XML external entity out of context and build a tree.
12341
 *
12342
 *     [78] extParsedEnt ::= TextDecl? content
12343
 *
12344
 * This corresponds to a "Well Balanced" chunk
12345
 *
12346
 * This function uses deprecated global variables to set parser options
12347
 * which default to XML_PARSE_NODICT.
12348
 *
12349
 * @deprecated Don't use.
12350
 *
12351
 * @param filename  the filename
12352
 * @returns the resulting document tree
12353
 */
12354
12355
xmlDoc *
12356
0
xmlParseEntity(const char *filename) {
12357
0
    return(xmlSAXParseEntity(NULL, filename));
12358
0
}
12359
#endif /* LIBXML_SAX1_ENABLED */
12360
12361
/**
12362
 * Create a parser context for an external entity
12363
 * Automatic support for ZLIB/Compress compressed document is provided
12364
 * by default if found at compile-time.
12365
 *
12366
 * @deprecated Don't use.
12367
 *
12368
 * @param URL  the entity URL
12369
 * @param ID  the entity PUBLIC ID
12370
 * @param base  a possible base for the target URI
12371
 * @returns the new parser context or NULL
12372
 */
12373
xmlParserCtxt *
12374
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12375
0
                    const xmlChar *base) {
12376
0
    xmlParserCtxtPtr ctxt;
12377
0
    xmlParserInputPtr input;
12378
0
    xmlChar *uri = NULL;
12379
12380
0
    ctxt = xmlNewParserCtxt();
12381
0
    if (ctxt == NULL)
12382
0
  return(NULL);
12383
12384
0
    if (base != NULL) {
12385
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12386
0
            goto error;
12387
0
        if (uri != NULL)
12388
0
            URL = uri;
12389
0
    }
12390
12391
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12392
0
                            XML_RESOURCE_UNKNOWN);
12393
0
    if (input == NULL)
12394
0
        goto error;
12395
12396
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12397
0
        xmlFreeInputStream(input);
12398
0
        goto error;
12399
0
    }
12400
12401
0
    xmlFree(uri);
12402
0
    return(ctxt);
12403
12404
0
error:
12405
0
    xmlFree(uri);
12406
0
    xmlFreeParserCtxt(ctxt);
12407
0
    return(NULL);
12408
0
}
12409
12410
/************************************************************************
12411
 *                  *
12412
 *    Front ends when parsing from a file     *
12413
 *                  *
12414
 ************************************************************************/
12415
12416
/**
12417
 * Create a parser context for a file or URL content.
12418
 * Automatic support for ZLIB/Compress compressed document is provided
12419
 * by default if found at compile-time and for file accesses
12420
 *
12421
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12422
 *
12423
 * @param filename  the filename or URL
12424
 * @param options  a combination of xmlParserOption
12425
 * @returns the new parser context or NULL
12426
 */
12427
xmlParserCtxt *
12428
xmlCreateURLParserCtxt(const char *filename, int options)
12429
0
{
12430
0
    xmlParserCtxtPtr ctxt;
12431
0
    xmlParserInputPtr input;
12432
12433
0
    ctxt = xmlNewParserCtxt();
12434
0
    if (ctxt == NULL)
12435
0
  return(NULL);
12436
12437
0
    options |= XML_PARSE_UNZIP;
12438
12439
0
    xmlCtxtUseOptions(ctxt, options);
12440
12441
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12442
0
    if (input == NULL) {
12443
0
  xmlFreeParserCtxt(ctxt);
12444
0
  return(NULL);
12445
0
    }
12446
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12447
0
        xmlFreeInputStream(input);
12448
0
        xmlFreeParserCtxt(ctxt);
12449
0
        return(NULL);
12450
0
    }
12451
12452
0
    return(ctxt);
12453
0
}
12454
12455
/**
12456
 * Create a parser context for a file content.
12457
 * Automatic support for ZLIB/Compress compressed document is provided
12458
 * by default if found at compile-time.
12459
 *
12460
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12461
 *
12462
 * @param filename  the filename
12463
 * @returns the new parser context or NULL
12464
 */
12465
xmlParserCtxt *
xmlCreateFileParserCtxt(const char *filename)
{
    /* File variant: the URL constructor with no extra options. */
    const int noOptions = 0;

    return(xmlCreateURLParserCtxt(filename, noOptions));
}
12470
12471
#ifdef LIBXML_SAX1_ENABLED
12472
/**
12473
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12474
 * compressed document is provided by default if found at compile-time.
12475
 * It uses the given SAX function block to handle the parsing callback.
12476
 * If sax is NULL, fallback to the default DOM tree building routines.
12477
 *
12478
 * This function uses deprecated global variables to set parser options
12479
 * which default to XML_PARSE_NODICT.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It uses the given SAX function block to handle the parsing callback.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * This function uses deprecated global variables to set parser options
12532
 * which default to XML_PARSE_NODICT.
12533
 *
12534
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12535
 *
12536
 * @param sax  the SAX handler block
12537
 * @param filename  the filename
12538
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12539
 *             documents
12540
 * @returns the resulting document tree
12541
 */
12542
12543
xmlDoc *
12544
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12545
0
                          int recovery) {
12546
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12547
0
}
12548
12549
/**
12550
 * Parse an XML in-memory document and build a tree.
12551
 * In the case the document is not Well Formed, an attempt to build a
12552
 * tree is tried anyway
12553
 *
12554
 * This function uses deprecated global variables to set parser options
12555
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12556
 *
12557
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12558
 *
12559
 * @param cur  a pointer to an array of xmlChar
12560
 * @returns the resulting document tree or NULL in case of failure
12561
 */
12562
12563
xmlDoc *
12564
0
xmlRecoverDoc(const xmlChar *cur) {
12565
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12566
0
}
12567
12568
/**
12569
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12570
 * compressed document is provided by default if found at compile-time.
12571
 *
12572
 * This function uses deprecated global variables to set parser options
12573
 * which default to XML_PARSE_NODICT.
12574
 *
12575
 * @deprecated Use #xmlReadFile.
12576
 *
12577
 * @param filename  the filename
12578
 * @returns the resulting document tree if the file was wellformed,
12579
 * NULL otherwise.
12580
 */
12581
12582
xmlDoc *
xmlParseFile(const char *filename) {
    /* Default SAX handling (NULL) with recovery disabled. */
    const int recovery = 0;

    return(xmlSAXParseFile(NULL, filename, recovery));
}
12586
12587
/**
12588
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12589
 * compressed document is provided by default if found at compile-time.
12590
 * In the case the document is not Well Formed, it attempts to build
12591
 * a tree anyway
12592
 *
12593
 * This function uses deprecated global variables to set parser options
12594
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12595
 *
12596
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12597
 *
12598
 * @param filename  the filename
12599
 * @returns the resulting document tree or NULL in case of failure
12600
 */
12601
12602
xmlDoc *
xmlRecoverFile(const char *filename) {
    /* Default SAX handling (NULL) with recovery enabled. */
    const int recovery = 1;

    return(xmlSAXParseFile(NULL, filename, recovery));
}
12606
12607
12608
/**
12609
 * Setup the parser context to parse a new buffer; Clears any prior
12610
 * contents from the parser context. The buffer parameter must not be
12611
 * NULL, but the filename parameter can be
12612
 *
12613
 * @deprecated Don't use.
12614
 *
12615
 * @param ctxt  an XML parser context
12616
 * @param buffer  a xmlChar * buffer
12617
 * @param filename  a file name
12618
 */
12619
void
12620
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12621
                             const char* filename)
12622
0
{
12623
0
    xmlParserInputPtr input;
12624
12625
0
    if ((ctxt == NULL) || (buffer == NULL))
12626
0
        return;
12627
12628
0
    xmlCtxtReset(ctxt);
12629
12630
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12631
0
                                      NULL, 0);
12632
0
    if (input == NULL)
12633
0
        return;
12634
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12635
0
        xmlFreeInputStream(input);
12636
0
}
12637
12638
/**
12639
 * Parse an XML file and call the given SAX handler routines.
12640
 * Automatic support for ZLIB/Compress compressed document is provided
12641
 *
12642
 * This function uses deprecated global variables to set parser options
12643
 * which default to XML_PARSE_NODICT.
12644
 *
12645
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12646
 *
12647
 * @param sax  a SAX handler
12648
 * @param user_data  The user data returned on SAX callbacks
12649
 * @param filename  a file name
12650
 * @returns 0 in case of success or an error number otherwise
12651
 */
12652
int
12653
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12654
0
                    const char *filename) {
12655
0
    int ret = 0;
12656
0
    xmlParserCtxtPtr ctxt;
12657
12658
0
    ctxt = xmlCreateFileParserCtxt(filename);
12659
0
    if (ctxt == NULL) return -1;
12660
0
    if (sax != NULL) {
12661
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12662
0
            *ctxt->sax = *sax;
12663
0
        } else {
12664
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12665
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12666
0
        }
12667
0
  ctxt->userData = user_data;
12668
0
    }
12669
12670
0
    xmlParseDocument(ctxt);
12671
12672
0
    if (ctxt->wellFormed)
12673
0
  ret = 0;
12674
0
    else {
12675
0
        if (ctxt->errNo != 0)
12676
0
      ret = ctxt->errNo;
12677
0
  else
12678
0
      ret = -1;
12679
0
    }
12680
0
    if (ctxt->myDoc != NULL) {
12681
0
        xmlFreeDoc(ctxt->myDoc);
12682
0
  ctxt->myDoc = NULL;
12683
0
    }
12684
0
    xmlFreeParserCtxt(ctxt);
12685
12686
0
    return ret;
12687
0
}
12688
#endif /* LIBXML_SAX1_ENABLED */
12689
12690
/************************************************************************
12691
 *                  *
12692
 *    Front ends when parsing from memory     *
12693
 *                  *
12694
 ************************************************************************/
12695
12696
/**
12697
 * Create a parser context for an XML in-memory document. The input buffer
12698
 * must not contain a terminating null byte.
12699
 *
12700
 * @param buffer  a pointer to a char array
12701
 * @param size  the size of the array
12702
 * @returns the new parser context or NULL
12703
 */
12704
xmlParserCtxt *
12705
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12706
0
    xmlParserCtxtPtr ctxt;
12707
0
    xmlParserInputPtr input;
12708
12709
0
    if (size < 0)
12710
0
  return(NULL);
12711
12712
0
    ctxt = xmlNewParserCtxt();
12713
0
    if (ctxt == NULL)
12714
0
  return(NULL);
12715
12716
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12717
0
    if (input == NULL) {
12718
0
  xmlFreeParserCtxt(ctxt);
12719
0
  return(NULL);
12720
0
    }
12721
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12722
0
        xmlFreeInputStream(input);
12723
0
        xmlFreeParserCtxt(ctxt);
12724
0
        return(NULL);
12725
0
    }
12726
12727
0
    return(ctxt);
12728
0
}
12729
12730
#ifdef LIBXML_SAX1_ENABLED
12731
/**
12732
 * Parse an XML in-memory block and use the given SAX function block
12733
 * to handle the parsing callback. If sax is NULL, fallback to the default
12734
 * DOM tree building routines.
12735
 *
12736
 * This function uses deprecated global variables to set parser options
12737
 * which default to XML_PARSE_NODICT.
12738
 *
12739
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12740
 *
12741
 * User data (void *) is stored within the parser context in the
12742
 * context's _private member, so it is available nearly everywhere in libxml
12743
 *
12744
 * @param sax  the SAX handler block
12745
 * @param buffer  a pointer to a char array
12746
 * @param size  the size of the array
12747
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12748
 *             documents
12749
 * @param data  the userdata
12750
 * @returns the resulting document tree
12751
 */
12752
12753
xmlDoc *
12754
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12755
0
                          int size, int recovery, void *data) {
12756
0
    xmlDocPtr ret = NULL;
12757
0
    xmlParserCtxtPtr ctxt;
12758
0
    xmlParserInputPtr input;
12759
12760
0
    if (size < 0)
12761
0
        return(NULL);
12762
12763
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12764
0
    if (ctxt == NULL)
12765
0
        return(NULL);
12766
12767
0
    if (data != NULL)
12768
0
  ctxt->_private=data;
12769
12770
0
    if (recovery) {
12771
0
        ctxt->options |= XML_PARSE_RECOVER;
12772
0
        ctxt->recovery = 1;
12773
0
    }
12774
12775
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12776
0
                                      XML_INPUT_BUF_STATIC);
12777
12778
0
    if (input != NULL)
12779
0
        ret = xmlCtxtParseDocument(ctxt, input);
12780
12781
0
    xmlFreeParserCtxt(ctxt);
12782
0
    return(ret);
12783
0
}
12784
12785
/**
12786
 * Parse an XML in-memory block and use the given SAX function block
12787
 * to handle the parsing callback. If sax is NULL, fallback to the default
12788
 * DOM tree building routines.
12789
 *
12790
 * This function uses deprecated global variables to set parser options
12791
 * which default to XML_PARSE_NODICT.
12792
 *
12793
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12794
 *
12795
 * @param sax  the SAX handler block
12796
 * @param buffer  a pointer to a char array
12797
 * @param size  the size of the array
12798
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12799
 *             documents
12800
 * @returns the resulting document tree
12801
 */
12802
xmlDoc *
12803
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12804
0
            int size, int recovery) {
12805
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12806
0
}
12807
12808
/**
12809
 * Parse an XML in-memory block and build a tree.
12810
 *
12811
 * This function uses deprecated global variables to set parser options
12812
 * which default to XML_PARSE_NODICT.
12813
 *
12814
 * @deprecated Use #xmlReadMemory.
12815
 *
12816
 * @param buffer  a pointer to a char array
12817
 * @param size  the size of the array
12818
 * @returns the resulting document tree
12819
 */
12820
12821
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
    /* Default SAX handling (NULL) with recovery disabled. */
    const int recovery = 0;

    return(xmlSAXParseMemory(NULL, buffer, size, recovery));
}
12824
12825
/**
12826
 * Parse an XML in-memory block and build a tree.
12827
 * In the case the document is not Well Formed, an attempt to
12828
 * build a tree is tried anyway
12829
 *
12830
 * This function uses deprecated global variables to set parser options
12831
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12832
 *
12833
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12834
 *
12835
 * @param buffer  a pointer to a char array
12836
 * @param size  the size of the array
12837
 * @returns the resulting document tree or NULL in case of error
12838
 */
12839
12840
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
    /* Default SAX handling (NULL) with recovery enabled. */
    const int recovery = 1;

    return(xmlSAXParseMemory(NULL, buffer, size, recovery));
}
12843
12844
/**
12845
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12846
 *
12847
 * This function uses deprecated global variables to set parser options
12848
 * which default to XML_PARSE_NODICT.
12849
 *
12850
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12851
 *
12852
 * @param sax  a SAX handler
12853
 * @param user_data  The user data returned on SAX callbacks
12854
 * @param buffer  an in-memory XML document input
12855
 * @param size  the length of the XML document in bytes
12856
 * @returns 0 in case of success or an error number otherwise
12857
 */
12858
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
                          const char *buffer, int size) {
    xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(buffer, size);
    int status;

    if (ctxt == NULL)
        return -1;

    if (sax != NULL) {
        if (sax->initialized != XML_SAX2_MAGIC) {
            /*
             * Handler not tagged with XML_SAX2_MAGIC: clear the whole
             * context handler, then copy only sizeof(xmlSAXHandlerV1)
             * bytes from the caller's block.
             */
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
        } else {
            *ctxt->sax = *sax;
        }
        ctxt->userData = user_data;
    }

    xmlParseDocument(ctxt);

    /* 0 when well-formed, else the recorded error number, else -1. */
    if (ctxt->wellFormed)
        status = 0;
    else if (ctxt->errNo != 0)
        status = ctxt->errNo;
    else
        status = -1;

    /* No document is handed back to the caller; free any tree built. */
    if (ctxt->myDoc != NULL) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }
    xmlFreeParserCtxt(ctxt);

    return status;
}
12893
#endif /* LIBXML_SAX1_ENABLED */
12894
12895
/**
12896
 * Creates a parser context for an XML in-memory document.
12897
 *
12898
 * @param str  a pointer to an array of xmlChar
12899
 * @returns the new parser context or NULL
12900
 */
12901
xmlParserCtxt *
12902
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12903
0
    xmlParserCtxtPtr ctxt;
12904
0
    xmlParserInputPtr input;
12905
12906
0
    ctxt = xmlNewParserCtxt();
12907
0
    if (ctxt == NULL)
12908
0
  return(NULL);
12909
12910
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12911
0
    if (input == NULL) {
12912
0
  xmlFreeParserCtxt(ctxt);
12913
0
  return(NULL);
12914
0
    }
12915
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12916
0
        xmlFreeInputStream(input);
12917
0
        xmlFreeParserCtxt(ctxt);
12918
0
        return(NULL);
12919
0
    }
12920
12921
0
    return(ctxt);
12922
0
}
12923
12924
#ifdef LIBXML_SAX1_ENABLED
12925
/**
12926
 * Parse an XML in-memory document and build a tree.
12927
 * It uses the given SAX function block to handle the parsing callbacks.
12928
 * If sax is NULL, fall back to the default DOM tree building routines.
12929
 *
12930
 * This function uses deprecated global variables to set parser options
12931
 * which default to XML_PARSE_NODICT.
12932
 *
12933
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12934
 *
12935
 * @param sax  the SAX handler block
12936
 * @param cur  a pointer to an array of xmlChar
12937
 * @param recovery  work in recovery mode, i.e. try to read documents that
12938
 *             are not well-formed
12939
 * @returns the resulting document tree
12940
 */
12941
12942
xmlDoc *
12943
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12944
0
    xmlDocPtr ret;
12945
0
    xmlParserCtxtPtr ctxt;
12946
0
    xmlSAXHandlerPtr oldsax = NULL;
12947
12948
0
    if (cur == NULL) return(NULL);
12949
12950
12951
0
    ctxt = xmlCreateDocParserCtxt(cur);
12952
0
    if (ctxt == NULL) return(NULL);
12953
0
    if (sax != NULL) {
12954
0
        oldsax = ctxt->sax;
12955
0
        ctxt->sax = sax;
12956
0
        ctxt->userData = NULL;
12957
0
    }
12958
12959
0
    xmlParseDocument(ctxt);
12960
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12961
0
    else {
12962
0
       ret = NULL;
12963
0
       xmlFreeDoc(ctxt->myDoc);
12964
0
       ctxt->myDoc = NULL;
12965
0
    }
12966
0
    if (sax != NULL)
12967
0
  ctxt->sax = oldsax;
12968
0
    xmlFreeParserCtxt(ctxt);
12969
12970
0
    return(ret);
12971
0
}
12972
12973
/**
12974
 * Parse an XML in-memory document and build a tree.
12975
 *
12976
 * This function uses deprecated global variables to set parser options
12977
 * which default to XML_PARSE_NODICT.
12978
 *
12979
 * @deprecated Use #xmlReadDoc.
12980
 *
12981
 * @param cur  a pointer to an array of xmlChar
12982
 * @returns the resulting document tree
12983
 */
12984
12985
xmlDoc *
12986
0
xmlParseDoc(const xmlChar *cur) {
12987
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12988
0
}
12989
#endif /* LIBXML_SAX1_ENABLED */
12990
12991
/************************************************************************
12992
 *                  *
12993
 *  New set (2.6.0) of simpler and more flexible APIs   *
12994
 *                  *
12995
 ************************************************************************/
12996
12997
/**
12998
 * Reset a parser context
12999
 *
13000
 * @param ctxt  an XML parser context
13001
 */
13002
void
13003
xmlCtxtReset(xmlParserCtxt *ctxt)
13004
24.8k
{
13005
24.8k
    xmlParserInputPtr input;
13006
13007
24.8k
    if (ctxt == NULL)
13008
0
        return;
13009
13010
24.8k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13011
0
        xmlFreeInputStream(input);
13012
0
    }
13013
24.8k
    ctxt->inputNr = 0;
13014
24.8k
    ctxt->input = NULL;
13015
13016
24.8k
    ctxt->spaceNr = 0;
13017
24.8k
    if (ctxt->spaceTab != NULL) {
13018
24.8k
  ctxt->spaceTab[0] = -1;
13019
24.8k
  ctxt->space = &ctxt->spaceTab[0];
13020
24.8k
    } else {
13021
0
        ctxt->space = NULL;
13022
0
    }
13023
13024
13025
24.8k
    ctxt->nodeNr = 0;
13026
24.8k
    ctxt->node = NULL;
13027
13028
24.8k
    ctxt->nameNr = 0;
13029
24.8k
    ctxt->name = NULL;
13030
13031
24.8k
    ctxt->nsNr = 0;
13032
24.8k
    xmlParserNsReset(ctxt->nsdb);
13033
13034
24.8k
    if (ctxt->version != NULL) {
13035
0
        xmlFree(ctxt->version);
13036
0
        ctxt->version = NULL;
13037
0
    }
13038
24.8k
    if (ctxt->encoding != NULL) {
13039
0
        xmlFree(ctxt->encoding);
13040
0
        ctxt->encoding = NULL;
13041
0
    }
13042
24.8k
    if (ctxt->extSubURI != NULL) {
13043
0
        xmlFree(ctxt->extSubURI);
13044
0
        ctxt->extSubURI = NULL;
13045
0
    }
13046
24.8k
    if (ctxt->extSubSystem != NULL) {
13047
0
        xmlFree(ctxt->extSubSystem);
13048
0
        ctxt->extSubSystem = NULL;
13049
0
    }
13050
24.8k
    if (ctxt->directory != NULL) {
13051
0
        xmlFree(ctxt->directory);
13052
0
        ctxt->directory = NULL;
13053
0
    }
13054
13055
24.8k
    if (ctxt->myDoc != NULL)
13056
0
        xmlFreeDoc(ctxt->myDoc);
13057
24.8k
    ctxt->myDoc = NULL;
13058
13059
24.8k
    ctxt->standalone = -1;
13060
24.8k
    ctxt->hasExternalSubset = 0;
13061
24.8k
    ctxt->hasPErefs = 0;
13062
24.8k
    ctxt->html = ctxt->html ? 1 : 0;
13063
24.8k
    ctxt->instate = XML_PARSER_START;
13064
13065
24.8k
    ctxt->wellFormed = 1;
13066
24.8k
    ctxt->nsWellFormed = 1;
13067
24.8k
    ctxt->disableSAX = 0;
13068
24.8k
    ctxt->valid = 1;
13069
24.8k
    ctxt->record_info = 0;
13070
24.8k
    ctxt->checkIndex = 0;
13071
24.8k
    ctxt->endCheckState = 0;
13072
24.8k
    ctxt->inSubset = 0;
13073
24.8k
    ctxt->errNo = XML_ERR_OK;
13074
24.8k
    ctxt->depth = 0;
13075
24.8k
    ctxt->catalogs = NULL;
13076
24.8k
    ctxt->sizeentities = 0;
13077
24.8k
    ctxt->sizeentcopy = 0;
13078
24.8k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13079
13080
24.8k
    if (ctxt->attsDefault != NULL) {
13081
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13082
0
        ctxt->attsDefault = NULL;
13083
0
    }
13084
24.8k
    if (ctxt->attsSpecial != NULL) {
13085
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13086
0
        ctxt->attsSpecial = NULL;
13087
0
    }
13088
13089
24.8k
#ifdef LIBXML_CATALOG_ENABLED
13090
24.8k
    if (ctxt->catalogs != NULL)
13091
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13092
24.8k
#endif
13093
24.8k
    ctxt->nbErrors = 0;
13094
24.8k
    ctxt->nbWarnings = 0;
13095
24.8k
    if (ctxt->lastError.code != XML_ERR_OK)
13096
0
        xmlResetError(&ctxt->lastError);
13097
24.8k
}
13098
13099
/**
13100
 * Reset a push parser context
13101
 *
13102
 * @param ctxt  an XML parser context
13103
 * @param chunk  a pointer to an array of chars
13104
 * @param size  number of chars in the array
13105
 * @param filename  an optional file name or URI
13106
 * @param encoding  the document encoding, or NULL
13107
 * @returns 0 in case of success and 1 in case of error
13108
 */
13109
int
13110
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13111
                 int size, const char *filename, const char *encoding)
13112
0
{
13113
0
    xmlParserInputPtr input;
13114
13115
0
    if (ctxt == NULL)
13116
0
        return(1);
13117
13118
0
    xmlCtxtReset(ctxt);
13119
13120
0
    input = xmlNewPushInput(filename, chunk, size);
13121
0
    if (input == NULL)
13122
0
        return(1);
13123
13124
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13125
0
        xmlFreeInputStream(input);
13126
0
        return(1);
13127
0
    }
13128
13129
0
    if (encoding != NULL)
13130
0
        xmlSwitchEncodingName(ctxt, encoding);
13131
13132
0
    return(0);
13133
0
}
13134
13135
static int
13136
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13137
49.5k
{
13138
49.5k
    int allMask;
13139
13140
49.5k
    if (ctxt == NULL)
13141
0
        return(-1);
13142
13143
    /*
13144
     * XInclude options aren't handled by the parser.
13145
     *
13146
     * XML_PARSE_XINCLUDE
13147
     * XML_PARSE_NOXINCNODE
13148
     * XML_PARSE_NOBASEFIX
13149
     */
13150
49.5k
    allMask = XML_PARSE_RECOVER |
13151
49.5k
              XML_PARSE_NOENT |
13152
49.5k
              XML_PARSE_DTDLOAD |
13153
49.5k
              XML_PARSE_DTDATTR |
13154
49.5k
              XML_PARSE_DTDVALID |
13155
49.5k
              XML_PARSE_NOERROR |
13156
49.5k
              XML_PARSE_NOWARNING |
13157
49.5k
              XML_PARSE_PEDANTIC |
13158
49.5k
              XML_PARSE_NOBLANKS |
13159
49.5k
#ifdef LIBXML_SAX1_ENABLED
13160
49.5k
              XML_PARSE_SAX1 |
13161
49.5k
#endif
13162
49.5k
              XML_PARSE_NONET |
13163
49.5k
              XML_PARSE_NODICT |
13164
49.5k
              XML_PARSE_NSCLEAN |
13165
49.5k
              XML_PARSE_NOCDATA |
13166
49.5k
              XML_PARSE_COMPACT |
13167
49.5k
              XML_PARSE_OLD10 |
13168
49.5k
              XML_PARSE_HUGE |
13169
49.5k
              XML_PARSE_OLDSAX |
13170
49.5k
              XML_PARSE_IGNORE_ENC |
13171
49.5k
              XML_PARSE_BIG_LINES |
13172
49.5k
              XML_PARSE_NO_XXE |
13173
49.5k
              XML_PARSE_UNZIP |
13174
49.5k
              XML_PARSE_NO_SYS_CATALOG |
13175
49.5k
              XML_PARSE_CATALOG_PI;
13176
13177
49.5k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13178
13179
    /*
13180
     * For some options, struct members are historically the source
13181
     * of truth. The values are initalized from global variables and
13182
     * old code could also modify them directly. Several older API
13183
     * functions that don't take an options argument rely on these
13184
     * deprecated mechanisms.
13185
     *
13186
     * Once public access to struct members and the globals are
13187
     * disabled, we can use the options bitmask as source of
13188
     * truth, making all these struct members obsolete.
13189
     *
13190
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13191
     * loading of the external subset.
13192
     */
13193
49.5k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13194
49.5k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13195
49.5k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13196
49.5k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13197
49.5k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13198
49.5k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13199
49.5k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13200
49.5k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13201
49.5k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13202
13203
49.5k
    return(options & ~allMask);
13204
49.5k
}
13205
13206
/**
13207
 * Applies the options to the parser context. Unset options are
13208
 * cleared.
13209
 *
13210
 * @since 2.13.0
13211
 *
13212
 * With older versions, you can use #xmlCtxtUseOptions.
13213
 *
13214
 * @param ctxt  an XML parser context
13215
 * @param options  a bitmask of xmlParserOption values
13216
 * @returns 0 in case of success, the set of unknown or unimplemented options
13217
 *         in case of error.
13218
 */
13219
int
13220
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13221
0
{
13222
0
#ifdef LIBXML_HTML_ENABLED
13223
0
    if ((ctxt != NULL) && (ctxt->html))
13224
0
        return(htmlCtxtSetOptions(ctxt, options));
13225
0
#endif
13226
13227
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13228
0
}
13229
13230
/**
13231
 * Get the current options of the parser context.
13232
 *
13233
 * @since 2.14.0
13234
 *
13235
 * @param ctxt  an XML parser context
13236
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13237
 */
13238
int
13239
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13240
0
{
13241
0
    if (ctxt == NULL)
13242
0
        return(-1);
13243
13244
0
    return(ctxt->options);
13245
0
}
13246
13247
/**
13248
 * Applies the options to the parser context. The following options
13249
 * are never cleared and can only be enabled:
13250
 *
13251
 * - XML_PARSE_NOERROR
13252
 * - XML_PARSE_NOWARNING
13253
 * - XML_PARSE_NONET
13254
 * - XML_PARSE_NSCLEAN
13255
 * - XML_PARSE_NOCDATA
13256
 * - XML_PARSE_COMPACT
13257
 * - XML_PARSE_OLD10
13258
 * - XML_PARSE_HUGE
13259
 * - XML_PARSE_OLDSAX
13260
 * - XML_PARSE_IGNORE_ENC
13261
 * - XML_PARSE_BIG_LINES
13262
 *
13263
 * @deprecated Use #xmlCtxtSetOptions.
13264
 *
13265
 * @param ctxt  an XML parser context
13266
 * @param options  a combination of xmlParserOption
13267
 * @returns 0 in case of success, the set of unknown or unimplemented options
13268
 *         in case of error.
13269
 */
13270
int
13271
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13272
49.5k
{
13273
49.5k
    int keepMask;
13274
13275
49.5k
#ifdef LIBXML_HTML_ENABLED
13276
49.5k
    if ((ctxt != NULL) && (ctxt->html))
13277
0
        return(htmlCtxtUseOptions(ctxt, options));
13278
49.5k
#endif
13279
13280
    /*
13281
     * For historic reasons, some options can only be enabled.
13282
     */
13283
49.5k
    keepMask = XML_PARSE_NOERROR |
13284
49.5k
               XML_PARSE_NOWARNING |
13285
49.5k
               XML_PARSE_NONET |
13286
49.5k
               XML_PARSE_NSCLEAN |
13287
49.5k
               XML_PARSE_NOCDATA |
13288
49.5k
               XML_PARSE_COMPACT |
13289
49.5k
               XML_PARSE_OLD10 |
13290
49.5k
               XML_PARSE_HUGE |
13291
49.5k
               XML_PARSE_OLDSAX |
13292
49.5k
               XML_PARSE_IGNORE_ENC |
13293
49.5k
               XML_PARSE_BIG_LINES;
13294
13295
49.5k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13296
49.5k
}
13297
13298
/**
13299
 * To protect against exponential entity expansion ("billion laughs"), the
13300
 * size of serialized output is (roughly) limited to the input size
13301
 * multiplied by this factor. The default value is 5.
13302
 *
13303
 * When working with documents making heavy use of entity expansion, it can
13304
 * be necessary to increase the value. For security reasons, this should only
13305
 * be considered when processing trusted input.
13306
 *
13307
 * @param ctxt  an XML parser context
13308
 * @param maxAmpl  maximum amplification factor
13309
 */
13310
void
13311
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13312
0
{
13313
0
    if (ctxt == NULL)
13314
0
        return;
13315
0
    ctxt->maxAmpl = maxAmpl;
13316
0
}
13317
13318
/**
13319
 * Parse an XML document and return the resulting document tree.
13320
 * Takes ownership of the input object.
13321
 *
13322
 * @since 2.13.0
13323
 *
13324
 * @param ctxt  an XML parser context
13325
 * @param input  parser input
13326
 * @returns the resulting document tree or NULL
13327
 */
13328
xmlDoc *
13329
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13330
24.7k
{
13331
24.7k
    xmlDocPtr ret = NULL;
13332
13333
24.7k
    if ((ctxt == NULL) || (input == NULL)) {
13334
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13335
0
        xmlFreeInputStream(input);
13336
0
        return(NULL);
13337
0
    }
13338
13339
    /* assert(ctxt->inputNr == 0); */
13340
24.7k
    while (ctxt->inputNr > 0)
13341
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13342
13343
24.7k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13344
8
        xmlFreeInputStream(input);
13345
8
        return(NULL);
13346
8
    }
13347
13348
24.7k
    xmlParseDocument(ctxt);
13349
13350
24.7k
    ret = xmlCtxtGetDocument(ctxt);
13351
13352
    /* assert(ctxt->inputNr == 1); */
13353
50.2k
    while (ctxt->inputNr > 0)
13354
25.4k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13355
13356
24.7k
    return(ret);
13357
24.7k
}
13358
13359
/**
13360
 * Convenience function to parse an XML document from a
13361
 * zero-terminated string.
13362
 *
13363
 * See #xmlCtxtReadDoc for details.
13364
 *
13365
 * @param cur  a pointer to a zero terminated string
13366
 * @param URL  base URL (optional)
13367
 * @param encoding  the document encoding (optional)
13368
 * @param options  a combination of xmlParserOption
13369
 * @returns the resulting document tree
13370
 */
13371
xmlDoc *
13372
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13373
           int options)
13374
0
{
13375
0
    xmlParserCtxtPtr ctxt;
13376
0
    xmlParserInputPtr input;
13377
0
    xmlDocPtr doc = NULL;
13378
13379
0
    ctxt = xmlNewParserCtxt();
13380
0
    if (ctxt == NULL)
13381
0
        return(NULL);
13382
13383
0
    xmlCtxtUseOptions(ctxt, options);
13384
13385
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13386
0
                                      XML_INPUT_BUF_STATIC);
13387
13388
0
    if (input != NULL)
13389
0
        doc = xmlCtxtParseDocument(ctxt, input);
13390
13391
0
    xmlFreeParserCtxt(ctxt);
13392
0
    return(doc);
13393
0
}
13394
13395
/**
13396
 * Convenience function to parse an XML file from the filesystem
13397
 * or a global, user-defined resource loader.
13398
 *
13399
 * This function always enables the XML_PARSE_UNZIP option for
13400
 * backward compatibility. If a "-" filename is passed, it will
13401
 * read from stdin. Both of these features are potentially
13402
 * insecure and might be removed from later versions.
13403
 *
13404
 * See #xmlCtxtReadFile for details.
13405
 *
13406
 * @param filename  a file or URL
13407
 * @param encoding  the document encoding (optional)
13408
 * @param options  a combination of xmlParserOption
13409
 * @returns the resulting document tree
13410
 */
13411
xmlDoc *
13412
xmlReadFile(const char *filename, const char *encoding, int options)
13413
0
{
13414
0
    xmlParserCtxtPtr ctxt;
13415
0
    xmlParserInputPtr input;
13416
0
    xmlDocPtr doc = NULL;
13417
13418
0
    ctxt = xmlNewParserCtxt();
13419
0
    if (ctxt == NULL)
13420
0
        return(NULL);
13421
13422
0
    options |= XML_PARSE_UNZIP;
13423
13424
0
    xmlCtxtUseOptions(ctxt, options);
13425
13426
    /*
13427
     * Backward compatibility for users of command line utilities like
13428
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13429
     * should be removed at some point.
13430
     */
13431
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13432
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13433
0
                                      encoding, 0);
13434
0
    else
13435
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13436
13437
0
    if (input != NULL)
13438
0
        doc = xmlCtxtParseDocument(ctxt, input);
13439
13440
0
    xmlFreeParserCtxt(ctxt);
13441
0
    return(doc);
13442
0
}
13443
13444
/**
13445
 * Parse an XML in-memory document and build a tree. The input buffer must
13446
 * not contain a terminating null byte.
13447
 *
13448
 * See #xmlCtxtReadMemory for details.
13449
 *
13450
 * @param buffer  a pointer to a char array
13451
 * @param size  the size of the array
13452
 * @param url  base URL (optional)
13453
 * @param encoding  the document encoding (optional)
13454
 * @param options  a combination of xmlParserOption
13455
 * @returns the resulting document tree
13456
 */
13457
xmlDoc *
13458
xmlReadMemory(const char *buffer, int size, const char *url,
13459
              const char *encoding, int options)
13460
0
{
13461
0
    xmlParserCtxtPtr ctxt;
13462
0
    xmlParserInputPtr input;
13463
0
    xmlDocPtr doc = NULL;
13464
13465
0
    if (size < 0)
13466
0
  return(NULL);
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL)
13470
0
        return(NULL);
13471
13472
0
    xmlCtxtUseOptions(ctxt, options);
13473
13474
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13475
0
                                      XML_INPUT_BUF_STATIC);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML from a file descriptor and build a tree.
13486
 *
13487
 * See #xmlCtxtReadFd for details.
13488
 *
13489
 * NOTE that the file descriptor will not be closed when the
13490
 * context is freed or reset.
13491
 *
13492
 * @param fd  an open file descriptor
13493
 * @param URL  base URL (optional)
13494
 * @param encoding  the document encoding (optional)
13495
 * @param options  a combination of xmlParserOption
13496
 * @returns the resulting document tree
13497
 */
13498
xmlDoc *
13499
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13512
13513
0
    if (input != NULL)
13514
0
        doc = xmlCtxtParseDocument(ctxt, input);
13515
13516
0
    xmlFreeParserCtxt(ctxt);
13517
0
    return(doc);
13518
0
}
13519
13520
/**
13521
 * Parse an XML document from I/O functions and context and build a tree.
13522
 *
13523
 * See #xmlCtxtReadIO for details.
13524
 *
13525
 * @param ioread  an I/O read function
13526
 * @param ioclose  an I/O close function (optional)
13527
 * @param ioctx  an I/O handler
13528
 * @param URL  base URL (optional)
13529
 * @param encoding  the document encoding (optional)
13530
 * @param options  a combination of xmlParserOption
13531
 * @returns the resulting document tree
13532
 */
13533
xmlDoc *
13534
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13535
          void *ioctx, const char *URL, const char *encoding, int options)
13536
0
{
13537
0
    xmlParserCtxtPtr ctxt;
13538
0
    xmlParserInputPtr input;
13539
0
    xmlDocPtr doc = NULL;
13540
13541
0
    ctxt = xmlNewParserCtxt();
13542
0
    if (ctxt == NULL)
13543
0
        return(NULL);
13544
13545
0
    xmlCtxtUseOptions(ctxt, options);
13546
13547
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13548
0
                                  encoding, 0);
13549
13550
0
    if (input != NULL)
13551
0
        doc = xmlCtxtParseDocument(ctxt, input);
13552
13553
0
    xmlFreeParserCtxt(ctxt);
13554
0
    return(doc);
13555
0
}
13556
13557
/**
13558
 * Parse an XML in-memory document and build a tree.
13559
 *
13560
 * `URL` is used as base to resolve external entities and for error
13561
 * reporting.
13562
 *
13563
 * @param ctxt  an XML parser context
13564
 * @param str  a pointer to a zero terminated string
13565
 * @param URL  base URL (optional)
13566
 * @param encoding  the document encoding (optional)
13567
 * @param options  a combination of xmlParserOption
13568
 * @returns the resulting document tree
13569
 */
13570
xmlDoc *
13571
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13572
               const char *URL, const char *encoding, int options)
13573
0
{
13574
0
    xmlParserInputPtr input;
13575
13576
0
    if (ctxt == NULL)
13577
0
        return(NULL);
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13583
0
                                      XML_INPUT_BUF_STATIC);
13584
0
    if (input == NULL)
13585
0
        return(NULL);
13586
13587
0
    return(xmlCtxtParseDocument(ctxt, input));
13588
0
}
13589
13590
/**
13591
 * Parse an XML file from the filesystem or a global, user-defined
13592
 * resource loader.
13593
 *
13594
 * This function always enables the XML_PARSE_UNZIP option for
13595
 * backward compatibility. This feature is potentially insecure
13596
 * and might be removed from later versions.
13597
 *
13598
 * @param ctxt  an XML parser context
13599
 * @param filename  a file or URL
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13606
                const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if (ctxt == NULL)
13611
0
        return(NULL);
13612
13613
0
    options |= XML_PARSE_UNZIP;
13614
13615
0
    xmlCtxtReset(ctxt);
13616
0
    xmlCtxtUseOptions(ctxt, options);
13617
13618
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13619
0
    if (input == NULL)
13620
0
        return(NULL);
13621
13622
0
    return(xmlCtxtParseDocument(ctxt, input));
13623
0
}
13624
13625
/**
13626
 * Parse an XML in-memory document and build a tree. The input buffer must
13627
 * not contain a terminating null byte.
13628
 *
13629
 * `URL` is used as base to resolve external entities and for error
13630
 * reporting.
13631
 *
13632
 * @param ctxt  an XML parser context
13633
 * @param buffer  a pointer to a char array
13634
 * @param size  the size of the array
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13642
                  const char *URL, const char *encoding, int options)
13643
24.8k
{
13644
24.8k
    xmlParserInputPtr input;
13645
13646
24.8k
    if ((ctxt == NULL) || (size < 0))
13647
0
        return(NULL);
13648
13649
24.8k
    xmlCtxtReset(ctxt);
13650
24.8k
    xmlCtxtUseOptions(ctxt, options);
13651
13652
24.8k
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13653
24.8k
                                      XML_INPUT_BUF_STATIC);
13654
24.8k
    if (input == NULL)
13655
22
        return(NULL);
13656
13657
24.7k
    return(xmlCtxtParseDocument(ctxt, input));
13658
24.8k
}
13659
13660
/**
13661
 * Parse an XML document from a file descriptor and build a tree.
13662
 *
13663
 * NOTE that the file descriptor will not be closed when the
13664
 * context is freed or reset.
13665
 *
13666
 * `URL` is used as base to resolve external entities and for error
13667
 * reporting.
13668
 *
13669
 * @param ctxt  an XML parser context
13670
 * @param fd  an open file descriptor
13671
 * @param URL  base URL (optional)
13672
 * @param encoding  the document encoding (optional)
13673
 * @param options  a combination of xmlParserOption
13674
 * @returns the resulting document tree
13675
 */
13676
xmlDoc *
13677
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13678
              const char *URL, const char *encoding, int options)
13679
0
{
13680
0
    xmlParserInputPtr input;
13681
13682
0
    if (ctxt == NULL)
13683
0
        return(NULL);
13684
13685
0
    xmlCtxtReset(ctxt);
13686
0
    xmlCtxtUseOptions(ctxt, options);
13687
13688
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13689
0
    if (input == NULL)
13690
0
        return(NULL);
13691
13692
0
    return(xmlCtxtParseDocument(ctxt, input));
13693
0
}
13694
13695
/**
13696
 * Parse an XML document from I/O functions and source and build a tree.
13697
 * This reuses the existing `ctxt` parser context
13698
 *
13699
 * `URL` is used as base to resolve external entities and for error
13700
 * reporting.
13701
 *
13702
 * @param ctxt  an XML parser context
13703
 * @param ioread  an I/O read function
13704
 * @param ioclose  an I/O close function
13705
 * @param ioctx  an I/O handler
13706
 * @param URL  the base URL to use for the document
13707
 * @param encoding  the document encoding, or NULL
13708
 * @param options  a combination of xmlParserOption
13709
 * @returns the resulting document tree
13710
 */
13711
xmlDoc *
13712
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13713
              xmlInputCloseCallback ioclose, void *ioctx,
13714
        const char *URL,
13715
              const char *encoding, int options)
13716
0
{
13717
0
    xmlParserInputPtr input;
13718
13719
0
    if (ctxt == NULL)
13720
0
        return(NULL);
13721
13722
0
    xmlCtxtReset(ctxt);
13723
0
    xmlCtxtUseOptions(ctxt, options);
13724
13725
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13726
0
                                  encoding, 0);
13727
0
    if (input == NULL)
13728
0
        return(NULL);
13729
13730
0
    return(xmlCtxtParseDocument(ctxt, input));
13731
0
}
13732