Coverage Report

Created: 2025-07-11 06:13

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
5.37M
/*
 * Reserved namespace index values taken from the top of the int range.
 * NOTE(review): presumably sentinels for "no entry" and the built-in
 * "xml" prefix; confirm at the use sites.
 */
#define NS_INDEX_EMPTY  INT_MAX
81
255k
#define NS_INDEX_XML    (INT_MAX - 1)
82
1.03M
/*
 * Fixed 32-bit hash constants.
 * NOTE(review): presumably precomputed hashes of the empty URI and of the
 * XML namespace URI; confirm against the hashing code.
 */
#define URI_HASH_EMPTY  0xD943A04E
83
124k
#define URI_HASH_XML    0xF0451F02
84
85
/* Fallback when the platform headers do not define STDIN_FILENO. */
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
/* Fallback when the platform headers do not define SIZE_MAX. */
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
1.05M
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
5.33M
/*
 * XML_SPECIAL_EXTERNAL is bit 20; XML_SPECIAL_TYPE_MASK selects all
 * bits below it.
 */
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
5.30M
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
5.56M
/*
 * Single-bit flags (bits 0 and 1).
 * NOTE(review): presumably returned by the attribute value parser;
 * confirm at the use sites.
 */
#define XML_ATTVAL_ALLOC        (1 << 0)
99
112k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Per-start-tag record.
 * NOTE(review): presumably one entry per open element, holding its
 * prefix, namespace URI, source line and namespace count; confirm
 * against the element start/end parsing code.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Extra data kept per namespace entry.
 * NOTE(review): field meanings are inferred from their names only;
 * verify against the namespace table code.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/* One hash table slot: a hash value paired with an integer index. */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping: an array of xmlParserNsExtra, a bucket array
 * with its size and element count, plus a few integer state fields.
 * NOTE(review): exact semantics of elementId, defaultNsIndex and
 * minNsIndex are not visible here; confirm before relying on them.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
814k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
817k
#define XML_ENT_FIXED_COST 20
170
171
274M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
730k
#define XML_PARSER_BUFFER_SIZE 100
173
865k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
2.28k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
2.28k
    xmlCtxtErrMemory(ctxt);
226
2.28k
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
17.0k
{
239
17.0k
    if (prefix == NULL)
240
3.24k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
3.24k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
3.24k
                   "Attribute %s redefined\n", localname);
243
13.7k
    else
244
13.7k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
13.7k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
13.7k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
17.0k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
72.0M
{
260
72.0M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
72.0M
               NULL, NULL, NULL, 0, "%s", msg);
262
72.0M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
57.7k
{
277
57.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
57.7k
               str1, str2, NULL, 0, msg, str1, str2);
279
57.7k
}
280
281
#ifdef LIBXML_VALID_ENABLED
/**
 * Report a validity error (domain XML_FROM_DTD, level XML_ERR_ERROR).
 *
 * The context's `valid` flag is cleared before the error is reported.
 *
 * @param ctxt  an XML parser context
 * @param error  the error number
 * @param msg  the format string for the error message
 * @param str1  first string argument for the format
 * @param str2  second string argument for the format
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                 const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    /* The document is no longer considered valid. */
    ctxt->valid = 0;

    xmlCtxtErr(ctxt, NULL,
               XML_FROM_DTD, error, XML_ERR_ERROR,
               str1, str2, NULL, 0,
               msg, str1, str2);
}
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
8.12M
{
314
8.12M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
8.12M
               NULL, NULL, NULL, val, msg, val);
316
8.12M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
3.03M
{
333
3.03M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
3.03M
               str1, str2, NULL, val, msg, str1, val, str2);
335
3.03M
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
540k
{
349
540k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
540k
               val, NULL, NULL, 0, msg, val);
351
540k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
19.7k
{
365
19.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
19.7k
               val, NULL, NULL, 0, msg, val);
367
19.7k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
806k
{
385
806k
    ctxt->nsWellFormed = 0;
386
387
806k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
806k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
806k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
21.6k
{
407
21.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
21.6k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
21.6k
}
410
411
/**
 * Add `val` to `*dst`, clamping the result at ULONG_MAX instead of
 * wrapping around.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Headroom left before the accumulator would overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418
419
/**
 * Add a size_t quantity to `*dst`, clamping the result at ULONG_MAX
 * instead of wrapping around.
 *
 * `val` is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value is not truncated on the way in and
 * is still seen by the saturation check.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* Here val <= ULONG_MAX - *dst, so the cast cannot truncate. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like #xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * @param ctxt  parser context
446
 * @param extra  sum of unexpanded entity sizes
447
 * @returns 1 on error, 0 on success.
448
 */
449
static int
450
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
451
817k
{
452
817k
    unsigned long consumed;
453
817k
    unsigned long *expandedSize;
454
817k
    xmlParserInputPtr input = ctxt->input;
455
817k
    xmlEntityPtr entity = input->entity;
456
457
817k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
458
2.46k
        return(0);
459
460
    /*
461
     * Compute total consumed bytes so far, including input streams of
462
     * external entities.
463
     */
464
814k
    consumed = input->consumed;
465
814k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
466
814k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
467
468
814k
    if (entity)
469
32.6k
        expandedSize = &entity->expandedSize;
470
782k
    else
471
782k
        expandedSize = &ctxt->sizeentcopy;
472
473
    /*
474
     * Add extra cost and some fixed cost.
475
     */
476
814k
    xmlSaturatedAdd(expandedSize, extra);
477
814k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
478
479
    /*
480
     * It's important to always use saturation arithmetic when tracking
481
     * entity sizes to make the size checks reliable. If "sizeentcopy"
482
     * overflows, we have to abort.
483
     */
484
814k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
485
814k
        ((*expandedSize >= ULONG_MAX) ||
486
22.1k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
487
1.20k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
488
1.20k
                       "Maximum entity amplification factor exceeded, see "
489
1.20k
                       "xmlCtxtSetMaxAmplification.\n");
490
1.20k
        xmlHaltParser(ctxt);
491
1.20k
        return(1);
492
1.20k
    }
493
494
813k
    return(0);
495
814k
}
496
497
/************************************************************************
498
 *                  *
499
 *    Library wide options          *
500
 *                  *
501
 ************************************************************************/
502
503
/**
504
 * Examines if the library has been compiled with a given feature.
505
 *
506
 * @param feature  the feature to be examined
507
 * @returns zero (0) if the feature does not exist or an unknown
508
 * feature is requested, non-zero otherwise.
509
 */
510
int
511
xmlHasFeature(xmlFeature feature)
512
0
{
513
0
    switch (feature) {
514
0
  case XML_WITH_THREAD:
515
0
#ifdef LIBXML_THREAD_ENABLED
516
0
      return(1);
517
#else
518
      return(0);
519
#endif
520
0
        case XML_WITH_TREE:
521
0
            return(1);
522
0
        case XML_WITH_OUTPUT:
523
0
#ifdef LIBXML_OUTPUT_ENABLED
524
0
            return(1);
525
#else
526
            return(0);
527
#endif
528
0
        case XML_WITH_PUSH:
529
#ifdef LIBXML_PUSH_ENABLED
530
            return(1);
531
#else
532
0
            return(0);
533
0
#endif
534
0
        case XML_WITH_READER:
535
#ifdef LIBXML_READER_ENABLED
536
            return(1);
537
#else
538
0
            return(0);
539
0
#endif
540
0
        case XML_WITH_PATTERN:
541
0
#ifdef LIBXML_PATTERN_ENABLED
542
0
            return(1);
543
#else
544
            return(0);
545
#endif
546
0
        case XML_WITH_WRITER:
547
#ifdef LIBXML_WRITER_ENABLED
548
            return(1);
549
#else
550
0
            return(0);
551
0
#endif
552
0
        case XML_WITH_SAX1:
553
#ifdef LIBXML_SAX1_ENABLED
554
            return(1);
555
#else
556
0
            return(0);
557
0
#endif
558
0
        case XML_WITH_HTTP:
559
0
            return(0);
560
0
        case XML_WITH_VALID:
561
#ifdef LIBXML_VALID_ENABLED
562
            return(1);
563
#else
564
0
            return(0);
565
0
#endif
566
0
        case XML_WITH_HTML:
567
0
#ifdef LIBXML_HTML_ENABLED
568
0
            return(1);
569
#else
570
            return(0);
571
#endif
572
0
        case XML_WITH_LEGACY:
573
0
            return(0);
574
0
        case XML_WITH_C14N:
575
#ifdef LIBXML_C14N_ENABLED
576
            return(1);
577
#else
578
0
            return(0);
579
0
#endif
580
0
        case XML_WITH_CATALOG:
581
0
#ifdef LIBXML_CATALOG_ENABLED
582
0
            return(1);
583
#else
584
            return(0);
585
#endif
586
0
        case XML_WITH_XPATH:
587
0
#ifdef LIBXML_XPATH_ENABLED
588
0
            return(1);
589
#else
590
            return(0);
591
#endif
592
0
        case XML_WITH_XPTR:
593
0
#ifdef LIBXML_XPTR_ENABLED
594
0
            return(1);
595
#else
596
            return(0);
597
#endif
598
0
        case XML_WITH_XINCLUDE:
599
0
#ifdef LIBXML_XINCLUDE_ENABLED
600
0
            return(1);
601
#else
602
            return(0);
603
#endif
604
0
        case XML_WITH_ICONV:
605
0
#ifdef LIBXML_ICONV_ENABLED
606
0
            return(1);
607
#else
608
            return(0);
609
#endif
610
0
        case XML_WITH_ISO8859X:
611
0
#ifdef LIBXML_ISO8859X_ENABLED
612
0
            return(1);
613
#else
614
            return(0);
615
#endif
616
0
        case XML_WITH_UNICODE:
617
0
            return(0);
618
0
        case XML_WITH_REGEXP:
619
#ifdef LIBXML_REGEXP_ENABLED
620
            return(1);
621
#else
622
0
            return(0);
623
0
#endif
624
0
        case XML_WITH_AUTOMATA:
625
#ifdef LIBXML_REGEXP_ENABLED
626
            return(1);
627
#else
628
0
            return(0);
629
0
#endif
630
0
        case XML_WITH_EXPR:
631
0
            return(0);
632
0
        case XML_WITH_RELAXNG:
633
#ifdef LIBXML_RELAXNG_ENABLED
634
            return(1);
635
#else
636
0
            return(0);
637
0
#endif
638
0
        case XML_WITH_SCHEMAS:
639
#ifdef LIBXML_SCHEMAS_ENABLED
640
            return(1);
641
#else
642
0
            return(0);
643
0
#endif
644
0
        case XML_WITH_SCHEMATRON:
645
#ifdef LIBXML_SCHEMATRON_ENABLED
646
            return(1);
647
#else
648
0
            return(0);
649
0
#endif
650
0
        case XML_WITH_MODULES:
651
0
#ifdef LIBXML_MODULES_ENABLED
652
0
            return(1);
653
#else
654
            return(0);
655
#endif
656
0
        case XML_WITH_DEBUG:
657
0
#ifdef LIBXML_DEBUG_ENABLED
658
0
            return(1);
659
#else
660
            return(0);
661
#endif
662
0
        case XML_WITH_DEBUG_MEM:
663
0
            return(0);
664
0
        case XML_WITH_ZLIB:
665
#ifdef LIBXML_ZLIB_ENABLED
666
            return(1);
667
#else
668
0
            return(0);
669
0
#endif
670
0
        case XML_WITH_LZMA:
671
#ifdef LIBXML_LZMA_ENABLED
672
            return(1);
673
#else
674
0
            return(0);
675
0
#endif
676
0
        case XML_WITH_ICU:
677
#ifdef LIBXML_ICU_ENABLED
678
            return(1);
679
#else
680
0
            return(0);
681
0
#endif
682
0
        default:
683
0
      break;
684
0
     }
685
0
     return(0);
686
0
}
687
688
/************************************************************************
689
 *                  *
690
 *      Simple string buffer        *
691
 *                  *
692
 ************************************************************************/
693
694
/*
 * Simple growable byte buffer with a hard size limit and a sticky
 * error code.
 *
 * Errors (allocation failure, size limit exceeded) are recorded in
 * `code` instead of being reported immediately; they are reported when
 * the buffer is finished or cleaned up.
 */
typedef struct {
695
    xmlChar *mem; /* heap storage, NULL until the first growth */
696
    unsigned size; /* number of bytes currently stored */
697
    unsigned cap; /* size < cap */
698
    unsigned max; /* size <= max */
699
    xmlParserErrors code; /* XML_ERR_OK, or the recorded error */
700
} xmlSBuf;
701
702
static void
703
5.42M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
704
5.42M
    buf->mem = NULL;
705
5.42M
    buf->size = 0;
706
5.42M
    buf->cap = 0;
707
5.42M
    buf->max = max;
708
5.42M
    buf->code = XML_ERR_OK;
709
5.42M
}
710
711
static int
712
682k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
713
682k
    xmlChar *mem;
714
682k
    unsigned cap;
715
716
682k
    if (len >= UINT_MAX / 2 - buf->size) {
717
0
        if (buf->code == XML_ERR_OK)
718
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
719
0
        return(-1);
720
0
    }
721
722
682k
    cap = (buf->size + len) * 2;
723
682k
    if (cap < 240)
724
440k
        cap = 240;
725
726
682k
    mem = xmlRealloc(buf->mem, cap);
727
682k
    if (mem == NULL) {
728
137k
        buf->code = XML_ERR_NO_MEMORY;
729
137k
        return(-1);
730
137k
    }
731
732
545k
    buf->mem = mem;
733
545k
    buf->cap = cap;
734
735
545k
    return(0);
736
682k
}
737
738
static void
739
145M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
740
145M
    if (buf->max - buf->size < len) {
741
2.72k
        if (buf->code == XML_ERR_OK)
742
42
            buf->code = XML_ERR_RESOURCE_LIMIT;
743
2.72k
        return;
744
2.72k
    }
745
746
145M
    if (buf->cap - buf->size <= len) {
747
674k
        if (xmlSBufGrow(buf, len) < 0)
748
136k
            return;
749
674k
    }
750
751
145M
    if (len > 0)
752
145M
        memcpy(buf->mem + buf->size, str, len);
753
145M
    buf->size += len;
754
145M
}
755
756
static void
757
141M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
758
141M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
759
141M
}
760
761
static void
762
300k
xmlSBufAddChar(xmlSBuf *buf, int c) {
763
300k
    xmlChar *end;
764
765
300k
    if (buf->max - buf->size < 4) {
766
105
        if (buf->code == XML_ERR_OK)
767
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
768
105
        return;
769
105
    }
770
771
300k
    if (buf->cap - buf->size <= 4) {
772
7.89k
        if (xmlSBufGrow(buf, 4) < 0)
773
516
            return;
774
7.89k
    }
775
776
300k
    end = buf->mem + buf->size;
777
778
300k
    if (c < 0x80) {
779
148k
        *end = (xmlChar) c;
780
148k
        buf->size += 1;
781
151k
    } else {
782
151k
        buf->size += xmlCopyCharMultiByte(end, c);
783
151k
    }
784
300k
}
785
786
static void
787
131M
xmlSBufAddReplChar(xmlSBuf *buf) {
788
131M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
789
131M
}
790
791
static void
792
230
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
793
230
    if (buf->code == XML_ERR_NO_MEMORY)
794
188
        xmlCtxtErrMemory(ctxt);
795
42
    else
796
42
        xmlFatalErr(ctxt, buf->code, errMsg);
797
230
}
798
799
static xmlChar *
800
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
801
465k
              const char *errMsg) {
802
465k
    if (buf->mem == NULL) {
803
36.4k
        buf->mem = xmlMalloc(1);
804
36.4k
        if (buf->mem == NULL) {
805
28
            buf->code = XML_ERR_NO_MEMORY;
806
36.4k
        } else {
807
36.4k
            buf->mem[0] = 0;
808
36.4k
        }
809
428k
    } else {
810
428k
        buf->mem[buf->size] = 0;
811
428k
    }
812
813
465k
    if (buf->code == XML_ERR_OK) {
814
465k
        if (sizeOut != NULL)
815
298k
            *sizeOut = buf->size;
816
465k
        return(buf->mem);
817
465k
    }
818
819
93
    xmlSBufReportError(buf, ctxt, errMsg);
820
821
93
    xmlFree(buf->mem);
822
823
93
    if (sizeOut != NULL)
824
41
        *sizeOut = 0;
825
93
    return(NULL);
826
465k
}
827
828
static void
829
4.93M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
830
4.93M
    if (buf->code != XML_ERR_OK)
831
137
        xmlSBufReportError(buf, ctxt, errMsg);
832
833
4.93M
    xmlFree(buf->mem);
834
4.93M
}
835
836
static int
837
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
838
337M
                    const char *errMsg) {
839
337M
    int c = str[0];
840
337M
    int c1 = str[1];
841
842
337M
    if ((c1 & 0xC0) != 0x80)
843
24.8M
        goto encoding_error;
844
845
313M
    if (c < 0xE0) {
846
        /* 2-byte sequence */
847
86.3M
        if (c < 0xC2)
848
38.2M
            goto encoding_error;
849
850
48.1M
        return(2);
851
226M
    } else {
852
226M
        int c2 = str[2];
853
854
226M
        if ((c2 & 0xC0) != 0x80)
855
38.3k
            goto encoding_error;
856
857
226M
        if (c < 0xF0) {
858
            /* 3-byte sequence */
859
226M
            if (c == 0xE0) {
860
                /* overlong */
861
3.83M
                if (c1 < 0xA0)
862
484
                    goto encoding_error;
863
222M
            } else if (c == 0xED) {
864
                /* surrogate */
865
2.74k
                if (c1 >= 0xA0)
866
1.02k
                    goto encoding_error;
867
222M
            } else if (c == 0xEF) {
868
                /* U+FFFE and U+FFFF are invalid Chars */
869
322k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
870
7.69k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
871
322k
            }
872
873
226M
            return(3);
874
226M
        } else {
875
            /* 4-byte sequence */
876
76.8k
            if ((str[3] & 0xC0) != 0x80)
877
9.06k
                goto encoding_error;
878
67.8k
            if (c == 0xF0) {
879
                /* overlong */
880
23.1k
                if (c1 < 0x90)
881
3.03k
                    goto encoding_error;
882
44.6k
            } else if (c >= 0xF4) {
883
                /* greater than 0x10FFFF */
884
33.7k
                if ((c > 0xF4) || (c1 >= 0x90))
885
33.4k
                    goto encoding_error;
886
33.7k
            }
887
888
31.3k
            return(4);
889
67.8k
        }
890
226M
    }
891
892
63.1M
encoding_error:
893
    /* Only report the first error */
894
63.1M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
895
20.5k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
896
20.5k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
897
20.5k
    }
898
899
63.1M
    return(0);
900
313M
}
901
902
/************************************************************************
903
 *                  *
904
 *    SAX2 defaulted attributes handling      *
905
 *                  *
906
 ************************************************************************/
907
908
/**
909
 * Final initialization of the parser context before starting to parse.
910
 *
911
 * This accounts for users modifying struct members of parser context
912
 * directly.
913
 *
914
 * @param ctxt  an XML parser context
915
 */
916
static void
917
851k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
918
851k
    xmlSAXHandlerPtr sax;
919
920
    /* Avoid unused variable warning if features are disabled. */
921
851k
    (void) sax;
922
923
    /*
924
     * Changing the SAX struct directly is still widespread practice
925
     * in internal and external code.
926
     */
927
851k
    if (ctxt == NULL) return;
928
851k
    sax = ctxt->sax;
929
#ifdef LIBXML_SAX1_ENABLED
930
    /*
931
     * Only enable SAX2 if there SAX2 element handlers, except when there
932
     * are no element handlers at all.
933
     */
934
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
935
        (sax) &&
936
        (sax->initialized == XML_SAX2_MAGIC) &&
937
        ((sax->startElementNs != NULL) ||
938
         (sax->endElementNs != NULL) ||
939
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
940
        ctxt->sax2 = 1;
941
#else
942
851k
    ctxt->sax2 = 1;
943
851k
#endif /* LIBXML_SAX1_ENABLED */
944
945
    /*
946
     * Some users replace the dictionary directly in the context struct.
947
     * We really need an API function to do that cleanly.
948
     */
949
851k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
950
851k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
951
851k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
952
851k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
953
851k
    (ctxt->str_xml_ns == NULL)) {
954
0
        xmlErrMemory(ctxt);
955
0
    }
956
957
851k
    xmlDictSetLimit(ctxt->dict,
958
851k
                    (ctxt->options & XML_PARSE_HUGE) ?
959
0
                        0 :
960
851k
                        XML_MAX_DICTIONARY_LIMIT);
961
962
#ifdef LIBXML_VALID_ENABLED
963
    if (ctxt->validate)
964
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
965
    else
966
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
967
#endif /* LIBXML_VALID_ENABLED */
968
851k
}
969
970
/*
 * One defaulted attribute.
 * NOTE(review): field meanings are inferred from their names (prefix,
 * local name and default value as hashed strings, end of the value,
 * "declared externally" flag, expanded size); confirm against the code
 * that fills this struct.
 */
typedef struct {
971
    xmlHashedString prefix;
972
    xmlHashedString name;
973
    xmlHashedString value;
974
    const xmlChar *valueEnd;
975
    int external;
976
    int expandedSize;
977
} xmlDefAttr;
978
979
/*
 * Variable-length list of defaulted attributes: a header followed by a
 * trailing array of xmlDefAttr entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
980
typedef xmlDefAttrs *xmlDefAttrsPtr;
981
struct _xmlDefAttrs {
982
    int nbAttrs;  /* number of defaulted attributes on that element */
983
    int maxAttrs;       /* the size of the array */
984
#if __STDC_VERSION__ >= 199901L
985
    /* Using a C99 flexible array member avoids UBSan errors. */
986
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
987
#else
988
    /* Pre-C99 fallback: one-element trailing array. */
    xmlDefAttr attrs[1];
989
#endif
990
};
991
992
/**
993
 * Normalize the space in non CDATA attribute values:
994
 * If the attribute type is not CDATA, then the XML processor MUST further
995
 * process the normalized attribute value by discarding any leading and
996
 * trailing space (\#x20) characters, and by replacing sequences of space
997
 * (\#x20) characters by a single space (\#x20) character.
998
 * Note that the size of dst need to be at least src, and if one doesn't need
999
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000
 * passing src as dst is just fine.
1001
 *
1002
 * @param src  the source string
1003
 * @param dst  the target string
1004
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
1005
 *         is needed.
1006
 */
1007
static xmlChar *
1008
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1009
41.2k
{
1010
41.2k
    if ((src == NULL) || (dst == NULL))
1011
0
        return(NULL);
1012
1013
42.1k
    while (*src == 0x20) src++;
1014
14.3M
    while (*src != 0) {
1015
14.3M
  if (*src == 0x20) {
1016
32.8k
      while (*src == 0x20) src++;
1017
14.1k
      if (*src != 0)
1018
13.0k
    *dst++ = 0x20;
1019
14.3M
  } else {
1020
14.3M
      *dst++ = *src++;
1021
14.3M
  }
1022
14.3M
    }
1023
41.2k
    *dst = 0;
1024
41.2k
    if (dst == src)
1025
39.5k
       return(NULL);
1026
1.65k
    return(dst);
1027
41.2k
}
1028
1029
/**
1030
 * Add a defaulted attribute for an element
1031
 *
1032
 * @param ctxt  an XML parser context
1033
 * @param fullname  the element fullname
1034
 * @param fullattr  the attribute fullname
1035
 * @param value  the attribute value
1036
 */
1037
static void
1038
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1039
               const xmlChar *fullname,
1040
               const xmlChar *fullattr,
1041
51.9k
               const xmlChar *value) {
1042
51.9k
    xmlDefAttrsPtr defaults;
1043
51.9k
    xmlDefAttr *attr;
1044
51.9k
    int len, expandedSize;
1045
51.9k
    xmlHashedString name;
1046
51.9k
    xmlHashedString prefix;
1047
51.9k
    xmlHashedString hvalue;
1048
51.9k
    const xmlChar *localname;
1049
1050
    /*
1051
     * Allows to detect attribute redefinitions
1052
     */
1053
51.9k
    if (ctxt->attsSpecial != NULL) {
1054
38.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1055
5.32k
      return;
1056
38.8k
    }
1057
1058
46.6k
    if (ctxt->attsDefault == NULL) {
1059
15.8k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1060
15.8k
  if (ctxt->attsDefault == NULL)
1061
21
      goto mem_error;
1062
15.8k
    }
1063
1064
    /*
1065
     * split the element name into prefix:localname , the string found
1066
     * are within the DTD and then not associated to namespace names.
1067
     */
1068
46.5k
    localname = xmlSplitQName3(fullname, &len);
1069
46.5k
    if (localname == NULL) {
1070
45.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1071
45.7k
  prefix.name = NULL;
1072
45.7k
    } else {
1073
847
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1074
847
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1075
847
        if (prefix.name == NULL)
1076
0
            goto mem_error;
1077
847
    }
1078
46.5k
    if (name.name == NULL)
1079
0
        goto mem_error;
1080
1081
    /*
1082
     * make sure there is some storage
1083
     */
1084
46.5k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1085
46.5k
    if ((defaults == NULL) ||
1086
46.5k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1087
20.7k
        xmlDefAttrsPtr temp;
1088
20.7k
        int newSize;
1089
1090
20.7k
        if (defaults == NULL) {
1091
16.8k
            newSize = 4;
1092
16.8k
        } else {
1093
3.81k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1094
3.81k
                ((size_t) defaults->maxAttrs >
1095
3.81k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1096
0
                goto mem_error;
1097
1098
3.81k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1099
0
                newSize = XML_MAX_ATTRS;
1100
3.81k
            else
1101
3.81k
                newSize = defaults->maxAttrs * 2;
1102
3.81k
        }
1103
20.7k
        temp = xmlRealloc(defaults,
1104
20.7k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1105
20.7k
  if (temp == NULL)
1106
1
      goto mem_error;
1107
20.7k
        if (defaults == NULL)
1108
16.8k
            temp->nbAttrs = 0;
1109
20.7k
  temp->maxAttrs = newSize;
1110
20.7k
        defaults = temp;
1111
20.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1112
20.7k
                          defaults, NULL) < 0) {
1113
0
      xmlFree(defaults);
1114
0
      goto mem_error;
1115
0
  }
1116
20.7k
    }
1117
1118
    /*
1119
     * Split the attribute name into prefix:localname , the string found
1120
     * are within the DTD and hen not associated to namespace names.
1121
     */
1122
46.5k
    localname = xmlSplitQName3(fullattr, &len);
1123
46.5k
    if (localname == NULL) {
1124
21.5k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1125
21.5k
  prefix.name = NULL;
1126
25.0k
    } else {
1127
25.0k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1128
25.0k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1129
25.0k
        if (prefix.name == NULL)
1130
1
            goto mem_error;
1131
25.0k
    }
1132
46.5k
    if (name.name == NULL)
1133
1
        goto mem_error;
1134
1135
    /* intern the string and precompute the end */
1136
46.5k
    len = strlen((const char *) value);
1137
46.5k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1138
46.5k
    if (hvalue.name == NULL)
1139
1
        goto mem_error;
1140
1141
46.5k
    expandedSize = strlen((const char *) name.name);
1142
46.5k
    if (prefix.name != NULL)
1143
25.0k
        expandedSize += strlen((const char *) prefix.name);
1144
46.5k
    expandedSize += len;
1145
1146
46.5k
    attr = &defaults->attrs[defaults->nbAttrs++];
1147
46.5k
    attr->name = name;
1148
46.5k
    attr->prefix = prefix;
1149
46.5k
    attr->value = hvalue;
1150
46.5k
    attr->valueEnd = hvalue.name + len;
1151
46.5k
    attr->external = PARSER_EXTERNAL(ctxt);
1152
46.5k
    attr->expandedSize = expandedSize;
1153
1154
46.5k
    return;
1155
1156
25
mem_error:
1157
25
    xmlErrMemory(ctxt);
1158
25
}
1159
1160
/**
1161
 * Register this attribute type
1162
 *
1163
 * @param ctxt  an XML parser context
1164
 * @param fullname  the element fullname
1165
 * @param fullattr  the attribute fullname
1166
 * @param type  the attribute type
1167
 */
1168
static void
1169
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1170
      const xmlChar *fullname,
1171
      const xmlChar *fullattr,
1172
      int type)
1173
83.1k
{
1174
83.1k
    if (ctxt->attsSpecial == NULL) {
1175
19.6k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1176
19.6k
  if (ctxt->attsSpecial == NULL)
1177
26
      goto mem_error;
1178
19.6k
    }
1179
1180
83.0k
    if (PARSER_EXTERNAL(ctxt))
1181
35.5k
        type |= XML_SPECIAL_EXTERNAL;
1182
1183
83.0k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1184
83.0k
                    XML_INT_TO_PTR(type)) < 0)
1185
2
        goto mem_error;
1186
83.0k
    return;
1187
1188
83.0k
mem_error:
1189
28
    xmlErrMemory(ctxt);
1190
28
}
1191
1192
/**
1193
 * Removes CDATA attributes from the special attribute table
1194
 */
1195
static void
1196
xmlCleanSpecialAttrCallback(void *payload, void *data,
1197
                            const xmlChar *fullname, const xmlChar *fullattr,
1198
76.1k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1199
76.1k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1200
1201
76.1k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1202
2.73k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1203
2.73k
    }
1204
76.1k
}
1205
1206
/**
1207
 * Trim the list of attributes defined to remove all those of type
1208
 * CDATA as they are not special. This call should be done when finishing
1209
 * to parse the DTD and before starting to parse the document root.
1210
 *
1211
 * @param ctxt  an XML parser context
1212
 */
1213
static void
1214
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1215
148k
{
1216
148k
    if (ctxt->attsSpecial == NULL)
1217
129k
        return;
1218
1219
19.6k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1220
1221
19.6k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1222
270
        xmlHashFree(ctxt->attsSpecial, NULL);
1223
270
        ctxt->attsSpecial = NULL;
1224
270
    }
1225
19.6k
}
1226
1227
/**
1228
 * Checks that the value conforms to the LanguageID production:
1229
 *
1230
 * @deprecated Internal function, do not use.
1231
 *
1232
 * NOTE: this is somewhat deprecated, those productions were removed from
1233
 * the XML Second edition.
1234
 *
1235
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1236
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1237
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1238
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1239
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1240
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1241
 *
1242
 * The current REC reference the successors of RFC 1766, currently 5646
1243
 *
1244
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1245
 *
1246
 *     langtag       = language
1247
 *                     ["-" script]
1248
 *                     ["-" region]
1249
 *                     *("-" variant)
1250
 *                     *("-" extension)
1251
 *                     ["-" privateuse]
1252
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1253
 *                     ["-" extlang]       ; sometimes followed by
1254
 *                                         ; extended language subtags
1255
 *                   / 4ALPHA              ; or reserved for future use
1256
 *                   / 5*8ALPHA            ; or registered language subtag
1257
 *
1258
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1259
 *                     *2("-" 3ALPHA)      ; permanently reserved
1260
 *
1261
 *     script        = 4ALPHA              ; ISO 15924 code
1262
 *
1263
 *     region        = 2ALPHA              ; ISO 3166-1 code
1264
 *                   / 3DIGIT              ; UN M.49 code
1265
 *
1266
 *     variant       = 5*8alphanum         ; registered variants
1267
 *                   / (DIGIT 3alphanum)
1268
 *
1269
 *     extension     = singleton 1*("-" (2*8alphanum))
1270
 *
1271
 *                                         ; Single alphanumerics
1272
 *                                         ; "x" reserved for private use
1273
 *     singleton     = DIGIT               ; 0 - 9
1274
 *                   / %x41-57             ; A - W
1275
 *                   / %x59-5A             ; Y - Z
1276
 *                   / %x61-77             ; a - w
1277
 *                   / %x79-7A             ; y - z
1278
 *
1279
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1280
 * The parser below doesn't try to cope with extension or privateuse
1281
 * that could be added but that's not interoperable anyway
1282
 *
1283
 * @param lang  pointer to the string value
1284
 * @returns 1 if correct 0 otherwise
1285
 **/
1286
int
1287
xmlCheckLanguageID(const xmlChar * lang)
1288
0
{
1289
0
    const xmlChar *cur = lang, *nxt;
1290
1291
0
    if (cur == NULL)
1292
0
        return (0);
1293
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1294
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1295
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1296
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1297
        /*
1298
         * Still allow IANA code and user code which were coming
1299
         * from the previous version of the XML-1.0 specification
1300
         * it's deprecated but we should not fail
1301
         */
1302
0
        cur += 2;
1303
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1304
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1305
0
            cur++;
1306
0
        return(cur[0] == 0);
1307
0
    }
1308
0
    nxt = cur;
1309
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
0
           nxt++;
1312
0
    if (nxt - cur >= 4) {
1313
        /*
1314
         * Reserved
1315
         */
1316
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1317
0
            return(0);
1318
0
        return(1);
1319
0
    }
1320
0
    if (nxt - cur < 2)
1321
0
        return(0);
1322
    /* we got an ISO 639 code */
1323
0
    if (nxt[0] == 0)
1324
0
        return(1);
1325
0
    if (nxt[0] != '-')
1326
0
        return(0);
1327
1328
0
    nxt++;
1329
0
    cur = nxt;
1330
    /* now we can have extlang or script or region or variant */
1331
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1332
0
        goto region_m49;
1333
1334
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1335
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1336
0
           nxt++;
1337
0
    if (nxt - cur == 4)
1338
0
        goto script;
1339
0
    if (nxt - cur == 2)
1340
0
        goto region;
1341
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
0
        goto variant;
1343
0
    if (nxt - cur != 3)
1344
0
        return(0);
1345
    /* we parsed an extlang */
1346
0
    if (nxt[0] == 0)
1347
0
        return(1);
1348
0
    if (nxt[0] != '-')
1349
0
        return(0);
1350
1351
0
    nxt++;
1352
0
    cur = nxt;
1353
    /* now we can have script or region or variant */
1354
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1355
0
        goto region_m49;
1356
1357
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1358
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1359
0
           nxt++;
1360
0
    if (nxt - cur == 2)
1361
0
        goto region;
1362
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
0
        goto variant;
1364
0
    if (nxt - cur != 4)
1365
0
        return(0);
1366
    /* we parsed a script */
1367
0
script:
1368
0
    if (nxt[0] == 0)
1369
0
        return(1);
1370
0
    if (nxt[0] != '-')
1371
0
        return(0);
1372
1373
0
    nxt++;
1374
0
    cur = nxt;
1375
    /* now we can have region or variant */
1376
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1377
0
        goto region_m49;
1378
1379
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1380
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1381
0
           nxt++;
1382
1383
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1384
0
        goto variant;
1385
0
    if (nxt - cur != 2)
1386
0
        return(0);
1387
    /* we parsed a region */
1388
0
region:
1389
0
    if (nxt[0] == 0)
1390
0
        return(1);
1391
0
    if (nxt[0] != '-')
1392
0
        return(0);
1393
1394
0
    nxt++;
1395
0
    cur = nxt;
1396
    /* now we can just have a variant */
1397
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1398
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1399
0
           nxt++;
1400
1401
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1402
0
        return(0);
1403
1404
    /* we parsed a variant */
1405
0
variant:
1406
0
    if (nxt[0] == 0)
1407
0
        return(1);
1408
0
    if (nxt[0] != '-')
1409
0
        return(0);
1410
    /* extensions and private use subtags not checked */
1411
0
    return (1);
1412
1413
0
region_m49:
1414
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1415
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1416
0
        nxt += 3;
1417
0
        goto region;
1418
0
    }
1419
0
    return(0);
1420
0
}
1421
1422
/************************************************************************
1423
 *                  *
1424
 *    Parser stacks related functions and macros    *
1425
 *                  *
1426
 ************************************************************************/
1427
1428
static xmlChar *
1429
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1430
1431
/**
1432
 * Create a new namespace database.
1433
 *
1434
 * @returns the new obejct.
1435
 */
1436
xmlParserNsData *
1437
1.00M
xmlParserNsCreate(void) {
1438
1.00M
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1439
1440
1.00M
    if (nsdb == NULL)
1441
43
        return(NULL);
1442
1.00M
    memset(nsdb, 0, sizeof(*nsdb));
1443
1.00M
    nsdb->defaultNsIndex = INT_MAX;
1444
1445
1.00M
    return(nsdb);
1446
1.00M
}
1447
1448
/**
1449
 * Free a namespace database.
1450
 *
1451
 * @param nsdb  namespace database
1452
 */
1453
void
1454
1.00M
xmlParserNsFree(xmlParserNsData *nsdb) {
1455
1.00M
    if (nsdb == NULL)
1456
0
        return;
1457
1458
1.00M
    xmlFree(nsdb->extra);
1459
1.00M
    xmlFree(nsdb->hash);
1460
1.00M
    xmlFree(nsdb);
1461
1.00M
}
1462
1463
/**
1464
 * Reset a namespace database.
1465
 *
1466
 * @param nsdb  namespace database
1467
 */
1468
static void
1469
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1470
0
    if (nsdb == NULL)
1471
0
        return;
1472
1473
0
    nsdb->hashElems = 0;
1474
0
    nsdb->elementId = 0;
1475
0
    nsdb->defaultNsIndex = INT_MAX;
1476
1477
0
    if (nsdb->hash)
1478
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1479
0
}
1480
1481
/**
1482
 * Signal that a new element has started.
1483
 *
1484
 * @param nsdb  namespace database
1485
 * @returns 0 on success, -1 if the element counter overflowed.
1486
 */
1487
static int
1488
8.28M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1489
8.28M
    if (nsdb->elementId == UINT_MAX)
1490
0
        return(-1);
1491
8.28M
    nsdb->elementId++;
1492
1493
8.28M
    return(0);
1494
8.28M
}
1495
1496
/**
1497
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1498
 * be set to the matching bucket, or the first empty bucket if no match
1499
 * was found.
1500
 *
1501
 * @param ctxt  parser context
1502
 * @param prefix  namespace prefix
1503
 * @param bucketPtr  optional bucket (return value)
1504
 * @returns the namespace index on success, INT_MAX if no namespace was
1505
 * found.
1506
 */
1507
static int
1508
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1509
13.1M
                  xmlParserNsBucket **bucketPtr) {
1510
13.1M
    xmlParserNsBucket *bucket, *tombstone;
1511
13.1M
    unsigned index, hashValue;
1512
1513
13.1M
    if (prefix->name == NULL)
1514
4.15M
        return(ctxt->nsdb->defaultNsIndex);
1515
1516
9.04M
    if (ctxt->nsdb->hashSize == 0)
1517
715k
        return(INT_MAX);
1518
1519
8.32M
    hashValue = prefix->hashValue;
1520
8.32M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1521
8.32M
    bucket = &ctxt->nsdb->hash[index];
1522
8.32M
    tombstone = NULL;
1523
1524
8.94M
    while (bucket->hashValue) {
1525
8.37M
        if (bucket->index == INT_MAX) {
1526
161k
            if (tombstone == NULL)
1527
147k
                tombstone = bucket;
1528
8.21M
        } else if (bucket->hashValue == hashValue) {
1529
7.76M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1530
7.76M
                if (bucketPtr != NULL)
1531
1.08M
                    *bucketPtr = bucket;
1532
7.76M
                return(bucket->index);
1533
7.76M
            }
1534
7.76M
        }
1535
1536
614k
        index++;
1537
614k
        bucket++;
1538
614k
        if (index == ctxt->nsdb->hashSize) {
1539
20.4k
            index = 0;
1540
20.4k
            bucket = ctxt->nsdb->hash;
1541
20.4k
        }
1542
614k
    }
1543
1544
564k
    if (bucketPtr != NULL)
1545
172k
        *bucketPtr = tombstone ? tombstone : bucket;
1546
564k
    return(INT_MAX);
1547
8.32M
}
1548
1549
/**
1550
 * Lookup namespace URI with given prefix.
1551
 *
1552
 * @param ctxt  parser context
1553
 * @param prefix  namespace prefix
1554
 * @returns the namespace URI on success, NULL if no namespace was found.
1555
 */
1556
static const xmlChar *
1557
7.71M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1558
7.71M
    const xmlChar *ret;
1559
7.71M
    int nsIndex;
1560
1561
7.71M
    if (prefix->name == ctxt->str_xml)
1562
17.9k
        return(ctxt->str_xml_ns);
1563
1564
    /*
1565
     * minNsIndex is used when building an entity tree. We must
1566
     * ignore namespaces declared outside the entity.
1567
     */
1568
7.69M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1569
7.69M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1570
3.74M
        return(NULL);
1571
1572
3.94M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1573
3.94M
    if (ret[0] == 0)
1574
19.0k
        ret = NULL;
1575
3.94M
    return(ret);
1576
7.69M
}
1577
1578
/**
1579
 * Lookup extra data for the given prefix. This returns data stored
1580
 * with xmlParserNsUdpateSax.
1581
 *
1582
 * @param ctxt  parser context
1583
 * @param prefix  namespace prefix
1584
 * @returns the data on success, NULL if no namespace was found.
1585
 */
1586
void *
1587
2.88M
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1588
2.88M
    xmlHashedString hprefix;
1589
2.88M
    int nsIndex;
1590
1591
2.88M
    if (prefix == ctxt->str_xml)
1592
123k
        return(NULL);
1593
1594
2.76M
    hprefix.name = prefix;
1595
2.76M
    if (prefix != NULL)
1596
2.55M
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1597
207k
    else
1598
207k
        hprefix.hashValue = 0;
1599
2.76M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1600
2.76M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1601
0
        return(NULL);
1602
1603
2.76M
    return(ctxt->nsdb->extra[nsIndex].saxData);
1604
2.76M
}
1605
1606
/**
1607
 * Sets or updates extra data for the given prefix. This value will be
1608
 * returned by xmlParserNsLookupSax as long as the namespace with the
1609
 * given prefix is in scope.
1610
 *
1611
 * @param ctxt  parser context
1612
 * @param prefix  namespace prefix
1613
 * @param saxData  extra data for SAX handler
1614
 * @returns the data on success, NULL if no namespace was found.
1615
 */
1616
int
1617
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1618
743k
                     void *saxData) {
1619
743k
    xmlHashedString hprefix;
1620
743k
    int nsIndex;
1621
1622
743k
    if (prefix == ctxt->str_xml)
1623
0
        return(-1);
1624
1625
743k
    hprefix.name = prefix;
1626
743k
    if (prefix != NULL)
1627
720k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1628
23.2k
    else
1629
23.2k
        hprefix.hashValue = 0;
1630
743k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1631
743k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1632
0
        return(-1);
1633
1634
743k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1635
743k
    return(0);
1636
743k
}
1637
1638
/**
1639
 * Grows the namespace tables.
1640
 *
1641
 * @param ctxt  parser context
1642
 * @returns 0 on success, -1 if a memory allocation failed.
1643
 */
1644
static int
1645
769k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1646
769k
    const xmlChar **table;
1647
769k
    xmlParserNsExtra *extra;
1648
769k
    int newSize;
1649
1650
769k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1651
769k
                              sizeof(table[0]) + sizeof(extra[0]),
1652
769k
                              16, XML_MAX_ITEMS);
1653
769k
    if (newSize < 0)
1654
0
        goto error;
1655
1656
769k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1657
769k
    if (table == NULL)
1658
75
        goto error;
1659
769k
    ctxt->nsTab = table;
1660
1661
769k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1662
769k
    if (extra == NULL)
1663
29
        goto error;
1664
769k
    ctxt->nsdb->extra = extra;
1665
1666
769k
    ctxt->nsMax = newSize;
1667
769k
    return(0);
1668
1669
104
error:
1670
104
    xmlErrMemory(ctxt);
1671
104
    return(-1);
1672
769k
}
1673
1674
/**
1675
 * Push a new namespace on the table.
1676
 *
1677
 * @param ctxt  parser context
1678
 * @param prefix  prefix with hash value
1679
 * @param uri  uri with hash value
1680
 * @param saxData  extra data for SAX handler
1681
 * @param defAttr  whether the namespace comes from a default attribute
1682
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1683
 * -1 if a memory allocation failed.
1684
 */
1685
static int
1686
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1687
1.03M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1688
1.03M
    xmlParserNsBucket *bucket = NULL;
1689
1.03M
    xmlParserNsExtra *extra;
1690
1.03M
    const xmlChar **ns;
1691
1.03M
    unsigned hashValue, nsIndex, oldIndex;
1692
1693
1.03M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1694
302
        return(0);
1695
1696
1.02M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1697
104
        xmlErrMemory(ctxt);
1698
104
        return(-1);
1699
104
    }
1700
1701
    /*
1702
     * Default namespace and 'xml' namespace
1703
     */
1704
1.02M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1705
80.9k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1706
1707
80.9k
        if (oldIndex != INT_MAX) {
1708
49.8k
            extra = &ctxt->nsdb->extra[oldIndex];
1709
1710
49.8k
            if (extra->elementId == ctxt->nsdb->elementId) {
1711
2.09k
                if (defAttr == 0)
1712
768
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1713
2.09k
                return(0);
1714
2.09k
            }
1715
1716
47.7k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1717
47.7k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1718
0
                return(0);
1719
47.7k
        }
1720
1721
78.8k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1722
78.8k
        goto populate_entry;
1723
80.9k
    }
1724
1725
    /*
1726
     * Hash table lookup
1727
     */
1728
948k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1729
948k
    if (oldIndex != INT_MAX) {
1730
164k
        extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
        /*
1733
         * Check for duplicate definitions on the same element.
1734
         */
1735
164k
        if (extra->elementId == ctxt->nsdb->elementId) {
1736
5.68k
            if (defAttr == 0)
1737
5.48k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1738
5.68k
            return(0);
1739
5.68k
        }
1740
1741
158k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1742
158k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1743
0
            return(0);
1744
1745
158k
        bucket->index = ctxt->nsNr;
1746
158k
        goto populate_entry;
1747
158k
    }
1748
1749
    /*
1750
     * Insert new bucket
1751
     */
1752
1753
784k
    hashValue = prefix->hashValue;
1754
1755
    /*
1756
     * Grow hash table, 50% fill factor
1757
     */
1758
784k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1759
615k
        xmlParserNsBucket *newHash;
1760
615k
        unsigned newSize, i, index;
1761
1762
615k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1763
0
            xmlErrMemory(ctxt);
1764
0
            return(-1);
1765
0
        }
1766
615k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1767
615k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1768
615k
        if (newHash == NULL) {
1769
46
            xmlErrMemory(ctxt);
1770
46
            return(-1);
1771
46
        }
1772
615k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1773
1774
798k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1775
183k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1776
183k
            unsigned newIndex;
1777
1778
183k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1779
172k
                continue;
1780
10.7k
            newIndex = hv & (newSize - 1);
1781
1782
15.4k
            while (newHash[newIndex].hashValue != 0) {
1783
4.69k
                newIndex++;
1784
4.69k
                if (newIndex == newSize)
1785
1.32k
                    newIndex = 0;
1786
4.69k
            }
1787
1788
10.7k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1789
10.7k
        }
1790
1791
615k
        xmlFree(ctxt->nsdb->hash);
1792
615k
        ctxt->nsdb->hash = newHash;
1793
615k
        ctxt->nsdb->hashSize = newSize;
1794
1795
        /*
1796
         * Relookup
1797
         */
1798
615k
        index = hashValue & (newSize - 1);
1799
1800
618k
        while (newHash[index].hashValue != 0) {
1801
3.16k
            index++;
1802
3.16k
            if (index == newSize)
1803
490
                index = 0;
1804
3.16k
        }
1805
1806
615k
        bucket = &newHash[index];
1807
615k
    }
1808
1809
784k
    bucket->hashValue = hashValue;
1810
784k
    bucket->index = ctxt->nsNr;
1811
784k
    ctxt->nsdb->hashElems++;
1812
784k
    oldIndex = INT_MAX;
1813
1814
1.02M
populate_entry:
1815
1.02M
    nsIndex = ctxt->nsNr;
1816
1817
1.02M
    ns = &ctxt->nsTab[nsIndex * 2];
1818
1.02M
    ns[0] = prefix ? prefix->name : NULL;
1819
1.02M
    ns[1] = uri->name;
1820
1821
1.02M
    extra = &ctxt->nsdb->extra[nsIndex];
1822
1.02M
    extra->saxData = saxData;
1823
1.02M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1824
1.02M
    extra->uriHashValue = uri->hashValue;
1825
1.02M
    extra->elementId = ctxt->nsdb->elementId;
1826
1.02M
    extra->oldIndex = oldIndex;
1827
1828
1.02M
    ctxt->nsNr++;
1829
1830
1.02M
    return(1);
1831
784k
}
1832
1833
/**
1834
 * Pops the top `nr` namespaces and restores the hash table.
1835
 *
1836
 * @param ctxt  an XML parser context
1837
 * @param nr  the number to pop
1838
 * @returns the number of namespaces popped.
1839
 */
1840
static int
1841
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1842
739k
{
1843
739k
    int i;
1844
1845
    /* assert(nr <= ctxt->nsNr); */
1846
1847
1.73M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1848
997k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1849
997k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1850
1851
997k
        if (prefix == NULL) {
1852
73.3k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1853
924k
        } else {
1854
924k
            xmlHashedString hprefix;
1855
924k
            xmlParserNsBucket *bucket = NULL;
1856
1857
924k
            hprefix.name = prefix;
1858
924k
            hprefix.hashValue = extra->prefixHashValue;
1859
924k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1860
            /* assert(bucket && bucket->hashValue); */
1861
924k
            bucket->index = extra->oldIndex;
1862
924k
        }
1863
997k
    }
1864
1865
739k
    ctxt->nsNr -= nr;
1866
739k
    return(nr);
1867
739k
}
1868
1869
static int
1870
887k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1871
887k
    const xmlChar **atts;
1872
887k
    unsigned *attallocs;
1873
887k
    int newSize;
1874
1875
887k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1876
887k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1877
887k
                              10, XML_MAX_ATTRS);
1878
887k
    if (newSize < 0) {
1879
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1880
0
                    "Maximum number of attributes exceeded");
1881
0
        return(-1);
1882
0
    }
1883
1884
887k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1885
887k
    if (atts == NULL)
1886
60
        goto mem_error;
1887
887k
    ctxt->atts = atts;
1888
1889
887k
    attallocs = xmlRealloc(ctxt->attallocs,
1890
887k
                           newSize * sizeof(attallocs[0]));
1891
887k
    if (attallocs == NULL)
1892
45
        goto mem_error;
1893
887k
    ctxt->attallocs = attallocs;
1894
1895
887k
    ctxt->maxatts = newSize * 5;
1896
1897
887k
    return(0);
1898
1899
105
mem_error:
1900
105
    xmlErrMemory(ctxt);
1901
105
    return(-1);
1902
887k
}
1903
1904
/**
1905
 * Pushes a new parser input on top of the input stack
1906
 *
1907
 * @param ctxt  an XML parser context
1908
 * @param value  the parser input
1909
 * @returns -1 in case of error, the index in the stack otherwise
1910
 */
1911
int
1912
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1913
907k
{
1914
907k
    char *directory = NULL;
1915
907k
    int maxDepth;
1916
1917
907k
    if ((ctxt == NULL) || (value == NULL))
1918
9.01k
        return(-1);
1919
1920
898k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1921
1922
898k
    if (ctxt->inputNr >= ctxt->inputMax) {
1923
33.0k
        xmlParserInputPtr *tmp;
1924
33.0k
        int newSize;
1925
1926
33.0k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1927
33.0k
                                  5, maxDepth);
1928
33.0k
        if (newSize < 0) {
1929
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1930
0
                           "Maximum entity nesting depth exceeded");
1931
0
            xmlHaltParser(ctxt);
1932
0
            return(-1);
1933
0
        }
1934
33.0k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1935
33.0k
        if (tmp == NULL) {
1936
5
            xmlErrMemory(ctxt);
1937
5
            return(-1);
1938
5
        }
1939
33.0k
        ctxt->inputTab = tmp;
1940
33.0k
        ctxt->inputMax = newSize;
1941
33.0k
    }
1942
1943
898k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1944
66.4k
        directory = xmlParserGetDirectory(value->filename);
1945
66.4k
        if (directory == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return(-1);
1948
0
        }
1949
66.4k
    }
1950
1951
898k
    if (ctxt->input_id >= INT_MAX) {
1952
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1953
0
        return(-1);
1954
0
    }
1955
1956
898k
    ctxt->inputTab[ctxt->inputNr] = value;
1957
898k
    ctxt->input = value;
1958
1959
898k
    if (ctxt->inputNr == 0) {
1960
857k
        xmlFree(ctxt->directory);
1961
857k
        ctxt->directory = directory;
1962
857k
    }
1963
1964
    /*
1965
     * The input ID is unused internally, but there are entity
1966
     * loaders in downstream code that detect the main document
1967
     * by checking for "input_id == 1".
1968
     */
1969
898k
    value->id = ctxt->input_id++;
1970
1971
898k
    return(ctxt->inputNr++);
1972
898k
}
1973
1974
/**
1975
 * Pops the top parser input from the input stack
1976
 *
1977
 * @param ctxt  an XML parser context
1978
 * @returns the input just removed
1979
 */
1980
xmlParserInput *
1981
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1982
2.89M
{
1983
2.89M
    xmlParserInputPtr ret;
1984
1985
2.89M
    if (ctxt == NULL)
1986
0
        return(NULL);
1987
2.89M
    if (ctxt->inputNr <= 0)
1988
2.01M
        return (NULL);
1989
883k
    ctxt->inputNr--;
1990
883k
    if (ctxt->inputNr > 0)
1991
40.2k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1992
843k
    else
1993
843k
        ctxt->input = NULL;
1994
883k
    ret = ctxt->inputTab[ctxt->inputNr];
1995
883k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1996
883k
    return (ret);
1997
2.89M
}
1998
1999
/**
2000
 * Pushes a new element node on top of the node stack
2001
 *
2002
 * @deprecated Internal function, do not use.
2003
 *
2004
 * @param ctxt  an XML parser context
2005
 * @param value  the element node
2006
 * @returns -1 in case of error, the index in the stack otherwise
2007
 */
2008
int
2009
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
2010
6.56M
{
2011
6.56M
    if (ctxt == NULL)
2012
0
        return(0);
2013
2014
6.56M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2015
837k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2016
837k
        xmlNodePtr *tmp;
2017
837k
        int newSize;
2018
2019
837k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2020
837k
                                  10, maxDepth);
2021
837k
        if (newSize < 0) {
2022
324
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2023
324
                    "Excessive depth in document: %d,"
2024
324
                    " use XML_PARSE_HUGE option\n",
2025
324
                    ctxt->nodeNr);
2026
324
            xmlHaltParser(ctxt);
2027
324
            return(-1);
2028
324
        }
2029
2030
837k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2031
837k
        if (tmp == NULL) {
2032
48
            xmlErrMemory(ctxt);
2033
48
            return (-1);
2034
48
        }
2035
837k
        ctxt->nodeTab = tmp;
2036
837k
  ctxt->nodeMax = newSize;
2037
837k
    }
2038
2039
6.56M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2040
6.56M
    ctxt->node = value;
2041
6.56M
    return (ctxt->nodeNr++);
2042
6.56M
}
2043
2044
/**
2045
 * Pops the top element node from the node stack
2046
 *
2047
 * @deprecated Internal function, do not use.
2048
 *
2049
 * @param ctxt  an XML parser context
2050
 * @returns the node just removed
2051
 */
2052
xmlNode *
2053
nodePop(xmlParserCtxt *ctxt)
2054
6.89M
{
2055
6.89M
    xmlNodePtr ret;
2056
2057
6.89M
    if (ctxt == NULL) return(NULL);
2058
6.89M
    if (ctxt->nodeNr <= 0)
2059
349k
        return (NULL);
2060
6.54M
    ctxt->nodeNr--;
2061
6.54M
    if (ctxt->nodeNr > 0)
2062
5.91M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2063
626k
    else
2064
626k
        ctxt->node = NULL;
2065
6.54M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2066
6.54M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2067
6.54M
    return (ret);
2068
6.89M
}
2069
2070
/**
2071
 * Pushes a new element name/prefix/URL on top of the name stack
2072
 *
2073
 * @param ctxt  an XML parser context
2074
 * @param value  the element name
2075
 * @param prefix  the element prefix
2076
 * @param URI  the element namespace name
2077
 * @param line  the current line number for error messages
2078
 * @param nsNr  the number of namespaces pushed on the namespace table
2079
 * @returns -1 in case of error, the index in the stack otherwise
2080
 */
2081
static int
2082
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2083
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2084
7.72M
{
2085
7.72M
    xmlStartTag *tag;
2086
2087
7.72M
    if (ctxt->nameNr >= ctxt->nameMax) {
2088
930k
        const xmlChar **tmp;
2089
930k
        xmlStartTag *tmp2;
2090
930k
        int newSize;
2091
2092
930k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2093
930k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2094
930k
                                  10, XML_MAX_ITEMS);
2095
930k
        if (newSize < 0)
2096
0
            goto mem_error;
2097
2098
930k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2099
930k
        if (tmp == NULL)
2100
316
      goto mem_error;
2101
930k
  ctxt->nameTab = tmp;
2102
2103
930k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2104
930k
        if (tmp2 == NULL)
2105
32
      goto mem_error;
2106
930k
  ctxt->pushTab = tmp2;
2107
2108
930k
        ctxt->nameMax = newSize;
2109
6.79M
    } else if (ctxt->pushTab == NULL) {
2110
685k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2111
685k
        if (ctxt->pushTab == NULL)
2112
754
            goto mem_error;
2113
685k
    }
2114
7.72M
    ctxt->nameTab[ctxt->nameNr] = value;
2115
7.72M
    ctxt->name = value;
2116
7.72M
    tag = &ctxt->pushTab[ctxt->nameNr];
2117
7.72M
    tag->prefix = prefix;
2118
7.72M
    tag->URI = URI;
2119
7.72M
    tag->line = line;
2120
7.72M
    tag->nsNr = nsNr;
2121
7.72M
    return (ctxt->nameNr++);
2122
1.10k
mem_error:
2123
1.10k
    xmlErrMemory(ctxt);
2124
1.10k
    return (-1);
2125
7.72M
}
2126
#ifdef LIBXML_PUSH_ENABLED
/**
 * Pops the top element/prefix/URI name from the name stack
 *
 * The vacated slot of the name table is cleared and `ctxt->name` is
 * updated to the new top of the stack, or NULL if the stack became
 * empty.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2150
2151
/**
2152
 * Pops the top element name from the name stack
2153
 *
2154
 * @deprecated Internal function, do not use.
2155
 *
2156
 * @param ctxt  an XML parser context
2157
 * @returns the name just removed
2158
 */
2159
static const xmlChar *
2160
namePop(xmlParserCtxtPtr ctxt)
2161
7.67M
{
2162
7.67M
    const xmlChar *ret;
2163
2164
7.67M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2165
268
        return (NULL);
2166
7.67M
    ctxt->nameNr--;
2167
7.67M
    if (ctxt->nameNr > 0)
2168
7.03M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2169
640k
    else
2170
640k
        ctxt->name = NULL;
2171
7.67M
    ret = ctxt->nameTab[ctxt->nameNr];
2172
7.67M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2173
7.67M
    return (ret);
2174
7.67M
}
2175
2176
8.29M
/*
 * Pushes `val` on the space stack of the context, growing the table
 * if needed, and makes ctxt->space point at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 if growing the table
 * failed (a memory error is raised on the context).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        grown = (capacity < 0) ?
                NULL :
                xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;

    return(pos);
}
2201
2202
8.25M
/*
 * Pops the top entry of the space stack and returns its value.
 *
 * The vacated slot is overwritten with -1. ctxt->space is pointed at
 * the new top entry, or at slot 0 when the stack became empty.
 * Returns 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2214
2215
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt` to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF do not expand to a single statement
 * wrapped in do/while(0). Use them as a full statement inside braces and
 * never as the unbraced body of an if that has an else branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at `s` equal c1..cn.
 * Every argument is parenthesized so that expressions such as `p + 1`
 * can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[i],(v))
2321
2322
static int
2323
360M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2324
360M
    int c = xmlCurrentChar(ctxt, len);
2325
2326
360M
    if (c == XML_INVALID_CHAR)
2327
75.9M
        c = 0xFFFD; /* replacement character */
2328
2329
360M
    return(c);
2330
360M
}
2331
2332
/**
2333
 * Skip whitespace in the input stream.
2334
 *
2335
 * @deprecated Internal function, do not use.
2336
 *
2337
 * @param ctxt  the XML parser context
2338
 * @returns the number of space chars skipped
2339
 */
2340
int
2341
27.8M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2342
27.8M
    const xmlChar *cur;
2343
27.8M
    int res = 0;
2344
2345
27.8M
    cur = ctxt->input->cur;
2346
27.8M
    while (IS_BLANK_CH(*cur)) {
2347
15.0M
        if (*cur == '\n') {
2348
2.89M
            ctxt->input->line++; ctxt->input->col = 1;
2349
12.1M
        } else {
2350
12.1M
            ctxt->input->col++;
2351
12.1M
        }
2352
15.0M
        cur++;
2353
15.0M
        if (res < INT_MAX)
2354
15.0M
            res++;
2355
15.0M
        if (*cur == 0) {
2356
580k
            ctxt->input->cur = cur;
2357
580k
            xmlParserGrow(ctxt);
2358
580k
            cur = ctxt->input->cur;
2359
580k
        }
2360
15.0M
    }
2361
27.8M
    ctxt->input->cur = cur;
2362
2363
27.8M
    if (res > 4)
2364
454k
        GROW;
2365
2366
27.8M
    return(res);
2367
27.8M
}
2368
2369
static void
2370
25.6k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2371
25.6k
    unsigned long consumed;
2372
25.6k
    xmlEntityPtr ent;
2373
2374
25.6k
    ent = ctxt->input->entity;
2375
2376
25.6k
    ent->flags &= ~XML_ENT_EXPANDING;
2377
2378
25.6k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2379
18.7k
        int result;
2380
2381
        /*
2382
         * Read the rest of the stream in case of errors. We want
2383
         * to account for the whole entity size.
2384
         */
2385
23.5k
        do {
2386
23.5k
            ctxt->input->cur = ctxt->input->end;
2387
23.5k
            xmlParserShrink(ctxt);
2388
23.5k
            result = xmlParserGrow(ctxt);
2389
23.5k
        } while (result > 0);
2390
2391
18.7k
        consumed = ctxt->input->consumed;
2392
18.7k
        xmlSaturatedAddSizeT(&consumed,
2393
18.7k
                             ctxt->input->end - ctxt->input->base);
2394
2395
18.7k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2396
2397
        /*
2398
         * Add to sizeentities when parsing an external entity
2399
         * for the first time.
2400
         */
2401
18.7k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2402
15.5k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2403
15.5k
        }
2404
2405
18.7k
        ent->flags |= XML_ENT_CHECKED;
2406
18.7k
    }
2407
2408
25.6k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2409
2410
25.6k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2411
2412
25.6k
    GROW;
2413
25.6k
}
2414
2415
/**
2416
 * Skip whitespace in the input stream, also handling parameter
2417
 * entities.
2418
 *
2419
 * @param ctxt  the XML parser context
2420
 * @returns the number of space chars skipped
2421
 */
2422
static int
2423
1.35M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2424
1.35M
    int res = 0;
2425
1.35M
    int inParam;
2426
1.35M
    int expandParam;
2427
2428
1.35M
    inParam = PARSER_IN_PE(ctxt);
2429
1.35M
    expandParam = PARSER_EXTERNAL(ctxt);
2430
2431
1.35M
    if (!inParam && !expandParam)
2432
781k
        return(xmlSkipBlankChars(ctxt));
2433
2434
    /*
2435
     * It's Okay to use CUR/NEXT here since all the blanks are on
2436
     * the ASCII range.
2437
     */
2438
1.88M
    while (PARSER_STOPPED(ctxt) == 0) {
2439
1.87M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2440
1.28M
            NEXT;
2441
1.28M
        } else if (CUR == '%') {
2442
86.1k
            if ((expandParam == 0) ||
2443
86.1k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2444
60.9k
                break;
2445
2446
            /*
2447
             * Expand parameter entity. We continue to consume
2448
             * whitespace at the start of the entity and possible
2449
             * even consume the whole entity and pop it. We might
2450
             * even pop multiple PEs in this loop.
2451
             */
2452
25.1k
            xmlParsePERefInternal(ctxt, 0);
2453
2454
25.1k
            inParam = PARSER_IN_PE(ctxt);
2455
25.1k
            expandParam = PARSER_EXTERNAL(ctxt);
2456
510k
        } else if (CUR == 0) {
2457
10.2k
            if (inParam == 0)
2458
1.04k
                break;
2459
2460
            /*
2461
             * Don't pop parameter entities that start a markup
2462
             * declaration to detect Well-formedness constraint:
2463
             * PE Between Declarations.
2464
             */
2465
9.23k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2466
3.59k
                break;
2467
2468
5.64k
            xmlPopPE(ctxt);
2469
2470
5.64k
            inParam = PARSER_IN_PE(ctxt);
2471
5.64k
            expandParam = PARSER_EXTERNAL(ctxt);
2472
500k
        } else {
2473
500k
            break;
2474
500k
        }
2475
2476
        /*
2477
         * Also increase the counter when entering or exiting a PERef.
2478
         * The spec says: "When a parameter-entity reference is recognized
2479
         * in the DTD and included, its replacement text MUST be enlarged
2480
         * by the attachment of one leading and one following space (#x20)
2481
         * character."
2482
         */
2483
1.31M
        if (res < INT_MAX)
2484
1.31M
            res++;
2485
1.31M
    }
2486
2487
568k
    return(res);
2488
1.35M
}
2489
2490
/************************************************************************
2491
 *                  *
2492
 *    Commodity functions to handle entities      *
2493
 *                  *
2494
 ************************************************************************/
2495
2496
/**
2497
 * @deprecated Internal function, don't use.
2498
 *
2499
 * @param ctxt  an XML parser context
2500
 * @returns the current xmlChar in the parser context
2501
 */
2502
xmlChar
2503
0
xmlPopInput(xmlParserCtxt *ctxt) {
2504
0
    xmlParserInputPtr input;
2505
2506
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2507
0
    input = xmlCtxtPopInput(ctxt);
2508
0
    xmlFreeInputStream(input);
2509
0
    if (*ctxt->input->cur == 0)
2510
0
        xmlParserGrow(ctxt);
2511
0
    return(CUR);
2512
0
}
2513
2514
/**
2515
 * Push an input stream onto the stack.
2516
 *
2517
 * @deprecated Internal function, don't use.
2518
 *
2519
 * @param ctxt  an XML parser context
2520
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2521
 * @returns -1 in case of error or the index in the input stack
2522
 */
2523
int
2524
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2525
0
    int ret;
2526
2527
0
    if ((ctxt == NULL) || (input == NULL))
2528
0
        return(-1);
2529
2530
0
    ret = xmlCtxtPushInput(ctxt, input);
2531
0
    if (ret >= 0)
2532
0
        GROW;
2533
0
    return(ret);
2534
0
}
2535
2536
/**
2537
 * Parse a numeric character reference. Always consumes '&'.
2538
 *
2539
 * @deprecated Internal function, don't use.
2540
 *
2541
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2542
 *                      '&#x' [0-9a-fA-F]+ ';'
2543
 *
2544
 * [ WFC: Legal Character ]
2545
 * Characters referred to using character references must match the
2546
 * production for Char.
2547
 *
2548
 * @param ctxt  an XML parser context
2549
 * @returns the value parsed (as an int), 0 in case of error
2550
 */
2551
int
2552
178k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2553
178k
    int val = 0;
2554
178k
    int count = 0;
2555
2556
    /*
2557
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2558
     */
2559
178k
    if ((RAW == '&') && (NXT(1) == '#') &&
2560
178k
        (NXT(2) == 'x')) {
2561
28.0k
  SKIP(3);
2562
28.0k
  GROW;
2563
101k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2564
83.1k
      if (count++ > 20) {
2565
848
    count = 0;
2566
848
    GROW;
2567
848
      }
2568
83.1k
      if ((RAW >= '0') && (RAW <= '9'))
2569
54.5k
          val = val * 16 + (CUR - '0');
2570
28.6k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2571
7.99k
          val = val * 16 + (CUR - 'a') + 10;
2572
20.6k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2573
11.3k
          val = val * 16 + (CUR - 'A') + 10;
2574
9.37k
      else {
2575
9.37k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2576
9.37k
    val = 0;
2577
9.37k
    break;
2578
9.37k
      }
2579
73.8k
      if (val > 0x110000)
2580
10.0k
          val = 0x110000;
2581
2582
73.8k
      NEXT;
2583
73.8k
      count++;
2584
73.8k
  }
2585
28.0k
  if (RAW == ';') {
2586
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2587
18.6k
      ctxt->input->col++;
2588
18.6k
      ctxt->input->cur++;
2589
18.6k
  }
2590
150k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2591
150k
  SKIP(2);
2592
150k
  GROW;
2593
419k
  while (RAW != ';') { /* loop blocked by count */
2594
278k
      if (count++ > 20) {
2595
1.19k
    count = 0;
2596
1.19k
    GROW;
2597
1.19k
      }
2598
278k
      if ((RAW >= '0') && (RAW <= '9'))
2599
268k
          val = val * 10 + (CUR - '0');
2600
9.79k
      else {
2601
9.79k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2602
9.79k
    val = 0;
2603
9.79k
    break;
2604
9.79k
      }
2605
268k
      if (val > 0x110000)
2606
14.7k
          val = 0x110000;
2607
2608
268k
      NEXT;
2609
268k
      count++;
2610
268k
  }
2611
150k
  if (RAW == ';') {
2612
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2613
140k
      ctxt->input->col++;
2614
140k
      ctxt->input->cur++;
2615
140k
  }
2616
150k
    } else {
2617
0
        if (RAW == '&')
2618
0
            SKIP(1);
2619
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2620
0
    }
2621
2622
    /*
2623
     * [ WFC: Legal Character ]
2624
     * Characters referred to using character references must match the
2625
     * production for Char.
2626
     */
2627
178k
    if (val >= 0x110000) {
2628
304
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2629
304
                "xmlParseCharRef: character reference out of bounds\n",
2630
304
          val);
2631
304
        val = 0xFFFD;
2632
177k
    } else if (!IS_CHAR(val)) {
2633
20.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2634
20.1k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2635
20.1k
                    val);
2636
20.1k
    }
2637
178k
    return(val);
2638
178k
}
2639
2640
/**
2641
 * Parse Reference declarations, variant parsing from a string rather
2642
 * than an an input flow.
2643
 *
2644
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2645
 *                      '&#x' [0-9a-fA-F]+ ';'
2646
 *
2647
 * [ WFC: Legal Character ]
2648
 * Characters referred to using character references must match the
2649
 * production for Char.
2650
 *
2651
 * @param ctxt  an XML parser context
2652
 * @param str  a pointer to an index in the string
2653
 * @returns the value parsed (as an int), 0 in case of error, str will be
2654
 *         updated to the current value of the index
2655
 */
2656
static int
2657
189k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2658
189k
    const xmlChar *ptr;
2659
189k
    xmlChar cur;
2660
189k
    int val = 0;
2661
2662
189k
    if ((str == NULL) || (*str == NULL)) return(0);
2663
189k
    ptr = *str;
2664
189k
    cur = *ptr;
2665
189k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2666
150k
  ptr += 3;
2667
150k
  cur = *ptr;
2668
731k
  while (cur != ';') { /* Non input consuming loop */
2669
581k
      if ((cur >= '0') && (cur <= '9'))
2670
449k
          val = val * 16 + (cur - '0');
2671
131k
      else if ((cur >= 'a') && (cur <= 'f'))
2672
3.73k
          val = val * 16 + (cur - 'a') + 10;
2673
128k
      else if ((cur >= 'A') && (cur <= 'F'))
2674
126k
          val = val * 16 + (cur - 'A') + 10;
2675
1.18k
      else {
2676
1.18k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2677
1.18k
    val = 0;
2678
1.18k
    break;
2679
1.18k
      }
2680
580k
      if (val > 0x110000)
2681
12.3k
          val = 0x110000;
2682
2683
580k
      ptr++;
2684
580k
      cur = *ptr;
2685
580k
  }
2686
150k
  if (cur == ';')
2687
149k
      ptr++;
2688
150k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2689
38.1k
  ptr += 2;
2690
38.1k
  cur = *ptr;
2691
134k
  while (cur != ';') { /* Non input consuming loops */
2692
99.7k
      if ((cur >= '0') && (cur <= '9'))
2693
96.6k
          val = val * 10 + (cur - '0');
2694
3.11k
      else {
2695
3.11k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2696
3.11k
    val = 0;
2697
3.11k
    break;
2698
3.11k
      }
2699
96.6k
      if (val > 0x110000)
2700
2.83k
          val = 0x110000;
2701
2702
96.6k
      ptr++;
2703
96.6k
      cur = *ptr;
2704
96.6k
  }
2705
38.1k
  if (cur == ';')
2706
35.0k
      ptr++;
2707
38.1k
    } else {
2708
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2709
0
  return(0);
2710
0
    }
2711
189k
    *str = ptr;
2712
2713
    /*
2714
     * [ WFC: Legal Character ]
2715
     * Characters referred to using character references must match the
2716
     * production for Char.
2717
     */
2718
189k
    if (val >= 0x110000) {
2719
264
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2720
264
                "xmlParseStringCharRef: character reference out of bounds\n",
2721
264
                val);
2722
188k
    } else if (IS_CHAR(val)) {
2723
183k
        return(val);
2724
183k
    } else {
2725
4.99k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2726
4.99k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2727
4.99k
        val);
2728
4.99k
    }
2729
5.26k
    return(0);
2730
189k
}
2731
2732
/**
2733
 *     [69] PEReference ::= '%' Name ';'
2734
 *
2735
 * @deprecated Internal function, do not use.
2736
 *
2737
 * [ WFC: No Recursion ]
2738
 * A parsed entity must not contain a recursive
2739
 * reference to itself, either directly or indirectly.
2740
 *
2741
 * [ WFC: Entity Declared ]
2742
 * In a document without any DTD, a document with only an internal DTD
2743
 * subset which contains no parameter entity references, or a document
2744
 * with "standalone='yes'", ...  ... The declaration of a parameter
2745
 * entity must precede any reference to it...
2746
 *
2747
 * [ VC: Entity Declared ]
2748
 * In a document with an external subset or external parameter entities
2749
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2750
 * must precede any reference to it...
2751
 *
2752
 * [ WFC: In DTD ]
2753
 * Parameter-entity references may only appear in the DTD.
2754
 * NOTE: misleading but this is handled.
2755
 *
2756
 * A PEReference may have been detected in the current input stream
2757
 * the handling is done accordingly to
2758
 *      http://www.w3.org/TR/REC-xml#entproc
2759
 * i.e.
2760
 *   - Included in literal in entity values
2761
 *   - Included as Parameter Entity reference within DTDs
2762
 * @param ctxt  the parser context
2763
 */
2764
void
2765
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2766
0
    xmlParsePERefInternal(ctxt, 0);
2767
0
}
2768
2769
/**
2770
 * @deprecated Internal function, don't use.
2771
 *
2772
 * @param ctxt  the parser context
2773
 * @param str  the input string
2774
 * @param len  the string length
2775
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2776
 * @param end  an end marker xmlChar, 0 if none
2777
 * @param end2  an end marker xmlChar, 0 if none
2778
 * @param end3  an end marker xmlChar, 0 if none
2779
 * @returns A newly allocated string with the substitution done. The caller
2780
 *      must deallocate it !
2781
 */
2782
xmlChar *
2783
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2784
                           int what ATTRIBUTE_UNUSED,
2785
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2786
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2787
0
        return(NULL);
2788
2789
0
    if ((str[len] != 0) ||
2790
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2791
0
        return(NULL);
2792
2793
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2794
0
}
2795
2796
/**
2797
 * @deprecated Internal function, don't use.
2798
 *
2799
 * @param ctxt  the parser context
2800
 * @param str  the input string
2801
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @param end  an end marker xmlChar, 0 if none
2803
 * @param end2  an end marker xmlChar, 0 if none
2804
 * @param end3  an end marker xmlChar, 0 if none
2805
 * @returns A newly allocated string with the substitution done. The caller
2806
 *      must deallocate it !
2807
 */
2808
xmlChar *
2809
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2810
                        int what ATTRIBUTE_UNUSED,
2811
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2812
0
    if ((ctxt == NULL) || (str == NULL))
2813
0
        return(NULL);
2814
2815
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2816
0
        return(NULL);
2817
2818
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2819
0
}
2820
2821
/************************************************************************
2822
 *                  *
2823
 *    Commodity functions, cleanup needed ?     *
2824
 *                  *
2825
 ************************************************************************/
2826
2827
/**
2828
 * Is this a sequence of blank chars that one can ignore ?
2829
 *
2830
 * @param ctxt  an XML parser context
2831
 * @param str  a xmlChar *
2832
 * @param len  the size of `str`
2833
 * @param blank_chars  we know the chars are blanks
2834
 * @returns 1 if ignorable 0 otherwise.
2835
 */
2836
2837
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2838
0
                     int blank_chars) {
2839
0
    int i;
2840
0
    xmlNodePtr lastChild;
2841
2842
    /*
2843
     * Check for xml:space value.
2844
     */
2845
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2846
0
        (*(ctxt->space) == -2))
2847
0
  return(0);
2848
2849
    /*
2850
     * Check that the string is made of blanks
2851
     */
2852
0
    if (blank_chars == 0) {
2853
0
  for (i = 0;i < len;i++)
2854
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2855
0
    }
2856
2857
    /*
2858
     * Look if the element is mixed content in the DTD if available
2859
     */
2860
0
    if (ctxt->node == NULL) return(0);
2861
0
    if (ctxt->myDoc != NULL) {
2862
0
        xmlElementPtr elemDecl = NULL;
2863
0
        xmlDocPtr doc = ctxt->myDoc;
2864
0
        const xmlChar *prefix = NULL;
2865
2866
0
        if (ctxt->node->ns)
2867
0
            prefix = ctxt->node->ns->prefix;
2868
0
        if (doc->intSubset != NULL)
2869
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2870
0
                                      prefix);
2871
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2872
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2873
0
                                      prefix);
2874
0
        if (elemDecl != NULL) {
2875
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2876
0
                return(1);
2877
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2878
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2879
0
                return(0);
2880
0
        }
2881
0
    }
2882
2883
    /*
2884
     * Otherwise, heuristic :-\
2885
     *
2886
     * When push parsing, we could be at the end of a chunk.
2887
     * This makes the look-ahead and consequently the NOBLANKS
2888
     * option unreliable.
2889
     */
2890
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2891
0
    if ((ctxt->node->children == NULL) &&
2892
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2893
2894
0
    lastChild = xmlGetLastChild(ctxt->node);
2895
0
    if (lastChild == NULL) {
2896
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2897
0
            (ctxt->node->content != NULL)) return(0);
2898
0
    } else if (xmlNodeIsText(lastChild))
2899
0
        return(0);
2900
0
    else if ((ctxt->node->children != NULL) &&
2901
0
             (xmlNodeIsText(ctxt->node->children)))
2902
0
        return(0);
2903
0
    return(1);
2904
0
}
2905
2906
/************************************************************************
2907
 *                  *
2908
 *    Extra stuff for namespace support     *
2909
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2910
 *                  *
2911
 ************************************************************************/
2912
2913
/**
2914
 * Parse an UTF8 encoded XML qualified name string
2915
 *
2916
 * @deprecated Don't use.
2917
 *
2918
 * @param ctxt  an XML parser context
2919
 * @param name  an XML parser context
2920
 * @param prefixOut  a xmlChar **
2921
 * @returns the local part, and prefix is updated
2922
 *   to get the Prefix if any.
2923
 */
2924
2925
xmlChar *
2926
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2927
0
    xmlChar *ret;
2928
0
    const xmlChar *localname;
2929
2930
0
    localname = xmlSplitQName4(name, prefixOut);
2931
0
    if (localname == NULL) {
2932
0
        xmlCtxtErrMemory(ctxt);
2933
0
        return(NULL);
2934
0
    }
2935
2936
0
    ret = xmlStrdup(localname);
2937
0
    if (ret == NULL) {
2938
0
        xmlCtxtErrMemory(ctxt);
2939
0
        xmlFree(*prefixOut);
2940
0
    }
2941
2942
0
    return(ret);
2943
0
}
2944
2945
/************************************************************************
2946
 *                  *
2947
 *      The parser itself       *
2948
 *  Relates to http://www.w3.org/TR/REC-xml       *
2949
 *                  *
2950
 ************************************************************************/
2951
2952
/************************************************************************
2953
 *                  *
2954
 *  Routines to parse Name, NCName and NmToken      *
2955
 *                  *
2956
 ************************************************************************/
2957
2958
/*
2959
 * The two following functions are related to the change of accepted
2960
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2961
 * They correspond to the modified production [4] and the new production [4a]
2962
 * changes in that revision. Also note that the macros used for the
2963
 * productions Letter, Digit, CombiningChar and Extender are not needed
2964
 * anymore.
2965
 * We still keep compatibility to pre-revision5 parsing semantic if the
2966
 * new XML_PARSE_OLD10 option is given to the parser.
2967
 */
2968
2969
static int
xmlIsNameStartCharNew(int c) {
    /*
     * NameStartChar check following productions [4], [4a] and [5] of
     * XML 1.0 Fifth Edition.
     */

    /* accelerators: the most frequent characters ending a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII range: only letters, '_' and ':' may start a name */
    if (c < 0x80)
        return(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':'));

    if (((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)) ||
        ((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
        ((c >= 0x10000) && (c <= 0xEFFFF)))
        return(1);

    return(0);
}
2994
2995
static int
xmlIsNameCharNew(int c) {
    /*
     * NameChar check following productions [4], [4a] and [5] of
     * XML 1.0 Fifth Edition. Entries marked "!start" are allowed
     * inside a name but not as its first character.
     */

    /* accelerators: the most frequent characters ending a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII range */
    if (c < 0x80)
        return(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               ((c >= '0') && (c <= '9')) || /* !start */
               (c == '_') || (c == ':') ||
               (c == '-') || (c == '.'));    /* !start */

    if ((c == 0xB7) || /* !start */
        ((c >= 0xC0) && (c <= 0xD6)) ||
        ((c >= 0xD8) && (c <= 0xF6)) ||
        ((c >= 0xF8) && (c <= 0x2FF)) ||
        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
        ((c >= 0x370) && (c <= 0x37D)) ||
        ((c >= 0x37F) && (c <= 0x1FFF)) ||
        ((c >= 0x200C) && (c <= 0x200D)) ||
        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
        ((c >= 0x2070) && (c <= 0x218F)) ||
        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
        ((c >= 0x3001) && (c <= 0xD7FF)) ||
        ((c >= 0xF900) && (c <= 0xFDCF)) ||
        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
        ((c >= 0x10000) && (c <= 0xEFFFF)))
        return(1);

    return(0);
}
3024
3025
/*
 * Name start character check using the pre-Revision-5 character
 * classes (IS_LETTER plus '_' and ':').
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c) || (c == '_') || (c == ':'))
        return(1);

    return(0);
}
3032
3033
/*
 * Name character check using the pre-Revision-5 character classes
 * (letters, digits, '.', '-', '_', ':', combining chars, extenders).
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
        (c == '.') || (c == '-') ||
        (c == '_') || (c == ':') ||
        (IS_COMBINING(c)) ||
        (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3044
3045
/*
 * Dispatch the name start character check to the old or new rule set.
 * A non-zero `old10` selects the pre-Revision-5 rules.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3052
3053
/*
 * Dispatch the name character check to the old or new rule set.
 * A non-zero `old10` selects the pre-Revision-5 rules.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3060
3061
/*
3062
 * Scan an XML Name, NCName or Nmtoken.
3063
 *
3064
 * Returns a pointer to the end of the name on success. If the
3065
 * name is invalid, returns `ptr`. If the name is longer than
3066
 * `maxSize` bytes, returns NULL.
3067
 *
3068
 * @param ptr  pointer to the start of the name
3069
 * @param maxSize  maximum size in bytes
3070
 * @param flags  XML_SCAN_* flags
3071
 * @returns a pointer to the end of the name or NULL
3072
 */
3073
const xmlChar *
3074
5.72M
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3075
5.72M
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3076
5.72M
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3077
3078
63.4M
    while (1) {
3079
63.4M
        int c, len;
3080
3081
63.4M
        c = *ptr;
3082
63.4M
        if (c < 0x80) {
3083
59.0M
            if (c == stop)
3084
690k
                break;
3085
58.3M
            len = 1;
3086
58.3M
        } else {
3087
4.39M
            len = 4;
3088
4.39M
            c = xmlGetUTF8Char(ptr, &len);
3089
4.39M
            if (c < 0)
3090
2.39k
                break;
3091
4.39M
        }
3092
3093
62.7M
        if (flags & XML_SCAN_NMTOKEN ?
3094
57.1M
                !xmlIsNameChar(c, old10) :
3095
62.7M
                !xmlIsNameStartChar(c, old10))
3096
5.02M
            break;
3097
3098
57.7M
        if ((size_t) len > maxSize)
3099
613
            return(NULL);
3100
57.7M
        ptr += len;
3101
57.7M
        maxSize -= len;
3102
57.7M
        flags |= XML_SCAN_NMTOKEN;
3103
57.7M
    }
3104
3105
5.72M
    return(ptr);
3106
5.72M
}
3107
3108
static const xmlChar *
3109
579k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3110
579k
    const xmlChar *ret;
3111
579k
    int len = 0, l;
3112
579k
    int c;
3113
579k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3114
0
                    XML_MAX_TEXT_LENGTH :
3115
579k
                    XML_MAX_NAME_LENGTH;
3116
579k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3117
3118
    /*
3119
     * Handler for more complex cases
3120
     */
3121
579k
    c = xmlCurrentChar(ctxt, &l);
3122
579k
    if (!xmlIsNameStartChar(c, old10))
3123
500k
        return(NULL);
3124
78.4k
    len += l;
3125
78.4k
    NEXTL(l);
3126
78.4k
    c = xmlCurrentChar(ctxt, &l);
3127
21.0M
    while (xmlIsNameChar(c, old10)) {
3128
20.9M
        if (len <= INT_MAX - l)
3129
20.9M
            len += l;
3130
20.9M
        NEXTL(l);
3131
20.9M
        c = xmlCurrentChar(ctxt, &l);
3132
20.9M
    }
3133
78.4k
    if (len > maxLength) {
3134
252
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3135
252
        return(NULL);
3136
252
    }
3137
78.2k
    if (ctxt->input->cur - ctxt->input->base < len) {
3138
        /*
3139
         * There were a couple of bugs where PERefs lead to to a change
3140
         * of the buffer. Check the buffer size to avoid passing an invalid
3141
         * pointer to xmlDictLookup.
3142
         */
3143
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3144
0
                    "unexpected change of input buffer");
3145
0
        return (NULL);
3146
0
    }
3147
78.2k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3148
838
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3149
77.4k
    else
3150
77.4k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3151
78.2k
    if (ret == NULL)
3152
1
        xmlErrMemory(ctxt);
3153
78.2k
    return(ret);
3154
78.2k
}
3155
3156
/**
3157
 * Parse an XML name.
3158
 *
3159
 * @deprecated Internal function, don't use.
3160
 *
3161
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3162
 *                      CombiningChar | Extender
3163
 *
3164
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3165
 *
3166
 *     [6] Names ::= Name (#x20 Name)*
3167
 *
3168
 * @param ctxt  an XML parser context
3169
 * @returns the Name parsed or NULL
3170
 */
3171
3172
const xmlChar *
3173
2.06M
xmlParseName(xmlParserCtxt *ctxt) {
3174
2.06M
    const xmlChar *in;
3175
2.06M
    const xmlChar *ret;
3176
2.06M
    size_t count = 0;
3177
2.06M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3178
0
                       XML_MAX_TEXT_LENGTH :
3179
2.06M
                       XML_MAX_NAME_LENGTH;
3180
3181
2.06M
    GROW;
3182
3183
    /*
3184
     * Accelerator for simple ASCII names
3185
     */
3186
2.06M
    in = ctxt->input->cur;
3187
2.06M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3188
2.06M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3189
2.06M
  (*in == '_') || (*in == ':')) {
3190
1.55M
  in++;
3191
233M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3192
233M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3193
233M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3194
233M
         (*in == '_') || (*in == '-') ||
3195
233M
         (*in == ':') || (*in == '.'))
3196
231M
      in++;
3197
1.55M
  if ((*in > 0) && (*in < 0x80)) {
3198
1.48M
      count = in - ctxt->input->cur;
3199
1.48M
            if (count > maxLength) {
3200
2.02k
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3201
2.02k
                return(NULL);
3202
2.02k
            }
3203
1.48M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3204
1.48M
      ctxt->input->cur = in;
3205
1.48M
      ctxt->input->col += count;
3206
1.48M
      if (ret == NULL)
3207
3
          xmlErrMemory(ctxt);
3208
1.48M
      return(ret);
3209
1.48M
  }
3210
1.55M
    }
3211
    /* accelerator for special cases */
3212
579k
    return(xmlParseNameComplex(ctxt));
3213
2.06M
}
3214
3215
static xmlHashedString
3216
3.47M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3217
3.47M
    xmlHashedString ret;
3218
3.47M
    int len = 0, l;
3219
3.47M
    int c;
3220
3.47M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221
0
                    XML_MAX_TEXT_LENGTH :
3222
3.47M
                    XML_MAX_NAME_LENGTH;
3223
3.47M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3224
3.47M
    size_t startPosition = 0;
3225
3226
3.47M
    ret.name = NULL;
3227
3.47M
    ret.hashValue = 0;
3228
3229
    /*
3230
     * Handler for more complex cases
3231
     */
3232
3.47M
    startPosition = CUR_PTR - BASE_PTR;
3233
3.47M
    c = xmlCurrentChar(ctxt, &l);
3234
3.47M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3235
3.47M
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3236
3.29M
  return(ret);
3237
3.29M
    }
3238
3239
14.9M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3240
14.9M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3241
14.8M
        if (len <= INT_MAX - l)
3242
14.8M
      len += l;
3243
14.8M
  NEXTL(l);
3244
14.8M
  c = xmlCurrentChar(ctxt, &l);
3245
14.8M
    }
3246
175k
    if (len > maxLength) {
3247
424
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3248
424
        return(ret);
3249
424
    }
3250
175k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3251
175k
    if (ret.name == NULL)
3252
1
        xmlErrMemory(ctxt);
3253
175k
    return(ret);
3254
175k
}
3255
3256
/**
3257
 * Parse an XML name.
3258
 *
3259
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3260
 *                          CombiningChar | Extender
3261
 *
3262
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3263
 *
3264
 * @param ctxt  an XML parser context
3265
 * @returns the Name parsed or NULL
3266
 */
3267
3268
static xmlHashedString
3269
21.2M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3270
21.2M
    const xmlChar *in, *e;
3271
21.2M
    xmlHashedString ret;
3272
21.2M
    size_t count = 0;
3273
21.2M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3274
0
                       XML_MAX_TEXT_LENGTH :
3275
21.2M
                       XML_MAX_NAME_LENGTH;
3276
3277
21.2M
    ret.name = NULL;
3278
3279
    /*
3280
     * Accelerator for simple ASCII names
3281
     */
3282
21.2M
    in = ctxt->input->cur;
3283
21.2M
    e = ctxt->input->end;
3284
21.2M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3285
21.2M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3286
21.2M
   (*in == '_')) && (in < e)) {
3287
17.9M
  in++;
3288
95.2M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3289
95.2M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3290
95.2M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3291
95.2M
          (*in == '_') || (*in == '-') ||
3292
95.2M
          (*in == '.')) && (in < e))
3293
77.2M
      in++;
3294
17.9M
  if (in >= e)
3295
3.68k
      goto complex;
3296
17.9M
  if ((*in > 0) && (*in < 0x80)) {
3297
17.8M
      count = in - ctxt->input->cur;
3298
17.8M
            if (count > maxLength) {
3299
276
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3300
276
                return(ret);
3301
276
            }
3302
17.8M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3303
17.8M
      ctxt->input->cur = in;
3304
17.8M
      ctxt->input->col += count;
3305
17.8M
      if (ret.name == NULL) {
3306
4
          xmlErrMemory(ctxt);
3307
4
      }
3308
17.8M
      return(ret);
3309
17.8M
  }
3310
17.9M
    }
3311
3.47M
complex:
3312
3.47M
    return(xmlParseNCNameComplex(ctxt));
3313
21.2M
}
3314
3315
/**
3316
 * Parse an XML name and compares for match
3317
 * (specialized for endtag parsing)
3318
 *
3319
 * @param ctxt  an XML parser context
3320
 * @param other  the name to compare with
3321
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3322
 * and the name for mismatch
3323
 */
3324
3325
static const xmlChar *
3326
510k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3327
510k
    register const xmlChar *cmp = other;
3328
510k
    register const xmlChar *in;
3329
510k
    const xmlChar *ret;
3330
3331
510k
    GROW;
3332
3333
510k
    in = ctxt->input->cur;
3334
1.73M
    while (*in != 0 && *in == *cmp) {
3335
1.22M
  ++in;
3336
1.22M
  ++cmp;
3337
1.22M
    }
3338
510k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3339
  /* success */
3340
368k
  ctxt->input->col += in - ctxt->input->cur;
3341
368k
  ctxt->input->cur = in;
3342
368k
  return (const xmlChar*) 1;
3343
368k
    }
3344
    /* failure (or end of input buffer), check with full function */
3345
142k
    ret = xmlParseName (ctxt);
3346
    /* strings coming from the dictionary direct compare possible */
3347
142k
    if (ret == other) {
3348
978
  return (const xmlChar*) 1;
3349
978
    }
3350
141k
    return ret;
3351
142k
}
3352
3353
/**
3354
 * Parse an XML name.
3355
 *
3356
 * @param ctxt  an XML parser context
3357
 * @param str  a pointer to the string pointer (IN/OUT)
3358
 * @returns the Name parsed or NULL. The `str` pointer
3359
 * is updated to the current location in the string.
3360
 */
3361
3362
static xmlChar *
3363
503k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3364
503k
    xmlChar *ret;
3365
503k
    const xmlChar *cur = *str;
3366
503k
    int flags = 0;
3367
503k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3368
0
                    XML_MAX_TEXT_LENGTH :
3369
503k
                    XML_MAX_NAME_LENGTH;
3370
3371
503k
    if (ctxt->options & XML_PARSE_OLD10)
3372
0
        flags |= XML_SCAN_OLD10;
3373
3374
503k
    cur = xmlScanName(*str, maxLength, flags);
3375
503k
    if (cur == NULL) {
3376
101
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3377
101
        return(NULL);
3378
101
    }
3379
503k
    if (cur == *str)
3380
7.63k
        return(NULL);
3381
3382
495k
    ret = xmlStrndup(*str, cur - *str);
3383
495k
    if (ret == NULL)
3384
13
        xmlErrMemory(ctxt);
3385
495k
    *str = cur;
3386
495k
    return(ret);
3387
503k
}
3388
3389
/**
3390
 * Parse an XML Nmtoken.
3391
 *
3392
 * @deprecated Internal function, don't use.
3393
 *
3394
 *     [7] Nmtoken ::= (NameChar)+
3395
 *
3396
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3397
 *
3398
 * @param ctxt  an XML parser context
3399
 * @returns the Nmtoken parsed or NULL
3400
 */
3401
3402
xmlChar *
3403
148k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3404
148k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3405
148k
    xmlChar *ret;
3406
148k
    int len = 0, l;
3407
148k
    int c;
3408
148k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3409
0
                    XML_MAX_TEXT_LENGTH :
3410
148k
                    XML_MAX_NAME_LENGTH;
3411
148k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3412
3413
148k
    c = xmlCurrentChar(ctxt, &l);
3414
3415
690k
    while (xmlIsNameChar(c, old10)) {
3416
543k
  COPY_BUF(buf, len, c);
3417
543k
  NEXTL(l);
3418
543k
  c = xmlCurrentChar(ctxt, &l);
3419
543k
  if (len >= XML_MAX_NAMELEN) {
3420
      /*
3421
       * Okay someone managed to make a huge token, so he's ready to pay
3422
       * for the processing speed.
3423
       */
3424
1.92k
      xmlChar *buffer;
3425
1.92k
      int max = len * 2;
3426
3427
1.92k
      buffer = xmlMalloc(max);
3428
1.92k
      if (buffer == NULL) {
3429
1
          xmlErrMemory(ctxt);
3430
1
    return(NULL);
3431
1
      }
3432
1.92k
      memcpy(buffer, buf, len);
3433
15.0M
      while (xmlIsNameChar(c, old10)) {
3434
15.0M
    if (len + 10 > max) {
3435
6.61k
        xmlChar *tmp;
3436
6.61k
                    int newSize;
3437
3438
6.61k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3439
6.61k
                    if (newSize < 0) {
3440
312
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3441
312
                        xmlFree(buffer);
3442
312
                        return(NULL);
3443
312
                    }
3444
6.30k
        tmp = xmlRealloc(buffer, newSize);
3445
6.30k
        if (tmp == NULL) {
3446
2
      xmlErrMemory(ctxt);
3447
2
      xmlFree(buffer);
3448
2
      return(NULL);
3449
2
        }
3450
6.29k
        buffer = tmp;
3451
6.29k
                    max = newSize;
3452
6.29k
    }
3453
15.0M
    COPY_BUF(buffer, len, c);
3454
15.0M
    NEXTL(l);
3455
15.0M
    c = xmlCurrentChar(ctxt, &l);
3456
15.0M
      }
3457
1.60k
      buffer[len] = 0;
3458
1.60k
      return(buffer);
3459
1.92k
  }
3460
543k
    }
3461
146k
    if (len == 0)
3462
60.4k
        return(NULL);
3463
86.5k
    if (len > maxLength) {
3464
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3465
0
        return(NULL);
3466
0
    }
3467
86.5k
    ret = xmlStrndup(buf, len);
3468
86.5k
    if (ret == NULL)
3469
6
        xmlErrMemory(ctxt);
3470
86.5k
    return(ret);
3471
86.5k
}
3472
3473
/**
3474
 * Validate an entity value and expand parameter entities.
3475
 *
3476
 * @param ctxt  parser context
3477
 * @param buf  string buffer
3478
 * @param str  entity value
3479
 * @param length  size of entity value
3480
 * @param depth  nesting depth
3481
 */
3482
static void
3483
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3484
129k
                          const xmlChar *str, int length, int depth) {
3485
129k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3486
129k
    const xmlChar *end, *chunk;
3487
129k
    int c, l;
3488
3489
129k
    if (str == NULL)
3490
5.09k
        return;
3491
3492
124k
    depth += 1;
3493
124k
    if (depth > maxDepth) {
3494
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3495
0
                       "Maximum entity nesting depth exceeded");
3496
0
  return;
3497
0
    }
3498
3499
124k
    end = str + length;
3500
124k
    chunk = str;
3501
3502
94.6M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3503
94.5M
        c = *str;
3504
3505
94.5M
        if (c >= 0x80) {
3506
80.9M
            l = xmlUTF8MultibyteLen(ctxt, str,
3507
80.9M
                    "invalid character in entity value\n");
3508
80.9M
            if (l == 0) {
3509
17.5M
                if (chunk < str)
3510
223k
                    xmlSBufAddString(buf, chunk, str - chunk);
3511
17.5M
                xmlSBufAddReplChar(buf);
3512
17.5M
                str += 1;
3513
17.5M
                chunk = str;
3514
63.4M
            } else {
3515
63.4M
                str += l;
3516
63.4M
            }
3517
80.9M
        } else if (c == '&') {
3518
103k
            if (str[1] == '#') {
3519
46.3k
                if (chunk < str)
3520
33.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3521
3522
46.3k
                c = xmlParseStringCharRef(ctxt, &str);
3523
46.3k
                if (c == 0)
3524
4.49k
                    return;
3525
3526
41.8k
                xmlSBufAddChar(buf, c);
3527
3528
41.8k
                chunk = str;
3529
57.6k
            } else {
3530
57.6k
                xmlChar *name;
3531
3532
                /*
3533
                 * General entity references are checked for
3534
                 * syntactic validity.
3535
                 */
3536
57.6k
                str++;
3537
57.6k
                name = xmlParseStringName(ctxt, &str);
3538
3539
57.6k
                if ((name == NULL) || (*str++ != ';')) {
3540
3.20k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3541
3.20k
                            "EntityValue: '&' forbidden except for entities "
3542
3.20k
                            "references\n");
3543
3.20k
                    xmlFree(name);
3544
3.20k
                    return;
3545
3.20k
                }
3546
3547
54.4k
                xmlFree(name);
3548
54.4k
            }
3549
13.4M
        } else if (c == '%') {
3550
25.5k
            xmlEntityPtr ent;
3551
3552
25.5k
            if (chunk < str)
3553
14.4k
                xmlSBufAddString(buf, chunk, str - chunk);
3554
3555
25.5k
            ent = xmlParseStringPEReference(ctxt, &str);
3556
25.5k
            if (ent == NULL)
3557
8.35k
                return;
3558
3559
17.1k
            if (!PARSER_EXTERNAL(ctxt)) {
3560
309
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3561
309
                return;
3562
309
            }
3563
3564
16.8k
            if (ent->content == NULL) {
3565
                /*
3566
                 * Note: external parsed entities will not be loaded,
3567
                 * it is not required for a non-validating parser to
3568
                 * complete external PEReferences coming from the
3569
                 * internal subset
3570
                 */
3571
6.93k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3572
6.93k
                    ((ctxt->replaceEntities) ||
3573
6.93k
                     (ctxt->validate))) {
3574
6.93k
                    xmlLoadEntityContent(ctxt, ent);
3575
6.93k
                } else {
3576
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3577
0
                                  "not validating will not read content for "
3578
0
                                  "PE entity %s\n", ent->name, NULL);
3579
0
                }
3580
6.93k
            }
3581
3582
            /*
3583
             * TODO: Skip if ent->content is still NULL.
3584
             */
3585
3586
16.8k
            if (xmlParserEntityCheck(ctxt, ent->length))
3587
9
                return;
3588
3589
16.8k
            if (ent->flags & XML_ENT_EXPANDING) {
3590
1.30k
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3591
1.30k
                xmlHaltParser(ctxt);
3592
1.30k
                return;
3593
1.30k
            }
3594
3595
15.5k
            ent->flags |= XML_ENT_EXPANDING;
3596
15.5k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3597
15.5k
                                      depth);
3598
15.5k
            ent->flags &= ~XML_ENT_EXPANDING;
3599
3600
15.5k
            chunk = str;
3601
13.4M
        } else {
3602
            /* Normal ASCII char */
3603
13.4M
            if (!IS_BYTE_CHAR(c)) {
3604
3.68M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3605
3.68M
                        "invalid character in entity value\n");
3606
3.68M
                if (chunk < str)
3607
50.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3608
3.68M
                xmlSBufAddReplChar(buf);
3609
3.68M
                str += 1;
3610
3.68M
                chunk = str;
3611
9.78M
            } else {
3612
9.78M
                str += 1;
3613
9.78M
            }
3614
13.4M
        }
3615
94.5M
    }
3616
3617
106k
    if (chunk < str)
3618
99.3k
        xmlSBufAddString(buf, chunk, str - chunk);
3619
106k
}
3620
3621
/**
3622
 * Parse a value for ENTITY declarations
3623
 *
3624
 * @deprecated Internal function, don't use.
3625
 *
3626
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3627
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3628
 *
3629
 * @param ctxt  an XML parser context
3630
 * @param orig  if non-NULL store a copy of the original entity value
3631
 * @returns the EntityValue parsed with reference substituted or NULL
3632
 */
3633
xmlChar *
3634
116k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3635
116k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3636
0
                         XML_MAX_HUGE_LENGTH :
3637
116k
                         XML_MAX_TEXT_LENGTH;
3638
116k
    xmlSBuf buf;
3639
116k
    const xmlChar *start;
3640
116k
    int quote, length;
3641
3642
116k
    xmlSBufInit(&buf, maxLength);
3643
3644
116k
    GROW;
3645
3646
116k
    quote = CUR;
3647
116k
    if ((quote != '"') && (quote != '\'')) {
3648
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3649
0
  return(NULL);
3650
0
    }
3651
116k
    CUR_PTR++;
3652
3653
116k
    length = 0;
3654
3655
    /*
3656
     * Copy raw content of the entity into a buffer
3657
     */
3658
218M
    while (1) {
3659
218M
        int c;
3660
3661
218M
        if (PARSER_STOPPED(ctxt))
3662
77
            goto error;
3663
3664
218M
        if (CUR_PTR >= ctxt->input->end) {
3665
1.05k
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3666
1.05k
            goto error;
3667
1.05k
        }
3668
3669
218M
        c = CUR;
3670
3671
218M
        if (c == 0) {
3672
1.00k
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3673
1.00k
                    "invalid character in entity value\n");
3674
1.00k
            goto error;
3675
1.00k
        }
3676
218M
        if (c == quote)
3677
114k
            break;
3678
218M
        NEXTL(1);
3679
218M
        length += 1;
3680
3681
        /*
3682
         * TODO: Check growth threshold
3683
         */
3684
218M
        if (ctxt->input->end - CUR_PTR < 10)
3685
38.0k
            GROW;
3686
218M
    }
3687
3688
114k
    start = CUR_PTR - length;
3689
3690
114k
    if (orig != NULL) {
3691
114k
        *orig = xmlStrndup(start, length);
3692
114k
        if (*orig == NULL)
3693
4
            xmlErrMemory(ctxt);
3694
114k
    }
3695
3696
114k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3697
3698
114k
    NEXTL(1);
3699
3700
114k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3701
3702
2.13k
error:
3703
2.13k
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3704
2.13k
    return(NULL);
3705
116k
}
3706
3707
/**
3708
 * Check an entity reference in an attribute value for validity
3709
 * without expanding it.
3710
 *
3711
 * @param ctxt  parser context
3712
 * @param pent  entity
3713
 * @param depth  nesting depth
3714
 */
3715
static void
3716
52
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3717
52
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3718
52
    const xmlChar *str;
3719
52
    unsigned long expandedSize = pent->length;
3720
52
    int c, flags;
3721
3722
52
    depth += 1;
3723
52
    if (depth > maxDepth) {
3724
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3725
0
                       "Maximum entity nesting depth exceeded");
3726
0
  return;
3727
0
    }
3728
3729
52
    if (pent->flags & XML_ENT_EXPANDING) {
3730
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3731
0
        xmlHaltParser(ctxt);
3732
0
        return;
3733
0
    }
3734
3735
    /*
3736
     * If we're parsing a default attribute value in DTD content,
3737
     * the entity might reference other entities which weren't
3738
     * defined yet, so the check isn't reliable.
3739
     */
3740
52
    if (ctxt->inSubset == 0)
3741
52
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3742
0
    else
3743
0
        flags = XML_ENT_VALIDATED;
3744
3745
52
    str = pent->content;
3746
52
    if (str == NULL)
3747
0
        goto done;
3748
3749
    /*
3750
     * Note that entity values are already validated. We only check
3751
     * for illegal less-than signs and compute the expanded size
3752
     * of the entity. No special handling for multi-byte characters
3753
     * is needed.
3754
     */
3755
3.28M
    while (!PARSER_STOPPED(ctxt)) {
3756
3.28M
        c = *str;
3757
3758
3.28M
  if (c != '&') {
3759
3.28M
            if (c == 0)
3760
51
                break;
3761
3762
3.28M
            if (c == '<')
3763
7.67k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3764
7.67k
                        "'<' in entity '%s' is not allowed in attributes "
3765
7.67k
                        "values\n", pent->name);
3766
3767
3.28M
            str += 1;
3768
3.28M
        } else if (str[1] == '#') {
3769
1
            int val;
3770
3771
1
      val = xmlParseStringCharRef(ctxt, &str);
3772
1
      if (val == 0) {
3773
1
                pent->content[0] = 0;
3774
1
                break;
3775
1
            }
3776
4.24k
  } else {
3777
4.24k
            xmlChar *name;
3778
4.24k
            xmlEntityPtr ent;
3779
3780
4.24k
      name = xmlParseStringEntityRef(ctxt, &str);
3781
4.24k
      if (name == NULL) {
3782
0
                pent->content[0] = 0;
3783
0
                break;
3784
0
            }
3785
3786
4.24k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3787
4.24k
            xmlFree(name);
3788
3789
4.24k
            if ((ent != NULL) &&
3790
4.24k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3791
2.76k
                if ((ent->flags & flags) != flags) {
3792
34
                    pent->flags |= XML_ENT_EXPANDING;
3793
34
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3794
34
                    pent->flags &= ~XML_ENT_EXPANDING;
3795
34
                }
3796
3797
2.76k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3798
2.76k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3799
2.76k
            }
3800
4.24k
        }
3801
3.28M
    }
3802
3803
52
done:
3804
52
    if (ctxt->inSubset == 0)
3805
52
        pent->expandedSize = expandedSize;
3806
3807
52
    pent->flags |= flags;
3808
52
}
3809
3810
/**
3811
 * Expand general entity references in an entity or attribute value.
3812
 * Perform attribute value normalization.
3813
 *
3814
 * @param ctxt  parser context
3815
 * @param buf  string buffer
3816
 * @param str  entity or attribute value
3817
 * @param pent  entity for entity value, NULL for attribute values
3818
 * @param normalize  whether to collapse whitespace
3819
 * @param inSpace  whitespace state
3820
 * @param depth  nesting depth
3821
 * @param check  whether to check for amplification
3822
 * @returns  whether there was a normalization change
3823
 */
3824
static int
3825
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3826
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3827
491k
                          int *inSpace, int depth, int check) {
3828
491k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3829
491k
    int c, chunkSize;
3830
491k
    int normChange = 0;
3831
3832
491k
    if (str == NULL)
3833
0
        return(0);
3834
3835
491k
    depth += 1;
3836
491k
    if (depth > maxDepth) {
3837
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3838
0
                       "Maximum entity nesting depth exceeded");
3839
0
  return(0);
3840
0
    }
3841
3842
491k
    if (pent != NULL) {
3843
491k
        if (pent->flags & XML_ENT_EXPANDING) {
3844
264
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3845
264
            xmlHaltParser(ctxt);
3846
264
            return(0);
3847
264
        }
3848
3849
491k
        if (check) {
3850
491k
            if (xmlParserEntityCheck(ctxt, pent->length))
3851
453
                return(0);
3852
491k
        }
3853
491k
    }
3854
3855
490k
    chunkSize = 0;
3856
3857
    /*
3858
     * Note that entity values are already validated. No special
3859
     * handling for multi-byte characters is needed.
3860
     */
3861
1.20G
    while (!PARSER_STOPPED(ctxt)) {
3862
1.20G
        c = *str;
3863
3864
1.20G
  if (c != '&') {
3865
1.20G
            if (c == 0)
3866
449k
                break;
3867
3868
            /*
3869
             * If this function is called without an entity, it is used to
3870
             * expand entities in an attribute content where less-than was
3871
             * already unscaped and is allowed.
3872
             */
3873
1.20G
            if ((pent != NULL) && (c == '<')) {
3874
39.0k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3875
39.0k
                        "'<' in entity '%s' is not allowed in attributes "
3876
39.0k
                        "values\n", pent->name);
3877
39.0k
                break;
3878
39.0k
            }
3879
3880
1.20G
            if (c <= 0x20) {
3881
4.01M
                if ((normalize) && (*inSpace)) {
3882
                    /* Skip char */
3883
177k
                    if (chunkSize > 0) {
3884
37.0k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
37.0k
                        chunkSize = 0;
3886
37.0k
                    }
3887
177k
                    normChange = 1;
3888
3.83M
                } else if (c < 0x20) {
3889
3.24M
                    if (chunkSize > 0) {
3890
393k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3891
393k
                        chunkSize = 0;
3892
393k
                    }
3893
3894
3.24M
                    xmlSBufAddCString(buf, " ", 1);
3895
3.24M
                } else {
3896
590k
                    chunkSize += 1;
3897
590k
                }
3898
3899
4.01M
                *inSpace = 1;
3900
1.19G
            } else {
3901
1.19G
                chunkSize += 1;
3902
1.19G
                *inSpace = 0;
3903
1.19G
            }
3904
3905
1.20G
            str += 1;
3906
1.20G
        } else if (str[1] == '#') {
3907
142k
            int val;
3908
3909
142k
            if (chunkSize > 0) {
3910
132k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
132k
                chunkSize = 0;
3912
132k
            }
3913
3914
142k
      val = xmlParseStringCharRef(ctxt, &str);
3915
142k
      if (val == 0) {
3916
769
                if (pent != NULL)
3917
769
                    pent->content[0] = 0;
3918
769
                break;
3919
769
            }
3920
3921
142k
            if (val == ' ') {
3922
8.70k
                if ((normalize) && (*inSpace))
3923
0
                    normChange = 1;
3924
8.70k
                else
3925
8.70k
                    xmlSBufAddCString(buf, " ", 1);
3926
8.70k
                *inSpace = 1;
3927
133k
            } else {
3928
133k
                xmlSBufAddChar(buf, val);
3929
133k
                *inSpace = 0;
3930
133k
            }
3931
415k
  } else {
3932
415k
            xmlChar *name;
3933
415k
            xmlEntityPtr ent;
3934
3935
415k
            if (chunkSize > 0) {
3936
275k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3937
275k
                chunkSize = 0;
3938
275k
            }
3939
3940
415k
      name = xmlParseStringEntityRef(ctxt, &str);
3941
415k
            if (name == NULL) {
3942
1.13k
                if (pent != NULL)
3943
1.13k
                    pent->content[0] = 0;
3944
1.13k
                break;
3945
1.13k
            }
3946
3947
414k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3948
414k
            xmlFree(name);
3949
3950
414k
      if ((ent != NULL) &&
3951
414k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3952
36.2k
    if (ent->content == NULL) {
3953
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3954
0
          "predefined entity has no content\n");
3955
0
                    break;
3956
0
                }
3957
3958
36.2k
                xmlSBufAddString(buf, ent->content, ent->length);
3959
3960
36.2k
                *inSpace = 0;
3961
378k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3962
301k
                if (pent != NULL)
3963
301k
                    pent->flags |= XML_ENT_EXPANDING;
3964
301k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3965
301k
                        ent->content, ent, normalize, inSpace, depth, check);
3966
301k
                if (pent != NULL)
3967
301k
                    pent->flags &= ~XML_ENT_EXPANDING;
3968
301k
      }
3969
414k
        }
3970
1.20G
    }
3971
3972
490k
    if (chunkSize > 0)
3973
394k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3974
3975
490k
    return(normChange);
3976
491k
}
3977
3978
/**
3979
 * Expand general entity references in an entity or attribute value.
3980
 * Perform attribute value normalization.
3981
 *
3982
 * @param ctxt  parser context
3983
 * @param str  entity or attribute value
3984
 * @param normalize  whether to collapse whitespace
3985
 * @returns the expanded attribtue value.
3986
 */
3987
xmlChar *
3988
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3989
0
                            int normalize) {
3990
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3991
0
                         XML_MAX_HUGE_LENGTH :
3992
0
                         XML_MAX_TEXT_LENGTH;
3993
0
    xmlSBuf buf;
3994
0
    int inSpace = 1;
3995
3996
0
    xmlSBufInit(&buf, maxLength);
3997
3998
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3999
0
                              ctxt->inputNr, /* check */ 0);
4000
4001
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4002
0
        buf.size--;
4003
4004
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4005
0
}
4006
4007
/**
4008
 * Parse a value for an attribute.
4009
 *
4010
 * NOTE: if no normalization is needed, the routine will return pointers
4011
 * directly from the data buffer.
4012
 *
4013
 * 3.3.3 Attribute-Value Normalization:
4014
 *
4015
 * Before the value of an attribute is passed to the application or
4016
 * checked for validity, the XML processor must normalize it as follows:
4017
 *
4018
 * - a character reference is processed by appending the referenced
4019
 *   character to the attribute value
4020
 * - an entity reference is processed by recursively processing the
4021
 *   replacement text of the entity
4022
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4023
 *   appending \#x20 to the normalized value, except that only a single
4024
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4025
 *   parsed entity or the literal entity value of an internal parsed entity
4026
 * - other characters are processed by appending them to the normalized value
4027
 *
4028
 * If the declared value is not CDATA, then the XML processor must further
4029
 * process the normalized attribute value by discarding any leading and
4030
 * trailing space (\#x20) characters, and by replacing sequences of space
4031
 * (\#x20) characters by a single space (\#x20) character.
4032
 * All attributes for which no declaration has been read should be treated
4033
 * by a non-validating parser as if declared CDATA.
4034
 *
4035
 * @param ctxt  an XML parser context
4036
 * @param attlen  attribute len result
4037
 * @param outFlags  resulting XML_ATTVAL_* flags
4038
 * @param special  value from attsSpecial
4039
 * @param isNamespace  whether this is a namespace declaration
4040
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4041
 *     caller if it was copied, this can be detected by val[*len] == 0.
4042
 */
4043
static xmlChar *
4044
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4045
5.30M
                         int special, int isNamespace) {
4046
5.30M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4047
0
                         XML_MAX_HUGE_LENGTH :
4048
5.30M
                         XML_MAX_TEXT_LENGTH;
4049
5.30M
    xmlSBuf buf;
4050
5.30M
    xmlChar *ret;
4051
5.30M
    int c, l, quote, entFlags, chunkSize;
4052
5.30M
    int inSpace = 1;
4053
5.30M
    int replaceEntities;
4054
5.30M
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4055
5.30M
    int attvalFlags = 0;
4056
4057
    /* Always expand namespace URIs */
4058
5.30M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4059
4060
5.30M
    xmlSBufInit(&buf, maxLength);
4061
4062
5.30M
    GROW;
4063
4064
5.30M
    quote = CUR;
4065
5.30M
    if ((quote != '"') && (quote != '\'')) {
4066
22.2k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4067
22.2k
  return(NULL);
4068
22.2k
    }
4069
5.28M
    NEXTL(1);
4070
4071
5.28M
    if (ctxt->inSubset == 0)
4072
5.22M
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4073
53.3k
    else
4074
53.3k
        entFlags = XML_ENT_VALIDATED;
4075
4076
5.28M
    inSpace = 1;
4077
5.28M
    chunkSize = 0;
4078
4079
530M
    while (1) {
4080
530M
        if (PARSER_STOPPED(ctxt))
4081
794
            goto error;
4082
4083
530M
        if (CUR_PTR >= ctxt->input->end) {
4084
12.0k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4085
12.0k
                           "AttValue: ' expected\n");
4086
12.0k
            goto error;
4087
12.0k
        }
4088
4089
        /*
4090
         * TODO: Check growth threshold
4091
         */
4092
530M
        if (ctxt->input->end - CUR_PTR < 10)
4093
260k
            GROW;
4094
4095
530M
        c = CUR;
4096
4097
530M
        if (c >= 0x80) {
4098
256M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4099
256M
                    "invalid character in attribute value\n");
4100
256M
            if (l == 0) {
4101
45.5M
                if (chunkSize > 0) {
4102
423k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4103
423k
                    chunkSize = 0;
4104
423k
                }
4105
45.5M
                xmlSBufAddReplChar(&buf);
4106
45.5M
                NEXTL(1);
4107
211M
            } else {
4108
211M
                chunkSize += l;
4109
211M
                NEXTL(l);
4110
211M
            }
4111
4112
256M
            inSpace = 0;
4113
273M
        } else if (c != '&') {
4114
272M
            if (c > 0x20) {
4115
198M
                if (c == quote)
4116
5.26M
                    break;
4117
4118
193M
                if (c == '<')
4119
2.05M
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4120
4121
193M
                chunkSize += 1;
4122
193M
                inSpace = 0;
4123
193M
            } else if (!IS_BYTE_CHAR(c)) {
4124
64.3M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4125
64.3M
                        "invalid character in attribute value\n");
4126
64.3M
                if (chunkSize > 0) {
4127
199k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4128
199k
                    chunkSize = 0;
4129
199k
                }
4130
64.3M
                xmlSBufAddReplChar(&buf);
4131
64.3M
                inSpace = 0;
4132
64.3M
            } else {
4133
                /* Whitespace */
4134
9.91M
                if ((normalize) && (inSpace)) {
4135
                    /* Skip char */
4136
101k
                    if (chunkSize > 0) {
4137
19.1k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4138
19.1k
                        chunkSize = 0;
4139
19.1k
                    }
4140
101k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4141
9.81M
                } else if (c < 0x20) {
4142
                    /* Convert to space */
4143
7.36M
                    if (chunkSize > 0) {
4144
582k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4145
582k
                        chunkSize = 0;
4146
582k
                    }
4147
4148
7.36M
                    xmlSBufAddCString(&buf, " ", 1);
4149
7.36M
                } else {
4150
2.45M
                    chunkSize += 1;
4151
2.45M
                }
4152
4153
9.91M
                inSpace = 1;
4154
4155
9.91M
                if ((c == 0xD) && (NXT(1) == 0xA))
4156
29.8k
                    CUR_PTR++;
4157
9.91M
            }
4158
4159
267M
            NEXTL(1);
4160
267M
        } else if (NXT(1) == '#') {
4161
134k
            int val;
4162
4163
134k
            if (chunkSize > 0) {
4164
75.3k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4165
75.3k
                chunkSize = 0;
4166
75.3k
            }
4167
4168
134k
            val = xmlParseCharRef(ctxt);
4169
134k
            if (val == 0)
4170
2.81k
                goto error;
4171
4172
131k
            if ((val == '&') && (!replaceEntities)) {
4173
                /*
4174
                 * The reparsing will be done in xmlNodeParseContent()
4175
                 * called from SAX2.c
4176
                 */
4177
119
                xmlSBufAddCString(&buf, "&#38;", 5);
4178
119
                inSpace = 0;
4179
131k
            } else if (val == ' ') {
4180
6.13k
                if ((normalize) && (inSpace))
4181
783
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4182
5.35k
                else
4183
5.35k
                    xmlSBufAddCString(&buf, " ", 1);
4184
6.13k
                inSpace = 1;
4185
125k
            } else {
4186
125k
                xmlSBufAddChar(&buf, val);
4187
125k
                inSpace = 0;
4188
125k
            }
4189
586k
        } else {
4190
586k
            const xmlChar *name;
4191
586k
            xmlEntityPtr ent;
4192
4193
586k
            if (chunkSize > 0) {
4194
251k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4195
251k
                chunkSize = 0;
4196
251k
            }
4197
4198
586k
            name = xmlParseEntityRefInternal(ctxt);
4199
586k
            if (name == NULL) {
4200
                /*
4201
                 * Probably a literal '&' which wasn't escaped.
4202
                 * TODO: Handle gracefully in recovery mode.
4203
                 */
4204
247k
                continue;
4205
247k
            }
4206
4207
339k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4208
339k
            if (ent == NULL)
4209
92.5k
                continue;
4210
4211
246k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4212
35.4k
                if ((ent->content[0] == '&') && (!replaceEntities))
4213
433
                    xmlSBufAddCString(&buf, "&#38;", 5);
4214
34.9k
                else
4215
34.9k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4216
35.4k
                inSpace = 0;
4217
211k
            } else if (replaceEntities) {
4218
189k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4219
189k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4220
189k
                        /* check */ 1) > 0)
4221
9.17k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4222
189k
            } else {
4223
21.2k
                if ((ent->flags & entFlags) != entFlags)
4224
18
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4225
4226
21.2k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4227
12
                    ent->content[0] = 0;
4228
12
                    goto error;
4229
12
                }
4230
4231
                /*
4232
                 * Just output the reference
4233
                 */
4234
21.2k
                xmlSBufAddCString(&buf, "&", 1);
4235
21.2k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4236
21.2k
                xmlSBufAddCString(&buf, ";", 1);
4237
4238
21.2k
                inSpace = 0;
4239
21.2k
            }
4240
246k
  }
4241
530M
    }
4242
4243
5.26M
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4244
4.91M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4245
4246
4.91M
        if (attlen != NULL)
4247
4.91M
            *attlen = chunkSize;
4248
4.91M
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4249
304
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4250
304
            *attlen -= 1;
4251
304
        }
4252
4253
        /* Report potential error */
4254
4.91M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4255
4.91M
    } else {
4256
351k
        if (chunkSize > 0)
4257
208k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4258
4259
351k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4260
528
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4261
528
            buf.size--;
4262
528
        }
4263
4264
351k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4265
351k
        attvalFlags |= XML_ATTVAL_ALLOC;
4266
4267
351k
        if (ret != NULL) {
4268
351k
            if (attlen != NULL)
4269
298k
                *attlen = buf.size;
4270
351k
        }
4271
351k
    }
4272
4273
5.26M
    if (outFlags != NULL)
4274
5.21M
        *outFlags = attvalFlags;
4275
4276
5.26M
    NEXTL(1);
4277
4278
5.26M
    return(ret);
4279
4280
15.7k
error:
4281
15.7k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4282
15.7k
    return(NULL);
4283
5.28M
}
4284
4285
/**
4286
 * Parse a value for an attribute
4287
 * Note: the parser won't do substitution of entities here, this
4288
 * will be handled later in #xmlStringGetNodeList
4289
 *
4290
 * @deprecated Internal function, don't use.
4291
 *
4292
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4293
 *                       "'" ([^<&'] | Reference)* "'"
4294
 *
4295
 * 3.3.3 Attribute-Value Normalization:
4296
 *
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 *
4300
 * - a character reference is processed by appending the referenced
4301
 *   character to the attribute value
4302
 * - an entity reference is processed by recursively processing the
4303
 *   replacement text of the entity
4304
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4305
 *   appending \#x20 to the normalized value, except that only a single
4306
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4307
 *   parsed entity or the literal entity value of an internal parsed entity
4308
 * - other characters are processed by appending them to the normalized value
4309
 *
4310
 * If the declared value is not CDATA, then the XML processor must further
4311
 * process the normalized attribute value by discarding any leading and
4312
 * trailing space (\#x20) characters, and by replacing sequences of space
4313
 * (\#x20) characters by a single space (\#x20) character.
4314
 * All attributes for which no declaration has been read should be treated
4315
 * by a non-validating parser as if declared CDATA.
4316
 *
4317
 * @param ctxt  an XML parser context
4318
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4319
 * caller.
4320
 */
4321
xmlChar *
4322
60.1k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4323
60.1k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4324
60.1k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4325
60.1k
}
4326
4327
/**
4328
 * Parse an XML Literal
4329
 *
4330
 * @deprecated Internal function, don't use.
4331
 *
4332
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4333
 *
4334
 * @param ctxt  an XML parser context
4335
 * @returns the SystemLiteral parsed or NULL
4336
 */
4337
4338
xmlChar *
4339
94.2k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4340
94.2k
    xmlChar *buf = NULL;
4341
94.2k
    int len = 0;
4342
94.2k
    int size = XML_PARSER_BUFFER_SIZE;
4343
94.2k
    int cur, l;
4344
94.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4345
0
                    XML_MAX_TEXT_LENGTH :
4346
94.2k
                    XML_MAX_NAME_LENGTH;
4347
94.2k
    xmlChar stop;
4348
4349
94.2k
    if (RAW == '"') {
4350
76.8k
        NEXT;
4351
76.8k
  stop = '"';
4352
76.8k
    } else if (RAW == '\'') {
4353
14.6k
        NEXT;
4354
14.6k
  stop = '\'';
4355
14.6k
    } else {
4356
2.73k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4357
2.73k
  return(NULL);
4358
2.73k
    }
4359
4360
91.4k
    buf = xmlMalloc(size);
4361
91.4k
    if (buf == NULL) {
4362
15
        xmlErrMemory(ctxt);
4363
15
  return(NULL);
4364
15
    }
4365
91.4k
    cur = xmlCurrentCharRecover(ctxt, &l);
4366
11.2M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4367
11.1M
  if (len + 5 >= size) {
4368
16.1k
      xmlChar *tmp;
4369
16.1k
            int newSize;
4370
4371
16.1k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4372
16.1k
            if (newSize < 0) {
4373
342
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4374
342
                xmlFree(buf);
4375
342
                return(NULL);
4376
342
            }
4377
15.7k
      tmp = xmlRealloc(buf, newSize);
4378
15.7k
      if (tmp == NULL) {
4379
3
          xmlFree(buf);
4380
3
    xmlErrMemory(ctxt);
4381
3
    return(NULL);
4382
3
      }
4383
15.7k
      buf = tmp;
4384
15.7k
            size = newSize;
4385
15.7k
  }
4386
11.1M
  COPY_BUF(buf, len, cur);
4387
11.1M
  NEXTL(l);
4388
11.1M
  cur = xmlCurrentCharRecover(ctxt, &l);
4389
11.1M
    }
4390
91.1k
    buf[len] = 0;
4391
91.1k
    if (!IS_CHAR(cur)) {
4392
1.18k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4393
89.9k
    } else {
4394
89.9k
  NEXT;
4395
89.9k
    }
4396
91.1k
    return(buf);
4397
91.4k
}
4398
4399
/**
4400
 * Parse an XML public literal
4401
 *
4402
 * @deprecated Internal function, don't use.
4403
 *
4404
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4405
 *
4406
 * @param ctxt  an XML parser context
4407
 * @returns the PubidLiteral parsed or NULL.
4408
 */
4409
4410
xmlChar *
4411
25.9k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4412
25.9k
    xmlChar *buf = NULL;
4413
25.9k
    int len = 0;
4414
25.9k
    int size = XML_PARSER_BUFFER_SIZE;
4415
25.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4416
0
                    XML_MAX_TEXT_LENGTH :
4417
25.9k
                    XML_MAX_NAME_LENGTH;
4418
25.9k
    xmlChar cur;
4419
25.9k
    xmlChar stop;
4420
4421
25.9k
    if (RAW == '"') {
4422
9.78k
        NEXT;
4423
9.78k
  stop = '"';
4424
16.2k
    } else if (RAW == '\'') {
4425
15.2k
        NEXT;
4426
15.2k
  stop = '\'';
4427
15.2k
    } else {
4428
956
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4429
956
  return(NULL);
4430
956
    }
4431
25.0k
    buf = xmlMalloc(size);
4432
25.0k
    if (buf == NULL) {
4433
1
  xmlErrMemory(ctxt);
4434
1
  return(NULL);
4435
1
    }
4436
25.0k
    cur = CUR;
4437
124k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4438
124k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4439
99.1k
  if (len + 1 >= size) {
4440
354
      xmlChar *tmp;
4441
354
            int newSize;
4442
4443
354
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4444
354
            if (newSize < 0) {
4445
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4446
0
                xmlFree(buf);
4447
0
                return(NULL);
4448
0
            }
4449
354
      tmp = xmlRealloc(buf, newSize);
4450
354
      if (tmp == NULL) {
4451
1
    xmlErrMemory(ctxt);
4452
1
    xmlFree(buf);
4453
1
    return(NULL);
4454
1
      }
4455
353
      buf = tmp;
4456
353
            size = newSize;
4457
353
  }
4458
99.1k
  buf[len++] = cur;
4459
99.1k
  NEXT;
4460
99.1k
  cur = CUR;
4461
99.1k
    }
4462
25.0k
    buf[len] = 0;
4463
25.0k
    if (cur != stop) {
4464
1.42k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4465
23.6k
    } else {
4466
23.6k
  NEXTL(1);
4467
23.6k
    }
4468
25.0k
    return(buf);
4469
25.0k
}
4470
4471
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4472
4473
/*
4474
 * used for the test in the inner loop of the char data testing
4475
 *
 * Lookup table indexed by byte value. An entry is non-zero, and equal
 * to its own index, for 0x09 and for 0x20..0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D). Every other entry is zero: the remaining
 * control characters, including LF (0x0A) and CR (0x0D), and all
 * bytes >= 0x80.
 *
 * NOTE(review): the code reading this table is outside this excerpt;
 * presumably a zero entry ends the fast scan over plain character
 * data -- confirm against xmlParseCharDataInternal.
 */
4476
static const unsigned char test_char_data[256] = {
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4482
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4483
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4484
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4485
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4486
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4487
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4488
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4489
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4490
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4491
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4492
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4493
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4494
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4495
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4496
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4497
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4498
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4499
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4500
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4501
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4502
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4503
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4504
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4505
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4506
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4507
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4508
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4509
};
4510
4511
static void
4512
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4513
8.75M
              int isBlank) {
4514
8.75M
    int checkBlanks;
4515
4516
8.75M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4517
3.37M
        return;
4518
4519
5.38M
    checkBlanks = (!ctxt->keepBlanks) ||
4520
5.38M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4521
4522
    /*
4523
     * Calling areBlanks with only parts of a text node
4524
     * is fundamentally broken, making the NOBLANKS option
4525
     * essentially unusable.
4526
     */
4527
5.38M
    if ((checkBlanks) &&
4528
5.38M
        (areBlanks(ctxt, buf, size, isBlank))) {
4529
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4530
0
            (ctxt->keepBlanks))
4531
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4532
5.38M
    } else {
4533
5.38M
        if (ctxt->sax->characters != NULL)
4534
5.38M
            ctxt->sax->characters(ctxt->userData, buf, size);
4535
4536
        /*
4537
         * The old code used to update this value for "complex" data
4538
         * even if checkBlanks was false. This was probably a bug.
4539
         */
4540
5.38M
        if ((checkBlanks) && (*ctxt->space == -1))
4541
0
            *ctxt->space = -2;
4542
5.38M
    }
4543
5.38M
}
4544
4545
/**
4546
 * Parse character data. Always makes progress if the first char isn't
4547
 * '<' or '&'.
4548
 *
4549
 * The right angle bracket (>) may be represented using the string "&gt;",
4550
 * and must, for compatibility, be escaped using "&gt;" or a character
4551
 * reference when it appears in the string "]]>" in content, when that
4552
 * string is not marking the end of a CDATA section.
4553
 *
4554
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4555
 * @param ctxt  an XML parser context
4556
 * @param partial  buffer may contain partial UTF-8 sequences
4557
 */
4558
static void
4559
13.8M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4560
13.8M
    const xmlChar *in;
4561
13.8M
    int line = ctxt->input->line;
4562
13.8M
    int col = ctxt->input->col;
4563
13.8M
    int ccol;
4564
13.8M
    int terminate = 0;
4565
4566
13.8M
    GROW;
4567
    /*
4568
     * Accelerated common case where input don't need to be
4569
     * modified before passing it to the handler.
4570
     */
4571
13.8M
    in = ctxt->input->cur;
4572
14.0M
    do {
4573
16.5M
get_more_space:
4574
22.5M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4575
16.5M
        if (*in == 0xA) {
4576
40.5M
            do {
4577
40.5M
                ctxt->input->line++; ctxt->input->col = 1;
4578
40.5M
                in++;
4579
40.5M
            } while (*in == 0xA);
4580
2.50M
            goto get_more_space;
4581
2.50M
        }
4582
14.0M
        if (*in == '<') {
4583
6.24M
            while (in > ctxt->input->cur) {
4584
3.12M
                const xmlChar *tmp = ctxt->input->cur;
4585
3.12M
                size_t nbchar = in - tmp;
4586
4587
3.12M
                if (nbchar > XML_MAX_ITEMS)
4588
0
                    nbchar = XML_MAX_ITEMS;
4589
3.12M
                ctxt->input->cur += nbchar;
4590
4591
3.12M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4592
3.12M
            }
4593
3.12M
            return;
4594
3.12M
        }
4595
4596
11.8M
get_more:
4597
11.8M
        ccol = ctxt->input->col;
4598
314M
        while (test_char_data[*in]) {
4599
302M
            in++;
4600
302M
            ccol++;
4601
302M
        }
4602
11.8M
        ctxt->input->col = ccol;
4603
11.8M
        if (*in == 0xA) {
4604
17.7M
            do {
4605
17.7M
                ctxt->input->line++; ctxt->input->col = 1;
4606
17.7M
                in++;
4607
17.7M
            } while (*in == 0xA);
4608
794k
            goto get_more;
4609
794k
        }
4610
11.0M
        if (*in == ']') {
4611
128k
            size_t avail = ctxt->input->end - in;
4612
4613
128k
            if (partial && avail < 2) {
4614
0
                terminate = 1;
4615
0
                goto invoke_callback;
4616
0
            }
4617
128k
            if (in[1] == ']') {
4618
14.0k
                if (partial && avail < 3) {
4619
0
                    terminate = 1;
4620
0
                    goto invoke_callback;
4621
0
                }
4622
14.0k
                if (in[2] == '>')
4623
1.32k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4624
14.0k
            }
4625
4626
128k
            in++;
4627
128k
            ctxt->input->col++;
4628
128k
            goto get_more;
4629
128k
        }
4630
4631
10.9M
invoke_callback:
4632
13.7M
        while (in > ctxt->input->cur) {
4633
2.82M
            const xmlChar *tmp = ctxt->input->cur;
4634
2.82M
            size_t nbchar = in - tmp;
4635
4636
2.82M
            if (nbchar > XML_MAX_ITEMS)
4637
0
                nbchar = XML_MAX_ITEMS;
4638
2.82M
            ctxt->input->cur += nbchar;
4639
4640
2.82M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4641
4642
2.82M
            line = ctxt->input->line;
4643
2.82M
            col = ctxt->input->col;
4644
2.82M
        }
4645
10.9M
        ctxt->input->cur = in;
4646
10.9M
        if (*in == 0xD) {
4647
217k
            in++;
4648
217k
            if (*in == 0xA) {
4649
192k
                ctxt->input->cur = in;
4650
192k
                in++;
4651
192k
                ctxt->input->line++; ctxt->input->col = 1;
4652
192k
                continue; /* while */
4653
192k
            }
4654
24.6k
            in--;
4655
24.6k
        }
4656
10.7M
        if (*in == '<') {
4657
1.73M
            return;
4658
1.73M
        }
4659
8.97M
        if (*in == '&') {
4660
135k
            return;
4661
135k
        }
4662
8.84M
        if (terminate) {
4663
0
            return;
4664
0
        }
4665
8.84M
        SHRINK;
4666
8.84M
        GROW;
4667
8.84M
        in = ctxt->input->cur;
4668
9.03M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4669
9.03M
             (*in == 0x09) || (*in == 0x0a));
4670
8.85M
    ctxt->input->line = line;
4671
8.85M
    ctxt->input->col = col;
4672
8.85M
    xmlParseCharDataComplex(ctxt, partial);
4673
8.85M
}
4674
4675
/**
4676
 * Always makes progress if the first char isn't '<' or '&'.
4677
 *
4678
 * parse a CharData section.this is the fallback function
4679
 * of #xmlParseCharData when the parsing requires handling
4680
 * of non-ASCII characters.
4681
 *
4682
 * @param ctxt  an XML parser context
4683
 * @param partial  whether the input can end with truncated UTF-8
4684
 */
4685
static void
4686
8.85M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4687
8.85M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4688
8.85M
    int nbchar = 0;
4689
8.85M
    int cur, l;
4690
4691
8.85M
    cur = xmlCurrentCharRecover(ctxt, &l);
4692
283M
    while ((cur != '<') && /* checked */
4693
283M
           (cur != '&') &&
4694
283M
     (IS_CHAR(cur))) {
4695
274M
        if (cur == ']') {
4696
250k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4697
4698
250k
            if (partial && avail < 2)
4699
0
                break;
4700
250k
            if (NXT(1) == ']') {
4701
141k
                if (partial && avail < 3)
4702
0
                    break;
4703
141k
                if (NXT(2) == '>')
4704
7.75k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4705
141k
            }
4706
250k
        }
4707
4708
274M
  COPY_BUF(buf, nbchar, cur);
4709
  /* move current position before possible calling of ctxt->sax->characters */
4710
274M
  NEXTL(l);
4711
274M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4712
1.52M
      buf[nbchar] = 0;
4713
4714
1.52M
            xmlCharacters(ctxt, buf, nbchar, 0);
4715
1.52M
      nbchar = 0;
4716
1.52M
            SHRINK;
4717
1.52M
  }
4718
274M
  cur = xmlCurrentCharRecover(ctxt, &l);
4719
274M
    }
4720
8.85M
    if (nbchar != 0) {
4721
1.28M
        buf[nbchar] = 0;
4722
4723
1.28M
        xmlCharacters(ctxt, buf, nbchar, 0);
4724
1.28M
    }
4725
    /*
4726
     * cur == 0 can mean
4727
     *
4728
     * - End of buffer.
4729
     * - An actual 0 character.
4730
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4731
     */
4732
8.85M
    if (ctxt->input->cur < ctxt->input->end) {
4733
8.82M
        if ((cur == 0) && (CUR != 0)) {
4734
6.76k
            if (partial == 0) {
4735
6.76k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4736
6.76k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4737
6.76k
                NEXTL(1);
4738
6.76k
            }
4739
8.81M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4740
            /* Generate the error and skip the offending character */
4741
8.07M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4742
8.07M
                              "PCDATA invalid Char value %d\n", cur);
4743
8.07M
            NEXTL(l);
4744
8.07M
        }
4745
8.82M
    }
4746
8.85M
}
4747
4748
/**
4749
 * @deprecated Internal function, don't use.
4750
 * @param ctxt  an XML parser context
4751
 * @param cdata  unused
4752
 */
4753
void
4754
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4755
0
    xmlParseCharDataInternal(ctxt, 0);
4756
0
}
4757
4758
/**
4759
 * Parse an External ID or a Public ID
4760
 *
4761
 * @deprecated Internal function, don't use.
4762
 *
4763
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4764
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4765
 *
4766
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4768
 *
4769
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4770
 *
4771
 * @param ctxt  an XML parser context
4772
 * @param publicId  a xmlChar** receiving PubidLiteral
4773
 * @param strict  indicate whether we should restrict parsing to only
4774
 *          production [75], see NOTE below
4775
 * @returns the function returns SystemLiteral and in the second
4776
 *                case publicID receives PubidLiteral, is strict is off
4777
 *                it is possible to return NULL and have publicID set.
4778
 */
4779
4780
xmlChar *
4781
217k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4782
217k
    xmlChar *URI = NULL;
4783
4784
217k
    *publicId = NULL;
4785
217k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4786
73.1k
        SKIP(6);
4787
73.1k
  if (SKIP_BLANKS == 0) {
4788
682
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
682
                     "Space required after 'SYSTEM'\n");
4790
682
  }
4791
73.1k
  URI = xmlParseSystemLiteral(ctxt);
4792
73.1k
  if (URI == NULL) {
4793
865
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4794
865
        }
4795
144k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4796
25.9k
        SKIP(6);
4797
25.9k
  if (SKIP_BLANKS == 0) {
4798
1.11k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4799
1.11k
        "Space required after 'PUBLIC'\n");
4800
1.11k
  }
4801
25.9k
  *publicId = xmlParsePubidLiteral(ctxt);
4802
25.9k
  if (*publicId == NULL) {
4803
958
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4804
958
  }
4805
25.9k
  if (strict) {
4806
      /*
4807
       * We don't handle [83] so "S SystemLiteral" is required.
4808
       */
4809
19.9k
      if (SKIP_BLANKS == 0) {
4810
2.46k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811
2.46k
      "Space required after the Public Identifier\n");
4812
2.46k
      }
4813
19.9k
  } else {
4814
      /*
4815
       * We handle [83] so we return immediately, if
4816
       * "S SystemLiteral" is not detected. We skip blanks if no
4817
             * system literal was found, but this is harmless since we must
4818
             * be at the end of a NotationDecl.
4819
       */
4820
6.03k
      if (SKIP_BLANKS == 0) return(NULL);
4821
1.37k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4822
1.37k
  }
4823
21.0k
  URI = xmlParseSystemLiteral(ctxt);
4824
21.0k
  if (URI == NULL) {
4825
2.22k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4826
2.22k
        }
4827
21.0k
    }
4828
212k
    return(URI);
4829
217k
}
4830
4831
/**
4832
 * Skip an XML (SGML) comment <!-- .... -->
4833
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4834
 *  must not occur within comments. "
4835
 * This is the slow routine in case the accelerator for ascii didn't work
4836
 *
4837
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4838
 * @param ctxt  an XML parser context
4839
 * @param buf  the already parsed part of the buffer
4840
 * @param len  number of bytes in the buffer
4841
 * @param size  allocated size of the buffer
4842
 */
4843
static void
4844
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4845
56.7k
                       size_t len, size_t size) {
4846
56.7k
    int q, ql;
4847
56.7k
    int r, rl;
4848
56.7k
    int cur, l;
4849
56.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4850
0
                    XML_MAX_HUGE_LENGTH :
4851
56.7k
                    XML_MAX_TEXT_LENGTH;
4852
4853
56.7k
    if (buf == NULL) {
4854
3.32k
        len = 0;
4855
3.32k
  size = XML_PARSER_BUFFER_SIZE;
4856
3.32k
  buf = xmlMalloc(size);
4857
3.32k
  if (buf == NULL) {
4858
1
      xmlErrMemory(ctxt);
4859
1
      return;
4860
1
  }
4861
3.32k
    }
4862
56.7k
    q = xmlCurrentCharRecover(ctxt, &ql);
4863
56.7k
    if (q == 0)
4864
5.53k
        goto not_terminated;
4865
51.2k
    if (!IS_CHAR(q)) {
4866
4.17k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4867
4.17k
                          "xmlParseComment: invalid xmlChar value %d\n",
4868
4.17k
                    q);
4869
4.17k
  xmlFree (buf);
4870
4.17k
  return;
4871
4.17k
    }
4872
47.0k
    NEXTL(ql);
4873
47.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
4874
47.0k
    if (r == 0)
4875
2.95k
        goto not_terminated;
4876
44.0k
    if (!IS_CHAR(r)) {
4877
1.41k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4878
1.41k
                          "xmlParseComment: invalid xmlChar value %d\n",
4879
1.41k
                    r);
4880
1.41k
  xmlFree (buf);
4881
1.41k
  return;
4882
1.41k
    }
4883
42.6k
    NEXTL(rl);
4884
42.6k
    cur = xmlCurrentCharRecover(ctxt, &l);
4885
42.6k
    if (cur == 0)
4886
1.52k
        goto not_terminated;
4887
5.34M
    while (IS_CHAR(cur) && /* checked */
4888
5.34M
           ((cur != '>') ||
4889
5.33M
      (r != '-') || (q != '-'))) {
4890
5.30M
  if ((r == '-') && (q == '-')) {
4891
27.9k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4892
27.9k
  }
4893
5.30M
  if (len + 5 >= size) {
4894
17.7k
      xmlChar *tmp;
4895
17.7k
            int newSize;
4896
4897
17.7k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4898
17.7k
            if (newSize < 0) {
4899
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4900
0
                             "Comment too big found", NULL);
4901
0
                xmlFree (buf);
4902
0
                return;
4903
0
            }
4904
17.7k
      tmp = xmlRealloc(buf, newSize);
4905
17.7k
      if (tmp == NULL) {
4906
7
    xmlErrMemory(ctxt);
4907
7
    xmlFree(buf);
4908
7
    return;
4909
7
      }
4910
17.7k
      buf = tmp;
4911
17.7k
            size = newSize;
4912
17.7k
  }
4913
5.30M
  COPY_BUF(buf, len, q);
4914
4915
5.30M
  q = r;
4916
5.30M
  ql = rl;
4917
5.30M
  r = cur;
4918
5.30M
  rl = l;
4919
4920
5.30M
  NEXTL(l);
4921
5.30M
  cur = xmlCurrentCharRecover(ctxt, &l);
4922
4923
5.30M
    }
4924
41.1k
    buf[len] = 0;
4925
41.1k
    if (cur == 0) {
4926
4.32k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4927
4.32k
                       "Comment not terminated \n<!--%.50s\n", buf);
4928
36.8k
    } else if (!IS_CHAR(cur)) {
4929
3.91k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4930
3.91k
                          "xmlParseComment: invalid xmlChar value %d\n",
4931
3.91k
                    cur);
4932
32.8k
    } else {
4933
32.8k
        NEXT;
4934
32.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4935
32.8k
      (!ctxt->disableSAX))
4936
23.9k
      ctxt->sax->comment(ctxt->userData, buf);
4937
32.8k
    }
4938
41.1k
    xmlFree(buf);
4939
41.1k
    return;
4940
10.0k
not_terminated:
4941
10.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4942
10.0k
       "Comment not terminated\n", NULL);
4943
10.0k
    xmlFree(buf);
4944
10.0k
}
4945
4946
/**
4947
 * Parse an XML (SGML) comment. Always consumes '<!'.
4948
 *
4949
 * @deprecated Internal function, don't use.
4950
 *
4951
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4952
 *  must not occur within comments. "
4953
 *
4954
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4955
 * @param ctxt  an XML parser context
4956
 */
4957
void
4958
229k
xmlParseComment(xmlParserCtxt *ctxt) {
4959
229k
    xmlChar *buf = NULL;
4960
229k
    size_t size = XML_PARSER_BUFFER_SIZE;
4961
229k
    size_t len = 0;
4962
229k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4963
0
                       XML_MAX_HUGE_LENGTH :
4964
229k
                       XML_MAX_TEXT_LENGTH;
4965
229k
    const xmlChar *in;
4966
229k
    size_t nbchar = 0;
4967
229k
    int ccol;
4968
4969
    /*
4970
     * Check that there is a comment right here.
4971
     */
4972
229k
    if ((RAW != '<') || (NXT(1) != '!'))
4973
0
        return;
4974
229k
    SKIP(2);
4975
229k
    if ((RAW != '-') || (NXT(1) != '-'))
4976
907
        return;
4977
228k
    SKIP(2);
4978
228k
    GROW;
4979
4980
    /*
4981
     * Accelerated common case where input don't need to be
4982
     * modified before passing it to the handler.
4983
     */
4984
228k
    in = ctxt->input->cur;
4985
229k
    do {
4986
229k
  if (*in == 0xA) {
4987
23.1k
      do {
4988
23.1k
    ctxt->input->line++; ctxt->input->col = 1;
4989
23.1k
    in++;
4990
23.1k
      } while (*in == 0xA);
4991
22.9k
  }
4992
681k
get_more:
4993
681k
        ccol = ctxt->input->col;
4994
12.5M
  while (((*in > '-') && (*in <= 0x7F)) ||
4995
12.5M
         ((*in >= 0x20) && (*in < '-')) ||
4996
12.5M
         (*in == 0x09)) {
4997
11.8M
        in++;
4998
11.8M
        ccol++;
4999
11.8M
  }
5000
681k
  ctxt->input->col = ccol;
5001
681k
  if (*in == 0xA) {
5002
115k
      do {
5003
115k
    ctxt->input->line++; ctxt->input->col = 1;
5004
115k
    in++;
5005
115k
      } while (*in == 0xA);
5006
96.2k
      goto get_more;
5007
96.2k
  }
5008
584k
  nbchar = in - ctxt->input->cur;
5009
  /*
5010
   * save current set of data
5011
   */
5012
584k
  if (nbchar > 0) {
5013
562k
            if (nbchar > maxLength - len) {
5014
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5015
0
                                  "Comment too big found", NULL);
5016
0
                xmlFree(buf);
5017
0
                return;
5018
0
            }
5019
562k
            if (buf == NULL) {
5020
222k
                if ((*in == '-') && (in[1] == '-'))
5021
101k
                    size = nbchar + 1;
5022
121k
                else
5023
121k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5024
222k
                buf = xmlMalloc(size);
5025
222k
                if (buf == NULL) {
5026
7
                    xmlErrMemory(ctxt);
5027
7
                    return;
5028
7
                }
5029
222k
                len = 0;
5030
340k
            } else if (len + nbchar + 1 >= size) {
5031
37.3k
                xmlChar *new_buf;
5032
37.3k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5033
37.3k
                new_buf = xmlRealloc(buf, size);
5034
37.3k
                if (new_buf == NULL) {
5035
2
                    xmlErrMemory(ctxt);
5036
2
                    xmlFree(buf);
5037
2
                    return;
5038
2
                }
5039
37.3k
                buf = new_buf;
5040
37.3k
            }
5041
562k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5042
562k
            len += nbchar;
5043
562k
            buf[len] = 0;
5044
562k
  }
5045
584k
  ctxt->input->cur = in;
5046
584k
  if (*in == 0xA) {
5047
0
      in++;
5048
0
      ctxt->input->line++; ctxt->input->col = 1;
5049
0
  }
5050
584k
  if (*in == 0xD) {
5051
145k
      in++;
5052
145k
      if (*in == 0xA) {
5053
135k
    ctxt->input->cur = in;
5054
135k
    in++;
5055
135k
    ctxt->input->line++; ctxt->input->col = 1;
5056
135k
    goto get_more;
5057
135k
      }
5058
9.75k
      in--;
5059
9.75k
  }
5060
449k
  SHRINK;
5061
449k
  GROW;
5062
449k
  in = ctxt->input->cur;
5063
449k
  if (*in == '-') {
5064
392k
      if (in[1] == '-') {
5065
261k
          if (in[2] == '>') {
5066
171k
        SKIP(3);
5067
171k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5068
171k
            (!ctxt->disableSAX)) {
5069
75.7k
      if (buf != NULL)
5070
73.5k
          ctxt->sax->comment(ctxt->userData, buf);
5071
2.21k
      else
5072
2.21k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5073
75.7k
        }
5074
171k
        if (buf != NULL)
5075
169k
            xmlFree(buf);
5076
171k
        return;
5077
171k
    }
5078
89.2k
    if (buf != NULL) {
5079
86.8k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5080
86.8k
                          "Double hyphen within comment: "
5081
86.8k
                                      "<!--%.50s\n",
5082
86.8k
              buf);
5083
86.8k
    } else
5084
2.32k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
2.32k
                          "Double hyphen within comment\n", NULL);
5086
89.2k
    in++;
5087
89.2k
    ctxt->input->col++;
5088
89.2k
      }
5089
220k
      in++;
5090
220k
      ctxt->input->col++;
5091
220k
      goto get_more;
5092
392k
  }
5093
449k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5094
56.7k
    xmlParseCommentComplex(ctxt, buf, len, size);
5095
56.7k
}
5096
5097
5098
/**
5099
 * Parse the name of a PI
5100
 *
5101
 * @deprecated Internal function, don't use.
5102
 *
5103
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5104
 *
5105
 * @param ctxt  an XML parser context
5106
 * @returns the PITarget name or NULL
5107
 */
5108
5109
const xmlChar *
5110
132k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5111
132k
    const xmlChar *name;
5112
5113
132k
    name = xmlParseName(ctxt);
5114
132k
    if ((name != NULL) &&
5115
132k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5116
132k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5117
132k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5118
13.0k
  int i;
5119
13.0k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5120
13.0k
      (name[2] == 'l') && (name[3] == 0)) {
5121
4.96k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5122
4.96k
     "XML declaration allowed only at the start of the document\n");
5123
4.96k
      return(name);
5124
8.10k
  } else if (name[3] == 0) {
5125
1.26k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5126
1.26k
      return(name);
5127
1.26k
  }
5128
20.0k
  for (i = 0;;i++) {
5129
20.0k
      if (xmlW3CPIs[i] == NULL) break;
5130
13.4k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5131
304
          return(name);
5132
13.4k
  }
5133
6.54k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5134
6.54k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5135
6.54k
          NULL, NULL);
5136
6.54k
    }
5137
126k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5138
584
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5139
584
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5140
584
    }
5141
126k
    return(name);
5142
132k
}
5143
5144
#ifdef LIBXML_CATALOG_ENABLED
5145
/**
5146
 * Parse an XML Catalog Processing Instruction.
5147
 *
5148
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5149
 *
5150
 * Occurs only if allowed by the user and if happening in the Misc
5151
 * part of the document before any doctype information
5152
 * This will add the given catalog to the parsing context in order
5153
 * to be used if there is a resolution need further down in the document
5154
 *
5155
 * @param ctxt  an XML parser context
5156
 * @param catalog  the PI value string
5157
 */
5158
5159
static void
5160
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5161
0
    xmlChar *URL = NULL;
5162
0
    const xmlChar *tmp, *base;
5163
0
    xmlChar marker;
5164
5165
0
    tmp = catalog;
5166
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5168
0
  goto error;
5169
0
    tmp += 7;
5170
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5171
0
    if (*tmp != '=') {
5172
0
  return;
5173
0
    }
5174
0
    tmp++;
5175
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5176
0
    marker = *tmp;
5177
0
    if ((marker != '\'') && (marker != '"'))
5178
0
  goto error;
5179
0
    tmp++;
5180
0
    base = tmp;
5181
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5182
0
    if (*tmp == 0)
5183
0
  goto error;
5184
0
    URL = xmlStrndup(base, tmp - base);
5185
0
    tmp++;
5186
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5187
0
    if (*tmp != 0)
5188
0
  goto error;
5189
5190
0
    if (URL != NULL) {
5191
        /*
5192
         * Unfortunately, the catalog API doesn't report OOM errors.
5193
         * xmlGetLastError isn't very helpful since we don't know
5194
         * where the last error came from. We'd have to reset it
5195
         * before this call and restore it afterwards.
5196
         */
5197
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5198
0
  xmlFree(URL);
5199
0
    }
5200
0
    return;
5201
5202
0
error:
5203
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5204
0
            "Catalog PI syntax error: %s\n",
5205
0
      catalog, NULL);
5206
0
    if (URL != NULL)
5207
0
  xmlFree(URL);
5208
0
}
5209
#endif
5210
5211
/**
5212
 * Parse an XML Processing Instruction.
5213
 *
5214
 * @deprecated Internal function, don't use.
5215
 *
5216
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5217
 *
5218
 * The processing is transferred to SAX once parsed.
5219
 *
5220
 * @param ctxt  an XML parser context
5221
 */
5222
5223
void
5224
132k
xmlParsePI(xmlParserCtxt *ctxt) {
5225
132k
    xmlChar *buf = NULL;
5226
132k
    size_t len = 0;
5227
132k
    size_t size = XML_PARSER_BUFFER_SIZE;
5228
132k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5229
0
                       XML_MAX_HUGE_LENGTH :
5230
132k
                       XML_MAX_TEXT_LENGTH;
5231
132k
    int cur, l;
5232
132k
    const xmlChar *target;
5233
5234
132k
    if ((RAW == '<') && (NXT(1) == '?')) {
5235
  /*
5236
   * this is a Processing Instruction.
5237
   */
5238
132k
  SKIP(2);
5239
5240
  /*
5241
   * Parse the target name and check for special support like
5242
   * namespace.
5243
   */
5244
132k
        target = xmlParsePITarget(ctxt);
5245
132k
  if (target != NULL) {
5246
128k
      if ((RAW == '?') && (NXT(1) == '>')) {
5247
48.8k
    SKIP(2);
5248
5249
    /*
5250
     * SAX: PI detected.
5251
     */
5252
48.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5253
48.8k
        (ctxt->sax->processingInstruction != NULL))
5254
43.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5255
43.3k
                                         target, NULL);
5256
48.8k
    return;
5257
48.8k
      }
5258
79.5k
      buf = xmlMalloc(size);
5259
79.5k
      if (buf == NULL) {
5260
12
    xmlErrMemory(ctxt);
5261
12
    return;
5262
12
      }
5263
79.5k
      if (SKIP_BLANKS == 0) {
5264
15.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5265
15.3k
        "ParsePI: PI %s space expected\n", target);
5266
15.3k
      }
5267
79.5k
      cur = xmlCurrentCharRecover(ctxt, &l);
5268
15.4M
      while (IS_CHAR(cur) && /* checked */
5269
15.4M
       ((cur != '?') || (NXT(1) != '>'))) {
5270
15.3M
    if (len + 5 >= size) {
5271
11.5k
        xmlChar *tmp;
5272
11.5k
                    int newSize;
5273
5274
11.5k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5275
11.5k
                    if (newSize < 0) {
5276
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277
0
                                          "PI %s too big found", target);
5278
0
                        xmlFree(buf);
5279
0
                        return;
5280
0
                    }
5281
11.5k
        tmp = xmlRealloc(buf, newSize);
5282
11.5k
        if (tmp == NULL) {
5283
2
      xmlErrMemory(ctxt);
5284
2
      xmlFree(buf);
5285
2
      return;
5286
2
        }
5287
11.5k
        buf = tmp;
5288
11.5k
                    size = newSize;
5289
11.5k
    }
5290
15.3M
    COPY_BUF(buf, len, cur);
5291
15.3M
    NEXTL(l);
5292
15.3M
    cur = xmlCurrentCharRecover(ctxt, &l);
5293
15.3M
      }
5294
79.5k
      buf[len] = 0;
5295
79.5k
      if (cur != '?') {
5296
15.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5297
15.0k
          "ParsePI: PI %s never end ...\n", target);
5298
64.4k
      } else {
5299
64.4k
    SKIP(2);
5300
5301
64.4k
#ifdef LIBXML_CATALOG_ENABLED
5302
64.4k
    if ((ctxt->inSubset == 0) &&
5303
64.4k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5304
314
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5305
5306
314
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5307
314
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5308
0
       (allow == XML_CATA_ALLOW_ALL)))
5309
0
      xmlParseCatalogPI(ctxt, buf);
5310
314
    }
5311
64.4k
#endif
5312
5313
    /*
5314
     * SAX: PI detected.
5315
     */
5316
64.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5317
64.4k
        (ctxt->sax->processingInstruction != NULL))
5318
54.9k
        ctxt->sax->processingInstruction(ctxt->userData,
5319
54.9k
                                         target, buf);
5320
64.4k
      }
5321
79.5k
      xmlFree(buf);
5322
79.5k
  } else {
5323
4.41k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5324
4.41k
  }
5325
132k
    }
5326
132k
}
5327
5328
/**
5329
 * Parse a notation declaration. Always consumes '<!'.
5330
 *
5331
 * @deprecated Internal function, don't use.
5332
 *
5333
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5334
 *                           S? '>'
5335
 *
5336
 * Hence there is actually 3 choices:
5337
 *
5338
 *     'PUBLIC' S PubidLiteral
5339
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5340
 *     'SYSTEM' S SystemLiteral
5341
 *
5342
 * See the NOTE on #xmlParseExternalID.
5343
 *
5344
 * @param ctxt  an XML parser context
5345
 */
5346
5347
void
5348
13.3k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5349
13.3k
    const xmlChar *name;
5350
13.3k
    xmlChar *Pubid;
5351
13.3k
    xmlChar *Systemid;
5352
5353
13.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5354
0
        return;
5355
13.3k
    SKIP(2);
5356
5357
13.3k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5358
#ifdef LIBXML_VALID_ENABLED
5359
  int oldInputNr = ctxt->inputNr;
5360
#endif
5361
5362
10.9k
  SKIP(8);
5363
10.9k
  if (SKIP_BLANKS_PE == 0) {
5364
1.06k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5365
1.06k
         "Space required after '<!NOTATION'\n");
5366
1.06k
      return;
5367
1.06k
  }
5368
5369
9.92k
        name = xmlParseName(ctxt);
5370
9.92k
  if (name == NULL) {
5371
371
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5372
371
      return;
5373
371
  }
5374
9.55k
  if (xmlStrchr(name, ':') != NULL) {
5375
584
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5376
584
         "colons are forbidden from notation names '%s'\n",
5377
584
         name, NULL, NULL);
5378
584
  }
5379
9.55k
  if (SKIP_BLANKS_PE == 0) {
5380
227
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5381
227
         "Space required after the NOTATION name'\n");
5382
227
      return;
5383
227
  }
5384
5385
  /*
5386
   * Parse the IDs.
5387
   */
5388
9.33k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5389
9.33k
  SKIP_BLANKS_PE;
5390
5391
9.33k
  if (RAW == '>') {
5392
#ifdef LIBXML_VALID_ENABLED
5393
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5394
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5395
                           "Notation declaration doesn't start and stop"
5396
                                 " in the same entity\n",
5397
                                 NULL, NULL);
5398
      }
5399
#endif
5400
6.52k
      NEXT;
5401
6.52k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5402
6.52k
    (ctxt->sax->notationDecl != NULL))
5403
6.25k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5404
6.52k
  } else {
5405
2.80k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5406
2.80k
  }
5407
9.33k
  if (Systemid != NULL) xmlFree(Systemid);
5408
9.33k
  if (Pubid != NULL) xmlFree(Pubid);
5409
9.33k
    }
5410
13.3k
}
5411
5412
/**
5413
 * Parse an entity declaration. Always consumes '<!'.
5414
 *
5415
 * @deprecated Internal function, don't use.
5416
 *
5417
 *     [70] EntityDecl ::= GEDecl | PEDecl
5418
 *
5419
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5420
 *
5421
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5422
 *
5423
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5424
 *
5425
 *     [74] PEDef ::= EntityValue | ExternalID
5426
 *
5427
 *     [76] NDataDecl ::= S 'NDATA' S Name
5428
 *
5429
 * [ VC: Notation Declared ]
5430
 * The Name must match the declared name of a notation.
5431
 *
5432
 * @param ctxt  an XML parser context
5433
 */
5434
5435
void
5436
184k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5437
184k
    const xmlChar *name = NULL;
5438
184k
    xmlChar *value = NULL;
5439
184k
    xmlChar *URI = NULL, *literal = NULL;
5440
184k
    const xmlChar *ndata = NULL;
5441
184k
    int isParameter = 0;
5442
184k
    xmlChar *orig = NULL;
5443
5444
184k
    if ((CUR != '<') || (NXT(1) != '!'))
5445
0
        return;
5446
184k
    SKIP(2);
5447
5448
    /* GROW; done in the caller */
5449
184k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5450
#ifdef LIBXML_VALID_ENABLED
5451
  int oldInputNr = ctxt->inputNr;
5452
#endif
5453
5454
181k
  SKIP(6);
5455
181k
  if (SKIP_BLANKS_PE == 0) {
5456
5.79k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5457
5.79k
         "Space required after '<!ENTITY'\n");
5458
5.79k
  }
5459
5460
181k
  if (RAW == '%') {
5461
106k
      NEXT;
5462
106k
      if (SKIP_BLANKS_PE == 0) {
5463
7.74k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5464
7.74k
             "Space required after '%%'\n");
5465
7.74k
      }
5466
106k
      isParameter = 1;
5467
106k
  }
5468
5469
181k
        name = xmlParseName(ctxt);
5470
181k
  if (name == NULL) {
5471
5.74k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5472
5.74k
                     "xmlParseEntityDecl: no name\n");
5473
5.74k
            return;
5474
5.74k
  }
5475
176k
  if (xmlStrchr(name, ':') != NULL) {
5476
510
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5477
510
         "colons are forbidden from entities names '%s'\n",
5478
510
         name, NULL, NULL);
5479
510
  }
5480
176k
  if (SKIP_BLANKS_PE == 0) {
5481
2.86k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5482
2.86k
         "Space required after the entity name\n");
5483
2.86k
  }
5484
5485
  /*
5486
   * handle the various case of definitions...
5487
   */
5488
176k
  if (isParameter) {
5489
102k
      if ((RAW == '"') || (RAW == '\'')) {
5490
64.3k
          value = xmlParseEntityValue(ctxt, &orig);
5491
64.3k
    if (value) {
5492
63.4k
        if ((ctxt->sax != NULL) &&
5493
63.4k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5494
43.7k
      ctxt->sax->entityDecl(ctxt->userData, name,
5495
43.7k
                        XML_INTERNAL_PARAMETER_ENTITY,
5496
43.7k
            NULL, NULL, value);
5497
63.4k
    }
5498
64.3k
      } else {
5499
38.6k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5500
38.6k
    if ((URI == NULL) && (literal == NULL)) {
5501
1.75k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5502
1.75k
    }
5503
38.6k
    if (URI) {
5504
36.5k
                    if (xmlStrchr(URI, '#')) {
5505
381
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5506
36.1k
                    } else {
5507
36.1k
                        if ((ctxt->sax != NULL) &&
5508
36.1k
                            (!ctxt->disableSAX) &&
5509
36.1k
                            (ctxt->sax->entityDecl != NULL))
5510
34.0k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5511
34.0k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5512
34.0k
                                        literal, URI, NULL);
5513
36.1k
                    }
5514
36.5k
    }
5515
38.6k
      }
5516
102k
  } else {
5517
73.3k
      if ((RAW == '"') || (RAW == '\'')) {
5518
51.9k
          value = xmlParseEntityValue(ctxt, &orig);
5519
51.9k
    if ((ctxt->sax != NULL) &&
5520
51.9k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5521
26.0k
        ctxt->sax->entityDecl(ctxt->userData, name,
5522
26.0k
        XML_INTERNAL_GENERAL_ENTITY,
5523
26.0k
        NULL, NULL, value);
5524
    /*
5525
     * For expat compatibility in SAX mode.
5526
     */
5527
51.9k
    if ((ctxt->myDoc == NULL) ||
5528
51.9k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5529
20.2k
        if (ctxt->myDoc == NULL) {
5530
15.1k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5531
15.1k
      if (ctxt->myDoc == NULL) {
5532
22
          xmlErrMemory(ctxt);
5533
22
          goto done;
5534
22
      }
5535
15.0k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5536
15.0k
        }
5537
20.2k
        if (ctxt->myDoc->intSubset == NULL) {
5538
15.0k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5539
15.0k
              BAD_CAST "fake", NULL, NULL);
5540
15.0k
                        if (ctxt->myDoc->intSubset == NULL) {
5541
5
                            xmlErrMemory(ctxt);
5542
5
                            goto done;
5543
5
                        }
5544
15.0k
                    }
5545
5546
20.2k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5547
20.2k
                    NULL, NULL, value);
5548
20.2k
    }
5549
51.9k
      } else {
5550
21.3k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5551
21.3k
    if ((URI == NULL) && (literal == NULL)) {
5552
4.87k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5553
4.87k
    }
5554
21.3k
    if (URI) {
5555
15.7k
                    if (xmlStrchr(URI, '#')) {
5556
314
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5557
314
                    }
5558
15.7k
    }
5559
21.3k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5560
5.97k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5561
5.97k
           "Space required before 'NDATA'\n");
5562
5.97k
    }
5563
21.3k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5564
1.28k
        SKIP(5);
5565
1.28k
        if (SKIP_BLANKS_PE == 0) {
5566
767
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5567
767
               "Space required after 'NDATA'\n");
5568
767
        }
5569
1.28k
        ndata = xmlParseName(ctxt);
5570
1.28k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5571
1.28k
            (ctxt->sax->unparsedEntityDecl != NULL))
5572
223
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5573
223
            literal, URI, ndata);
5574
20.0k
    } else {
5575
20.0k
        if ((ctxt->sax != NULL) &&
5576
20.0k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5577
12.8k
      ctxt->sax->entityDecl(ctxt->userData, name,
5578
12.8k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5579
12.8k
            literal, URI, NULL);
5580
        /*
5581
         * For expat compatibility in SAX mode.
5582
         * assuming the entity replacement was asked for
5583
         */
5584
20.0k
        if ((ctxt->replaceEntities != 0) &&
5585
20.0k
      ((ctxt->myDoc == NULL) ||
5586
20.0k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5587
2.38k
      if (ctxt->myDoc == NULL) {
5588
903
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5589
903
          if (ctxt->myDoc == NULL) {
5590
1
              xmlErrMemory(ctxt);
5591
1
        goto done;
5592
1
          }
5593
902
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5594
902
      }
5595
5596
2.38k
      if (ctxt->myDoc->intSubset == NULL) {
5597
902
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5598
902
            BAD_CAST "fake", NULL, NULL);
5599
902
                            if (ctxt->myDoc->intSubset == NULL) {
5600
1
                                xmlErrMemory(ctxt);
5601
1
                                goto done;
5602
1
                            }
5603
902
                        }
5604
2.38k
      xmlSAX2EntityDecl(ctxt, name,
5605
2.38k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5606
2.38k
                  literal, URI, NULL);
5607
2.38k
        }
5608
20.0k
    }
5609
21.3k
      }
5610
73.3k
  }
5611
176k
  SKIP_BLANKS_PE;
5612
176k
  if (RAW != '>') {
5613
21.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5614
21.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5615
21.7k
      xmlHaltParser(ctxt);
5616
154k
  } else {
5617
#ifdef LIBXML_VALID_ENABLED
5618
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5619
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5620
                           "Entity declaration doesn't start and stop in"
5621
                                 " the same entity\n",
5622
                                 NULL, NULL);
5623
      }
5624
#endif
5625
154k
      NEXT;
5626
154k
  }
5627
176k
  if (orig != NULL) {
5628
      /*
5629
       * Ugly mechanism to save the raw entity value.
5630
       */
5631
114k
      xmlEntityPtr cur = NULL;
5632
5633
114k
      if (isParameter) {
5634
63.4k
          if ((ctxt->sax != NULL) &&
5635
63.4k
        (ctxt->sax->getParameterEntity != NULL))
5636
63.4k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5637
63.4k
      } else {
5638
50.6k
          if ((ctxt->sax != NULL) &&
5639
50.6k
        (ctxt->sax->getEntity != NULL))
5640
50.6k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5641
50.6k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5642
5.14k
        cur = xmlSAX2GetEntity(ctxt, name);
5643
5.14k
    }
5644
50.6k
      }
5645
114k
            if ((cur != NULL) && (cur->orig == NULL)) {
5646
73.9k
    cur->orig = orig;
5647
73.9k
                orig = NULL;
5648
73.9k
      }
5649
114k
  }
5650
5651
176k
done:
5652
176k
  if (value != NULL) xmlFree(value);
5653
176k
  if (URI != NULL) xmlFree(URI);
5654
176k
  if (literal != NULL) xmlFree(literal);
5655
176k
        if (orig != NULL) xmlFree(orig);
5656
176k
    }
5657
184k
}
5658
5659
/**
5660
 * Parse an attribute default declaration
5661
 *
5662
 * @deprecated Internal function, don't use.
5663
 *
5664
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5665
 *
5666
 * [ VC: Required Attribute ]
5667
 * if the default declaration is the keyword \#REQUIRED, then the
5668
 * attribute must be specified for all elements of the type in the
5669
 * attribute-list declaration.
5670
 *
5671
 * [ VC: Attribute Default Legal ]
5672
 * The declared default value must meet the lexical constraints of
5673
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5674
 *
5675
 * [ VC: Fixed Attribute Default ]
5676
 * if an attribute has a default value declared with the \#FIXED
5677
 * keyword, instances of that attribute must match the default value.
5678
 *
5679
 * [ WFC: No < in Attribute Values ]
5680
 * handled in #xmlParseAttValue
5681
 *
5682
 * @param ctxt  an XML parser context
5683
 * @param value  Receive a possible fixed default value for the attribute
5684
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5685
 *          or XML_ATTRIBUTE_FIXED.
5686
 */
5687
5688
int
5689
91.5k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5690
91.5k
    int val;
5691
91.5k
    xmlChar *ret;
5692
5693
91.5k
    *value = NULL;
5694
91.5k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5695
1.19k
  SKIP(9);
5696
1.19k
  return(XML_ATTRIBUTE_REQUIRED);
5697
1.19k
    }
5698
90.3k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5699
30.2k
  SKIP(8);
5700
30.2k
  return(XML_ATTRIBUTE_IMPLIED);
5701
30.2k
    }
5702
60.1k
    val = XML_ATTRIBUTE_NONE;
5703
60.1k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5704
8.29k
  SKIP(6);
5705
8.29k
  val = XML_ATTRIBUTE_FIXED;
5706
8.29k
  if (SKIP_BLANKS_PE == 0) {
5707
251
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5708
251
         "Space required after '#FIXED'\n");
5709
251
  }
5710
8.29k
    }
5711
60.1k
    ret = xmlParseAttValue(ctxt);
5712
60.1k
    if (ret == NULL) {
5713
7.26k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5714
7.26k
           "Attribute default value declaration error\n");
5715
7.26k
    } else
5716
52.8k
        *value = ret;
5717
60.1k
    return(val);
5718
90.3k
}
5719
5720
/**
5721
 * Parse an Notation attribute type.
5722
 *
5723
 * @deprecated Internal function, don't use.
5724
 *
5725
 * Note: the leading 'NOTATION' S part has already being parsed...
5726
 *
5727
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5728
 *
5729
 * [ VC: Notation Attributes ]
5730
 * Values of this type must match one of the notation names included
5731
 * in the declaration; all notation names in the declaration must be declared.
5732
 *
5733
 * @param ctxt  an XML parser context
5734
 * @returns the notation attribute tree built while parsing
5735
 */
5736
5737
xmlEnumeration *
5738
928
xmlParseNotationType(xmlParserCtxt *ctxt) {
5739
928
    const xmlChar *name;
5740
928
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5741
5742
928
    if (RAW != '(') {
5743
212
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5744
212
  return(NULL);
5745
212
    }
5746
1.08k
    do {
5747
1.08k
        NEXT;
5748
1.08k
  SKIP_BLANKS_PE;
5749
1.08k
        name = xmlParseName(ctxt);
5750
1.08k
  if (name == NULL) {
5751
229
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5752
229
         "Name expected in NOTATION declaration\n");
5753
229
            xmlFreeEnumeration(ret);
5754
229
      return(NULL);
5755
229
  }
5756
851
        tmp = NULL;
5757
#ifdef LIBXML_VALID_ENABLED
5758
        if (ctxt->validate) {
5759
            tmp = ret;
5760
            while (tmp != NULL) {
5761
                if (xmlStrEqual(name, tmp->name)) {
5762
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5763
              "standalone: attribute notation value token %s duplicated\n",
5764
                                     name, NULL);
5765
                    if (!xmlDictOwns(ctxt->dict, name))
5766
                        xmlFree((xmlChar *) name);
5767
                    break;
5768
                }
5769
                tmp = tmp->next;
5770
            }
5771
        }
5772
#endif /* LIBXML_VALID_ENABLED */
5773
851
  if (tmp == NULL) {
5774
851
      cur = xmlCreateEnumeration(name);
5775
851
      if (cur == NULL) {
5776
1
                xmlErrMemory(ctxt);
5777
1
                xmlFreeEnumeration(ret);
5778
1
                return(NULL);
5779
1
            }
5780
850
      if (last == NULL) ret = last = cur;
5781
358
      else {
5782
358
    last->next = cur;
5783
358
    last = cur;
5784
358
      }
5785
850
  }
5786
850
  SKIP_BLANKS_PE;
5787
850
    } while (RAW == '|');
5788
486
    if (RAW != ')') {
5789
253
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5790
253
        xmlFreeEnumeration(ret);
5791
253
  return(NULL);
5792
253
    }
5793
233
    NEXT;
5794
233
    return(ret);
5795
486
}
5796
5797
/**
5798
 * Parse an Enumeration attribute type.
5799
 *
5800
 * @deprecated Internal function, don't use.
5801
 *
5802
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803
 *
5804
 * [ VC: Enumeration ]
5805
 * Values of this type must match one of the Nmtoken tokens in
5806
 * the declaration
5807
 *
5808
 * @param ctxt  an XML parser context
5809
 * @returns the enumeration attribute tree built while parsing
5810
 */
5811
5812
xmlEnumeration *
5813
13.8k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5814
13.8k
    xmlChar *name;
5815
13.8k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5816
5817
13.8k
    if (RAW != '(') {
5818
8.19k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5819
8.19k
  return(NULL);
5820
8.19k
    }
5821
20.9k
    do {
5822
20.9k
        NEXT;
5823
20.9k
  SKIP_BLANKS_PE;
5824
20.9k
        name = xmlParseNmtoken(ctxt);
5825
20.9k
  if (name == NULL) {
5826
307
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5827
307
      return(ret);
5828
307
  }
5829
20.6k
        tmp = NULL;
5830
#ifdef LIBXML_VALID_ENABLED
5831
        if (ctxt->validate) {
5832
            tmp = ret;
5833
            while (tmp != NULL) {
5834
                if (xmlStrEqual(name, tmp->name)) {
5835
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5836
              "standalone: attribute enumeration value token %s duplicated\n",
5837
                                     name, NULL);
5838
                    if (!xmlDictOwns(ctxt->dict, name))
5839
                        xmlFree(name);
5840
                    break;
5841
                }
5842
                tmp = tmp->next;
5843
            }
5844
        }
5845
#endif /* LIBXML_VALID_ENABLED */
5846
20.6k
  if (tmp == NULL) {
5847
20.6k
      cur = xmlCreateEnumeration(name);
5848
20.6k
      if (!xmlDictOwns(ctxt->dict, name))
5849
20.6k
    xmlFree(name);
5850
20.6k
      if (cur == NULL) {
5851
5
                xmlErrMemory(ctxt);
5852
5
                xmlFreeEnumeration(ret);
5853
5
                return(NULL);
5854
5
            }
5855
20.6k
      if (last == NULL) ret = last = cur;
5856
15.2k
      else {
5857
15.2k
    last->next = cur;
5858
15.2k
    last = cur;
5859
15.2k
      }
5860
20.6k
  }
5861
20.6k
  SKIP_BLANKS_PE;
5862
20.6k
    } while (RAW == '|');
5863
5.33k
    if (RAW != ')') {
5864
698
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5865
698
  return(ret);
5866
698
    }
5867
4.63k
    NEXT;
5868
4.63k
    return(ret);
5869
5.33k
}
5870
5871
/**
5872
 * Parse an Enumerated attribute type.
5873
 *
5874
 * @deprecated Internal function, don't use.
5875
 *
5876
 *     [57] EnumeratedType ::= NotationType | Enumeration
5877
 *
5878
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5879
 *
5880
 * @param ctxt  an XML parser context
5881
 * @param tree  the enumeration tree built while parsing
5882
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5883
 */
5884
5885
int
5886
15.0k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5887
15.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5888
1.16k
  SKIP(8);
5889
1.16k
  if (SKIP_BLANKS_PE == 0) {
5890
235
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5891
235
         "Space required after 'NOTATION'\n");
5892
235
      return(0);
5893
235
  }
5894
928
  *tree = xmlParseNotationType(ctxt);
5895
928
  if (*tree == NULL) return(0);
5896
233
  return(XML_ATTRIBUTE_NOTATION);
5897
928
    }
5898
13.8k
    *tree = xmlParseEnumerationType(ctxt);
5899
13.8k
    if (*tree == NULL) return(0);
5900
5.41k
    return(XML_ATTRIBUTE_ENUMERATION);
5901
13.8k
}
5902
5903
/**
5904
 * Parse the Attribute list def for an element
5905
 *
5906
 * @deprecated Internal function, don't use.
5907
 *
5908
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5909
 *
5910
 *     [55] StringType ::= 'CDATA'
5911
 *
5912
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5913
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5914
 *
5915
 * Validity constraints for attribute values syntax are checked in
5916
 * #xmlValidateAttributeValue
5917
 *
5918
 * [ VC: ID ]
5919
 * Values of type ID must match the Name production. A name must not
5920
 * appear more than once in an XML document as a value of this type;
5921
 * i.e., ID values must uniquely identify the elements which bear them.
5922
 *
5923
 * [ VC: One ID per Element Type ]
5924
 * No element type may have more than one ID attribute specified.
5925
 *
5926
 * [ VC: ID Attribute Default ]
5927
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5928
 *
5929
 * [ VC: IDREF ]
5930
 * Values of type IDREF must match the Name production, and values
5931
 * of type IDREFS must match Names; each IDREF Name must match the value
5932
 * of an ID attribute on some element in the XML document; i.e. IDREF
5933
 * values must match the value of some ID attribute.
5934
 *
5935
 * [ VC: Entity Name ]
5936
 * Values of type ENTITY must match the Name production, values
5937
 * of type ENTITIES must match Names; each Entity Name must match the
5938
 * name of an unparsed entity declared in the DTD.
5939
 *
5940
 * [ VC: Name Token ]
5941
 * Values of type NMTOKEN must match the Nmtoken production; values
5942
 * of type NMTOKENS must match Nmtokens.
5943
 *
5944
 * @param ctxt  an XML parser context
5945
 * @param tree  the enumeration tree built while parsing
5946
 * @returns the attribute type
5947
 */
5948
int
5949
106k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5950
106k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5951
27.7k
  SKIP(5);
5952
27.7k
  return(XML_ATTRIBUTE_CDATA);
5953
78.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5954
598
  SKIP(6);
5955
598
  return(XML_ATTRIBUTE_IDREFS);
5956
78.2k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5957
1.75k
  SKIP(5);
5958
1.75k
  return(XML_ATTRIBUTE_IDREF);
5959
76.4k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5960
48.0k
        SKIP(2);
5961
48.0k
  return(XML_ATTRIBUTE_ID);
5962
48.0k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5963
450
  SKIP(6);
5964
450
  return(XML_ATTRIBUTE_ENTITY);
5965
27.9k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5966
2.06k
  SKIP(8);
5967
2.06k
  return(XML_ATTRIBUTE_ENTITIES);
5968
25.8k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5969
7.46k
  SKIP(8);
5970
7.46k
  return(XML_ATTRIBUTE_NMTOKENS);
5971
18.3k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5972
3.39k
  SKIP(7);
5973
3.39k
  return(XML_ATTRIBUTE_NMTOKEN);
5974
3.39k
     }
5975
15.0k
     return(xmlParseEnumeratedType(ctxt, tree));
5976
106k
}
5977
5978
/**
5979
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5980
 *
5981
 * @deprecated Internal function, don't use.
5982
 *
5983
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5984
 *
5985
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5986
 * @param ctxt  an XML parser context
5987
 */
5988
void
5989
61.0k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5990
61.0k
    const xmlChar *elemName;
5991
61.0k
    const xmlChar *attrName;
5992
61.0k
    xmlEnumerationPtr tree;
5993
5994
61.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5995
0
        return;
5996
61.0k
    SKIP(2);
5997
5998
61.0k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5999
#ifdef LIBXML_VALID_ENABLED
6000
  int oldInputNr = ctxt->inputNr;
6001
#endif
6002
6003
55.8k
  SKIP(7);
6004
55.8k
  if (SKIP_BLANKS_PE == 0) {
6005
942
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6006
942
                     "Space required after '<!ATTLIST'\n");
6007
942
  }
6008
55.8k
        elemName = xmlParseName(ctxt);
6009
55.8k
  if (elemName == NULL) {
6010
359
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6011
359
         "ATTLIST: no name for Element\n");
6012
359
      return;
6013
359
  }
6014
55.4k
  SKIP_BLANKS_PE;
6015
55.4k
  GROW;
6016
138k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6017
110k
      int type;
6018
110k
      int def;
6019
110k
      xmlChar *defaultValue = NULL;
6020
6021
110k
      GROW;
6022
110k
            tree = NULL;
6023
110k
      attrName = xmlParseName(ctxt);
6024
110k
      if (attrName == NULL) {
6025
1.03k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6026
1.03k
             "ATTLIST: no name for Attribute\n");
6027
1.03k
    break;
6028
1.03k
      }
6029
109k
      GROW;
6030
109k
      if (SKIP_BLANKS_PE == 0) {
6031
2.67k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6032
2.67k
            "Space required after the attribute name\n");
6033
2.67k
    break;
6034
2.67k
      }
6035
6036
106k
      type = xmlParseAttributeType(ctxt, &tree);
6037
106k
      if (type <= 0) {
6038
9.35k
          break;
6039
9.35k
      }
6040
6041
97.1k
      GROW;
6042
97.1k
      if (SKIP_BLANKS_PE == 0) {
6043
5.62k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6044
5.62k
             "Space required after the attribute type\n");
6045
5.62k
          if (tree != NULL)
6046
794
        xmlFreeEnumeration(tree);
6047
5.62k
    break;
6048
5.62k
      }
6049
6050
91.5k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6051
91.5k
      if (def <= 0) {
6052
0
                if (defaultValue != NULL)
6053
0
        xmlFree(defaultValue);
6054
0
          if (tree != NULL)
6055
0
        xmlFreeEnumeration(tree);
6056
0
          break;
6057
0
      }
6058
91.5k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6059
41.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6060
6061
91.5k
      GROW;
6062
91.5k
            if (RAW != '>') {
6063
80.9k
    if (SKIP_BLANKS_PE == 0) {
6064
8.43k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065
8.43k
      "Space required after the attribute default value\n");
6066
8.43k
        if (defaultValue != NULL)
6067
920
      xmlFree(defaultValue);
6068
8.43k
        if (tree != NULL)
6069
594
      xmlFreeEnumeration(tree);
6070
8.43k
        break;
6071
8.43k
    }
6072
80.9k
      }
6073
83.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6074
83.1k
    (ctxt->sax->attributeDecl != NULL))
6075
52.3k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6076
52.3k
                          type, def, defaultValue, tree);
6077
30.8k
      else if (tree != NULL)
6078
1.18k
    xmlFreeEnumeration(tree);
6079
6080
83.1k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6081
83.1k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6082
83.1k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6083
51.9k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6084
51.9k
      }
6085
83.1k
      if (ctxt->sax2) {
6086
83.1k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6087
83.1k
      }
6088
83.1k
      if (defaultValue != NULL)
6089
51.9k
          xmlFree(defaultValue);
6090
83.1k
      GROW;
6091
83.1k
  }
6092
55.4k
  if (RAW == '>') {
6093
#ifdef LIBXML_VALID_ENABLED
6094
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6095
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6096
                                 "Attribute list declaration doesn't start and"
6097
                                 " stop in the same entity\n",
6098
                                 NULL, NULL);
6099
      }
6100
#endif
6101
29.3k
      NEXT;
6102
29.3k
  }
6103
55.4k
    }
6104
61.0k
}
6105
6106
/**
6107
 * Handle PEs and check that we don't pop the entity that started
6108
 * a balanced group.
6109
 *
6110
 * @param ctxt  parser context
6111
 * @param openInputNr  input nr of the entity with opening '('
6112
 */
6113
static void
6114
304k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6115
304k
    SKIP_BLANKS;
6116
304k
    GROW;
6117
6118
304k
    (void) openInputNr;
6119
6120
304k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6121
223k
        return;
6122
6123
81.9k
    while (!PARSER_STOPPED(ctxt)) {
6124
81.0k
        if (ctxt->input->cur >= ctxt->input->end) {
6125
#ifdef LIBXML_VALID_ENABLED
6126
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6127
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6128
                                 "Element content declaration doesn't start "
6129
                                 "and stop in the same entity\n",
6130
                                 NULL, NULL);
6131
            }
6132
#endif
6133
592
            if (PARSER_IN_PE(ctxt))
6134
285
                xmlPopPE(ctxt);
6135
307
            else
6136
307
                break;
6137
80.4k
        } else if (RAW == '%') {
6138
1.13k
            xmlParsePERefInternal(ctxt, 0);
6139
79.3k
        } else {
6140
79.3k
            break;
6141
79.3k
        }
6142
6143
1.41k
        SKIP_BLANKS;
6144
1.41k
        GROW;
6145
1.41k
    }
6146
80.4k
}
6147
6148
/**
6149
 * Parse the declaration for a Mixed Element content
6150
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6151
 *
6152
 * @deprecated Internal function, don't use.
6153
 *
6154
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6155
 *                    '(' S? '#PCDATA' S? ')'
6156
 *
6157
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6158
 *
6159
 * [ VC: No Duplicate Types ]
6160
 * The same name must not appear more than once in a single
6161
 * mixed-content declaration.
6162
 *
6163
 * @param ctxt  an XML parser context
6164
 * @param openInputNr  the input used for the current entity, needed for
6165
 * boundary checks
6166
 * @returns the list of the xmlElementContent describing the element choices
6167
 */
6168
xmlElementContent *
6169
11.1k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6170
11.1k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6171
11.1k
    const xmlChar *elem = NULL;
6172
6173
11.1k
    GROW;
6174
11.1k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6175
11.1k
  SKIP(7);
6176
11.1k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6177
11.1k
  if (RAW == ')') {
6178
#ifdef LIBXML_VALID_ENABLED
6179
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6180
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181
                                 "Element content declaration doesn't start "
6182
                                 "and stop in the same entity\n",
6183
                                 NULL, NULL);
6184
      }
6185
#endif
6186
8.06k
      NEXT;
6187
8.06k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6188
8.06k
      if (ret == NULL)
6189
3
                goto mem_error;
6190
8.06k
      if (RAW == '*') {
6191
204
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6192
204
    NEXT;
6193
204
      }
6194
8.06k
      return(ret);
6195
8.06k
  }
6196
3.12k
  if ((RAW == '(') || (RAW == '|')) {
6197
2.51k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6198
2.51k
      if (ret == NULL)
6199
2
                goto mem_error;
6200
2.51k
  }
6201
5.43k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6202
2.62k
      NEXT;
6203
2.62k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6204
2.62k
            if (n == NULL)
6205
1
                goto mem_error;
6206
2.62k
      if (elem == NULL) {
6207
2.28k
    n->c1 = cur;
6208
2.28k
    if (cur != NULL)
6209
2.28k
        cur->parent = n;
6210
2.28k
    ret = cur = n;
6211
2.28k
      } else {
6212
337
          cur->c2 = n;
6213
337
    n->parent = cur;
6214
337
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6215
337
                if (n->c1 == NULL)
6216
1
                    goto mem_error;
6217
336
    n->c1->parent = n;
6218
336
    cur = n;
6219
336
      }
6220
2.61k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6221
2.61k
      elem = xmlParseName(ctxt);
6222
2.61k
      if (elem == NULL) {
6223
304
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6224
304
      "xmlParseElementMixedContentDecl : Name expected\n");
6225
304
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6226
304
    return(NULL);
6227
304
      }
6228
2.31k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6229
2.31k
  }
6230
2.81k
  if ((RAW == ')') && (NXT(1) == '*')) {
6231
1.36k
      if (elem != NULL) {
6232
1.36k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6233
1.36k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6234
1.36k
    if (cur->c2 == NULL)
6235
1
                    goto mem_error;
6236
1.36k
    cur->c2->parent = cur;
6237
1.36k
            }
6238
1.36k
            if (ret != NULL)
6239
1.36k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6240
#ifdef LIBXML_VALID_ENABLED
6241
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6242
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6243
                                 "Element content declaration doesn't start "
6244
                                 "and stop in the same entity\n",
6245
                                 NULL, NULL);
6246
      }
6247
#endif
6248
1.36k
      SKIP(2);
6249
1.44k
  } else {
6250
1.44k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6251
1.44k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6252
1.44k
      return(NULL);
6253
1.44k
  }
6254
6255
2.81k
    } else {
6256
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6257
0
    }
6258
1.36k
    return(ret);
6259
6260
8
mem_error:
6261
8
    xmlErrMemory(ctxt);
6262
8
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6263
8
    return(NULL);
6264
11.1k
}
6265
6266
/**
6267
 * Parse the declaration for a Mixed Element content
6268
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6269
 *
6270
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6271
 *
6272
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6273
 *
6274
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6275
 *
6276
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6277
 *
6278
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6279
 * TODO Parameter-entity replacement text must be properly nested
6280
 *  with parenthesized groups. That is to say, if either of the
6281
 *  opening or closing parentheses in a choice, seq, or Mixed
6282
 *  construct is contained in the replacement text for a parameter
6283
 *  entity, both must be contained in the same replacement text. For
6284
 *  interoperability, if a parameter-entity reference appears in a
6285
 *  choice, seq, or Mixed construct, its replacement text should not
6286
 *  be empty, and neither the first nor last non-blank character of
6287
 *  the replacement text should be a connector (| or ,).
6288
 *
6289
 * @param ctxt  an XML parser context
6290
 * @param openInputNr  the input used for the current entity, needed for
6291
 * boundary checks
6292
 * @param depth  the level of recursion
6293
 * @returns the tree of xmlElementContent describing the element
6294
 *          hierarchy.
6295
 */
6296
static xmlElementContentPtr
6297
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6298
110k
                                       int depth) {
6299
110k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6300
110k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6301
110k
    const xmlChar *elem;
6302
110k
    xmlChar type = 0;
6303
6304
110k
    if (depth > maxDepth) {
6305
229
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6306
229
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6307
229
                "use XML_PARSE_HUGE\n", depth);
6308
229
  return(NULL);
6309
229
    }
6310
110k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6311
110k
    if (RAW == '(') {
6312
70.1k
        int newInputNr = ctxt->inputNr;
6313
6314
        /* Recurse on first child */
6315
70.1k
  NEXT;
6316
70.1k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6317
70.1k
                                                           depth + 1);
6318
70.1k
        if (cur == NULL)
6319
63.4k
            return(NULL);
6320
70.1k
    } else {
6321
40.2k
  elem = xmlParseName(ctxt);
6322
40.2k
  if (elem == NULL) {
6323
2.43k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6324
2.43k
      return(NULL);
6325
2.43k
  }
6326
37.8k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6327
37.8k
  if (cur == NULL) {
6328
6
      xmlErrMemory(ctxt);
6329
6
      return(NULL);
6330
6
  }
6331
37.8k
  GROW;
6332
37.8k
  if (RAW == '?') {
6333
3.51k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6334
3.51k
      NEXT;
6335
34.3k
  } else if (RAW == '*') {
6336
2.85k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6337
2.85k
      NEXT;
6338
31.4k
  } else if (RAW == '+') {
6339
561
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6340
561
      NEXT;
6341
30.8k
  } else {
6342
30.8k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6343
30.8k
  }
6344
37.8k
  GROW;
6345
37.8k
    }
6346
87.7k
    while (!PARSER_STOPPED(ctxt)) {
6347
86.8k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6348
86.8k
        if (RAW == ')')
6349
33.8k
            break;
6350
        /*
6351
   * Each loop we parse one separator and one element.
6352
   */
6353
53.0k
        if (RAW == ',') {
6354
31.6k
      if (type == 0) type = CUR;
6355
6356
      /*
6357
       * Detect "Name | Name , Name" error
6358
       */
6359
20.7k
      else if (type != CUR) {
6360
222
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6361
222
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6362
222
                      type);
6363
222
    if ((last != NULL) && (last != ret))
6364
222
        xmlFreeDocElementContent(ctxt->myDoc, last);
6365
222
    if (ret != NULL)
6366
222
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6367
222
    return(NULL);
6368
222
      }
6369
31.4k
      NEXT;
6370
6371
31.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6372
31.4k
      if (op == NULL) {
6373
4
                xmlErrMemory(ctxt);
6374
4
    if ((last != NULL) && (last != ret))
6375
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6376
4
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6377
4
    return(NULL);
6378
4
      }
6379
31.4k
      if (last == NULL) {
6380
10.9k
    op->c1 = ret;
6381
10.9k
    if (ret != NULL)
6382
10.9k
        ret->parent = op;
6383
10.9k
    ret = cur = op;
6384
20.5k
      } else {
6385
20.5k
          cur->c2 = op;
6386
20.5k
    if (op != NULL)
6387
20.5k
        op->parent = cur;
6388
20.5k
    op->c1 = last;
6389
20.5k
    if (last != NULL)
6390
20.5k
        last->parent = op;
6391
20.5k
    cur =op;
6392
20.5k
    last = NULL;
6393
20.5k
      }
6394
31.4k
  } else if (RAW == '|') {
6395
17.4k
      if (type == 0) type = CUR;
6396
6397
      /*
6398
       * Detect "Name , Name | Name" error
6399
       */
6400
4.23k
      else if (type != CUR) {
6401
234
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6402
234
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6403
234
          type);
6404
234
    if ((last != NULL) && (last != ret))
6405
234
        xmlFreeDocElementContent(ctxt->myDoc, last);
6406
234
    if (ret != NULL)
6407
234
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6408
234
    return(NULL);
6409
234
      }
6410
17.2k
      NEXT;
6411
6412
17.2k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6413
17.2k
      if (op == NULL) {
6414
3
                xmlErrMemory(ctxt);
6415
3
    if ((last != NULL) && (last != ret))
6416
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6417
3
    if (ret != NULL)
6418
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
3
    return(NULL);
6420
3
      }
6421
17.2k
      if (last == NULL) {
6422
13.2k
    op->c1 = ret;
6423
13.2k
    if (ret != NULL)
6424
13.2k
        ret->parent = op;
6425
13.2k
    ret = cur = op;
6426
13.2k
      } else {
6427
4.00k
          cur->c2 = op;
6428
4.00k
    if (op != NULL)
6429
4.00k
        op->parent = cur;
6430
4.00k
    op->c1 = last;
6431
4.00k
    if (last != NULL)
6432
4.00k
        last->parent = op;
6433
4.00k
    cur =op;
6434
4.00k
    last = NULL;
6435
4.00k
      }
6436
17.2k
  } else {
6437
3.90k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6438
3.90k
      if ((last != NULL) && (last != ret))
6439
1.09k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6440
3.90k
      if (ret != NULL)
6441
3.90k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
3.90k
      return(NULL);
6443
3.90k
  }
6444
48.7k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6445
48.7k
        if (RAW == '(') {
6446
9.73k
            int newInputNr = ctxt->inputNr;
6447
6448
      /* Recurse on second child */
6449
9.73k
      NEXT;
6450
9.73k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6451
9.73k
                                                          depth + 1);
6452
9.73k
            if (last == NULL) {
6453
4.81k
    if (ret != NULL)
6454
4.81k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6455
4.81k
    return(NULL);
6456
4.81k
            }
6457
38.9k
  } else {
6458
38.9k
      elem = xmlParseName(ctxt);
6459
38.9k
      if (elem == NULL) {
6460
729
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6461
729
    if (ret != NULL)
6462
729
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6463
729
    return(NULL);
6464
729
      }
6465
38.2k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6466
38.2k
      if (last == NULL) {
6467
4
                xmlErrMemory(ctxt);
6468
4
    if (ret != NULL)
6469
4
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6470
4
    return(NULL);
6471
4
      }
6472
38.2k
      if (RAW == '?') {
6473
14.0k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6474
14.0k
    NEXT;
6475
24.1k
      } else if (RAW == '*') {
6476
8.69k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6477
8.69k
    NEXT;
6478
15.4k
      } else if (RAW == '+') {
6479
447
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6480
447
    NEXT;
6481
15.0k
      } else {
6482
15.0k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6483
15.0k
      }
6484
38.2k
  }
6485
48.7k
    }
6486
34.6k
    if ((cur != NULL) && (last != NULL)) {
6487
17.0k
        cur->c2 = last;
6488
17.0k
  if (last != NULL)
6489
17.0k
      last->parent = cur;
6490
17.0k
    }
6491
#ifdef LIBXML_VALID_ENABLED
6492
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6493
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6494
                         "Element content declaration doesn't start "
6495
                         "and stop in the same entity\n",
6496
                         NULL, NULL);
6497
    }
6498
#endif
6499
34.6k
    NEXT;
6500
34.6k
    if (RAW == '?') {
6501
1.50k
  if (ret != NULL) {
6502
1.50k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6503
1.50k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6504
524
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6505
979
      else
6506
979
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6507
1.50k
  }
6508
1.50k
  NEXT;
6509
33.1k
    } else if (RAW == '*') {
6510
6.84k
  if (ret != NULL) {
6511
6.84k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6512
6.84k
      cur = ret;
6513
      /*
6514
       * Some normalization:
6515
       * (a | b* | c?)* == (a | b | c)*
6516
       */
6517
14.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
7.56k
    if ((cur->c1 != NULL) &&
6519
7.56k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
7.56k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6521
435
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
7.56k
    if ((cur->c2 != NULL) &&
6523
7.56k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
7.56k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6525
1.17k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
7.56k
    cur = cur->c2;
6527
7.56k
      }
6528
6.84k
  }
6529
6.84k
  NEXT;
6530
26.3k
    } else if (RAW == '+') {
6531
1.95k
  if (ret != NULL) {
6532
1.95k
      int found = 0;
6533
6534
1.95k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6535
1.95k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6536
495
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6537
1.46k
      else
6538
1.46k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6539
      /*
6540
       * Some normalization:
6541
       * (a | b*)+ == (a | b)*
6542
       * (a | b?)+ == (a | b)*
6543
       */
6544
5.21k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6545
3.25k
    if ((cur->c1 != NULL) &&
6546
3.25k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6547
3.25k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6548
957
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6549
957
        found = 1;
6550
957
    }
6551
3.25k
    if ((cur->c2 != NULL) &&
6552
3.25k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6553
3.25k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6554
777
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6555
777
        found = 1;
6556
777
    }
6557
3.25k
    cur = cur->c2;
6558
3.25k
      }
6559
1.95k
      if (found)
6560
1.32k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
1.95k
  }
6562
1.95k
  NEXT;
6563
1.95k
    }
6564
34.6k
    return(ret);
6565
44.5k
}
6566
6567
/**
6568
 * Parse the declaration for a Mixed Element content
6569
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6570
 *
6571
 * @deprecated Internal function, don't use.
6572
 *
6573
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6574
 *
6575
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6576
 *
6577
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6578
 *
6579
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6580
 *
6581
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6582
 * TODO Parameter-entity replacement text must be properly nested
6583
 *  with parenthesized groups. That is to say, if either of the
6584
 *  opening or closing parentheses in a choice, seq, or Mixed
6585
 *  construct is contained in the replacement text for a parameter
6586
 *  entity, both must be contained in the same replacement text. For
6587
 *  interoperability, if a parameter-entity reference appears in a
6588
 *  choice, seq, or Mixed construct, its replacement text should not
6589
 *  be empty, and neither the first nor last non-blank character of
6590
 *  the replacement text should be a connector (| or ,).
6591
 *
6592
 * @param ctxt  an XML parser context
6593
 * @param inputchk  the input used for the current entity, needed for boundary checks
6594
 * @returns the tree of xmlElementContent describing the element
6595
 *          hierarchy.
6596
 */
6597
xmlElementContent *
6598
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6599
    /* stub left for API/ABI compat */
6600
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6601
0
}
6602
6603
/**
6604
 * Parse the declaration for an Element content either Mixed or Children,
6605
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6606
 *
6607
 * @deprecated Internal function, don't use.
6608
 *
6609
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6610
 *
6611
 * @param ctxt  an XML parser context
6612
 * @param name  the name of the element being defined.
6613
 * @param result  the Element Content pointer will be stored here if any
6614
 * @returns an xmlElementTypeVal value or -1 on error
6615
 */
6616
6617
int
6618
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6619
41.9k
                           xmlElementContent **result) {
6620
6621
41.9k
    xmlElementContentPtr tree = NULL;
6622
41.9k
    int openInputNr = ctxt->inputNr;
6623
41.9k
    int res;
6624
6625
41.9k
    *result = NULL;
6626
6627
41.9k
    if (RAW != '(') {
6628
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6629
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6630
0
  return(-1);
6631
0
    }
6632
41.9k
    NEXT;
6633
41.9k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6634
41.9k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6635
11.1k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6636
11.1k
  res = XML_ELEMENT_TYPE_MIXED;
6637
30.7k
    } else {
6638
30.7k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6639
30.7k
  res = XML_ELEMENT_TYPE_ELEMENT;
6640
30.7k
    }
6641
41.9k
    if (tree == NULL)
6642
9.52k
        return(-1);
6643
32.4k
    SKIP_BLANKS_PE;
6644
32.4k
    *result = tree;
6645
32.4k
    return(res);
6646
41.9k
}
6647
6648
/**
6649
 * Parse an element declaration. Always consumes '<!'.
6650
 *
6651
 * @deprecated Internal function, don't use.
6652
 *
6653
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6654
 *
6655
 * [ VC: Unique Element Type Declaration ]
6656
 * No element type may be declared more than once
6657
 *
6658
 * @param ctxt  an XML parser context
6659
 * @returns the type of the element, or -1 in case of error
6660
 */
6661
int
6662
65.2k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6663
65.2k
    const xmlChar *name;
6664
65.2k
    int ret = -1;
6665
65.2k
    xmlElementContentPtr content  = NULL;
6666
6667
65.2k
    if ((CUR != '<') || (NXT(1) != '!'))
6668
0
        return(ret);
6669
65.2k
    SKIP(2);
6670
6671
    /* GROW; done in the caller */
6672
65.2k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6673
#ifdef LIBXML_VALID_ENABLED
6674
  int oldInputNr = ctxt->inputNr;
6675
#endif
6676
6677
62.1k
  SKIP(7);
6678
62.1k
  if (SKIP_BLANKS_PE == 0) {
6679
2.20k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6680
2.20k
               "Space required after 'ELEMENT'\n");
6681
2.20k
      return(-1);
6682
2.20k
  }
6683
59.9k
        name = xmlParseName(ctxt);
6684
59.9k
  if (name == NULL) {
6685
272
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6686
272
         "xmlParseElementDecl: no name for Element\n");
6687
272
      return(-1);
6688
272
  }
6689
59.6k
  if (SKIP_BLANKS_PE == 0) {
6690
2.01k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6691
2.01k
         "Space required after the element name\n");
6692
2.01k
  }
6693
59.6k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6694
11.9k
      SKIP(5);
6695
      /*
6696
       * Element must always be empty.
6697
       */
6698
11.9k
      ret = XML_ELEMENT_TYPE_EMPTY;
6699
47.7k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6700
47.7k
             (NXT(2) == 'Y')) {
6701
2.02k
      SKIP(3);
6702
      /*
6703
       * Element is a generic container.
6704
       */
6705
2.02k
      ret = XML_ELEMENT_TYPE_ANY;
6706
45.7k
  } else if (RAW == '(') {
6707
41.9k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6708
41.9k
            if (ret <= 0)
6709
9.52k
                return(-1);
6710
41.9k
  } else {
6711
      /*
6712
       * [ WFC: PEs in Internal Subset ] error handling.
6713
       */
6714
3.81k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6715
3.81k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6716
3.81k
      return(-1);
6717
3.81k
  }
6718
6719
46.3k
  SKIP_BLANKS_PE;
6720
6721
46.3k
  if (RAW != '>') {
6722
2.89k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6723
2.89k
      if (content != NULL) {
6724
1.95k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6725
1.95k
      }
6726
43.4k
  } else {
6727
#ifdef LIBXML_VALID_ENABLED
6728
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6729
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6730
                                 "Element declaration doesn't start and stop in"
6731
                                 " the same entity\n",
6732
                                 NULL, NULL);
6733
      }
6734
#endif
6735
6736
43.4k
      NEXT;
6737
43.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6738
43.4k
    (ctxt->sax->elementDecl != NULL)) {
6739
36.6k
    if (content != NULL)
6740
25.5k
        content->parent = NULL;
6741
36.6k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6742
36.6k
                           content);
6743
36.6k
    if ((content != NULL) && (content->parent == NULL)) {
6744
        /*
6745
         * this is a trick: if xmlAddElementDecl is called,
6746
         * instead of copying the full tree it is plugged directly
6747
         * if called from the parser. Avoid duplicating the
6748
         * interfaces or change the API/ABI
6749
         */
6750
760
        xmlFreeDocElementContent(ctxt->myDoc, content);
6751
760
    }
6752
36.6k
      } else if (content != NULL) {
6753
4.87k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6754
4.87k
      }
6755
43.4k
  }
6756
46.3k
    }
6757
49.4k
    return(ret);
6758
65.2k
}
6759
6760
/**
6761
 * Parse a conditional section. Always consumes '<!['.
6762
 *
6763
 *     [61] conditionalSect ::= includeSect | ignoreSect
6764
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6765
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6766
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6767
 *                                 Ignore)*
6768
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6769
 * @param ctxt  an XML parser context
6770
 */
6771
6772
static void
6773
8.40k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6774
8.40k
    size_t depth = 0;
6775
8.40k
    int isFreshPE = 0;
6776
8.40k
    int oldInputNr = ctxt->inputNr;
6777
8.40k
    int declInputNr = ctxt->inputNr;
6778
6779
18.2k
    while (!PARSER_STOPPED(ctxt)) {
6780
18.1k
        if (ctxt->input->cur >= ctxt->input->end) {
6781
304
            if (ctxt->inputNr <= oldInputNr) {
6782
304
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6783
304
                return;
6784
304
            }
6785
6786
0
            xmlPopPE(ctxt);
6787
0
            declInputNr = ctxt->inputNr;
6788
17.8k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6789
9.61k
            SKIP(3);
6790
9.61k
            SKIP_BLANKS_PE;
6791
6792
9.61k
            isFreshPE = 0;
6793
6794
9.61k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6795
5.43k
                SKIP(7);
6796
5.43k
                SKIP_BLANKS_PE;
6797
5.43k
                if (RAW != '[') {
6798
513
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799
513
                    return;
6800
513
                }
6801
#ifdef LIBXML_VALID_ENABLED
6802
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6803
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804
                                     "All markup of the conditional section is"
6805
                                     " not in the same entity\n",
6806
                                     NULL, NULL);
6807
                }
6808
#endif
6809
4.92k
                NEXT;
6810
6811
4.92k
                depth++;
6812
4.92k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6813
1.15k
                size_t ignoreDepth = 0;
6814
6815
1.15k
                SKIP(6);
6816
1.15k
                SKIP_BLANKS_PE;
6817
1.15k
                if (RAW != '[') {
6818
304
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6819
304
                    return;
6820
304
                }
6821
#ifdef LIBXML_VALID_ENABLED
6822
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6823
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6824
                                     "All markup of the conditional section is"
6825
                                     " not in the same entity\n",
6826
                                     NULL, NULL);
6827
                }
6828
#endif
6829
851
                NEXT;
6830
6831
62.7k
                while (PARSER_STOPPED(ctxt) == 0) {
6832
62.7k
                    if (RAW == 0) {
6833
521
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6834
521
                        return;
6835
521
                    }
6836
62.1k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6837
885
                        SKIP(3);
6838
885
                        ignoreDepth++;
6839
                        /* Check for integer overflow */
6840
885
                        if (ignoreDepth == 0) {
6841
0
                            xmlErrMemory(ctxt);
6842
0
                            return;
6843
0
                        }
6844
61.3k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6845
61.3k
                               (NXT(2) == '>')) {
6846
831
                        SKIP(3);
6847
831
                        if (ignoreDepth == 0)
6848
328
                            break;
6849
503
                        ignoreDepth--;
6850
60.4k
                    } else {
6851
60.4k
                        NEXT;
6852
60.4k
                    }
6853
62.1k
                }
6854
6855
#ifdef LIBXML_VALID_ENABLED
6856
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6857
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6858
                                     "All markup of the conditional section is"
6859
                                     " not in the same entity\n",
6860
                                     NULL, NULL);
6861
                }
6862
#endif
6863
3.02k
            } else {
6864
3.02k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6865
3.02k
                return;
6866
3.02k
            }
6867
9.61k
        } else if ((depth > 0) &&
6868
8.22k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6869
477
            if (isFreshPE) {
6870
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6871
0
                               "Parameter entity must match "
6872
0
                               "extSubsetDecl\n");
6873
0
                return;
6874
0
            }
6875
6876
477
            depth--;
6877
#ifdef LIBXML_VALID_ENABLED
6878
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6879
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6880
                                 "All markup of the conditional section is not"
6881
                                 " in the same entity\n",
6882
                                 NULL, NULL);
6883
            }
6884
#endif
6885
477
            SKIP(3);
6886
7.74k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6887
3.89k
            isFreshPE = 0;
6888
3.89k
            xmlParseMarkupDecl(ctxt);
6889
3.89k
        } else if (RAW == '%') {
6890
510
            xmlParsePERefInternal(ctxt, 1);
6891
510
            if (ctxt->inputNr > declInputNr) {
6892
0
                isFreshPE = 1;
6893
0
                declInputNr = ctxt->inputNr;
6894
0
            }
6895
3.33k
        } else {
6896
3.33k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6897
3.33k
            return;
6898
3.33k
        }
6899
6900
10.1k
        if (depth == 0)
6901
334
            break;
6902
6903
9.80k
        SKIP_BLANKS;
6904
9.80k
        SHRINK;
6905
9.80k
        GROW;
6906
9.80k
    }
6907
8.40k
}
6908
6909
/**
6910
 * Parse markup declarations. Always consumes '<!' or '<?'.
6911
 *
6912
 * @deprecated Internal function, don't use.
6913
 *
6914
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6915
 *                         NotationDecl | PI | Comment
6916
 *
6917
 * [ VC: Proper Declaration/PE Nesting ]
6918
 * Parameter-entity replacement text must be properly nested with
6919
 * markup declarations. That is to say, if either the first character
6920
 * or the last character of a markup declaration (markupdecl above) is
6921
 * contained in the replacement text for a parameter-entity reference,
6922
 * both must be contained in the same replacement text.
6923
 *
6924
 * [ WFC: PEs in Internal Subset ]
6925
 * In the internal DTD subset, parameter-entity references can occur
6926
 * only where markup declarations can occur, not within markup declarations.
6927
 * (This does not apply to references that occur in external parameter
6928
 * entities or to the external subset.)
6929
 *
6930
 * @param ctxt  an XML parser context
6931
 */
6932
void
6933
390k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6934
390k
    GROW;
6935
390k
    if (CUR == '<') {
6936
390k
        if (NXT(1) == '!') {
6937
383k
      switch (NXT(2)) {
6938
250k
          case 'E':
6939
250k
        if (NXT(3) == 'L')
6940
65.2k
      xmlParseElementDecl(ctxt);
6941
185k
        else if (NXT(3) == 'N')
6942
184k
      xmlParseEntityDecl(ctxt);
6943
286
                    else
6944
286
                        SKIP(2);
6945
250k
        break;
6946
61.0k
          case 'A':
6947
61.0k
        xmlParseAttributeListDecl(ctxt);
6948
61.0k
        break;
6949
13.3k
          case 'N':
6950
13.3k
        xmlParseNotationDecl(ctxt);
6951
13.3k
        break;
6952
56.1k
          case '-':
6953
56.1k
        xmlParseComment(ctxt);
6954
56.1k
        break;
6955
2.60k
    default:
6956
2.60k
                    xmlFatalErr(ctxt,
6957
2.60k
                                ctxt->inSubset == 2 ?
6958
559
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6959
2.60k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6960
2.60k
                                NULL);
6961
2.60k
                    SKIP(2);
6962
2.60k
        break;
6963
383k
      }
6964
383k
  } else if (NXT(1) == '?') {
6965
7.27k
      xmlParsePI(ctxt);
6966
7.27k
  }
6967
390k
    }
6968
390k
}
6969
6970
/**
6971
 * Parse an XML declaration header for external entities
6972
 *
6973
 * @deprecated Internal function, don't use.
6974
 *
6975
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6976
 * @param ctxt  an XML parser context
6977
 */
6978
6979
void
6980
15.3k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6981
15.3k
    xmlChar *version;
6982
6983
    /*
6984
     * We know that '<?xml' is here.
6985
     */
6986
15.3k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6987
15.0k
  SKIP(5);
6988
15.0k
    } else {
6989
258
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6990
258
  return;
6991
258
    }
6992
6993
15.0k
    if (SKIP_BLANKS == 0) {
6994
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6995
0
           "Space needed after '<?xml'\n");
6996
0
    }
6997
6998
    /*
6999
     * We may have the VersionInfo here.
7000
     */
7001
15.0k
    version = xmlParseVersionInfo(ctxt);
7002
15.0k
    if (version == NULL) {
7003
5.75k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7004
5.75k
        if (version == NULL) {
7005
4
            xmlErrMemory(ctxt);
7006
4
            return;
7007
4
        }
7008
9.31k
    } else {
7009
9.31k
  if (SKIP_BLANKS == 0) {
7010
967
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7011
967
               "Space needed here\n");
7012
967
  }
7013
9.31k
    }
7014
15.0k
    ctxt->input->version = version;
7015
7016
    /*
7017
     * We must have the encoding declaration
7018
     */
7019
15.0k
    xmlParseEncodingDecl(ctxt);
7020
7021
15.0k
    SKIP_BLANKS;
7022
15.0k
    if ((RAW == '?') && (NXT(1) == '>')) {
7023
6.47k
        SKIP(2);
7024
8.59k
    } else if (RAW == '>') {
7025
        /* Deprecated old WD ... */
7026
296
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7027
296
  NEXT;
7028
8.29k
    } else {
7029
8.29k
        int c;
7030
7031
8.29k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7032
280k
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7033
278k
            NEXT;
7034
278k
            if (c == '>')
7035
6.90k
                break;
7036
278k
        }
7037
8.29k
    }
7038
15.0k
}
7039
7040
/**
7041
 * Parse Markup declarations from an external subset
7042
 *
7043
 * @deprecated Internal function, don't use.
7044
 *
7045
 *     [30] extSubset ::= textDecl? extSubsetDecl
7046
 *
7047
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7048
 *                             PEReference | S) *
7049
 * @param ctxt  an XML parser context
7050
 * @param publicId  the public identifier
7051
 * @param systemId  the system identifier (URL)
7052
 */
7053
void
7054
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7055
14.7k
                       const xmlChar *systemId) {
7056
14.7k
    int oldInputNr;
7057
7058
14.7k
    xmlCtxtInitializeLate(ctxt);
7059
7060
14.7k
    xmlDetectEncoding(ctxt);
7061
7062
14.7k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7063
9.16k
  xmlParseTextDecl(ctxt);
7064
9.16k
    }
7065
14.7k
    if (ctxt->myDoc == NULL) {
7066
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7067
0
  if (ctxt->myDoc == NULL) {
7068
0
      xmlErrMemory(ctxt);
7069
0
      return;
7070
0
  }
7071
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7072
0
    }
7073
14.7k
    if ((ctxt->myDoc->intSubset == NULL) &&
7074
14.7k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7075
0
        xmlErrMemory(ctxt);
7076
0
    }
7077
7078
14.7k
    ctxt->inSubset = 2;
7079
14.7k
    oldInputNr = ctxt->inputNr;
7080
7081
14.7k
    SKIP_BLANKS;
7082
55.1k
    while (!PARSER_STOPPED(ctxt)) {
7083
52.4k
        if (ctxt->input->cur >= ctxt->input->end) {
7084
1.58k
            if (ctxt->inputNr <= oldInputNr) {
7085
989
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7086
989
                break;
7087
989
            }
7088
7089
600
            xmlPopPE(ctxt);
7090
50.8k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7091
4.89k
            xmlParseConditionalSections(ctxt);
7092
45.9k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7093
33.1k
            xmlParseMarkupDecl(ctxt);
7094
33.1k
        } else if (RAW == '%') {
7095
1.74k
            xmlParsePERefInternal(ctxt, 1);
7096
11.0k
        } else {
7097
11.0k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7098
7099
11.4k
            while (ctxt->inputNr > oldInputNr)
7100
348
                xmlPopPE(ctxt);
7101
11.0k
            break;
7102
11.0k
        }
7103
40.3k
        SKIP_BLANKS;
7104
40.3k
        SHRINK;
7105
40.3k
        GROW;
7106
40.3k
    }
7107
14.7k
}
7108
7109
/**
7110
 * Parse and handle entity references in content, depending on the SAX
7111
 * interface, this may end up in a call to characters() if this is a
7112
 * CharRef, a predefined entity, if there is no reference() callback,
7113
 * or if the parser was asked to switch to that mode.
7114
 *
7115
 * @deprecated Internal function, don't use.
7116
 *
7117
 * Always consumes '&'.
7118
 *
7119
 *     [67] Reference ::= EntityRef | CharRef
7120
 * @param ctxt  an XML parser context
7121
 */
7122
void
7123
530k
xmlParseReference(xmlParserCtxt *ctxt) {
7124
530k
    xmlEntityPtr ent = NULL;
7125
530k
    const xmlChar *name;
7126
530k
    xmlChar *val;
7127
7128
530k
    if (RAW != '&')
7129
0
        return;
7130
7131
    /*
7132
     * Simple case of a CharRef
7133
     */
7134
530k
    if (NXT(1) == '#') {
7135
43.5k
  int i = 0;
7136
43.5k
  xmlChar out[16];
7137
43.5k
  int value = xmlParseCharRef(ctxt);
7138
7139
43.5k
  if (value == 0)
7140
16.7k
      return;
7141
7142
        /*
7143
         * Just encode the value in UTF-8
7144
         */
7145
26.7k
        COPY_BUF(out, i, value);
7146
26.7k
        out[i] = 0;
7147
26.7k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7148
26.7k
            (!ctxt->disableSAX))
7149
9.00k
            ctxt->sax->characters(ctxt->userData, out, i);
7150
26.7k
  return;
7151
43.5k
    }
7152
7153
    /*
7154
     * We are seeing an entity reference
7155
     */
7156
487k
    name = xmlParseEntityRefInternal(ctxt);
7157
487k
    if (name == NULL)
7158
313k
        return;
7159
173k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7160
173k
    if (ent == NULL) {
7161
        /*
7162
         * Create a reference for undeclared entities.
7163
         */
7164
56.6k
        if ((ctxt->replaceEntities == 0) &&
7165
56.6k
            (ctxt->sax != NULL) &&
7166
56.6k
            (ctxt->disableSAX == 0) &&
7167
56.6k
            (ctxt->sax->reference != NULL)) {
7168
3.75k
            ctxt->sax->reference(ctxt->userData, name);
7169
3.75k
        }
7170
56.6k
        return;
7171
56.6k
    }
7172
116k
    if (!ctxt->wellFormed)
7173
68.5k
  return;
7174
7175
    /* special case of predefined entities */
7176
48.1k
    if ((ent->name == NULL) ||
7177
48.1k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7178
35.0k
  val = ent->content;
7179
35.0k
  if (val == NULL) return;
7180
  /*
7181
   * inline the entity.
7182
   */
7183
35.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7184
35.0k
      (!ctxt->disableSAX))
7185
35.0k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7186
35.0k
  return;
7187
35.0k
    }
7188
7189
    /*
7190
     * Some users try to parse entities on their own and used to set
7191
     * the renamed "checked" member. Fix the flags to cover this
7192
     * case.
7193
     */
7194
13.0k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7195
0
        ent->flags |= XML_ENT_PARSED;
7196
7197
    /*
7198
     * The first reference to the entity triggers a parsing phase
7199
     * where the ent->children is filled with the result from
7200
     * the parsing.
7201
     * Note: external parsed entities will not be loaded, it is not
7202
     * required for a non-validating parser, unless the parsing option
7203
     * of validating, or substituting entities were given. Doing so is
7204
     * far more secure as the parser will only process data coming from
7205
     * the document entity by default.
7206
     *
7207
     * FIXME: This doesn't work correctly since entities can be
7208
     * expanded with different namespace declarations in scope.
7209
     * For example:
7210
     *
7211
     * <!DOCTYPE doc [
7212
     *   <!ENTITY ent "<ns:elem/>">
7213
     * ]>
7214
     * <doc>
7215
     *   <decl1 xmlns:ns="urn:ns1">
7216
     *     &ent;
7217
     *   </decl1>
7218
     *   <decl2 xmlns:ns="urn:ns2">
7219
     *     &ent;
7220
     *   </decl2>
7221
     * </doc>
7222
     *
7223
     * Proposed fix:
7224
     *
7225
     * - Ignore current namespace declarations when parsing the
7226
     *   entity. If a prefix can't be resolved, don't report an error
7227
     *   but mark it as unresolved.
7228
     * - Try to resolve these prefixes when expanding the entity.
7229
     *   This will require a specialized version of xmlStaticCopyNode
7230
     *   which can also make use of the namespace hash table to avoid
7231
     *   quadratic behavior.
7232
     *
7233
     * Alternatively, we could simply reparse the entity on each
7234
     * expansion like we already do with custom SAX callbacks.
7235
     * External entity content should be cached in this case.
7236
     */
7237
13.0k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7238
13.0k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7239
9.91k
         ((ctxt->replaceEntities) ||
7240
13.0k
          (ctxt->validate)))) {
7241
13.0k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7242
11.2k
            xmlCtxtParseEntity(ctxt, ent);
7243
11.2k
        } else if (ent->children == NULL) {
7244
            /*
7245
             * Probably running in SAX mode and the callbacks don't
7246
             * build the entity content. Parse the entity again.
7247
             *
7248
             * This will also be triggered in normal tree builder mode
7249
             * if an entity happens to be empty, causing unnecessary
7250
             * reloads. It's hard to come up with a reliable check in
7251
             * which mode we're running.
7252
             */
7253
335
            xmlCtxtParseEntity(ctxt, ent);
7254
335
        }
7255
13.0k
    }
7256
7257
    /*
7258
     * We also check for amplification if entities aren't substituted.
7259
     * They might be expanded later.
7260
     */
7261
13.0k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7262
2
        return;
7263
7264
13.0k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7265
7.72k
        return;
7266
7267
5.34k
    if (ctxt->replaceEntities == 0) {
7268
  /*
7269
   * Create a reference
7270
   */
7271
1
        if (ctxt->sax->reference != NULL)
7272
1
      ctxt->sax->reference(ctxt->userData, ent->name);
7273
5.34k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7274
3.61k
        xmlNodePtr copy, cur;
7275
7276
        /*
7277
         * Seems we are generating the DOM content, copy the tree
7278
   */
7279
3.61k
        cur = ent->children;
7280
7281
        /*
7282
         * Handle first text node with SAX to coalesce text efficiently
7283
         */
7284
3.61k
        if ((cur->type == XML_TEXT_NODE) ||
7285
3.61k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7286
2.04k
            int len = xmlStrlen(cur->content);
7287
7288
2.04k
            if ((cur->type == XML_TEXT_NODE) ||
7289
2.04k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7290
2.04k
                if (ctxt->sax->characters != NULL)
7291
2.04k
                    ctxt->sax->characters(ctxt, cur->content, len);
7292
2.04k
            } else {
7293
0
                if (ctxt->sax->cdataBlock != NULL)
7294
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
0
            }
7296
7297
2.04k
            cur = cur->next;
7298
2.04k
        }
7299
7300
11.3k
        while (cur != NULL) {
7301
10.4k
            xmlNodePtr last;
7302
7303
            /*
7304
             * Handle last text node with SAX to coalesce text efficiently
7305
             */
7306
10.4k
            if ((cur->next == NULL) &&
7307
10.4k
                ((cur->type == XML_TEXT_NODE) ||
7308
2.96k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7309
2.71k
                int len = xmlStrlen(cur->content);
7310
7311
2.71k
                if ((cur->type == XML_TEXT_NODE) ||
7312
2.71k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7313
2.71k
                    if (ctxt->sax->characters != NULL)
7314
2.71k
                        ctxt->sax->characters(ctxt, cur->content, len);
7315
2.71k
                } else {
7316
0
                    if (ctxt->sax->cdataBlock != NULL)
7317
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7318
0
                }
7319
7320
2.71k
                break;
7321
2.71k
            }
7322
7323
            /*
7324
             * Reset coalesce buffer stats only for non-text nodes.
7325
             */
7326
7.75k
            ctxt->nodemem = 0;
7327
7.75k
            ctxt->nodelen = 0;
7328
7329
7.75k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7330
7331
7.75k
            if (copy == NULL) {
7332
23
                xmlErrMemory(ctxt);
7333
23
                break;
7334
23
            }
7335
7336
7.73k
            if (ctxt->parseMode == XML_PARSE_READER) {
7337
                /* Needed for reader */
7338
0
                copy->extra = cur->extra;
7339
                /* Maybe needed for reader */
7340
0
                copy->_private = cur->_private;
7341
0
            }
7342
7343
7.73k
            copy->parent = ctxt->node;
7344
7.73k
            last = ctxt->node->last;
7345
7.73k
            if (last == NULL) {
7346
430
                ctxt->node->children = copy;
7347
7.30k
            } else {
7348
7.30k
                last->next = copy;
7349
7.30k
                copy->prev = last;
7350
7.30k
            }
7351
7.73k
            ctxt->node->last = copy;
7352
7353
7.73k
            cur = cur->next;
7354
7.73k
        }
7355
3.61k
    }
7356
5.34k
}
7357
7358
static void
7359
244k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7360
    /*
7361
     * [ WFC: Entity Declared ]
7362
     * In a document without any DTD, a document with only an
7363
     * internal DTD subset which contains no parameter entity
7364
     * references, or a document with "standalone='yes'", the
7365
     * Name given in the entity reference must match that in an
7366
     * entity declaration, except that well-formed documents
7367
     * need not declare any of the following entities: amp, lt,
7368
     * gt, apos, quot.
7369
     * The declaration of a parameter entity must precede any
7370
     * reference to it.
7371
     * Similarly, the declaration of a general entity must
7372
     * precede any reference to it which appears in a default
7373
     * value in an attribute-list declaration. Note that if
7374
     * entities are declared in the external subset or in
7375
     * external parameter entities, a non-validating processor
7376
     * is not obligated to read and process their declarations;
7377
     * for such documents, the rule that an entity must be
7378
     * declared is a well-formedness constraint only if
7379
     * standalone='yes'.
7380
     */
7381
244k
    if ((ctxt->standalone == 1) ||
7382
244k
        ((ctxt->hasExternalSubset == 0) &&
7383
244k
         (ctxt->hasPErefs == 0))) {
7384
225k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7385
225k
                          "Entity '%s' not defined\n", name);
7386
#ifdef LIBXML_VALID_ENABLED
7387
    } else if (ctxt->validate) {
7388
        /*
7389
         * [ VC: Entity Declared ]
7390
         * In a document with an external subset or external
7391
         * parameter entities with "standalone='no'", ...
7392
         * ... The declaration of a parameter entity must
7393
         * precede any reference to it...
7394
         */
7395
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7396
                         "Entity '%s' not defined\n", name, NULL);
7397
#endif
7398
225k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7399
19.8k
               ((ctxt->replaceEntities) &&
7400
19.7k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7401
        /*
7402
         * Also raise a non-fatal error
7403
         *
7404
         * - if the external subset is loaded and all entity declarations
7405
         *   should be available, or
7406
         * - entity substitution was requested without restricting
7407
         *   external entity access.
7408
         */
7409
19.7k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7410
19.7k
                     "Entity '%s' not defined\n", name);
7411
19.7k
    } else {
7412
67
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7413
67
                      "Entity '%s' not defined\n", name, NULL);
7414
67
    }
7415
7416
244k
    ctxt->valid = 0;
7417
244k
}
7418
7419
static xmlEntityPtr
7420
931k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7421
931k
    xmlEntityPtr ent = NULL;
7422
7423
    /*
7424
     * Predefined entities override any extra definition
7425
     */
7426
931k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7427
931k
        ent = xmlGetPredefinedEntity(name);
7428
931k
        if (ent != NULL)
7429
123k
            return(ent);
7430
931k
    }
7431
7432
    /*
7433
     * Ask first SAX for entity resolution, otherwise try the
7434
     * entities which may have been stored in the parser context.
7435
     */
7436
807k
    if (ctxt->sax != NULL) {
7437
807k
  if (ctxt->sax->getEntity != NULL)
7438
807k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7439
807k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7440
807k
      (ctxt->options & XML_PARSE_OLDSAX))
7441
0
      ent = xmlGetPredefinedEntity(name);
7442
807k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7443
807k
      (ctxt->userData==ctxt)) {
7444
2.20k
      ent = xmlSAX2GetEntity(ctxt, name);
7445
2.20k
  }
7446
807k
    }
7447
7448
807k
    if (ent == NULL) {
7449
225k
        xmlHandleUndeclaredEntity(ctxt, name);
7450
225k
    }
7451
7452
    /*
7453
     * [ WFC: Parsed Entity ]
7454
     * An entity reference must not contain the name of an
7455
     * unparsed entity
7456
     */
7457
582k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7458
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7459
0
     "Entity reference to unparsed entity %s\n", name);
7460
0
        ent = NULL;
7461
0
    }
7462
7463
    /*
7464
     * [ WFC: No External Entity References ]
7465
     * Attribute values cannot contain direct or indirect
7466
     * entity references to external entities.
7467
     */
7468
582k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7469
14.2k
        if (inAttr) {
7470
964
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7471
964
                 "Attribute references external entity '%s'\n", name);
7472
964
            ent = NULL;
7473
964
        }
7474
14.2k
    }
7475
7476
807k
    return(ent);
7477
931k
}
7478
7479
/**
7480
 * Parse an entity reference. Always consumes '&'.
7481
 *
7482
 *     [68] EntityRef ::= '&' Name ';'
7483
 *
7484
 * @param ctxt  an XML parser context
7485
 * @returns the name, or NULL in case of error.
7486
 */
7487
static const xmlChar *
7488
1.07M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7489
1.07M
    const xmlChar *name;
7490
7491
1.07M
    GROW;
7492
7493
1.07M
    if (RAW != '&')
7494
0
        return(NULL);
7495
1.07M
    NEXT;
7496
1.07M
    name = xmlParseName(ctxt);
7497
1.07M
    if (name == NULL) {
7498
447k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7499
447k
           "xmlParseEntityRef: no name\n");
7500
447k
        return(NULL);
7501
447k
    }
7502
626k
    if (RAW != ';') {
7503
113k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
113k
  return(NULL);
7505
113k
    }
7506
512k
    NEXT;
7507
7508
512k
    return(name);
7509
626k
}
7510
7511
/**
7512
 * @deprecated Internal function, don't use.
7513
 *
7514
 * @param ctxt  an XML parser context
7515
 * @returns the xmlEntity if found, or NULL otherwise.
7516
 */
7517
xmlEntity *
7518
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7519
0
    const xmlChar *name;
7520
7521
0
    if (ctxt == NULL)
7522
0
        return(NULL);
7523
7524
0
    name = xmlParseEntityRefInternal(ctxt);
7525
0
    if (name == NULL)
7526
0
        return(NULL);
7527
7528
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7529
0
}
7530
7531
/**
7532
 * Parse an entity reference, but this version parses it from
7533
 * a string value.
7534
 *
7535
 *     [68] EntityRef ::= '&' Name ';'
7536
 *
7537
 * [ WFC: Entity Declared ]
7538
 * In a document without any DTD, a document with only an internal DTD
7539
 * subset which contains no parameter entity references, or a document
7540
 * with "standalone='yes'", the Name given in the entity reference
7541
 * must match that in an entity declaration, except that well-formed
7542
 * documents need not declare any of the following entities: amp, lt,
7543
 * gt, apos, quot.  The declaration of a parameter entity must precede
7544
 * any reference to it.  Similarly, the declaration of a general entity
7545
 * must precede any reference to it which appears in a default value in an
7546
 * attribute-list declaration. Note that if entities are declared in the
7547
 * external subset or in external parameter entities, a non-validating
7548
 * processor is not obligated to read and process their declarations;
7549
 * for such documents, the rule that an entity must be declared is a
7550
 * well-formedness constraint only if standalone='yes'.
7551
 *
7552
 * [ WFC: Parsed Entity ]
7553
 * An entity reference must not contain the name of an unparsed entity
7554
 *
7555
 * @param ctxt  an XML parser context
7556
 * @param str  a pointer to an index in the string
7557
 * @returns the entity name if parsed, or NULL otherwise. The str pointer
7558
 * is updated to the current location in the string.
7559
 */
7560
static xmlChar *
7561
420k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7562
420k
    xmlChar *name;
7563
420k
    const xmlChar *ptr;
7564
420k
    xmlChar cur;
7565
7566
420k
    if ((str == NULL) || (*str == NULL))
7567
0
        return(NULL);
7568
420k
    ptr = *str;
7569
420k
    cur = *ptr;
7570
420k
    if (cur != '&')
7571
0
  return(NULL);
7572
7573
420k
    ptr++;
7574
420k
    name = xmlParseStringName(ctxt, &ptr);
7575
420k
    if (name == NULL) {
7576
811
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7577
811
           "xmlParseStringEntityRef: no name\n");
7578
811
  *str = ptr;
7579
811
  return(NULL);
7580
811
    }
7581
419k
    if (*ptr != ';') {
7582
321
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7583
321
        xmlFree(name);
7584
321
  *str = ptr;
7585
321
  return(NULL);
7586
321
    }
7587
418k
    ptr++;
7588
7589
418k
    *str = ptr;
7590
418k
    return(name);
7591
419k
}
7592
7593
/**
7594
 * Parse a parameter entity reference. Always consumes '%'.
7595
 *
7596
 * The entity content is handled directly by pushing its content as
7597
 * a new input stream.
7598
 *
7599
 *     [69] PEReference ::= '%' Name ';'
7600
 *
7601
 * [ WFC: No Recursion ]
7602
 * A parsed entity must not contain a recursive
7603
 * reference to itself, either directly or indirectly.
7604
 *
7605
 * [ WFC: Entity Declared ]
7606
 * In a document without any DTD, a document with only an internal DTD
7607
 * subset which contains no parameter entity references, or a document
7608
 * with "standalone='yes'", ...  ... The declaration of a parameter
7609
 * entity must precede any reference to it...
7610
 *
7611
 * [ VC: Entity Declared ]
7612
 * In a document with an external subset or external parameter entities
7613
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7614
 * must precede any reference to it...
7615
 *
7616
 * [ WFC: In DTD ]
7617
 * Parameter-entity references may only appear in the DTD.
7618
 * NOTE: misleading but this is handled.
7619
 *
7620
 * @param ctxt  an XML parser context
7621
 * @param markupDecl  whether the PERef starts a markup declaration
7622
 */
7623
static void
7624
64.9k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7625
64.9k
    const xmlChar *name;
7626
64.9k
    xmlEntityPtr entity = NULL;
7627
64.9k
    xmlParserInputPtr input;
7628
7629
64.9k
    if (RAW != '%')
7630
0
        return;
7631
64.9k
    NEXT;
7632
64.9k
    name = xmlParseName(ctxt);
7633
64.9k
    if (name == NULL) {
7634
6.06k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7635
6.06k
  return;
7636
6.06k
    }
7637
58.9k
    if (RAW != ';') {
7638
1.25k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7639
1.25k
        return;
7640
1.25k
    }
7641
7642
57.6k
    NEXT;
7643
7644
    /* Must be set before xmlHandleUndeclaredEntity */
7645
57.6k
    ctxt->hasPErefs = 1;
7646
7647
    /*
7648
     * Request the entity from SAX
7649
     */
7650
57.6k
    if ((ctxt->sax != NULL) &&
7651
57.6k
  (ctxt->sax->getParameterEntity != NULL))
7652
57.6k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7653
7654
57.6k
    if (entity == NULL) {
7655
17.5k
        xmlHandleUndeclaredEntity(ctxt, name);
7656
40.1k
    } else {
7657
  /*
7658
   * Internal checking in case the entity quest barfed
7659
   */
7660
40.1k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7661
40.1k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7662
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7663
0
      "Internal: %%%s; is not a parameter entity\n",
7664
0
        name, NULL);
7665
40.1k
  } else {
7666
40.1k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7667
40.1k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7668
31.6k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7669
31.6k
      (ctxt->replaceEntities == 0) &&
7670
31.6k
      (ctxt->validate == 0))))
7671
0
    return;
7672
7673
40.1k
            if (entity->flags & XML_ENT_EXPANDING) {
7674
758
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7675
758
                xmlHaltParser(ctxt);
7676
758
                return;
7677
758
            }
7678
7679
39.3k
      input = xmlNewEntityInputStream(ctxt, entity);
7680
39.3k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7681
9.02k
                xmlFreeInputStream(input);
7682
9.02k
    return;
7683
9.02k
            }
7684
7685
30.3k
            entity->flags |= XML_ENT_EXPANDING;
7686
7687
30.3k
            if (markupDecl)
7688
21.6k
                input->flags |= XML_INPUT_MARKUP_DECL;
7689
7690
30.3k
            GROW;
7691
7692
30.3k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7693
21.8k
                xmlDetectEncoding(ctxt);
7694
7695
21.8k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7696
21.8k
                    (IS_BLANK_CH(NXT(5)))) {
7697
2.64k
                    xmlParseTextDecl(ctxt);
7698
2.64k
                }
7699
21.8k
            }
7700
30.3k
  }
7701
40.1k
    }
7702
57.6k
}
7703
7704
/**
7705
 * Parse a parameter entity reference.
7706
 *
7707
 * @deprecated Internal function, don't use.
7708
 *
7709
 * @param ctxt  an XML parser context
7710
 */
7711
void
7712
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7713
0
    xmlParsePERefInternal(ctxt, 0);
7714
0
}
7715
7716
/**
7717
 * Load the content of an entity.
7718
 *
7719
 * @param ctxt  an XML parser context
7720
 * @param entity  an unloaded system entity
7721
 * @returns 0 in case of success and -1 in case of failure
7722
 */
7723
static int
7724
6.93k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7725
6.93k
    xmlParserInputPtr oldinput, input = NULL;
7726
6.93k
    xmlParserInputPtr *oldinputTab;
7727
6.93k
    xmlChar *oldencoding;
7728
6.93k
    xmlChar *content = NULL;
7729
6.93k
    xmlResourceType rtype;
7730
6.93k
    size_t length, i;
7731
6.93k
    int oldinputNr, oldinputMax;
7732
6.93k
    int ret = -1;
7733
6.93k
    int res;
7734
7735
6.93k
    if ((ctxt == NULL) || (entity == NULL) ||
7736
6.93k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737
6.93k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738
6.93k
  (entity->content != NULL)) {
7739
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7740
0
              "xmlLoadEntityContent parameter error");
7741
0
        return(-1);
7742
0
    }
7743
7744
6.93k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7745
6.93k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7746
0
    else
7747
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7748
7749
6.93k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7750
6.93k
                            (char *) entity->ExternalID, rtype);
7751
6.93k
    if (input == NULL)
7752
761
        return(-1);
7753
7754
6.17k
    oldinput = ctxt->input;
7755
6.17k
    oldinputNr = ctxt->inputNr;
7756
6.17k
    oldinputMax = ctxt->inputMax;
7757
6.17k
    oldinputTab = ctxt->inputTab;
7758
6.17k
    oldencoding = ctxt->encoding;
7759
7760
6.17k
    ctxt->input = NULL;
7761
6.17k
    ctxt->inputNr = 0;
7762
6.17k
    ctxt->inputMax = 1;
7763
6.17k
    ctxt->encoding = NULL;
7764
6.17k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7765
6.17k
    if (ctxt->inputTab == NULL) {
7766
1
        xmlErrMemory(ctxt);
7767
1
        xmlFreeInputStream(input);
7768
1
        goto error;
7769
1
    }
7770
7771
6.17k
    xmlBufResetInput(input->buf->buffer, input);
7772
7773
6.17k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7774
0
        xmlFreeInputStream(input);
7775
0
        goto error;
7776
0
    }
7777
7778
6.17k
    xmlDetectEncoding(ctxt);
7779
7780
    /*
7781
     * Parse a possible text declaration first
7782
     */
7783
6.17k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7784
976
  xmlParseTextDecl(ctxt);
7785
        /*
7786
         * An XML-1.0 document can't reference an entity not XML-1.0
7787
         */
7788
976
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7789
976
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7790
309
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7791
309
                           "Version mismatch between document and entity\n");
7792
309
        }
7793
976
    }
7794
7795
6.17k
    length = input->cur - input->base;
7796
6.17k
    xmlBufShrink(input->buf->buffer, length);
7797
6.17k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7798
7799
8.87k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7800
2.70k
        ;
7801
7802
6.17k
    xmlBufResetInput(input->buf->buffer, input);
7803
7804
6.17k
    if (res < 0) {
7805
839
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7806
839
        goto error;
7807
839
    }
7808
7809
5.33k
    length = xmlBufUse(input->buf->buffer);
7810
5.33k
    if (length > INT_MAX) {
7811
0
        xmlErrMemory(ctxt);
7812
0
        goto error;
7813
0
    }
7814
7815
5.33k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7816
5.33k
    if (content == NULL) {
7817
3
        xmlErrMemory(ctxt);
7818
3
        goto error;
7819
3
    }
7820
7821
3.94M
    for (i = 0; i < length; ) {
7822
3.94M
        int clen = length - i;
7823
3.94M
        int c = xmlGetUTF8Char(content + i, &clen);
7824
7825
3.94M
        if ((c < 0) || (!IS_CHAR(c))) {
7826
4.79k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7827
4.79k
                              "xmlLoadEntityContent: invalid char value %d\n",
7828
4.79k
                              content[i]);
7829
4.79k
            goto error;
7830
4.79k
        }
7831
3.94M
        i += clen;
7832
3.94M
    }
7833
7834
534
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7835
534
    entity->content = content;
7836
534
    entity->length = length;
7837
534
    content = NULL;
7838
534
    ret = 0;
7839
7840
6.17k
error:
7841
12.3k
    while (ctxt->inputNr > 0)
7842
6.17k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7843
6.17k
    xmlFree(ctxt->inputTab);
7844
6.17k
    xmlFree(ctxt->encoding);
7845
7846
6.17k
    ctxt->input = oldinput;
7847
6.17k
    ctxt->inputNr = oldinputNr;
7848
6.17k
    ctxt->inputMax = oldinputMax;
7849
6.17k
    ctxt->inputTab = oldinputTab;
7850
6.17k
    ctxt->encoding = oldencoding;
7851
7852
6.17k
    xmlFree(content);
7853
7854
6.17k
    return(ret);
7855
534
}
7856
7857
/**
7858
 * Parse PEReference declarations
7859
 *
7860
 *     [69] PEReference ::= '%' Name ';'
7861
 *
7862
 * [ WFC: No Recursion ]
7863
 * A parsed entity must not contain a recursive
7864
 * reference to itself, either directly or indirectly.
7865
 *
7866
 * [ WFC: Entity Declared ]
7867
 * In a document without any DTD, a document with only an internal DTD
7868
 * subset which contains no parameter entity references, or a document
7869
 * with "standalone='yes'", ...  ... The declaration of a parameter
7870
 * entity must precede any reference to it...
7871
 *
7872
 * [ VC: Entity Declared ]
7873
 * In a document with an external subset or external parameter entities
7874
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7875
 * must precede any reference to it...
7876
 *
7877
 * [ WFC: In DTD ]
7878
 * Parameter-entity references may only appear in the DTD.
7879
 * NOTE: misleading but this is handled.
7880
 *
7881
 * @param ctxt  an XML parser context
7882
 * @param str  a pointer to an index in the string
7883
 * @returns the string of the entity content.
7884
 *         str is updated to the current value of the index
7885
 */
7886
static xmlEntityPtr
7887
25.5k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7888
25.5k
    const xmlChar *ptr;
7889
25.5k
    xmlChar cur;
7890
25.5k
    xmlChar *name;
7891
25.5k
    xmlEntityPtr entity = NULL;
7892
7893
25.5k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7894
25.5k
    ptr = *str;
7895
25.5k
    cur = *ptr;
7896
25.5k
    if (cur != '%')
7897
0
        return(NULL);
7898
25.5k
    ptr++;
7899
25.5k
    name = xmlParseStringName(ctxt, &ptr);
7900
25.5k
    if (name == NULL) {
7901
4.98k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7902
4.98k
           "xmlParseStringPEReference: no name\n");
7903
4.98k
  *str = ptr;
7904
4.98k
  return(NULL);
7905
4.98k
    }
7906
20.5k
    cur = *ptr;
7907
20.5k
    if (cur != ';') {
7908
1.16k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7909
1.16k
  xmlFree(name);
7910
1.16k
  *str = ptr;
7911
1.16k
  return(NULL);
7912
1.16k
    }
7913
19.3k
    ptr++;
7914
7915
    /* Must be set before xmlHandleUndeclaredEntity */
7916
19.3k
    ctxt->hasPErefs = 1;
7917
7918
    /*
7919
     * Request the entity from SAX
7920
     */
7921
19.3k
    if ((ctxt->sax != NULL) &&
7922
19.3k
  (ctxt->sax->getParameterEntity != NULL))
7923
19.3k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7924
7925
19.3k
    if (entity == NULL) {
7926
2.21k
        xmlHandleUndeclaredEntity(ctxt, name);
7927
17.1k
    } else {
7928
  /*
7929
   * Internal checking in case the entity quest barfed
7930
   */
7931
17.1k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7932
17.1k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7933
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7934
0
        "%%%s; is not a parameter entity\n",
7935
0
        name, NULL);
7936
0
  }
7937
17.1k
    }
7938
7939
19.3k
    xmlFree(name);
7940
19.3k
    *str = ptr;
7941
19.3k
    return(entity);
7942
20.5k
}
7943
7944
/**
7945
 * Parse a DOCTYPE declaration
7946
 *
7947
 * @deprecated Internal function, don't use.
7948
 *
7949
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7950
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7951
 *
7952
 * [ VC: Root Element Type ]
7953
 * The Name in the document type declaration must match the element
7954
 * type of the root element.
7955
 *
7956
 * @param ctxt  an XML parser context
7957
 */
7958
7959
void
7960
148k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7961
148k
    const xmlChar *name = NULL;
7962
148k
    xmlChar *publicId = NULL;
7963
148k
    xmlChar *URI = NULL;
7964
7965
    /*
7966
     * We know that '<!DOCTYPE' has been detected.
7967
     */
7968
148k
    SKIP(9);
7969
7970
148k
    if (SKIP_BLANKS == 0) {
7971
20.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7972
20.4k
                       "Space required after 'DOCTYPE'\n");
7973
20.4k
    }
7974
7975
    /*
7976
     * Parse the DOCTYPE name.
7977
     */
7978
148k
    name = xmlParseName(ctxt);
7979
148k
    if (name == NULL) {
7980
6.00k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7981
6.00k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7982
6.00k
    }
7983
148k
    ctxt->intSubName = name;
7984
7985
148k
    SKIP_BLANKS;
7986
7987
    /*
7988
     * Check for public and system identifier (URI)
7989
     */
7990
148k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7991
7992
148k
    if ((URI != NULL) || (publicId != NULL)) {
7993
36.9k
        ctxt->hasExternalSubset = 1;
7994
36.9k
    }
7995
148k
    ctxt->extSubURI = URI;
7996
148k
    ctxt->extSubSystem = publicId;
7997
7998
148k
    SKIP_BLANKS;
7999
8000
    /*
8001
     * Create and update the internal subset.
8002
     */
8003
148k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8004
148k
  (!ctxt->disableSAX))
8005
108k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
8006
8007
148k
    if ((RAW != '[') && (RAW != '>')) {
8008
5.13k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8009
5.13k
    }
8010
148k
}
8011
8012
/**
8013
 * Parse the internal subset declaration
8014
 *
8015
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8016
 * @param ctxt  an XML parser context
8017
 */
8018
8019
static void
8020
136k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8021
    /*
8022
     * Is there any DTD definition ?
8023
     */
8024
136k
    if (RAW == '[') {
8025
136k
        int oldInputNr = ctxt->inputNr;
8026
8027
136k
        NEXT;
8028
  /*
8029
   * Parse the succession of Markup declarations and
8030
   * PEReferences.
8031
   * Subsequence (markupdecl | PEReference | S)*
8032
   */
8033
136k
  SKIP_BLANKS;
8034
534k
        while (1) {
8035
534k
            if (PARSER_STOPPED(ctxt)) {
8036
20.1k
                return;
8037
514k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8038
6.13k
                if (ctxt->inputNr <= oldInputNr) {
8039
1.50k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8040
1.50k
                    return;
8041
1.50k
                }
8042
4.63k
                xmlPopPE(ctxt);
8043
508k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8044
42.0k
                NEXT;
8045
42.0k
                SKIP_BLANKS;
8046
42.0k
                break;
8047
466k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8048
466k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8049
                /*
8050
                 * Conditional sections are allowed in external entities
8051
                 * included by PE References in the internal subset.
8052
                 */
8053
3.51k
                xmlParseConditionalSections(ctxt);
8054
462k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8055
353k
                xmlParseMarkupDecl(ctxt);
8056
353k
            } else if (RAW == '%') {
8057
36.4k
                xmlParsePERefInternal(ctxt, 1);
8058
72.2k
            } else {
8059
72.2k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8060
8061
86.4k
                while (ctxt->inputNr > oldInputNr)
8062
14.1k
                    xmlPopPE(ctxt);
8063
72.2k
                return;
8064
72.2k
            }
8065
398k
            SKIP_BLANKS;
8066
398k
            SHRINK;
8067
398k
            GROW;
8068
398k
        }
8069
136k
    }
8070
8071
    /*
8072
     * We should be at the end of the DOCTYPE declaration.
8073
     */
8074
42.0k
    if (RAW != '>') {
8075
526
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8076
526
        return;
8077
526
    }
8078
41.5k
    NEXT;
8079
41.5k
}
8080
8081
#ifdef LIBXML_SAX1_ENABLED
8082
/**
8083
 * Parse an attribute
8084
 *
8085
 * @deprecated Internal function, don't use.
8086
 *
8087
 *     [41] Attribute ::= Name Eq AttValue
8088
 *
8089
 * [ WFC: No External Entity References ]
8090
 * Attribute values cannot contain direct or indirect entity references
8091
 * to external entities.
8092
 *
8093
 * [ WFC: No < in Attribute Values ]
8094
 * The replacement text of any entity referred to directly or indirectly in
8095
 * an attribute value (other than "&lt;") must not contain a <.
8096
 *
8097
 * [ VC: Attribute Value Type ]
8098
 * The attribute must have been declared; the value must be of the type
8099
 * declared for it.
8100
 *
8101
 *     [25] Eq ::= S? '=' S?
8102
 *
8103
 * With namespace:
8104
 *
8105
 *     [NS 11] Attribute ::= QName Eq AttValue
8106
 *
8107
 * Also the case QName == xmlns:??? is handled independently as a namespace
8108
 * definition.
8109
 *
8110
 * @param ctxt  an XML parser context
8111
 * @param value  a xmlChar ** used to store the value of the attribute
8112
 * @returns the attribute name, and the value in *value.
8113
 */
8114
8115
const xmlChar *
8116
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8117
    const xmlChar *name;
8118
    xmlChar *val;
8119
8120
    *value = NULL;
8121
    GROW;
8122
    name = xmlParseName(ctxt);
8123
    if (name == NULL) {
8124
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8125
                 "error parsing attribute name\n");
8126
        return(NULL);
8127
    }
8128
8129
    /*
8130
     * read the value
8131
     */
8132
    SKIP_BLANKS;
8133
    if (RAW == '=') {
8134
        NEXT;
8135
  SKIP_BLANKS;
8136
  val = xmlParseAttValue(ctxt);
8137
    } else {
8138
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8139
         "Specification mandates value for attribute %s\n", name);
8140
  return(name);
8141
    }
8142
8143
    /*
8144
     * Check that xml:lang conforms to the specification
8145
     * No more registered as an error, just generate a warning now
8146
     * since this was deprecated in XML second edition
8147
     */
8148
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8149
  if (!xmlCheckLanguageID(val)) {
8150
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8151
              "Malformed value for xml:lang : %s\n",
8152
        val, NULL);
8153
  }
8154
    }
8155
8156
    /*
8157
     * Check that xml:space conforms to the specification
8158
     */
8159
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8160
  if (xmlStrEqual(val, BAD_CAST "default"))
8161
      *(ctxt->space) = 0;
8162
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8163
      *(ctxt->space) = 1;
8164
  else {
8165
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8166
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8167
                                 val, NULL);
8168
  }
8169
    }
8170
8171
    *value = val;
8172
    return(name);
8173
}
8174
8175
/**
8176
 * Parse a start tag. Always consumes '<'.
8177
 *
8178
 * @deprecated Internal function, don't use.
8179
 *
8180
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8181
 *
8182
 * [ WFC: Unique Att Spec ]
8183
 * No attribute name may appear more than once in the same start-tag or
8184
 * empty-element tag.
8185
 *
8186
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8187
 *
8188
 * [ WFC: Unique Att Spec ]
8189
 * No attribute name may appear more than once in the same start-tag or
8190
 * empty-element tag.
8191
 *
8192
 * With namespace:
8193
 *
8194
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8195
 *
8196
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8197
 *
8198
 * @param ctxt  an XML parser context
8199
 * @returns the element name parsed
8200
 */
8201
8202
const xmlChar *
8203
xmlParseStartTag(xmlParserCtxt *ctxt) {
8204
    const xmlChar *name;
8205
    const xmlChar *attname;
8206
    xmlChar *attvalue;
8207
    const xmlChar **atts = ctxt->atts;
8208
    int nbatts = 0;
8209
    int maxatts = ctxt->maxatts;
8210
    int i;
8211
8212
    if (RAW != '<') return(NULL);
8213
    NEXT1;
8214
8215
    name = xmlParseName(ctxt);
8216
    if (name == NULL) {
8217
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8218
       "xmlParseStartTag: invalid element name\n");
8219
        return(NULL);
8220
    }
8221
8222
    /*
8223
     * Now parse the attributes, it ends up with the ending
8224
     *
8225
     * (S Attribute)* S?
8226
     */
8227
    SKIP_BLANKS;
8228
    GROW;
8229
8230
    while (((RAW != '>') &&
8231
     ((RAW != '/') || (NXT(1) != '>')) &&
8232
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8233
  attname = xmlParseAttribute(ctxt, &attvalue);
8234
        if (attname == NULL)
8235
      break;
8236
        if (attvalue != NULL) {
8237
      /*
8238
       * [ WFC: Unique Att Spec ]
8239
       * No attribute name may appear more than once in the same
8240
       * start-tag or empty-element tag.
8241
       */
8242
      for (i = 0; i < nbatts;i += 2) {
8243
          if (xmlStrEqual(atts[i], attname)) {
8244
        xmlErrAttributeDup(ctxt, NULL, attname);
8245
        goto failed;
8246
    }
8247
      }
8248
      /*
8249
       * Add the pair to atts
8250
       */
8251
      if (nbatts + 4 > maxatts) {
8252
          const xmlChar **n;
8253
                int newSize;
8254
8255
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8256
                                          11, XML_MAX_ATTRS);
8257
                if (newSize < 0) {
8258
        xmlErrMemory(ctxt);
8259
        goto failed;
8260
    }
8261
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8262
                if (newSize < 2)
8263
                    newSize = 2;
8264
#endif
8265
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8266
    if (n == NULL) {
8267
        xmlErrMemory(ctxt);
8268
        goto failed;
8269
    }
8270
    atts = n;
8271
                maxatts = newSize * 2;
8272
    ctxt->atts = atts;
8273
    ctxt->maxatts = maxatts;
8274
      }
8275
8276
      atts[nbatts++] = attname;
8277
      atts[nbatts++] = attvalue;
8278
      atts[nbatts] = NULL;
8279
      atts[nbatts + 1] = NULL;
8280
8281
            attvalue = NULL;
8282
  }
8283
8284
failed:
8285
8286
        if (attvalue != NULL)
8287
            xmlFree(attvalue);
8288
8289
  GROW
8290
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8291
      break;
8292
  if (SKIP_BLANKS == 0) {
8293
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8294
         "attributes construct error\n");
8295
  }
8296
  SHRINK;
8297
        GROW;
8298
    }
8299
8300
    /*
8301
     * SAX: Start of Element !
8302
     */
8303
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8304
  (!ctxt->disableSAX)) {
8305
  if (nbatts > 0)
8306
      ctxt->sax->startElement(ctxt->userData, name, atts);
8307
  else
8308
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8309
    }
8310
8311
    if (atts != NULL) {
8312
        /* Free only the content strings */
8313
        for (i = 1;i < nbatts;i+=2)
8314
      if (atts[i] != NULL)
8315
         xmlFree((xmlChar *) atts[i]);
8316
    }
8317
    return(name);
8318
}
8319
8320
/**
8321
 * Parse an end tag. Always consumes '</'.
8322
 *
8323
 *     [42] ETag ::= '</' Name S? '>'
8324
 *
8325
 * With namespace
8326
 *
8327
 *     [NS 9] ETag ::= '</' QName S? '>'
8328
 * @param ctxt  an XML parser context
8329
 * @param line  line of the start tag
8330
 */
8331
8332
static void
8333
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8334
    const xmlChar *name;
8335
8336
    GROW;
8337
    if ((RAW != '<') || (NXT(1) != '/')) {
8338
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8339
           "xmlParseEndTag: '</' not found\n");
8340
  return;
8341
    }
8342
    SKIP(2);
8343
8344
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8345
8346
    /*
8347
     * We should definitely be at the ending "S? '>'" part
8348
     */
8349
    GROW;
8350
    SKIP_BLANKS;
8351
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8352
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8353
    } else
8354
  NEXT1;
8355
8356
    /*
8357
     * [ WFC: Element Type Match ]
8358
     * The Name in an element's end-tag must match the element type in the
8359
     * start-tag.
8360
     *
8361
     */
8362
    if (name != (xmlChar*)1) {
8363
        if (name == NULL) name = BAD_CAST "unparsable";
8364
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8365
         "Opening and ending tag mismatch: %s line %d and %s\n",
8366
                    ctxt->name, line, name);
8367
    }
8368
8369
    /*
8370
     * SAX: End of Tag
8371
     */
8372
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8373
  (!ctxt->disableSAX))
8374
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8375
8376
    namePop(ctxt);
8377
    spacePop(ctxt);
8378
}
8379
8380
/**
8381
 * Parse an end of tag
8382
 *
8383
 * @deprecated Internal function, don't use.
8384
 *
8385
 *     [42] ETag ::= '</' Name S? '>'
8386
 *
8387
 * With namespace
8388
 *
8389
 *     [NS 9] ETag ::= '</' QName S? '>'
8390
 * @param ctxt  an XML parser context
8391
 */
8392
8393
void
8394
xmlParseEndTag(xmlParserCtxt *ctxt) {
8395
    xmlParseEndTag1(ctxt, 0);
8396
}
8397
#endif /* LIBXML_SAX1_ENABLED */
8398
8399
/************************************************************************
8400
 *                  *
8401
 *          SAX 2 specific operations       *
8402
 *                  *
8403
 ************************************************************************/
8404
8405
/**
8406
 * Parse an XML Namespace QName
8407
 *
8408
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8409
 *     [7]  Prefix  ::= NCName
8410
 *     [8]  LocalPart  ::= NCName
8411
 *
8412
 * @param ctxt  an XML parser context
8413
 * @param prefix  pointer to store the prefix part
8414
 * @returns the Name parsed or NULL
8415
 */
8416
8417
static xmlHashedString
8418
16.2M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8419
16.2M
    xmlHashedString l, p;
8420
16.2M
    int start, isNCName = 0;
8421
8422
16.2M
    l.name = NULL;
8423
16.2M
    p.name = NULL;
8424
8425
16.2M
    GROW;
8426
16.2M
    start = CUR_PTR - BASE_PTR;
8427
8428
16.2M
    l = xmlParseNCName(ctxt);
8429
16.2M
    if (l.name != NULL) {
8430
13.0M
        isNCName = 1;
8431
13.0M
        if (CUR == ':') {
8432
4.97M
            NEXT;
8433
4.97M
            p = l;
8434
4.97M
            l = xmlParseNCName(ctxt);
8435
4.97M
        }
8436
13.0M
    }
8437
16.2M
    if ((l.name == NULL) || (CUR == ':')) {
8438
3.31M
        xmlChar *tmp;
8439
8440
3.31M
        l.name = NULL;
8441
3.31M
        p.name = NULL;
8442
3.31M
        if ((isNCName == 0) && (CUR != ':'))
8443
3.19M
            return(l);
8444
127k
        tmp = xmlParseNmtoken(ctxt);
8445
127k
        if (tmp != NULL)
8446
67.4k
            xmlFree(tmp);
8447
127k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8448
127k
                                CUR_PTR - (BASE_PTR + start));
8449
127k
        if (l.name == NULL) {
8450
3
            xmlErrMemory(ctxt);
8451
3
            return(l);
8452
3
        }
8453
127k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8454
127k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8455
127k
    }
8456
8457
13.1M
    *prefix = p;
8458
13.1M
    return(l);
8459
16.2M
}
8460
8461
/**
8462
 * Parse an XML Namespace QName
8463
 *
8464
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8465
 *     [7]  Prefix  ::= NCName
8466
 *     [8]  LocalPart  ::= NCName
8467
 *
8468
 * @param ctxt  an XML parser context
8469
 * @param prefix  pointer to store the prefix part
8470
 * @returns the Name parsed or NULL
8471
 */
8472
8473
static const xmlChar *
8474
58.1k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8475
58.1k
    xmlHashedString n, p;
8476
8477
58.1k
    n = xmlParseQNameHashed(ctxt, &p);
8478
58.1k
    if (n.name == NULL)
8479
3.37k
        return(NULL);
8480
54.7k
    *prefix = p.name;
8481
54.7k
    return(n.name);
8482
58.1k
}
8483
8484
/**
8485
 * Parse an XML name and compares for match
8486
 * (specialized for endtag parsing)
8487
 *
8488
 * @param ctxt  an XML parser context
8489
 * @param name  the localname
8490
 * @param prefix  the prefix, if any.
8491
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8492
 * and the name for mismatch
8493
 */
8494
8495
static const xmlChar *
8496
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8497
1.19M
                        xmlChar const *prefix) {
8498
1.19M
    const xmlChar *cmp;
8499
1.19M
    const xmlChar *in;
8500
1.19M
    const xmlChar *ret;
8501
1.19M
    const xmlChar *prefix2;
8502
8503
1.19M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8504
8505
1.19M
    GROW;
8506
1.19M
    in = ctxt->input->cur;
8507
8508
1.19M
    cmp = prefix;
8509
4.28M
    while (*in != 0 && *in == *cmp) {
8510
3.09M
  ++in;
8511
3.09M
  ++cmp;
8512
3.09M
    }
8513
1.19M
    if ((*cmp == 0) && (*in == ':')) {
8514
1.17M
        in++;
8515
1.17M
  cmp = name;
8516
11.2M
  while (*in != 0 && *in == *cmp) {
8517
10.1M
      ++in;
8518
10.1M
      ++cmp;
8519
10.1M
  }
8520
1.17M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8521
      /* success */
8522
1.13M
            ctxt->input->col += in - ctxt->input->cur;
8523
1.13M
      ctxt->input->cur = in;
8524
1.13M
      return((const xmlChar*) 1);
8525
1.13M
  }
8526
1.17M
    }
8527
    /*
8528
     * all strings coms from the dictionary, equality can be done directly
8529
     */
8530
58.1k
    ret = xmlParseQName (ctxt, &prefix2);
8531
58.1k
    if (ret == NULL)
8532
3.37k
        return(NULL);
8533
54.7k
    if ((ret == name) && (prefix == prefix2))
8534
20.9k
  return((const xmlChar*) 1);
8535
33.8k
    return ret;
8536
54.7k
}
8537
8538
/**
8539
 * Parse an attribute in the new SAX2 framework.
8540
 *
8541
 * @param ctxt  an XML parser context
8542
 * @param pref  the element prefix
8543
 * @param elem  the element name
8544
 * @param hprefix  resulting attribute prefix
8545
 * @param value  resulting value of the attribute
8546
 * @param len  resulting length of the attribute
8547
 * @param alloc  resulting indicator if the attribute was allocated
8548
 * @returns the attribute name, and the value in *value, .
8549
 */
8550
8551
static xmlHashedString
8552
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8553
                   const xmlChar * pref, const xmlChar * elem,
8554
                   xmlHashedString * hprefix, xmlChar ** value,
8555
                   int *len, int *alloc)
8556
7.95M
{
8557
7.95M
    xmlHashedString hname;
8558
7.95M
    const xmlChar *prefix, *name;
8559
7.95M
    xmlChar *val = NULL, *internal_val = NULL;
8560
7.95M
    int special = 0;
8561
7.95M
    int isNamespace;
8562
7.95M
    int flags;
8563
8564
7.95M
    *value = NULL;
8565
7.95M
    GROW;
8566
7.95M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8567
7.95M
    if (hname.name == NULL) {
8568
2.61M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8569
2.61M
                       "error parsing attribute name\n");
8570
2.61M
        return(hname);
8571
2.61M
    }
8572
5.34M
    name = hname.name;
8573
5.34M
    prefix = hprefix->name;
8574
8575
    /*
8576
     * get the type if needed
8577
     */
8578
5.34M
    if (ctxt->attsSpecial != NULL) {
8579
53.6k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8580
53.6k
                                              prefix, name));
8581
53.6k
    }
8582
8583
    /*
8584
     * read the value
8585
     */
8586
5.34M
    SKIP_BLANKS;
8587
5.34M
    if (RAW != '=') {
8588
97.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8589
97.5k
                          "Specification mandates value for attribute %s\n",
8590
97.5k
                          name);
8591
97.5k
        goto error;
8592
97.5k
    }
8593
8594
8595
5.24M
    NEXT;
8596
5.24M
    SKIP_BLANKS;
8597
5.24M
    flags = 0;
8598
5.24M
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8599
5.24M
                   (prefix == ctxt->str_xmlns));
8600
5.24M
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8601
5.24M
                                   isNamespace);
8602
5.24M
    if (val == NULL)
8603
30.7k
        goto error;
8604
8605
5.21M
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8606
8607
#ifdef LIBXML_VALID_ENABLED
8608
    if ((ctxt->validate) &&
8609
        (ctxt->standalone) &&
8610
        (special & XML_SPECIAL_EXTERNAL) &&
8611
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8612
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8613
                         "standalone: normalization of attribute %s on %s "
8614
                         "by external subset declaration\n",
8615
                         name, elem);
8616
    }
8617
#endif
8618
8619
5.21M
    if (prefix == ctxt->str_xml) {
8620
        /*
8621
         * Check that xml:lang conforms to the specification
8622
         * No more registered as an error, just generate a warning now
8623
         * since this was deprecated in XML second edition
8624
         */
8625
103k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8626
0
            internal_val = xmlStrndup(val, *len);
8627
0
            if (internal_val == NULL)
8628
0
                goto mem_error;
8629
0
            if (!xmlCheckLanguageID(internal_val)) {
8630
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8631
0
                              "Malformed value for xml:lang : %s\n",
8632
0
                              internal_val, NULL);
8633
0
            }
8634
0
        }
8635
8636
        /*
8637
         * Check that xml:space conforms to the specification
8638
         */
8639
103k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8640
984
            internal_val = xmlStrndup(val, *len);
8641
984
            if (internal_val == NULL)
8642
2
                goto mem_error;
8643
982
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8644
319
                *(ctxt->space) = 0;
8645
663
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8646
339
                *(ctxt->space) = 1;
8647
324
            else {
8648
324
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8649
324
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8650
324
                              internal_val, NULL);
8651
324
            }
8652
982
        }
8653
103k
        if (internal_val) {
8654
982
            xmlFree(internal_val);
8655
982
        }
8656
103k
    }
8657
8658
5.21M
    *value = val;
8659
5.21M
    return (hname);
8660
8661
2
mem_error:
8662
2
    xmlErrMemory(ctxt);
8663
128k
error:
8664
128k
    if ((val != NULL) && (*alloc != 0))
8665
1
        xmlFree(val);
8666
128k
    return(hname);
8667
2
}
8668
8669
/**
8670
 * Inserts a new attribute into the hash table.
8671
 *
 * The table is probed linearly: starting at `hashValue & (size - 1)`,
 * buckets are visited in order, wrapping to the start of the table,
 * until either a bucket with a negative index (a free slot) or a bucket
 * referring to the same attribute is reached. An existing entry is
 * considered the same attribute if its name pointer (atts[0]) equals
 * `name` and `uri` is pointer-equal to the URI selected by the namespace
 * index stored in atts[2] of that entry.
 *
8672
 * @param ctxt  parser context
8673
 * @param size  size of the hash table (`size - 1` is used as a bit mask,
 *              so this is presumably a power of two - confirm at callers)
8674
 * @param name  attribute name
8675
 * @param uri  namespace uri
8676
 * @param hashValue  combined hash value of name and uri
8677
 * @param aindex  attribute index (this is a multiple of 5)
8678
 * @returns INT_MAX if no existing attribute was found, the attribute
8679
 * index if an attribute was found. The body below performs no allocation
 * and never returns a negative value.
8680
 */
8681
static int
8682
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8683
1.15M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8684
1.15M
    xmlAttrHashBucket *table = ctxt->attrHash;
8685
1.15M
    xmlAttrHashBucket *bucket;
8686
1.15M
    unsigned hindex;
8687
8688
1.15M
    /* Start bucket: low bits of the combined hash. */
    hindex = hashValue & (size - 1);
8689
1.15M
    bucket = &table[hindex];
8690
8691
1.28M
    /*
     * A negative index marks a free bucket. NOTE(review): the loop only
     * stops at a free bucket or a match, so it assumes the table is never
     * completely full - confirm against the sizing done by the caller.
     */
    while (bucket->index >= 0) {
8692
156k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8693
8694
156k
        /*
         * Pointer comparison, not a string comparison. NOTE(review):
         * presumably valid because names are interned - confirm.
         */
        if (name == atts[0]) {
8695
28.3k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8696
8697
28.3k
            /* Compare uri with the URI that nsIndex stands for. */
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8698
28.3k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8699
11.5k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8700
26.3k
                return(bucket->index);
8701
28.3k
        }
8702
8703
129k
        /* Occupied by another attribute: next bucket, wrapping around. */
        hindex++;
8704
129k
        bucket++;
8705
129k
        if (hindex >= size) {
8706
22.8k
            hindex = 0;
8707
22.8k
            bucket = table;
8708
22.8k
        }
8709
129k
    }
8710
8711
1.12M
    /* Free bucket reached: record the new attribute. */
    bucket->index = aindex;
8712
8713
1.12M
    return(INT_MAX);
8714
1.15M
}
8715
8716
/**
 * Inserts an attribute into the hash table, keyed by QName.
 *
 * Same linear probing as xmlAttrHashInsert, but an existing entry matches
 * when both its name pointer (atts[0]) and its prefix pointer (atts[1])
 * are equal to `name` and `prefix`; namespace URIs are not consulted.
 *
 * @param ctxt  parser context
 * @param size  size of the hash table (`size - 1` is used as a bit mask)
 * @param name  attribute local name
 * @param prefix  attribute prefix
 * @param hashValue  combined hash value of name and prefix
 * @param aindex  attribute index stored in the bucket on insertion
 * @returns INT_MAX if no attribute with the same name and prefix was
 * found (aindex is then stored in the first free bucket), otherwise the
 * index of the existing attribute.
 */
static int
8717
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8718
                       const xmlChar *name, const xmlChar *prefix,
8719
2.62k
                       unsigned hashValue, int aindex) {
8720
2.62k
    xmlAttrHashBucket *table = ctxt->attrHash;
8721
2.62k
    xmlAttrHashBucket *bucket;
8722
2.62k
    unsigned hindex;
8723
8724
2.62k
    /* Start bucket: low bits of the combined hash. */
    hindex = hashValue & (size - 1);
8725
2.62k
    bucket = &table[hindex];
8726
8727
4.23k
    /* A negative index marks a free bucket. */
    while (bucket->index >= 0) {
8728
2.19k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8729
8730
2.19k
        /* Pointer comparisons on local name and prefix. */
        if ((name == atts[0]) && (prefix == atts[1]))
8731
574
            return(bucket->index);
8732
8733
1.61k
        /* Occupied by another attribute: next bucket, wrapping around. */
        hindex++;
8734
1.61k
        bucket++;
8735
1.61k
        if (hindex >= size) {
8736
354
            hindex = 0;
8737
354
            bucket = table;
8738
354
        }
8739
1.61k
    }
8740
8741
2.04k
    /* Free bucket reached: record the new attribute. */
    bucket->index = aindex;
8742
8743
2.04k
    return(INT_MAX);
8744
2.62k
}
8745
/**
8746
 * Parse a start tag. Always consumes '<'.
8747
 *
8748
 * This routine is called when running SAX2 parsing
8749
 *
8750
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8751
 *
8752
 * [ WFC: Unique Att Spec ]
8753
 * No attribute name may appear more than once in the same start-tag or
8754
 * empty-element tag.
8755
 *
8756
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8757
 *
8758
 * [ WFC: Unique Att Spec ]
8759
 * No attribute name may appear more than once in the same start-tag or
8760
 * empty-element tag.
8761
 *
8762
 * With namespace:
8763
 *
8764
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8765
 *
8766
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8767
 *
 * Processing order inside the function:
 *  1. parse the element QName;
 *  2. parse the attributes, pushing namespace declarations and storing
 *     ordinary attributes in ctxt->atts (five slots per attribute);
 *  3. push namespace declarations coming from default attributes and
 *     count the remaining default attributes;
 *  4. resolve the namespace index of every parsed attribute;
 *  5. check attribute uniqueness with the attribute hash table;
 *  6. append default attributes that are not already present;
 *  7. if needed, rescan for duplicate QNames;
 *  8. turn the temporary integers in ctxt->atts back into pointers;
 *  9. invoke the startElementNs SAX callback;
 * 10. free attribute values that were allocated.
 *
8768
 * @param ctxt  an XML parser context
8769
 * @param pref  resulting namespace prefix
8770
 * @param URI  resulting namespace URI
8771
 * @param nbNsPtr  resulting number of namespace declarations
8772
 * @returns the element name parsed. NULL is returned when the input does
 * not start with '<', when xmlParserNsStartElement fails, when no valid
 * element name can be parsed, or when the attribute array cannot be
 * grown while appending default attributes. *pref and *URI are written
 * only after attribute processing has completed; *nbNsPtr is written on
 * every path that reaches the `done` label, and on none of the three
 * early returns.
8773
 */
8774
8775
static const xmlChar *
8776
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8777
8.28M
                  const xmlChar **URI, int *nbNsPtr) {
8778
8.28M
    xmlHashedString hlocalname;
8779
8.28M
    xmlHashedString hprefix;
8780
8.28M
    xmlHashedString hattname;
8781
8.28M
    xmlHashedString haprefix;
8782
8.28M
    const xmlChar *localname;
8783
8.28M
    const xmlChar *prefix;
8784
8.28M
    const xmlChar *attname;
8785
8.28M
    const xmlChar *aprefix;
8786
8.28M
    const xmlChar *uri;
8787
8.28M
    xmlChar *attvalue = NULL;
8788
8.28M
    const xmlChar **atts = ctxt->atts;
8789
8.28M
    unsigned attrHashSize = 0;
8790
8.28M
    int maxatts = ctxt->maxatts;
8791
8.28M
    int nratts, nbatts, nbdef;
8792
8.28M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8793
8.28M
    int alloc = 0;
8794
8.28M
    int numNsErr = 0;
8795
8.28M
    int numDupErr = 0;
8796
8797
8.28M
    if (RAW != '<') return(NULL);
8798
8.28M
    NEXT1;
8799
8800
8.28M
    nbatts = 0;
8801
8.28M
    nratts = 0;
8802
8.28M
    nbdef = 0;
8803
8.28M
    nbNs = 0;
8804
8.28M
    nbTotalDef = 0;
8805
8.28M
    attval = 0;
8806
8807
8.28M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8808
0
        xmlErrMemory(ctxt);
8809
0
        return(NULL);
8810
0
    }
8811
8812
8.28M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8813
8.28M
    if (hlocalname.name == NULL) {
8814
576k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8815
576k
           "StartTag: invalid element name\n");
8816
576k
        return(NULL);
8817
576k
    }
8818
7.71M
    localname = hlocalname.name;
8819
7.71M
    prefix = hprefix.name;
8820
8821
    /*
8822
     * Now parse the attributes, it ends up with the ending
8823
     *
8824
     * (S Attribute)* S?
8825
     */
8826
7.71M
    SKIP_BLANKS;
8827
7.71M
    GROW;
8828
8829
    /*
8830
     * The ctxt->atts array will be ultimately passed to the SAX callback
8831
     * containing five xmlChar pointers for each attribute:
8832
     *
8833
     * [0] attribute name
8834
     * [1] attribute prefix
8835
     * [2] namespace URI
8836
     * [3] attribute value
8837
     * [4] end of attribute value
8838
     *
8839
     * To save memory, we reuse this array temporarily and store integers
8840
     * in these pointer variables.
8841
     *
8842
     * [0] attribute name
8843
     * [1] attribute prefix
8844
     * [2] hash value of attribute prefix, and later namespace index
8845
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8846
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8847
     *
8848
     * The ctxt->attallocs array contains an additional unsigned int for
8849
     * each attribute, containing the hash value of the attribute name
8850
     * and the alloc flag in bit 31.
8851
     */
8852
8853
9.75M
    while (((RAW != '>') &&
8854
9.75M
     ((RAW != '/') || (NXT(1) != '>')) &&
8855
9.75M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8856
7.95M
  int len = -1;
8857
8858
7.95M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8859
7.95M
                                          &haprefix, &attvalue, &len,
8860
7.95M
                                          &alloc);
8861
7.95M
        if (hattname.name == NULL)
8862
2.61M
      break;
8863
5.34M
        if (attvalue == NULL)
8864
128k
            goto next_attr;
8865
5.21M
        attname = hattname.name;
8866
5.21M
        aprefix = haprefix.name;
8867
5.21M
  if (len < 0) len = xmlStrlen(attvalue);
8868
8869
5.21M
        /* Default namespace declaration: xmlns="..." */
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8870
63.5k
            xmlHashedString huri;
8871
63.5k
            xmlURIPtr parsedUri;
8872
8873
63.5k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8874
63.5k
            uri = huri.name;
8875
63.5k
            if (uri == NULL) {
8876
2
                xmlErrMemory(ctxt);
8877
2
                goto next_attr;
8878
2
            }
8879
63.5k
            if (*uri != 0) {
8880
61.6k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8881
6
                    xmlErrMemory(ctxt);
8882
6
                    goto next_attr;
8883
6
                }
8884
61.6k
                if (parsedUri == NULL) {
8885
34.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8886
34.5k
                             "xmlns: '%s' is not a valid URI\n",
8887
34.5k
                                       uri, NULL, NULL);
8888
34.5k
                } else {
8889
27.1k
                    if (parsedUri->scheme == NULL) {
8890
21.6k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8891
21.6k
                                  "xmlns: URI %s is not absolute\n",
8892
21.6k
                                  uri, NULL, NULL);
8893
21.6k
                    }
8894
27.1k
                    xmlFreeURI(parsedUri);
8895
27.1k
                }
8896
61.6k
                if (uri == ctxt->str_xml_ns) {
8897
347
                    /*
                     * NOTE(review): attname equals ctxt->str_xmlns in
                     * this branch, so this test looks always true unless
                     * str_xml and str_xmlns can be the same pointer -
                     * confirm.
                     */
                    if (attname != ctxt->str_xml) {
8898
347
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8899
347
                     "xml namespace URI cannot be the default namespace\n",
8900
347
                                 NULL, NULL, NULL);
8901
347
                    }
8902
347
                    goto next_attr;
8903
347
                }
8904
61.3k
                if ((len == 29) &&
8905
61.3k
                    (xmlStrEqual(uri,
8906
578
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8907
229
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8908
229
                         "reuse of the xmlns namespace name is forbidden\n",
8909
229
                             NULL, NULL, NULL);
8910
229
                    goto next_attr;
8911
229
                }
8912
61.3k
            }
8913
8914
62.9k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8915
62.1k
                nbNs++;
8916
5.14M
        } else if (aprefix == ctxt->str_xmlns) {
8917
877k
            /* Prefixed namespace declaration: xmlns:attname="..." */
            xmlHashedString huri;
8918
877k
            xmlURIPtr parsedUri;
8919
8920
877k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8921
877k
            uri = huri.name;
8922
877k
            if (uri == NULL) {
8923
2
                xmlErrMemory(ctxt);
8924
2
                goto next_attr;
8925
2
            }
8926
8927
877k
            if (attname == ctxt->str_xml) {
8928
747
                if (uri != ctxt->str_xml_ns) {
8929
383
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8930
383
                             "xml namespace prefix mapped to wrong URI\n",
8931
383
                             NULL, NULL, NULL);
8932
383
                }
8933
                /*
8934
                 * Do not keep a namespace definition node
8935
                 */
8936
747
                goto next_attr;
8937
747
            }
8938
876k
            if (uri == ctxt->str_xml_ns) {
8939
281
                /*
                 * The attname == ctxt->str_xml case already left through
                 * the goto above, so this test is always true here.
                 */
                if (attname != ctxt->str_xml) {
8940
281
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8941
281
                             "xml namespace URI mapped to wrong prefix\n",
8942
281
                             NULL, NULL, NULL);
8943
281
                }
8944
281
                goto next_attr;
8945
281
            }
8946
876k
            if (attname == ctxt->str_xmlns) {
8947
306
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8948
306
                         "redefinition of the xmlns prefix is forbidden\n",
8949
306
                         NULL, NULL, NULL);
8950
306
                goto next_attr;
8951
306
            }
8952
875k
            if ((len == 29) &&
8953
875k
                (xmlStrEqual(uri,
8954
3.67k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8955
364
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8956
364
                         "reuse of the xmlns namespace name is forbidden\n",
8957
364
                         NULL, NULL, NULL);
8958
364
                goto next_attr;
8959
364
            }
8960
875k
            /*
             * uri == NULL was already handled right after the dictionary
             * lookup above; only the empty-string test can fire here.
             */
            if ((uri == NULL) || (uri[0] == 0)) {
8961
6.39k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8962
6.39k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8963
6.39k
                              attname, NULL, NULL);
8964
6.39k
                goto next_attr;
8965
869k
            } else {
8966
869k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8967
158
                    xmlErrMemory(ctxt);
8968
158
                    goto next_attr;
8969
158
                }
8970
869k
                if (parsedUri == NULL) {
8971
126k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8972
126k
                         "xmlns:%s: '%s' is not a valid URI\n",
8973
126k
                                       attname, uri, NULL);
8974
742k
                } else {
8975
742k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8976
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8977
0
                                  "xmlns:%s: URI %s is not absolute\n",
8978
0
                                  attname, uri, NULL);
8979
0
                    }
8980
742k
                    xmlFreeURI(parsedUri);
8981
742k
                }
8982
869k
            }
8983
8984
869k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8985
863k
                nbNs++;
8986
4.27M
        } else {
8987
            /*
8988
             * Populate attributes array, see above for repurposing
8989
             * of xmlChar pointers.
8990
             */
8991
4.27M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8992
878k
                int res = xmlCtxtGrowAttrs(ctxt);
8993
8994
878k
                maxatts = ctxt->maxatts;
8995
878k
                atts = ctxt->atts;
8996
8997
878k
                if (res < 0)
8998
96
                    goto next_attr;
8999
878k
            }
9000
4.27M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9001
4.27M
                                        ((unsigned) alloc << 31);
9002
4.27M
            atts[nbatts++] = attname;
9003
4.27M
            atts[nbatts++] = aprefix;
9004
4.27M
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
9005
4.27M
            if (alloc) {
9006
162k
                atts[nbatts++] = attvalue;
9007
162k
                attvalue += len;
9008
162k
                atts[nbatts++] = attvalue;
9009
4.10M
            } else {
9010
                /*
9011
                 * attvalue points into the input buffer which can be
9012
                 * reallocated. Store differences to input->base instead.
9013
                 * The pointers will be reconstructed later.
9014
                 */
9015
4.10M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9016
4.10M
                attvalue += len;
9017
4.10M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9018
4.10M
            }
9019
            /*
9020
             * tag if some deallocation is needed
9021
             */
9022
4.27M
            if (alloc != 0) attval = 1;
9023
4.27M
            attvalue = NULL; /* moved into atts */
9024
4.27M
        }
9025
9026
5.34M
next_attr:
9027
5.34M
        /*
         * Free the value unless it was moved into atts, in which case
         * attvalue was reset to NULL above.
         */
        if ((attvalue != NULL) && (alloc != 0)) {
9028
135k
            xmlFree(attvalue);
9029
135k
            attvalue = NULL;
9030
135k
        }
9031
9032
5.34M
  GROW
9033
5.34M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9034
3.08M
      break;
9035
2.25M
  if (SKIP_BLANKS == 0) {
9036
214k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9037
214k
         "attributes construct error\n");
9038
214k
      break;
9039
214k
  }
9040
2.04M
        GROW;
9041
2.04M
    }
9042
9043
    /*
9044
     * Namespaces from default attributes
9045
     */
9046
7.71M
    if (ctxt->attsDefault != NULL) {
9047
111k
        xmlDefAttrsPtr defaults;
9048
9049
111k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9050
111k
  if (defaults != NULL) {
9051
311k
      for (i = 0; i < defaults->nbAttrs; i++) {
9052
252k
                xmlDefAttr *attr = &defaults->attrs[i];
9053
9054
252k
          attname = attr->name.name;
9055
252k
    aprefix = attr->prefix.name;
9056
9057
252k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9058
17.9k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9059
9060
17.9k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9061
16.6k
                        nbNs++;
9062
234k
    } else if (aprefix == ctxt->str_xmlns) {
9063
80.0k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9064
9065
80.0k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9066
80.0k
                                      NULL, 1) > 0)
9067
79.4k
                        nbNs++;
9068
154k
    } else {
9069
154k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9070
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9071
0
                                    "Maximum number of attributes exceeded");
9072
0
                        break;
9073
0
                    }
9074
154k
                    /*
                     * Only counted here; the attribute itself is appended
                     * in the second pass over the defaults further down.
                     */
                    nbTotalDef += 1;
9075
154k
                }
9076
252k
      }
9077
58.9k
  }
9078
111k
    }
9079
9080
    /*
9081
     * Resolve attribute namespaces
9082
     */
9083
11.9M
    for (i = 0; i < nbatts; i += 5) {
9084
4.27M
        attname = atts[i];
9085
4.27M
        aprefix = atts[i+1];
9086
9087
        /*
9088
  * The default namespace does not apply to attribute names.
9089
  */
9090
4.27M
  if (aprefix == NULL) {
9091
4.10M
            nsIndex = NS_INDEX_EMPTY;
9092
4.10M
        } else if (aprefix == ctxt->str_xml) {
9093
103k
            nsIndex = NS_INDEX_XML;
9094
103k
        } else {
9095
62.0k
            haprefix.name = aprefix;
9096
62.0k
            haprefix.hashValue = (size_t) atts[i+2];
9097
62.0k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9098
9099
62.0k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9100
45.1k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9101
45.1k
        "Namespace prefix %s for %s on %s is not defined\n",
9102
45.1k
        aprefix, attname, localname);
9103
45.1k
                nsIndex = NS_INDEX_EMPTY;
9104
45.1k
            }
9105
62.0k
        }
9106
9107
4.27M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9108
4.27M
    }
9109
9110
    /*
9111
     * Maximum number of attributes including default attributes.
9112
     */
9113
7.71M
    maxAtts = nratts + nbTotalDef;
9114
9115
    /*
9116
     * Verify that attribute names are unique.
9117
     */
9118
7.71M
    if (maxAtts > 1) {
9119
479k
        /*
         * Pick a power of two, at least 4, that is at least twice
         * maxAtts.
         */
        attrHashSize = 4;
9120
648k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9121
168k
            attrHashSize *= 2;
9122
9123
479k
        if (attrHashSize > ctxt->attrHashMax) {
9124
194k
            xmlAttrHashBucket *tmp;
9125
9126
194k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9127
194k
            if (tmp == NULL) {
9128
36
                xmlErrMemory(ctxt);
9129
36
                goto done;
9130
36
            }
9131
9132
194k
            ctxt->attrHash = tmp;
9133
194k
            ctxt->attrHashMax = attrHashSize;
9134
194k
        }
9135
9136
479k
        /*
         * Fill with all-ones bytes so that the `index >= 0` test in the
         * insert helpers sees every bucket as free.
         */
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9137
9138
1.54M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9139
1.06M
            const xmlChar *nsuri;
9140
1.06M
            unsigned hashValue, nameHashValue, uriHashValue;
9141
1.06M
            int res;
9142
9143
1.06M
            attname = atts[i];
9144
1.06M
            aprefix = atts[i+1];
9145
1.06M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9146
            /* Hash values always have bit 31 set, see dict.c */
9147
1.06M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9148
9149
1.06M
            if (nsIndex == NS_INDEX_EMPTY) {
9150
                /*
9151
                 * Prefix with empty namespace means an undeclared
9152
                 * prefix which was already reported above.
9153
                 */
9154
948k
                if (aprefix != NULL)
9155
40.4k
                    continue;
9156
907k
                nsuri = NULL;
9157
907k
                uriHashValue = URI_HASH_EMPTY;
9158
907k
            } else if (nsIndex == NS_INDEX_XML) {
9159
101k
                nsuri = ctxt->str_xml_ns;
9160
101k
                uriHashValue = URI_HASH_XML;
9161
101k
            } else {
9162
15.9k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9163
15.9k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9164
15.9k
            }
9165
9166
1.02M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9167
1.02M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9168
1.02M
                                    hashValue, i);
9169
1.02M
            if (res < 0)
9170
0
                continue;
9171
9172
            /*
9173
             * [ WFC: Unique Att Spec ]
9174
             * No attribute name may appear more than once in the same
9175
             * start-tag or empty-element tag.
9176
             * As extended by the Namespace in XML REC.
9177
             */
9178
1.02M
            if (res < INT_MAX) {
9179
11.4k
                if (aprefix == atts[res+1]) {
9180
10.1k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9181
10.1k
                    numDupErr += 1;
9182
10.1k
                } else {
9183
1.26k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9184
1.26k
                             "Namespaced Attribute %s in '%s' redefined\n",
9185
1.26k
                             attname, nsuri, NULL);
9186
1.26k
                    numNsErr += 1;
9187
1.26k
                }
9188
11.4k
            }
9189
1.02M
        }
9190
479k
    }
9191
9192
    /*
9193
     * Default attributes
9194
     */
9195
7.71M
    if (ctxt->attsDefault != NULL) {
9196
111k
        xmlDefAttrsPtr defaults;
9197
9198
111k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9199
111k
  if (defaults != NULL) {
9200
311k
      for (i = 0; i < defaults->nbAttrs; i++) {
9201
252k
                xmlDefAttr *attr = &defaults->attrs[i];
9202
252k
                const xmlChar *nsuri = NULL;
9203
252k
                unsigned hashValue, uriHashValue = 0;
9204
252k
                int res;
9205
9206
252k
          attname = attr->name.name;
9207
252k
    aprefix = attr->prefix.name;
9208
9209
252k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9210
17.9k
                    continue;
9211
234k
    if (aprefix == ctxt->str_xmlns)
9212
79.9k
                    continue;
9213
9214
154k
                if (aprefix == NULL) {
9215
67.8k
                    nsIndex = NS_INDEX_EMPTY;
9216
67.8k
                    nsuri = NULL;
9217
67.8k
                    uriHashValue = URI_HASH_EMPTY;
9218
87.0k
                } else if (aprefix == ctxt->str_xml) {
9219
22.8k
                    nsIndex = NS_INDEX_XML;
9220
22.8k
                    nsuri = ctxt->str_xml_ns;
9221
22.8k
                    uriHashValue = URI_HASH_XML;
9222
64.2k
                } else {
9223
64.2k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9224
64.2k
                    if ((nsIndex == INT_MAX) ||
9225
64.2k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9226
61.0k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9227
61.0k
                                 "Namespace prefix %s for %s on %s is not "
9228
61.0k
                                 "defined\n",
9229
61.0k
                                 aprefix, attname, localname);
9230
61.0k
                        nsIndex = NS_INDEX_EMPTY;
9231
61.0k
                        nsuri = NULL;
9232
61.0k
                        uriHashValue = URI_HASH_EMPTY;
9233
61.0k
                    } else {
9234
3.18k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9235
3.18k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9236
3.18k
                    }
9237
64.2k
                }
9238
9239
                /*
9240
                 * Check whether the attribute exists
9241
                 */
9242
154k
                if (maxAtts > 1) {
9243
129k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9244
129k
                                                   uriHashValue);
9245
129k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9246
129k
                                            hashValue, nbatts);
9247
129k
                    if (res < 0)
9248
0
                        continue;
9249
129k
                    if (res < INT_MAX) {
9250
14.8k
                        if (aprefix == atts[res+1])
9251
3.86k
                            continue;
9252
10.9k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9253
10.9k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9254
10.9k
                                 attname, nsuri, NULL);
9255
10.9k
                    }
9256
129k
                }
9257
9258
151k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9259
9260
151k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9261
9.24k
                    res = xmlCtxtGrowAttrs(ctxt);
9262
9263
9.24k
                    maxatts = ctxt->maxatts;
9264
9.24k
                    atts = ctxt->atts;
9265
9266
9.24k
                    if (res < 0) {
9267
9
                        localname = NULL;
9268
9
                        goto done;
9269
9
                    }
9270
9.24k
                }
9271
9272
151k
                atts[nbatts++] = attname;
9273
151k
                atts[nbatts++] = aprefix;
9274
151k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9275
151k
                atts[nbatts++] = attr->value.name;
9276
151k
                atts[nbatts++] = attr->valueEnd;
9277
9278
#ifdef LIBXML_VALID_ENABLED
9279
                /*
9280
                 * This should be moved to valid.c, but we don't keep track
9281
                 * whether an attribute was defaulted.
9282
                 */
9283
                if ((ctxt->validate) &&
9284
                    (ctxt->standalone == 1) &&
9285
                    (attr->external != 0)) {
9286
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9287
                            "standalone: attribute %s on %s defaulted "
9288
                            "from external subset\n",
9289
                            attname, localname);
9290
                }
9291
#endif
9292
151k
                nbdef++;
9293
151k
      }
9294
58.9k
  }
9295
111k
    }
9296
9297
    /*
9298
     * Using a single hash table for nsUri/localName pairs cannot
9299
     * detect duplicate QNames reliably. The following example will
9300
     * only result in two namespace errors.
9301
     *
9302
     * <doc xmlns:a="a" xmlns:b="a">
9303
     *   <elem a:a="" b:a="" b:a=""/>
9304
     * </doc>
9305
     *
9306
     * If we saw more than one namespace error but no duplicate QNames
9307
     * were found, we have to scan for duplicate QNames.
9308
     */
9309
7.71M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9310
538
        memset(ctxt->attrHash, -1,
9311
538
               attrHashSize * sizeof(ctxt->attrHash[0]));
9312
9313
4.41k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9314
3.87k
            unsigned hashValue, nameHashValue, prefixHashValue;
9315
3.87k
            int res;
9316
9317
3.87k
            aprefix = atts[i+1];
9318
3.87k
            if (aprefix == NULL)
9319
1.25k
                continue;
9320
9321
2.62k
            attname = atts[i];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
2.62k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
2.62k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9325
9326
2.62k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9327
2.62k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9328
2.62k
                                         aprefix, hashValue, i);
9329
2.62k
            if (res < INT_MAX)
9330
574
                xmlErrAttributeDup(ctxt, aprefix, attname);
9331
2.62k
        }
9332
538
    }
9333
9334
    /*
9335
     * Reconstruct attribute pointers
9336
     */
9337
12.1M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9338
        /* namespace URI */
9339
4.42M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9340
4.42M
        /*
         * NOTE(review): the literals INT_MAX and INT_MAX - 1 below
         * presumably stand for NS_INDEX_EMPTY and NS_INDEX_XML, the
         * values stored in atts[i+2] above; the macro definitions are
         * outside this view - confirm.
         */
        if (nsIndex == INT_MAX)
9341
4.27M
            atts[i+2] = NULL;
9342
146k
        else if (nsIndex == INT_MAX - 1)
9343
126k
            atts[i+2] = ctxt->str_xml_ns;
9344
20.0k
        else
9345
20.0k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9346
9347
4.42M
        /*
         * Entries with j >= nratts are default attributes, whose value
         * pointers were stored directly. Parsed attributes without the
         * alloc flag hold offsets that are turned back into pointers.
         */
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9348
4.10M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9349
4.10M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9350
4.10M
        }
9351
4.42M
    }
9352
9353
7.71M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9354
7.71M
    if ((prefix != NULL) && (uri == NULL)) {
9355
389k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9356
389k
           "Namespace prefix %s on %s is not defined\n",
9357
389k
     prefix, localname, NULL);
9358
389k
    }
9359
7.71M
    *pref = prefix;
9360
7.71M
    *URI = uri;
9361
9362
    /*
9363
     * SAX callback
9364
     */
9365
7.71M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9366
7.71M
  (!ctxt->disableSAX)) {
9367
6.55M
  if (nbNs > 0)
9368
625k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9369
625k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9370
625k
        nbatts / 5, nbdef, atts);
9371
5.92M
  else
9372
5.92M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9373
5.92M
                          0, NULL, nbatts / 5, nbdef, atts);
9374
6.55M
    }
9375
9376
7.71M
done:
9377
    /*
9378
     * Free allocated attribute values
9379
     */
9380
7.71M
    if (attval != 0) {
9381
381k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9382
231k
      if (ctxt->attallocs[j] & 0x80000000)
9383
162k
          xmlFree((xmlChar *) atts[i+3]);
9384
150k
    }
9385
9386
7.71M
    *nbNsPtr = nbNs;
9387
7.71M
    return(localname);
9388
7.71M
}
9389
9390
/**
9391
 * Parse an end tag. Always consumes '</'.
9392
 *
9393
 *     [42] ETag ::= '</' Name S? '>'
9394
 *
9395
 * With namespace
9396
 *
9397
 *     [NS 9] ETag ::= '</' QName S? '>'
 *
 * If the input does not start with '</', XML_ERR_LTSLASH_REQUIRED is
 * raised and the function returns without doing anything else. Otherwise
 * the name is parsed and compared with ctxt->name (and tag->prefix when
 * the start tag had one). A missing '>' or a name mismatch is reported,
 * but the endElementNs SAX callback, spacePop and the namespace pop are
 * still executed afterwards.
 *
9398
 * @param ctxt  an XML parser context
9399
 * @param tag  the corresponding start tag
9400
 */
9401
9402
static void
9403
1.70M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9404
1.70M
    const xmlChar *name;
9405
9406
1.70M
    GROW;
9407
1.70M
    if ((RAW != '<') || (NXT(1) != '/')) {
9408
2.91k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9409
2.91k
  return;
9410
2.91k
    }
9411
1.70M
    SKIP(2);
9412
9413
1.70M
    /* Unprefixed start tag: compare a plain Name, otherwise a QName. */
    if (tag->prefix == NULL)
9414
510k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9415
1.19M
    else
9416
1.19M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9417
9418
    /*
9419
     * We should definitely be at the ending "S? '>'" part
9420
     */
9421
1.70M
    GROW;
9422
1.70M
    SKIP_BLANKS;
9423
1.70M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9424
114k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9425
114k
    } else
9426
1.58M
  NEXT1;
9427
9428
    /*
9429
     * [ WFC: Element Type Match ]
9430
     * The Name in an element's end-tag must match the element type in the
9431
     * start-tag.
9432
     *
9433
     */
9434
1.70M
    /*
     * NOTE(review): (xmlChar*)1 is presumably the sentinel the compare
     * helpers return when the parsed name matches - confirm. Any other
     * value, including NULL, is reported as a mismatch.
     */
    if (name != (xmlChar*)1) {
9435
178k
        if (name == NULL) name = BAD_CAST "unparsable";
9436
178k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9437
178k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9438
178k
                    ctxt->name, tag->line, name);
9439
178k
    }
9440
9441
    /*
9442
     * SAX: End of Tag
9443
     */
9444
1.70M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9445
1.70M
  (!ctxt->disableSAX))
9446
1.33M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9447
1.33M
                                tag->URI);
9448
9449
1.70M
    spacePop(ctxt);
9450
1.70M
    /* Drop the namespace declarations recorded for the start tag. */
    if (tag->nsNr != 0)
9451
601k
  xmlParserNsPop(ctxt, tag->nsNr);
9452
1.70M
}
9453
9454
/**
9455
 * Parse escaped pure raw content. Always consumes '<!['.
9456
 *
9457
 * @deprecated Internal function, don't use.
9458
 *
9459
 *     [18] CDSect ::= CDStart CData CDEnd
9460
 *
9461
 *     [19] CDStart ::= '<![CDATA['
9462
 *
9463
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9464
 *
9465
 *     [21] CDEnd ::= ']]>'
9466
 * @param ctxt  an XML parser context
9467
 */
9468
void
9469
85.9k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9470
85.9k
    xmlChar *buf = NULL;
9471
85.9k
    int len = 0;
9472
85.9k
    int size = XML_PARSER_BUFFER_SIZE;
9473
85.9k
    int r, rl;
9474
85.9k
    int s, sl;
9475
85.9k
    int cur, l;
9476
85.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9477
0
                    XML_MAX_HUGE_LENGTH :
9478
85.9k
                    XML_MAX_TEXT_LENGTH;
9479
9480
85.9k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9481
0
        return;
9482
85.9k
    SKIP(3);
9483
9484
85.9k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9485
0
        return;
9486
85.9k
    SKIP(6);
9487
9488
85.9k
    r = xmlCurrentCharRecover(ctxt, &rl);
9489
85.9k
    if (!IS_CHAR(r)) {
9490
2.84k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9491
2.84k
        goto out;
9492
2.84k
    }
9493
83.0k
    NEXTL(rl);
9494
83.0k
    s = xmlCurrentCharRecover(ctxt, &sl);
9495
83.0k
    if (!IS_CHAR(s)) {
9496
10.1k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9497
10.1k
        goto out;
9498
10.1k
    }
9499
72.9k
    NEXTL(sl);
9500
72.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
9501
72.9k
    buf = xmlMalloc(size);
9502
72.9k
    if (buf == NULL) {
9503
3
  xmlErrMemory(ctxt);
9504
3
        goto out;
9505
3
    }
9506
44.4M
    while (IS_CHAR(cur) &&
9507
44.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9508
44.3M
  if (len + 5 >= size) {
9509
7.53k
      xmlChar *tmp;
9510
7.53k
            int newSize;
9511
9512
7.53k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9513
7.53k
            if (newSize < 0) {
9514
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9515
0
                               "CData section too big found\n");
9516
0
                goto out;
9517
0
            }
9518
7.53k
      tmp = xmlRealloc(buf, newSize);
9519
7.53k
      if (tmp == NULL) {
9520
1
    xmlErrMemory(ctxt);
9521
1
                goto out;
9522
1
      }
9523
7.53k
      buf = tmp;
9524
7.53k
      size = newSize;
9525
7.53k
  }
9526
44.3M
  COPY_BUF(buf, len, r);
9527
44.3M
  r = s;
9528
44.3M
  rl = sl;
9529
44.3M
  s = cur;
9530
44.3M
  sl = l;
9531
44.3M
  NEXTL(l);
9532
44.3M
  cur = xmlCurrentCharRecover(ctxt, &l);
9533
44.3M
    }
9534
72.8k
    buf[len] = 0;
9535
72.8k
    if (cur != '>') {
9536
5.59k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9537
5.59k
                       "CData section not finished\n%.50s\n", buf);
9538
5.59k
        goto out;
9539
5.59k
    }
9540
67.3k
    NEXTL(l);
9541
9542
    /*
9543
     * OK the buffer is to be consumed as cdata.
9544
     */
9545
67.3k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9546
65.7k
        if ((ctxt->sax->cdataBlock != NULL) &&
9547
65.7k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9548
3.11k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9549
62.5k
        } else if (ctxt->sax->characters != NULL) {
9550
62.5k
            ctxt->sax->characters(ctxt->userData, buf, len);
9551
62.5k
        }
9552
65.7k
    }
9553
9554
85.9k
out:
9555
85.9k
    xmlFree(buf);
9556
85.9k
}
9557
9558
/**
9559
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9560
 * unexpected EOF to the caller.
9561
 *
9562
 * @param ctxt  an XML parser context
9563
 */
9564
9565
static void
9566
662k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9567
662k
    int oldNameNr = ctxt->nameNr;
9568
662k
    int oldSpaceNr = ctxt->spaceNr;
9569
662k
    int oldNodeNr = ctxt->nodeNr;
9570
9571
662k
    GROW;
9572
24.0M
    while ((ctxt->input->cur < ctxt->input->end) &&
9573
24.0M
     (PARSER_STOPPED(ctxt) == 0)) {
9574
23.9M
  const xmlChar *cur = ctxt->input->cur;
9575
9576
  /*
9577
   * First case : a Processing Instruction.
9578
   */
9579
23.9M
  if ((*cur == '<') && (cur[1] == '?')) {
9580
60.9k
      xmlParsePI(ctxt);
9581
60.9k
  }
9582
9583
  /*
9584
   * Second case : a CDSection
9585
   */
9586
  /* 2.6.0 test was *cur not RAW */
9587
23.9M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9588
85.9k
      xmlParseCDSect(ctxt);
9589
85.9k
  }
9590
9591
  /*
9592
   * Third case :  a comment
9593
   */
9594
23.8M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9595
23.8M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9596
148k
      xmlParseComment(ctxt);
9597
148k
  }
9598
9599
  /*
9600
   * Fourth case :  a sub-element.
9601
   */
9602
23.6M
  else if (*cur == '<') {
9603
9.29M
            if (NXT(1) == '/') {
9604
1.70M
                if (ctxt->nameNr <= oldNameNr)
9605
606k
                    break;
9606
1.09M
          xmlParseElementEnd(ctxt);
9607
7.59M
            } else {
9608
7.59M
          xmlParseElementStart(ctxt);
9609
7.59M
            }
9610
9.29M
  }
9611
9612
  /*
9613
   * Fifth case : a reference. If if has not been resolved,
9614
   *    parsing returns it's Name, create the node
9615
   */
9616
9617
14.3M
  else if (*cur == '&') {
9618
530k
      xmlParseReference(ctxt);
9619
530k
  }
9620
9621
  /*
9622
   * Last case, text. Note that References are handled directly.
9623
   */
9624
13.8M
  else {
9625
13.8M
      xmlParseCharDataInternal(ctxt, 0);
9626
13.8M
  }
9627
9628
23.3M
  SHRINK;
9629
23.3M
  GROW;
9630
23.3M
    }
9631
9632
662k
    if ((ctxt->nameNr > oldNameNr) &&
9633
662k
        (ctxt->input->cur >= ctxt->input->end) &&
9634
662k
        (ctxt->wellFormed)) {
9635
583
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9636
583
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9637
583
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9638
583
                "Premature end of data in tag %s line %d\n",
9639
583
                name, line, NULL);
9640
583
    }
9641
9642
    /*
9643
     * Clean up in error case
9644
     */
9645
9646
844k
    while (ctxt->nodeNr > oldNodeNr)
9647
181k
        nodePop(ctxt);
9648
9649
1.10M
    while (ctxt->nameNr > oldNameNr) {
9650
443k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9651
9652
443k
        if (tag->nsNr != 0)
9653
44.2k
            xmlParserNsPop(ctxt, tag->nsNr);
9654
9655
443k
        namePop(ctxt);
9656
443k
    }
9657
9658
1.10M
    while (ctxt->spaceNr > oldSpaceNr)
9659
443k
        spacePop(ctxt);
9660
662k
}
9661
9662
/**
9663
 * Parse XML element content. This is useful if you're only interested
9664
 * in custom SAX callbacks. If you want a node list, use
9665
 * #xmlCtxtParseContent.
9666
 *
9667
 * @param ctxt  an XML parser context
9668
 */
9669
void
9670
0
xmlParseContent(xmlParserCtxt *ctxt) {
9671
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9672
0
        return;
9673
9674
0
    xmlCtxtInitializeLate(ctxt);
9675
9676
0
    xmlParseContentInternal(ctxt);
9677
9678
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9679
0
}
9680
9681
/**
9682
 * Parse an XML element
9683
 *
9684
 * @deprecated Internal function, don't use.
9685
 *
9686
 *     [39] element ::= EmptyElemTag | STag content ETag
9687
 *
9688
 * [ WFC: Element Type Match ]
9689
 * The Name in an element's end-tag must match the element type in the
9690
 * start-tag.
9691
 *
9692
 * @param ctxt  an XML parser context
9693
 */
9694
9695
void
9696
698k
xmlParseElement(xmlParserCtxt *ctxt) {
9697
698k
    if (xmlParseElementStart(ctxt) != 0)
9698
45.0k
        return;
9699
9700
653k
    xmlParseContentInternal(ctxt);
9701
9702
653k
    if (ctxt->input->cur >= ctxt->input->end) {
9703
43.5k
        if (ctxt->wellFormed) {
9704
566
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9705
566
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9706
566
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9707
566
                    "Premature end of data in tag %s line %d\n",
9708
566
                    name, line, NULL);
9709
566
        }
9710
43.5k
        return;
9711
43.5k
    }
9712
9713
609k
    xmlParseElementEnd(ctxt);
9714
609k
}
9715
9716
/**
9717
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9718
 * opening tag was parsed, 1 if an empty element was parsed.
9719
 *
9720
 * Always consumes '<'.
9721
 *
9722
 * @param ctxt  an XML parser context
9723
 */
9724
static int
9725
8.28M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9726
8.28M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9727
8.28M
    const xmlChar *name;
9728
8.28M
    const xmlChar *prefix = NULL;
9729
8.28M
    const xmlChar *URI = NULL;
9730
8.28M
    xmlParserNodeInfo node_info;
9731
8.28M
    int line;
9732
8.28M
    xmlNodePtr cur;
9733
8.28M
    int nbNs = 0;
9734
9735
8.28M
    if (ctxt->nameNr > maxDepth) {
9736
370
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9737
370
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9738
370
                ctxt->nameNr);
9739
370
  xmlHaltParser(ctxt);
9740
370
  return(-1);
9741
370
    }
9742
9743
    /* Capture start position */
9744
8.28M
    if (ctxt->record_info) {
9745
0
        node_info.begin_pos = ctxt->input->consumed +
9746
0
                          (CUR_PTR - ctxt->input->base);
9747
0
  node_info.begin_line = ctxt->input->line;
9748
0
    }
9749
9750
8.28M
    if (ctxt->spaceNr == 0)
9751
0
  spacePush(ctxt, -1);
9752
8.28M
    else if (*ctxt->space == -2)
9753
0
  spacePush(ctxt, -1);
9754
8.28M
    else
9755
8.28M
  spacePush(ctxt, *ctxt->space);
9756
9757
8.28M
    line = ctxt->input->line;
9758
#ifdef LIBXML_SAX1_ENABLED
9759
    if (ctxt->sax2)
9760
#endif /* LIBXML_SAX1_ENABLED */
9761
8.28M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9762
#ifdef LIBXML_SAX1_ENABLED
9763
    else
9764
  name = xmlParseStartTag(ctxt);
9765
#endif /* LIBXML_SAX1_ENABLED */
9766
8.28M
    if (name == NULL) {
9767
576k
  spacePop(ctxt);
9768
576k
        return(-1);
9769
576k
    }
9770
7.71M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9771
7.71M
    cur = ctxt->node;
9772
9773
#ifdef LIBXML_VALID_ENABLED
9774
    /*
9775
     * [ VC: Root Element Type ]
9776
     * The Name in the document type declaration must match the element
9777
     * type of the root element.
9778
     */
9779
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9780
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9781
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9782
#endif /* LIBXML_VALID_ENABLED */
9783
9784
    /*
9785
     * Check for an Empty Element.
9786
     */
9787
7.71M
    if ((RAW == '/') && (NXT(1) == '>')) {
9788
2.66M
        SKIP(2);
9789
2.66M
  if (ctxt->sax2) {
9790
2.66M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9791
2.66M
    (!ctxt->disableSAX))
9792
2.50M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9793
#ifdef LIBXML_SAX1_ENABLED
9794
  } else {
9795
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9796
    (!ctxt->disableSAX))
9797
    ctxt->sax->endElement(ctxt->userData, name);
9798
#endif /* LIBXML_SAX1_ENABLED */
9799
2.66M
  }
9800
2.66M
  namePop(ctxt);
9801
2.66M
  spacePop(ctxt);
9802
2.66M
  if (nbNs > 0)
9803
14.1k
      xmlParserNsPop(ctxt, nbNs);
9804
2.66M
  if (cur != NULL && ctxt->record_info) {
9805
0
            node_info.node = cur;
9806
0
            node_info.end_pos = ctxt->input->consumed +
9807
0
                                (CUR_PTR - ctxt->input->base);
9808
0
            node_info.end_line = ctxt->input->line;
9809
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9810
0
  }
9811
2.66M
  return(1);
9812
2.66M
    }
9813
5.05M
    if (RAW == '>') {
9814
2.19M
        NEXT1;
9815
2.19M
        if (cur != NULL && ctxt->record_info) {
9816
0
            node_info.node = cur;
9817
0
            node_info.end_pos = 0;
9818
0
            node_info.end_line = 0;
9819
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9820
0
        }
9821
2.85M
    } else {
9822
2.85M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9823
2.85M
         "Couldn't find end of Start Tag %s line %d\n",
9824
2.85M
                    name, line, NULL);
9825
9826
  /*
9827
   * end of parsing of this node.
9828
   */
9829
2.85M
  nodePop(ctxt);
9830
2.85M
  namePop(ctxt);
9831
2.85M
  spacePop(ctxt);
9832
2.85M
  if (nbNs > 0)
9833
79.2k
      xmlParserNsPop(ctxt, nbNs);
9834
2.85M
  return(-1);
9835
2.85M
    }
9836
9837
2.19M
    return(0);
9838
5.05M
}
9839
9840
/**
9841
 * Parse the end of an XML element. Always consumes '</'.
9842
 *
9843
 * @param ctxt  an XML parser context
9844
 */
9845
static void
9846
1.70M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9847
1.70M
    xmlNodePtr cur = ctxt->node;
9848
9849
1.70M
    if (ctxt->nameNr <= 0) {
9850
731
        if ((RAW == '<') && (NXT(1) == '/'))
9851
3
            SKIP(2);
9852
731
        return;
9853
731
    }
9854
9855
    /*
9856
     * parse the end of tag: '</' should be here.
9857
     */
9858
1.70M
    if (ctxt->sax2) {
9859
1.70M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9860
1.70M
  namePop(ctxt);
9861
1.70M
    }
9862
#ifdef LIBXML_SAX1_ENABLED
9863
    else
9864
  xmlParseEndTag1(ctxt, 0);
9865
#endif /* LIBXML_SAX1_ENABLED */
9866
9867
    /*
9868
     * Capture end position
9869
     */
9870
1.70M
    if (cur != NULL && ctxt->record_info) {
9871
0
        xmlParserNodeInfoPtr node_info;
9872
9873
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9874
0
        if (node_info != NULL) {
9875
0
            node_info->end_pos = ctxt->input->consumed +
9876
0
                                 (CUR_PTR - ctxt->input->base);
9877
0
            node_info->end_line = ctxt->input->line;
9878
0
        }
9879
0
    }
9880
1.70M
}
9881
9882
/**
9883
 * Parse the XML version value.
9884
 *
9885
 * @deprecated Internal function, don't use.
9886
 *
9887
 *     [26] VersionNum ::= '1.' [0-9]+
9888
 *
9889
 * In practice allow [0-9].[0-9]+ at that level
9890
 *
9891
 * @param ctxt  an XML parser context
9892
 * @returns the string giving the XML version number, or NULL
9893
 */
9894
xmlChar *
9895
174k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9896
174k
    xmlChar *buf = NULL;
9897
174k
    int len = 0;
9898
174k
    int size = 10;
9899
174k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9900
0
                    XML_MAX_TEXT_LENGTH :
9901
174k
                    XML_MAX_NAME_LENGTH;
9902
174k
    xmlChar cur;
9903
9904
174k
    buf = xmlMalloc(size);
9905
174k
    if (buf == NULL) {
9906
11
  xmlErrMemory(ctxt);
9907
11
  return(NULL);
9908
11
    }
9909
174k
    cur = CUR;
9910
174k
    if (!((cur >= '0') && (cur <= '9'))) {
9911
1.90k
  xmlFree(buf);
9912
1.90k
  return(NULL);
9913
1.90k
    }
9914
172k
    buf[len++] = cur;
9915
172k
    NEXT;
9916
172k
    cur=CUR;
9917
172k
    if (cur != '.') {
9918
3.44k
  xmlFree(buf);
9919
3.44k
  return(NULL);
9920
3.44k
    }
9921
168k
    buf[len++] = cur;
9922
168k
    NEXT;
9923
168k
    cur=CUR;
9924
327k
    while ((cur >= '0') && (cur <= '9')) {
9925
158k
  if (len + 1 >= size) {
9926
3.05k
      xmlChar *tmp;
9927
3.05k
            int newSize;
9928
9929
3.05k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9930
3.05k
            if (newSize < 0) {
9931
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9932
0
                xmlFree(buf);
9933
0
                return(NULL);
9934
0
            }
9935
3.05k
      tmp = xmlRealloc(buf, newSize);
9936
3.05k
      if (tmp == NULL) {
9937
1
    xmlErrMemory(ctxt);
9938
1
          xmlFree(buf);
9939
1
    return(NULL);
9940
1
      }
9941
3.05k
      buf = tmp;
9942
3.05k
            size = newSize;
9943
3.05k
  }
9944
158k
  buf[len++] = cur;
9945
158k
  NEXT;
9946
158k
  cur=CUR;
9947
158k
    }
9948
168k
    buf[len] = 0;
9949
168k
    return(buf);
9950
168k
}
9951
9952
/**
9953
 * Parse the XML version.
9954
 *
9955
 * @deprecated Internal function, don't use.
9956
 *
9957
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9958
 *
9959
 *     [25] Eq ::= S? '=' S?
9960
 *
9961
 * @param ctxt  an XML parser context
9962
 * @returns the version string, e.g. "1.0"
9963
 */
9964
9965
xmlChar *
9966
199k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9967
199k
    xmlChar *version = NULL;
9968
9969
199k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9970
177k
  SKIP(7);
9971
177k
  SKIP_BLANKS;
9972
177k
  if (RAW != '=') {
9973
1.82k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9974
1.82k
      return(NULL);
9975
1.82k
        }
9976
175k
  NEXT;
9977
175k
  SKIP_BLANKS;
9978
175k
  if (RAW == '"') {
9979
168k
      NEXT;
9980
168k
      version = xmlParseVersionNum(ctxt);
9981
168k
      if (RAW != '"') {
9982
5.19k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9983
5.19k
      } else
9984
163k
          NEXT;
9985
168k
  } else if (RAW == '\''){
9986
5.34k
      NEXT;
9987
5.34k
      version = xmlParseVersionNum(ctxt);
9988
5.34k
      if (RAW != '\'') {
9989
1.20k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9990
1.20k
      } else
9991
4.14k
          NEXT;
9992
5.34k
  } else {
9993
1.36k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9994
1.36k
  }
9995
175k
    }
9996
197k
    return(version);
9997
199k
}
9998
9999
/**
10000
 * Parse the XML encoding name
10001
 *
10002
 * @deprecated Internal function, don't use.
10003
 *
10004
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10005
 *
10006
 * @param ctxt  an XML parser context
10007
 * @returns the encoding name value or NULL
10008
 */
10009
xmlChar *
10010
136k
xmlParseEncName(xmlParserCtxt *ctxt) {
10011
136k
    xmlChar *buf = NULL;
10012
136k
    int len = 0;
10013
136k
    int size = 10;
10014
136k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10015
0
                    XML_MAX_TEXT_LENGTH :
10016
136k
                    XML_MAX_NAME_LENGTH;
10017
136k
    xmlChar cur;
10018
10019
136k
    cur = CUR;
10020
136k
    if (((cur >= 'a') && (cur <= 'z')) ||
10021
136k
        ((cur >= 'A') && (cur <= 'Z'))) {
10022
134k
  buf = xmlMalloc(size);
10023
134k
  if (buf == NULL) {
10024
13
      xmlErrMemory(ctxt);
10025
13
      return(NULL);
10026
13
  }
10027
10028
134k
  buf[len++] = cur;
10029
134k
  NEXT;
10030
134k
  cur = CUR;
10031
21.8M
  while (((cur >= 'a') && (cur <= 'z')) ||
10032
21.8M
         ((cur >= 'A') && (cur <= 'Z')) ||
10033
21.8M
         ((cur >= '0') && (cur <= '9')) ||
10034
21.8M
         (cur == '.') || (cur == '_') ||
10035
21.8M
         (cur == '-')) {
10036
21.7M
      if (len + 1 >= size) {
10037
90.3k
          xmlChar *tmp;
10038
90.3k
                int newSize;
10039
10040
90.3k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10041
90.3k
                if (newSize < 0) {
10042
220
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10043
220
                    xmlFree(buf);
10044
220
                    return(NULL);
10045
220
                }
10046
90.0k
    tmp = xmlRealloc(buf, newSize);
10047
90.0k
    if (tmp == NULL) {
10048
3
        xmlErrMemory(ctxt);
10049
3
        xmlFree(buf);
10050
3
        return(NULL);
10051
3
    }
10052
90.0k
    buf = tmp;
10053
90.0k
                size = newSize;
10054
90.0k
      }
10055
21.7M
      buf[len++] = cur;
10056
21.7M
      NEXT;
10057
21.7M
      cur = CUR;
10058
21.7M
        }
10059
134k
  buf[len] = 0;
10060
134k
    } else {
10061
1.64k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10062
1.64k
    }
10063
135k
    return(buf);
10064
136k
}
10065
10066
/**
10067
 * Parse the XML encoding declaration
10068
 *
10069
 * @deprecated Internal function, don't use.
10070
 *
10071
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10072
 *                           "'" EncName "'")
10073
 *
10074
 * this setups the conversion filters.
10075
 *
10076
 * @param ctxt  an XML parser context
10077
 * @returns the encoding value or NULL
10078
 */
10079
10080
const xmlChar *
10081
173k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10082
173k
    xmlChar *encoding = NULL;
10083
10084
173k
    SKIP_BLANKS;
10085
173k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10086
36.7k
        return(NULL);
10087
10088
137k
    SKIP(8);
10089
137k
    SKIP_BLANKS;
10090
137k
    if (RAW != '=') {
10091
668
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10092
668
        return(NULL);
10093
668
    }
10094
136k
    NEXT;
10095
136k
    SKIP_BLANKS;
10096
136k
    if (RAW == '"') {
10097
134k
        NEXT;
10098
134k
        encoding = xmlParseEncName(ctxt);
10099
134k
        if (RAW != '"') {
10100
3.16k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10101
3.16k
            xmlFree(encoding);
10102
3.16k
            return(NULL);
10103
3.16k
        } else
10104
130k
            NEXT;
10105
134k
    } else if (RAW == '\''){
10106
2.01k
        NEXT;
10107
2.01k
        encoding = xmlParseEncName(ctxt);
10108
2.01k
        if (RAW != '\'') {
10109
222
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
222
            xmlFree(encoding);
10111
222
            return(NULL);
10112
222
        } else
10113
1.79k
            NEXT;
10114
2.01k
    } else {
10115
497
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10116
497
    }
10117
10118
133k
    if (encoding == NULL)
10119
501
        return(NULL);
10120
10121
132k
    xmlSetDeclaredEncoding(ctxt, encoding);
10122
10123
132k
    return(ctxt->encoding);
10124
133k
}
10125
10126
/**
10127
 * Parse the XML standalone declaration
10128
 *
10129
 * @deprecated Internal function, don't use.
10130
 *
10131
 *     [32] SDDecl ::= S 'standalone' Eq
10132
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10133
 *
10134
 * [ VC: Standalone Document Declaration ]
10135
 * TODO The standalone document declaration must have the value "no"
10136
 * if any external markup declarations contain declarations of:
10137
 *  - attributes with default values, if elements to which these
10138
 *    attributes apply appear in the document without specifications
10139
 *    of values for these attributes, or
10140
 *  - entities (other than amp, lt, gt, apos, quot), if references
10141
 *    to those entities appear in the document, or
10142
 *  - attributes with values subject to normalization, where the
10143
 *    attribute appears in the document with a value which will change
10144
 *    as a result of normalization, or
10145
 *  - element types with element content, if white space occurs directly
10146
 *    within any instance of those types.
10147
 *
10148
 * @param ctxt  an XML parser context
10149
 * @returns
10150
 *   1 if standalone="yes"
10151
 *   0 if standalone="no"
10152
 *  -2 if standalone attribute is missing or invalid
10153
 *    (A standalone value of -2 means that the XML declaration was found,
10154
 *     but no value was specified for the standalone attribute).
10155
 */
10156
10157
int
10158
82.0k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10159
82.0k
    int standalone = -2;
10160
10161
82.0k
    SKIP_BLANKS;
10162
82.0k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10163
13.8k
  SKIP(10);
10164
13.8k
        SKIP_BLANKS;
10165
13.8k
  if (RAW != '=') {
10166
1.67k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10167
1.67k
      return(standalone);
10168
1.67k
        }
10169
12.2k
  NEXT;
10170
12.2k
  SKIP_BLANKS;
10171
12.2k
        if (RAW == '\''){
10172
2.55k
      NEXT;
10173
2.55k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10174
1.07k
          standalone = 0;
10175
1.07k
                SKIP(2);
10176
1.47k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10177
1.47k
                 (NXT(2) == 's')) {
10178
532
          standalone = 1;
10179
532
    SKIP(3);
10180
944
            } else {
10181
944
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10182
944
      }
10183
2.55k
      if (RAW != '\'') {
10184
1.97k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10185
1.97k
      } else
10186
571
          NEXT;
10187
9.65k
  } else if (RAW == '"'){
10188
9.12k
      NEXT;
10189
9.12k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10190
4.63k
          standalone = 0;
10191
4.63k
    SKIP(2);
10192
4.63k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10193
4.49k
                 (NXT(2) == 's')) {
10194
470
          standalone = 1;
10195
470
                SKIP(3);
10196
4.02k
            } else {
10197
4.02k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10198
4.02k
      }
10199
9.12k
      if (RAW != '"') {
10200
4.46k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10201
4.46k
      } else
10202
4.66k
          NEXT;
10203
9.12k
  } else {
10204
528
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10205
528
        }
10206
12.2k
    }
10207
80.3k
    return(standalone);
10208
82.0k
}
10209
10210
/**
10211
 * Parse an XML declaration header
10212
 *
10213
 * @deprecated Internal function, don't use.
10214
 *
10215
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10216
 * @param ctxt  an XML parser context
10217
 */
10218
10219
void
10220
184k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10221
184k
    xmlChar *version;
10222
10223
    /*
10224
     * This value for standalone indicates that the document has an
10225
     * XML declaration but it does not have a standalone attribute.
10226
     * It will be overwritten later if a standalone attribute is found.
10227
     */
10228
10229
184k
    ctxt->standalone = -2;
10230
10231
    /*
10232
     * We know that '<?xml' is here.
10233
     */
10234
184k
    SKIP(5);
10235
10236
184k
    if (!IS_BLANK_CH(RAW)) {
10237
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10238
0
                 "Blank needed after '<?xml'\n");
10239
0
    }
10240
184k
    SKIP_BLANKS;
10241
10242
    /*
10243
     * We must have the VersionInfo here.
10244
     */
10245
184k
    version = xmlParseVersionInfo(ctxt);
10246
184k
    if (version == NULL) {
10247
25.2k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10248
159k
    } else {
10249
159k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10250
      /*
10251
       * Changed here for XML-1.0 5th edition
10252
       */
10253
59.0k
      if (ctxt->options & XML_PARSE_OLD10) {
10254
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10255
0
                "Unsupported version '%s'\n",
10256
0
                version);
10257
59.0k
      } else {
10258
59.0k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10259
50.4k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10260
50.4k
                      "Unsupported version '%s'\n",
10261
50.4k
          version, NULL);
10262
50.4k
    } else {
10263
8.61k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10264
8.61k
              "Unsupported version '%s'\n",
10265
8.61k
              version);
10266
8.61k
    }
10267
59.0k
      }
10268
59.0k
  }
10269
159k
  if (ctxt->version != NULL)
10270
0
      xmlFree(ctxt->version);
10271
159k
  ctxt->version = version;
10272
159k
    }
10273
10274
    /*
10275
     * We may have the encoding declaration
10276
     */
10277
184k
    if (!IS_BLANK_CH(RAW)) {
10278
51.1k
        if ((RAW == '?') && (NXT(1) == '>')) {
10279
25.8k
      SKIP(2);
10280
25.8k
      return;
10281
25.8k
  }
10282
25.3k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10283
25.3k
    }
10284
158k
    xmlParseEncodingDecl(ctxt);
10285
10286
    /*
10287
     * We may have the standalone status.
10288
     */
10289
158k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10290
78.7k
        if ((RAW == '?') && (NXT(1) == '>')) {
10291
76.8k
      SKIP(2);
10292
76.8k
      return;
10293
76.8k
  }
10294
1.83k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10295
1.83k
    }
10296
10297
    /*
10298
     * We can grow the input buffer freely at that point
10299
     */
10300
82.0k
    GROW;
10301
10302
82.0k
    SKIP_BLANKS;
10303
82.0k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10304
10305
82.0k
    SKIP_BLANKS;
10306
82.0k
    if ((RAW == '?') && (NXT(1) == '>')) {
10307
32.3k
        SKIP(2);
10308
49.6k
    } else if (RAW == '>') {
10309
        /* Deprecated old WD ... */
10310
1.18k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10311
1.18k
  NEXT;
10312
48.4k
    } else {
10313
48.4k
        int c;
10314
10315
48.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10316
2.01M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10317
2.01M
               ((c = CUR) != 0)) {
10318
2.00M
            NEXT;
10319
2.00M
            if (c == '>')
10320
36.8k
                break;
10321
2.00M
        }
10322
48.4k
    }
10323
82.0k
}
10324
10325
/**
10326
 * @since 2.14.0
10327
 *
10328
 * @param ctxt  parser context
10329
 * @returns the version from the XML declaration.
10330
 */
10331
const xmlChar *
10332
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10333
0
    if (ctxt == NULL)
10334
0
        return(NULL);
10335
10336
0
    return(ctxt->version);
10337
0
}
10338
10339
/**
10340
 * @since 2.14.0
10341
 *
10342
 * @param ctxt  parser context
10343
 * @returns the value from the standalone document declaration.
10344
 */
10345
int
10346
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10347
0
    if (ctxt == NULL)
10348
0
        return(0);
10349
10350
0
    return(ctxt->standalone);
10351
0
}
10352
10353
/**
10354
 * Parse an XML Misc* optional field.
10355
 *
10356
 * @deprecated Internal function, don't use.
10357
 *
10358
 *     [27] Misc ::= Comment | PI |  S
10359
 * @param ctxt  an XML parser context
10360
 */
10361
10362
void
10363
1.68M
xmlParseMisc(xmlParserCtxt *ctxt) {
10364
1.77M
    while (PARSER_STOPPED(ctxt) == 0) {
10365
1.74M
        SKIP_BLANKS;
10366
1.74M
        GROW;
10367
1.74M
        if ((RAW == '<') && (NXT(1) == '?')) {
10368
64.6k
      xmlParsePI(ctxt);
10369
1.67M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10370
25.1k
      xmlParseComment(ctxt);
10371
1.65M
        } else {
10372
1.65M
            break;
10373
1.65M
        }
10374
1.74M
    }
10375
1.68M
}
10376
10377
static void
10378
833k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10379
833k
    xmlDocPtr doc;
10380
10381
    /*
10382
     * SAX: end of the document processing.
10383
     */
10384
833k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10385
833k
        ctxt->sax->endDocument(ctxt->userData);
10386
10387
    /*
10388
     * Remove locally kept entity definitions if the tree was not built
10389
     */
10390
833k
    doc = ctxt->myDoc;
10391
833k
    if ((doc != NULL) &&
10392
833k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10393
15.9k
        xmlFreeDoc(doc);
10394
15.9k
        ctxt->myDoc = NULL;
10395
15.9k
    }
10396
833k
}
10397
10398
/**
10399
 * Parse an XML document and invoke the SAX handlers. This is useful
10400
 * if you're only interested in custom SAX callbacks. If you want a
10401
 * document tree, use #xmlCtxtParseDocument.
10402
 *
10403
 * @param ctxt  an XML parser context
10404
 * @returns 0, -1 in case of error.
10405
 */
10406
10407
int
10408
836k
xmlParseDocument(xmlParserCtxt *ctxt) {
10409
836k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10410
0
        return(-1);
10411
10412
836k
    GROW;
10413
10414
    /*
10415
     * SAX: detecting the level.
10416
     */
10417
836k
    xmlCtxtInitializeLate(ctxt);
10418
10419
836k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10420
836k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10421
836k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10422
836k
    }
10423
10424
836k
    xmlDetectEncoding(ctxt);
10425
10426
836k
    if (CUR == 0) {
10427
3.27k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10428
3.27k
  return(-1);
10429
3.27k
    }
10430
10431
833k
    GROW;
10432
833k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10433
10434
  /*
10435
   * Note that we will switch encoding on the fly.
10436
   */
10437
184k
  xmlParseXMLDecl(ctxt);
10438
184k
  SKIP_BLANKS;
10439
649k
    } else {
10440
649k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10441
649k
        if (ctxt->version == NULL) {
10442
53
            xmlErrMemory(ctxt);
10443
53
            return(-1);
10444
53
        }
10445
649k
    }
10446
833k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10447
777k
        ctxt->sax->startDocument(ctxt->userData);
10448
833k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10449
833k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10450
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10451
0
    }
10452
10453
    /*
10454
     * The Misc part of the Prolog
10455
     */
10456
833k
    xmlParseMisc(ctxt);
10457
10458
    /*
10459
     * Then possibly doc type declaration(s) and more Misc
10460
     * (doctypedecl Misc*)?
10461
     */
10462
833k
    GROW;
10463
833k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10464
10465
148k
  ctxt->inSubset = 1;
10466
148k
  xmlParseDocTypeDecl(ctxt);
10467
148k
  if (RAW == '[') {
10468
136k
      xmlParseInternalSubset(ctxt);
10469
136k
  } else if (RAW == '>') {
10470
7.51k
            NEXT;
10471
7.51k
        }
10472
10473
  /*
10474
   * Create and update the external subset.
10475
   */
10476
148k
  ctxt->inSubset = 2;
10477
148k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10478
148k
      (!ctxt->disableSAX))
10479
37.6k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10480
37.6k
                                ctxt->extSubSystem, ctxt->extSubURI);
10481
148k
  ctxt->inSubset = 0;
10482
10483
148k
        xmlCleanSpecialAttr(ctxt);
10484
10485
148k
  xmlParseMisc(ctxt);
10486
148k
    }
10487
10488
    /*
10489
     * Time to start parsing the tree itself
10490
     */
10491
833k
    GROW;
10492
833k
    if (RAW != '<') {
10493
135k
        if (ctxt->wellFormed)
10494
6.85k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10495
6.85k
                           "Start tag expected, '<' not found\n");
10496
698k
    } else {
10497
698k
  xmlParseElement(ctxt);
10498
10499
  /*
10500
   * The Misc part at the end
10501
   */
10502
698k
  xmlParseMisc(ctxt);
10503
10504
698k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10505
698k
    }
10506
10507
833k
    ctxt->instate = XML_PARSER_EOF;
10508
833k
    xmlFinishDocument(ctxt);
10509
10510
833k
    if (! ctxt->wellFormed) {
10511
232k
  ctxt->valid = 0;
10512
232k
  return(-1);
10513
232k
    }
10514
10515
601k
    return(0);
10516
833k
}
10517
10518
/**
10519
 * Parse a general parsed entity
10520
 * An external general parsed entity is well-formed if it matches the
10521
 * production labeled extParsedEnt.
10522
 *
10523
 * @deprecated Internal function, don't use.
10524
 *
10525
 *     [78] extParsedEnt ::= TextDecl? content
10526
 *
10527
 * @param ctxt  an XML parser context
10528
 * @returns 0, -1 in case of error. the parser context is augmented
10529
 *                as a result of the parsing.
10530
 */
10531
10532
int
10533
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10534
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10535
0
        return(-1);
10536
10537
0
    xmlCtxtInitializeLate(ctxt);
10538
10539
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10540
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10541
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10542
0
    }
10543
10544
0
    xmlDetectEncoding(ctxt);
10545
10546
0
    if (CUR == 0) {
10547
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10548
0
    }
10549
10550
    /*
10551
     * Check for the XMLDecl in the Prolog.
10552
     */
10553
0
    GROW;
10554
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10555
10556
  /*
10557
   * Note that we will switch encoding on the fly.
10558
   */
10559
0
  xmlParseXMLDecl(ctxt);
10560
0
  SKIP_BLANKS;
10561
0
    } else {
10562
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10563
0
    }
10564
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10565
0
        ctxt->sax->startDocument(ctxt->userData);
10566
10567
    /*
10568
     * Doing validity checking on chunk doesn't make sense
10569
     */
10570
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10571
0
    ctxt->validate = 0;
10572
0
    ctxt->depth = 0;
10573
10574
0
    xmlParseContentInternal(ctxt);
10575
10576
0
    if (ctxt->input->cur < ctxt->input->end)
10577
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10578
10579
    /*
10580
     * SAX: end of the document processing.
10581
     */
10582
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10583
0
        ctxt->sax->endDocument(ctxt->userData);
10584
10585
0
    if (! ctxt->wellFormed) return(-1);
10586
0
    return(0);
10587
0
}
10588
10589
#ifdef LIBXML_PUSH_ENABLED
10590
/************************************************************************
10591
 *                  *
10592
 *    Progressive parsing interfaces        *
10593
 *                  *
10594
 ************************************************************************/
10595
10596
/**
10597
 * Check whether the input buffer contains a character.
10598
 *
10599
 * @param ctxt  an XML parser context
10600
 * @param c  character
10601
 */
10602
static int
10603
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10604
    const xmlChar *cur;
10605
10606
    if (ctxt->checkIndex == 0) {
10607
        cur = ctxt->input->cur + 1;
10608
    } else {
10609
        cur = ctxt->input->cur + ctxt->checkIndex;
10610
    }
10611
10612
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10613
        size_t index = ctxt->input->end - ctxt->input->cur;
10614
10615
        if (index > LONG_MAX) {
10616
            ctxt->checkIndex = 0;
10617
            return(1);
10618
        }
10619
        ctxt->checkIndex = index;
10620
        return(0);
10621
    } else {
10622
        ctxt->checkIndex = 0;
10623
        return(1);
10624
    }
10625
}
10626
10627
/**
10628
 * Check whether the input buffer contains a string.
10629
 *
10630
 * @param ctxt  an XML parser context
10631
 * @param startDelta  delta to apply at the start
10632
 * @param str  string
10633
 * @param strLen  length of string
10634
 */
10635
static const xmlChar *
10636
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10637
                     const char *str, size_t strLen) {
10638
    const xmlChar *cur, *term;
10639
10640
    if (ctxt->checkIndex == 0) {
10641
        cur = ctxt->input->cur + startDelta;
10642
    } else {
10643
        cur = ctxt->input->cur + ctxt->checkIndex;
10644
    }
10645
10646
    term = BAD_CAST strstr((const char *) cur, str);
10647
    if (term == NULL) {
10648
        const xmlChar *end = ctxt->input->end;
10649
        size_t index;
10650
10651
        /* Rescan (strLen - 1) characters. */
10652
        if ((size_t) (end - cur) < strLen)
10653
            end = cur;
10654
        else
10655
            end -= strLen - 1;
10656
        index = end - ctxt->input->cur;
10657
        if (index > LONG_MAX) {
10658
            ctxt->checkIndex = 0;
10659
            return(ctxt->input->end - strLen);
10660
        }
10661
        ctxt->checkIndex = index;
10662
    } else {
10663
        ctxt->checkIndex = 0;
10664
    }
10665
10666
    return(term);
10667
}
10668
10669
/**
10670
 * Check whether the input buffer contains terminated char data.
10671
 *
10672
 * @param ctxt  an XML parser context
10673
 */
10674
static int
10675
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10676
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10677
    const xmlChar *end = ctxt->input->end;
10678
    size_t index;
10679
10680
    while (cur < end) {
10681
        if ((*cur == '<') || (*cur == '&')) {
10682
            ctxt->checkIndex = 0;
10683
            return(1);
10684
        }
10685
        cur++;
10686
    }
10687
10688
    index = cur - ctxt->input->cur;
10689
    if (index > LONG_MAX) {
10690
        ctxt->checkIndex = 0;
10691
        return(1);
10692
    }
10693
    ctxt->checkIndex = index;
10694
    return(0);
10695
}
10696
10697
/**
10698
 * Check whether there's enough data in the input buffer to finish parsing
10699
 * a start tag. This has to take quotes into account.
10700
 *
10701
 * @param ctxt  an XML parser context
10702
 */
10703
static int
10704
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10705
    const xmlChar *cur;
10706
    const xmlChar *end = ctxt->input->end;
10707
    int state = ctxt->endCheckState;
10708
    size_t index;
10709
10710
    if (ctxt->checkIndex == 0)
10711
        cur = ctxt->input->cur + 1;
10712
    else
10713
        cur = ctxt->input->cur + ctxt->checkIndex;
10714
10715
    while (cur < end) {
10716
        if (state) {
10717
            if (*cur == state)
10718
                state = 0;
10719
        } else if (*cur == '\'' || *cur == '"') {
10720
            state = *cur;
10721
        } else if (*cur == '>') {
10722
            ctxt->checkIndex = 0;
10723
            ctxt->endCheckState = 0;
10724
            return(1);
10725
        }
10726
        cur++;
10727
    }
10728
10729
    index = cur - ctxt->input->cur;
10730
    if (index > LONG_MAX) {
10731
        ctxt->checkIndex = 0;
10732
        ctxt->endCheckState = 0;
10733
        return(1);
10734
    }
10735
    ctxt->checkIndex = index;
10736
    ctxt->endCheckState = state;
10737
    return(0);
10738
}
10739
10740
/**
10741
 * Check whether there's enough data in the input buffer to finish parsing
10742
 * the internal subset.
10743
 *
10744
 * @param ctxt  an XML parser context
10745
 */
10746
static int
10747
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10748
    /*
10749
     * Sorry, but progressive parsing of the internal subset is not
10750
     * supported. We first check that the full content of the internal
10751
     * subset is available and parsing is launched only at that point.
10752
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10753
     * not in a ']]>' sequence which are conditional sections.
10754
     */
10755
    const xmlChar *cur, *start;
10756
    const xmlChar *end = ctxt->input->end;
10757
    int state = ctxt->endCheckState;
10758
    size_t index;
10759
10760
    if (ctxt->checkIndex == 0) {
10761
        cur = ctxt->input->cur + 1;
10762
    } else {
10763
        cur = ctxt->input->cur + ctxt->checkIndex;
10764
    }
10765
    start = cur;
10766
10767
    while (cur < end) {
10768
        if (state == '-') {
10769
            if ((*cur == '-') &&
10770
                (cur[1] == '-') &&
10771
                (cur[2] == '>')) {
10772
                state = 0;
10773
                cur += 3;
10774
                start = cur;
10775
                continue;
10776
            }
10777
        }
10778
        else if (state == ']') {
10779
            if (*cur == '>') {
10780
                ctxt->checkIndex = 0;
10781
                ctxt->endCheckState = 0;
10782
                return(1);
10783
            }
10784
            if (IS_BLANK_CH(*cur)) {
10785
                state = ' ';
10786
            } else if (*cur != ']') {
10787
                state = 0;
10788
                start = cur;
10789
                continue;
10790
            }
10791
        }
10792
        else if (state == ' ') {
10793
            if (*cur == '>') {
10794
                ctxt->checkIndex = 0;
10795
                ctxt->endCheckState = 0;
10796
                return(1);
10797
            }
10798
            if (!IS_BLANK_CH(*cur)) {
10799
                state = 0;
10800
                start = cur;
10801
                continue;
10802
            }
10803
        }
10804
        else if (state != 0) {
10805
            if (*cur == state) {
10806
                state = 0;
10807
                start = cur + 1;
10808
            }
10809
        }
10810
        else if (*cur == '<') {
10811
            if ((cur[1] == '!') &&
10812
                (cur[2] == '-') &&
10813
                (cur[3] == '-')) {
10814
                state = '-';
10815
                cur += 4;
10816
                /* Don't treat <!--> as comment */
10817
                start = cur;
10818
                continue;
10819
            }
10820
        }
10821
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10822
            state = *cur;
10823
        }
10824
10825
        cur++;
10826
    }
10827
10828
    /*
10829
     * Rescan the three last characters to detect "<!--" and "-->"
10830
     * split across chunks.
10831
     */
10832
    if ((state == 0) || (state == '-')) {
10833
        if (cur - start < 3)
10834
            cur = start;
10835
        else
10836
            cur -= 3;
10837
    }
10838
    index = cur - ctxt->input->cur;
10839
    if (index > LONG_MAX) {
10840
        ctxt->checkIndex = 0;
10841
        ctxt->endCheckState = 0;
10842
        return(1);
10843
    }
10844
    ctxt->checkIndex = index;
10845
    ctxt->endCheckState = state;
10846
    return(0);
10847
}
10848
10849
/**
10850
 * Try to progress on parsing
10851
 *
10852
 * @param ctxt  an XML parser context
10853
 * @param terminate  last chunk indicator
10854
 * @returns zero if no parsing was possible
10855
 */
10856
static int
10857
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10858
    int ret = 0;
10859
    size_t avail;
10860
    xmlChar cur, next;
10861
10862
    if (ctxt->input == NULL)
10863
        return(0);
10864
10865
    if ((ctxt->input != NULL) &&
10866
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10867
        xmlParserShrink(ctxt);
10868
    }
10869
10870
    while (ctxt->disableSAX == 0) {
10871
        avail = ctxt->input->end - ctxt->input->cur;
10872
        if (avail < 1)
10873
      goto done;
10874
        switch (ctxt->instate) {
10875
            case XML_PARSER_EOF:
10876
          /*
10877
     * Document parsing is done !
10878
     */
10879
          goto done;
10880
            case XML_PARSER_START:
10881
                /*
10882
                 * Very first chars read from the document flow.
10883
                 */
10884
                if ((!terminate) && (avail < 4))
10885
                    goto done;
10886
10887
                /*
10888
                 * We need more bytes to detect EBCDIC code pages.
10889
                 * See xmlDetectEBCDIC.
10890
                 */
10891
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10892
                    (!terminate) && (avail < 200))
10893
                    goto done;
10894
10895
                xmlDetectEncoding(ctxt);
10896
                ctxt->instate = XML_PARSER_XML_DECL;
10897
    break;
10898
10899
            case XML_PARSER_XML_DECL:
10900
    if ((!terminate) && (avail < 2))
10901
        goto done;
10902
    cur = ctxt->input->cur[0];
10903
    next = ctxt->input->cur[1];
10904
          if ((cur == '<') && (next == '?')) {
10905
        /* PI or XML decl */
10906
        if ((!terminate) &&
10907
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10908
      goto done;
10909
        if ((ctxt->input->cur[2] == 'x') &&
10910
      (ctxt->input->cur[3] == 'm') &&
10911
      (ctxt->input->cur[4] == 'l') &&
10912
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10913
      ret += 5;
10914
      xmlParseXMLDecl(ctxt);
10915
        } else {
10916
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917
                        if (ctxt->version == NULL) {
10918
                            xmlErrMemory(ctxt);
10919
                            break;
10920
                        }
10921
        }
10922
    } else {
10923
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924
        if (ctxt->version == NULL) {
10925
            xmlErrMemory(ctxt);
10926
      break;
10927
        }
10928
    }
10929
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10930
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10931
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10932
                }
10933
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10934
                    (!ctxt->disableSAX))
10935
                    ctxt->sax->startDocument(ctxt->userData);
10936
                ctxt->instate = XML_PARSER_MISC;
10937
    break;
10938
            case XML_PARSER_START_TAG: {
10939
          const xmlChar *name;
10940
    const xmlChar *prefix = NULL;
10941
    const xmlChar *URI = NULL;
10942
                int line = ctxt->input->line;
10943
    int nbNs = 0;
10944
10945
    if ((!terminate) && (avail < 2))
10946
        goto done;
10947
    cur = ctxt->input->cur[0];
10948
          if (cur != '<') {
10949
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950
                                   "Start tag expected, '<' not found");
10951
                    ctxt->instate = XML_PARSER_EOF;
10952
                    xmlFinishDocument(ctxt);
10953
        goto done;
10954
    }
10955
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10956
                    goto done;
10957
    if (ctxt->spaceNr == 0)
10958
        spacePush(ctxt, -1);
10959
    else if (*ctxt->space == -2)
10960
        spacePush(ctxt, -1);
10961
    else
10962
        spacePush(ctxt, *ctxt->space);
10963
#ifdef LIBXML_SAX1_ENABLED
10964
    if (ctxt->sax2)
10965
#endif /* LIBXML_SAX1_ENABLED */
10966
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10967
#ifdef LIBXML_SAX1_ENABLED
10968
    else
10969
        name = xmlParseStartTag(ctxt);
10970
#endif /* LIBXML_SAX1_ENABLED */
10971
    if (name == NULL) {
10972
        spacePop(ctxt);
10973
                    ctxt->instate = XML_PARSER_EOF;
10974
                    xmlFinishDocument(ctxt);
10975
        goto done;
10976
    }
10977
#ifdef LIBXML_VALID_ENABLED
10978
    /*
10979
     * [ VC: Root Element Type ]
10980
     * The Name in the document type declaration must match
10981
     * the element type of the root element.
10982
     */
10983
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986
#endif /* LIBXML_VALID_ENABLED */
10987
10988
    /*
10989
     * Check for an Empty Element.
10990
     */
10991
    if ((RAW == '/') && (NXT(1) == '>')) {
10992
        SKIP(2);
10993
10994
        if (ctxt->sax2) {
10995
      if ((ctxt->sax != NULL) &&
10996
          (ctxt->sax->endElementNs != NULL) &&
10997
          (!ctxt->disableSAX))
10998
          ctxt->sax->endElementNs(ctxt->userData, name,
10999
                                  prefix, URI);
11000
      if (nbNs > 0)
11001
          xmlParserNsPop(ctxt, nbNs);
11002
#ifdef LIBXML_SAX1_ENABLED
11003
        } else {
11004
      if ((ctxt->sax != NULL) &&
11005
          (ctxt->sax->endElement != NULL) &&
11006
          (!ctxt->disableSAX))
11007
          ctxt->sax->endElement(ctxt->userData, name);
11008
#endif /* LIBXML_SAX1_ENABLED */
11009
        }
11010
        spacePop(ctxt);
11011
    } else if (RAW == '>') {
11012
        NEXT;
11013
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11014
    } else {
11015
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11016
           "Couldn't find end of Start Tag %s\n",
11017
           name);
11018
        nodePop(ctxt);
11019
        spacePop(ctxt);
11020
                    if (nbNs > 0)
11021
                        xmlParserNsPop(ctxt, nbNs);
11022
    }
11023
11024
                if (ctxt->nameNr == 0)
11025
                    ctxt->instate = XML_PARSER_EPILOG;
11026
                else
11027
                    ctxt->instate = XML_PARSER_CONTENT;
11028
                break;
11029
      }
11030
            case XML_PARSER_CONTENT: {
11031
    cur = ctxt->input->cur[0];
11032
11033
    if (cur == '<') {
11034
                    if ((!terminate) && (avail < 2))
11035
                        goto done;
11036
        next = ctxt->input->cur[1];
11037
11038
                    if (next == '/') {
11039
                        ctxt->instate = XML_PARSER_END_TAG;
11040
                        break;
11041
                    } else if (next == '?') {
11042
                        if ((!terminate) &&
11043
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11044
                            goto done;
11045
                        xmlParsePI(ctxt);
11046
                        ctxt->instate = XML_PARSER_CONTENT;
11047
                        break;
11048
                    } else if (next == '!') {
11049
                        if ((!terminate) && (avail < 3))
11050
                            goto done;
11051
                        next = ctxt->input->cur[2];
11052
11053
                        if (next == '-') {
11054
                            if ((!terminate) && (avail < 4))
11055
                                goto done;
11056
                            if (ctxt->input->cur[3] == '-') {
11057
                                if ((!terminate) &&
11058
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11059
                                    goto done;
11060
                                xmlParseComment(ctxt);
11061
                                ctxt->instate = XML_PARSER_CONTENT;
11062
                                break;
11063
                            }
11064
                        } else if (next == '[') {
11065
                            if ((!terminate) && (avail < 9))
11066
                                goto done;
11067
                            if ((ctxt->input->cur[2] == '[') &&
11068
                                (ctxt->input->cur[3] == 'C') &&
11069
                                (ctxt->input->cur[4] == 'D') &&
11070
                                (ctxt->input->cur[5] == 'A') &&
11071
                                (ctxt->input->cur[6] == 'T') &&
11072
                                (ctxt->input->cur[7] == 'A') &&
11073
                                (ctxt->input->cur[8] == '[')) {
11074
                                if ((!terminate) &&
11075
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11076
                                    goto done;
11077
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11078
                                xmlParseCDSect(ctxt);
11079
                                ctxt->instate = XML_PARSER_CONTENT;
11080
                                break;
11081
                            }
11082
                        }
11083
                    }
11084
    } else if (cur == '&') {
11085
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11086
      goto done;
11087
        xmlParseReference(ctxt);
11088
                    break;
11089
    } else {
11090
        /* TODO Avoid the extra copy, handle directly !!! */
11091
        /*
11092
         * Goal of the following test is:
11093
         *  - minimize calls to the SAX 'character' callback
11094
         *    when they are mergeable
11095
         *  - handle an problem for isBlank when we only parse
11096
         *    a sequence of blank chars and the next one is
11097
         *    not available to check against '<' presence.
11098
         *  - tries to homogenize the differences in SAX
11099
         *    callbacks between the push and pull versions
11100
         *    of the parser.
11101
         */
11102
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11103
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11104
          goto done;
11105
                    }
11106
                    ctxt->checkIndex = 0;
11107
        xmlParseCharDataInternal(ctxt, !terminate);
11108
                    break;
11109
    }
11110
11111
                ctxt->instate = XML_PARSER_START_TAG;
11112
    break;
11113
      }
11114
            case XML_PARSER_END_TAG:
11115
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11116
        goto done;
11117
    if (ctxt->sax2) {
11118
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11119
        nameNsPop(ctxt);
11120
    }
11121
#ifdef LIBXML_SAX1_ENABLED
11122
      else
11123
        xmlParseEndTag1(ctxt, 0);
11124
#endif /* LIBXML_SAX1_ENABLED */
11125
    if (ctxt->nameNr == 0) {
11126
        ctxt->instate = XML_PARSER_EPILOG;
11127
    } else {
11128
        ctxt->instate = XML_PARSER_CONTENT;
11129
    }
11130
    break;
11131
            case XML_PARSER_MISC:
11132
            case XML_PARSER_PROLOG:
11133
            case XML_PARSER_EPILOG:
11134
    SKIP_BLANKS;
11135
                avail = ctxt->input->end - ctxt->input->cur;
11136
    if (avail < 1)
11137
        goto done;
11138
    if (ctxt->input->cur[0] == '<') {
11139
                    if ((!terminate) && (avail < 2))
11140
                        goto done;
11141
                    next = ctxt->input->cur[1];
11142
                    if (next == '?') {
11143
                        if ((!terminate) &&
11144
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11145
                            goto done;
11146
                        xmlParsePI(ctxt);
11147
                        break;
11148
                    } else if (next == '!') {
11149
                        if ((!terminate) && (avail < 3))
11150
                            goto done;
11151
11152
                        if (ctxt->input->cur[2] == '-') {
11153
                            if ((!terminate) && (avail < 4))
11154
                                goto done;
11155
                            if (ctxt->input->cur[3] == '-') {
11156
                                if ((!terminate) &&
11157
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11158
                                    goto done;
11159
                                xmlParseComment(ctxt);
11160
                                break;
11161
                            }
11162
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11163
                            if ((!terminate) && (avail < 9))
11164
                                goto done;
11165
                            if ((ctxt->input->cur[2] == 'D') &&
11166
                                (ctxt->input->cur[3] == 'O') &&
11167
                                (ctxt->input->cur[4] == 'C') &&
11168
                                (ctxt->input->cur[5] == 'T') &&
11169
                                (ctxt->input->cur[6] == 'Y') &&
11170
                                (ctxt->input->cur[7] == 'P') &&
11171
                                (ctxt->input->cur[8] == 'E')) {
11172
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11173
                                    goto done;
11174
                                ctxt->inSubset = 1;
11175
                                xmlParseDocTypeDecl(ctxt);
11176
                                if (RAW == '[') {
11177
                                    ctxt->instate = XML_PARSER_DTD;
11178
                                } else {
11179
                                    if (RAW == '>')
11180
                                        NEXT;
11181
                                    /*
11182
                                     * Create and update the external subset.
11183
                                     */
11184
                                    ctxt->inSubset = 2;
11185
                                    if ((ctxt->sax != NULL) &&
11186
                                        (!ctxt->disableSAX) &&
11187
                                        (ctxt->sax->externalSubset != NULL))
11188
                                        ctxt->sax->externalSubset(
11189
                                                ctxt->userData,
11190
                                                ctxt->intSubName,
11191
                                                ctxt->extSubSystem,
11192
                                                ctxt->extSubURI);
11193
                                    ctxt->inSubset = 0;
11194
                                    xmlCleanSpecialAttr(ctxt);
11195
                                    ctxt->instate = XML_PARSER_PROLOG;
11196
                                }
11197
                                break;
11198
                            }
11199
                        }
11200
                    }
11201
                }
11202
11203
                if (ctxt->instate == XML_PARSER_EPILOG) {
11204
                    if (ctxt->errNo == XML_ERR_OK)
11205
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11206
        ctxt->instate = XML_PARSER_EOF;
11207
                    xmlFinishDocument(ctxt);
11208
                } else {
11209
        ctxt->instate = XML_PARSER_START_TAG;
11210
    }
11211
    break;
11212
            case XML_PARSER_DTD: {
11213
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11214
                    goto done;
11215
    xmlParseInternalSubset(ctxt);
11216
    ctxt->inSubset = 2;
11217
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11218
        (ctxt->sax->externalSubset != NULL))
11219
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11220
          ctxt->extSubSystem, ctxt->extSubURI);
11221
    ctxt->inSubset = 0;
11222
    xmlCleanSpecialAttr(ctxt);
11223
    ctxt->instate = XML_PARSER_PROLOG;
11224
                break;
11225
      }
11226
            default:
11227
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11228
      "PP: internal error\n");
11229
    ctxt->instate = XML_PARSER_EOF;
11230
    break;
11231
  }
11232
    }
11233
done:
11234
    return(ret);
11235
}
11236
11237
/**
11238
 * Parse a chunk of memory in push parser mode.
11239
 *
11240
 * Assumes that the parser context was initialized with
11241
 * #xmlCreatePushParserCtxt.
11242
 *
11243
 * The last chunk, which will often be empty, must be marked with
11244
 * the `terminate` flag. With the default SAX callbacks, the resulting
11245
 * document will be available in ctxt->myDoc. This pointer will not
11246
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11247
 * caller. If the document isn't well-formed, it will still be returned
11248
 * in ctxt->myDoc.
11249
 *
11250
 * As an exception, #xmlCtxtResetPush will free the document in
11251
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11252
 * the document.
11253
 *
11254
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11255
 * result document.
11256
 *
11257
 * @param ctxt  an XML parser context
11258
 * @param chunk  chunk of memory
11259
 * @param size  size of chunk in bytes
11260
 * @param terminate  last chunk indicator
11261
 * @returns an xmlParserErrors code (0 on success).
11262
 */
11263
int
11264
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11265
              int terminate) {
11266
    size_t curBase;
11267
    size_t maxLength;
11268
    size_t pos;
11269
    int end_in_lf = 0;
11270
    int res;
11271
11272
    if ((ctxt == NULL) || (size < 0))
11273
        return(XML_ERR_ARGUMENT);
11274
    if ((chunk == NULL) && (size > 0))
11275
        return(XML_ERR_ARGUMENT);
11276
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11277
        return(XML_ERR_ARGUMENT);
11278
    if (ctxt->disableSAX != 0)
11279
        return(ctxt->errNo);
11280
11281
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11282
    if (ctxt->instate == XML_PARSER_START)
11283
        xmlCtxtInitializeLate(ctxt);
11284
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11285
        (chunk[size - 1] == '\r')) {
11286
  end_in_lf = 1;
11287
  size--;
11288
    }
11289
11290
    /*
11291
     * Also push an empty chunk to make sure that the raw buffer
11292
     * will be flushed if there is an encoder.
11293
     */
11294
    pos = ctxt->input->cur - ctxt->input->base;
11295
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11296
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11297
    if (res < 0) {
11298
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11299
        xmlHaltParser(ctxt);
11300
        return(ctxt->errNo);
11301
    }
11302
11303
    xmlParseTryOrFinish(ctxt, terminate);
11304
11305
    curBase = ctxt->input->cur - ctxt->input->base;
11306
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11307
                XML_MAX_HUGE_LENGTH :
11308
                XML_MAX_LOOKUP_LIMIT;
11309
    if (curBase > maxLength) {
11310
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11311
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11312
        xmlHaltParser(ctxt);
11313
    }
11314
11315
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11316
        return(ctxt->errNo);
11317
11318
    if (end_in_lf == 1) {
11319
  pos = ctxt->input->cur - ctxt->input->base;
11320
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11321
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11322
        if (res < 0) {
11323
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11324
            xmlHaltParser(ctxt);
11325
            return(ctxt->errNo);
11326
        }
11327
    }
11328
    if (terminate) {
11329
  /*
11330
   * Check for termination
11331
   */
11332
        if ((ctxt->instate != XML_PARSER_EOF) &&
11333
            (ctxt->instate != XML_PARSER_EPILOG)) {
11334
            if (ctxt->nameNr > 0) {
11335
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11336
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11337
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11338
                        "Premature end of data in tag %s line %d\n",
11339
                        name, line, NULL);
11340
            } else if (ctxt->instate == XML_PARSER_START) {
11341
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11342
            } else {
11343
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11344
                               "Start tag expected, '<' not found\n");
11345
            }
11346
        } else {
11347
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11348
        }
11349
  if (ctxt->instate != XML_PARSER_EOF) {
11350
            ctxt->instate = XML_PARSER_EOF;
11351
            xmlFinishDocument(ctxt);
11352
  }
11353
    }
11354
    if (ctxt->wellFormed == 0)
11355
  return((xmlParserErrors) ctxt->errNo);
11356
    else
11357
        return(0);
11358
}
11359
11360
/************************************************************************
11361
 *                  *
11362
 *    I/O front end functions to the parser     *
11363
 *                  *
11364
 ************************************************************************/
11365
11366
/**
11367
 * Create a parser context for using the XML parser in push mode.
11368
 * See #xmlParseChunk.
11369
 *
11370
 * Passing an initial chunk is useless and deprecated.
11371
 *
11372
 * The push parser doesn't support recovery mode or the
11373
 * XML_PARSE_NOBLANKS option.
11374
 *
11375
 * `filename` is used as base URI to fetch external entities and for
11376
 * error reports.
11377
 *
11378
 * @param sax  a SAX handler (optional)
11379
 * @param user_data  user data for SAX callbacks (optional)
11380
 * @param chunk  initial chunk (optional, deprecated)
11381
 * @param size  size of initial chunk in bytes
11382
 * @param filename  file name or URI (optional)
11383
 * @returns the new parser context or NULL if a memory allocation
11384
 * failed.
11385
 */
11386
11387
xmlParserCtxt *
11388
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11389
                        const char *chunk, int size, const char *filename) {
11390
    xmlParserCtxtPtr ctxt;
11391
    xmlParserInputPtr input;
11392
11393
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11394
    if (ctxt == NULL)
11395
  return(NULL);
11396
11397
    ctxt->options &= ~XML_PARSE_NODICT;
11398
    ctxt->dictNames = 1;
11399
11400
    input = xmlNewPushInput(filename, chunk, size);
11401
    if (input == NULL) {
11402
  xmlFreeParserCtxt(ctxt);
11403
  return(NULL);
11404
    }
11405
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11406
        xmlFreeInputStream(input);
11407
        xmlFreeParserCtxt(ctxt);
11408
        return(NULL);
11409
    }
11410
11411
    return(ctxt);
11412
}
11413
#endif /* LIBXML_PUSH_ENABLED */
11414
11415
/**
11416
 * Blocks further parser processing
11417
 *
11418
 * @param ctxt  an XML parser context
11419
 */
11420
void
11421
22.9M
xmlStopParser(xmlParserCtxt *ctxt) {
11422
22.9M
    if (ctxt == NULL)
11423
22.9M
        return;
11424
0
    xmlHaltParser(ctxt);
11425
    /*
11426
     * TODO: Update ctxt->lastError and ctxt->wellFormed?
11427
     */
11428
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11429
0
        ctxt->errNo = XML_ERR_USER_STOP;
11430
0
}
11431
11432
/**
11433
 * Create a parser context for using the XML parser with an existing
11434
 * I/O stream
11435
 *
11436
 * @param sax  a SAX handler (optional)
11437
 * @param user_data  user data for SAX callbacks (optional)
11438
 * @param ioread  an I/O read function
11439
 * @param ioclose  an I/O close function (optional)
11440
 * @param ioctx  an I/O handler
11441
 * @param enc  the charset encoding if known (deprecated)
11442
 * @returns the new parser context or NULL
11443
 */
11444
xmlParserCtxt *
11445
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11446
                      xmlInputReadCallback ioread,
11447
                      xmlInputCloseCallback ioclose,
11448
0
                      void *ioctx, xmlCharEncoding enc) {
11449
0
    xmlParserCtxtPtr ctxt;
11450
0
    xmlParserInputPtr input;
11451
0
    const char *encoding;
11452
11453
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11454
0
    if (ctxt == NULL)
11455
0
  return(NULL);
11456
11457
0
    encoding = xmlGetCharEncodingName(enc);
11458
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11459
0
                                  encoding, 0);
11460
0
    if (input == NULL) {
11461
0
  xmlFreeParserCtxt(ctxt);
11462
0
        return (NULL);
11463
0
    }
11464
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11465
0
        xmlFreeInputStream(input);
11466
0
        xmlFreeParserCtxt(ctxt);
11467
0
        return(NULL);
11468
0
    }
11469
11470
0
    return(ctxt);
11471
0
}
11472
11473
#ifdef LIBXML_VALID_ENABLED
11474
/************************************************************************
11475
 *                  *
11476
 *    Front ends when parsing a DTD       *
11477
 *                  *
11478
 ************************************************************************/
11479
11480
/**
11481
 * Parse a DTD.
11482
 *
11483
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11484
 * to make external entities work.
11485
 *
11486
 * @since 2.14.0
11487
 *
11488
 * @param ctxt  a parser context
11489
 * @param input  a parser input
11490
 * @param publicId  public ID of the DTD (optional)
11491
 * @param systemId  system ID of the DTD (optional)
11492
 * @returns the resulting xmlDtd or NULL in case of error.
11493
 * `input` will be freed by the function in any case.
11494
 */
11495
xmlDtd *
11496
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11497
                const xmlChar *publicId, const xmlChar *systemId) {
11498
    xmlDtdPtr ret = NULL;
11499
11500
    if ((ctxt == NULL) || (input == NULL)) {
11501
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11502
        xmlFreeInputStream(input);
11503
        return(NULL);
11504
    }
11505
11506
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11507
        xmlFreeInputStream(input);
11508
        return(NULL);
11509
    }
11510
11511
    if (publicId == NULL)
11512
        publicId = BAD_CAST "none";
11513
    if (systemId == NULL)
11514
        systemId = BAD_CAST "none";
11515
11516
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11517
    if (ctxt->myDoc == NULL) {
11518
        xmlErrMemory(ctxt);
11519
        goto error;
11520
    }
11521
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11522
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11523
                                       publicId, systemId);
11524
    if (ctxt->myDoc->extSubset == NULL) {
11525
        xmlErrMemory(ctxt);
11526
        xmlFreeDoc(ctxt->myDoc);
11527
        goto error;
11528
    }
11529
11530
    xmlParseExternalSubset(ctxt, publicId, systemId);
11531
11532
    if (ctxt->wellFormed) {
11533
        ret = ctxt->myDoc->extSubset;
11534
        ctxt->myDoc->extSubset = NULL;
11535
        if (ret != NULL) {
11536
            xmlNodePtr tmp;
11537
11538
            ret->doc = NULL;
11539
            tmp = ret->children;
11540
            while (tmp != NULL) {
11541
                tmp->doc = NULL;
11542
                tmp = tmp->next;
11543
            }
11544
        }
11545
    } else {
11546
        ret = NULL;
11547
    }
11548
    xmlFreeDoc(ctxt->myDoc);
11549
    ctxt->myDoc = NULL;
11550
11551
error:
11552
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11553
11554
    return(ret);
11555
}
11556
11557
/**
11558
 * Load and parse a DTD
11559
 *
11560
 * @deprecated Use #xmlCtxtParseDtd.
11561
 *
11562
 * @param sax  the SAX handler block or NULL
11563
 * @param input  an Input Buffer
11564
 * @param enc  the charset encoding if known
11565
 * @returns the resulting xmlDtd or NULL in case of error.
11566
 * `input` will be freed by the function in any case.
11567
 */
11568
11569
xmlDtd *
11570
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11571
        xmlCharEncoding enc) {
11572
    xmlDtdPtr ret = NULL;
11573
    xmlParserCtxtPtr ctxt;
11574
    xmlParserInputPtr pinput = NULL;
11575
11576
    if (input == NULL)
11577
  return(NULL);
11578
11579
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11580
    if (ctxt == NULL) {
11581
        xmlFreeParserInputBuffer(input);
11582
  return(NULL);
11583
    }
11584
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11585
11586
    /*
11587
     * generate a parser input from the I/O handler
11588
     */
11589
11590
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11591
    if (pinput == NULL) {
11592
        xmlFreeParserInputBuffer(input);
11593
  xmlFreeParserCtxt(ctxt);
11594
  return(NULL);
11595
    }
11596
11597
    if (enc != XML_CHAR_ENCODING_NONE) {
11598
        xmlSwitchEncoding(ctxt, enc);
11599
    }
11600
11601
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11602
11603
    xmlFreeParserCtxt(ctxt);
11604
    return(ret);
11605
}
11606
11607
/**
11608
 * Load and parse an external subset.
11609
 *
11610
 * @deprecated Use #xmlCtxtParseDtd.
11611
 *
11612
 * @param sax  the SAX handler block
11613
 * @param publicId  public identifier of the DTD (optional)
11614
 * @param systemId  system identifier (URL) of the DTD
11615
 * @returns the resulting xmlDtd or NULL in case of error.
11616
 */
11617
11618
xmlDtd *
11619
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11620
               const xmlChar *systemId) {
11621
    xmlDtdPtr ret = NULL;
11622
    xmlParserCtxtPtr ctxt;
11623
    xmlParserInputPtr input = NULL;
11624
    xmlChar* systemIdCanonic;
11625
11626
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11627
11628
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11629
    if (ctxt == NULL) {
11630
  return(NULL);
11631
    }
11632
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11633
11634
    /*
11635
     * Canonicalise the system ID
11636
     */
11637
    systemIdCanonic = xmlCanonicPath(systemId);
11638
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11639
  xmlFreeParserCtxt(ctxt);
11640
  return(NULL);
11641
    }
11642
11643
    /*
11644
     * Ask the Entity resolver to load the damn thing
11645
     */
11646
11647
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11648
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11649
                                   systemIdCanonic);
11650
    if (input == NULL) {
11651
  xmlFreeParserCtxt(ctxt);
11652
  if (systemIdCanonic != NULL)
11653
      xmlFree(systemIdCanonic);
11654
  return(NULL);
11655
    }
11656
11657
    if (input->filename == NULL)
11658
  input->filename = (char *) systemIdCanonic;
11659
    else
11660
  xmlFree(systemIdCanonic);
11661
11662
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11663
11664
    xmlFreeParserCtxt(ctxt);
11665
    return(ret);
11666
}
11667
11668
11669
/**
11670
 * Load and parse an external subset.
11671
 *
11672
 * @param publicId  public identifier of the DTD (optional)
11673
 * @param systemId  system identifier (URL) of the DTD
11674
 * @returns the resulting xmlDtd or NULL in case of error.
11675
 */
11676
11677
xmlDtd *
11678
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11679
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11680
}
11681
#endif /* LIBXML_VALID_ENABLED */
11682
11683
/************************************************************************
11684
 *                  *
11685
 *    Front ends when parsing an Entity     *
11686
 *                  *
11687
 ************************************************************************/
11688
11689
static xmlNodePtr
11690
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11691
9.90k
                            int hasTextDecl, int buildTree) {
11692
9.90k
    xmlNodePtr root = NULL;
11693
9.90k
    xmlNodePtr list = NULL;
11694
9.90k
    xmlChar *rootName = BAD_CAST "#root";
11695
9.90k
    int result;
11696
11697
9.90k
    if (buildTree) {
11698
9.90k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11699
9.90k
        if (root == NULL) {
11700
1
            xmlErrMemory(ctxt);
11701
1
            goto error;
11702
1
        }
11703
9.90k
    }
11704
11705
9.90k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11706
1
        goto error;
11707
11708
9.90k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11709
9.90k
    spacePush(ctxt, -1);
11710
11711
9.90k
    if (buildTree)
11712
9.90k
        nodePush(ctxt, root);
11713
11714
9.90k
    if (hasTextDecl) {
11715
8.24k
        xmlDetectEncoding(ctxt);
11716
11717
        /*
11718
         * Parse a possible text declaration first
11719
         */
11720
8.24k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11721
8.24k
            (IS_BLANK_CH(NXT(5)))) {
11722
2.54k
            xmlParseTextDecl(ctxt);
11723
            /*
11724
             * An XML-1.0 document can't reference an entity not XML-1.0
11725
             */
11726
2.54k
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11727
2.54k
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11728
1.29k
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11729
1.29k
                               "Version mismatch between document and "
11730
1.29k
                               "entity\n");
11731
1.29k
            }
11732
2.54k
        }
11733
8.24k
    }
11734
11735
9.90k
    xmlParseContentInternal(ctxt);
11736
11737
9.90k
    if (ctxt->input->cur < ctxt->input->end)
11738
841
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11739
11740
9.90k
    if ((ctxt->wellFormed) ||
11741
9.90k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11742
2.44k
        if (root != NULL) {
11743
2.44k
            xmlNodePtr cur;
11744
11745
            /*
11746
             * Unlink newly created node list.
11747
             */
11748
2.44k
            list = root->children;
11749
2.44k
            root->children = NULL;
11750
2.44k
            root->last = NULL;
11751
8.83k
            for (cur = list; cur != NULL; cur = cur->next)
11752
6.39k
                cur->parent = NULL;
11753
2.44k
        }
11754
2.44k
    }
11755
11756
    /*
11757
     * Read the rest of the stream in case of errors. We want
11758
     * to account for the whole entity size.
11759
     */
11760
10.1k
    do {
11761
10.1k
        ctxt->input->cur = ctxt->input->end;
11762
10.1k
        xmlParserShrink(ctxt);
11763
10.1k
        result = xmlParserGrow(ctxt);
11764
10.1k
    } while (result > 0);
11765
11766
9.90k
    if (buildTree)
11767
9.90k
        nodePop(ctxt);
11768
11769
9.90k
    namePop(ctxt);
11770
9.90k
    spacePop(ctxt);
11771
11772
9.90k
    xmlCtxtPopInput(ctxt);
11773
11774
9.90k
error:
11775
9.90k
    xmlFreeNode(root);
11776
11777
9.90k
    return(list);
11778
9.90k
}
11779
11780
static void
11781
11.5k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11782
11.5k
    xmlParserInputPtr input;
11783
11.5k
    xmlNodePtr list;
11784
11.5k
    unsigned long consumed;
11785
11.5k
    int isExternal;
11786
11.5k
    int buildTree;
11787
11.5k
    int oldMinNsIndex;
11788
11.5k
    int oldNodelen, oldNodemem;
11789
11790
11.5k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11791
11.5k
    buildTree = (ctxt->node != NULL);
11792
11793
    /*
11794
     * Recursion check
11795
     */
11796
11.5k
    if (ent->flags & XML_ENT_EXPANDING) {
11797
243
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11798
243
        xmlHaltParser(ctxt);
11799
243
        goto error;
11800
243
    }
11801
11802
    /*
11803
     * Load entity
11804
     */
11805
11.3k
    input = xmlNewEntityInputStream(ctxt, ent);
11806
11.3k
    if (input == NULL)
11807
1.42k
        goto error;
11808
11809
    /*
11810
     * When building a tree, we need to limit the scope of namespace
11811
     * declarations, so that entities don't reference xmlNs structs
11812
     * from the parent of a reference.
11813
     */
11814
9.90k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11815
9.90k
    if (buildTree)
11816
9.90k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11817
11818
9.90k
    oldNodelen = ctxt->nodelen;
11819
9.90k
    oldNodemem = ctxt->nodemem;
11820
9.90k
    ctxt->nodelen = 0;
11821
9.90k
    ctxt->nodemem = 0;
11822
11823
    /*
11824
     * Parse content
11825
     *
11826
     * This initiates a recursive call chain:
11827
     *
11828
     * - xmlCtxtParseContentInternal
11829
     * - xmlParseContentInternal
11830
     * - xmlParseReference
11831
     * - xmlCtxtParseEntity
11832
     *
11833
     * The nesting depth is limited by the maximum number of inputs,
11834
     * see xmlCtxtPushInput.
11835
     *
11836
     * It's possible to make this non-recursive (minNsIndex must be
11837
     * stored in the input struct) at the expense of code readability.
11838
     */
11839
11840
9.90k
    ent->flags |= XML_ENT_EXPANDING;
11841
11842
9.90k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11843
11844
9.90k
    ent->flags &= ~XML_ENT_EXPANDING;
11845
11846
9.90k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11847
9.90k
    ctxt->nodelen = oldNodelen;
11848
9.90k
    ctxt->nodemem = oldNodemem;
11849
11850
    /*
11851
     * Entity size accounting
11852
     */
11853
9.90k
    consumed = input->consumed;
11854
9.90k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11855
11856
9.90k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11857
9.58k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11858
11859
9.90k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11860
9.58k
        if (isExternal)
11861
8.00k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11862
11863
9.58k
        ent->children = list;
11864
11865
15.9k
        while (list != NULL) {
11866
6.39k
            list->parent = (xmlNodePtr) ent;
11867
11868
            /*
11869
             * Downstream code like the nginx xslt module can set
11870
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11871
             * might have a different or a NULL document.
11872
             */
11873
6.39k
            if (list->doc != ent->doc)
11874
0
                xmlSetTreeDoc(list, ent->doc);
11875
11876
6.39k
            if (list->next == NULL)
11877
2.14k
                ent->last = list;
11878
6.39k
            list = list->next;
11879
6.39k
        }
11880
9.58k
    } else {
11881
320
        xmlFreeNodeList(list);
11882
320
    }
11883
11884
9.90k
    xmlFreeInputStream(input);
11885
11886
11.5k
error:
11887
11.5k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11888
11.5k
}
11889
11890
/**
11891
 * Parse an external general entity within an existing parsing context
11892
 * An external general parsed entity is well-formed if it matches the
11893
 * production labeled extParsedEnt.
11894
 *
11895
 *     [78] extParsedEnt ::= TextDecl? content
11896
 *
11897
 * @param ctxt  the existing parsing context
11898
 * @param URL  the URL for the entity to load
11899
 * @param ID  the System ID for the entity to load
11900
 * @param listOut  the return value for the set of parsed nodes
11901
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11902
 *    the parser error code otherwise
11903
 */
11904
11905
int
11906
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11907
0
                           const xmlChar *ID, xmlNode **listOut) {
11908
0
    xmlParserInputPtr input;
11909
0
    xmlNodePtr list;
11910
11911
0
    if (listOut != NULL)
11912
0
        *listOut = NULL;
11913
11914
0
    if (ctxt == NULL)
11915
0
        return(XML_ERR_ARGUMENT);
11916
11917
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11918
0
                            XML_RESOURCE_GENERAL_ENTITY);
11919
0
    if (input == NULL)
11920
0
        return(ctxt->errNo);
11921
11922
0
    xmlCtxtInitializeLate(ctxt);
11923
11924
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11925
0
    if (listOut != NULL)
11926
0
        *listOut = list;
11927
0
    else
11928
0
        xmlFreeNodeList(list);
11929
11930
0
    xmlFreeInputStream(input);
11931
0
    return(ctxt->errNo);
11932
0
}
11933
11934
#ifdef LIBXML_SAX1_ENABLED
11935
/**
11936
 * Parse an external general entity
11937
 * An external general parsed entity is well-formed if it matches the
11938
 * production labeled extParsedEnt.
11939
 *
11940
 * @deprecated Use #xmlParseCtxtExternalEntity.
11941
 *
11942
 *     [78] extParsedEnt ::= TextDecl? content
11943
 *
11944
 * @param doc  the document the chunk pertains to
11945
 * @param sax  the SAX handler block (possibly NULL)
11946
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11947
 * @param depth  Used for loop detection, use 0
11948
 * @param URL  the URL for the entity to load
11949
 * @param ID  the System ID for the entity to load
11950
 * @param list  the return value for the set of parsed nodes
11951
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11952
 *    the parser error code otherwise
11953
 */
11954
11955
int
11956
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11957
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11958
    xmlParserCtxtPtr ctxt;
11959
    int ret;
11960
11961
    if (list != NULL)
11962
        *list = NULL;
11963
11964
    if (doc == NULL)
11965
        return(XML_ERR_ARGUMENT);
11966
11967
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
    if (ctxt == NULL)
11969
        return(XML_ERR_NO_MEMORY);
11970
11971
    ctxt->depth = depth;
11972
    ctxt->myDoc = doc;
11973
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11974
11975
    xmlFreeParserCtxt(ctxt);
11976
    return(ret);
11977
}
11978
11979
/**
11980
 * Parse a well-balanced chunk of an XML document
11981
 * called by the parser
11982
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11983
 * the content production in the XML grammar:
11984
 *
11985
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11986
 *                       Comment)*
11987
 *
11988
 * @param doc  the document the chunk pertains to (must not be NULL)
11989
 * @param sax  the SAX handler block (possibly NULL)
11990
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11991
 * @param depth  Used for loop detection, use 0
11992
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11993
 * @param lst  the return value for the set of parsed nodes
11994
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11995
 *    the parser error code otherwise
11996
 */
11997
11998
int
11999
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
12000
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
12001
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12002
                                                depth, string, lst, 0 );
12003
}
12004
#endif /* LIBXML_SAX1_ENABLED */
12005
12006
/**
12007
 * Parse a well-balanced chunk of XML matching the 'content' production.
12008
 *
12009
 * Namespaces in scope of `node` and entities of `node`'s document are
12010
 * recognized. When validating, the DTD of `node`'s document is used.
12011
 *
12012
 * Always consumes `input` even in error case.
12013
 *
12014
 * @since 2.14.0
12015
 *
12016
 * @param ctxt  parser context
12017
 * @param input  parser input
12018
 * @param node  target node or document
12019
 * @param hasTextDecl  whether to parse text declaration
12020
 * @returns a node list or NULL in case of error.
12021
 */
12022
xmlNode *
12023
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12024
0
                    xmlNode *node, int hasTextDecl) {
12025
0
    xmlDocPtr doc;
12026
0
    xmlNodePtr cur, list = NULL;
12027
0
    int nsnr = 0;
12028
0
    xmlDictPtr oldDict;
12029
0
    int oldOptions, oldDictNames, oldLoadSubset;
12030
12031
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12032
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12033
0
        goto exit;
12034
0
    }
12035
12036
0
    doc = node->doc;
12037
0
    if (doc == NULL) {
12038
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12039
0
        goto exit;
12040
0
    }
12041
12042
0
    switch (node->type) {
12043
0
        case XML_ELEMENT_NODE:
12044
0
        case XML_DOCUMENT_NODE:
12045
0
        case XML_HTML_DOCUMENT_NODE:
12046
0
            break;
12047
12048
0
        case XML_ATTRIBUTE_NODE:
12049
0
        case XML_TEXT_NODE:
12050
0
        case XML_CDATA_SECTION_NODE:
12051
0
        case XML_ENTITY_REF_NODE:
12052
0
        case XML_PI_NODE:
12053
0
        case XML_COMMENT_NODE:
12054
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12055
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12056
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12057
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12058
0
                    node = cur;
12059
0
                    break;
12060
0
                }
12061
0
            }
12062
0
            break;
12063
12064
0
        default:
12065
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12066
0
            goto exit;
12067
0
    }
12068
12069
0
    xmlCtxtReset(ctxt);
12070
12071
0
    oldDict = ctxt->dict;
12072
0
    oldOptions = ctxt->options;
12073
0
    oldDictNames = ctxt->dictNames;
12074
0
    oldLoadSubset = ctxt->loadsubset;
12075
12076
    /*
12077
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12078
     */
12079
0
    if (doc->dict != NULL) {
12080
0
        ctxt->dict = doc->dict;
12081
0
    } else {
12082
0
        ctxt->options |= XML_PARSE_NODICT;
12083
0
        ctxt->dictNames = 0;
12084
0
    }
12085
12086
    /*
12087
     * Disable IDs
12088
     */
12089
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12090
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12091
12092
0
    ctxt->myDoc = doc;
12093
12094
0
#ifdef LIBXML_HTML_ENABLED
12095
0
    if (ctxt->html) {
12096
        /*
12097
         * When parsing in context, it makes no sense to add implied
12098
         * elements like html/body/etc...
12099
         */
12100
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12101
12102
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12103
0
    } else
12104
0
#endif
12105
0
    {
12106
0
        xmlCtxtInitializeLate(ctxt);
12107
12108
        /*
12109
         * initialize the SAX2 namespaces stack
12110
         */
12111
0
        cur = node;
12112
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12113
0
            xmlNsPtr ns = cur->nsDef;
12114
0
            xmlHashedString hprefix, huri;
12115
12116
0
            while (ns != NULL) {
12117
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12118
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12119
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12120
0
                    nsnr++;
12121
0
                ns = ns->next;
12122
0
            }
12123
0
            cur = cur->parent;
12124
0
        }
12125
12126
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12127
12128
0
        if (nsnr > 0)
12129
0
            xmlParserNsPop(ctxt, nsnr);
12130
0
    }
12131
12132
0
    ctxt->dict = oldDict;
12133
0
    ctxt->options = oldOptions;
12134
0
    ctxt->dictNames = oldDictNames;
12135
0
    ctxt->loadsubset = oldLoadSubset;
12136
0
    ctxt->myDoc = NULL;
12137
0
    ctxt->node = NULL;
12138
12139
0
exit:
12140
0
    xmlFreeInputStream(input);
12141
0
    return(list);
12142
0
}
12143
12144
/**
12145
 * Parse a well-balanced chunk of an XML document
12146
 * within the context (DTD, namespaces, etc ...) of the given node.
12147
 *
12148
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12149
 * the content production in the XML grammar:
12150
 *
12151
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12152
 *                       Comment)*
12153
 *
12154
 * This function assumes the encoding of `node`'s document which is
12155
 * typically not what you want. A better alternative is
12156
 * #xmlCtxtParseContent.
12157
 *
12158
 * @param node  the context node
12159
 * @param data  the input string
12160
 * @param datalen  the input string length in bytes
12161
 * @param options  a combination of xmlParserOption
12162
 * @param listOut  the return value for the set of parsed nodes
12163
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12164
 * error code otherwise
12165
 */
12166
xmlParserErrors
12167
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12168
0
                      int options, xmlNode **listOut) {
12169
0
    xmlParserCtxtPtr ctxt;
12170
0
    xmlParserInputPtr input;
12171
0
    xmlDocPtr doc;
12172
0
    xmlNodePtr list;
12173
0
    xmlParserErrors ret;
12174
12175
0
    if (listOut == NULL)
12176
0
        return(XML_ERR_INTERNAL_ERROR);
12177
0
    *listOut = NULL;
12178
12179
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12180
0
        return(XML_ERR_INTERNAL_ERROR);
12181
12182
0
    doc = node->doc;
12183
0
    if (doc == NULL)
12184
0
        return(XML_ERR_INTERNAL_ERROR);
12185
12186
0
#ifdef LIBXML_HTML_ENABLED
12187
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12188
0
        ctxt = htmlNewParserCtxt();
12189
0
    }
12190
0
    else
12191
0
#endif
12192
0
        ctxt = xmlNewParserCtxt();
12193
12194
0
    if (ctxt == NULL)
12195
0
        return(XML_ERR_NO_MEMORY);
12196
12197
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12198
0
                                      (const char *) doc->encoding,
12199
0
                                      XML_INPUT_BUF_STATIC);
12200
0
    if (input == NULL) {
12201
0
        xmlFreeParserCtxt(ctxt);
12202
0
        return(XML_ERR_NO_MEMORY);
12203
0
    }
12204
12205
0
    xmlCtxtUseOptions(ctxt, options);
12206
12207
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12208
12209
0
    if (list == NULL) {
12210
0
        ret = ctxt->errNo;
12211
0
        if (ret == XML_ERR_ARGUMENT)
12212
0
            ret = XML_ERR_INTERNAL_ERROR;
12213
0
    } else {
12214
0
        ret = XML_ERR_OK;
12215
0
        *listOut = list;
12216
0
    }
12217
12218
0
    xmlFreeParserCtxt(ctxt);
12219
12220
0
    return(ret);
12221
0
}
12222
12223
#ifdef LIBXML_SAX1_ENABLED
12224
/**
12225
 * Parse a well-balanced chunk of an XML document
12226
 *
12227
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12228
 * the content production in the XML grammar:
12229
 *
12230
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12231
 *                       Comment)*
12232
 *
12233
 * In case recover is set to 1, the nodelist will not be empty even if
12234
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12235
 * some extent.
12236
 *
12237
 * @param doc  the document the chunk pertains to (must not be NULL)
12238
 * @param sax  the SAX handler block (possibly NULL)
12239
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12240
 * @param depth  Used for loop detection, use 0
12241
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12242
 * @param listOut  the return value for the set of parsed nodes
12243
 * @param recover  return nodes even if the data is broken (use 0)
12244
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12245
 * otherwise.
12246
 */
12247
int
12248
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12249
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12250
     int recover) {
12251
    xmlParserCtxtPtr ctxt;
12252
    xmlParserInputPtr input;
12253
    xmlNodePtr list;
12254
    int ret;
12255
12256
    if (listOut != NULL)
12257
        *listOut = NULL;
12258
12259
    if (string == NULL)
12260
        return(XML_ERR_ARGUMENT);
12261
12262
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12263
    if (ctxt == NULL)
12264
        return(XML_ERR_NO_MEMORY);
12265
12266
    xmlCtxtInitializeLate(ctxt);
12267
12268
    ctxt->depth = depth;
12269
    ctxt->myDoc = doc;
12270
    if (recover) {
12271
        ctxt->options |= XML_PARSE_RECOVER;
12272
        ctxt->recovery = 1;
12273
    }
12274
12275
    input = xmlNewStringInputStream(ctxt, string);
12276
    if (input == NULL) {
12277
        ret = ctxt->errNo;
12278
        goto error;
12279
    }
12280
12281
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12282
    if (listOut != NULL)
12283
        *listOut = list;
12284
    else
12285
        xmlFreeNodeList(list);
12286
12287
    if (!ctxt->wellFormed)
12288
        ret = ctxt->errNo;
12289
    else
12290
        ret = XML_ERR_OK;
12291
12292
error:
12293
    xmlFreeInputStream(input);
12294
    xmlFreeParserCtxt(ctxt);
12295
    return(ret);
12296
}
12297
12298
/**
12299
 * Parse an XML external entity out of context and build a tree.
12300
 * It use the given SAX function block to handle the parsing callback.
12301
 * If sax is NULL, fallback to the default DOM tree building routines.
12302
 *
12303
 * @deprecated Don't use.
12304
 *
12305
 *     [78] extParsedEnt ::= TextDecl? content
12306
 *
12307
 * This correspond to a "Well Balanced" chunk
12308
 *
12309
 * @param sax  the SAX handler block
12310
 * @param filename  the filename
12311
 * @returns the resulting document tree
12312
 */
12313
12314
xmlDoc *
12315
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12316
    xmlDocPtr ret;
12317
    xmlParserCtxtPtr ctxt;
12318
12319
    ctxt = xmlCreateFileParserCtxt(filename);
12320
    if (ctxt == NULL) {
12321
  return(NULL);
12322
    }
12323
    if (sax != NULL) {
12324
        if (sax->initialized == XML_SAX2_MAGIC) {
12325
            *ctxt->sax = *sax;
12326
        } else {
12327
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12328
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12329
        }
12330
        ctxt->userData = NULL;
12331
    }
12332
12333
    xmlParseExtParsedEnt(ctxt);
12334
12335
    if (ctxt->wellFormed) {
12336
  ret = ctxt->myDoc;
12337
    } else {
12338
        ret = NULL;
12339
        xmlFreeDoc(ctxt->myDoc);
12340
    }
12341
12342
    xmlFreeParserCtxt(ctxt);
12343
12344
    return(ret);
12345
}
12346
12347
/**
12348
 * Parse an XML external entity out of context and build a tree.
12349
 *
12350
 *     [78] extParsedEnt ::= TextDecl? content
12351
 *
12352
 * This correspond to a "Well Balanced" chunk
12353
 *
12354
 * @param filename  the filename
12355
 * @returns the resulting document tree
12356
 */
12357
12358
xmlDoc *
12359
xmlParseEntity(const char *filename) {
12360
    return(xmlSAXParseEntity(NULL, filename));
12361
}
12362
#endif /* LIBXML_SAX1_ENABLED */
12363
12364
/**
12365
 * Create a parser context for an external entity
12366
 * Automatic support for ZLIB/Compress compressed document is provided
12367
 * by default if found at compile-time.
12368
 *
12369
 * @deprecated Don't use.
12370
 *
12371
 * @param URL  the entity URL
12372
 * @param ID  the entity PUBLIC ID
12373
 * @param base  a possible base for the target URI
12374
 * @returns the new parser context or NULL
12375
 */
12376
xmlParserCtxt *
12377
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12378
0
                    const xmlChar *base) {
12379
0
    xmlParserCtxtPtr ctxt;
12380
0
    xmlParserInputPtr input;
12381
0
    xmlChar *uri = NULL;
12382
12383
0
    ctxt = xmlNewParserCtxt();
12384
0
    if (ctxt == NULL)
12385
0
  return(NULL);
12386
12387
0
    if (base != NULL) {
12388
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12389
0
            goto error;
12390
0
        if (uri != NULL)
12391
0
            URL = uri;
12392
0
    }
12393
12394
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12395
0
                            XML_RESOURCE_UNKNOWN);
12396
0
    if (input == NULL)
12397
0
        goto error;
12398
12399
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12400
0
        xmlFreeInputStream(input);
12401
0
        goto error;
12402
0
    }
12403
12404
0
    xmlFree(uri);
12405
0
    return(ctxt);
12406
12407
0
error:
12408
0
    xmlFree(uri);
12409
0
    xmlFreeParserCtxt(ctxt);
12410
0
    return(NULL);
12411
0
}
12412
12413
/************************************************************************
12414
 *                  *
12415
 *    Front ends when parsing from a file     *
12416
 *                  *
12417
 ************************************************************************/
12418
12419
/**
12420
 * Create a parser context for a file or URL content.
12421
 * Automatic support for ZLIB/Compress compressed document is provided
12422
 * by default if found at compile-time and for file accesses
12423
 *
12424
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12425
 *
12426
 * @param filename  the filename or URL
12427
 * @param options  a combination of xmlParserOption
12428
 * @returns the new parser context or NULL
12429
 */
12430
xmlParserCtxt *
12431
xmlCreateURLParserCtxt(const char *filename, int options)
12432
0
{
12433
0
    xmlParserCtxtPtr ctxt;
12434
0
    xmlParserInputPtr input;
12435
12436
0
    ctxt = xmlNewParserCtxt();
12437
0
    if (ctxt == NULL)
12438
0
  return(NULL);
12439
12440
0
    options |= XML_PARSE_UNZIP;
12441
12442
0
    xmlCtxtUseOptions(ctxt, options);
12443
12444
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12445
0
    if (input == NULL) {
12446
0
  xmlFreeParserCtxt(ctxt);
12447
0
  return(NULL);
12448
0
    }
12449
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12450
0
        xmlFreeInputStream(input);
12451
0
        xmlFreeParserCtxt(ctxt);
12452
0
        return(NULL);
12453
0
    }
12454
12455
0
    return(ctxt);
12456
0
}
12457
12458
/**
12459
 * Create a parser context for a file content.
12460
 * Automatic support for ZLIB/Compress compressed document is provided
12461
 * by default if found at compile-time.
12462
 *
12463
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12464
 *
12465
 * @param filename  the filename
12466
 * @returns the new parser context or NULL
12467
 */
12468
xmlParserCtxt *
12469
xmlCreateFileParserCtxt(const char *filename)
12470
0
{
12471
0
    return(xmlCreateURLParserCtxt(filename, 0));
12472
0
}
12473
12474
#ifdef LIBXML_SAX1_ENABLED
12475
/**
12476
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12477
 * compressed document is provided by default if found at compile-time.
12478
 * It use the given SAX function block to handle the parsing callback.
12479
 * If sax is NULL, fallback to the default DOM tree building routines.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
                        int recovery, void *data) {
12497
    xmlDocPtr ret = NULL;
12498
    xmlParserCtxtPtr ctxt;
12499
    xmlParserInputPtr input;
12500
12501
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
    if (ctxt == NULL)
12503
  return(NULL);
12504
12505
    if (data != NULL)
12506
  ctxt->_private = data;
12507
12508
    if (recovery) {
12509
        ctxt->options |= XML_PARSE_RECOVER;
12510
        ctxt->recovery = 1;
12511
    }
12512
12513
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
    else
12516
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
    if (input != NULL)
12519
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
    xmlFreeParserCtxt(ctxt);
12522
    return(ret);
12523
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It use the given SAX function block to handle the parsing callback.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12532
 *
12533
 * @param sax  the SAX handler block
12534
 * @param filename  the filename
12535
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12536
 *             documents
12537
 * @returns the resulting document tree
12538
 */
12539
12540
xmlDoc *
12541
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12542
                          int recovery) {
12543
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12544
}
12545
12546
/**
12547
 * Parse an XML in-memory document and build a tree.
12548
 * In the case the document is not Well Formed, a attempt to build a
12549
 * tree is tried anyway
12550
 *
12551
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12552
 *
12553
 * @param cur  a pointer to an array of xmlChar
12554
 * @returns the resulting document tree or NULL in case of failure
12555
 */
12556
12557
xmlDoc *
12558
xmlRecoverDoc(const xmlChar *cur) {
12559
    return(xmlSAXParseDoc(NULL, cur, 1));
12560
}
12561
12562
/**
12563
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12564
 * compressed document is provided by default if found at compile-time.
12565
 *
12566
 * @deprecated Use #xmlReadFile.
12567
 *
12568
 * @param filename  the filename
12569
 * @returns the resulting document tree if the file was wellformed,
12570
 * NULL otherwise.
12571
 */
12572
12573
xmlDoc *
12574
xmlParseFile(const char *filename) {
12575
    return(xmlSAXParseFile(NULL, filename, 0));
12576
}
12577
12578
/**
12579
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12580
 * compressed document is provided by default if found at compile-time.
12581
 * In the case the document is not Well Formed, it attempts to build
12582
 * a tree anyway
12583
 *
12584
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12585
 *
12586
 * @param filename  the filename
12587
 * @returns the resulting document tree or NULL in case of failure
12588
 */
12589
12590
xmlDoc *
12591
xmlRecoverFile(const char *filename) {
12592
    return(xmlSAXParseFile(NULL, filename, 1));
12593
}
12594
12595
12596
/**
12597
 * Setup the parser context to parse a new buffer; Clears any prior
12598
 * contents from the parser context. The buffer parameter must not be
12599
 * NULL, but the filename parameter can be
12600
 *
12601
 * @deprecated Don't use.
12602
 *
12603
 * @param ctxt  an XML parser context
12604
 * @param buffer  a xmlChar * buffer
12605
 * @param filename  a file name
12606
 */
12607
void
12608
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12609
                             const char* filename)
12610
{
12611
    xmlParserInputPtr input;
12612
12613
    if ((ctxt == NULL) || (buffer == NULL))
12614
        return;
12615
12616
    xmlCtxtReset(ctxt);
12617
12618
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12619
                                      NULL, 0);
12620
    if (input == NULL)
12621
        return;
12622
    if (xmlCtxtPushInput(ctxt, input) < 0)
12623
        xmlFreeInputStream(input);
12624
}
12625
12626
/**
12627
 * Parse an XML file and call the given SAX handler routines.
12628
 * Automatic support for ZLIB/Compress compressed document is provided
12629
 *
12630
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12631
 *
12632
 * @param sax  a SAX handler
12633
 * @param user_data  The user data returned on SAX callbacks
12634
 * @param filename  a file name
12635
 * @returns 0 in case of success or a error number otherwise
12636
 */
12637
int
12638
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12639
                    const char *filename) {
12640
    int ret = 0;
12641
    xmlParserCtxtPtr ctxt;
12642
12643
    ctxt = xmlCreateFileParserCtxt(filename);
12644
    if (ctxt == NULL) return -1;
12645
    if (sax != NULL) {
12646
        if (sax->initialized == XML_SAX2_MAGIC) {
12647
            *ctxt->sax = *sax;
12648
        } else {
12649
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12650
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12651
        }
12652
  ctxt->userData = user_data;
12653
    }
12654
12655
    xmlParseDocument(ctxt);
12656
12657
    if (ctxt->wellFormed)
12658
  ret = 0;
12659
    else {
12660
        if (ctxt->errNo != 0)
12661
      ret = ctxt->errNo;
12662
  else
12663
      ret = -1;
12664
    }
12665
    if (ctxt->myDoc != NULL) {
12666
        xmlFreeDoc(ctxt->myDoc);
12667
  ctxt->myDoc = NULL;
12668
    }
12669
    xmlFreeParserCtxt(ctxt);
12670
12671
    return ret;
12672
}
12673
#endif /* LIBXML_SAX1_ENABLED */
12674
12675
/************************************************************************
12676
 *                  *
12677
 *    Front ends when parsing from memory     *
12678
 *                  *
12679
 ************************************************************************/
12680
12681
/**
12682
 * Create a parser context for an XML in-memory document. The input buffer
12683
 * must not contain a terminating null byte.
12684
 *
12685
 * @param buffer  a pointer to a char array
12686
 * @param size  the size of the array
12687
 * @returns the new parser context or NULL
12688
 */
12689
xmlParserCtxt *
12690
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12691
0
    xmlParserCtxtPtr ctxt;
12692
0
    xmlParserInputPtr input;
12693
12694
0
    if (size < 0)
12695
0
  return(NULL);
12696
12697
0
    ctxt = xmlNewParserCtxt();
12698
0
    if (ctxt == NULL)
12699
0
  return(NULL);
12700
12701
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12702
0
    if (input == NULL) {
12703
0
  xmlFreeParserCtxt(ctxt);
12704
0
  return(NULL);
12705
0
    }
12706
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12707
0
        xmlFreeInputStream(input);
12708
0
        xmlFreeParserCtxt(ctxt);
12709
0
        return(NULL);
12710
0
    }
12711
12712
0
    return(ctxt);
12713
0
}
12714
12715
#ifdef LIBXML_SAX1_ENABLED
12716
/**
12717
 * Parse an XML in-memory block and use the given SAX function block
12718
 * to handle the parsing callback. If sax is NULL, fallback to the default
12719
 * DOM tree building routines.
12720
 *
12721
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12722
 *
12723
 * User data (void *) is stored within the parser context in the
12724
 * context's _private member, so it is available nearly everywhere in libxml
12725
 *
12726
 * @param sax  the SAX handler block
12727
 * @param buffer  an pointer to a char array
12728
 * @param size  the size of the array
12729
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12730
 *             documents
12731
 * @param data  the userdata
12732
 * @returns the resulting document tree
12733
 */
12734
12735
xmlDoc *
12736
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12737
                          int size, int recovery, void *data) {
12738
    xmlDocPtr ret = NULL;
12739
    xmlParserCtxtPtr ctxt;
12740
    xmlParserInputPtr input;
12741
12742
    if (size < 0)
12743
        return(NULL);
12744
12745
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12746
    if (ctxt == NULL)
12747
        return(NULL);
12748
12749
    if (data != NULL)
12750
  ctxt->_private=data;
12751
12752
    if (recovery) {
12753
        ctxt->options |= XML_PARSE_RECOVER;
12754
        ctxt->recovery = 1;
12755
    }
12756
12757
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12758
                                      XML_INPUT_BUF_STATIC);
12759
12760
    if (input != NULL)
12761
        ret = xmlCtxtParseDocument(ctxt, input);
12762
12763
    xmlFreeParserCtxt(ctxt);
12764
    return(ret);
12765
}
12766
12767
/**
12768
 * Parse an XML in-memory block and use the given SAX function block
12769
 * to handle the parsing callback. If sax is NULL, fallback to the default
12770
 * DOM tree building routines.
12771
 *
12772
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12773
 *
12774
 * @param sax  the SAX handler block
12775
 * @param buffer  an pointer to a char array
12776
 * @param size  the size of the array
12777
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12778
 *             documents
12779
 * @returns the resulting document tree
12780
 */
12781
xmlDoc *
12782
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12783
            int size, int recovery) {
12784
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12785
}
12786
12787
/**
12788
 * Parse an XML in-memory block and build a tree.
12789
 *
12790
 * @deprecated Use #xmlReadMemory.
12791
 *
12792
 * @param buffer  an pointer to a char array
12793
 * @param size  the size of the array
12794
 * @returns the resulting document tree
12795
 */
12796
12797
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12798
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12799
}
12800
12801
/**
12802
 * Parse an XML in-memory block and build a tree.
12803
 * In the case the document is not Well Formed, an attempt to
12804
 * build a tree is tried anyway
12805
 *
12806
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12807
 *
12808
 * @param buffer  an pointer to a char array
12809
 * @param size  the size of the array
12810
 * @returns the resulting document tree or NULL in case of error
12811
 */
12812
12813
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12814
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12815
}
12816
12817
/**
12818
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12819
 *
12820
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12821
 *
12822
 * @param sax  a SAX handler
12823
 * @param user_data  The user data returned on SAX callbacks
12824
 * @param buffer  an in-memory XML document input
12825
 * @param size  the length of the XML document in bytes
12826
 * @returns 0 in case of success or a error number otherwise
12827
 */
12828
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12829
        const char *buffer, int size) {
12830
    int ret = 0;
12831
    xmlParserCtxtPtr ctxt;
12832
12833
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12834
    if (ctxt == NULL) return -1;
12835
    if (sax != NULL) {
12836
        if (sax->initialized == XML_SAX2_MAGIC) {
12837
            *ctxt->sax = *sax;
12838
        } else {
12839
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12840
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12841
        }
12842
  ctxt->userData = user_data;
12843
    }
12844
12845
    xmlParseDocument(ctxt);
12846
12847
    if (ctxt->wellFormed)
12848
  ret = 0;
12849
    else {
12850
        if (ctxt->errNo != 0)
12851
      ret = ctxt->errNo;
12852
  else
12853
      ret = -1;
12854
    }
12855
    if (ctxt->myDoc != NULL) {
12856
        xmlFreeDoc(ctxt->myDoc);
12857
  ctxt->myDoc = NULL;
12858
    }
12859
    xmlFreeParserCtxt(ctxt);
12860
12861
    return ret;
12862
}
12863
#endif /* LIBXML_SAX1_ENABLED */
12864
12865
/**
12866
 * Creates a parser context for an XML in-memory document.
12867
 *
12868
 * @param str  a pointer to an array of xmlChar
12869
 * @returns the new parser context or NULL
12870
 */
12871
xmlParserCtxt *
12872
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12873
0
    xmlParserCtxtPtr ctxt;
12874
0
    xmlParserInputPtr input;
12875
12876
0
    ctxt = xmlNewParserCtxt();
12877
0
    if (ctxt == NULL)
12878
0
  return(NULL);
12879
12880
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12881
0
    if (input == NULL) {
12882
0
  xmlFreeParserCtxt(ctxt);
12883
0
  return(NULL);
12884
0
    }
12885
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12886
0
        xmlFreeInputStream(input);
12887
0
        xmlFreeParserCtxt(ctxt);
12888
0
        return(NULL);
12889
0
    }
12890
12891
0
    return(ctxt);
12892
0
}
12893
12894
#ifdef LIBXML_SAX1_ENABLED
12895
/**
12896
 * Parse an XML in-memory document and build a tree.
12897
 * It use the given SAX function block to handle the parsing callback.
12898
 * If sax is NULL, fallback to the default DOM tree building routines.
12899
 *
12900
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12901
 *
12902
 * @param sax  the SAX handler block
12903
 * @param cur  a pointer to an array of xmlChar
12904
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12905
 *             documents
12906
 * @returns the resulting document tree
12907
 */
12908
12909
xmlDoc *
12910
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12911
    xmlDocPtr ret;
12912
    xmlParserCtxtPtr ctxt;
12913
    xmlSAXHandlerPtr oldsax = NULL;
12914
12915
    if (cur == NULL) return(NULL);
12916
12917
12918
    ctxt = xmlCreateDocParserCtxt(cur);
12919
    if (ctxt == NULL) return(NULL);
12920
    if (sax != NULL) {
12921
        oldsax = ctxt->sax;
12922
        ctxt->sax = sax;
12923
        ctxt->userData = NULL;
12924
    }
12925
12926
    xmlParseDocument(ctxt);
12927
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12928
    else {
12929
       ret = NULL;
12930
       xmlFreeDoc(ctxt->myDoc);
12931
       ctxt->myDoc = NULL;
12932
    }
12933
    if (sax != NULL)
12934
  ctxt->sax = oldsax;
12935
    xmlFreeParserCtxt(ctxt);
12936
12937
    return(ret);
12938
}
12939
12940
/**
12941
 * Parse an XML in-memory document and build a tree.
12942
 *
12943
 * @deprecated Use #xmlReadDoc.
12944
 *
12945
 * @param cur  a pointer to an array of xmlChar
12946
 * @returns the resulting document tree
12947
 */
12948
12949
xmlDoc *
12950
xmlParseDoc(const xmlChar *cur) {
12951
    return(xmlSAXParseDoc(NULL, cur, 0));
12952
}
12953
#endif /* LIBXML_SAX1_ENABLED */
12954
12955
/************************************************************************
12956
 *                  *
12957
 *  New set (2.6.0) of simpler and more flexible APIs   *
12958
 *                  *
12959
 ************************************************************************/
12960
12961
/**
12962
 * Reset a parser context
12963
 *
12964
 * @param ctxt  an XML parser context
12965
 */
12966
void
12967
xmlCtxtReset(xmlParserCtxt *ctxt)
12968
0
{
12969
0
    xmlParserInputPtr input;
12970
12971
0
    if (ctxt == NULL)
12972
0
        return;
12973
12974
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12975
0
        xmlFreeInputStream(input);
12976
0
    }
12977
0
    ctxt->inputNr = 0;
12978
0
    ctxt->input = NULL;
12979
12980
0
    ctxt->spaceNr = 0;
12981
0
    if (ctxt->spaceTab != NULL) {
12982
0
  ctxt->spaceTab[0] = -1;
12983
0
  ctxt->space = &ctxt->spaceTab[0];
12984
0
    } else {
12985
0
        ctxt->space = NULL;
12986
0
    }
12987
12988
12989
0
    ctxt->nodeNr = 0;
12990
0
    ctxt->node = NULL;
12991
12992
0
    ctxt->nameNr = 0;
12993
0
    ctxt->name = NULL;
12994
12995
0
    ctxt->nsNr = 0;
12996
0
    xmlParserNsReset(ctxt->nsdb);
12997
12998
0
    if (ctxt->version != NULL) {
12999
0
        xmlFree(ctxt->version);
13000
0
        ctxt->version = NULL;
13001
0
    }
13002
0
    if (ctxt->encoding != NULL) {
13003
0
        xmlFree(ctxt->encoding);
13004
0
        ctxt->encoding = NULL;
13005
0
    }
13006
0
    if (ctxt->extSubURI != NULL) {
13007
0
        xmlFree(ctxt->extSubURI);
13008
0
        ctxt->extSubURI = NULL;
13009
0
    }
13010
0
    if (ctxt->extSubSystem != NULL) {
13011
0
        xmlFree(ctxt->extSubSystem);
13012
0
        ctxt->extSubSystem = NULL;
13013
0
    }
13014
0
    if (ctxt->directory != NULL) {
13015
0
        xmlFree(ctxt->directory);
13016
0
        ctxt->directory = NULL;
13017
0
    }
13018
13019
0
    if (ctxt->myDoc != NULL)
13020
0
        xmlFreeDoc(ctxt->myDoc);
13021
0
    ctxt->myDoc = NULL;
13022
13023
0
    ctxt->standalone = -1;
13024
0
    ctxt->hasExternalSubset = 0;
13025
0
    ctxt->hasPErefs = 0;
13026
0
    ctxt->html = ctxt->html ? 1 : 0;
13027
0
    ctxt->instate = XML_PARSER_START;
13028
13029
0
    ctxt->wellFormed = 1;
13030
0
    ctxt->nsWellFormed = 1;
13031
0
    ctxt->disableSAX = 0;
13032
0
    ctxt->valid = 1;
13033
0
    ctxt->record_info = 0;
13034
0
    ctxt->checkIndex = 0;
13035
0
    ctxt->endCheckState = 0;
13036
0
    ctxt->inSubset = 0;
13037
0
    ctxt->errNo = XML_ERR_OK;
13038
0
    ctxt->depth = 0;
13039
0
    ctxt->catalogs = NULL;
13040
0
    ctxt->sizeentities = 0;
13041
0
    ctxt->sizeentcopy = 0;
13042
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13043
13044
0
    if (ctxt->attsDefault != NULL) {
13045
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13046
0
        ctxt->attsDefault = NULL;
13047
0
    }
13048
0
    if (ctxt->attsSpecial != NULL) {
13049
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13050
0
        ctxt->attsSpecial = NULL;
13051
0
    }
13052
13053
0
#ifdef LIBXML_CATALOG_ENABLED
13054
0
    if (ctxt->catalogs != NULL)
13055
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13056
0
#endif
13057
0
    ctxt->nbErrors = 0;
13058
0
    ctxt->nbWarnings = 0;
13059
0
    if (ctxt->lastError.code != XML_ERR_OK)
13060
0
        xmlResetError(&ctxt->lastError);
13061
0
}
13062
13063
/**
13064
 * Reset a push parser context
13065
 *
13066
 * @param ctxt  an XML parser context
13067
 * @param chunk  a pointer to an array of chars
13068
 * @param size  number of chars in the array
13069
 * @param filename  an optional file name or URI
13070
 * @param encoding  the document encoding, or NULL
13071
 * @returns 0 in case of success and 1 in case of error
13072
 */
13073
int
13074
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13075
                 int size, const char *filename, const char *encoding)
13076
0
{
13077
0
    xmlParserInputPtr input;
13078
13079
0
    if (ctxt == NULL)
13080
0
        return(1);
13081
13082
0
    xmlCtxtReset(ctxt);
13083
13084
0
    input = xmlNewPushInput(filename, chunk, size);
13085
0
    if (input == NULL)
13086
0
        return(1);
13087
13088
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13089
0
        xmlFreeInputStream(input);
13090
0
        return(1);
13091
0
    }
13092
13093
0
    if (encoding != NULL)
13094
0
        xmlSwitchEncodingName(ctxt, encoding);
13095
13096
0
    return(0);
13097
0
}
13098
13099
static int
13100
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13101
1.00M
{
13102
1.00M
    int allMask;
13103
13104
1.00M
    if (ctxt == NULL)
13105
0
        return(-1);
13106
13107
    /*
13108
     * XInclude options aren't handled by the parser.
13109
     *
13110
     * XML_PARSE_XINCLUDE
13111
     * XML_PARSE_NOXINCNODE
13112
     * XML_PARSE_NOBASEFIX
13113
     */
13114
1.00M
    allMask = XML_PARSE_RECOVER |
13115
1.00M
              XML_PARSE_NOENT |
13116
1.00M
              XML_PARSE_DTDLOAD |
13117
1.00M
              XML_PARSE_DTDATTR |
13118
1.00M
              XML_PARSE_DTDVALID |
13119
1.00M
              XML_PARSE_NOERROR |
13120
1.00M
              XML_PARSE_NOWARNING |
13121
1.00M
              XML_PARSE_PEDANTIC |
13122
1.00M
              XML_PARSE_NOBLANKS |
13123
#ifdef LIBXML_SAX1_ENABLED
13124
              XML_PARSE_SAX1 |
13125
#endif
13126
1.00M
              XML_PARSE_NONET |
13127
1.00M
              XML_PARSE_NODICT |
13128
1.00M
              XML_PARSE_NSCLEAN |
13129
1.00M
              XML_PARSE_NOCDATA |
13130
1.00M
              XML_PARSE_COMPACT |
13131
1.00M
              XML_PARSE_OLD10 |
13132
1.00M
              XML_PARSE_HUGE |
13133
1.00M
              XML_PARSE_OLDSAX |
13134
1.00M
              XML_PARSE_IGNORE_ENC |
13135
1.00M
              XML_PARSE_BIG_LINES |
13136
1.00M
              XML_PARSE_NO_XXE |
13137
1.00M
              XML_PARSE_UNZIP |
13138
1.00M
              XML_PARSE_NO_SYS_CATALOG |
13139
1.00M
              XML_PARSE_CATALOG_PI;
13140
13141
1.00M
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13142
13143
    /*
13144
     * For some options, struct members are historically the source
13145
     * of truth. The values are initalized from global variables and
13146
     * old code could also modify them directly. Several older API
13147
     * functions that don't take an options argument rely on these
13148
     * deprecated mechanisms.
13149
     *
13150
     * Once public access to struct members and the globals are
13151
     * disabled, we can use the options bitmask as source of
13152
     * truth, making all these struct members obsolete.
13153
     *
13154
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13155
     * loading of the external subset.
13156
     */
13157
1.00M
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13158
1.00M
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13159
1.00M
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13160
1.00M
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13161
1.00M
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13162
1.00M
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13163
1.00M
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13164
1.00M
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13165
1.00M
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13166
13167
1.00M
    return(options & ~allMask);
13168
1.00M
}
13169
13170
/**
13171
 * Applies the options to the parser context. Unset options are
13172
 * cleared.
13173
 *
13174
 * @since 2.13.0
13175
 *
13176
 * With older versions, you can use #xmlCtxtUseOptions.
13177
 *
13178
 * @param ctxt  an XML parser context
13179
 * @param options  a bitmask of xmlParserOption values
13180
 * @returns 0 in case of success, the set of unknown or unimplemented options
13181
 *         in case of error.
13182
 */
13183
int
13184
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13185
0
{
13186
0
#ifdef LIBXML_HTML_ENABLED
13187
0
    if ((ctxt != NULL) && (ctxt->html))
13188
0
        return(htmlCtxtSetOptions(ctxt, options));
13189
0
#endif
13190
13191
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13192
0
}
13193
13194
/**
13195
 * Get the current options of the parser context.
13196
 *
13197
 * @since 2.14.0
13198
 *
13199
 * @param ctxt  an XML parser context
13200
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13201
 */
13202
int
13203
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13204
0
{
13205
0
    if (ctxt == NULL)
13206
0
        return(-1);
13207
13208
0
    return(ctxt->options);
13209
0
}
13210
13211
/**
13212
 * Applies the options to the parser context. The following options
13213
 * are never cleared and can only be enabled:
13214
 *
13215
 * - XML_PARSE_NOERROR
13216
 * - XML_PARSE_NOWARNING
13217
 * - XML_PARSE_NONET
13218
 * - XML_PARSE_NSCLEAN
13219
 * - XML_PARSE_NOCDATA
13220
 * - XML_PARSE_COMPACT
13221
 * - XML_PARSE_OLD10
13222
 * - XML_PARSE_HUGE
13223
 * - XML_PARSE_OLDSAX
13224
 * - XML_PARSE_IGNORE_ENC
13225
 * - XML_PARSE_BIG_LINES
13226
 *
13227
 * @deprecated Use #xmlCtxtSetOptions.
13228
 *
13229
 * @param ctxt  an XML parser context
13230
 * @param options  a combination of xmlParserOption
13231
 * @returns 0 in case of success, the set of unknown or unimplemented options
13232
 *         in case of error.
13233
 */
13234
int
13235
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13236
1.00M
{
13237
1.00M
    int keepMask;
13238
13239
1.00M
#ifdef LIBXML_HTML_ENABLED
13240
1.00M
    if ((ctxt != NULL) && (ctxt->html))
13241
0
        return(htmlCtxtUseOptions(ctxt, options));
13242
1.00M
#endif
13243
13244
    /*
13245
     * For historic reasons, some options can only be enabled.
13246
     */
13247
1.00M
    keepMask = XML_PARSE_NOERROR |
13248
1.00M
               XML_PARSE_NOWARNING |
13249
1.00M
               XML_PARSE_NONET |
13250
1.00M
               XML_PARSE_NSCLEAN |
13251
1.00M
               XML_PARSE_NOCDATA |
13252
1.00M
               XML_PARSE_COMPACT |
13253
1.00M
               XML_PARSE_OLD10 |
13254
1.00M
               XML_PARSE_HUGE |
13255
1.00M
               XML_PARSE_OLDSAX |
13256
1.00M
               XML_PARSE_IGNORE_ENC |
13257
1.00M
               XML_PARSE_BIG_LINES;
13258
13259
1.00M
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13260
1.00M
}
13261
13262
/**
13263
 * To protect against exponential entity expansion ("billion laughs"), the
13264
 * size of serialized output is (roughly) limited to the input size
13265
 * multiplied by this factor. The default value is 5.
13266
 *
13267
 * When working with documents making heavy use of entity expansion, it can
13268
 * be necessary to increase the value. For security reasons, this should only
13269
 * be considered when processing trusted input.
13270
 *
13271
 * @param ctxt  an XML parser context
13272
 * @param maxAmpl  maximum amplification factor
13273
 */
13274
void
13275
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13276
0
{
13277
0
    if (ctxt == NULL)
13278
0
        return;
13279
0
    ctxt->maxAmpl = maxAmpl;
13280
0
}
13281
13282
/**
13283
 * Parse an XML document and return the resulting document tree.
13284
 * Takes ownership of the input object.
13285
 *
13286
 * @since 2.13.0
13287
 *
13288
 * @param ctxt  an XML parser context
13289
 * @param input  parser input
13290
 * @returns the resulting document tree or NULL
13291
 */
13292
xmlDoc *
13293
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13294
836k
{
13295
836k
    xmlDocPtr ret = NULL;
13296
13297
836k
    if ((ctxt == NULL) || (input == NULL)) {
13298
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13299
0
        xmlFreeInputStream(input);
13300
0
        return(NULL);
13301
0
    }
13302
13303
    /* assert(ctxt->inputNr == 0); */
13304
836k
    while (ctxt->inputNr > 0)
13305
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13306
13307
836k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13308
0
        xmlFreeInputStream(input);
13309
0
        return(NULL);
13310
0
    }
13311
13312
836k
    xmlParseDocument(ctxt);
13313
13314
836k
    ret = xmlCtxtGetDocument(ctxt);
13315
13316
    /* assert(ctxt->inputNr == 1); */
13317
1.67M
    while (ctxt->inputNr > 0)
13318
839k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13319
13320
836k
    return(ret);
13321
836k
}
13322
13323
/**
13324
 * Convenience function to parse an XML document from a
13325
 * zero-terminated string.
13326
 *
13327
 * See #xmlCtxtReadDoc for details.
13328
 *
13329
 * @param cur  a pointer to a zero terminated string
13330
 * @param URL  base URL (optional)
13331
 * @param encoding  the document encoding (optional)
13332
 * @param options  a combination of xmlParserOption
13333
 * @returns the resulting document tree
13334
 */
13335
xmlDoc *
13336
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13337
           int options)
13338
0
{
13339
0
    xmlParserCtxtPtr ctxt;
13340
0
    xmlParserInputPtr input;
13341
0
    xmlDocPtr doc = NULL;
13342
13343
0
    ctxt = xmlNewParserCtxt();
13344
0
    if (ctxt == NULL)
13345
0
        return(NULL);
13346
13347
0
    xmlCtxtUseOptions(ctxt, options);
13348
13349
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13350
0
                                      XML_INPUT_BUF_STATIC);
13351
13352
0
    if (input != NULL)
13353
0
        doc = xmlCtxtParseDocument(ctxt, input);
13354
13355
0
    xmlFreeParserCtxt(ctxt);
13356
0
    return(doc);
13357
0
}
13358
13359
/**
13360
 * Convenience function to parse an XML file from the filesystem
13361
 * or a global, user-defined resource loader.
13362
 *
13363
 * This function always enables the XML_PARSE_UNZIP option for
13364
 * backward compatibility. If a "-" filename is passed, it will
13365
 * read from stdin. Both of these features are potentially
13366
 * insecure and might be removed from later versions.
13367
 *
13368
 * See #xmlCtxtReadFile for details.
13369
 *
13370
 * @param filename  a file or URL
13371
 * @param encoding  the document encoding (optional)
13372
 * @param options  a combination of xmlParserOption
13373
 * @returns the resulting document tree
13374
 */
13375
xmlDoc *
13376
xmlReadFile(const char *filename, const char *encoding, int options)
13377
0
{
13378
0
    xmlParserCtxtPtr ctxt;
13379
0
    xmlParserInputPtr input;
13380
0
    xmlDocPtr doc = NULL;
13381
13382
0
    ctxt = xmlNewParserCtxt();
13383
0
    if (ctxt == NULL)
13384
0
        return(NULL);
13385
13386
0
    options |= XML_PARSE_UNZIP;
13387
13388
0
    xmlCtxtUseOptions(ctxt, options);
13389
13390
    /*
13391
     * Backward compatibility for users of command line utilities like
13392
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13393
     * should be removed at some point.
13394
     */
13395
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13396
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13397
0
                                      encoding, 0);
13398
0
    else
13399
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13400
13401
0
    if (input != NULL)
13402
0
        doc = xmlCtxtParseDocument(ctxt, input);
13403
13404
0
    xmlFreeParserCtxt(ctxt);
13405
0
    return(doc);
13406
0
}
13407
13408
/**
13409
 * Parse an XML in-memory document and build a tree. The input buffer must
13410
 * not contain a terminating null byte.
13411
 *
13412
 * See #xmlCtxtReadMemory for details.
13413
 *
13414
 * @param buffer  a pointer to a char array
13415
 * @param size  the size of the array
13416
 * @param url  base URL (optional)
13417
 * @param encoding  the document encoding (optional)
13418
 * @param options  a combination of xmlParserOption
13419
 * @returns the resulting document tree
13420
 */
13421
xmlDoc *
13422
xmlReadMemory(const char *buffer, int size, const char *url,
13423
              const char *encoding, int options)
13424
66.7k
{
13425
66.7k
    xmlParserCtxtPtr ctxt;
13426
66.7k
    xmlParserInputPtr input;
13427
66.7k
    xmlDocPtr doc = NULL;
13428
13429
66.7k
    if (size < 0)
13430
0
  return(NULL);
13431
13432
66.7k
    ctxt = xmlNewParserCtxt();
13433
66.7k
    if (ctxt == NULL)
13434
0
        return(NULL);
13435
13436
66.7k
    xmlCtxtUseOptions(ctxt, options);
13437
13438
66.7k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13439
66.7k
                                      XML_INPUT_BUF_STATIC);
13440
13441
66.7k
    if (input != NULL)
13442
66.7k
        doc = xmlCtxtParseDocument(ctxt, input);
13443
13444
66.7k
    xmlFreeParserCtxt(ctxt);
13445
66.7k
    return(doc);
13446
66.7k
}
13447
13448
/**
13449
 * Parse an XML from a file descriptor and build a tree.
13450
 *
13451
 * See #xmlCtxtReadFd for details.
13452
 *
13453
 * NOTE that the file descriptor will not be closed when the
13454
 * context is freed or reset.
13455
 *
13456
 * @param fd  an open file descriptor
13457
 * @param URL  base URL (optional)
13458
 * @param encoding  the document encoding (optional)
13459
 * @param options  a combination of xmlParserOption
13460
 * @returns the resulting document tree
13461
 */
13462
xmlDoc *
13463
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13464
0
{
13465
0
    xmlParserCtxtPtr ctxt;
13466
0
    xmlParserInputPtr input;
13467
0
    xmlDocPtr doc = NULL;
13468
13469
0
    ctxt = xmlNewParserCtxt();
13470
0
    if (ctxt == NULL)
13471
0
        return(NULL);
13472
13473
0
    xmlCtxtUseOptions(ctxt, options);
13474
13475
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML document from I/O functions and context and build a tree.
13486
 *
13487
 * See #xmlCtxtReadIO for details.
13488
 *
13489
 * @param ioread  an I/O read function
13490
 * @param ioclose  an I/O close function (optional)
13491
 * @param ioctx  an I/O handler
13492
 * @param URL  base URL (optional)
13493
 * @param encoding  the document encoding (optional)
13494
 * @param options  a combination of xmlParserOption
13495
 * @returns the resulting document tree
13496
 */
13497
xmlDoc *
13498
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13499
          void *ioctx, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13512
0
                                  encoding, 0);
13513
13514
0
    if (input != NULL)
13515
0
        doc = xmlCtxtParseDocument(ctxt, input);
13516
13517
0
    xmlFreeParserCtxt(ctxt);
13518
0
    return(doc);
13519
0
}
13520
13521
/**
13522
 * Parse an XML in-memory document and build a tree.
13523
 *
13524
 * `URL` is used as base to resolve external entities and for error
13525
 * reporting.
13526
 *
13527
 * @param ctxt  an XML parser context
13528
 * @param str  a pointer to a zero terminated string
13529
 * @param URL  base URL (optional)
13530
 * @param encoding  the document encoding (optional)
13531
 * @param options  a combination of xmlParserOption
13532
 * @returns the resulting document tree
13533
 */
13534
xmlDoc *
13535
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13536
               const char *URL, const char *encoding, int options)
13537
0
{
13538
0
    xmlParserInputPtr input;
13539
13540
0
    if (ctxt == NULL)
13541
0
        return(NULL);
13542
13543
0
    xmlCtxtReset(ctxt);
13544
0
    xmlCtxtUseOptions(ctxt, options);
13545
13546
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13547
0
                                      XML_INPUT_BUF_STATIC);
13548
0
    if (input == NULL)
13549
0
        return(NULL);
13550
13551
0
    return(xmlCtxtParseDocument(ctxt, input));
13552
0
}
13553
13554
/**
13555
 * Parse an XML file from the filesystem or a global, user-defined
13556
 * resource loader.
13557
 *
13558
 * This function always enables the XML_PARSE_UNZIP option for
13559
 * backward compatibility. This feature is potentially insecure
13560
 * and might be removed from later versions.
13561
 *
13562
 * @param ctxt  an XML parser context
13563
 * @param filename  a file or URL
13564
 * @param encoding  the document encoding (optional)
13565
 * @param options  a combination of xmlParserOption
13566
 * @returns the resulting document tree
13567
 */
13568
xmlDoc *
13569
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13570
                const char *encoding, int options)
13571
0
{
13572
0
    xmlParserInputPtr input;
13573
13574
0
    if (ctxt == NULL)
13575
0
        return(NULL);
13576
13577
0
    options |= XML_PARSE_UNZIP;
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13583
0
    if (input == NULL)
13584
0
        return(NULL);
13585
13586
0
    return(xmlCtxtParseDocument(ctxt, input));
13587
0
}
13588
13589
/**
13590
 * Parse an XML in-memory document and build a tree. The input buffer must
13591
 * not contain a terminating null byte.
13592
 *
13593
 * `URL` is used as base to resolve external entities and for error
13594
 * reporting.
13595
 *
13596
 * @param ctxt  an XML parser context
13597
 * @param buffer  a pointer to a char array
13598
 * @param size  the size of the array
13599
 * @param URL  base URL (optional)
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13606
                  const char *URL, const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if ((ctxt == NULL) || (size < 0))
13611
0
        return(NULL);
13612
13613
0
    xmlCtxtReset(ctxt);
13614
0
    xmlCtxtUseOptions(ctxt, options);
13615
13616
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13617
0
                                      XML_INPUT_BUF_STATIC);
13618
0
    if (input == NULL)
13619
0
        return(NULL);
13620
13621
0
    return(xmlCtxtParseDocument(ctxt, input));
13622
0
}
13623
13624
/**
13625
 * Parse an XML document from a file descriptor and build a tree.
13626
 *
13627
 * NOTE that the file descriptor will not be closed when the
13628
 * context is freed or reset.
13629
 *
13630
 * `URL` is used as base to resolve external entities and for error
13631
 * reporting.
13632
 *
13633
 * @param ctxt  an XML parser context
13634
 * @param fd  an open file descriptor
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13642
              const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if (ctxt == NULL)
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13653
0
    if (input == NULL)
13654
0
        return(NULL);
13655
13656
0
    return(xmlCtxtParseDocument(ctxt, input));
13657
0
}
13658
13659
/**
13660
 * Parse an XML document from I/O functions and source and build a tree.
13661
 * This reuses the existing `ctxt` parser context
13662
 *
13663
 * `URL` is used as base to resolve external entities and for error
13664
 * reporting.
13665
 *
13666
 * @param ctxt  an XML parser context
13667
 * @param ioread  an I/O read function
13668
 * @param ioclose  an I/O close function
13669
 * @param ioctx  an I/O handler
13670
 * @param URL  the base URL to use for the document
13671
 * @param encoding  the document encoding, or NULL
13672
 * @param options  a combination of xmlParserOption
13673
 * @returns the resulting document tree
13674
 */
13675
xmlDoc *
13676
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13677
              xmlInputCloseCallback ioclose, void *ioctx,
13678
        const char *URL,
13679
              const char *encoding, int options)
13680
0
{
13681
0
    xmlParserInputPtr input;
13682
13683
0
    if (ctxt == NULL)
13684
0
        return(NULL);
13685
13686
0
    xmlCtxtReset(ctxt);
13687
0
    xmlCtxtUseOptions(ctxt, options);
13688
13689
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13690
0
                                  encoding, 0);
13691
0
    if (input == NULL)
13692
0
        return(NULL);
13693
13694
0
    return(xmlCtxtParseDocument(ctxt, input));
13695
0
}
13696