Coverage Report

Created: 2025-06-22 06:55

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
1.70M
#define NS_INDEX_EMPTY  INT_MAX
81
69.2k
#define NS_INDEX_XML    (INT_MAX - 1)
82
717k
#define URI_HASH_EMPTY  0xD943A04E
83
24.9k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
341k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
672k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
672k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
736k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
18.1k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record describing one start tag.
 * NOTE(review): the code that fills and reads these fields is not visible
 * in this part of the file; presumably one record is kept per open element
 * (prefix/URI of the element name, source line, and a namespace count) —
 * confirm against the users of this struct.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Extra per-namespace bookkeeping used by the parser's namespace table.
 * NOTE(review): field semantics are not visible in this part of the file;
 * the names suggest cached hash values for prefix and URI, an owning
 * element id and a previous index — confirm against the namespace code.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * One slot of the namespace hash table referenced by _xmlParserNsData.hash.
 * NOTE(review): presumably pairs a hash value with an index into the
 * namespace arrays — confirm against the lookup code.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace state attached to a parser context: an array of
 * xmlParserNsExtra records plus a table of xmlParserNsBucket slots.
 * NOTE(review): the meaning of the counters and indices below is not
 * visible in this part of the file — confirm against the namespace code.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
1.32M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
1.32M
#define XML_ENT_FIXED_COST 20
170
171
28.4M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
165k
#define XML_PARSER_BUFFER_SIZE 100
173
34.3k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the internal version of the library
188
 */
189
const char *const
190
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
191
192
/*
193
 * List of XML prefixed PI allowed by W3C specs
194
 */
195
196
static const char* const xmlW3CPIs[] = {
197
    "xml-stylesheet",
198
    "xml-model",
199
    NULL
200
};
201
202
203
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
204
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
205
                                              const xmlChar **str);
206
207
static void
208
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
209
210
static int
211
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
212
213
static void
214
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
215
216
/************************************************************************
217
 *                  *
218
 *    Some factorized error routines        *
219
 *                  *
220
 ************************************************************************/
221
222
static void
223
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
224
0
    xmlCtxtErrMemory(ctxt);
225
0
}
226
227
/**
228
 * Handle a redefinition of attribute error
229
 *
230
 * @param ctxt  an XML parser context
231
 * @param prefix  the attribute prefix
232
 * @param localname  the attribute localname
233
 */
234
static void
235
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
236
                   const xmlChar * localname)
237
448k
{
238
448k
    if (prefix == NULL)
239
441k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
240
441k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
241
441k
                   "Attribute %s redefined\n", localname);
242
7.14k
    else
243
7.14k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
244
7.14k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
245
7.14k
                   "Attribute %s:%s redefined\n", prefix, localname);
246
448k
}
247
248
/**
249
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
250
 *
251
 * @param ctxt  an XML parser context
252
 * @param error  the error number
253
 * @param msg  the error message
254
 */
255
static void LIBXML_ATTR_FORMAT(3,0)
256
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
257
               const char *msg)
258
6.40M
{
259
6.40M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
260
6.40M
               NULL, NULL, NULL, 0, "%s", msg);
261
6.40M
}
262
263
/**
264
 * Handle a warning.
265
 *
266
 * @param ctxt  an XML parser context
267
 * @param error  the error number
268
 * @param msg  the error message
269
 * @param str1  extra data
270
 * @param str2  extra data
271
 */
272
void LIBXML_ATTR_FORMAT(3,0)
273
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
274
              const char *msg, const xmlChar *str1, const xmlChar *str2)
275
2.86k
{
276
2.86k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
277
2.86k
               str1, str2, NULL, 0, msg, str1, str2);
278
2.86k
}
279
280
#ifdef LIBXML_VALID_ENABLED
/**
 * Report a validity error (level XML_ERR_ERROR, domain XML_FROM_DTD).
 *
 * The context is marked as not valid before the error is dispatched.
 *
 * @param ctxt  an XML parser context
 * @param error  the error number
 * @param msg  the error message, used as a format string
 * @param str1  first extra string
 * @param str2  second extra string
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg,
                 const xmlChar *str1, const xmlChar *str2)
{
    /* Record the failure on the context first. */
    ctxt->valid = 0;

    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
               str1, str2, NULL, 0,
               msg, str1, str2);
}
#endif
300
301
/**
302
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
303
 *
304
 * @param ctxt  an XML parser context
305
 * @param error  the error number
306
 * @param msg  the error message
307
 * @param val  an integer value
308
 */
309
static void LIBXML_ATTR_FORMAT(3,0)
310
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
311
                  const char *msg, int val)
312
3.25M
{
313
3.25M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
314
3.25M
               NULL, NULL, NULL, val, msg, val);
315
3.25M
}
316
317
/**
318
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
319
 *
320
 * @param ctxt  an XML parser context
321
 * @param error  the error number
322
 * @param msg  the error message
323
 * @param str1  an string info
324
 * @param val  an integer value
325
 * @param str2  an string info
326
 */
327
static void LIBXML_ATTR_FORMAT(3,0)
328
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
329
                  const char *msg, const xmlChar *str1, int val,
330
      const xmlChar *str2)
331
215k
{
332
215k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
333
215k
               str1, str2, NULL, val, msg, str1, val, str2);
334
215k
}
335
336
/**
337
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
338
 *
339
 * @param ctxt  an XML parser context
340
 * @param error  the error number
341
 * @param msg  the error message
342
 * @param val  a string value
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
809k
{
348
809k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
809k
               val, NULL, NULL, 0, msg, val);
350
809k
}
351
352
/**
353
 * Handle a non fatal parser error
354
 *
355
 * @param ctxt  an XML parser context
356
 * @param error  the error number
357
 * @param msg  the error message
358
 * @param val  a string value
359
 */
360
static void LIBXML_ATTR_FORMAT(3,0)
361
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
362
                  const char *msg, const xmlChar * val)
363
17.9k
{
364
17.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
365
17.9k
               val, NULL, NULL, 0, msg, val);
366
17.9k
}
367
368
/**
369
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
370
 *
371
 * @param ctxt  an XML parser context
372
 * @param error  the error number
373
 * @param msg  the message
374
 * @param info1  extra information string
375
 * @param info2  extra information string
376
 * @param info3  extra information string
377
 */
378
static void LIBXML_ATTR_FORMAT(3,0)
379
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
380
         const char *msg,
381
         const xmlChar * info1, const xmlChar * info2,
382
         const xmlChar * info3)
383
426k
{
384
426k
    ctxt->nsWellFormed = 0;
385
386
426k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
387
426k
               info1, info2, info3, 0, msg, info1, info2, info3);
388
426k
}
389
390
/**
391
 * Handle a namespace warning error
392
 *
393
 * @param ctxt  an XML parser context
394
 * @param error  the error number
395
 * @param msg  the message
396
 * @param info1  extra information string
397
 * @param info2  extra information string
398
 * @param info3  extra information string
399
 */
400
static void LIBXML_ATTR_FORMAT(3,0)
401
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
402
         const char *msg,
403
         const xmlChar * info1, const xmlChar * info2,
404
         const xmlChar * info3)
405
27.6k
{
406
27.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
407
27.6k
               info1, info2, info3, 0, msg, info1, info2, info3);
408
27.6k
}
409
410
/**
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
417
418
/**
 * Add a size_t amount to *dst, clamping the result at ULONG_MAX instead
 * of wrapping.
 *
 * The amount is taken as size_t so that a value wider than unsigned long
 * (possible where size_t is 64-bit and unsigned long is 32-bit) saturates
 * the accumulator instead of being truncated at the call boundary.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val fits in the remaining headroom, so the cast is lossless. */
        *dst += (unsigned long) val;
}
425
426
/**
427
 * Check for non-linear entity expansion behaviour.
428
 *
429
 * In some cases like xmlExpandEntityInAttValue, this function is called
430
 * for each, possibly nested entity and its unexpanded content length.
431
 *
432
 * In other cases like #xmlParseReference, it's only called for each
433
 * top-level entity with its unexpanded content length plus the sum of
434
 * the unexpanded content lengths (plus fixed cost) of all nested
435
 * entities.
436
 *
437
 * Summing the unexpanded lengths also adds the length of the reference.
438
 * This is by design. Taking the length of the entity name into account
439
 * discourages attacks that try to waste CPU time with abusively long
440
 * entity names. See test/recurse/lol6.xml for example. Each call also
441
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
442
 * short entities.
443
 *
444
 * @param ctxt  parser context
445
 * @param extra  sum of unexpanded entity sizes
446
 * @returns 1 on error, 0 on success.
447
 */
448
static int
449
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
450
1.32M
{
451
1.32M
    unsigned long consumed;
452
1.32M
    unsigned long *expandedSize;
453
1.32M
    xmlParserInputPtr input = ctxt->input;
454
1.32M
    xmlEntityPtr entity = input->entity;
455
456
1.32M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
457
887
        return(0);
458
459
    /*
460
     * Compute total consumed bytes so far, including input streams of
461
     * external entities.
462
     */
463
1.32M
    consumed = input->consumed;
464
1.32M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
465
1.32M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
466
467
1.32M
    if (entity)
468
6.31k
        expandedSize = &entity->expandedSize;
469
1.31M
    else
470
1.31M
        expandedSize = &ctxt->sizeentcopy;
471
472
    /*
473
     * Add extra cost and some fixed cost.
474
     */
475
1.32M
    xmlSaturatedAdd(expandedSize, extra);
476
1.32M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
477
478
    /*
479
     * It's important to always use saturation arithmetic when tracking
480
     * entity sizes to make the size checks reliable. If "sizeentcopy"
481
     * overflows, we have to abort.
482
     */
483
1.32M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
484
1.32M
        ((*expandedSize >= ULONG_MAX) ||
485
124k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
486
486
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
487
486
                       "Maximum entity amplification factor exceeded, see "
488
486
                       "xmlCtxtSetMaxAmplification.\n");
489
486
        xmlHaltParser(ctxt);
490
486
        return(1);
491
486
    }
492
493
1.32M
    return(0);
494
1.32M
}
495
496
/************************************************************************
497
 *                  *
498
 *    Library wide options          *
499
 *                  *
500
 ************************************************************************/
501
502
/**
503
 * Examines if the library has been compiled with a given feature.
504
 *
505
 * @param feature  the feature to be examined
506
 * @returns zero (0) if the feature does not exist or an unknown
507
 * feature is requested, non-zero otherwise.
508
 */
509
int
510
xmlHasFeature(xmlFeature feature)
511
0
{
512
0
    switch (feature) {
513
0
  case XML_WITH_THREAD:
514
0
#ifdef LIBXML_THREAD_ENABLED
515
0
      return(1);
516
#else
517
      return(0);
518
#endif
519
0
        case XML_WITH_TREE:
520
0
            return(1);
521
0
        case XML_WITH_OUTPUT:
522
0
#ifdef LIBXML_OUTPUT_ENABLED
523
0
            return(1);
524
#else
525
            return(0);
526
#endif
527
0
        case XML_WITH_PUSH:
528
0
#ifdef LIBXML_PUSH_ENABLED
529
0
            return(1);
530
#else
531
            return(0);
532
#endif
533
0
        case XML_WITH_READER:
534
0
#ifdef LIBXML_READER_ENABLED
535
0
            return(1);
536
#else
537
            return(0);
538
#endif
539
0
        case XML_WITH_PATTERN:
540
0
#ifdef LIBXML_PATTERN_ENABLED
541
0
            return(1);
542
#else
543
            return(0);
544
#endif
545
0
        case XML_WITH_WRITER:
546
0
#ifdef LIBXML_WRITER_ENABLED
547
0
            return(1);
548
#else
549
            return(0);
550
#endif
551
0
        case XML_WITH_SAX1:
552
0
#ifdef LIBXML_SAX1_ENABLED
553
0
            return(1);
554
#else
555
            return(0);
556
#endif
557
0
        case XML_WITH_HTTP:
558
0
            return(0);
559
0
        case XML_WITH_VALID:
560
0
#ifdef LIBXML_VALID_ENABLED
561
0
            return(1);
562
#else
563
            return(0);
564
#endif
565
0
        case XML_WITH_HTML:
566
0
#ifdef LIBXML_HTML_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_LEGACY:
572
0
            return(0);
573
0
        case XML_WITH_C14N:
574
0
#ifdef LIBXML_C14N_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_CATALOG:
580
0
#ifdef LIBXML_CATALOG_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_XPATH:
586
0
#ifdef LIBXML_XPATH_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPTR:
592
0
#ifdef LIBXML_XPTR_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XINCLUDE:
598
0
#ifdef LIBXML_XINCLUDE_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_ICONV:
604
0
#ifdef LIBXML_ICONV_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ISO8859X:
610
0
#ifdef LIBXML_ISO8859X_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_UNICODE:
616
0
            return(0);
617
0
        case XML_WITH_REGEXP:
618
0
#ifdef LIBXML_REGEXP_ENABLED
619
0
            return(1);
620
#else
621
            return(0);
622
#endif
623
0
        case XML_WITH_AUTOMATA:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_EXPR:
630
0
            return(0);
631
0
        case XML_WITH_RELAXNG:
632
0
#ifdef LIBXML_RELAXNG_ENABLED
633
0
            return(1);
634
#else
635
            return(0);
636
#endif
637
0
        case XML_WITH_SCHEMAS:
638
0
#ifdef LIBXML_SCHEMAS_ENABLED
639
0
            return(1);
640
#else
641
            return(0);
642
#endif
643
0
        case XML_WITH_SCHEMATRON:
644
#ifdef LIBXML_SCHEMATRON_ENABLED
645
            return(1);
646
#else
647
0
            return(0);
648
0
#endif
649
0
        case XML_WITH_MODULES:
650
0
#ifdef LIBXML_MODULES_ENABLED
651
0
            return(1);
652
#else
653
            return(0);
654
#endif
655
0
        case XML_WITH_DEBUG:
656
0
#ifdef LIBXML_DEBUG_ENABLED
657
0
            return(1);
658
#else
659
            return(0);
660
#endif
661
0
        case XML_WITH_DEBUG_MEM:
662
0
            return(0);
663
0
        case XML_WITH_ZLIB:
664
#ifdef LIBXML_ZLIB_ENABLED
665
            return(1);
666
#else
667
0
            return(0);
668
0
#endif
669
0
        case XML_WITH_LZMA:
670
#ifdef LIBXML_LZMA_ENABLED
671
            return(1);
672
#else
673
0
            return(0);
674
0
#endif
675
0
        case XML_WITH_ICU:
676
#ifdef LIBXML_ICU_ENABLED
677
            return(1);
678
#else
679
0
            return(0);
680
0
#endif
681
0
        default:
682
0
      break;
683
0
     }
684
0
     return(0);
685
0
}
686
687
/************************************************************************
688
 *                  *
689
 *      Simple string buffer        *
690
 *                  *
691
 ************************************************************************/
692
693
/*
 * Small growable byte buffer with a hard size limit and a sticky error
 * code. The helpers that append to it record a failure in "code" instead
 * of returning it; the error is reported when the buffer is finished or
 * cleaned up.
 */
typedef struct {
694
    xmlChar *mem; /* storage, NULL until the first growth */
695
    unsigned size; /* number of bytes currently stored */
696
    unsigned cap; /* allocated bytes; size < cap */
697
    unsigned max; /* upper limit for size; size <= max */
698
    xmlParserErrors code; /* XML_ERR_OK, or the recorded failure */
699
} xmlSBuf;
700
701
static void
702
695k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
703
695k
    buf->mem = NULL;
704
695k
    buf->size = 0;
705
695k
    buf->cap = 0;
706
695k
    buf->max = max;
707
695k
    buf->code = XML_ERR_OK;
708
695k
}
709
710
static int
711
140k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
712
140k
    xmlChar *mem;
713
140k
    unsigned cap;
714
715
140k
    if (len >= UINT_MAX / 2 - buf->size) {
716
0
        if (buf->code == XML_ERR_OK)
717
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
718
0
        return(-1);
719
0
    }
720
721
140k
    cap = (buf->size + len) * 2;
722
140k
    if (cap < 240)
723
100k
        cap = 240;
724
725
140k
    mem = xmlRealloc(buf->mem, cap);
726
140k
    if (mem == NULL) {
727
0
        buf->code = XML_ERR_NO_MEMORY;
728
0
        return(-1);
729
0
    }
730
731
140k
    buf->mem = mem;
732
140k
    buf->cap = cap;
733
734
140k
    return(0);
735
140k
}
736
737
static void
738
72.8M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
739
72.8M
    if (buf->max - buf->size < len) {
740
0
        if (buf->code == XML_ERR_OK)
741
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
742
0
        return;
743
0
    }
744
745
72.8M
    if (buf->cap - buf->size <= len) {
746
130k
        if (xmlSBufGrow(buf, len) < 0)
747
0
            return;
748
130k
    }
749
750
72.8M
    if (len > 0)
751
72.8M
        memcpy(buf->mem + buf->size, str, len);
752
72.8M
    buf->size += len;
753
72.8M
}
754
755
static void
756
71.3M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
757
71.3M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
758
71.3M
}
759
760
static void
761
104k
xmlSBufAddChar(xmlSBuf *buf, int c) {
762
104k
    xmlChar *end;
763
764
104k
    if (buf->max - buf->size < 4) {
765
0
        if (buf->code == XML_ERR_OK)
766
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
767
0
        return;
768
0
    }
769
770
104k
    if (buf->cap - buf->size <= 4) {
771
10.2k
        if (xmlSBufGrow(buf, 4) < 0)
772
0
            return;
773
10.2k
    }
774
775
104k
    end = buf->mem + buf->size;
776
777
104k
    if (c < 0x80) {
778
53.5k
        *end = (xmlChar) c;
779
53.5k
        buf->size += 1;
780
53.5k
    } else {
781
51.1k
        buf->size += xmlCopyCharMultiByte(end, c);
782
51.1k
    }
783
104k
}
784
785
static void
786
53.4M
xmlSBufAddReplChar(xmlSBuf *buf) {
787
53.4M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
788
53.4M
}
789
790
static void
791
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
792
0
    if (buf->code == XML_ERR_NO_MEMORY)
793
0
        xmlCtxtErrMemory(ctxt);
794
0
    else
795
0
        xmlFatalErr(ctxt, buf->code, errMsg);
796
0
}
797
798
static xmlChar *
799
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
800
116k
              const char *errMsg) {
801
116k
    if (buf->mem == NULL) {
802
16.5k
        buf->mem = xmlMalloc(1);
803
16.5k
        if (buf->mem == NULL) {
804
0
            buf->code = XML_ERR_NO_MEMORY;
805
16.5k
        } else {
806
16.5k
            buf->mem[0] = 0;
807
16.5k
        }
808
100k
    } else {
809
100k
        buf->mem[buf->size] = 0;
810
100k
    }
811
812
116k
    if (buf->code == XML_ERR_OK) {
813
116k
        if (sizeOut != NULL)
814
76.1k
            *sizeOut = buf->size;
815
116k
        return(buf->mem);
816
116k
    }
817
818
0
    xmlSBufReportError(buf, ctxt, errMsg);
819
820
0
    xmlFree(buf->mem);
821
822
0
    if (sizeOut != NULL)
823
0
        *sizeOut = 0;
824
0
    return(NULL);
825
116k
}
826
827
static void
828
572k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
829
572k
    if (buf->code != XML_ERR_OK)
830
0
        xmlSBufReportError(buf, ctxt, errMsg);
831
832
572k
    xmlFree(buf->mem);
833
572k
}
834
835
static int
836
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
837
84.7M
                    const char *errMsg) {
838
84.7M
    int c = str[0];
839
84.7M
    int c1 = str[1];
840
841
84.7M
    if ((c1 & 0xC0) != 0x80)
842
4.02M
        goto encoding_error;
843
844
80.6M
    if (c < 0xE0) {
845
        /* 2-byte sequence */
846
79.6M
        if (c < 0xC2)
847
44.4M
            goto encoding_error;
848
849
35.2M
        return(2);
850
79.6M
    } else {
851
1.00M
        int c2 = str[2];
852
853
1.00M
        if ((c2 & 0xC0) != 0x80)
854
5.89k
            goto encoding_error;
855
856
996k
        if (c < 0xF0) {
857
            /* 3-byte sequence */
858
984k
            if (c == 0xE0) {
859
                /* overlong */
860
9.85k
                if (c1 < 0xA0)
861
450
                    goto encoding_error;
862
974k
            } else if (c == 0xED) {
863
                /* surrogate */
864
519
                if (c1 >= 0xA0)
865
283
                    goto encoding_error;
866
973k
            } else if (c == 0xEF) {
867
                /* U+FFFE and U+FFFF are invalid Chars */
868
209k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
869
2.29k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
870
209k
            }
871
872
983k
            return(3);
873
984k
        } else {
874
            /* 4-byte sequence */
875
12.6k
            if ((str[3] & 0xC0) != 0x80)
876
2.18k
                goto encoding_error;
877
10.4k
            if (c == 0xF0) {
878
                /* overlong */
879
917
                if (c1 < 0x90)
880
250
                    goto encoding_error;
881
9.57k
            } else if (c >= 0xF4) {
882
                /* greater than 0x10FFFF */
883
4.31k
                if ((c > 0xF4) || (c1 >= 0x90))
884
3.50k
                    goto encoding_error;
885
4.31k
            }
886
887
6.73k
            return(4);
888
10.4k
        }
889
996k
    }
890
891
48.4M
encoding_error:
892
    /* Only report the first error */
893
48.4M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
894
1.74k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
895
1.74k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
896
1.74k
    }
897
898
48.4M
    return(0);
899
80.6M
}
900
901
/************************************************************************
902
 *                  *
903
 *    SAX2 defaulted attributes handling      *
904
 *                  *
905
 ************************************************************************/
906
907
/**
908
 * Final initialization of the parser context before starting to parse.
909
 *
910
 * This accounts for users modifying struct members of parser context
911
 * directly.
912
 *
913
 * @param ctxt  an XML parser context
914
 */
915
static void
916
15.1k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
917
15.1k
    xmlSAXHandlerPtr sax;
918
919
    /* Avoid unused variable warning if features are disabled. */
920
15.1k
    (void) sax;
921
922
    /*
923
     * Changing the SAX struct directly is still widespread practice
924
     * in internal and external code.
925
     */
926
15.1k
    if (ctxt == NULL) return;
927
15.1k
    sax = ctxt->sax;
928
15.1k
#ifdef LIBXML_SAX1_ENABLED
929
    /*
930
     * Only enable SAX2 if there SAX2 element handlers, except when there
931
     * are no element handlers at all.
932
     */
933
15.1k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
934
15.1k
        (sax) &&
935
15.1k
        (sax->initialized == XML_SAX2_MAGIC) &&
936
15.1k
        ((sax->startElementNs != NULL) ||
937
15.1k
         (sax->endElementNs != NULL) ||
938
15.1k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
939
15.1k
        ctxt->sax2 = 1;
940
#else
941
    ctxt->sax2 = 1;
942
#endif /* LIBXML_SAX1_ENABLED */
943
944
    /*
945
     * Some users replace the dictionary directly in the context struct.
946
     * We really need an API function to do that cleanly.
947
     */
948
15.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
949
15.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
950
15.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
951
15.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
952
15.1k
    (ctxt->str_xml_ns == NULL)) {
953
0
        xmlErrMemory(ctxt);
954
0
    }
955
956
15.1k
    xmlDictSetLimit(ctxt->dict,
957
15.1k
                    (ctxt->options & XML_PARSE_HUGE) ?
958
15.1k
                        0 :
959
15.1k
                        XML_MAX_DICTIONARY_LIMIT);
960
961
15.1k
#ifdef LIBXML_VALID_ENABLED
962
15.1k
    if (ctxt->validate)
963
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
964
15.1k
    else
965
15.1k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
966
15.1k
#endif /* LIBXML_VALID_ENABLED */
967
15.1k
}
968
969
/*
 * One defaulted attribute, stored in the attrs array of xmlDefAttrs.
 * NOTE(review): the code filling these fields lies outside the visible
 * part of the file; presumably prefix/name/value are the hashed parts of
 * the attribute, valueEnd points past the value, and external marks a
 * declaration from the external subset — confirm against xmlAddDefAttrs.
 */
typedef struct {
970
    xmlHashedString prefix;
971
    xmlHashedString name;
972
    xmlHashedString value;
973
    const xmlChar *valueEnd;
974
    int external;
975
    int expandedSize;
976
} xmlDefAttr;
977
978
/*
 * Variable-length list of the defaulted attributes of one element: a
 * small header followed by the array of xmlDefAttr entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
979
typedef xmlDefAttrs *xmlDefAttrsPtr;
980
struct _xmlDefAttrs {
981
    int nbAttrs;  /* number of defaulted attributes on that element */
982
    int maxAttrs;       /* the size of the array */
983
#if __STDC_VERSION__ >= 199901L
984
    /* Using a C99 flexible array member avoids UBSan errors. */
985
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
986
#else
987
    /* Pre-C99 fallback: one-element trailing array. */
    xmlDefAttr attrs[1];
988
#endif
989
};
990
991
/**
992
 * Normalize the space in non CDATA attribute values:
993
 * If the attribute type is not CDATA, then the XML processor MUST further
994
 * process the normalized attribute value by discarding any leading and
995
 * trailing space (\#x20) characters, and by replacing sequences of space
996
 * (\#x20) characters by a single space (\#x20) character.
997
 * Note that the size of dst need to be at least src, and if one doesn't need
998
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
999
 * passing src as dst is just fine.
1000
 *
1001
 * @param src  the source string
1002
 * @param dst  the target string
1003
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
1004
 *         is needed.
1005
 */
1006
static xmlChar *
1007
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1008
16.6k
{
1009
16.6k
    if ((src == NULL) || (dst == NULL))
1010
0
        return(NULL);
1011
1012
18.1k
    while (*src == 0x20) src++;
1013
17.4M
    while (*src != 0) {
1014
17.4M
  if (*src == 0x20) {
1015
141k
      while (*src == 0x20) src++;
1016
18.8k
      if (*src != 0)
1017
18.2k
    *dst++ = 0x20;
1018
17.4M
  } else {
1019
17.4M
      *dst++ = *src++;
1020
17.4M
  }
1021
17.4M
    }
1022
16.6k
    *dst = 0;
1023
16.6k
    if (dst == src)
1024
15.4k
       return(NULL);
1025
1.21k
    return(dst);
1026
16.6k
}
1027
1028
/**
1029
 * Add a defaulted attribute for an element
1030
 *
1031
 * @param ctxt  an XML parser context
1032
 * @param fullname  the element fullname
1033
 * @param fullattr  the attribute fullname
1034
 * @param value  the attribute value
1035
 */
1036
static void
1037
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1038
               const xmlChar *fullname,
1039
               const xmlChar *fullattr,
1040
17.5k
               const xmlChar *value) {
1041
17.5k
    xmlDefAttrsPtr defaults;
1042
17.5k
    xmlDefAttr *attr;
1043
17.5k
    int len, expandedSize;
1044
17.5k
    xmlHashedString name;
1045
17.5k
    xmlHashedString prefix;
1046
17.5k
    xmlHashedString hvalue;
1047
17.5k
    const xmlChar *localname;
1048
1049
    /*
1050
     * Allows to detect attribute redefinitions
1051
     */
1052
17.5k
    if (ctxt->attsSpecial != NULL) {
1053
15.9k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1054
4.37k
      return;
1055
15.9k
    }
1056
1057
13.1k
    if (ctxt->attsDefault == NULL) {
1058
1.61k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1059
1.61k
  if (ctxt->attsDefault == NULL)
1060
0
      goto mem_error;
1061
1.61k
    }
1062
1063
    /*
1064
     * split the element name into prefix:localname , the string found
1065
     * are within the DTD and then not associated to namespace names.
1066
     */
1067
13.1k
    localname = xmlSplitQName3(fullname, &len);
1068
13.1k
    if (localname == NULL) {
1069
8.71k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1070
8.71k
  prefix.name = NULL;
1071
8.71k
    } else {
1072
4.45k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1073
4.45k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1074
4.45k
        if (prefix.name == NULL)
1075
0
            goto mem_error;
1076
4.45k
    }
1077
13.1k
    if (name.name == NULL)
1078
0
        goto mem_error;
1079
1080
    /*
1081
     * make sure there is some storage
1082
     */
1083
13.1k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1084
13.1k
    if ((defaults == NULL) ||
1085
13.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1086
4.19k
        xmlDefAttrsPtr temp;
1087
4.19k
        int newSize;
1088
1089
4.19k
        if (defaults == NULL) {
1090
2.56k
            newSize = 4;
1091
2.56k
        } else {
1092
1.63k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1093
1.63k
                ((size_t) defaults->maxAttrs >
1094
1.63k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1095
0
                goto mem_error;
1096
1097
1.63k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1098
0
                newSize = XML_MAX_ATTRS;
1099
1.63k
            else
1100
1.63k
                newSize = defaults->maxAttrs * 2;
1101
1.63k
        }
1102
4.19k
        temp = xmlRealloc(defaults,
1103
4.19k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1104
4.19k
  if (temp == NULL)
1105
0
      goto mem_error;
1106
4.19k
        if (defaults == NULL)
1107
2.56k
            temp->nbAttrs = 0;
1108
4.19k
  temp->maxAttrs = newSize;
1109
4.19k
        defaults = temp;
1110
4.19k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1111
4.19k
                          defaults, NULL) < 0) {
1112
0
      xmlFree(defaults);
1113
0
      goto mem_error;
1114
0
  }
1115
4.19k
    }
1116
1117
    /*
1118
     * Split the attribute name into prefix:localname , the string found
1119
     * are within the DTD and hen not associated to namespace names.
1120
     */
1121
13.1k
    localname = xmlSplitQName3(fullattr, &len);
1122
13.1k
    if (localname == NULL) {
1123
7.47k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1124
7.47k
  prefix.name = NULL;
1125
7.47k
    } else {
1126
5.69k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1127
5.69k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1128
5.69k
        if (prefix.name == NULL)
1129
0
            goto mem_error;
1130
5.69k
    }
1131
13.1k
    if (name.name == NULL)
1132
0
        goto mem_error;
1133
1134
    /* intern the string and precompute the end */
1135
13.1k
    len = strlen((const char *) value);
1136
13.1k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1137
13.1k
    if (hvalue.name == NULL)
1138
0
        goto mem_error;
1139
1140
13.1k
    expandedSize = strlen((const char *) name.name);
1141
13.1k
    if (prefix.name != NULL)
1142
5.69k
        expandedSize += strlen((const char *) prefix.name);
1143
13.1k
    expandedSize += len;
1144
1145
13.1k
    attr = &defaults->attrs[defaults->nbAttrs++];
1146
13.1k
    attr->name = name;
1147
13.1k
    attr->prefix = prefix;
1148
13.1k
    attr->value = hvalue;
1149
13.1k
    attr->valueEnd = hvalue.name + len;
1150
13.1k
    attr->external = PARSER_EXTERNAL(ctxt);
1151
13.1k
    attr->expandedSize = expandedSize;
1152
1153
13.1k
    return;
1154
1155
0
mem_error:
1156
0
    xmlErrMemory(ctxt);
1157
0
}
1158
1159
/**
1160
 * Register this attribute type
1161
 *
1162
 * @param ctxt  an XML parser context
1163
 * @param fullname  the element fullname
1164
 * @param fullattr  the attribute fullname
1165
 * @param type  the attribute type
1166
 */
1167
static void
1168
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1169
      const xmlChar *fullname,
1170
      const xmlChar *fullattr,
1171
      int type)
1172
18.4k
{
1173
18.4k
    if (ctxt->attsSpecial == NULL) {
1174
1.85k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1175
1.85k
  if (ctxt->attsSpecial == NULL)
1176
0
      goto mem_error;
1177
1.85k
    }
1178
1179
18.4k
    if (PARSER_EXTERNAL(ctxt))
1180
0
        type |= XML_SPECIAL_EXTERNAL;
1181
1182
18.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1183
18.4k
                    XML_INT_TO_PTR(type)) < 0)
1184
0
        goto mem_error;
1185
18.4k
    return;
1186
1187
18.4k
mem_error:
1188
0
    xmlErrMemory(ctxt);
1189
0
}
1190
1191
/**
1192
 * Removes CDATA attributes from the special attribute table
1193
 */
1194
static void
1195
xmlCleanSpecialAttrCallback(void *payload, void *data,
1196
                            const xmlChar *fullname, const xmlChar *fullattr,
1197
13.5k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1198
13.5k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1199
1200
13.5k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1201
777
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1202
777
    }
1203
13.5k
}
1204
1205
/**
1206
 * Trim the list of attributes defined to remove all those of type
1207
 * CDATA as they are not special. This call should be done when finishing
1208
 * to parse the DTD and before starting to parse the document root.
1209
 *
1210
 * @param ctxt  an XML parser context
1211
 */
1212
static void
1213
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1214
6.30k
{
1215
6.30k
    if (ctxt->attsSpecial == NULL)
1216
4.44k
        return;
1217
1218
1.85k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1219
1220
1.85k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1221
58
        xmlHashFree(ctxt->attsSpecial, NULL);
1222
58
        ctxt->attsSpecial = NULL;
1223
58
    }
1224
1.85k
}
1225
1226
/**
1227
 * Checks that the value conforms to the LanguageID production:
1228
 *
1229
 * @deprecated Internal function, do not use.
1230
 *
1231
 * NOTE: this is somewhat deprecated, those productions were removed from
1232
 * the XML Second edition.
1233
 *
1234
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1235
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1236
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1237
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1238
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1239
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1240
 *
1241
 * The current REC reference the successors of RFC 1766, currently 5646
1242
 *
1243
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1244
 *
1245
 *     langtag       = language
1246
 *                     ["-" script]
1247
 *                     ["-" region]
1248
 *                     *("-" variant)
1249
 *                     *("-" extension)
1250
 *                     ["-" privateuse]
1251
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1252
 *                     ["-" extlang]       ; sometimes followed by
1253
 *                                         ; extended language subtags
1254
 *                   / 4ALPHA              ; or reserved for future use
1255
 *                   / 5*8ALPHA            ; or registered language subtag
1256
 *
1257
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1258
 *                     *2("-" 3ALPHA)      ; permanently reserved
1259
 *
1260
 *     script        = 4ALPHA              ; ISO 15924 code
1261
 *
1262
 *     region        = 2ALPHA              ; ISO 3166-1 code
1263
 *                   / 3DIGIT              ; UN M.49 code
1264
 *
1265
 *     variant       = 5*8alphanum         ; registered variants
1266
 *                   / (DIGIT 3alphanum)
1267
 *
1268
 *     extension     = singleton 1*("-" (2*8alphanum))
1269
 *
1270
 *                                         ; Single alphanumerics
1271
 *                                         ; "x" reserved for private use
1272
 *     singleton     = DIGIT               ; 0 - 9
1273
 *                   / %x41-57             ; A - W
1274
 *                   / %x59-5A             ; Y - Z
1275
 *                   / %x61-77             ; a - w
1276
 *                   / %x79-7A             ; y - z
1277
 *
1278
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1279
 * The parser below doesn't try to cope with extension or privateuse
1280
 * that could be added but that's not interoperable anyway
1281
 *
1282
 * @param lang  pointer to the string value
1283
 * @returns 1 if correct 0 otherwise
1284
 **/
1285
int
1286
xmlCheckLanguageID(const xmlChar * lang)
1287
0
{
1288
0
    const xmlChar *cur = lang, *nxt;
1289
1290
0
    if (cur == NULL)
1291
0
        return (0);
1292
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1293
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1294
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1295
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1296
        /*
1297
         * Still allow IANA code and user code which were coming
1298
         * from the previous version of the XML-1.0 specification
1299
         * it's deprecated but we should not fail
1300
         */
1301
0
        cur += 2;
1302
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1303
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1304
0
            cur++;
1305
0
        return(cur[0] == 0);
1306
0
    }
1307
0
    nxt = cur;
1308
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1309
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1310
0
           nxt++;
1311
0
    if (nxt - cur >= 4) {
1312
        /*
1313
         * Reserved
1314
         */
1315
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1316
0
            return(0);
1317
0
        return(1);
1318
0
    }
1319
0
    if (nxt - cur < 2)
1320
0
        return(0);
1321
    /* we got an ISO 639 code */
1322
0
    if (nxt[0] == 0)
1323
0
        return(1);
1324
0
    if (nxt[0] != '-')
1325
0
        return(0);
1326
1327
0
    nxt++;
1328
0
    cur = nxt;
1329
    /* now we can have extlang or script or region or variant */
1330
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1331
0
        goto region_m49;
1332
1333
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1334
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1335
0
           nxt++;
1336
0
    if (nxt - cur == 4)
1337
0
        goto script;
1338
0
    if (nxt - cur == 2)
1339
0
        goto region;
1340
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1341
0
        goto variant;
1342
0
    if (nxt - cur != 3)
1343
0
        return(0);
1344
    /* we parsed an extlang */
1345
0
    if (nxt[0] == 0)
1346
0
        return(1);
1347
0
    if (nxt[0] != '-')
1348
0
        return(0);
1349
1350
0
    nxt++;
1351
0
    cur = nxt;
1352
    /* now we can have script or region or variant */
1353
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1354
0
        goto region_m49;
1355
1356
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1357
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1358
0
           nxt++;
1359
0
    if (nxt - cur == 2)
1360
0
        goto region;
1361
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1362
0
        goto variant;
1363
0
    if (nxt - cur != 4)
1364
0
        return(0);
1365
    /* we parsed a script */
1366
0
script:
1367
0
    if (nxt[0] == 0)
1368
0
        return(1);
1369
0
    if (nxt[0] != '-')
1370
0
        return(0);
1371
1372
0
    nxt++;
1373
0
    cur = nxt;
1374
    /* now we can have region or variant */
1375
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1376
0
        goto region_m49;
1377
1378
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1379
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1380
0
           nxt++;
1381
1382
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1383
0
        goto variant;
1384
0
    if (nxt - cur != 2)
1385
0
        return(0);
1386
    /* we parsed a region */
1387
0
region:
1388
0
    if (nxt[0] == 0)
1389
0
        return(1);
1390
0
    if (nxt[0] != '-')
1391
0
        return(0);
1392
1393
0
    nxt++;
1394
0
    cur = nxt;
1395
    /* now we can just have a variant */
1396
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398
0
           nxt++;
1399
1400
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1401
0
        return(0);
1402
1403
    /* we parsed a variant */
1404
0
variant:
1405
0
    if (nxt[0] == 0)
1406
0
        return(1);
1407
0
    if (nxt[0] != '-')
1408
0
        return(0);
1409
    /* extensions and private use subtags not checked */
1410
0
    return (1);
1411
1412
0
region_m49:
1413
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1414
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1415
0
        nxt += 3;
1416
0
        goto region;
1417
0
    }
1418
0
    return(0);
1419
0
}
1420
1421
/************************************************************************
1422
 *                  *
1423
 *    Parser stacks related functions and macros    *
1424
 *                  *
1425
 ************************************************************************/
1426
1427
static xmlChar *
1428
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1429
1430
/**
1431
 * Create a new namespace database.
1432
 *
1433
 * @returns the new obejct.
1434
 */
1435
xmlParserNsData *
1436
15.1k
xmlParserNsCreate(void) {
1437
15.1k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1438
1439
15.1k
    if (nsdb == NULL)
1440
0
        return(NULL);
1441
15.1k
    memset(nsdb, 0, sizeof(*nsdb));
1442
15.1k
    nsdb->defaultNsIndex = INT_MAX;
1443
1444
15.1k
    return(nsdb);
1445
15.1k
}
1446
1447
/**
1448
 * Free a namespace database.
1449
 *
1450
 * @param nsdb  namespace database
1451
 */
1452
void
1453
15.1k
xmlParserNsFree(xmlParserNsData *nsdb) {
1454
15.1k
    if (nsdb == NULL)
1455
0
        return;
1456
1457
15.1k
    xmlFree(nsdb->extra);
1458
15.1k
    xmlFree(nsdb->hash);
1459
15.1k
    xmlFree(nsdb);
1460
15.1k
}
1461
1462
/**
1463
 * Reset a namespace database.
1464
 *
1465
 * @param nsdb  namespace database
1466
 */
1467
static void
1468
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1469
0
    if (nsdb == NULL)
1470
0
        return;
1471
1472
0
    nsdb->hashElems = 0;
1473
0
    nsdb->elementId = 0;
1474
0
    nsdb->defaultNsIndex = INT_MAX;
1475
1476
0
    if (nsdb->hash)
1477
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1478
0
}
1479
1480
/**
1481
 * Signal that a new element has started.
1482
 *
1483
 * @param nsdb  namespace database
1484
 * @returns 0 on success, -1 if the element counter overflowed.
1485
 */
1486
static int
1487
1.51M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1488
1.51M
    if (nsdb->elementId == UINT_MAX)
1489
0
        return(-1);
1490
1.51M
    nsdb->elementId++;
1491
1492
1.51M
    return(0);
1493
1.51M
}
1494
1495
/**
1496
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1497
 * be set to the matching bucket, or the first empty bucket if no match
1498
 * was found.
1499
 *
1500
 * @param ctxt  parser context
1501
 * @param prefix  namespace prefix
1502
 * @param bucketPtr  optional bucket (return value)
1503
 * @returns the namespace index on success, INT_MAX if no namespace was
1504
 * found.
1505
 */
1506
static int
1507
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1508
2.45M
                  xmlParserNsBucket **bucketPtr) {
1509
2.45M
    xmlParserNsBucket *bucket, *tombstone;
1510
2.45M
    unsigned index, hashValue;
1511
1512
2.45M
    if (prefix->name == NULL)
1513
484k
        return(ctxt->nsdb->defaultNsIndex);
1514
1515
1.97M
    if (ctxt->nsdb->hashSize == 0)
1516
25.7k
        return(INT_MAX);
1517
1518
1.94M
    hashValue = prefix->hashValue;
1519
1.94M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1520
1.94M
    bucket = &ctxt->nsdb->hash[index];
1521
1.94M
    tombstone = NULL;
1522
1523
2.67M
    while (bucket->hashValue) {
1524
2.39M
        if (bucket->index == INT_MAX) {
1525
44.0k
            if (tombstone == NULL)
1526
38.9k
                tombstone = bucket;
1527
2.35M
        } else if (bucket->hashValue == hashValue) {
1528
1.66M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1529
1.66M
                if (bucketPtr != NULL)
1530
1.43M
                    *bucketPtr = bucket;
1531
1.66M
                return(bucket->index);
1532
1.66M
            }
1533
1.66M
        }
1534
1535
728k
        index++;
1536
728k
        bucket++;
1537
728k
        if (index == ctxt->nsdb->hashSize) {
1538
103k
            index = 0;
1539
103k
            bucket = ctxt->nsdb->hash;
1540
103k
        }
1541
728k
    }
1542
1543
279k
    if (bucketPtr != NULL)
1544
36.2k
        *bucketPtr = tombstone ? tombstone : bucket;
1545
279k
    return(INT_MAX);
1546
1.94M
}
1547
1548
/**
1549
 * Lookup namespace URI with given prefix.
1550
 *
1551
 * @param ctxt  parser context
1552
 * @param prefix  namespace prefix
1553
 * @returns the namespace URI on success, NULL if no namespace was found.
1554
 */
1555
static const xmlChar *
1556
493k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1557
493k
    const xmlChar *ret;
1558
493k
    int nsIndex;
1559
1560
493k
    if (prefix->name == ctxt->str_xml)
1561
143
        return(ctxt->str_xml_ns);
1562
1563
    /*
1564
     * minNsIndex is used when building an entity tree. We must
1565
     * ignore namespaces declared outside the entity.
1566
     */
1567
493k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1568
493k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1569
174k
        return(NULL);
1570
1571
318k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1572
318k
    if (ret[0] == 0)
1573
28.5k
        ret = NULL;
1574
318k
    return(ret);
1575
493k
}
1576
1577
/**
1578
 * Lookup extra data for the given prefix. This returns data stored
1579
 * with xmlParserNsUdpateSax.
1580
 *
1581
 * @param ctxt  parser context
1582
 * @param prefix  namespace prefix
1583
 * @returns the data on success, NULL if no namespace was found.
1584
 */
1585
void *
1586
20.9k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1587
20.9k
    xmlHashedString hprefix;
1588
20.9k
    int nsIndex;
1589
1590
20.9k
    if (prefix == ctxt->str_xml)
1591
7.02k
        return(NULL);
1592
1593
13.9k
    hprefix.name = prefix;
1594
13.9k
    if (prefix != NULL)
1595
4.53k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1596
9.40k
    else
1597
9.40k
        hprefix.hashValue = 0;
1598
13.9k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1599
13.9k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1600
0
        return(NULL);
1601
1602
13.9k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1603
13.9k
}
1604
1605
/**
1606
 * Sets or updates extra data for the given prefix. This value will be
1607
 * returned by xmlParserNsLookupSax as long as the namespace with the
1608
 * given prefix is in scope.
1609
 *
1610
 * @param ctxt  parser context
1611
 * @param prefix  namespace prefix
1612
 * @param saxData  extra data for SAX handler
1613
 * @returns the data on success, NULL if no namespace was found.
1614
 */
1615
int
1616
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1617
192k
                     void *saxData) {
1618
192k
    xmlHashedString hprefix;
1619
192k
    int nsIndex;
1620
1621
192k
    if (prefix == ctxt->str_xml)
1622
0
        return(-1);
1623
1624
192k
    hprefix.name = prefix;
1625
192k
    if (prefix != NULL)
1626
188k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1627
4.14k
    else
1628
4.14k
        hprefix.hashValue = 0;
1629
192k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1630
192k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1631
0
        return(-1);
1632
1633
192k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1634
192k
    return(0);
1635
192k
}
1636
1637
/**
1638
 * Grows the namespace tables.
1639
 *
1640
 * @param ctxt  parser context
1641
 * @returns 0 on success, -1 if a memory allocation failed.
1642
 */
1643
static int
1644
13.1k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1645
13.1k
    const xmlChar **table;
1646
13.1k
    xmlParserNsExtra *extra;
1647
13.1k
    int newSize;
1648
1649
13.1k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1650
13.1k
                              sizeof(table[0]) + sizeof(extra[0]),
1651
13.1k
                              16, XML_MAX_ITEMS);
1652
13.1k
    if (newSize < 0)
1653
0
        goto error;
1654
1655
13.1k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1656
13.1k
    if (table == NULL)
1657
0
        goto error;
1658
13.1k
    ctxt->nsTab = table;
1659
1660
13.1k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1661
13.1k
    if (extra == NULL)
1662
0
        goto error;
1663
13.1k
    ctxt->nsdb->extra = extra;
1664
1665
13.1k
    ctxt->nsMax = newSize;
1666
13.1k
    return(0);
1667
1668
0
error:
1669
0
    xmlErrMemory(ctxt);
1670
0
    return(-1);
1671
13.1k
}
1672
1673
/**
1674
 * Push a new namespace on the table.
1675
 *
1676
 * @param ctxt  parser context
1677
 * @param prefix  prefix with hash value
1678
 * @param uri  uri with hash value
1679
 * @param saxData  extra data for SAX handler
1680
 * @param defAttr  whether the namespace comes from a default attribute
1681
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1682
 * -1 if a memory allocation failed.
1683
 */
1684
static int
1685
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1686
880k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1687
880k
    xmlParserNsBucket *bucket = NULL;
1688
880k
    xmlParserNsExtra *extra;
1689
880k
    const xmlChar **ns;
1690
880k
    unsigned hashValue, nsIndex, oldIndex;
1691
1692
880k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1693
195
        return(0);
1694
1695
880k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1696
0
        xmlErrMemory(ctxt);
1697
0
        return(-1);
1698
0
    }
1699
1700
    /*
1701
     * Default namespace and 'xml' namespace
1702
     */
1703
880k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1704
143k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1705
1706
143k
        if (oldIndex != INT_MAX) {
1707
133k
            extra = &ctxt->nsdb->extra[oldIndex];
1708
1709
133k
            if (extra->elementId == ctxt->nsdb->elementId) {
1710
19.1k
                if (defAttr == 0)
1711
18.6k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1712
19.1k
                return(0);
1713
19.1k
            }
1714
1715
114k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1716
114k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1717
0
                return(0);
1718
114k
        }
1719
1720
123k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1721
123k
        goto populate_entry;
1722
143k
    }
1723
1724
    /*
1725
     * Hash table lookup
1726
     */
1727
737k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1728
737k
    if (oldIndex != INT_MAX) {
1729
700k
        extra = &ctxt->nsdb->extra[oldIndex];
1730
1731
        /*
1732
         * Check for duplicate definitions on the same element.
1733
         */
1734
700k
        if (extra->elementId == ctxt->nsdb->elementId) {
1735
742
            if (defAttr == 0)
1736
462
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1737
742
            return(0);
1738
742
        }
1739
1740
699k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
699k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1742
0
            return(0);
1743
1744
699k
        bucket->index = ctxt->nsNr;
1745
699k
        goto populate_entry;
1746
699k
    }
1747
1748
    /*
1749
     * Insert new bucket
1750
     */
1751
1752
37.4k
    hashValue = prefix->hashValue;
1753
1754
    /*
1755
     * Grow hash table, 50% fill factor
1756
     */
1757
37.4k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1758
1.83k
        xmlParserNsBucket *newHash;
1759
1.83k
        unsigned newSize, i, index;
1760
1761
1.83k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1762
0
            xmlErrMemory(ctxt);
1763
0
            return(-1);
1764
0
        }
1765
1.83k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1766
1.83k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1767
1.83k
        if (newHash == NULL) {
1768
0
            xmlErrMemory(ctxt);
1769
0
            return(-1);
1770
0
        }
1771
1.83k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1772
1773
111k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1774
109k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1775
109k
            unsigned newIndex;
1776
1777
109k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1778
106k
                continue;
1779
2.84k
            newIndex = hv & (newSize - 1);
1780
1781
3.70k
            while (newHash[newIndex].hashValue != 0) {
1782
861
                newIndex++;
1783
861
                if (newIndex == newSize)
1784
172
                    newIndex = 0;
1785
861
            }
1786
1787
2.84k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1788
2.84k
        }
1789
1790
1.83k
        xmlFree(ctxt->nsdb->hash);
1791
1.83k
        ctxt->nsdb->hash = newHash;
1792
1.83k
        ctxt->nsdb->hashSize = newSize;
1793
1794
        /*
1795
         * Relookup
1796
         */
1797
1.83k
        index = hashValue & (newSize - 1);
1798
1799
2.09k
        while (newHash[index].hashValue != 0) {
1800
254
            index++;
1801
254
            if (index == newSize)
1802
35
                index = 0;
1803
254
        }
1804
1805
1.83k
        bucket = &newHash[index];
1806
1.83k
    }
1807
1808
37.4k
    bucket->hashValue = hashValue;
1809
37.4k
    bucket->index = ctxt->nsNr;
1810
37.4k
    ctxt->nsdb->hashElems++;
1811
37.4k
    oldIndex = INT_MAX;
1812
1813
860k
populate_entry:
1814
860k
    nsIndex = ctxt->nsNr;
1815
1816
860k
    ns = &ctxt->nsTab[nsIndex * 2];
1817
860k
    ns[0] = prefix ? prefix->name : NULL;
1818
860k
    ns[1] = uri->name;
1819
1820
860k
    extra = &ctxt->nsdb->extra[nsIndex];
1821
860k
    extra->saxData = saxData;
1822
860k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1823
860k
    extra->uriHashValue = uri->hashValue;
1824
860k
    extra->elementId = ctxt->nsdb->elementId;
1825
860k
    extra->oldIndex = oldIndex;
1826
1827
860k
    ctxt->nsNr++;
1828
1829
860k
    return(1);
1830
37.4k
}
1831
1832
/**
1833
 * Pops the top `nr` namespaces and restores the hash table.
1834
 *
1835
 * @param ctxt  an XML parser context
1836
 * @param nr  the number to pop
1837
 * @returns the number of namespaces popped.
1838
 */
1839
static int
1840
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1841
203k
{
1842
203k
    int i;
1843
1844
    /* assert(nr <= ctxt->nsNr); */
1845
1846
1.06M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1847
858k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1848
858k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1849
1850
858k
        if (prefix == NULL) {
1851
123k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1852
734k
        } else {
1853
734k
            xmlHashedString hprefix;
1854
734k
            xmlParserNsBucket *bucket = NULL;
1855
1856
734k
            hprefix.name = prefix;
1857
734k
            hprefix.hashValue = extra->prefixHashValue;
1858
734k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1859
            /* assert(bucket && bucket->hashValue); */
1860
734k
            bucket->index = extra->oldIndex;
1861
734k
        }
1862
858k
    }
1863
1864
203k
    ctxt->nsNr -= nr;
1865
203k
    return(nr);
1866
203k
}
1867
1868
static int
1869
5.75k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1870
5.75k
    const xmlChar **atts;
1871
5.75k
    unsigned *attallocs;
1872
5.75k
    int newSize;
1873
1874
5.75k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1875
5.75k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1876
5.75k
                              10, XML_MAX_ATTRS);
1877
5.75k
    if (newSize < 0) {
1878
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1879
0
                    "Maximum number of attributes exceeded");
1880
0
        return(-1);
1881
0
    }
1882
1883
5.75k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1884
5.75k
    if (atts == NULL)
1885
0
        goto mem_error;
1886
5.75k
    ctxt->atts = atts;
1887
1888
5.75k
    attallocs = xmlRealloc(ctxt->attallocs,
1889
5.75k
                           newSize * sizeof(attallocs[0]));
1890
5.75k
    if (attallocs == NULL)
1891
0
        goto mem_error;
1892
5.75k
    ctxt->attallocs = attallocs;
1893
1894
5.75k
    ctxt->maxatts = newSize * 5;
1895
1896
5.75k
    return(0);
1897
1898
0
mem_error:
1899
0
    xmlErrMemory(ctxt);
1900
0
    return(-1);
1901
5.75k
}
1902
1903
/**
1904
 * Pushes a new parser input on top of the input stack
1905
 *
1906
 * @param ctxt  an XML parser context
1907
 * @param value  the parser input
1908
 * @returns -1 in case of error, the index in the stack otherwise
1909
 */
1910
int
1911
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1912
48.2k
{
1913
48.2k
    char *directory = NULL;
1914
48.2k
    int maxDepth;
1915
1916
48.2k
    if ((ctxt == NULL) || (value == NULL))
1917
0
        return(-1);
1918
1919
48.2k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1920
1921
48.2k
    if (ctxt->inputNr >= ctxt->inputMax) {
1922
1.59k
        xmlParserInputPtr *tmp;
1923
1.59k
        int newSize;
1924
1925
1.59k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1926
1.59k
                                  5, maxDepth);
1927
1.59k
        if (newSize < 0) {
1928
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1929
0
                           "Maximum entity nesting depth exceeded");
1930
0
            xmlHaltParser(ctxt);
1931
0
            return(-1);
1932
0
        }
1933
1.59k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1934
1.59k
        if (tmp == NULL) {
1935
0
            xmlErrMemory(ctxt);
1936
0
            return(-1);
1937
0
        }
1938
1.59k
        ctxt->inputTab = tmp;
1939
1.59k
        ctxt->inputMax = newSize;
1940
1.59k
    }
1941
1942
48.2k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1943
0
        directory = xmlParserGetDirectory(value->filename);
1944
0
        if (directory == NULL) {
1945
0
            xmlErrMemory(ctxt);
1946
0
            return(-1);
1947
0
        }
1948
0
    }
1949
1950
48.2k
    if (ctxt->input_id >= INT_MAX) {
1951
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1952
0
        return(-1);
1953
0
    }
1954
1955
48.2k
    ctxt->inputTab[ctxt->inputNr] = value;
1956
48.2k
    ctxt->input = value;
1957
1958
48.2k
    if (ctxt->inputNr == 0) {
1959
15.1k
        xmlFree(ctxt->directory);
1960
15.1k
        ctxt->directory = directory;
1961
15.1k
    }
1962
1963
    /*
1964
     * The input ID is unused internally, but there are entity
1965
     * loaders in downstream code that detect the main document
1966
     * by checking for "input_id == 1".
1967
     */
1968
48.2k
    value->id = ctxt->input_id++;
1969
1970
48.2k
    return(ctxt->inputNr++);
1971
48.2k
}
1972
1973
/**
1974
 * Pops the top parser input from the input stack
1975
 *
1976
 * @param ctxt  an XML parser context
1977
 * @returns the input just removed
1978
 */
1979
xmlParserInput *
1980
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1981
78.4k
{
1982
78.4k
    xmlParserInputPtr ret;
1983
1984
78.4k
    if (ctxt == NULL)
1985
0
        return(NULL);
1986
78.4k
    if (ctxt->inputNr <= 0)
1987
30.2k
        return (NULL);
1988
48.2k
    ctxt->inputNr--;
1989
48.2k
    if (ctxt->inputNr > 0)
1990
33.1k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1991
15.1k
    else
1992
15.1k
        ctxt->input = NULL;
1993
48.2k
    ret = ctxt->inputTab[ctxt->inputNr];
1994
48.2k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1995
48.2k
    return (ret);
1996
78.4k
}
1997
1998
/**
1999
 * Pushes a new element node on top of the node stack
2000
 *
2001
 * @deprecated Internal function, do not use.
2002
 *
2003
 * @param ctxt  an XML parser context
2004
 * @param value  the element node
2005
 * @returns -1 in case of error, the index in the stack otherwise
2006
 */
2007
int
2008
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
2009
61.0k
{
2010
61.0k
    if (ctxt == NULL)
2011
0
        return(0);
2012
2013
61.0k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2014
5.08k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2015
5.08k
        xmlNodePtr *tmp;
2016
5.08k
        int newSize;
2017
2018
5.08k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2019
5.08k
                                  10, maxDepth);
2020
5.08k
        if (newSize < 0) {
2021
7
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2022
7
                    "Excessive depth in document: %d,"
2023
7
                    " use XML_PARSE_HUGE option\n",
2024
7
                    ctxt->nodeNr);
2025
7
            xmlHaltParser(ctxt);
2026
7
            return(-1);
2027
7
        }
2028
2029
5.07k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2030
5.07k
        if (tmp == NULL) {
2031
0
            xmlErrMemory(ctxt);
2032
0
            return (-1);
2033
0
        }
2034
5.07k
        ctxt->nodeTab = tmp;
2035
5.07k
  ctxt->nodeMax = newSize;
2036
5.07k
    }
2037
2038
60.9k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2039
60.9k
    ctxt->node = value;
2040
60.9k
    return (ctxt->nodeNr++);
2041
61.0k
}
2042
2043
/**
2044
 * Pops the top element node from the node stack
2045
 *
2046
 * @deprecated Internal function, do not use.
2047
 *
2048
 * @param ctxt  an XML parser context
2049
 * @returns the node just removed
2050
 */
2051
xmlNode *
2052
nodePop(xmlParserCtxt *ctxt)
2053
255k
{
2054
255k
    xmlNodePtr ret;
2055
2056
255k
    if (ctxt == NULL) return(NULL);
2057
255k
    if (ctxt->nodeNr <= 0)
2058
196k
        return (NULL);
2059
58.2k
    ctxt->nodeNr--;
2060
58.2k
    if (ctxt->nodeNr > 0)
2061
56.6k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2062
1.58k
    else
2063
1.58k
        ctxt->node = NULL;
2064
58.2k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2065
58.2k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2066
58.2k
    return (ret);
2067
255k
}
2068
2069
/**
2070
 * Pushes a new element name/prefix/URL on top of the name stack
2071
 *
2072
 * @param ctxt  an XML parser context
2073
 * @param value  the element name
2074
 * @param prefix  the element prefix
2075
 * @param URI  the element namespace name
2076
 * @param line  the current line number for error messages
2077
 * @param nsNr  the number of namespaces pushed on the namespace table
2078
 * @returns -1 in case of error, the index in the stack otherwise
2079
 */
2080
static int
2081
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2082
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2083
497k
{
2084
497k
    xmlStartTag *tag;
2085
2086
497k
    if (ctxt->nameNr >= ctxt->nameMax) {
2087
16.9k
        const xmlChar **tmp;
2088
16.9k
        xmlStartTag *tmp2;
2089
16.9k
        int newSize;
2090
2091
16.9k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2092
16.9k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2093
16.9k
                                  10, XML_MAX_ITEMS);
2094
16.9k
        if (newSize < 0)
2095
0
            goto mem_error;
2096
2097
16.9k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2098
16.9k
        if (tmp == NULL)
2099
0
      goto mem_error;
2100
16.9k
  ctxt->nameTab = tmp;
2101
2102
16.9k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2103
16.9k
        if (tmp2 == NULL)
2104
0
      goto mem_error;
2105
16.9k
  ctxt->pushTab = tmp2;
2106
2107
16.9k
        ctxt->nameMax = newSize;
2108
480k
    } else if (ctxt->pushTab == NULL) {
2109
9.33k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2110
9.33k
        if (ctxt->pushTab == NULL)
2111
0
            goto mem_error;
2112
9.33k
    }
2113
497k
    ctxt->nameTab[ctxt->nameNr] = value;
2114
497k
    ctxt->name = value;
2115
497k
    tag = &ctxt->pushTab[ctxt->nameNr];
2116
497k
    tag->prefix = prefix;
2117
497k
    tag->URI = URI;
2118
497k
    tag->line = line;
2119
497k
    tag->nsNr = nsNr;
2120
497k
    return (ctxt->nameNr++);
2121
0
mem_error:
2122
0
    xmlErrMemory(ctxt);
2123
0
    return (-1);
2124
497k
}
2125
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost entry from the name stack and makes the name
 * below it (if any) the current one. The matching `pushTab` entry is
 * left as it is.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2149
2150
/**
2151
 * Pops the top element name from the name stack
2152
 *
2153
 * @deprecated Internal function, do not use.
2154
 *
2155
 * @param ctxt  an XML parser context
2156
 * @returns the name just removed
2157
 */
2158
static const xmlChar *
2159
namePop(xmlParserCtxtPtr ctxt)
2160
491k
{
2161
491k
    const xmlChar *ret;
2162
2163
491k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2164
0
        return (NULL);
2165
491k
    ctxt->nameNr--;
2166
491k
    if (ctxt->nameNr > 0)
2167
487k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2168
3.96k
    else
2169
3.96k
        ctxt->name = NULL;
2170
491k
    ret = ctxt->nameTab[ctxt->nameNr];
2171
491k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2172
491k
    return (ret);
2173
491k
}
2174
2175
1.52M
/*
 * Pushes `val` on the space stack and points ctxt->space at the new
 * top entry. The stack grows on demand up to XML_MAX_ITEMS; both a
 * capacity failure and an allocation failure are reported as memory
 * errors.
 *
 * Returns the index of the new entry, or -1 on error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
2200
2201
1.51M
/*
 * Pops the top entry of the space stack and returns its value, or 0
 * if the stack is empty. The vacated slot is overwritten with -1.
 * ctxt->space is moved to the entry below; when the stack becomes
 * empty it points at slot 0.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2213
2214
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR return the pointer to the base of the current input buffer.
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR; both currently expand to the same expression.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   CMP4 .. CMP10
 *           compare the first 4 .. 10 bytes at `s` with the given
 *           characters, stopping at the first mismatch.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one by one, keeping line and column up to date
 *           when newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * Caveats: GROW and SHRINK expand to a complete `if` statement including
 * its semicolon, NEXT1 expands to a brace block, and COPY_BUF expands to an
 * unbraced if/else. None of them is safe as the body of an `if` that is
 * followed by `else`.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2320
2321
static int
2322
43.6M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2323
43.6M
    int c = xmlCurrentChar(ctxt, len);
2324
2325
43.6M
    if (c == XML_INVALID_CHAR)
2326
11.5M
        c = 0xFFFD; /* replacement character */
2327
2328
43.6M
    return(c);
2329
43.6M
}
2330
2331
/**
2332
 * Skip whitespace in the input stream.
2333
 *
2334
 * @deprecated Internal function, do not use.
2335
 *
2336
 * @param ctxt  the XML parser context
2337
 * @returns the number of space chars skipped
2338
 */
2339
int
2340
6.54M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2341
6.54M
    const xmlChar *cur;
2342
6.54M
    int res = 0;
2343
2344
6.54M
    cur = ctxt->input->cur;
2345
6.54M
    while (IS_BLANK_CH(*cur)) {
2346
1.89M
        if (*cur == '\n') {
2347
996k
            ctxt->input->line++; ctxt->input->col = 1;
2348
996k
        } else {
2349
895k
            ctxt->input->col++;
2350
895k
        }
2351
1.89M
        cur++;
2352
1.89M
        if (res < INT_MAX)
2353
1.89M
            res++;
2354
1.89M
        if (*cur == 0) {
2355
8.27k
            ctxt->input->cur = cur;
2356
8.27k
            xmlParserGrow(ctxt);
2357
8.27k
            cur = ctxt->input->cur;
2358
8.27k
        }
2359
1.89M
    }
2360
6.54M
    ctxt->input->cur = cur;
2361
2362
6.54M
    if (res > 4)
2363
7.79k
        GROW;
2364
2365
6.54M
    return(res);
2366
6.54M
}
2367
2368
static void
2369
29.2k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2370
29.2k
    unsigned long consumed;
2371
29.2k
    xmlEntityPtr ent;
2372
2373
29.2k
    ent = ctxt->input->entity;
2374
2375
29.2k
    ent->flags &= ~XML_ENT_EXPANDING;
2376
2377
29.2k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2378
1.09k
        int result;
2379
2380
        /*
2381
         * Read the rest of the stream in case of errors. We want
2382
         * to account for the whole entity size.
2383
         */
2384
1.09k
        do {
2385
1.09k
            ctxt->input->cur = ctxt->input->end;
2386
1.09k
            xmlParserShrink(ctxt);
2387
1.09k
            result = xmlParserGrow(ctxt);
2388
1.09k
        } while (result > 0);
2389
2390
1.09k
        consumed = ctxt->input->consumed;
2391
1.09k
        xmlSaturatedAddSizeT(&consumed,
2392
1.09k
                             ctxt->input->end - ctxt->input->base);
2393
2394
1.09k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2395
2396
        /*
2397
         * Add to sizeentities when parsing an external entity
2398
         * for the first time.
2399
         */
2400
1.09k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2401
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2402
0
        }
2403
2404
1.09k
        ent->flags |= XML_ENT_CHECKED;
2405
1.09k
    }
2406
2407
29.2k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2408
2409
29.2k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2410
2411
29.2k
    GROW;
2412
29.2k
}
2413
2414
/**
2415
 * Skip whitespace in the input stream, also handling parameter
2416
 * entities.
2417
 *
2418
 * @param ctxt  the XML parser context
2419
 * @returns the number of space chars skipped
2420
 */
2421
static int
2422
267k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2423
267k
    int res = 0;
2424
267k
    int inParam;
2425
267k
    int expandParam;
2426
2427
267k
    inParam = PARSER_IN_PE(ctxt);
2428
267k
    expandParam = PARSER_EXTERNAL(ctxt);
2429
2430
267k
    if (!inParam && !expandParam)
2431
136k
        return(xmlSkipBlankChars(ctxt));
2432
2433
    /*
2434
     * It's Okay to use CUR/NEXT here since all the blanks are on
2435
     * the ASCII range.
2436
     */
2437
12.9M
    while (PARSER_STOPPED(ctxt) == 0) {
2438
12.9M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2439
12.8M
            NEXT;
2440
12.8M
        } else if (CUR == '%') {
2441
384
            if ((expandParam == 0) ||
2442
384
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2443
384
                break;
2444
2445
            /*
2446
             * Expand parameter entity. We continue to consume
2447
             * whitespace at the start of the entity and possible
2448
             * even consume the whole entity and pop it. We might
2449
             * even pop multiple PEs in this loop.
2450
             */
2451
0
            xmlParsePERefInternal(ctxt, 0);
2452
2453
0
            inParam = PARSER_IN_PE(ctxt);
2454
0
            expandParam = PARSER_EXTERNAL(ctxt);
2455
131k
        } else if (CUR == 0) {
2456
10.0k
            if (inParam == 0)
2457
0
                break;
2458
2459
            /*
2460
             * Don't pop parameter entities that start a markup
2461
             * declaration to detect Well-formedness constraint:
2462
             * PE Between Declarations.
2463
             */
2464
10.0k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2465
10.0k
                break;
2466
2467
0
            xmlPopPE(ctxt);
2468
2469
0
            inParam = PARSER_IN_PE(ctxt);
2470
0
            expandParam = PARSER_EXTERNAL(ctxt);
2471
121k
        } else {
2472
121k
            break;
2473
121k
        }
2474
2475
        /*
2476
         * Also increase the counter when entering or exiting a PERef.
2477
         * The spec says: "When a parameter-entity reference is recognized
2478
         * in the DTD and included, its replacement text MUST be enlarged
2479
         * by the attachment of one leading and one following space (#x20)
2480
         * character."
2481
         */
2482
12.8M
        if (res < INT_MAX)
2483
12.8M
            res++;
2484
12.8M
    }
2485
2486
131k
    return(res);
2487
267k
}
2488
2489
/************************************************************************
2490
 *                  *
2491
 *    Commodity functions to handle entities      *
2492
 *                  *
2493
 ************************************************************************/
2494
2495
/**
2496
 * @deprecated Internal function, don't use.
2497
 *
2498
 * @param ctxt  an XML parser context
2499
 * @returns the current xmlChar in the parser context
2500
 */
2501
xmlChar
2502
0
xmlPopInput(xmlParserCtxt *ctxt) {
2503
0
    xmlParserInputPtr input;
2504
2505
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2506
0
    input = xmlCtxtPopInput(ctxt);
2507
0
    xmlFreeInputStream(input);
2508
0
    if (*ctxt->input->cur == 0)
2509
0
        xmlParserGrow(ctxt);
2510
0
    return(CUR);
2511
0
}
2512
2513
/**
2514
 * Push an input stream onto the stack.
2515
 *
2516
 * @deprecated Internal function, don't use.
2517
 *
2518
 * @param ctxt  an XML parser context
2519
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2520
 * @returns -1 in case of error or the index in the input stack
2521
 */
2522
int
2523
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2524
0
    int ret;
2525
2526
0
    if ((ctxt == NULL) || (input == NULL))
2527
0
        return(-1);
2528
2529
0
    ret = xmlCtxtPushInput(ctxt, input);
2530
0
    if (ret >= 0)
2531
0
        GROW;
2532
0
    return(ret);
2533
0
}
2534
2535
/**
2536
 * Parse a numeric character reference. Always consumes '&'.
2537
 *
2538
 * @deprecated Internal function, don't use.
2539
 *
2540
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2541
 *                      '&#x' [0-9a-fA-F]+ ';'
2542
 *
2543
 * [ WFC: Legal Character ]
2544
 * Characters referred to using character references must match the
2545
 * production for Char.
2546
 *
2547
 * @param ctxt  an XML parser context
2548
 * @returns the value parsed (as an int), 0 in case of error
2549
 */
2550
int
2551
62.7k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2552
62.7k
    int val = 0;
2553
62.7k
    int count = 0;
2554
2555
    /*
2556
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2557
     */
2558
62.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2559
62.7k
        (NXT(2) == 'x')) {
2560
22.6k
  SKIP(3);
2561
22.6k
  GROW;
2562
79.9k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2563
60.3k
      if (count++ > 20) {
2564
940
    count = 0;
2565
940
    GROW;
2566
940
      }
2567
60.3k
      if ((RAW >= '0') && (RAW <= '9'))
2568
13.7k
          val = val * 16 + (CUR - '0');
2569
46.5k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2570
15.7k
          val = val * 16 + (CUR - 'a') + 10;
2571
30.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2572
27.7k
          val = val * 16 + (CUR - 'A') + 10;
2573
3.07k
      else {
2574
3.07k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2575
3.07k
    val = 0;
2576
3.07k
    break;
2577
3.07k
      }
2578
57.2k
      if (val > 0x110000)
2579
5.87k
          val = 0x110000;
2580
2581
57.2k
      NEXT;
2582
57.2k
      count++;
2583
57.2k
  }
2584
22.6k
  if (RAW == ';') {
2585
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2586
19.6k
      ctxt->input->col++;
2587
19.6k
      ctxt->input->cur++;
2588
19.6k
  }
2589
40.0k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2590
40.0k
  SKIP(2);
2591
40.0k
  GROW;
2592
122k
  while (RAW != ';') { /* loop blocked by count */
2593
93.0k
      if (count++ > 20) {
2594
1.26k
    count = 0;
2595
1.26k
    GROW;
2596
1.26k
      }
2597
93.0k
      if ((RAW >= '0') && (RAW <= '9'))
2598
82.4k
          val = val * 10 + (CUR - '0');
2599
10.6k
      else {
2600
10.6k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2601
10.6k
    val = 0;
2602
10.6k
    break;
2603
10.6k
      }
2604
82.4k
      if (val > 0x110000)
2605
11.6k
          val = 0x110000;
2606
2607
82.4k
      NEXT;
2608
82.4k
      count++;
2609
82.4k
  }
2610
40.0k
  if (RAW == ';') {
2611
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2612
29.4k
      ctxt->input->col++;
2613
29.4k
      ctxt->input->cur++;
2614
29.4k
  }
2615
40.0k
    } else {
2616
0
        if (RAW == '&')
2617
0
            SKIP(1);
2618
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2619
0
    }
2620
2621
    /*
2622
     * [ WFC: Legal Character ]
2623
     * Characters referred to using character references must match the
2624
     * production for Char.
2625
     */
2626
62.7k
    if (val >= 0x110000) {
2627
395
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2628
395
                "xmlParseCharRef: character reference out of bounds\n",
2629
395
          val);
2630
62.3k
    } else if (IS_CHAR(val)) {
2631
45.6k
        return(val);
2632
45.6k
    } else {
2633
16.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2634
16.7k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2635
16.7k
                    val);
2636
16.7k
    }
2637
17.1k
    return(0);
2638
62.7k
}
2639
2640
/**
2641
 * parse Reference declarations, variant parsing from a string rather
2642
 * than an an input flow.
2643
 *
2644
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2645
 *                      '&#x' [0-9a-fA-F]+ ';'
2646
 *
2647
 * [ WFC: Legal Character ]
2648
 * Characters referred to using character references must match the
2649
 * production for Char.
2650
 *
2651
 * @param ctxt  an XML parser context
2652
 * @param str  a pointer to an index in the string
2653
 * @returns the value parsed (as an int), 0 in case of error, str will be
2654
 *         updated to the current value of the index
2655
 */
2656
static int
2657
159k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2658
159k
    const xmlChar *ptr;
2659
159k
    xmlChar cur;
2660
159k
    int val = 0;
2661
2662
159k
    if ((str == NULL) || (*str == NULL)) return(0);
2663
159k
    ptr = *str;
2664
159k
    cur = *ptr;
2665
159k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2666
22.5k
  ptr += 3;
2667
22.5k
  cur = *ptr;
2668
526k
  while (cur != ';') { /* Non input consuming loop */
2669
504k
      if ((cur >= '0') && (cur <= '9'))
2670
462k
          val = val * 16 + (cur - '0');
2671
42.2k
      else if ((cur >= 'a') && (cur <= 'f'))
2672
667
          val = val * 16 + (cur - 'a') + 10;
2673
41.5k
      else if ((cur >= 'A') && (cur <= 'F'))
2674
40.4k
          val = val * 16 + (cur - 'A') + 10;
2675
1.13k
      else {
2676
1.13k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2677
1.13k
    val = 0;
2678
1.13k
    break;
2679
1.13k
      }
2680
503k
      if (val > 0x110000)
2681
1.90k
          val = 0x110000;
2682
2683
503k
      ptr++;
2684
503k
      cur = *ptr;
2685
503k
  }
2686
22.5k
  if (cur == ';')
2687
21.3k
      ptr++;
2688
137k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2689
137k
  ptr += 2;
2690
137k
  cur = *ptr;
2691
453k
  while (cur != ';') { /* Non input consuming loops */
2692
318k
      if ((cur >= '0') && (cur <= '9'))
2693
316k
          val = val * 10 + (cur - '0');
2694
2.29k
      else {
2695
2.29k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2696
2.29k
    val = 0;
2697
2.29k
    break;
2698
2.29k
      }
2699
316k
      if (val > 0x110000)
2700
1.70k
          val = 0x110000;
2701
2702
316k
      ptr++;
2703
316k
      cur = *ptr;
2704
316k
  }
2705
137k
  if (cur == ';')
2706
134k
      ptr++;
2707
137k
    } else {
2708
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2709
0
  return(0);
2710
0
    }
2711
159k
    *str = ptr;
2712
2713
    /*
2714
     * [ WFC: Legal Character ]
2715
     * Characters referred to using character references must match the
2716
     * production for Char.
2717
     */
2718
159k
    if (val >= 0x110000) {
2719
256
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2720
256
                "xmlParseStringCharRef: character reference out of bounds\n",
2721
256
                val);
2722
159k
    } else if (IS_CHAR(val)) {
2723
154k
        return(val);
2724
154k
    } else {
2725
4.90k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2726
4.90k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2727
4.90k
        val);
2728
4.90k
    }
2729
5.15k
    return(0);
2730
159k
}
2731
2732
/**
2733
 *     [69] PEReference ::= '%' Name ';'
2734
 *
2735
 * @deprecated Internal function, do not use.
2736
 *
2737
 * [ WFC: No Recursion ]
2738
 * A parsed entity must not contain a recursive
2739
 * reference to itself, either directly or indirectly.
2740
 *
2741
 * [ WFC: Entity Declared ]
2742
 * In a document without any DTD, a document with only an internal DTD
2743
 * subset which contains no parameter entity references, or a document
2744
 * with "standalone='yes'", ...  ... The declaration of a parameter
2745
 * entity must precede any reference to it...
2746
 *
2747
 * [ VC: Entity Declared ]
2748
 * In a document with an external subset or external parameter entities
2749
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2750
 * must precede any reference to it...
2751
 *
2752
 * [ WFC: In DTD ]
2753
 * Parameter-entity references may only appear in the DTD.
2754
 * NOTE: misleading but this is handled.
2755
 *
2756
 * A PEReference may have been detected in the current input stream
2757
 * the handling is done accordingly to
2758
 *      http://www.w3.org/TR/REC-xml#entproc
2759
 * i.e.
2760
 *   - Included in literal in entity values
2761
 *   - Included as Parameter Entity reference within DTDs
2762
 * @param ctxt  the parser context
2763
 */
2764
void
2765
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2766
0
    xmlParsePERefInternal(ctxt, 0);
2767
0
}
2768
2769
/**
2770
 * @deprecated Internal function, don't use.
2771
 *
2772
 * @param ctxt  the parser context
2773
 * @param str  the input string
2774
 * @param len  the string length
2775
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2776
 * @param end  an end marker xmlChar, 0 if none
2777
 * @param end2  an end marker xmlChar, 0 if none
2778
 * @param end3  an end marker xmlChar, 0 if none
2779
 * @returns A newly allocated string with the substitution done. The caller
2780
 *      must deallocate it !
2781
 */
2782
xmlChar *
2783
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2784
                           int what ATTRIBUTE_UNUSED,
2785
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2786
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2787
0
        return(NULL);
2788
2789
0
    if ((str[len] != 0) ||
2790
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2791
0
        return(NULL);
2792
2793
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2794
0
}
2795
2796
/**
2797
 * @deprecated Internal function, don't use.
2798
 *
2799
 * @param ctxt  the parser context
2800
 * @param str  the input string
2801
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @param end  an end marker xmlChar, 0 if none
2803
 * @param end2  an end marker xmlChar, 0 if none
2804
 * @param end3  an end marker xmlChar, 0 if none
2805
 * @returns A newly allocated string with the substitution done. The caller
2806
 *      must deallocate it !
2807
 */
2808
xmlChar *
2809
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2810
                        int what ATTRIBUTE_UNUSED,
2811
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2812
0
    if ((ctxt == NULL) || (str == NULL))
2813
0
        return(NULL);
2814
2815
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2816
0
        return(NULL);
2817
2818
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2819
0
}
2820
2821
/************************************************************************
2822
 *                  *
2823
 *    Commodity functions, cleanup needed ?     *
2824
 *                  *
2825
 ************************************************************************/
2826
2827
/**
2828
 * Is this a sequence of blank chars that one can ignore ?
2829
 *
2830
 * @param ctxt  an XML parser context
2831
 * @param str  a xmlChar *
2832
 * @param len  the size of `str`
2833
 * @param blank_chars  we know the chars are blanks
2834
 * @returns 1 if ignorable 0 otherwise.
2835
 */
2836
2837
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2838
0
                     int blank_chars) {
2839
0
    int i;
2840
0
    xmlNodePtr lastChild;
2841
2842
    /*
2843
     * Check for xml:space value.
2844
     */
2845
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2846
0
        (*(ctxt->space) == -2))
2847
0
  return(0);
2848
2849
    /*
2850
     * Check that the string is made of blanks
2851
     */
2852
0
    if (blank_chars == 0) {
2853
0
  for (i = 0;i < len;i++)
2854
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2855
0
    }
2856
2857
    /*
2858
     * Look if the element is mixed content in the DTD if available
2859
     */
2860
0
    if (ctxt->node == NULL) return(0);
2861
0
    if (ctxt->myDoc != NULL) {
2862
0
        xmlElementPtr elemDecl = NULL;
2863
0
        xmlDocPtr doc = ctxt->myDoc;
2864
0
        const xmlChar *prefix = NULL;
2865
2866
0
        if (ctxt->node->ns)
2867
0
            prefix = ctxt->node->ns->prefix;
2868
0
        if (doc->intSubset != NULL)
2869
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2870
0
                                      prefix);
2871
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2872
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2873
0
                                      prefix);
2874
0
        if (elemDecl != NULL) {
2875
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2876
0
                return(1);
2877
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2878
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2879
0
                return(0);
2880
0
        }
2881
0
    }
2882
2883
    /*
2884
     * Otherwise, heuristic :-\
2885
     *
2886
     * When push parsing, we could be at the end of a chunk.
2887
     * This makes the look-ahead and consequently the NOBLANKS
2888
     * option unreliable.
2889
     */
2890
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2891
0
    if ((ctxt->node->children == NULL) &&
2892
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2893
2894
0
    lastChild = xmlGetLastChild(ctxt->node);
2895
0
    if (lastChild == NULL) {
2896
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2897
0
            (ctxt->node->content != NULL)) return(0);
2898
0
    } else if (xmlNodeIsText(lastChild))
2899
0
        return(0);
2900
0
    else if ((ctxt->node->children != NULL) &&
2901
0
             (xmlNodeIsText(ctxt->node->children)))
2902
0
        return(0);
2903
0
    return(1);
2904
0
}
2905
2906
/************************************************************************
2907
 *                  *
2908
 *    Extra stuff for namespace support     *
2909
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2910
 *                  *
2911
 ************************************************************************/
2912
2913
/**
2914
 * parse an UTF8 encoded XML qualified name string
2915
 *
2916
 * @deprecated Don't use.
2917
 *
2918
 * @param ctxt  an XML parser context
2919
 * @param name  an XML parser context
2920
 * @param prefixOut  a xmlChar **
2921
 * @returns the local part, and prefix is updated
2922
 *   to get the Prefix if any.
2923
 */
2924
2925
xmlChar *
2926
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2927
0
    xmlChar *ret;
2928
0
    const xmlChar *localname;
2929
2930
0
    localname = xmlSplitQName4(name, prefixOut);
2931
0
    if (localname == NULL) {
2932
0
        xmlCtxtErrMemory(ctxt);
2933
0
        return(NULL);
2934
0
    }
2935
2936
0
    ret = xmlStrdup(localname);
2937
0
    if (ret == NULL) {
2938
0
        xmlCtxtErrMemory(ctxt);
2939
0
        xmlFree(*prefixOut);
2940
0
    }
2941
2942
0
    return(ret);
2943
0
}
2944
2945
/************************************************************************
2946
 *                  *
2947
 *      The parser itself       *
2948
 *  Relates to http://www.w3.org/TR/REC-xml       *
2949
 *                  *
2950
 ************************************************************************/
2951
2952
/************************************************************************
2953
 *                  *
2954
 *  Routines to parse Name, NCName and NmToken      *
2955
 *                  *
2956
 ************************************************************************/
2957
2958
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision-5 parsing semantics if
 * the XML_PARSE_OLD10 option is given to the parser.
 */
2968
2969
static int
xmlIsNameStartCharNew(int c) {
    /*
     * Use the new checks of production [4] [4a] and [5] of the
     * Update 5 of XML-1.0
     */

    /* ASCII name start characters */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c == '_') || (c == ':'))
        return(1);

    /* Nothing else below 0xC0 can start a name */
    if (c < 0xC0)
        return(0);

    /*
     * The remaining ranges are disjoint and listed in ascending order:
     * once c is known to be at or below the upper bound of a range,
     * it is accepted only if it also reaches that range's lower bound.
     */
    if (c <= 0xD6)
        return(1);
    if (c <= 0xF6)
        return(c >= 0xD8);
    if (c <= 0x2FF)
        return(c >= 0xF8);
    if (c <= 0x37D)
        return(c >= 0x370);
    if (c <= 0x1FFF)
        return(c >= 0x37F);
    if (c <= 0x200D)
        return(c >= 0x200C);
    if (c <= 0x218F)
        return(c >= 0x2070);
    if (c <= 0x2FEF)
        return(c >= 0x2C00);
    if (c <= 0xD7FF)
        return(c >= 0x3001);
    if (c <= 0xFDCF)
        return(c >= 0xF900);
    if (c <= 0xFFFD)
        return(c >= 0xFDF0);
    if (c <= 0xEFFFF)
        return(c >= 0x10000);

    return(0);
}
2994
2995
static int
xmlIsNameCharNew(int c) {
    /*
     * Use the new checks of production [4] [4a] and [5] of the
     * Update 5 of XML-1.0
     */

    /* ASCII name characters */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9')) /* !start */
        return(1);
    if ((c == '_') || (c == ':'))
        return(1);
    if ((c == '-') || (c == '.')) /* !start */
        return(1);

    /* Below 0xC0, the only other name character is 0xB7 (!start) */
    if (c < 0xC0)
        return(c == 0xB7);

    /*
     * The remaining ranges are disjoint and listed in ascending order:
     * once c is known to be at or below the upper bound of a range,
     * it is accepted only if it also reaches that range's lower bound.
     */
    if (c <= 0xD6)
        return(1);
    if (c <= 0xF6)
        return(c >= 0xD8);
    /* 0xF8-0x2FF, 0x300-0x36F (!start) and 0x370-0x37D are contiguous */
    if (c <= 0x37D)
        return(c >= 0xF8);
    if (c <= 0x1FFF)
        return(c >= 0x37F);
    if (c <= 0x200D)
        return(c >= 0x200C);
    if (c <= 0x2040) /* !start */
        return(c >= 0x203F);
    if (c <= 0x218F)
        return(c >= 0x2070);
    if (c <= 0x2FEF)
        return(c >= 0x2C00);
    if (c <= 0xD7FF)
        return(c >= 0x3001);
    if (c <= 0xFDCF)
        return(c >= 0xF900);
    if (c <= 0xFFFD)
        return(c >= 0xFDF0);
    if (c <= 0xEFFFF)
        return(c >= 0x10000);

    return(0);
}
3024
3025
/*
 * Name start character test using the pre-revision-5 definitions:
 * a Letter, '_' or ':'.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((c == '_') || (c == ':'))
        return(1);

    return(IS_LETTER(c) ? 1 : 0);
}
3032
3033
/*
 * Name character test using the pre-revision-5 definitions:
 * a Letter, a Digit, '.', '-', '_', ':', a CombiningChar or an Extender.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3044
3045
/*
 * Name start character test. A non-zero `old10` selects the
 * pre-revision-5 rules, zero selects the revision 5 rules.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3052
3053
/*
 * Name character test. A non-zero `old10` selects the
 * pre-revision-5 rules, zero selects the revision 5 rules.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3060
3061
/*
3062
 * Scan an XML Name, NCName or Nmtoken.
3063
 *
3064
 * Returns a pointer to the end of the name on success. If the
3065
 * name is invalid, returns `ptr`. If the name is longer than
3066
 * `maxSize` bytes, returns NULL.
3067
 *
3068
 * @param ptr  pointer to the start of the name
3069
 * @param maxSize  maximum size in bytes
3070
 * @param flags  XML_SCAN_* flags
3071
 * @returns a pointer to the end of the name or NULL
3072
 */
3073
const xmlChar *
3074
250k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3075
250k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3076
250k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3077
3078
2.08M
    while (1) {
3079
2.08M
        int c, len;
3080
3081
2.08M
        c = *ptr;
3082
2.08M
        if (c < 0x80) {
3083
1.08M
            if (c == stop)
3084
230
                break;
3085
1.08M
            len = 1;
3086
1.08M
        } else {
3087
1.00M
            len = 4;
3088
1.00M
            c = xmlGetUTF8Char(ptr, &len);
3089
1.00M
            if (c < 0)
3090
389
                break;
3091
1.00M
        }
3092
3093
2.08M
        if (flags & XML_SCAN_NMTOKEN ?
3094
1.83M
                !xmlIsNameChar(c, old10) :
3095
2.08M
                !xmlIsNameStartChar(c, old10))
3096
249k
            break;
3097
3098
1.83M
        if ((size_t) len > maxSize)
3099
0
            return(NULL);
3100
1.83M
        ptr += len;
3101
1.83M
        maxSize -= len;
3102
1.83M
        flags |= XML_SCAN_NMTOKEN;
3103
1.83M
    }
3104
3105
250k
    return(ptr);
3106
250k
}
3107
3108
static const xmlChar *
3109
108k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3110
108k
    const xmlChar *ret;
3111
108k
    int len = 0, l;
3112
108k
    int c;
3113
108k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3114
108k
                    XML_MAX_TEXT_LENGTH :
3115
108k
                    XML_MAX_NAME_LENGTH;
3116
108k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3117
3118
    /*
3119
     * Handler for more complex cases
3120
     */
3121
108k
    c = xmlCurrentChar(ctxt, &l);
3122
108k
    if (!xmlIsNameStartChar(c, old10))
3123
82.4k
        return(NULL);
3124
26.3k
    len += l;
3125
26.3k
    NEXTL(l);
3126
26.3k
    c = xmlCurrentChar(ctxt, &l);
3127
2.98M
    while (xmlIsNameChar(c, old10)) {
3128
2.95M
        if (len <= INT_MAX - l)
3129
2.95M
            len += l;
3130
2.95M
        NEXTL(l);
3131
2.95M
        c = xmlCurrentChar(ctxt, &l);
3132
2.95M
    }
3133
26.3k
    if (len > maxLength) {
3134
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3135
0
        return(NULL);
3136
0
    }
3137
26.3k
    if (ctxt->input->cur - ctxt->input->base < len) {
3138
        /*
3139
         * There were a couple of bugs where PERefs lead to to a change
3140
         * of the buffer. Check the buffer size to avoid passing an invalid
3141
         * pointer to xmlDictLookup.
3142
         */
3143
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3144
0
                    "unexpected change of input buffer");
3145
0
        return (NULL);
3146
0
    }
3147
26.3k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3148
958
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3149
25.4k
    else
3150
25.4k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3151
26.3k
    if (ret == NULL)
3152
0
        xmlErrMemory(ctxt);
3153
26.3k
    return(ret);
3154
26.3k
}
3155
3156
/**
3157
 * parse an XML name.
3158
 *
3159
 * @deprecated Internal function, don't use.
3160
 *
3161
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3162
 *                      CombiningChar | Extender
3163
 *
3164
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3165
 *
3166
 *     [6] Names ::= Name (#x20 Name)*
3167
 *
3168
 * @param ctxt  an XML parser context
3169
 * @returns the Name parsed or NULL
3170
 */
3171
3172
const xmlChar *
3173
1.70M
xmlParseName(xmlParserCtxt *ctxt) {
3174
1.70M
    const xmlChar *in;
3175
1.70M
    const xmlChar *ret;
3176
1.70M
    size_t count = 0;
3177
1.70M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3178
1.70M
                       XML_MAX_TEXT_LENGTH :
3179
1.70M
                       XML_MAX_NAME_LENGTH;
3180
3181
1.70M
    GROW;
3182
3183
    /*
3184
     * Accelerator for simple ASCII names
3185
     */
3186
1.70M
    in = ctxt->input->cur;
3187
1.70M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3188
1.70M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3189
1.70M
  (*in == '_') || (*in == ':')) {
3190
1.61M
  in++;
3191
13.3M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3192
13.3M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3193
13.3M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3194
13.3M
         (*in == '_') || (*in == '-') ||
3195
13.3M
         (*in == ':') || (*in == '.'))
3196
11.7M
      in++;
3197
1.61M
  if ((*in > 0) && (*in < 0x80)) {
3198
1.59M
      count = in - ctxt->input->cur;
3199
1.59M
            if (count > maxLength) {
3200
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3201
0
                return(NULL);
3202
0
            }
3203
1.59M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3204
1.59M
      ctxt->input->cur = in;
3205
1.59M
      ctxt->input->col += count;
3206
1.59M
      if (ret == NULL)
3207
0
          xmlErrMemory(ctxt);
3208
1.59M
      return(ret);
3209
1.59M
  }
3210
1.61M
    }
3211
    /* accelerator for special cases */
3212
108k
    return(xmlParseNameComplex(ctxt));
3213
1.70M
}
3214
3215
static xmlHashedString
3216
1.16M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3217
1.16M
    xmlHashedString ret;
3218
1.16M
    int len = 0, l;
3219
1.16M
    int c;
3220
1.16M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221
1.16M
                    XML_MAX_TEXT_LENGTH :
3222
1.16M
                    XML_MAX_NAME_LENGTH;
3223
1.16M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3224
1.16M
    size_t startPosition = 0;
3225
3226
1.16M
    ret.name = NULL;
3227
1.16M
    ret.hashValue = 0;
3228
3229
    /*
3230
     * Handler for more complex cases
3231
     */
3232
1.16M
    startPosition = CUR_PTR - BASE_PTR;
3233
1.16M
    c = xmlCurrentChar(ctxt, &l);
3234
1.16M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3235
1.16M
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3236
1.12M
  return(ret);
3237
1.12M
    }
3238
3239
2.78M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3240
2.78M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3241
2.74M
        if (len <= INT_MAX - l)
3242
2.74M
      len += l;
3243
2.74M
  NEXTL(l);
3244
2.74M
  c = xmlCurrentChar(ctxt, &l);
3245
2.74M
    }
3246
37.7k
    if (len > maxLength) {
3247
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3248
0
        return(ret);
3249
0
    }
3250
37.7k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3251
37.7k
    if (ret.name == NULL)
3252
0
        xmlErrMemory(ctxt);
3253
37.7k
    return(ret);
3254
37.7k
}
3255
3256
/**
3257
 * parse an XML name.
3258
 *
3259
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3260
 *                          CombiningChar | Extender
3261
 *
3262
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3263
 *
3264
 * @param ctxt  an XML parser context
3265
 * @returns the Name parsed or NULL
3266
 */
3267
3268
static xmlHashedString
3269
2.40M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3270
2.40M
    const xmlChar *in, *e;
3271
2.40M
    xmlHashedString ret;
3272
2.40M
    size_t count = 0;
3273
2.40M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3274
2.40M
                       XML_MAX_TEXT_LENGTH :
3275
2.40M
                       XML_MAX_NAME_LENGTH;
3276
3277
2.40M
    ret.name = NULL;
3278
3279
    /*
3280
     * Accelerator for simple ASCII names
3281
     */
3282
2.40M
    in = ctxt->input->cur;
3283
2.40M
    e = ctxt->input->end;
3284
2.40M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3285
2.40M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3286
2.40M
   (*in == '_')) && (in < e)) {
3287
1.25M
  in++;
3288
5.99M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3289
5.99M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3290
5.99M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3291
5.99M
          (*in == '_') || (*in == '-') ||
3292
5.99M
          (*in == '.')) && (in < e))
3293
4.73M
      in++;
3294
1.25M
  if (in >= e)
3295
1.03k
      goto complex;
3296
1.25M
  if ((*in > 0) && (*in < 0x80)) {
3297
1.23M
      count = in - ctxt->input->cur;
3298
1.23M
            if (count > maxLength) {
3299
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3300
0
                return(ret);
3301
0
            }
3302
1.23M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3303
1.23M
      ctxt->input->cur = in;
3304
1.23M
      ctxt->input->col += count;
3305
1.23M
      if (ret.name == NULL) {
3306
0
          xmlErrMemory(ctxt);
3307
0
      }
3308
1.23M
      return(ret);
3309
1.23M
  }
3310
1.25M
    }
3311
1.16M
complex:
3312
1.16M
    return(xmlParseNCNameComplex(ctxt));
3313
2.40M
}
3314
3315
/**
3316
 * parse an XML name and compares for match
3317
 * (specialized for endtag parsing)
3318
 *
3319
 * @param ctxt  an XML parser context
3320
 * @param other  the name to compare with
3321
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3322
 * and the name for mismatch
3323
 */
3324
3325
static const xmlChar *
3326
10.2k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3327
10.2k
    register const xmlChar *cmp = other;
3328
10.2k
    register const xmlChar *in;
3329
10.2k
    const xmlChar *ret;
3330
3331
10.2k
    GROW;
3332
3333
10.2k
    in = ctxt->input->cur;
3334
14.5k
    while (*in != 0 && *in == *cmp) {
3335
4.30k
  ++in;
3336
4.30k
  ++cmp;
3337
4.30k
    }
3338
10.2k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3339
  /* success */
3340
2.88k
  ctxt->input->col += in - ctxt->input->cur;
3341
2.88k
  ctxt->input->cur = in;
3342
2.88k
  return (const xmlChar*) 1;
3343
2.88k
    }
3344
    /* failure (or end of input buffer), check with full function */
3345
7.33k
    ret = xmlParseName (ctxt);
3346
    /* strings coming from the dictionary direct compare possible */
3347
7.33k
    if (ret == other) {
3348
977
  return (const xmlChar*) 1;
3349
977
    }
3350
6.35k
    return ret;
3351
7.33k
}
3352
3353
/**
3354
 * Parse an XML name.
3355
 *
3356
 * @param ctxt  an XML parser context
3357
 * @param str  a pointer to the string pointer (IN/OUT)
3358
 * @returns the Name parsed or NULL. The `str` pointer
3359
 * is updated to the current location in the string.
3360
 */
3361
3362
static xmlChar *
3363
245k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3364
245k
    xmlChar *ret;
3365
245k
    const xmlChar *cur = *str;
3366
245k
    int flags = 0;
3367
245k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3368
245k
                    XML_MAX_TEXT_LENGTH :
3369
245k
                    XML_MAX_NAME_LENGTH;
3370
3371
245k
    if (ctxt->options & XML_PARSE_OLD10)
3372
0
        flags |= XML_SCAN_OLD10;
3373
3374
245k
    cur = xmlScanName(*str, maxLength, flags);
3375
245k
    if (cur == NULL) {
3376
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3377
0
        return(NULL);
3378
0
    }
3379
245k
    if (cur == *str)
3380
1.74k
        return(NULL);
3381
3382
243k
    ret = xmlStrndup(*str, cur - *str);
3383
243k
    if (ret == NULL)
3384
0
        xmlErrMemory(ctxt);
3385
243k
    *str = cur;
3386
243k
    return(ret);
3387
245k
}
3388
3389
/**
3390
 * parse an XML Nmtoken.
3391
 *
3392
 * @deprecated Internal function, don't use.
3393
 *
3394
 *     [7] Nmtoken ::= (NameChar)+
3395
 *
3396
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3397
 *
3398
 * @param ctxt  an XML parser context
3399
 * @returns the Nmtoken parsed or NULL
3400
 */
3401
3402
xmlChar *
3403
34.4k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3404
34.4k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3405
34.4k
    xmlChar *ret;
3406
34.4k
    int len = 0, l;
3407
34.4k
    int c;
3408
34.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3409
34.4k
                    XML_MAX_TEXT_LENGTH :
3410
34.4k
                    XML_MAX_NAME_LENGTH;
3411
34.4k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3412
3413
34.4k
    c = xmlCurrentChar(ctxt, &l);
3414
3415
141k
    while (xmlIsNameChar(c, old10)) {
3416
107k
  COPY_BUF(buf, len, c);
3417
107k
  NEXTL(l);
3418
107k
  c = xmlCurrentChar(ctxt, &l);
3419
107k
  if (len >= XML_MAX_NAMELEN) {
3420
      /*
3421
       * Okay someone managed to make a huge token, so he's ready to pay
3422
       * for the processing speed.
3423
       */
3424
791
      xmlChar *buffer;
3425
791
      int max = len * 2;
3426
3427
791
      buffer = xmlMalloc(max);
3428
791
      if (buffer == NULL) {
3429
0
          xmlErrMemory(ctxt);
3430
0
    return(NULL);
3431
0
      }
3432
791
      memcpy(buffer, buf, len);
3433
5.75M
      while (xmlIsNameChar(c, old10)) {
3434
5.75M
    if (len + 10 > max) {
3435
2.63k
        xmlChar *tmp;
3436
2.63k
                    int newSize;
3437
3438
2.63k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3439
2.63k
                    if (newSize < 0) {
3440
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3441
0
                        xmlFree(buffer);
3442
0
                        return(NULL);
3443
0
                    }
3444
2.63k
        tmp = xmlRealloc(buffer, newSize);
3445
2.63k
        if (tmp == NULL) {
3446
0
      xmlErrMemory(ctxt);
3447
0
      xmlFree(buffer);
3448
0
      return(NULL);
3449
0
        }
3450
2.63k
        buffer = tmp;
3451
2.63k
                    max = newSize;
3452
2.63k
    }
3453
5.75M
    COPY_BUF(buffer, len, c);
3454
5.75M
    NEXTL(l);
3455
5.75M
    c = xmlCurrentChar(ctxt, &l);
3456
5.75M
      }
3457
791
      buffer[len] = 0;
3458
791
      return(buffer);
3459
791
  }
3460
107k
    }
3461
33.6k
    if (len == 0)
3462
13.6k
        return(NULL);
3463
20.0k
    if (len > maxLength) {
3464
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3465
0
        return(NULL);
3466
0
    }
3467
20.0k
    ret = xmlStrndup(buf, len);
3468
20.0k
    if (ret == NULL)
3469
0
        xmlErrMemory(ctxt);
3470
20.0k
    return(ret);
3471
20.0k
}
3472
3473
/**
3474
 * Validate an entity value and expand parameter entities.
3475
 *
3476
 * @param ctxt  parser context
3477
 * @param buf  string buffer
3478
 * @param str  entity value
3479
 * @param length  size of entity value
3480
 * @param depth  nesting depth
3481
 */
3482
static void
3483
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3484
22.6k
                          const xmlChar *str, int length, int depth) {
3485
22.6k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3486
22.6k
    const xmlChar *end, *chunk;
3487
22.6k
    int c, l;
3488
3489
22.6k
    if (str == NULL)
3490
0
        return;
3491
3492
22.6k
    depth += 1;
3493
22.6k
    if (depth > maxDepth) {
3494
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3495
0
                       "Maximum entity nesting depth exceeded");
3496
0
  return;
3497
0
    }
3498
3499
22.6k
    end = str + length;
3500
22.6k
    chunk = str;
3501
3502
69.0M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3503
69.0M
        c = *str;
3504
3505
69.0M
        if (c >= 0x80) {
3506
54.4M
            l = xmlUTF8MultibyteLen(ctxt, str,
3507
54.4M
                    "invalid character in entity value\n");
3508
54.4M
            if (l == 0) {
3509
21.6M
                if (chunk < str)
3510
86.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3511
21.6M
                xmlSBufAddReplChar(buf);
3512
21.6M
                str += 1;
3513
21.6M
                chunk = str;
3514
32.8M
            } else {
3515
32.8M
                str += l;
3516
32.8M
            }
3517
54.4M
        } else if (c == '&') {
3518
36.2k
            if (str[1] == '#') {
3519
18.9k
                if (chunk < str)
3520
8.96k
                    xmlSBufAddString(buf, chunk, str - chunk);
3521
3522
18.9k
                c = xmlParseStringCharRef(ctxt, &str);
3523
18.9k
                if (c == 0)
3524
5.14k
                    return;
3525
3526
13.8k
                xmlSBufAddChar(buf, c);
3527
3528
13.8k
                chunk = str;
3529
17.2k
            } else {
3530
17.2k
                xmlChar *name;
3531
3532
                /*
3533
                 * General entity references are checked for
3534
                 * syntactic validity.
3535
                 */
3536
17.2k
                str++;
3537
17.2k
                name = xmlParseStringName(ctxt, &str);
3538
3539
17.2k
                if ((name == NULL) || (*str++ != ';')) {
3540
1.56k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3541
1.56k
                            "EntityValue: '&' forbidden except for entities "
3542
1.56k
                            "references\n");
3543
1.56k
                    xmlFree(name);
3544
1.56k
                    return;
3545
1.56k
                }
3546
3547
15.6k
                xmlFree(name);
3548
15.6k
            }
3549
14.5M
        } else if (c == '%') {
3550
1.75k
            xmlEntityPtr ent;
3551
3552
1.75k
            if (chunk < str)
3553
986
                xmlSBufAddString(buf, chunk, str - chunk);
3554
3555
1.75k
            ent = xmlParseStringPEReference(ctxt, &str);
3556
1.75k
            if (ent == NULL)
3557
1.35k
                return;
3558
3559
400
            if (!PARSER_EXTERNAL(ctxt)) {
3560
400
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3561
400
                return;
3562
400
            }
3563
3564
0
            if (ent->content == NULL) {
3565
                /*
3566
                 * Note: external parsed entities will not be loaded,
3567
                 * it is not required for a non-validating parser to
3568
                 * complete external PEReferences coming from the
3569
                 * internal subset
3570
                 */
3571
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3572
0
                    ((ctxt->replaceEntities) ||
3573
0
                     (ctxt->validate))) {
3574
0
                    xmlLoadEntityContent(ctxt, ent);
3575
0
                } else {
3576
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3577
0
                                  "not validating will not read content for "
3578
0
                                  "PE entity %s\n", ent->name, NULL);
3579
0
                }
3580
0
            }
3581
3582
            /*
3583
             * TODO: Skip if ent->content is still NULL.
3584
             */
3585
3586
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3587
0
                return;
3588
3589
0
            if (ent->flags & XML_ENT_EXPANDING) {
3590
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3591
0
                xmlHaltParser(ctxt);
3592
0
                return;
3593
0
            }
3594
3595
0
            ent->flags |= XML_ENT_EXPANDING;
3596
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3597
0
                                      depth);
3598
0
            ent->flags &= ~XML_ENT_EXPANDING;
3599
3600
0
            chunk = str;
3601
14.5M
        } else {
3602
            /* Normal ASCII char */
3603
14.5M
            if (!IS_BYTE_CHAR(c)) {
3604
1.87M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3605
1.87M
                        "invalid character in entity value\n");
3606
1.87M
                if (chunk < str)
3607
28.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3608
1.87M
                xmlSBufAddReplChar(buf);
3609
1.87M
                str += 1;
3610
1.87M
                chunk = str;
3611
12.6M
            } else {
3612
12.6M
                str += 1;
3613
12.6M
            }
3614
14.5M
        }
3615
69.0M
    }
3616
3617
14.1k
    if (chunk < str)
3618
11.6k
        xmlSBufAddString(buf, chunk, str - chunk);
3619
14.1k
}
3620
3621
/**
3622
 * parse a value for ENTITY declarations
3623
 *
3624
 * @deprecated Internal function, don't use.
3625
 *
3626
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3627
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3628
 *
3629
 * @param ctxt  an XML parser context
3630
 * @param orig  if non-NULL store a copy of the original entity value
3631
 * @returns the EntityValue parsed with reference substituted or NULL
3632
 */
3633
xmlChar *
3634
22.6k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3635
22.6k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3636
22.6k
                         XML_MAX_HUGE_LENGTH :
3637
22.6k
                         XML_MAX_TEXT_LENGTH;
3638
22.6k
    xmlSBuf buf;
3639
22.6k
    const xmlChar *start;
3640
22.6k
    int quote, length;
3641
3642
22.6k
    xmlSBufInit(&buf, maxLength);
3643
3644
22.6k
    GROW;
3645
3646
22.6k
    quote = CUR;
3647
22.6k
    if ((quote != '"') && (quote != '\'')) {
3648
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3649
0
  return(NULL);
3650
0
    }
3651
22.6k
    CUR_PTR++;
3652
3653
22.6k
    length = 0;
3654
3655
    /*
3656
     * Copy raw content of the entity into a buffer
3657
     */
3658
104M
    while (1) {
3659
104M
        int c;
3660
3661
104M
        if (PARSER_STOPPED(ctxt))
3662
0
            goto error;
3663
3664
104M
        if (CUR_PTR >= ctxt->input->end) {
3665
37
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3666
37
            goto error;
3667
37
        }
3668
3669
104M
        c = CUR;
3670
3671
104M
        if (c == 0) {
3672
9
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3673
9
                    "invalid character in entity value\n");
3674
9
            goto error;
3675
9
        }
3676
104M
        if (c == quote)
3677
22.6k
            break;
3678
104M
        NEXTL(1);
3679
104M
        length += 1;
3680
3681
        /*
3682
         * TODO: Check growth threshold
3683
         */
3684
104M
        if (ctxt->input->end - CUR_PTR < 10)
3685
37.4k
            GROW;
3686
104M
    }
3687
3688
22.6k
    start = CUR_PTR - length;
3689
3690
22.6k
    if (orig != NULL) {
3691
22.6k
        *orig = xmlStrndup(start, length);
3692
22.6k
        if (*orig == NULL)
3693
0
            xmlErrMemory(ctxt);
3694
22.6k
    }
3695
3696
22.6k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3697
3698
22.6k
    NEXTL(1);
3699
3700
22.6k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3701
3702
46
error:
3703
46
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3704
46
    return(NULL);
3705
22.6k
}
3706
3707
/**
3708
 * Check an entity reference in an attribute value for validity
3709
 * without expanding it.
3710
 *
3711
 * @param ctxt  parser context
3712
 * @param pent  entity
3713
 * @param depth  nesting depth
3714
 */
3715
static void
3716
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3717
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3718
0
    const xmlChar *str;
3719
0
    unsigned long expandedSize = pent->length;
3720
0
    int c, flags;
3721
3722
0
    depth += 1;
3723
0
    if (depth > maxDepth) {
3724
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3725
0
                       "Maximum entity nesting depth exceeded");
3726
0
  return;
3727
0
    }
3728
3729
0
    if (pent->flags & XML_ENT_EXPANDING) {
3730
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3731
0
        xmlHaltParser(ctxt);
3732
0
        return;
3733
0
    }
3734
3735
    /*
3736
     * If we're parsing a default attribute value in DTD content,
3737
     * the entity might reference other entities which weren't
3738
     * defined yet, so the check isn't reliable.
3739
     */
3740
0
    if (ctxt->inSubset == 0)
3741
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3742
0
    else
3743
0
        flags = XML_ENT_VALIDATED;
3744
3745
0
    str = pent->content;
3746
0
    if (str == NULL)
3747
0
        goto done;
3748
3749
    /*
3750
     * Note that entity values are already validated. We only check
3751
     * for illegal less-than signs and compute the expanded size
3752
     * of the entity. No special handling for multi-byte characters
3753
     * is needed.
3754
     */
3755
0
    while (!PARSER_STOPPED(ctxt)) {
3756
0
        c = *str;
3757
3758
0
  if (c != '&') {
3759
0
            if (c == 0)
3760
0
                break;
3761
3762
0
            if (c == '<')
3763
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3764
0
                        "'<' in entity '%s' is not allowed in attributes "
3765
0
                        "values\n", pent->name);
3766
3767
0
            str += 1;
3768
0
        } else if (str[1] == '#') {
3769
0
            int val;
3770
3771
0
      val = xmlParseStringCharRef(ctxt, &str);
3772
0
      if (val == 0) {
3773
0
                pent->content[0] = 0;
3774
0
                break;
3775
0
            }
3776
0
  } else {
3777
0
            xmlChar *name;
3778
0
            xmlEntityPtr ent;
3779
3780
0
      name = xmlParseStringEntityRef(ctxt, &str);
3781
0
      if (name == NULL) {
3782
0
                pent->content[0] = 0;
3783
0
                break;
3784
0
            }
3785
3786
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3787
0
            xmlFree(name);
3788
3789
0
            if ((ent != NULL) &&
3790
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3791
0
                if ((ent->flags & flags) != flags) {
3792
0
                    pent->flags |= XML_ENT_EXPANDING;
3793
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3794
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3795
0
                }
3796
3797
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3798
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3799
0
            }
3800
0
        }
3801
0
    }
3802
3803
0
done:
3804
0
    if (ctxt->inSubset == 0)
3805
0
        pent->expandedSize = expandedSize;
3806
3807
0
    pent->flags |= flags;
3808
0
}
3809
3810
/**
3811
 * Expand general entity references in an entity or attribute value.
3812
 * Perform attribute value normalization.
3813
 *
3814
 * @param ctxt  parser context
3815
 * @param buf  string buffer
3816
 * @param str  entity or attribute value
3817
 * @param pent  entity for entity value, NULL for attribute values
3818
 * @param normalize  whether to collapse whitespace
3819
 * @param inSpace  whitespace state
3820
 * @param depth  nesting depth
3821
 * @param check  whether to check for amplification
3822
 * @returns  whether there was a normalization change
3823
 */
3824
static int
3825
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3826
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3827
219k
                          int *inSpace, int depth, int check) {
3828
219k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3829
219k
    int c, chunkSize;
3830
219k
    int normChange = 0;
3831
3832
219k
    if (str == NULL)
3833
0
        return(0);
3834
3835
219k
    depth += 1;
3836
219k
    if (depth > maxDepth) {
3837
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3838
0
                       "Maximum entity nesting depth exceeded");
3839
0
  return(0);
3840
0
    }
3841
3842
219k
    if (pent != NULL) {
3843
219k
        if (pent->flags & XML_ENT_EXPANDING) {
3844
14
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3845
14
            xmlHaltParser(ctxt);
3846
14
            return(0);
3847
14
        }
3848
3849
219k
        if (check) {
3850
219k
            if (xmlParserEntityCheck(ctxt, pent->length))
3851
49
                return(0);
3852
219k
        }
3853
219k
    }
3854
3855
219k
    chunkSize = 0;
3856
3857
    /*
3858
     * Note that entity values are already validated. No special
3859
     * handling for multi-byte characters is needed.
3860
     */
3861
419M
    while (!PARSER_STOPPED(ctxt)) {
3862
419M
        c = *str;
3863
3864
419M
  if (c != '&') {
3865
419M
            if (c == 0)
3866
180k
                break;
3867
3868
            /*
3869
             * If this function is called without an entity, it is used to
3870
             * expand entities in an attribute content where less-than was
3871
             * already unescaped and is allowed.
3872
             */
3873
418M
            if ((pent != NULL) && (c == '<')) {
3874
39.4k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3875
39.4k
                        "'<' in entity '%s' is not allowed in attributes "
3876
39.4k
                        "values\n", pent->name);
3877
39.4k
                break;
3878
39.4k
            }
3879
3880
418M
            if (c <= 0x20) {
3881
13.8M
                if ((normalize) && (*inSpace)) {
3882
                    /* Skip char */
3883
281k
                    if (chunkSize > 0) {
3884
19.6k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
19.6k
                        chunkSize = 0;
3886
19.6k
                    }
3887
281k
                    normChange = 1;
3888
13.6M
                } else if (c < 0x20) {
3889
13.0M
                    if (chunkSize > 0) {
3890
473k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3891
473k
                        chunkSize = 0;
3892
473k
                    }
3893
3894
13.0M
                    xmlSBufAddCString(buf, " ", 1);
3895
13.0M
                } else {
3896
516k
                    chunkSize += 1;
3897
516k
                }
3898
3899
13.8M
                *inSpace = 1;
3900
404M
            } else {
3901
404M
                chunkSize += 1;
3902
404M
                *inSpace = 0;
3903
404M
            }
3904
3905
418M
            str += 1;
3906
418M
        } else if (str[1] == '#') {
3907
140k
            int val;
3908
3909
140k
            if (chunkSize > 0) {
3910
138k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
138k
                chunkSize = 0;
3912
138k
            }
3913
3914
140k
      val = xmlParseStringCharRef(ctxt, &str);
3915
140k
      if (val == 0) {
3916
8
                if (pent != NULL)
3917
8
                    pent->content[0] = 0;
3918
8
                break;
3919
8
            }
3920
3921
140k
            if (val == ' ') {
3922
70.0k
                if ((normalize) && (*inSpace))
3923
255
                    normChange = 1;
3924
69.7k
                else
3925
69.7k
                    xmlSBufAddCString(buf, " ", 1);
3926
70.0k
                *inSpace = 1;
3927
70.6k
            } else {
3928
70.6k
                xmlSBufAddChar(buf, val);
3929
70.6k
                *inSpace = 0;
3930
70.6k
            }
3931
226k
  } else {
3932
226k
            xmlChar *name;
3933
226k
            xmlEntityPtr ent;
3934
3935
226k
            if (chunkSize > 0) {
3936
207k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3937
207k
                chunkSize = 0;
3938
207k
            }
3939
3940
226k
      name = xmlParseStringEntityRef(ctxt, &str);
3941
226k
            if (name == NULL) {
3942
11
                if (pent != NULL)
3943
11
                    pent->content[0] = 0;
3944
11
                break;
3945
11
            }
3946
3947
226k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3948
226k
            xmlFree(name);
3949
3950
226k
      if ((ent != NULL) &&
3951
226k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3952
67.0k
    if (ent->content == NULL) {
3953
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3954
0
          "predefined entity has no content\n");
3955
0
                    break;
3956
0
                }
3957
3958
67.0k
                xmlSBufAddString(buf, ent->content, ent->length);
3959
3960
67.0k
                *inSpace = 0;
3961
159k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3962
138k
                if (pent != NULL)
3963
138k
                    pent->flags |= XML_ENT_EXPANDING;
3964
138k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3965
138k
                        ent->content, ent, normalize, inSpace, depth, check);
3966
138k
                if (pent != NULL)
3967
138k
                    pent->flags &= ~XML_ENT_EXPANDING;
3968
138k
      }
3969
226k
        }
3970
419M
    }
3971
3972
219k
    if (chunkSize > 0)
3973
207k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3974
3975
219k
    return(normChange);
3976
219k
}
3977
3978
/**
3979
 * Expand general entity references in an entity or attribute value.
3980
 * Perform attribute value normalization.
3981
 *
3982
 * @param ctxt  parser context
3983
 * @param str  entity or attribute value
3984
 * @param normalize  whether to collapse whitespace
3985
 * @returns the expanded attribute value.
3986
 */
3987
xmlChar *
3988
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3989
0
                            int normalize) {
3990
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3991
0
                         XML_MAX_HUGE_LENGTH :
3992
0
                         XML_MAX_TEXT_LENGTH;
3993
0
    xmlSBuf buf;
3994
0
    int inSpace = 1;
3995
3996
0
    xmlSBufInit(&buf, maxLength);
3997
3998
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3999
0
                              ctxt->inputNr, /* check */ 0);
4000
4001
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4002
0
        buf.size--;
4003
4004
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4005
0
}
4006
4007
/**
4008
 * parse a value for an attribute.
4009
 *
4010
 * NOTE: if no normalization is needed, the routine will return pointers
4011
 * directly from the data buffer.
4012
 *
4013
 * 3.3.3 Attribute-Value Normalization:
4014
 *
4015
 * Before the value of an attribute is passed to the application or
4016
 * checked for validity, the XML processor must normalize it as follows:
4017
 *
4018
 * - a character reference is processed by appending the referenced
4019
 *   character to the attribute value
4020
 * - an entity reference is processed by recursively processing the
4021
 *   replacement text of the entity
4022
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4023
 *   appending \#x20 to the normalized value, except that only a single
4024
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4025
 *   parsed entity or the literal entity value of an internal parsed entity
4026
 * - other characters are processed by appending them to the normalized value
4027
 *
4028
 * If the declared value is not CDATA, then the XML processor must further
4029
 * process the normalized attribute value by discarding any leading and
4030
 * trailing space (\#x20) characters, and by replacing sequences of space
4031
 * (\#x20) characters by a single space (\#x20) character.
4032
 * All attributes for which no declaration has been read should be treated
4033
 * by a non-validating parser as if declared CDATA.
4034
 *
4035
 * @param ctxt  an XML parser context
4036
 * @param attlen  attribute len result
4037
 * @param outFlags  resulting XML_ATTVAL_* flags
4038
 * @param special  value from attsSpecial
4039
 * @param isNamespace  whether this is a namespace declaration
4040
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4041
 *     caller if it was copied, this can be detected by val[*len] == 0.
4042
 */
4043
static xmlChar *
4044
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4045
672k
                         int special, int isNamespace) {
4046
672k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4047
672k
                         XML_MAX_HUGE_LENGTH :
4048
672k
                         XML_MAX_TEXT_LENGTH;
4049
672k
    xmlSBuf buf;
4050
672k
    xmlChar *ret;
4051
672k
    int c, l, quote, entFlags, chunkSize;
4052
672k
    int inSpace = 1;
4053
672k
    int replaceEntities;
4054
672k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4055
672k
    int attvalFlags = 0;
4056
4057
    /* Always expand namespace URIs */
4058
672k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4059
4060
672k
    xmlSBufInit(&buf, maxLength);
4061
4062
672k
    GROW;
4063
4064
672k
    quote = CUR;
4065
672k
    if ((quote != '"') && (quote != '\'')) {
4066
6.66k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4067
6.66k
  return(NULL);
4068
6.66k
    }
4069
666k
    NEXTL(1);
4070
4071
666k
    if (ctxt->inSubset == 0)
4072
647k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4073
18.7k
    else
4074
18.7k
        entFlags = XML_ENT_VALIDATED;
4075
4076
666k
    inSpace = 1;
4077
666k
    chunkSize = 0;
4078
4079
47.4M
    while (1) {
4080
47.4M
        if (PARSER_STOPPED(ctxt))
4081
63
            goto error;
4082
4083
47.4M
        if (CUR_PTR >= ctxt->input->end) {
4084
2.11k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4085
2.11k
                           "AttValue: ' expected\n");
4086
2.11k
            goto error;
4087
2.11k
        }
4088
4089
        /*
4090
         * TODO: Check growth threshold
4091
         */
4092
47.4M
        if (ctxt->input->end - CUR_PTR < 10)
4093
31.8k
            GROW;
4094
4095
47.4M
        c = CUR;
4096
4097
47.4M
        if (c >= 0x80) {
4098
30.2M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4099
30.2M
                    "invalid character in attribute value\n");
4100
30.2M
            if (l == 0) {
4101
26.8M
                if (chunkSize > 0) {
4102
84.6k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4103
84.6k
                    chunkSize = 0;
4104
84.6k
                }
4105
26.8M
                xmlSBufAddReplChar(&buf);
4106
26.8M
                NEXTL(1);
4107
26.8M
            } else {
4108
3.43M
                chunkSize += l;
4109
3.43M
                NEXTL(l);
4110
3.43M
            }
4111
4112
30.2M
            inSpace = 0;
4113
30.2M
        } else if (c != '&') {
4114
16.9M
            if (c > 0x20) {
4115
8.60M
                if (c == quote)
4116
659k
                    break;
4117
4118
7.94M
                if (c == '<')
4119
215k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4120
4121
7.94M
                chunkSize += 1;
4122
7.94M
                inSpace = 0;
4123
8.35M
            } else if (!IS_BYTE_CHAR(c)) {
4124
3.15M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4125
3.15M
                        "invalid character in attribute value\n");
4126
3.15M
                if (chunkSize > 0) {
4127
37.0k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4128
37.0k
                    chunkSize = 0;
4129
37.0k
                }
4130
3.15M
                xmlSBufAddReplChar(&buf);
4131
3.15M
                inSpace = 0;
4132
5.19M
            } else {
4133
                /* Whitespace */
4134
5.19M
                if ((normalize) && (inSpace)) {
4135
                    /* Skip char */
4136
9.14k
                    if (chunkSize > 0) {
4137
1.37k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4138
1.37k
                        chunkSize = 0;
4139
1.37k
                    }
4140
9.14k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4141
5.18M
                } else if (c < 0x20) {
4142
                    /* Convert to space */
4143
4.68M
                    if (chunkSize > 0) {
4144
55.2k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4145
55.2k
                        chunkSize = 0;
4146
55.2k
                    }
4147
4148
4.68M
                    xmlSBufAddCString(&buf, " ", 1);
4149
4.68M
                } else {
4150
502k
                    chunkSize += 1;
4151
502k
                }
4152
4153
5.19M
                inSpace = 1;
4154
4155
5.19M
                if ((c == 0xD) && (NXT(1) == 0xA))
4156
1.76k
                    CUR_PTR++;
4157
5.19M
            }
4158
4159
16.2M
            NEXTL(1);
4160
16.2M
        } else if (NXT(1) == '#') {
4161
27.8k
            int val;
4162
4163
27.8k
            if (chunkSize > 0) {
4164
12.7k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4165
12.7k
                chunkSize = 0;
4166
12.7k
            }
4167
4168
27.8k
            val = xmlParseCharRef(ctxt);
4169
27.8k
            if (val == 0)
4170
4.14k
                goto error;
4171
4172
23.7k
            if ((val == '&') && (!replaceEntities)) {
4173
                /*
4174
                 * The reparsing will be done in xmlNodeParseContent()
4175
                 * called from SAX2.c
4176
                 */
4177
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4178
0
                inSpace = 0;
4179
23.7k
            } else if (val == ' ') {
4180
3.60k
                if ((normalize) && (inSpace))
4181
222
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4182
3.38k
                else
4183
3.38k
                    xmlSBufAddCString(&buf, " ", 1);
4184
3.60k
                inSpace = 1;
4185
20.1k
            } else {
4186
20.1k
                xmlSBufAddChar(&buf, val);
4187
20.1k
                inSpace = 0;
4188
20.1k
            }
4189
160k
        } else {
4190
160k
            const xmlChar *name;
4191
160k
            xmlEntityPtr ent;
4192
4193
160k
            if (chunkSize > 0) {
4194
72.1k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4195
72.1k
                chunkSize = 0;
4196
72.1k
            }
4197
4198
160k
            name = xmlParseEntityRefInternal(ctxt);
4199
160k
            if (name == NULL) {
4200
                /*
4201
                 * Probably a literal '&' which wasn't escaped.
4202
                 * TODO: Handle gracefully in recovery mode.
4203
                 */
4204
35.1k
                continue;
4205
35.1k
            }
4206
4207
125k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4208
125k
            if (ent == NULL)
4209
38.3k
                continue;
4210
4211
87.0k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4212
5.39k
                if ((ent->content[0] == '&') && (!replaceEntities))
4213
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4214
5.39k
                else
4215
5.39k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4216
5.39k
                inSpace = 0;
4217
81.6k
            } else if (replaceEntities) {
4218
81.6k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4219
81.6k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4220
81.6k
                        /* check */ 1) > 0)
4221
8.27k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4222
81.6k
            } else {
4223
0
                if ((ent->flags & entFlags) != entFlags)
4224
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4225
4226
0
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4227
0
                    ent->content[0] = 0;
4228
0
                    goto error;
4229
0
                }
4230
4231
                /*
4232
                 * Just output the reference
4233
                 */
4234
0
                xmlSBufAddCString(&buf, "&", 1);
4235
0
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4236
0
                xmlSBufAddCString(&buf, ";", 1);
4237
4238
0
                inSpace = 0;
4239
0
            }
4240
87.0k
  }
4241
47.4M
    }
4242
4243
659k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4244
565k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4245
4246
565k
        if (attlen != NULL)
4247
565k
            *attlen = chunkSize;
4248
565k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4249
130
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4250
130
            *attlen -= 1;
4251
130
        }
4252
4253
        /* Report potential error */
4254
565k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4255
565k
    } else {
4256
93.9k
        if (chunkSize > 0)
4257
58.5k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4258
4259
93.9k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4260
355
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4261
355
            buf.size--;
4262
355
        }
4263
4264
93.9k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4265
93.9k
        attvalFlags |= XML_ATTVAL_ALLOC;
4266
4267
93.9k
        if (ret != NULL) {
4268
93.9k
            if (attlen != NULL)
4269
76.1k
                *attlen = buf.size;
4270
93.9k
        }
4271
93.9k
    }
4272
4273
659k
    if (outFlags != NULL)
4274
642k
        *outFlags = attvalFlags;
4275
4276
659k
    NEXTL(1);
4277
4278
659k
    return(ret);
4279
4280
6.32k
error:
4281
6.32k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4282
6.32k
    return(NULL);
4283
666k
}
4284
4285
/**
4286
 * parse a value for an attribute
4287
 * Note: the parser won't do substitution of entities here, this
4288
 * will be handled later in #xmlStringGetNodeList
4289
 *
4290
 * @deprecated Internal function, don't use.
4291
 *
4292
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4293
 *                       "'" ([^<&'] | Reference)* "'"
4294
 *
4295
 * 3.3.3 Attribute-Value Normalization:
4296
 *
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 *
4300
 * - a character reference is processed by appending the referenced
4301
 *   character to the attribute value
4302
 * - an entity reference is processed by recursively processing the
4303
 *   replacement text of the entity
4304
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4305
 *   appending \#x20 to the normalized value, except that only a single
4306
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4307
 *   parsed entity or the literal entity value of an internal parsed entity
4308
 * - other characters are processed by appending them to the normalized value
4309
 *
4310
 * If the declared value is not CDATA, then the XML processor must further
4311
 * process the normalized attribute value by discarding any leading and
4312
 * trailing space (\#x20) characters, and by replacing sequences of space
4313
 * (\#x20) characters by a single space (\#x20) character.
4314
 * All attributes for which no declaration has been read should be treated
4315
 * by a non-validating parser as if declared CDATA.
4316
 *
4317
 * @param ctxt  an XML parser context
4318
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4319
 * caller.
4320
 */
4321
xmlChar *
4322
19.9k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4323
19.9k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4324
19.9k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4325
19.9k
}
4326
4327
/**
4328
 * parse an XML Literal
4329
 *
4330
 * @deprecated Internal function, don't use.
4331
 *
4332
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4333
 *
4334
 * @param ctxt  an XML parser context
4335
 * @returns the SystemLiteral parsed or NULL
4336
 */
4337
4338
xmlChar *
4339
4.14k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4340
4.14k
    xmlChar *buf = NULL;
4341
4.14k
    int len = 0;
4342
4.14k
    int size = XML_PARSER_BUFFER_SIZE;
4343
4.14k
    int cur, l;
4344
4.14k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4345
4.14k
                    XML_MAX_TEXT_LENGTH :
4346
4.14k
                    XML_MAX_NAME_LENGTH;
4347
4.14k
    xmlChar stop;
4348
4349
4.14k
    if (RAW == '"') {
4350
1.50k
        NEXT;
4351
1.50k
  stop = '"';
4352
2.64k
    } else if (RAW == '\'') {
4353
1.39k
        NEXT;
4354
1.39k
  stop = '\'';
4355
1.39k
    } else {
4356
1.25k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4357
1.25k
  return(NULL);
4358
1.25k
    }
4359
4360
2.89k
    buf = xmlMalloc(size);
4361
2.89k
    if (buf == NULL) {
4362
0
        xmlErrMemory(ctxt);
4363
0
  return(NULL);
4364
0
    }
4365
2.89k
    cur = xmlCurrentCharRecover(ctxt, &l);
4366
2.30M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4367
2.30M
  if (len + 5 >= size) {
4368
1.17k
      xmlChar *tmp;
4369
1.17k
            int newSize;
4370
4371
1.17k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4372
1.17k
            if (newSize < 0) {
4373
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4374
0
                xmlFree(buf);
4375
0
                return(NULL);
4376
0
            }
4377
1.17k
      tmp = xmlRealloc(buf, newSize);
4378
1.17k
      if (tmp == NULL) {
4379
0
          xmlFree(buf);
4380
0
    xmlErrMemory(ctxt);
4381
0
    return(NULL);
4382
0
      }
4383
1.17k
      buf = tmp;
4384
1.17k
            size = newSize;
4385
1.17k
  }
4386
2.30M
  COPY_BUF(buf, len, cur);
4387
2.30M
  NEXTL(l);
4388
2.30M
  cur = xmlCurrentCharRecover(ctxt, &l);
4389
2.30M
    }
4390
2.89k
    buf[len] = 0;
4391
2.89k
    if (!IS_CHAR(cur)) {
4392
459
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4393
2.43k
    } else {
4394
2.43k
  NEXT;
4395
2.43k
    }
4396
2.89k
    return(buf);
4397
2.89k
}
4398
4399
/**
4400
 * parse an XML public literal
4401
 *
4402
 * @deprecated Internal function, don't use.
4403
 *
4404
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4405
 *
4406
 * @param ctxt  an XML parser context
4407
 * @returns the PubidLiteral parsed or NULL.
4408
 */
4409
4410
xmlChar *
4411
4.59k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4412
4.59k
    xmlChar *buf = NULL;
4413
4.59k
    int len = 0;
4414
4.59k
    int size = XML_PARSER_BUFFER_SIZE;
4415
4.59k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4416
4.59k
                    XML_MAX_TEXT_LENGTH :
4417
4.59k
                    XML_MAX_NAME_LENGTH;
4418
4.59k
    xmlChar cur;
4419
4.59k
    xmlChar stop;
4420
4421
4.59k
    if (RAW == '"') {
4422
1.00k
        NEXT;
4423
1.00k
  stop = '"';
4424
3.59k
    } else if (RAW == '\'') {
4425
1.45k
        NEXT;
4426
1.45k
  stop = '\'';
4427
2.13k
    } else {
4428
2.13k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4429
2.13k
  return(NULL);
4430
2.13k
    }
4431
2.46k
    buf = xmlMalloc(size);
4432
2.46k
    if (buf == NULL) {
4433
0
  xmlErrMemory(ctxt);
4434
0
  return(NULL);
4435
0
    }
4436
2.46k
    cur = CUR;
4437
466k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4438
466k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4439
463k
  if (len + 1 >= size) {
4440
389
      xmlChar *tmp;
4441
389
            int newSize;
4442
4443
389
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4444
389
            if (newSize < 0) {
4445
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4446
0
                xmlFree(buf);
4447
0
                return(NULL);
4448
0
            }
4449
389
      tmp = xmlRealloc(buf, newSize);
4450
389
      if (tmp == NULL) {
4451
0
    xmlErrMemory(ctxt);
4452
0
    xmlFree(buf);
4453
0
    return(NULL);
4454
0
      }
4455
389
      buf = tmp;
4456
389
            size = newSize;
4457
389
  }
4458
463k
  buf[len++] = cur;
4459
463k
  NEXT;
4460
463k
  cur = CUR;
4461
463k
    }
4462
2.46k
    buf[len] = 0;
4463
2.46k
    if (cur != stop) {
4464
1.33k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4465
1.33k
    } else {
4466
1.13k
  NEXTL(1);
4467
1.13k
    }
4468
2.46k
    return(buf);
4469
2.46k
}
4470
4471
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4472
4473
/*
4474
 * used for the test in the inner loop of the char data testing
4475
 */
4476
static const unsigned char test_char_data[256] = {
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4482
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4483
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4484
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4485
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4486
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4487
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4488
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4489
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4490
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4491
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4492
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4493
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4494
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4495
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4496
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4497
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4498
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4499
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4500
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4501
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4502
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4503
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4504
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4505
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4506
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4507
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4508
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4509
};
4510
4511
static void
4512
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4513
919k
              int isBlank) {
4514
919k
    int checkBlanks;
4515
4516
919k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4517
843k
        return;
4518
4519
76.0k
    checkBlanks = (!ctxt->keepBlanks) ||
4520
76.0k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4521
4522
    /*
4523
     * Calling areBlanks with only parts of a text node
4524
     * is fundamentally broken, making the NOBLANKS option
4525
     * essentially unusable.
4526
     */
4527
76.0k
    if ((checkBlanks) &&
4528
76.0k
        (areBlanks(ctxt, buf, size, isBlank))) {
4529
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4530
0
            (ctxt->keepBlanks))
4531
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4532
76.0k
    } else {
4533
76.0k
        if (ctxt->sax->characters != NULL)
4534
76.0k
            ctxt->sax->characters(ctxt->userData, buf, size);
4535
4536
        /*
4537
         * The old code used to update this value for "complex" data
4538
         * even if checkBlanks was false. This was probably a bug.
4539
         */
4540
76.0k
        if ((checkBlanks) && (*ctxt->space == -1))
4541
0
            *ctxt->space = -2;
4542
76.0k
    }
4543
76.0k
}
4544
4545
/**
4546
 * Parse character data. Always makes progress if the first char isn't
4547
 * '<' or '&'.
4548
 *
4549
 * The right angle bracket (>) may be represented using the string "&gt;",
4550
 * and must, for compatibility, be escaped using "&gt;" or a character
4551
 * reference when it appears in the string "]]>" in content, when that
4552
 * string is not marking the end of a CDATA section.
4553
 *
4554
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4555
 * @param ctxt  an XML parser context
4556
 * @param partial  buffer may contain partial UTF-8 sequences
4557
 */
4558
static void
4559
3.58M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4560
3.58M
    const xmlChar *in;
4561
3.58M
    int line = ctxt->input->line;
4562
3.58M
    int col = ctxt->input->col;
4563
3.58M
    int ccol;
4564
4565
3.58M
    GROW;
4566
    /*
4567
     * Accelerated common case where input don't need to be
4568
     * modified before passing it to the handler.
4569
     */
4570
3.58M
    in = ctxt->input->cur;
4571
3.59M
    do {
4572
3.60M
get_more_space:
4573
3.61M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4574
3.60M
        if (*in == 0xA) {
4575
1.14M
            do {
4576
1.14M
                ctxt->input->line++; ctxt->input->col = 1;
4577
1.14M
                in++;
4578
1.14M
            } while (*in == 0xA);
4579
11.2k
            goto get_more_space;
4580
11.2k
        }
4581
3.59M
        if (*in == '<') {
4582
5.90k
            while (in > ctxt->input->cur) {
4583
2.95k
                const xmlChar *tmp = ctxt->input->cur;
4584
2.95k
                size_t nbchar = in - tmp;
4585
4586
2.95k
                if (nbchar > XML_MAX_ITEMS)
4587
0
                    nbchar = XML_MAX_ITEMS;
4588
2.95k
                ctxt->input->cur += nbchar;
4589
4590
2.95k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4591
2.95k
            }
4592
2.95k
            return;
4593
2.95k
        }
4594
4595
3.61M
get_more:
4596
3.61M
        ccol = ctxt->input->col;
4597
9.22M
        while (test_char_data[*in]) {
4598
5.60M
            in++;
4599
5.60M
            ccol++;
4600
5.60M
        }
4601
3.61M
        ctxt->input->col = ccol;
4602
3.61M
        if (*in == 0xA) {
4603
1.43M
            do {
4604
1.43M
                ctxt->input->line++; ctxt->input->col = 1;
4605
1.43M
                in++;
4606
1.43M
            } while (*in == 0xA);
4607
13.5k
            goto get_more;
4608
13.5k
        }
4609
3.60M
        if (*in == ']') {
4610
18.3k
            if ((in[1] == ']') && (in[2] == '>')) {
4611
2.25k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4612
2.25k
                ctxt->input->cur = in + 1;
4613
2.25k
                return;
4614
2.25k
            }
4615
16.0k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4616
16.0k
                in++;
4617
16.0k
                ctxt->input->col++;
4618
16.0k
                goto get_more;
4619
16.0k
            }
4620
16.0k
        }
4621
3.99M
        while (in > ctxt->input->cur) {
4622
405k
            const xmlChar *tmp = ctxt->input->cur;
4623
405k
            size_t nbchar = in - tmp;
4624
4625
405k
            if (nbchar > XML_MAX_ITEMS)
4626
0
                nbchar = XML_MAX_ITEMS;
4627
405k
            ctxt->input->cur += nbchar;
4628
4629
405k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4630
4631
405k
            line = ctxt->input->line;
4632
405k
            col = ctxt->input->col;
4633
405k
        }
4634
3.58M
        ctxt->input->cur = in;
4635
3.58M
        if (*in == 0xD) {
4636
22.2k
            in++;
4637
22.2k
            if (*in == 0xA) {
4638
2.46k
                ctxt->input->cur = in;
4639
2.46k
                in++;
4640
2.46k
                ctxt->input->line++; ctxt->input->col = 1;
4641
2.46k
                continue; /* while */
4642
2.46k
            }
4643
19.8k
            in--;
4644
19.8k
        }
4645
3.58M
        if (*in == '<') {
4646
161k
            return;
4647
161k
        }
4648
3.42M
        if (*in == '&') {
4649
46.3k
            return;
4650
46.3k
        }
4651
3.37M
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4652
0
            return;
4653
0
        }
4654
3.37M
        SHRINK;
4655
3.37M
        GROW;
4656
3.37M
        in = ctxt->input->cur;
4657
3.37M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4658
3.37M
             (*in == 0x09) || (*in == 0x0a));
4659
3.37M
    ctxt->input->line = line;
4660
3.37M
    ctxt->input->col = col;
4661
3.37M
    xmlParseCharDataComplex(ctxt, partial);
4662
3.37M
}
4663
4664
/**
4665
 * Always makes progress if the first char isn't '<' or '&'.
4666
 *
4667
 * parse a CharData section.this is the fallback function
4668
 * of #xmlParseCharData when the parsing requires handling
4669
 * of non-ASCII characters.
4670
 *
4671
 * @param ctxt  an XML parser context
4672
 * @param partial  whether the input can end with truncated UTF-8
4673
 */
4674
static void
4675
3.37M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4676
3.37M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4677
3.37M
    int nbchar = 0;
4678
3.37M
    int cur, l;
4679
4680
3.37M
    cur = xmlCurrentCharRecover(ctxt, &l);
4681
31.8M
    while ((cur != '<') && /* checked */
4682
31.8M
           (cur != '&') &&
4683
31.8M
           ((!partial) || (cur != ']') ||
4684
31.6M
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4685
31.8M
     (IS_CHAR(cur))) {
4686
28.4M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4687
727
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4688
727
  }
4689
28.4M
  COPY_BUF(buf, nbchar, cur);
4690
  /* move current position before possible calling of ctxt->sax->characters */
4691
28.4M
  NEXTL(l);
4692
28.4M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4693
182k
      buf[nbchar] = 0;
4694
4695
182k
            xmlCharacters(ctxt, buf, nbchar, 0);
4696
182k
      nbchar = 0;
4697
182k
            SHRINK;
4698
182k
  }
4699
28.4M
  cur = xmlCurrentCharRecover(ctxt, &l);
4700
28.4M
    }
4701
3.37M
    if (nbchar != 0) {
4702
328k
        buf[nbchar] = 0;
4703
4704
328k
        xmlCharacters(ctxt, buf, nbchar, 0);
4705
328k
    }
4706
    /*
4707
     * cur == 0 can mean
4708
     *
4709
     * - End of buffer.
4710
     * - An actual 0 character.
4711
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4712
     */
4713
3.37M
    if (ctxt->input->cur < ctxt->input->end) {
4714
3.37M
        if ((cur == 0) && (CUR != 0)) {
4715
349
            if (partial == 0) {
4716
349
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4717
349
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4718
349
                NEXTL(1);
4719
349
            }
4720
3.37M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4721
            /* Generate the error and skip the offending character */
4722
3.20M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4723
3.20M
                              "PCDATA invalid Char value %d\n", cur);
4724
3.20M
            NEXTL(l);
4725
3.20M
        }
4726
3.37M
    }
4727
3.37M
}
4728
4729
/**
4730
 * @deprecated Internal function, don't use.
4731
 * @param ctxt  an XML parser context
4732
 * @param cdata  unused
4733
 */
4734
void
4735
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4736
0
    xmlParseCharDataInternal(ctxt, 0);
4737
0
}
4738
4739
/**
4740
 * Parse an External ID or a Public ID
4741
 *
4742
 * @deprecated Internal function, don't use.
4743
 *
4744
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4745
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4746
 *
4747
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4748
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4749
 *
4750
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4751
 *
4752
 * @param ctxt  an XML parser context
4753
 * @param publicId  a xmlChar** receiving PubidLiteral
4754
 * @param strict  indicate whether we should restrict parsing to only
4755
 *          production [75], see NOTE below
4756
 * @returns the function returns SystemLiteral and in the second
4757
 *                case publicID receives PubidLiteral, is strict is off
4758
 *                it is possible to return NULL and have publicID set.
4759
 */
4760
4761
xmlChar *
4762
15.1k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4763
15.1k
    xmlChar *URI = NULL;
4764
4765
15.1k
    *publicId = NULL;
4766
15.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4767
2.41k
        SKIP(6);
4768
2.41k
  if (SKIP_BLANKS == 0) {
4769
874
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4770
874
                     "Space required after 'SYSTEM'\n");
4771
874
  }
4772
2.41k
  URI = xmlParseSystemLiteral(ctxt);
4773
2.41k
  if (URI == NULL) {
4774
443
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4775
443
        }
4776
12.7k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4777
4.59k
        SKIP(6);
4778
4.59k
  if (SKIP_BLANKS == 0) {
4779
2.97k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4780
2.97k
        "Space required after 'PUBLIC'\n");
4781
2.97k
  }
4782
4.59k
  *publicId = xmlParsePubidLiteral(ctxt);
4783
4.59k
  if (*publicId == NULL) {
4784
2.13k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4785
2.13k
  }
4786
4.59k
  if (strict) {
4787
      /*
4788
       * We don't handle [83] so "S SystemLiteral" is required.
4789
       */
4790
1.22k
      if (SKIP_BLANKS == 0) {
4791
826
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4792
826
      "Space required after the Public Identifier\n");
4793
826
      }
4794
3.36k
  } else {
4795
      /*
4796
       * We handle [83] so we return immediately, if
4797
       * "S SystemLiteral" is not detected. We skip blanks if no
4798
             * system literal was found, but this is harmless since we must
4799
             * be at the end of a NotationDecl.
4800
       */
4801
3.36k
      if (SKIP_BLANKS == 0) return(NULL);
4802
665
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4803
665
  }
4804
1.73k
  URI = xmlParseSystemLiteral(ctxt);
4805
1.73k
  if (URI == NULL) {
4806
808
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4807
808
        }
4808
1.73k
    }
4809
12.3k
    return(URI);
4810
15.1k
}
4811
4812
/**
4813
 * Skip an XML (SGML) comment <!-- .... -->
4814
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4815
 *  must not occur within comments. "
4816
 * This is the slow routine in case the accelerator for ascii didn't work
4817
 *
4818
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4819
 * @param ctxt  an XML parser context
4820
 * @param buf  the already parsed part of the buffer
4821
 * @param len  number of bytes in the buffer
4822
 * @param size  allocated size of the buffer
4823
 */
4824
static void
4825
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4826
35.4k
                       size_t len, size_t size) {
4827
35.4k
    int q, ql;
4828
35.4k
    int r, rl;
4829
35.4k
    int cur, l;
4830
35.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4831
35.4k
                    XML_MAX_HUGE_LENGTH :
4832
35.4k
                    XML_MAX_TEXT_LENGTH;
4833
4834
35.4k
    if (buf == NULL) {
4835
12.7k
        len = 0;
4836
12.7k
  size = XML_PARSER_BUFFER_SIZE;
4837
12.7k
  buf = xmlMalloc(size);
4838
12.7k
  if (buf == NULL) {
4839
0
      xmlErrMemory(ctxt);
4840
0
      return;
4841
0
  }
4842
12.7k
    }
4843
35.4k
    q = xmlCurrentCharRecover(ctxt, &ql);
4844
35.4k
    if (q == 0)
4845
1.27k
        goto not_terminated;
4846
34.1k
    if (!IS_CHAR(q)) {
4847
12.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4848
12.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4849
12.0k
                    q);
4850
12.0k
  xmlFree (buf);
4851
12.0k
  return;
4852
12.0k
    }
4853
22.0k
    NEXTL(ql);
4854
22.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
4855
22.0k
    if (r == 0)
4856
2.22k
        goto not_terminated;
4857
19.8k
    if (!IS_CHAR(r)) {
4858
2.54k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4859
2.54k
                          "xmlParseComment: invalid xmlChar value %d\n",
4860
2.54k
                    r);
4861
2.54k
  xmlFree (buf);
4862
2.54k
  return;
4863
2.54k
    }
4864
17.3k
    NEXTL(rl);
4865
17.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
4866
17.3k
    if (cur == 0)
4867
509
        goto not_terminated;
4868
2.08M
    while (IS_CHAR(cur) && /* checked */
4869
2.08M
           ((cur != '>') ||
4870
2.07M
      (r != '-') || (q != '-'))) {
4871
2.07M
  if ((r == '-') && (q == '-')) {
4872
184k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4873
184k
  }
4874
2.07M
  if (len + 5 >= size) {
4875
4.16k
      xmlChar *tmp;
4876
4.16k
            int newSize;
4877
4878
4.16k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4879
4.16k
            if (newSize < 0) {
4880
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4881
0
                             "Comment too big found", NULL);
4882
0
                xmlFree (buf);
4883
0
                return;
4884
0
            }
4885
4.16k
      tmp = xmlRealloc(buf, newSize);
4886
4.16k
      if (tmp == NULL) {
4887
0
    xmlErrMemory(ctxt);
4888
0
    xmlFree(buf);
4889
0
    return;
4890
0
      }
4891
4.16k
      buf = tmp;
4892
4.16k
            size = newSize;
4893
4.16k
  }
4894
2.07M
  COPY_BUF(buf, len, q);
4895
4896
2.07M
  q = r;
4897
2.07M
  ql = rl;
4898
2.07M
  r = cur;
4899
2.07M
  rl = l;
4900
4901
2.07M
  NEXTL(l);
4902
2.07M
  cur = xmlCurrentCharRecover(ctxt, &l);
4903
4904
2.07M
    }
4905
16.8k
    buf[len] = 0;
4906
16.8k
    if (cur == 0) {
4907
4.65k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4908
4.65k
                       "Comment not terminated \n<!--%.50s\n", buf);
4909
12.1k
    } else if (!IS_CHAR(cur)) {
4910
8.11k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4911
8.11k
                          "xmlParseComment: invalid xmlChar value %d\n",
4912
8.11k
                    cur);
4913
8.11k
    } else {
4914
4.04k
        NEXT;
4915
4.04k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4916
4.04k
      (!ctxt->disableSAX))
4917
841
      ctxt->sax->comment(ctxt->userData, buf);
4918
4.04k
    }
4919
16.8k
    xmlFree(buf);
4920
16.8k
    return;
4921
4.00k
not_terminated:
4922
4.00k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4923
4.00k
       "Comment not terminated\n", NULL);
4924
4.00k
    xmlFree(buf);
4925
4.00k
}
4926
4927
/**
4928
 * Parse an XML (SGML) comment. Always consumes '<!'.
4929
 *
4930
 * @deprecated Internal function, don't use.
4931
 *
4932
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4933
 *  must not occur within comments. "
4934
 *
4935
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4936
 * @param ctxt  an XML parser context
4937
 */
4938
void
4939
45.1k
xmlParseComment(xmlParserCtxt *ctxt) {
4940
45.1k
    xmlChar *buf = NULL;
4941
45.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
4942
45.1k
    size_t len = 0;
4943
45.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4944
45.1k
                       XML_MAX_HUGE_LENGTH :
4945
45.1k
                       XML_MAX_TEXT_LENGTH;
4946
45.1k
    const xmlChar *in;
4947
45.1k
    size_t nbchar = 0;
4948
45.1k
    int ccol;
4949
4950
    /*
4951
     * Check that there is a comment right here.
4952
     */
4953
45.1k
    if ((RAW != '<') || (NXT(1) != '!'))
4954
0
        return;
4955
45.1k
    SKIP(2);
4956
45.1k
    if ((RAW != '-') || (NXT(1) != '-'))
4957
3
        return;
4958
45.1k
    SKIP(2);
4959
45.1k
    GROW;
4960
4961
    /*
4962
     * Accelerated common case where input don't need to be
4963
     * modified before passing it to the handler.
4964
     */
4965
45.1k
    in = ctxt->input->cur;
4966
45.6k
    do {
4967
45.6k
  if (*in == 0xA) {
4968
2.93k
      do {
4969
2.93k
    ctxt->input->line++; ctxt->input->col = 1;
4970
2.93k
    in++;
4971
2.93k
      } while (*in == 0xA);
4972
1.07k
  }
4973
678k
get_more:
4974
678k
        ccol = ctxt->input->col;
4975
3.18M
  while (((*in > '-') && (*in <= 0x7F)) ||
4976
3.18M
         ((*in >= 0x20) && (*in < '-')) ||
4977
3.18M
         (*in == 0x09)) {
4978
2.51M
        in++;
4979
2.51M
        ccol++;
4980
2.51M
  }
4981
678k
  ctxt->input->col = ccol;
4982
678k
  if (*in == 0xA) {
4983
64.2k
      do {
4984
64.2k
    ctxt->input->line++; ctxt->input->col = 1;
4985
64.2k
    in++;
4986
64.2k
      } while (*in == 0xA);
4987
10.1k
      goto get_more;
4988
10.1k
  }
4989
667k
  nbchar = in - ctxt->input->cur;
4990
  /*
4991
   * save current set of data
4992
   */
4993
667k
  if (nbchar > 0) {
4994
636k
            if (nbchar > maxLength - len) {
4995
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4996
0
                                  "Comment too big found", NULL);
4997
0
                xmlFree(buf);
4998
0
                return;
4999
0
            }
5000
636k
            if (buf == NULL) {
5001
26.0k
                if ((*in == '-') && (in[1] == '-'))
5002
6.45k
                    size = nbchar + 1;
5003
19.5k
                else
5004
19.5k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5005
26.0k
                buf = xmlMalloc(size);
5006
26.0k
                if (buf == NULL) {
5007
0
                    xmlErrMemory(ctxt);
5008
0
                    return;
5009
0
                }
5010
26.0k
                len = 0;
5011
610k
            } else if (len + nbchar + 1 >= size) {
5012
4.79k
                xmlChar *new_buf;
5013
4.79k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5014
4.79k
                new_buf = xmlRealloc(buf, size);
5015
4.79k
                if (new_buf == NULL) {
5016
0
                    xmlErrMemory(ctxt);
5017
0
                    xmlFree(buf);
5018
0
                    return;
5019
0
                }
5020
4.79k
                buf = new_buf;
5021
4.79k
            }
5022
636k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5023
636k
            len += nbchar;
5024
636k
            buf[len] = 0;
5025
636k
  }
5026
667k
  ctxt->input->cur = in;
5027
667k
  if (*in == 0xA) {
5028
0
      in++;
5029
0
      ctxt->input->line++; ctxt->input->col = 1;
5030
0
  }
5031
667k
  if (*in == 0xD) {
5032
10.7k
      in++;
5033
10.7k
      if (*in == 0xA) {
5034
5.11k
    ctxt->input->cur = in;
5035
5.11k
    in++;
5036
5.11k
    ctxt->input->line++; ctxt->input->col = 1;
5037
5.11k
    goto get_more;
5038
5.11k
      }
5039
5.60k
      in--;
5040
5.60k
  }
5041
662k
  SHRINK;
5042
662k
  GROW;
5043
662k
  in = ctxt->input->cur;
5044
662k
  if (*in == '-') {
5045
626k
      if (in[1] == '-') {
5046
610k
          if (in[2] == '>') {
5047
9.75k
        SKIP(3);
5048
9.75k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5049
9.75k
            (!ctxt->disableSAX)) {
5050
1.16k
      if (buf != NULL)
5051
603
          ctxt->sax->comment(ctxt->userData, buf);
5052
562
      else
5053
562
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5054
1.16k
        }
5055
9.75k
        if (buf != NULL)
5056
3.38k
            xmlFree(buf);
5057
9.75k
        return;
5058
9.75k
    }
5059
600k
    if (buf != NULL) {
5060
597k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5061
597k
                          "Double hyphen within comment: "
5062
597k
                                      "<!--%.50s\n",
5063
597k
              buf);
5064
597k
    } else
5065
2.80k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5066
2.80k
                          "Double hyphen within comment\n", NULL);
5067
600k
    in++;
5068
600k
    ctxt->input->col++;
5069
600k
      }
5070
617k
      in++;
5071
617k
      ctxt->input->col++;
5072
617k
      goto get_more;
5073
626k
  }
5074
662k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5075
35.4k
    xmlParseCommentComplex(ctxt, buf, len, size);
5076
35.4k
}
5077
5078
5079
/**
5080
 * parse the name of a PI
5081
 *
5082
 * @deprecated Internal function, don't use.
5083
 *
5084
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5085
 *
5086
 * @param ctxt  an XML parser context
5087
 * @returns the PITarget name or NULL
5088
 */
5089
5090
const xmlChar *
5091
51.7k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5092
51.7k
    const xmlChar *name;
5093
5094
51.7k
    name = xmlParseName(ctxt);
5095
51.7k
    if ((name != NULL) &&
5096
51.7k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5097
51.7k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5098
51.7k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5099
6.63k
  int i;
5100
6.63k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5101
6.63k
      (name[2] == 'l') && (name[3] == 0)) {
5102
2.54k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5103
2.54k
     "XML declaration allowed only at the start of the document\n");
5104
2.54k
      return(name);
5105
4.09k
  } else if (name[3] == 0) {
5106
560
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5107
560
      return(name);
5108
560
  }
5109
9.22k
  for (i = 0;;i++) {
5110
9.22k
      if (xmlW3CPIs[i] == NULL) break;
5111
6.85k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5112
1.16k
          return(name);
5113
6.85k
  }
5114
2.36k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5115
2.36k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5116
2.36k
          NULL, NULL);
5117
2.36k
    }
5118
47.4k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5119
1.54k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5120
1.54k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5121
1.54k
    }
5122
47.4k
    return(name);
5123
51.7k
}
5124
5125
#ifdef LIBXML_CATALOG_ENABLED
5126
/**
5127
 * parse an XML Catalog Processing Instruction.
5128
 *
5129
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5130
 *
5131
 * Occurs only if allowed by the user and if happening in the Misc
5132
 * part of the document before any doctype information
5133
 * This will add the given catalog to the parsing context in order
5134
 * to be used if there is a resolution need further down in the document
5135
 *
5136
 * @param ctxt  an XML parser context
5137
 * @param catalog  the PI value string
5138
 */
5139
5140
static void
5141
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5142
0
    xmlChar *URL = NULL;
5143
0
    const xmlChar *tmp, *base;
5144
0
    xmlChar marker;
5145
5146
0
    tmp = catalog;
5147
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5148
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5149
0
  goto error;
5150
0
    tmp += 7;
5151
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5152
0
    if (*tmp != '=') {
5153
0
  return;
5154
0
    }
5155
0
    tmp++;
5156
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5157
0
    marker = *tmp;
5158
0
    if ((marker != '\'') && (marker != '"'))
5159
0
  goto error;
5160
0
    tmp++;
5161
0
    base = tmp;
5162
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5163
0
    if (*tmp == 0)
5164
0
  goto error;
5165
0
    URL = xmlStrndup(base, tmp - base);
5166
0
    tmp++;
5167
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5168
0
    if (*tmp != 0)
5169
0
  goto error;
5170
5171
0
    if (URL != NULL) {
5172
        /*
5173
         * Unfortunately, the catalog API doesn't report OOM errors.
5174
         * xmlGetLastError isn't very helpful since we don't know
5175
         * where the last error came from. We'd have to reset it
5176
         * before this call and restore it afterwards.
5177
         */
5178
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5179
0
  xmlFree(URL);
5180
0
    }
5181
0
    return;
5182
5183
0
error:
5184
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5185
0
            "Catalog PI syntax error: %s\n",
5186
0
      catalog, NULL);
5187
0
    if (URL != NULL)
5188
0
  xmlFree(URL);
5189
0
}
5190
#endif
5191
5192
/**
5193
 * parse an XML Processing Instruction.
5194
 *
5195
 * @deprecated Internal function, don't use.
5196
 *
5197
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5198
 *
5199
 * The processing is transferred to SAX once parsed.
5200
 *
5201
 * @param ctxt  an XML parser context
5202
 */
5203
5204
void
5205
51.7k
xmlParsePI(xmlParserCtxt *ctxt) {
5206
51.7k
    xmlChar *buf = NULL;
5207
51.7k
    size_t len = 0;
5208
51.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
51.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5210
51.7k
                       XML_MAX_HUGE_LENGTH :
5211
51.7k
                       XML_MAX_TEXT_LENGTH;
5212
51.7k
    int cur, l;
5213
51.7k
    const xmlChar *target;
5214
5215
51.7k
    if ((RAW == '<') && (NXT(1) == '?')) {
5216
  /*
5217
   * this is a Processing Instruction.
5218
   */
5219
51.7k
  SKIP(2);
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
51.7k
        target = xmlParsePITarget(ctxt);
5226
51.7k
  if (target != NULL) {
5227
43.9k
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
9.57k
    SKIP(2);
5229
5230
    /*
5231
     * SAX: PI detected.
5232
     */
5233
9.57k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5234
9.57k
        (ctxt->sax->processingInstruction != NULL))
5235
2.18k
        ctxt->sax->processingInstruction(ctxt->userData,
5236
2.18k
                                         target, NULL);
5237
9.57k
    return;
5238
9.57k
      }
5239
34.3k
      buf = xmlMalloc(size);
5240
34.3k
      if (buf == NULL) {
5241
0
    xmlErrMemory(ctxt);
5242
0
    return;
5243
0
      }
5244
34.3k
      if (SKIP_BLANKS == 0) {
5245
23.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5246
23.7k
        "ParsePI: PI %s space expected\n", target);
5247
23.7k
      }
5248
34.3k
      cur = xmlCurrentCharRecover(ctxt, &l);
5249
3.44M
      while (IS_CHAR(cur) && /* checked */
5250
3.44M
       ((cur != '?') || (NXT(1) != '>'))) {
5251
3.40M
    if (len + 5 >= size) {
5252
6.51k
        xmlChar *tmp;
5253
6.51k
                    int newSize;
5254
5255
6.51k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5256
6.51k
                    if (newSize < 0) {
5257
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5258
0
                                          "PI %s too big found", target);
5259
0
                        xmlFree(buf);
5260
0
                        return;
5261
0
                    }
5262
6.51k
        tmp = xmlRealloc(buf, newSize);
5263
6.51k
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt);
5265
0
      xmlFree(buf);
5266
0
      return;
5267
0
        }
5268
6.51k
        buf = tmp;
5269
6.51k
                    size = newSize;
5270
6.51k
    }
5271
3.40M
    COPY_BUF(buf, len, cur);
5272
3.40M
    NEXTL(l);
5273
3.40M
    cur = xmlCurrentCharRecover(ctxt, &l);
5274
3.40M
      }
5275
34.3k
      buf[len] = 0;
5276
34.3k
      if (cur != '?') {
5277
14.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5278
14.0k
          "ParsePI: PI %s never end ...\n", target);
5279
20.2k
      } else {
5280
20.2k
    SKIP(2);
5281
5282
20.2k
#ifdef LIBXML_CATALOG_ENABLED
5283
20.2k
    if ((ctxt->inSubset == 0) &&
5284
20.2k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5285
3.82k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5286
5287
3.82k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5288
3.82k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5289
0
       (allow == XML_CATA_ALLOW_ALL)))
5290
0
      xmlParseCatalogPI(ctxt, buf);
5291
3.82k
    }
5292
20.2k
#endif
5293
5294
    /*
5295
     * SAX: PI detected.
5296
     */
5297
20.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5298
20.2k
        (ctxt->sax->processingInstruction != NULL))
5299
605
        ctxt->sax->processingInstruction(ctxt->userData,
5300
605
                                         target, buf);
5301
20.2k
      }
5302
34.3k
      xmlFree(buf);
5303
34.3k
  } else {
5304
7.80k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5305
7.80k
  }
5306
51.7k
    }
5307
51.7k
}
5308
5309
/**
5310
 * Parse a notation declaration. Always consumes '<!'.
5311
 *
5312
 * @deprecated Internal function, don't use.
5313
 *
5314
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5315
 *                           S? '>'
5316
 *
5317
 * Hence there is actually 3 choices:
5318
 *
5319
 *     'PUBLIC' S PubidLiteral
5320
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5321
 *     'SYSTEM' S SystemLiteral
5322
 *
5323
 * See the NOTE on #xmlParseExternalID.
5324
 *
5325
 * @param ctxt  an XML parser context
5326
 */
5327
5328
void
5329
7.01k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5330
7.01k
    const xmlChar *name;
5331
7.01k
    xmlChar *Pubid;
5332
7.01k
    xmlChar *Systemid;
5333
5334
7.01k
    if ((CUR != '<') || (NXT(1) != '!'))
5335
0
        return;
5336
7.01k
    SKIP(2);
5337
5338
7.01k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5339
6.96k
#ifdef LIBXML_VALID_ENABLED
5340
6.96k
  int oldInputNr = ctxt->inputNr;
5341
6.96k
#endif
5342
5343
6.96k
  SKIP(8);
5344
6.96k
  if (SKIP_BLANKS_PE == 0) {
5345
769
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5346
769
         "Space required after '<!NOTATION'\n");
5347
769
      return;
5348
769
  }
5349
5350
6.19k
        name = xmlParseName(ctxt);
5351
6.19k
  if (name == NULL) {
5352
381
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5353
381
      return;
5354
381
  }
5355
5.81k
  if (xmlStrchr(name, ':') != NULL) {
5356
2.05k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5357
2.05k
         "colons are forbidden from notation names '%s'\n",
5358
2.05k
         name, NULL, NULL);
5359
2.05k
  }
5360
5.81k
  if (SKIP_BLANKS_PE == 0) {
5361
955
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5362
955
         "Space required after the NOTATION name'\n");
5363
955
      return;
5364
955
  }
5365
5366
  /*
5367
   * Parse the IDs.
5368
   */
5369
4.86k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5370
4.86k
  SKIP_BLANKS_PE;
5371
5372
4.86k
  if (RAW == '>') {
5373
928
#ifdef LIBXML_VALID_ENABLED
5374
928
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5375
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5376
0
                           "Notation declaration doesn't start and stop"
5377
0
                                 " in the same entity\n",
5378
0
                                 NULL, NULL);
5379
0
      }
5380
928
#endif
5381
928
      NEXT;
5382
928
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5383
928
    (ctxt->sax->notationDecl != NULL))
5384
291
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5385
3.93k
  } else {
5386
3.93k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5387
3.93k
  }
5388
4.86k
  if (Systemid != NULL) xmlFree(Systemid);
5389
4.86k
  if (Pubid != NULL) xmlFree(Pubid);
5390
4.86k
    }
5391
7.01k
}
5392
5393
/**
5394
 * Parse an entity declaration. Always consumes '<!'.
5395
 *
5396
 * @deprecated Internal function, don't use.
5397
 *
5398
 *     [70] EntityDecl ::= GEDecl | PEDecl
5399
 *
5400
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5401
 *
5402
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5403
 *
5404
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5405
 *
5406
 *     [74] PEDef ::= EntityValue | ExternalID
5407
 *
5408
 *     [76] NDataDecl ::= S 'NDATA' S Name
5409
 *
5410
 * [ VC: Notation Declared ]
5411
 * The Name must match the declared name of a notation.
5412
 *
5413
 * @param ctxt  an XML parser context
5414
 */
5415
5416
void
5417
27.1k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5418
27.1k
    const xmlChar *name = NULL;
5419
27.1k
    xmlChar *value = NULL;
5420
27.1k
    xmlChar *URI = NULL, *literal = NULL;
5421
27.1k
    const xmlChar *ndata = NULL;
5422
27.1k
    int isParameter = 0;
5423
27.1k
    xmlChar *orig = NULL;
5424
5425
27.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5426
0
        return;
5427
27.1k
    SKIP(2);
5428
5429
    /* GROW; done in the caller */
5430
27.1k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5431
27.0k
#ifdef LIBXML_VALID_ENABLED
5432
27.0k
  int oldInputNr = ctxt->inputNr;
5433
27.0k
#endif
5434
5435
27.0k
  SKIP(6);
5436
27.0k
  if (SKIP_BLANKS_PE == 0) {
5437
15.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438
15.3k
         "Space required after '<!ENTITY'\n");
5439
15.3k
  }
5440
5441
27.0k
  if (RAW == '%') {
5442
5.77k
      NEXT;
5443
5.77k
      if (SKIP_BLANKS_PE == 0) {
5444
720
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5445
720
             "Space required after '%%'\n");
5446
720
      }
5447
5.77k
      isParameter = 1;
5448
5.77k
  }
5449
5450
27.0k
        name = xmlParseName(ctxt);
5451
27.0k
  if (name == NULL) {
5452
432
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5453
432
                     "xmlParseEntityDecl: no name\n");
5454
432
            return;
5455
432
  }
5456
26.6k
  if (xmlStrchr(name, ':') != NULL) {
5457
573
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5458
573
         "colons are forbidden from entities names '%s'\n",
5459
573
         name, NULL, NULL);
5460
573
  }
5461
26.6k
  if (SKIP_BLANKS_PE == 0) {
5462
10.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5463
10.4k
         "Space required after the entity name\n");
5464
10.4k
  }
5465
5466
  /*
5467
   * handle the various case of definitions...
5468
   */
5469
26.6k
  if (isParameter) {
5470
5.72k
      if ((RAW == '"') || (RAW == '\'')) {
5471
4.36k
          value = xmlParseEntityValue(ctxt, &orig);
5472
4.36k
    if (value) {
5473
4.35k
        if ((ctxt->sax != NULL) &&
5474
4.35k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5475
1.44k
      ctxt->sax->entityDecl(ctxt->userData, name,
5476
1.44k
                        XML_INTERNAL_PARAMETER_ENTITY,
5477
1.44k
            NULL, NULL, value);
5478
4.35k
    }
5479
4.36k
      } else {
5480
1.35k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5481
1.35k
    if ((URI == NULL) && (literal == NULL)) {
5482
705
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5483
705
    }
5484
1.35k
    if (URI) {
5485
546
                    if (xmlStrchr(URI, '#')) {
5486
193
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5487
353
                    } else {
5488
353
                        if ((ctxt->sax != NULL) &&
5489
353
                            (!ctxt->disableSAX) &&
5490
353
                            (ctxt->sax->entityDecl != NULL))
5491
190
                            ctxt->sax->entityDecl(ctxt->userData, name,
5492
190
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5493
190
                                        literal, URI, NULL);
5494
353
                    }
5495
546
    }
5496
1.35k
      }
5497
20.9k
  } else {
5498
20.9k
      if ((RAW == '"') || (RAW == '\'')) {
5499
18.2k
          value = xmlParseEntityValue(ctxt, &orig);
5500
18.2k
    if ((ctxt->sax != NULL) &&
5501
18.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5502
1.00k
        ctxt->sax->entityDecl(ctxt->userData, name,
5503
1.00k
        XML_INTERNAL_GENERAL_ENTITY,
5504
1.00k
        NULL, NULL, value);
5505
    /*
5506
     * For expat compatibility in SAX mode.
5507
     */
5508
18.2k
    if ((ctxt->myDoc == NULL) ||
5509
18.2k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5510
10.5k
        if (ctxt->myDoc == NULL) {
5511
1.20k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5512
1.20k
      if (ctxt->myDoc == NULL) {
5513
0
          xmlErrMemory(ctxt);
5514
0
          goto done;
5515
0
      }
5516
1.20k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5517
1.20k
        }
5518
10.5k
        if (ctxt->myDoc->intSubset == NULL) {
5519
1.20k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520
1.20k
              BAD_CAST "fake", NULL, NULL);
5521
1.20k
                        if (ctxt->myDoc->intSubset == NULL) {
5522
0
                            xmlErrMemory(ctxt);
5523
0
                            goto done;
5524
0
                        }
5525
1.20k
                    }
5526
5527
10.5k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528
10.5k
                    NULL, NULL, value);
5529
10.5k
    }
5530
18.2k
      } else {
5531
2.65k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5532
2.65k
    if ((URI == NULL) && (literal == NULL)) {
5533
974
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5534
974
    }
5535
2.65k
    if (URI) {
5536
1.01k
                    if (xmlStrchr(URI, '#')) {
5537
219
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5538
219
                    }
5539
1.01k
    }
5540
2.65k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5541
477
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5542
477
           "Space required before 'NDATA'\n");
5543
477
    }
5544
2.65k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5545
473
        SKIP(5);
5546
473
        if (SKIP_BLANKS_PE == 0) {
5547
238
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5548
238
               "Space required after 'NDATA'\n");
5549
238
        }
5550
473
        ndata = xmlParseName(ctxt);
5551
473
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5552
473
            (ctxt->sax->unparsedEntityDecl != NULL))
5553
73
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5554
73
            literal, URI, ndata);
5555
2.17k
    } else {
5556
2.17k
        if ((ctxt->sax != NULL) &&
5557
2.17k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5558
250
      ctxt->sax->entityDecl(ctxt->userData, name,
5559
250
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5560
250
            literal, URI, NULL);
5561
        /*
5562
         * For expat compatibility in SAX mode.
5563
         * assuming the entity replacement was asked for
5564
         */
5565
2.17k
        if ((ctxt->replaceEntities != 0) &&
5566
2.17k
      ((ctxt->myDoc == NULL) ||
5567
2.17k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5568
1.22k
      if (ctxt->myDoc == NULL) {
5569
61
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5570
61
          if (ctxt->myDoc == NULL) {
5571
0
              xmlErrMemory(ctxt);
5572
0
        goto done;
5573
0
          }
5574
61
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5575
61
      }
5576
5577
1.22k
      if (ctxt->myDoc->intSubset == NULL) {
5578
61
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5579
61
            BAD_CAST "fake", NULL, NULL);
5580
61
                            if (ctxt->myDoc->intSubset == NULL) {
5581
0
                                xmlErrMemory(ctxt);
5582
0
                                goto done;
5583
0
                            }
5584
61
                        }
5585
1.22k
      xmlSAX2EntityDecl(ctxt, name,
5586
1.22k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5587
1.22k
                  literal, URI, NULL);
5588
1.22k
        }
5589
2.17k
    }
5590
2.65k
      }
5591
20.9k
  }
5592
26.6k
  SKIP_BLANKS_PE;
5593
26.6k
  if (RAW != '>') {
5594
691
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5595
691
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5596
691
      xmlHaltParser(ctxt);
5597
25.9k
  } else {
5598
25.9k
#ifdef LIBXML_VALID_ENABLED
5599
25.9k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5600
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5601
0
                           "Entity declaration doesn't start and stop in"
5602
0
                                 " the same entity\n",
5603
0
                                 NULL, NULL);
5604
0
      }
5605
25.9k
#endif
5606
25.9k
      NEXT;
5607
25.9k
  }
5608
26.6k
  if (orig != NULL) {
5609
      /*
5610
       * Ugly mechanism to save the raw entity value.
5611
       */
5612
22.6k
      xmlEntityPtr cur = NULL;
5613
5614
22.6k
      if (isParameter) {
5615
4.35k
          if ((ctxt->sax != NULL) &&
5616
4.35k
        (ctxt->sax->getParameterEntity != NULL))
5617
4.35k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5618
18.2k
      } else {
5619
18.2k
          if ((ctxt->sax != NULL) &&
5620
18.2k
        (ctxt->sax->getEntity != NULL))
5621
18.2k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5622
18.2k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5623
4.94k
        cur = xmlSAX2GetEntity(ctxt, name);
5624
4.94k
    }
5625
18.2k
      }
5626
22.6k
            if ((cur != NULL) && (cur->orig == NULL)) {
5627
3.80k
    cur->orig = orig;
5628
3.80k
                orig = NULL;
5629
3.80k
      }
5630
22.6k
  }
5631
5632
26.6k
done:
5633
26.6k
  if (value != NULL) xmlFree(value);
5634
26.6k
  if (URI != NULL) xmlFree(URI);
5635
26.6k
  if (literal != NULL) xmlFree(literal);
5636
26.6k
        if (orig != NULL) xmlFree(orig);
5637
26.6k
    }
5638
27.1k
}
5639
5640
/**
5641
 * Parse an attribute default declaration
5642
 *
5643
 * @deprecated Internal function, don't use.
5644
 *
5645
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5646
 *
5647
 * [ VC: Required Attribute ]
5648
 * if the default declaration is the keyword \#REQUIRED, then the
5649
 * attribute must be specified for all elements of the type in the
5650
 * attribute-list declaration.
5651
 *
5652
 * [ VC: Attribute Default Legal ]
5653
 * The declared default value must meet the lexical constraints of
5654
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5655
 *
5656
 * [ VC: Fixed Attribute Default ]
5657
 * if an attribute has a default value declared with the \#FIXED
5658
 * keyword, instances of that attribute must match the default value.
5659
 *
5660
 * [ WFC: No < in Attribute Values ]
5661
 * handled in #xmlParseAttValue
5662
 *
5663
 * @param ctxt  an XML parser context
5664
 * @param value  Receive a possible fixed default value for the attribute
5665
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5666
 *          or XML_ATTRIBUTE_FIXED.
5667
 */
5668
5669
int
5670
20.9k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5671
20.9k
    int val;
5672
20.9k
    xmlChar *ret;
5673
5674
20.9k
    *value = NULL;
5675
20.9k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5676
291
  SKIP(9);
5677
291
  return(XML_ATTRIBUTE_REQUIRED);
5678
291
    }
5679
20.6k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5680
702
  SKIP(8);
5681
702
  return(XML_ATTRIBUTE_IMPLIED);
5682
702
    }
5683
19.9k
    val = XML_ATTRIBUTE_NONE;
5684
19.9k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5685
506
  SKIP(6);
5686
506
  val = XML_ATTRIBUTE_FIXED;
5687
506
  if (SKIP_BLANKS_PE == 0) {
5688
436
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5689
436
         "Space required after '#FIXED'\n");
5690
436
  }
5691
506
    }
5692
19.9k
    ret = xmlParseAttValue(ctxt);
5693
19.9k
    if (ret == NULL) {
5694
2.18k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5695
2.18k
           "Attribute default value declaration error\n");
5696
2.18k
    } else
5697
17.7k
        *value = ret;
5698
19.9k
    return(val);
5699
20.6k
}
5700
5701
/**
5702
 * parse an Notation attribute type.
5703
 *
5704
 * @deprecated Internal function, don't use.
5705
 *
5706
 * Note: the leading 'NOTATION' S part has already being parsed...
5707
 *
5708
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5709
 *
5710
 * [ VC: Notation Attributes ]
5711
 * Values of this type must match one of the notation names included
5712
 * in the declaration; all notation names in the declaration must be declared.
5713
 *
5714
 * @param ctxt  an XML parser context
5715
 * @returns the notation attribute tree built while parsing
5716
 */
5717
5718
xmlEnumeration *
5719
2.25k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5720
2.25k
    const xmlChar *name;
5721
2.25k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5722
5723
2.25k
    if (RAW != '(') {
5724
1.32k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5725
1.32k
  return(NULL);
5726
1.32k
    }
5727
1.32k
    do {
5728
1.32k
        NEXT;
5729
1.32k
  SKIP_BLANKS_PE;
5730
1.32k
        name = xmlParseName(ctxt);
5731
1.32k
  if (name == NULL) {
5732
369
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5733
369
         "Name expected in NOTATION declaration\n");
5734
369
            xmlFreeEnumeration(ret);
5735
369
      return(NULL);
5736
369
  }
5737
958
        tmp = NULL;
5738
958
#ifdef LIBXML_VALID_ENABLED
5739
958
        if (ctxt->validate) {
5740
0
            tmp = ret;
5741
0
            while (tmp != NULL) {
5742
0
                if (xmlStrEqual(name, tmp->name)) {
5743
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5744
0
              "standalone: attribute notation value token %s duplicated\n",
5745
0
                                     name, NULL);
5746
0
                    if (!xmlDictOwns(ctxt->dict, name))
5747
0
                        xmlFree((xmlChar *) name);
5748
0
                    break;
5749
0
                }
5750
0
                tmp = tmp->next;
5751
0
            }
5752
0
        }
5753
958
#endif /* LIBXML_VALID_ENABLED */
5754
958
  if (tmp == NULL) {
5755
958
      cur = xmlCreateEnumeration(name);
5756
958
      if (cur == NULL) {
5757
0
                xmlErrMemory(ctxt);
5758
0
                xmlFreeEnumeration(ret);
5759
0
                return(NULL);
5760
0
            }
5761
958
      if (last == NULL) ret = last = cur;
5762
393
      else {
5763
393
    last->next = cur;
5764
393
    last = cur;
5765
393
      }
5766
958
  }
5767
958
  SKIP_BLANKS_PE;
5768
958
    } while (RAW == '|');
5769
561
    if (RAW != ')') {
5770
381
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5771
381
        xmlFreeEnumeration(ret);
5772
381
  return(NULL);
5773
381
    }
5774
180
    NEXT;
5775
180
    return(ret);
5776
561
}
5777
5778
/**
5779
 * parse an Enumeration attribute type.
5780
 *
5781
 * @deprecated Internal function, don't use.
5782
 *
5783
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5784
 *
5785
 * [ VC: Enumeration ]
5786
 * Values of this type must match one of the Nmtoken tokens in
5787
 * the declaration
5788
 *
5789
 * @param ctxt  an XML parser context
5790
 * @returns the enumeration attribute tree built while parsing
5791
 */
5792
5793
xmlEnumeration *
5794
3.95k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5795
3.95k
    xmlChar *name;
5796
3.95k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5797
5798
3.95k
    if (RAW != '(') {
5799
1.12k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5800
1.12k
  return(NULL);
5801
1.12k
    }
5802
5.67k
    do {
5803
5.67k
        NEXT;
5804
5.67k
  SKIP_BLANKS_PE;
5805
5.67k
        name = xmlParseNmtoken(ctxt);
5806
5.67k
  if (name == NULL) {
5807
564
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5808
564
      return(ret);
5809
564
  }
5810
5.11k
        tmp = NULL;
5811
5.11k
#ifdef LIBXML_VALID_ENABLED
5812
5.11k
        if (ctxt->validate) {
5813
0
            tmp = ret;
5814
0
            while (tmp != NULL) {
5815
0
                if (xmlStrEqual(name, tmp->name)) {
5816
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817
0
              "standalone: attribute enumeration value token %s duplicated\n",
5818
0
                                     name, NULL);
5819
0
                    if (!xmlDictOwns(ctxt->dict, name))
5820
0
                        xmlFree(name);
5821
0
                    break;
5822
0
                }
5823
0
                tmp = tmp->next;
5824
0
            }
5825
0
        }
5826
5.11k
#endif /* LIBXML_VALID_ENABLED */
5827
5.11k
  if (tmp == NULL) {
5828
5.11k
      cur = xmlCreateEnumeration(name);
5829
5.11k
      if (!xmlDictOwns(ctxt->dict, name))
5830
5.11k
    xmlFree(name);
5831
5.11k
      if (cur == NULL) {
5832
0
                xmlErrMemory(ctxt);
5833
0
                xmlFreeEnumeration(ret);
5834
0
                return(NULL);
5835
0
            }
5836
5.11k
      if (last == NULL) ret = last = cur;
5837
2.31k
      else {
5838
2.31k
    last->next = cur;
5839
2.31k
    last = cur;
5840
2.31k
      }
5841
5.11k
  }
5842
5.11k
  SKIP_BLANKS_PE;
5843
5.11k
    } while (RAW == '|');
5844
2.26k
    if (RAW != ')') {
5845
553
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5846
553
  return(ret);
5847
553
    }
5848
1.71k
    NEXT;
5849
1.71k
    return(ret);
5850
2.26k
}
5851
5852
/**
5853
 * parse an Enumerated attribute type.
5854
 *
5855
 * @deprecated Internal function, don't use.
5856
 *
5857
 *     [57] EnumeratedType ::= NotationType | Enumeration
5858
 *
5859
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5860
 *
5861
 * @param ctxt  an XML parser context
5862
 * @param tree  the enumeration tree built while parsing
5863
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5864
 */
5865
5866
int
5867
6.57k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5868
6.57k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5869
2.61k
  SKIP(8);
5870
2.61k
  if (SKIP_BLANKS_PE == 0) {
5871
365
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5872
365
         "Space required after 'NOTATION'\n");
5873
365
      return(0);
5874
365
  }
5875
2.25k
  *tree = xmlParseNotationType(ctxt);
5876
2.25k
  if (*tree == NULL) return(0);
5877
180
  return(XML_ATTRIBUTE_NOTATION);
5878
2.25k
    }
5879
3.95k
    *tree = xmlParseEnumerationType(ctxt);
5880
3.95k
    if (*tree == NULL) return(0);
5881
2.79k
    return(XML_ATTRIBUTE_ENUMERATION);
5882
3.95k
}
5883
5884
/**
5885
 * parse the Attribute list def for an element
5886
 *
5887
 * @deprecated Internal function, don't use.
5888
 *
5889
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5890
 *
5891
 *     [55] StringType ::= 'CDATA'
5892
 *
5893
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5894
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5895
 *
5896
 * Validity constraints for attribute values syntax are checked in
5897
 * #xmlValidateAttributeValue
5898
 *
5899
 * [ VC: ID ]
5900
 * Values of type ID must match the Name production. A name must not
5901
 * appear more than once in an XML document as a value of this type;
5902
 * i.e., ID values must uniquely identify the elements which bear them.
5903
 *
5904
 * [ VC: One ID per Element Type ]
5905
 * No element type may have more than one ID attribute specified.
5906
 *
5907
 * [ VC: ID Attribute Default ]
5908
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5909
 *
5910
 * [ VC: IDREF ]
5911
 * Values of type IDREF must match the Name production, and values
5912
 * of type IDREFS must match Names; each IDREF Name must match the value
5913
 * of an ID attribute on some element in the XML document; i.e. IDREF
5914
 * values must match the value of some ID attribute.
5915
 *
5916
 * [ VC: Entity Name ]
5917
 * Values of type ENTITY must match the Name production, values
5918
 * of type ENTITIES must match Names; each Entity Name must match the
5919
 * name of an unparsed entity declared in the DTD.
5920
 *
5921
 * [ VC: Name Token ]
5922
 * Values of type NMTOKEN must match the Nmtoken production; values
5923
 * of type NMTOKENS must match Nmtokens.
5924
 *
5925
 * @param ctxt  an XML parser context
5926
 * @param tree  the enumeration tree built while parsing
5927
 * @returns the attribute type
5928
 */
5929
int
5930
26.9k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5931
26.9k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5932
1.31k
  SKIP(5);
5933
1.31k
  return(XML_ATTRIBUTE_CDATA);
5934
25.6k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5935
236
  SKIP(6);
5936
236
  return(XML_ATTRIBUTE_IDREFS);
5937
25.3k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5938
469
  SKIP(5);
5939
469
  return(XML_ATTRIBUTE_IDREF);
5940
24.9k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5941
15.6k
        SKIP(2);
5942
15.6k
  return(XML_ATTRIBUTE_ID);
5943
15.6k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5944
473
  SKIP(6);
5945
473
  return(XML_ATTRIBUTE_ENTITY);
5946
8.79k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5947
1.83k
  SKIP(8);
5948
1.83k
  return(XML_ATTRIBUTE_ENTITIES);
5949
6.95k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5950
114
  SKIP(8);
5951
114
  return(XML_ATTRIBUTE_NMTOKENS);
5952
6.84k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5953
270
  SKIP(7);
5954
270
  return(XML_ATTRIBUTE_NMTOKEN);
5955
270
     }
5956
6.57k
     return(xmlParseEnumeratedType(ctxt, tree));
5957
26.9k
}
5958
5959
/**
5960
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5961
 *
5962
 * @deprecated Internal function, don't use.
5963
 *
5964
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5965
 *
5966
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5967
 * @param ctxt  an XML parser context
5968
 */
5969
void
5970
21.0k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5971
21.0k
    const xmlChar *elemName;
5972
21.0k
    const xmlChar *attrName;
5973
21.0k
    xmlEnumerationPtr tree;
5974
5975
21.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5976
0
        return;
5977
21.0k
    SKIP(2);
5978
5979
21.0k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5980
21.0k
#ifdef LIBXML_VALID_ENABLED
5981
21.0k
  int oldInputNr = ctxt->inputNr;
5982
21.0k
#endif
5983
5984
21.0k
  SKIP(7);
5985
21.0k
  if (SKIP_BLANKS_PE == 0) {
5986
11.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5987
11.0k
                     "Space required after '<!ATTLIST'\n");
5988
11.0k
  }
5989
21.0k
        elemName = xmlParseName(ctxt);
5990
21.0k
  if (elemName == NULL) {
5991
1.25k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992
1.25k
         "ATTLIST: no name for Element\n");
5993
1.25k
      return;
5994
1.25k
  }
5995
19.7k
  SKIP_BLANKS_PE;
5996
19.7k
  GROW;
5997
38.2k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5998
35.2k
      int type;
5999
35.2k
      int def;
6000
35.2k
      xmlChar *defaultValue = NULL;
6001
6002
35.2k
      GROW;
6003
35.2k
            tree = NULL;
6004
35.2k
      attrName = xmlParseName(ctxt);
6005
35.2k
      if (attrName == NULL) {
6006
7.38k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007
7.38k
             "ATTLIST: no name for Attribute\n");
6008
7.38k
    break;
6009
7.38k
      }
6010
27.8k
      GROW;
6011
27.8k
      if (SKIP_BLANKS_PE == 0) {
6012
937
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013
937
            "Space required after the attribute name\n");
6014
937
    break;
6015
937
      }
6016
6017
26.9k
      type = xmlParseAttributeType(ctxt, &tree);
6018
26.9k
      if (type <= 0) {
6019
3.59k
          break;
6020
3.59k
      }
6021
6022
23.3k
      GROW;
6023
23.3k
      if (SKIP_BLANKS_PE == 0) {
6024
2.35k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025
2.35k
             "Space required after the attribute type\n");
6026
2.35k
          if (tree != NULL)
6027
1.11k
        xmlFreeEnumeration(tree);
6028
2.35k
    break;
6029
2.35k
      }
6030
6031
20.9k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032
20.9k
      if (def <= 0) {
6033
0
                if (defaultValue != NULL)
6034
0
        xmlFree(defaultValue);
6035
0
          if (tree != NULL)
6036
0
        xmlFreeEnumeration(tree);
6037
0
          break;
6038
0
      }
6039
20.9k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040
16.6k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6041
6042
20.9k
      GROW;
6043
20.9k
            if (RAW != '>') {
6044
18.4k
    if (SKIP_BLANKS_PE == 0) {
6045
2.52k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6046
2.52k
      "Space required after the attribute default value\n");
6047
2.52k
        if (defaultValue != NULL)
6048
258
      xmlFree(defaultValue);
6049
2.52k
        if (tree != NULL)
6050
727
      xmlFreeEnumeration(tree);
6051
2.52k
        break;
6052
2.52k
    }
6053
18.4k
      }
6054
18.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055
18.4k
    (ctxt->sax->attributeDecl != NULL))
6056
12.0k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057
12.0k
                          type, def, defaultValue, tree);
6058
6.42k
      else if (tree != NULL)
6059
262
    xmlFreeEnumeration(tree);
6060
6061
18.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6062
18.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6063
18.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6064
17.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6065
17.5k
      }
6066
18.4k
      if (ctxt->sax2) {
6067
18.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6068
18.4k
      }
6069
18.4k
      if (defaultValue != NULL)
6070
17.5k
          xmlFree(defaultValue);
6071
18.4k
      GROW;
6072
18.4k
  }
6073
19.7k
  if (RAW == '>') {
6074
3.06k
#ifdef LIBXML_VALID_ENABLED
6075
3.06k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6076
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6077
0
                                 "Attribute list declaration doesn't start and"
6078
0
                                 " stop in the same entity\n",
6079
0
                                 NULL, NULL);
6080
0
      }
6081
3.06k
#endif
6082
3.06k
      NEXT;
6083
3.06k
  }
6084
19.7k
    }
6085
21.0k
}
6086
6087
/**
6088
 * Handle PEs and check that we don't pop the entity that started
6089
 * a balanced group.
6090
 *
6091
 * @param ctxt  parser context
6092
 * @param openInputNr  input nr of the entity with opening '('
6093
 */
6094
static void
6095
3.63M
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6096
3.63M
    SKIP_BLANKS;
6097
3.63M
    GROW;
6098
6099
3.63M
    (void) openInputNr;
6100
6101
3.63M
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6102
41.8k
        return;
6103
6104
3.61M
    while (!PARSER_STOPPED(ctxt)) {
6105
3.61M
        if (ctxt->input->cur >= ctxt->input->end) {
6106
8.05k
#ifdef LIBXML_VALID_ENABLED
6107
8.05k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6108
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6109
0
                                 "Element content declaration doesn't start "
6110
0
                                 "and stop in the same entity\n",
6111
0
                                 NULL, NULL);
6112
0
            }
6113
8.05k
#endif
6114
8.05k
            if (PARSER_IN_PE(ctxt))
6115
7.98k
                xmlPopPE(ctxt);
6116
64
            else
6117
64
                break;
6118
3.60M
        } else if (RAW == '%') {
6119
10.2k
            xmlParsePERefInternal(ctxt, 0);
6120
3.59M
        } else {
6121
3.59M
            break;
6122
3.59M
        }
6123
6124
18.2k
        SKIP_BLANKS;
6125
18.2k
        GROW;
6126
18.2k
    }
6127
3.59M
}
6128
6129
/**
6130
 * parse the declaration for a Mixed Element content
6131
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6132
 *
6133
 * @deprecated Internal function, don't use.
6134
 *
6135
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                    '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * @param ctxt  an XML parser context
6145
 * @param openInputNr  the input used for the current entity, needed for
6146
 * boundary checks
6147
 * @returns the list of the xmlElementContent describing the element choices
6148
 */
6149
xmlElementContent *
6150
1.71k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6151
1.71k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6152
1.71k
    const xmlChar *elem = NULL;
6153
6154
1.71k
    GROW;
6155
1.71k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6156
1.71k
  SKIP(7);
6157
1.71k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6158
1.71k
  if (RAW == ')') {
6159
541
#ifdef LIBXML_VALID_ENABLED
6160
541
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6161
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6162
0
                                 "Element content declaration doesn't start "
6163
0
                                 "and stop in the same entity\n",
6164
0
                                 NULL, NULL);
6165
0
      }
6166
541
#endif
6167
541
      NEXT;
6168
541
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6169
541
      if (ret == NULL)
6170
0
                goto mem_error;
6171
541
      if (RAW == '*') {
6172
50
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6173
50
    NEXT;
6174
50
      }
6175
541
      return(ret);
6176
541
  }
6177
1.17k
  if ((RAW == '(') || (RAW == '|')) {
6178
644
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6179
644
      if (ret == NULL)
6180
0
                goto mem_error;
6181
644
  }
6182
2.54k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6183
1.63k
      NEXT;
6184
1.63k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6185
1.63k
            if (n == NULL)
6186
0
                goto mem_error;
6187
1.63k
      if (elem == NULL) {
6188
643
    n->c1 = cur;
6189
643
    if (cur != NULL)
6190
643
        cur->parent = n;
6191
643
    ret = cur = n;
6192
990
      } else {
6193
990
          cur->c2 = n;
6194
990
    n->parent = cur;
6195
990
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6196
990
                if (n->c1 == NULL)
6197
0
                    goto mem_error;
6198
990
    n->c1->parent = n;
6199
990
    cur = n;
6200
990
      }
6201
1.63k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6202
1.63k
      elem = xmlParseName(ctxt);
6203
1.63k
      if (elem == NULL) {
6204
259
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6205
259
      "xmlParseElementMixedContentDecl : Name expected\n");
6206
259
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6207
259
    return(NULL);
6208
259
      }
6209
1.37k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6210
1.37k
  }
6211
916
  if ((RAW == ')') && (NXT(1) == '*')) {
6212
308
      if (elem != NULL) {
6213
308
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6214
308
                                   XML_ELEMENT_CONTENT_ELEMENT);
6215
308
    if (cur->c2 == NULL)
6216
0
                    goto mem_error;
6217
308
    cur->c2->parent = cur;
6218
308
            }
6219
308
            if (ret != NULL)
6220
308
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6221
308
#ifdef LIBXML_VALID_ENABLED
6222
308
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6223
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
0
                                 "Element content declaration doesn't start "
6225
0
                                 "and stop in the same entity\n",
6226
0
                                 NULL, NULL);
6227
0
      }
6228
308
#endif
6229
308
      SKIP(2);
6230
608
  } else {
6231
608
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6232
608
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6233
608
      return(NULL);
6234
608
  }
6235
6236
916
    } else {
6237
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6238
0
    }
6239
308
    return(ret);
6240
6241
0
mem_error:
6242
0
    xmlErrMemory(ctxt);
6243
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6244
0
    return(NULL);
6245
1.71k
}
6246
6247
/**
6248
 * parse the declaration for an element's children content
6249
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6250
 *
6251
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6252
 *
6253
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6254
 *
6255
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6256
 *
6257
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6258
 *
6259
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6260
 * TODO Parameter-entity replacement text must be properly nested
6261
 *  with parenthesized groups. That is to say, if either of the
6262
 *  opening or closing parentheses in a choice, seq, or Mixed
6263
 *  construct is contained in the replacement text for a parameter
6264
 *  entity, both must be contained in the same replacement text. For
6265
 *  interoperability, if a parameter-entity reference appears in a
6266
 *  choice, seq, or Mixed construct, its replacement text should not
6267
 *  be empty, and neither the first nor last non-blank character of
6268
 *  the replacement text should be a connector (| or ,).
6269
 *
6270
 * @param ctxt  an XML parser context
6271
 * @param openInputNr  the input used for the current entity, needed for
6272
 * boundary checks
6273
 * @param depth  the level of recursion
6274
 * @returns the tree of xmlElementContent describing the element
6275
 *          hierarchy.
6276
 */
6277
static xmlElementContentPtr
6278
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6279
981k
                                       int depth) {
6280
981k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6281
981k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6282
981k
    const xmlChar *elem;
6283
981k
    xmlChar type = 0;
6284
6285
981k
    if (depth > maxDepth) {
6286
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6287
0
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6288
0
                "use XML_PARSE_HUGE\n", depth);
6289
0
  return(NULL);
6290
0
    }
6291
981k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6292
981k
    if (RAW == '(') {
6293
149k
        int newInputNr = ctxt->inputNr;
6294
6295
        /* Recurse on first child */
6296
149k
  NEXT;
6297
149k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6298
149k
                                                           depth + 1);
6299
149k
        if (cur == NULL)
6300
148k
            return(NULL);
6301
832k
    } else {
6302
832k
  elem = xmlParseName(ctxt);
6303
832k
  if (elem == NULL) {
6304
723
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6305
723
      return(NULL);
6306
723
  }
6307
831k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6308
831k
  if (cur == NULL) {
6309
0
      xmlErrMemory(ctxt);
6310
0
      return(NULL);
6311
0
  }
6312
831k
  GROW;
6313
831k
  if (RAW == '?') {
6314
812k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6315
812k
      NEXT;
6316
812k
  } else if (RAW == '*') {
6317
2.77k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6318
2.77k
      NEXT;
6319
16.2k
  } else if (RAW == '+') {
6320
1.35k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6321
1.35k
      NEXT;
6322
14.8k
  } else {
6323
14.8k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6324
14.8k
  }
6325
831k
  GROW;
6326
831k
    }
6327
1.42M
    while (!PARSER_STOPPED(ctxt)) {
6328
1.41M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6329
1.41M
        if (RAW == ')')
6330
193k
            break;
6331
        /*
6332
   * Each loop we parse one separator and one element.
6333
   */
6334
1.22M
        if (RAW == ',') {
6335
304
      if (type == 0) type = CUR;
6336
6337
      /*
6338
       * Detect "Name | Name , Name" error
6339
       */
6340
73
      else if (type != CUR) {
6341
2
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6342
2
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6343
2
                      type);
6344
2
    if ((last != NULL) && (last != ret))
6345
2
        xmlFreeDocElementContent(ctxt->myDoc, last);
6346
2
    if (ret != NULL)
6347
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6348
2
    return(NULL);
6349
2
      }
6350
302
      NEXT;
6351
6352
302
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6353
302
      if (op == NULL) {
6354
0
                xmlErrMemory(ctxt);
6355
0
    if ((last != NULL) && (last != ret))
6356
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6357
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6358
0
    return(NULL);
6359
0
      }
6360
302
      if (last == NULL) {
6361
231
    op->c1 = ret;
6362
231
    if (ret != NULL)
6363
231
        ret->parent = op;
6364
231
    ret = cur = op;
6365
231
      } else {
6366
71
          cur->c2 = op;
6367
71
    if (op != NULL)
6368
71
        op->parent = cur;
6369
71
    op->c1 = last;
6370
71
    if (last != NULL)
6371
71
        last->parent = op;
6372
71
    cur =op;
6373
71
    last = NULL;
6374
71
      }
6375
1.22M
  } else if (RAW == '|') {
6376
1.21M
      if (type == 0) type = CUR;
6377
6378
      /*
6379
       * Detect "Name , Name | Name" error
6380
       */
6381
394k
      else if (type != CUR) {
6382
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6383
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6384
1
          type);
6385
1
    if ((last != NULL) && (last != ret))
6386
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6387
1
    if (ret != NULL)
6388
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6389
1
    return(NULL);
6390
1
      }
6391
1.21M
      NEXT;
6392
6393
1.21M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6394
1.21M
      if (op == NULL) {
6395
0
                xmlErrMemory(ctxt);
6396
0
    if ((last != NULL) && (last != ret))
6397
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6398
0
    if (ret != NULL)
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6400
0
    return(NULL);
6401
0
      }
6402
1.21M
      if (last == NULL) {
6403
823k
    op->c1 = ret;
6404
823k
    if (ret != NULL)
6405
823k
        ret->parent = op;
6406
823k
    ret = cur = op;
6407
823k
      } else {
6408
394k
          cur->c2 = op;
6409
394k
    if (op != NULL)
6410
394k
        op->parent = cur;
6411
394k
    op->c1 = last;
6412
394k
    if (last != NULL)
6413
394k
        last->parent = op;
6414
394k
    cur =op;
6415
394k
    last = NULL;
6416
394k
      }
6417
1.21M
  } else {
6418
7.49k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6419
7.49k
      if ((last != NULL) && (last != ret))
6420
6.93k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6421
7.49k
      if (ret != NULL)
6422
7.49k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6423
7.49k
      return(NULL);
6424
7.49k
  }
6425
1.21M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6426
1.21M
        if (RAW == '(') {
6427
823k
            int newInputNr = ctxt->inputNr;
6428
6429
      /* Recurse on second child */
6430
823k
      NEXT;
6431
823k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6432
823k
                                                          depth + 1);
6433
823k
            if (last == NULL) {
6434
629k
    if (ret != NULL)
6435
629k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6436
629k
    return(NULL);
6437
629k
            }
6438
823k
  } else {
6439
394k
      elem = xmlParseName(ctxt);
6440
394k
      if (elem == NULL) {
6441
533
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6442
533
    if (ret != NULL)
6443
533
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6444
533
    return(NULL);
6445
533
      }
6446
394k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6447
394k
      if (last == NULL) {
6448
0
                xmlErrMemory(ctxt);
6449
0
    if (ret != NULL)
6450
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6451
0
    return(NULL);
6452
0
      }
6453
394k
      if (RAW == '?') {
6454
30.3k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6455
30.3k
    NEXT;
6456
363k
      } else if (RAW == '*') {
6457
166k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6458
166k
    NEXT;
6459
197k
      } else if (RAW == '+') {
6460
286
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6461
286
    NEXT;
6462
196k
      } else {
6463
196k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6464
196k
      }
6465
394k
  }
6466
1.21M
    }
6467
195k
    if ((cur != NULL) && (last != NULL)) {
6468
186k
        cur->c2 = last;
6469
186k
  if (last != NULL)
6470
186k
      last->parent = cur;
6471
186k
    }
6472
195k
#ifdef LIBXML_VALID_ENABLED
6473
195k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6474
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6475
0
                         "Element content declaration doesn't start "
6476
0
                         "and stop in the same entity\n",
6477
0
                         NULL, NULL);
6478
0
    }
6479
195k
#endif
6480
195k
    NEXT;
6481
195k
    if (RAW == '?') {
6482
1.40k
  if (ret != NULL) {
6483
1.40k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6484
1.40k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6485
933
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6486
468
      else
6487
468
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6488
1.40k
  }
6489
1.40k
  NEXT;
6490
193k
    } else if (RAW == '*') {
6491
187k
  if (ret != NULL) {
6492
187k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6493
187k
      cur = ret;
6494
      /*
6495
       * Some normalization:
6496
       * (a | b* | c?)* == (a | b | c)*
6497
       */
6498
703k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6499
516k
    if ((cur->c1 != NULL) &&
6500
516k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6501
516k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6502
344k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
516k
    if ((cur->c2 != NULL) &&
6504
516k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6505
516k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6506
181k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6507
516k
    cur = cur->c2;
6508
516k
      }
6509
187k
  }
6510
187k
  NEXT;
6511
187k
    } else if (RAW == '+') {
6512
1.81k
  if (ret != NULL) {
6513
1.81k
      int found = 0;
6514
6515
1.81k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6516
1.81k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6517
407
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518
1.40k
      else
6519
1.40k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6520
      /*
6521
       * Some normalization:
6522
       * (a | b*)+ == (a | b)*
6523
       * (a | b?)+ == (a | b)*
6524
       */
6525
5.09k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6526
3.27k
    if ((cur->c1 != NULL) &&
6527
3.27k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6528
3.27k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6529
839
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6530
839
        found = 1;
6531
839
    }
6532
3.27k
    if ((cur->c2 != NULL) &&
6533
3.27k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6534
3.27k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6535
1.10k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6536
1.10k
        found = 1;
6537
1.10k
    }
6538
3.27k
    cur = cur->c2;
6539
3.27k
      }
6540
1.81k
      if (found)
6541
1.21k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6542
1.81k
  }
6543
1.81k
  NEXT;
6544
1.81k
    }
6545
195k
    return(ret);
6546
832k
}
6547
6548
/**
6549
 * Parse the declaration of an element's children content model
6550
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6551
 *
6552
 * @deprecated Internal function, don't use.
6553
 *
6554
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6555
 *
6556
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6557
 *
6558
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6559
 *
6560
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6561
 *
6562
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6563
 * TODO Parameter-entity replacement text must be properly nested
6564
 *  with parenthesized groups. That is to say, if either of the
6565
 *  opening or closing parentheses in a choice, seq, or Mixed
6566
 *  construct is contained in the replacement text for a parameter
6567
 *  entity, both must be contained in the same replacement text. For
6568
 *  interoperability, if a parameter-entity reference appears in a
6569
 *  choice, seq, or Mixed construct, its replacement text should not
6570
 *  be empty, and neither the first nor last non-blank character of
6571
 *  the replacement text should be a connector (| or ,).
6572
 *
6573
 * @param ctxt  an XML parser context
6574
 * @param inputchk  the input used for the current entity, needed for boundary checks
6575
 * @returns the tree of xmlElementContent describing the element
6576
 *          hierarchy.
6577
 */
6578
xmlElementContent *
6579
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6580
    /* stub left for API/ABI compat */
6581
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6582
0
}
6583
6584
/**
6585
 * parse the declaration for an Element content either Mixed or Children,
6586
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6587
 *
6588
 * @deprecated Internal function, don't use.
6589
 *
6590
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6591
 *
6592
 * @param ctxt  an XML parser context
6593
 * @param name  the name of the element being defined.
6594
 * @param result  the Element Content pointer will be stored here if any
6595
 * @returns an xmlElementTypeVal value or -1 on error
6596
 */
6597
6598
int
6599
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6600
11.1k
                           xmlElementContent **result) {
6601
6602
11.1k
    xmlElementContentPtr tree = NULL;
6603
11.1k
    int openInputNr = ctxt->inputNr;
6604
11.1k
    int res;
6605
6606
11.1k
    *result = NULL;
6607
6608
11.1k
    if (RAW != '(') {
6609
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6610
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6611
0
  return(-1);
6612
0
    }
6613
11.1k
    NEXT;
6614
11.1k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6615
11.1k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6616
1.71k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6617
1.71k
  res = XML_ELEMENT_TYPE_MIXED;
6618
9.44k
    } else {
6619
9.44k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6620
9.44k
  res = XML_ELEMENT_TYPE_ELEMENT;
6621
9.44k
    }
6622
11.1k
    if (tree == NULL)
6623
9.62k
        return(-1);
6624
1.53k
    SKIP_BLANKS_PE;
6625
1.53k
    *result = tree;
6626
1.53k
    return(res);
6627
11.1k
}
6628
6629
/**
6630
 * Parse an element declaration. Always consumes '<!'.
6631
 *
6632
 * @deprecated Internal function, don't use.
6633
 *
6634
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6635
 *
6636
 * [ VC: Unique Element Type Declaration ]
6637
 * No element type may be declared more than once
6638
 *
6639
 * @param ctxt  an XML parser context
6640
 * @returns the type of the element, or -1 in case of error
6641
 */
6642
int
6643
19.7k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6644
19.7k
    const xmlChar *name;
6645
19.7k
    int ret = -1;
6646
19.7k
    xmlElementContentPtr content  = NULL;
6647
6648
19.7k
    if ((CUR != '<') || (NXT(1) != '!'))
6649
0
        return(ret);
6650
19.7k
    SKIP(2);
6651
6652
    /* GROW; done in the caller */
6653
19.7k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
19.7k
#ifdef LIBXML_VALID_ENABLED
6655
19.7k
  int oldInputNr = ctxt->inputNr;
6656
19.7k
#endif
6657
6658
19.7k
  SKIP(7);
6659
19.7k
  if (SKIP_BLANKS_PE == 0) {
6660
6.83k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6661
6.83k
               "Space required after 'ELEMENT'\n");
6662
6.83k
      return(-1);
6663
6.83k
  }
6664
12.9k
        name = xmlParseName(ctxt);
6665
12.9k
  if (name == NULL) {
6666
352
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6667
352
         "xmlParseElementDecl: no name for Element\n");
6668
352
      return(-1);
6669
352
  }
6670
12.5k
  if (SKIP_BLANKS_PE == 0) {
6671
10.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6672
10.4k
         "Space required after the element name\n");
6673
10.4k
  }
6674
12.5k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6675
475
      SKIP(5);
6676
      /*
6677
       * Element must always be empty.
6678
       */
6679
475
      ret = XML_ELEMENT_TYPE_EMPTY;
6680
12.1k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6681
12.1k
             (NXT(2) == 'Y')) {
6682
445
      SKIP(3);
6683
      /*
6684
       * Element is a generic container.
6685
       */
6686
445
      ret = XML_ELEMENT_TYPE_ANY;
6687
11.6k
  } else if (RAW == '(') {
6688
11.1k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6689
11.1k
            if (ret <= 0)
6690
9.62k
                return(-1);
6691
11.1k
  } else {
6692
      /*
6693
       * [ WFC: PEs in Internal Subset ] error handling.
6694
       */
6695
498
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6696
498
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6697
498
      return(-1);
6698
498
  }
6699
6700
2.45k
  SKIP_BLANKS_PE;
6701
6702
2.45k
  if (RAW != '>') {
6703
1.53k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6704
1.53k
      if (content != NULL) {
6705
1.11k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6706
1.11k
      }
6707
1.53k
  } else {
6708
918
#ifdef LIBXML_VALID_ENABLED
6709
918
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6710
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6711
0
                                 "Element declaration doesn't start and stop in"
6712
0
                                 " the same entity\n",
6713
0
                                 NULL, NULL);
6714
0
      }
6715
918
#endif
6716
6717
918
      NEXT;
6718
918
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6719
918
    (ctxt->sax->elementDecl != NULL)) {
6720
385
    if (content != NULL)
6721
205
        content->parent = NULL;
6722
385
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6723
385
                           content);
6724
385
    if ((content != NULL) && (content->parent == NULL)) {
6725
        /*
6726
         * this is a trick: if xmlAddElementDecl is called,
6727
         * instead of copying the full tree it is plugged directly
6728
         * if called from the parser. Avoid duplicating the
6729
         * interfaces or change the API/ABI
6730
         */
6731
117
        xmlFreeDocElementContent(ctxt->myDoc, content);
6732
117
    }
6733
533
      } else if (content != NULL) {
6734
218
    xmlFreeDocElementContent(ctxt->myDoc, content);
6735
218
      }
6736
918
  }
6737
2.45k
    }
6738
2.48k
    return(ret);
6739
19.7k
}
6740
6741
/**
6742
 * Parse a conditional section. Always consumes '<!['.
6743
 *
6744
 *     [61] conditionalSect ::= includeSect | ignoreSect
6745
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6748
 *                                 Ignore)*
6749
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6750
 * @param ctxt  an XML parser context
6751
 */
6752
6753
static void
6754
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6755
0
    size_t depth = 0;
6756
0
    int isFreshPE = 0;
6757
0
    int oldInputNr = ctxt->inputNr;
6758
0
    int declInputNr = ctxt->inputNr;
6759
6760
0
    while (!PARSER_STOPPED(ctxt)) {
6761
0
        if (ctxt->input->cur >= ctxt->input->end) {
6762
0
            if (ctxt->inputNr <= oldInputNr) {
6763
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6764
0
                return;
6765
0
            }
6766
6767
0
            xmlPopPE(ctxt);
6768
0
            declInputNr = ctxt->inputNr;
6769
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6770
0
            SKIP(3);
6771
0
            SKIP_BLANKS_PE;
6772
6773
0
            isFreshPE = 0;
6774
6775
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6776
0
                SKIP(7);
6777
0
                SKIP_BLANKS_PE;
6778
0
                if (RAW != '[') {
6779
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6780
0
                    return;
6781
0
                }
6782
0
#ifdef LIBXML_VALID_ENABLED
6783
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6784
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6785
0
                                     "All markup of the conditional section is"
6786
0
                                     " not in the same entity\n",
6787
0
                                     NULL, NULL);
6788
0
                }
6789
0
#endif
6790
0
                NEXT;
6791
6792
0
                depth++;
6793
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
0
                size_t ignoreDepth = 0;
6795
6796
0
                SKIP(6);
6797
0
                SKIP_BLANKS_PE;
6798
0
                if (RAW != '[') {
6799
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6800
0
                    return;
6801
0
                }
6802
0
#ifdef LIBXML_VALID_ENABLED
6803
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6804
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6805
0
                                     "All markup of the conditional section is"
6806
0
                                     " not in the same entity\n",
6807
0
                                     NULL, NULL);
6808
0
                }
6809
0
#endif
6810
0
                NEXT;
6811
6812
0
                while (PARSER_STOPPED(ctxt) == 0) {
6813
0
                    if (RAW == 0) {
6814
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6815
0
                        return;
6816
0
                    }
6817
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6818
0
                        SKIP(3);
6819
0
                        ignoreDepth++;
6820
                        /* Check for integer overflow */
6821
0
                        if (ignoreDepth == 0) {
6822
0
                            xmlErrMemory(ctxt);
6823
0
                            return;
6824
0
                        }
6825
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6826
0
                               (NXT(2) == '>')) {
6827
0
                        SKIP(3);
6828
0
                        if (ignoreDepth == 0)
6829
0
                            break;
6830
0
                        ignoreDepth--;
6831
0
                    } else {
6832
0
                        NEXT;
6833
0
                    }
6834
0
                }
6835
6836
0
#ifdef LIBXML_VALID_ENABLED
6837
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6838
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6839
0
                                     "All markup of the conditional section is"
6840
0
                                     " not in the same entity\n",
6841
0
                                     NULL, NULL);
6842
0
                }
6843
0
#endif
6844
0
            } else {
6845
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6846
0
                return;
6847
0
            }
6848
0
        } else if ((depth > 0) &&
6849
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6850
0
            if (isFreshPE) {
6851
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6852
0
                               "Parameter entity must match "
6853
0
                               "extSubsetDecl\n");
6854
0
                return;
6855
0
            }
6856
6857
0
            depth--;
6858
0
#ifdef LIBXML_VALID_ENABLED
6859
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6860
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6861
0
                                 "All markup of the conditional section is not"
6862
0
                                 " in the same entity\n",
6863
0
                                 NULL, NULL);
6864
0
            }
6865
0
#endif
6866
0
            SKIP(3);
6867
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6868
0
            isFreshPE = 0;
6869
0
            xmlParseMarkupDecl(ctxt);
6870
0
        } else if (RAW == '%') {
6871
0
            xmlParsePERefInternal(ctxt, 1);
6872
0
            if (ctxt->inputNr > declInputNr) {
6873
0
                isFreshPE = 1;
6874
0
                declInputNr = ctxt->inputNr;
6875
0
            }
6876
0
        } else {
6877
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6878
0
            return;
6879
0
        }
6880
6881
0
        if (depth == 0)
6882
0
            break;
6883
6884
0
        SKIP_BLANKS;
6885
0
        SHRINK;
6886
0
        GROW;
6887
0
    }
6888
0
}
6889
6890
/**
6891
 * Parse markup declarations. Always consumes '<!' or '<?'.
6892
 *
6893
 * @deprecated Internal function, don't use.
6894
 *
6895
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6896
 *                         NotationDecl | PI | Comment
6897
 *
6898
 * [ VC: Proper Declaration/PE Nesting ]
6899
 * Parameter-entity replacement text must be properly nested with
6900
 * markup declarations. That is to say, if either the first character
6901
 * or the last character of a markup declaration (markupdecl above) is
6902
 * contained in the replacement text for a parameter-entity reference,
6903
 * both must be contained in the same replacement text.
6904
 *
6905
 * [ WFC: PEs in Internal Subset ]
6906
 * In the internal DTD subset, parameter-entity references can occur
6907
 * only where markup declarations can occur, not within markup declarations.
6908
 * (This does not apply to references that occur in external parameter
6909
 * entities or to the external subset.)
6910
 *
6911
 * @param ctxt  an XML parser context
6912
 */
6913
void
6914
105k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6915
105k
    GROW;
6916
105k
    if (CUR == '<') {
6917
105k
        if (NXT(1) == '!') {
6918
84.3k
      switch (NXT(2)) {
6919
46.9k
          case 'E':
6920
46.9k
        if (NXT(3) == 'L')
6921
19.7k
      xmlParseElementDecl(ctxt);
6922
27.1k
        else if (NXT(3) == 'N')
6923
27.1k
      xmlParseEntityDecl(ctxt);
6924
13
                    else
6925
13
                        SKIP(2);
6926
46.9k
        break;
6927
21.0k
          case 'A':
6928
21.0k
        xmlParseAttributeListDecl(ctxt);
6929
21.0k
        break;
6930
7.01k
          case 'N':
6931
7.01k
        xmlParseNotationDecl(ctxt);
6932
7.01k
        break;
6933
5.65k
          case '-':
6934
5.65k
        xmlParseComment(ctxt);
6935
5.65k
        break;
6936
3.68k
    default:
6937
3.68k
                    xmlFatalErr(ctxt,
6938
3.68k
                                ctxt->inSubset == 2 ?
6939
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6940
3.68k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6941
3.68k
                                NULL);
6942
3.68k
                    SKIP(2);
6943
3.68k
        break;
6944
84.3k
      }
6945
84.3k
  } else if (NXT(1) == '?') {
6946
21.5k
      xmlParsePI(ctxt);
6947
21.5k
  }
6948
105k
    }
6949
105k
}
6950
6951
/**
6952
 * parse an XML declaration header for external entities
6953
 *
6954
 * @deprecated Internal function, don't use.
6955
 *
6956
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6957
 * @param ctxt  an XML parser context
6958
 */
6959
6960
void
6961
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6962
0
    xmlChar *version;
6963
6964
    /*
6965
     * We know that '<?xml' is here.
6966
     */
6967
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6968
0
  SKIP(5);
6969
0
    } else {
6970
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6971
0
  return;
6972
0
    }
6973
6974
0
    if (SKIP_BLANKS == 0) {
6975
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6976
0
           "Space needed after '<?xml'\n");
6977
0
    }
6978
6979
    /*
6980
     * We may have the VersionInfo here.
6981
     */
6982
0
    version = xmlParseVersionInfo(ctxt);
6983
0
    if (version == NULL) {
6984
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6985
0
        if (version == NULL) {
6986
0
            xmlErrMemory(ctxt);
6987
0
            return;
6988
0
        }
6989
0
    } else {
6990
0
  if (SKIP_BLANKS == 0) {
6991
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6992
0
               "Space needed here\n");
6993
0
  }
6994
0
    }
6995
0
    ctxt->input->version = version;
6996
6997
    /*
6998
     * We must have the encoding declaration
6999
     */
7000
0
    xmlParseEncodingDecl(ctxt);
7001
7002
0
    SKIP_BLANKS;
7003
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7004
0
        SKIP(2);
7005
0
    } else if (RAW == '>') {
7006
        /* Deprecated old WD ... */
7007
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7008
0
  NEXT;
7009
0
    } else {
7010
0
        int c;
7011
7012
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7013
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7014
0
            NEXT;
7015
0
            if (c == '>')
7016
0
                break;
7017
0
        }
7018
0
    }
7019
0
}
7020
7021
/**
7022
 * parse Markup declarations from an external subset
7023
 *
7024
 * @deprecated Internal function, don't use.
7025
 *
7026
 *     [30] extSubset ::= textDecl? extSubsetDecl
7027
 *
7028
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7029
 *                             PEReference | S) *
7030
 * @param ctxt  an XML parser context
7031
 * @param publicId  the public identifier
7032
 * @param systemId  the system identifier (URL)
7033
 */
7034
void
7035
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7036
0
                       const xmlChar *systemId) {
7037
0
    int oldInputNr;
7038
7039
0
    xmlCtxtInitializeLate(ctxt);
7040
7041
0
    xmlDetectEncoding(ctxt);
7042
7043
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7044
0
  xmlParseTextDecl(ctxt);
7045
0
    }
7046
0
    if (ctxt->myDoc == NULL) {
7047
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7048
0
  if (ctxt->myDoc == NULL) {
7049
0
      xmlErrMemory(ctxt);
7050
0
      return;
7051
0
  }
7052
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7053
0
    }
7054
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7055
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7056
0
        xmlErrMemory(ctxt);
7057
0
    }
7058
7059
0
    ctxt->inSubset = 2;
7060
0
    oldInputNr = ctxt->inputNr;
7061
7062
0
    SKIP_BLANKS;
7063
0
    while (!PARSER_STOPPED(ctxt)) {
7064
0
        if (ctxt->input->cur >= ctxt->input->end) {
7065
0
            if (ctxt->inputNr <= oldInputNr) {
7066
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7067
0
                break;
7068
0
            }
7069
7070
0
            xmlPopPE(ctxt);
7071
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7072
0
            xmlParseConditionalSections(ctxt);
7073
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7074
0
            xmlParseMarkupDecl(ctxt);
7075
0
        } else if (RAW == '%') {
7076
0
            xmlParsePERefInternal(ctxt, 1);
7077
0
        } else {
7078
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7079
7080
0
            while (ctxt->inputNr > oldInputNr)
7081
0
                xmlPopPE(ctxt);
7082
0
            break;
7083
0
        }
7084
0
        SKIP_BLANKS;
7085
0
        SHRINK;
7086
0
        GROW;
7087
0
    }
7088
0
}
7089
7090
/**
7091
 * parse and handle entity references in content, depending on the SAX
7092
 * interface, this may end up in a call to characters() if this is a
7093
 * CharRef, a predefined entity, if there is no reference() callback,
7094
 * or if the parser was asked to switch to that mode.
7095
 *
7096
 * @deprecated Internal function, don't use.
7097
 *
7098
 * Always consumes '&'.
7099
 *
7100
 *     [67] Reference ::= EntityRef | CharRef
7101
 * @param ctxt  an XML parser context
7102
 */
7103
void
7104
143k
xmlParseReference(xmlParserCtxt *ctxt) {
7105
143k
    xmlEntityPtr ent = NULL;
7106
143k
    const xmlChar *name;
7107
143k
    xmlChar *val;
7108
7109
143k
    if (RAW != '&')
7110
0
        return;
7111
7112
    /*
7113
     * Simple case of a CharRef
7114
     */
7115
143k
    if (NXT(1) == '#') {
7116
34.8k
  int i = 0;
7117
34.8k
  xmlChar out[16];
7118
34.8k
  int value = xmlParseCharRef(ctxt);
7119
7120
34.8k
  if (value == 0)
7121
13.0k
      return;
7122
7123
        /*
7124
         * Just encode the value in UTF-8
7125
         */
7126
21.8k
        COPY_BUF(out, i, value);
7127
21.8k
        out[i] = 0;
7128
21.8k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7129
21.8k
            (!ctxt->disableSAX))
7130
445
            ctxt->sax->characters(ctxt->userData, out, i);
7131
21.8k
  return;
7132
34.8k
    }
7133
7134
    /*
7135
     * We are seeing an entity reference
7136
     */
7137
108k
    name = xmlParseEntityRefInternal(ctxt);
7138
108k
    if (name == NULL)
7139
57.0k
        return;
7140
51.6k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7141
51.6k
    if (ent == NULL) {
7142
        /*
7143
         * Create a reference for undeclared entities.
7144
         */
7145
31.5k
        if ((ctxt->replaceEntities == 0) &&
7146
31.5k
            (ctxt->sax != NULL) &&
7147
31.5k
            (ctxt->disableSAX == 0) &&
7148
31.5k
            (ctxt->sax->reference != NULL)) {
7149
0
            ctxt->sax->reference(ctxt->userData, name);
7150
0
        }
7151
31.5k
        return;
7152
31.5k
    }
7153
20.0k
    if (!ctxt->wellFormed)
7154
11.2k
  return;
7155
7156
    /* special case of predefined entities */
7157
8.83k
    if ((ent->name == NULL) ||
7158
8.83k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7159
1.08k
  val = ent->content;
7160
1.08k
  if (val == NULL) return;
7161
  /*
7162
   * inline the entity.
7163
   */
7164
1.08k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7165
1.08k
      (!ctxt->disableSAX))
7166
1.08k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7167
1.08k
  return;
7168
1.08k
    }
7169
7170
    /*
7171
     * Some users try to parse entities on their own and used to set
7172
     * the renamed "checked" member. Fix the flags to cover this
7173
     * case.
7174
     */
7175
7.75k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7176
0
        ent->flags |= XML_ENT_PARSED;
7177
7178
    /*
7179
     * The first reference to the entity trigger a parsing phase
7180
     * where the ent->children is filled with the result from
7181
     * the parsing.
7182
     * Note: external parsed entities will not be loaded, it is not
7183
     * required for a non-validating parser, unless the parsing option
7184
     * of validating, or substituting entities were given. Doing so is
7185
     * far more secure as the parser will only process data coming from
7186
     * the document entity by default.
7187
     *
7188
     * FIXME: This doesn't work correctly since entities can be
7189
     * expanded with different namespace declarations in scope.
7190
     * For example:
7191
     *
7192
     * <!DOCTYPE doc [
7193
     *   <!ENTITY ent "<ns:elem/>">
7194
     * ]>
7195
     * <doc>
7196
     *   <decl1 xmlns:ns="urn:ns1">
7197
     *     &ent;
7198
     *   </decl1>
7199
     *   <decl2 xmlns:ns="urn:ns2">
7200
     *     &ent;
7201
     *   </decl2>
7202
     * </doc>
7203
     *
7204
     * Proposed fix:
7205
     *
7206
     * - Ignore current namespace declarations when parsing the
7207
     *   entity. If a prefix can't be resolved, don't report an error
7208
     *   but mark it as unresolved.
7209
     * - Try to resolve these prefixes when expanding the entity.
7210
     *   This will require a specialized version of xmlStaticCopyNode
7211
     *   which can also make use of the namespace hash table to avoid
7212
     *   quadratic behavior.
7213
     *
7214
     * Alternatively, we could simply reparse the entity on each
7215
     * expansion like we already do with custom SAX callbacks.
7216
     * External entity content should be cached in this case.
7217
     */
7218
7.75k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7219
7.75k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7220
100
         ((ctxt->replaceEntities) ||
7221
7.65k
          (ctxt->validate)))) {
7222
7.65k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7223
555
            xmlCtxtParseEntity(ctxt, ent);
7224
7.10k
        } else if (ent->children == NULL) {
7225
            /*
7226
             * Probably running in SAX mode and the callbacks don't
7227
             * build the entity content. Parse the entity again.
7228
             *
7229
             * This will also be triggered in normal tree builder mode
7230
             * if an entity happens to be empty, causing unnecessary
7231
             * reloads. It's hard to come up with a reliable check in
7232
             * which mode we're running.
7233
             */
7234
3.34k
            xmlCtxtParseEntity(ctxt, ent);
7235
3.34k
        }
7236
7.65k
    }
7237
7238
    /*
7239
     * We also check for amplification if entities aren't substituted.
7240
     * They might be expanded later.
7241
     */
7242
7.75k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7243
31
        return;
7244
7245
7.72k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7246
147
        return;
7247
7248
7.58k
    if (ctxt->replaceEntities == 0) {
7249
  /*
7250
   * Create a reference
7251
   */
7252
0
        if (ctxt->sax->reference != NULL)
7253
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7254
7.58k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7255
4.08k
        xmlNodePtr copy, cur;
7256
7257
        /*
7258
         * Seems we are generating the DOM content, copy the tree
7259
   */
7260
4.08k
        cur = ent->children;
7261
7262
        /*
7263
         * Handle first text node with SAX to coalesce text efficiently
7264
         */
7265
4.08k
        if ((cur->type == XML_TEXT_NODE) ||
7266
4.08k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7267
3.18k
            int len = xmlStrlen(cur->content);
7268
7269
3.18k
            if ((cur->type == XML_TEXT_NODE) ||
7270
3.18k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7271
2.90k
                if (ctxt->sax->characters != NULL)
7272
2.90k
                    ctxt->sax->characters(ctxt, cur->content, len);
7273
2.90k
            } else {
7274
280
                if (ctxt->sax->cdataBlock != NULL)
7275
280
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7276
280
            }
7277
7278
3.18k
            cur = cur->next;
7279
3.18k
        }
7280
7281
9.27k
        while (cur != NULL) {
7282
7.56k
            xmlNodePtr last;
7283
7284
            /*
7285
             * Handle last text node with SAX to coalesce text efficiently
7286
             */
7287
7.56k
            if ((cur->next == NULL) &&
7288
7.56k
                ((cur->type == XML_TEXT_NODE) ||
7289
3.00k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7290
2.37k
                int len = xmlStrlen(cur->content);
7291
7292
2.37k
                if ((cur->type == XML_TEXT_NODE) ||
7293
2.37k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7294
2.18k
                    if (ctxt->sax->characters != NULL)
7295
2.18k
                        ctxt->sax->characters(ctxt, cur->content, len);
7296
2.18k
                } else {
7297
191
                    if (ctxt->sax->cdataBlock != NULL)
7298
191
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7299
191
                }
7300
7301
2.37k
                break;
7302
2.37k
            }
7303
7304
            /*
7305
             * Reset coalesce buffer stats only for non-text nodes.
7306
             */
7307
5.18k
            ctxt->nodemem = 0;
7308
5.18k
            ctxt->nodelen = 0;
7309
7310
5.18k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7311
7312
5.18k
            if (copy == NULL) {
7313
0
                xmlErrMemory(ctxt);
7314
0
                break;
7315
0
            }
7316
7317
5.18k
            if (ctxt->parseMode == XML_PARSE_READER) {
7318
                /* Needed for reader */
7319
0
                copy->extra = cur->extra;
7320
                /* Maybe needed for reader */
7321
0
                copy->_private = cur->_private;
7322
0
            }
7323
7324
5.18k
            copy->parent = ctxt->node;
7325
5.18k
            last = ctxt->node->last;
7326
5.18k
            if (last == NULL) {
7327
224
                ctxt->node->children = copy;
7328
4.95k
            } else {
7329
4.95k
                last->next = copy;
7330
4.95k
                copy->prev = last;
7331
4.95k
            }
7332
5.18k
            ctxt->node->last = copy;
7333
7334
5.18k
            cur = cur->next;
7335
5.18k
        }
7336
4.08k
    }
7337
7.58k
}
7338
7339
static void
7340
95.5k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7341
    /*
7342
     * [ WFC: Entity Declared ]
7343
     * In a document without any DTD, a document with only an
7344
     * internal DTD subset which contains no parameter entity
7345
     * references, or a document with "standalone='yes'", the
7346
     * Name given in the entity reference must match that in an
7347
     * entity declaration, except that well-formed documents
7348
     * need not declare any of the following entities: amp, lt,
7349
     * gt, apos, quot.
7350
     * The declaration of a parameter entity must precede any
7351
     * reference to it.
7352
     * Similarly, the declaration of a general entity must
7353
     * precede any reference to it which appears in a default
7354
     * value in an attribute-list declaration. Note that if
7355
     * entities are declared in the external subset or in
7356
     * external parameter entities, a non-validating processor
7357
     * is not obligated to read and process their declarations;
7358
     * for such documents, the rule that an entity must be
7359
     * declared is a well-formedness constraint only if
7360
     * standalone='yes'.
7361
     */
7362
95.5k
    if ((ctxt->standalone == 1) ||
7363
95.5k
        ((ctxt->hasExternalSubset == 0) &&
7364
95.3k
         (ctxt->hasPErefs == 0))) {
7365
77.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7366
77.5k
                          "Entity '%s' not defined\n", name);
7367
77.5k
#ifdef LIBXML_VALID_ENABLED
7368
77.5k
    } else if (ctxt->validate) {
7369
        /*
7370
         * [ VC: Entity Declared ]
7371
         * In a document with an external subset or external
7372
         * parameter entities with "standalone='no'", ...
7373
         * ... The declaration of a parameter entity must
7374
         * precede any reference to it...
7375
         */
7376
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7377
0
                         "Entity '%s' not defined\n", name, NULL);
7378
0
#endif
7379
17.9k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7380
17.9k
               ((ctxt->replaceEntities) &&
7381
17.9k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7382
        /*
7383
         * Also raise a non-fatal error
7384
         *
7385
         * - if the external subset is loaded and all entity declarations
7386
         *   should be available, or
7387
         * - entity substition was requested without restricting
7388
         *   external entity access.
7389
         */
7390
17.9k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7391
17.9k
                     "Entity '%s' not defined\n", name);
7392
17.9k
    } else {
7393
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7394
0
                      "Entity '%s' not defined\n", name, NULL);
7395
0
    }
7396
7397
95.5k
    ctxt->valid = 0;
7398
95.5k
}
7399
7400
static xmlEntityPtr
7401
403k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7402
403k
    xmlEntityPtr ent = NULL;
7403
7404
    /*
7405
     * Predefined entities override any extra definition
7406
     */
7407
403k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7408
403k
        ent = xmlGetPredefinedEntity(name);
7409
403k
        if (ent != NULL)
7410
76.0k
            return(ent);
7411
403k
    }
7412
7413
    /*
7414
     * Ask first SAX for entity resolution, otherwise try the
7415
     * entities which may have stored in the parser context.
7416
     */
7417
327k
    if (ctxt->sax != NULL) {
7418
327k
  if (ctxt->sax->getEntity != NULL)
7419
327k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7420
327k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7421
327k
      (ctxt->options & XML_PARSE_OLDSAX))
7422
0
      ent = xmlGetPredefinedEntity(name);
7423
327k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7424
327k
      (ctxt->userData==ctxt)) {
7425
3.93k
      ent = xmlSAX2GetEntity(ctxt, name);
7426
3.93k
  }
7427
327k
    }
7428
7429
327k
    if (ent == NULL) {
7430
90.9k
        xmlHandleUndeclaredEntity(ctxt, name);
7431
90.9k
    }
7432
7433
    /*
7434
     * [ WFC: Parsed Entity ]
7435
     * An entity reference must not contain the name of an
7436
     * unparsed entity
7437
     */
7438
236k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7439
181
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7440
181
     "Entity reference to unparsed entity %s\n", name);
7441
181
        ent = NULL;
7442
181
    }
7443
7444
    /*
7445
     * [ WFC: No External Entity References ]
7446
     * Attribute values cannot contain direct or indirect
7447
     * entity references to external entities.
7448
     */
7449
236k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7450
609
        if (inAttr) {
7451
211
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7452
211
                 "Attribute references external entity '%s'\n", name);
7453
211
            ent = NULL;
7454
211
        }
7455
609
    }
7456
7457
327k
    return(ent);
7458
403k
}
7459
7460
/**
7461
 * Parse an entity reference. Always consumes '&'.
7462
 *
7463
 *     [68] EntityRef ::= '&' Name ';'
7464
 *
7465
 * @param ctxt  an XML parser context
7466
 * @returns the name, or NULL in case of error.
7467
 */
7468
static const xmlChar *
7469
269k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7470
269k
    const xmlChar *name;
7471
7472
269k
    GROW;
7473
7474
269k
    if (RAW != '&')
7475
0
        return(NULL);
7476
269k
    NEXT;
7477
269k
    name = xmlParseName(ctxt);
7478
269k
    if (name == NULL) {
7479
55.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7480
55.2k
           "xmlParseEntityRef: no name\n");
7481
55.2k
        return(NULL);
7482
55.2k
    }
7483
213k
    if (RAW != ';') {
7484
36.9k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7485
36.9k
  return(NULL);
7486
36.9k
    }
7487
177k
    NEXT;
7488
7489
177k
    return(name);
7490
213k
}
7491
7492
/**
7493
 * @deprecated Internal function, don't use.
7494
 *
7495
 * @param ctxt  an XML parser context
7496
 * @returns the xmlEntity if found, or NULL otherwise.
7497
 */
7498
xmlEntity *
7499
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7500
0
    const xmlChar *name;
7501
7502
0
    if (ctxt == NULL)
7503
0
        return(NULL);
7504
7505
0
    name = xmlParseEntityRefInternal(ctxt);
7506
0
    if (name == NULL)
7507
0
        return(NULL);
7508
7509
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7510
0
}
7511
7512
/**
7513
 * parse ENTITY references declarations, but this version parses it from
7514
 * a string value.
7515
 *
7516
 *     [68] EntityRef ::= '&' Name ';'
7517
 *
7518
 * [ WFC: Entity Declared ]
7519
 * In a document without any DTD, a document with only an internal DTD
7520
 * subset which contains no parameter entity references, or a document
7521
 * with "standalone='yes'", the Name given in the entity reference
7522
 * must match that in an entity declaration, except that well-formed
7523
 * documents need not declare any of the following entities: amp, lt,
7524
 * gt, apos, quot.  The declaration of a parameter entity must precede
7525
 * any reference to it.  Similarly, the declaration of a general entity
7526
 * must precede any reference to it which appears in a default value in an
7527
 * attribute-list declaration. Note that if entities are declared in the
7528
 * external subset or in external parameter entities, a non-validating
7529
 * processor is not obligated to read and process their declarations;
7530
 * for such documents, the rule that an entity must be declared is a
7531
 * well-formedness constraint only if standalone='yes'.
7532
 *
7533
 * [ WFC: Parsed Entity ]
7534
 * An entity reference must not contain the name of an unparsed entity
7535
 *
7536
 * @param ctxt  an XML parser context
7537
 * @param str  a pointer to an index in the string
7538
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7539
 * is updated to the current location in the string.
7540
 */
7541
static xmlChar *
7542
226k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7543
226k
    xmlChar *name;
7544
226k
    const xmlChar *ptr;
7545
226k
    xmlChar cur;
7546
7547
226k
    if ((str == NULL) || (*str == NULL))
7548
0
        return(NULL);
7549
226k
    ptr = *str;
7550
226k
    cur = *ptr;
7551
226k
    if (cur != '&')
7552
0
  return(NULL);
7553
7554
226k
    ptr++;
7555
226k
    name = xmlParseStringName(ctxt, &ptr);
7556
226k
    if (name == NULL) {
7557
6
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7558
6
           "xmlParseStringEntityRef: no name\n");
7559
6
  *str = ptr;
7560
6
  return(NULL);
7561
6
    }
7562
226k
    if (*ptr != ';') {
7563
5
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7564
5
        xmlFree(name);
7565
5
  *str = ptr;
7566
5
  return(NULL);
7567
5
    }
7568
226k
    ptr++;
7569
7570
226k
    *str = ptr;
7571
226k
    return(name);
7572
226k
}
7573
7574
/**
7575
 * Parse a parameter entity reference. Always consumes '%'.
7576
 *
7577
 * The entity content is handled directly by pushing it's content as
7578
 * a new input stream.
7579
 *
7580
 *     [69] PEReference ::= '%' Name ';'
7581
 *
7582
 * [ WFC: No Recursion ]
7583
 * A parsed entity must not contain a recursive
7584
 * reference to itself, either directly or indirectly.
7585
 *
7586
 * [ WFC: Entity Declared ]
7587
 * In a document without any DTD, a document with only an internal DTD
7588
 * subset which contains no parameter entity references, or a document
7589
 * with "standalone='yes'", ...  ... The declaration of a parameter
7590
 * entity must precede any reference to it...
7591
 *
7592
 * [ VC: Entity Declared ]
7593
 * In a document with an external subset or external parameter entities
7594
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7595
 * must precede any reference to it...
7596
 *
7597
 * [ WFC: In DTD ]
7598
 * Parameter-entity references may only appear in the DTD.
7599
 * NOTE: misleading but this is handled.
7600
 *
7601
 * @param ctxt  an XML parser context
7602
 * @param markupDecl  whether the PERef starts a markup declaration
7603
 */
7604
static void
7605
40.2k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7606
40.2k
    const xmlChar *name;
7607
40.2k
    xmlEntityPtr entity = NULL;
7608
40.2k
    xmlParserInputPtr input;
7609
7610
40.2k
    if (RAW != '%')
7611
0
        return;
7612
40.2k
    NEXT;
7613
40.2k
    name = xmlParseName(ctxt);
7614
40.2k
    if (name == NULL) {
7615
2.86k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7616
2.86k
  return;
7617
2.86k
    }
7618
37.4k
    if (RAW != ';') {
7619
3.64k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7620
3.64k
        return;
7621
3.64k
    }
7622
7623
33.7k
    NEXT;
7624
7625
    /* Must be set before xmlHandleUndeclaredEntity */
7626
33.7k
    ctxt->hasPErefs = 1;
7627
7628
    /*
7629
     * Request the entity from SAX
7630
     */
7631
33.7k
    if ((ctxt->sax != NULL) &&
7632
33.7k
  (ctxt->sax->getParameterEntity != NULL))
7633
33.7k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7634
7635
33.7k
    if (entity == NULL) {
7636
4.17k
        xmlHandleUndeclaredEntity(ctxt, name);
7637
29.6k
    } else {
7638
  /*
7639
   * Internal checking in case the entity quest barfed
7640
   */
7641
29.6k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7642
29.6k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7643
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7644
0
      "Internal: %%%s; is not a parameter entity\n",
7645
0
        name, NULL);
7646
29.6k
  } else {
7647
29.6k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7648
29.6k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7649
371
     ((ctxt->loadsubset == 0) &&
7650
0
      (ctxt->replaceEntities == 0) &&
7651
0
      (ctxt->validate == 0))))
7652
371
    return;
7653
7654
29.2k
            if (entity->flags & XML_ENT_EXPANDING) {
7655
2
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7656
2
                xmlHaltParser(ctxt);
7657
2
                return;
7658
2
            }
7659
7660
29.2k
      input = xmlNewEntityInputStream(ctxt, entity);
7661
29.2k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7662
0
                xmlFreeInputStream(input);
7663
0
    return;
7664
0
            }
7665
7666
29.2k
            entity->flags |= XML_ENT_EXPANDING;
7667
7668
29.2k
            if (markupDecl)
7669
22.0k
                input->flags |= XML_INPUT_MARKUP_DECL;
7670
7671
29.2k
            GROW;
7672
7673
29.2k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7674
0
                xmlDetectEncoding(ctxt);
7675
7676
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7677
0
                    (IS_BLANK_CH(NXT(5)))) {
7678
0
                    xmlParseTextDecl(ctxt);
7679
0
                }
7680
0
            }
7681
29.2k
  }
7682
29.6k
    }
7683
33.7k
}
7684
7685
/**
7686
 * Parse a parameter entity reference.
7687
 *
7688
 * @deprecated Internal function, don't use.
7689
 *
7690
 * @param ctxt  an XML parser context
7691
 */
7692
void
7693
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7694
0
    xmlParsePERefInternal(ctxt, 0);
7695
0
}
7696
7697
/**
7698
 * Load the content of an entity.
7699
 *
7700
 * @param ctxt  an XML parser context
7701
 * @param entity  an unloaded system entity
7702
 * @returns 0 in case of success and -1 in case of failure
7703
 */
7704
static int
7705
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7706
0
    xmlParserInputPtr oldinput, input = NULL;
7707
0
    xmlParserInputPtr *oldinputTab;
7708
0
    const xmlChar *oldencoding;
7709
0
    xmlChar *content = NULL;
7710
0
    xmlResourceType rtype;
7711
0
    size_t length, i;
7712
0
    int oldinputNr, oldinputMax;
7713
0
    int ret = -1;
7714
0
    int res;
7715
7716
0
    if ((ctxt == NULL) || (entity == NULL) ||
7717
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7718
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7719
0
  (entity->content != NULL)) {
7720
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7721
0
              "xmlLoadEntityContent parameter error");
7722
0
        return(-1);
7723
0
    }
7724
7725
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7726
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7727
0
    else
7728
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7729
7730
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7731
0
                            (char *) entity->ExternalID, rtype);
7732
0
    if (input == NULL)
7733
0
        return(-1);
7734
7735
0
    oldinput = ctxt->input;
7736
0
    oldinputNr = ctxt->inputNr;
7737
0
    oldinputMax = ctxt->inputMax;
7738
0
    oldinputTab = ctxt->inputTab;
7739
0
    oldencoding = ctxt->encoding;
7740
7741
0
    ctxt->input = NULL;
7742
0
    ctxt->inputNr = 0;
7743
0
    ctxt->inputMax = 1;
7744
0
    ctxt->encoding = NULL;
7745
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7746
0
    if (ctxt->inputTab == NULL) {
7747
0
        xmlErrMemory(ctxt);
7748
0
        xmlFreeInputStream(input);
7749
0
        goto error;
7750
0
    }
7751
7752
0
    xmlBufResetInput(input->buf->buffer, input);
7753
7754
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7755
0
        xmlFreeInputStream(input);
7756
0
        goto error;
7757
0
    }
7758
7759
0
    xmlDetectEncoding(ctxt);
7760
7761
    /*
7762
     * Parse a possible text declaration first
7763
     */
7764
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7765
0
  xmlParseTextDecl(ctxt);
7766
        /*
7767
         * An XML-1.0 document can't reference an entity not XML-1.0
7768
         */
7769
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7770
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7771
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7772
0
                           "Version mismatch between document and entity\n");
7773
0
        }
7774
0
    }
7775
7776
0
    length = input->cur - input->base;
7777
0
    xmlBufShrink(input->buf->buffer, length);
7778
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7779
7780
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7781
0
        ;
7782
7783
0
    xmlBufResetInput(input->buf->buffer, input);
7784
7785
0
    if (res < 0) {
7786
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7787
0
        goto error;
7788
0
    }
7789
7790
0
    length = xmlBufUse(input->buf->buffer);
7791
0
    if (length > INT_MAX) {
7792
0
        xmlErrMemory(ctxt);
7793
0
        goto error;
7794
0
    }
7795
7796
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7797
0
    if (content == NULL) {
7798
0
        xmlErrMemory(ctxt);
7799
0
        goto error;
7800
0
    }
7801
7802
0
    for (i = 0; i < length; ) {
7803
0
        int clen = length - i;
7804
0
        int c = xmlGetUTF8Char(content + i, &clen);
7805
7806
0
        if ((c < 0) || (!IS_CHAR(c))) {
7807
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7808
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7809
0
                              content[i]);
7810
0
            goto error;
7811
0
        }
7812
0
        i += clen;
7813
0
    }
7814
7815
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7816
0
    entity->content = content;
7817
0
    entity->length = length;
7818
0
    content = NULL;
7819
0
    ret = 0;
7820
7821
0
error:
7822
0
    while (ctxt->inputNr > 0)
7823
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7824
0
    xmlFree(ctxt->inputTab);
7825
0
    xmlFree((xmlChar *) ctxt->encoding);
7826
7827
0
    ctxt->input = oldinput;
7828
0
    ctxt->inputNr = oldinputNr;
7829
0
    ctxt->inputMax = oldinputMax;
7830
0
    ctxt->inputTab = oldinputTab;
7831
0
    ctxt->encoding = oldencoding;
7832
7833
0
    xmlFree(content);
7834
7835
0
    return(ret);
7836
0
}
7837
7838
/**
7839
 * parse PEReference declarations
7840
 *
7841
 *     [69] PEReference ::= '%' Name ';'
7842
 *
7843
 * [ WFC: No Recursion ]
7844
 * A parsed entity must not contain a recursive
7845
 * reference to itself, either directly or indirectly.
7846
 *
7847
 * [ WFC: Entity Declared ]
7848
 * In a document without any DTD, a document with only an internal DTD
7849
 * subset which contains no parameter entity references, or a document
7850
 * with "standalone='yes'", ...  ... The declaration of a parameter
7851
 * entity must precede any reference to it...
7852
 *
7853
 * [ VC: Entity Declared ]
7854
 * In a document with an external subset or external parameter entities
7855
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7856
 * must precede any reference to it...
7857
 *
7858
 * [ WFC: In DTD ]
7859
 * Parameter-entity references may only appear in the DTD.
7860
 * NOTE: misleading but this is handled.
7861
 *
7862
 * @param ctxt  an XML parser context
7863
 * @param str  a pointer to an index in the string
7864
 * @returns the string of the entity content.
7865
 *         str is updated to the current value of the index
7866
 */
7867
static xmlEntityPtr
7868
1.75k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7869
1.75k
    const xmlChar *ptr;
7870
1.75k
    xmlChar cur;
7871
1.75k
    xmlChar *name;
7872
1.75k
    xmlEntityPtr entity = NULL;
7873
7874
1.75k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7875
1.75k
    ptr = *str;
7876
1.75k
    cur = *ptr;
7877
1.75k
    if (cur != '%')
7878
0
        return(NULL);
7879
1.75k
    ptr++;
7880
1.75k
    name = xmlParseStringName(ctxt, &ptr);
7881
1.75k
    if (name == NULL) {
7882
320
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7883
320
           "xmlParseStringPEReference: no name\n");
7884
320
  *str = ptr;
7885
320
  return(NULL);
7886
320
    }
7887
1.43k
    cur = *ptr;
7888
1.43k
    if (cur != ';') {
7889
555
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7890
555
  xmlFree(name);
7891
555
  *str = ptr;
7892
555
  return(NULL);
7893
555
    }
7894
877
    ptr++;
7895
7896
    /* Must be set before xmlHandleUndeclaredEntity */
7897
877
    ctxt->hasPErefs = 1;
7898
7899
    /*
7900
     * Request the entity from SAX
7901
     */
7902
877
    if ((ctxt->sax != NULL) &&
7903
877
  (ctxt->sax->getParameterEntity != NULL))
7904
877
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7905
7906
877
    if (entity == NULL) {
7907
477
        xmlHandleUndeclaredEntity(ctxt, name);
7908
477
    } else {
7909
  /*
7910
   * Internal checking in case the entity quest barfed
7911
   */
7912
400
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7913
400
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7914
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7915
0
        "%%%s; is not a parameter entity\n",
7916
0
        name, NULL);
7917
0
  }
7918
400
    }
7919
7920
877
    xmlFree(name);
7921
877
    *str = ptr;
7922
877
    return(entity);
7923
1.43k
}
7924
7925
/**
7926
 * parse a DOCTYPE declaration
7927
 *
7928
 * @deprecated Internal function, don't use.
7929
 *
7930
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7931
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7932
 *
7933
 * [ VC: Root Element Type ]
7934
 * The Name in the document type declaration must match the element
7935
 * type of the root element.
7936
 *
7937
 * @param ctxt  an XML parser context
7938
 */
7939
7940
void
7941
6.30k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7942
6.30k
    const xmlChar *name = NULL;
7943
6.30k
    xmlChar *publicId = NULL;
7944
6.30k
    xmlChar *URI = NULL;
7945
7946
    /*
7947
     * We know that '<!DOCTYPE' has been detected.
7948
     */
7949
6.30k
    SKIP(9);
7950
7951
6.30k
    if (SKIP_BLANKS == 0) {
7952
3.33k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7953
3.33k
                       "Space required after 'DOCTYPE'\n");
7954
3.33k
    }
7955
7956
    /*
7957
     * Parse the DOCTYPE name.
7958
     */
7959
6.30k
    name = xmlParseName(ctxt);
7960
6.30k
    if (name == NULL) {
7961
1.61k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7962
1.61k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7963
1.61k
    }
7964
6.30k
    ctxt->intSubName = name;
7965
7966
6.30k
    SKIP_BLANKS;
7967
7968
    /*
7969
     * Check for public and system identifier (URI)
7970
     */
7971
6.30k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7972
7973
6.30k
    if ((URI != NULL) || (publicId != NULL)) {
7974
315
        ctxt->hasExternalSubset = 1;
7975
315
    }
7976
6.30k
    ctxt->extSubURI = URI;
7977
6.30k
    ctxt->extSubSystem = publicId;
7978
7979
6.30k
    SKIP_BLANKS;
7980
7981
    /*
7982
     * Create and update the internal subset.
7983
     */
7984
6.30k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7985
6.30k
  (!ctxt->disableSAX))
7986
2.82k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7987
7988
6.30k
    if ((RAW != '[') && (RAW != '>')) {
7989
197
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7990
197
    }
7991
6.30k
}
7992
7993
/**
7994
 * parse the internal subset declaration
7995
 *
7996
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7997
 * @param ctxt  an XML parser context
7998
 */
7999
8000
static void
8001
6.00k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8002
    /*
8003
     * Is there any DTD definition ?
8004
     */
8005
6.00k
    if (RAW == '[') {
8006
6.00k
        int oldInputNr = ctxt->inputNr;
8007
8008
6.00k
        NEXT;
8009
  /*
8010
   * Parse the succession of Markup declarations and
8011
   * PEReferences.
8012
   * Subsequence (markupdecl | PEReference | S)*
8013
   */
8014
6.00k
  SKIP_BLANKS;
8015
163k
        while (1) {
8016
163k
            if (PARSER_STOPPED(ctxt)) {
8017
715
                return;
8018
162k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8019
22.3k
                if (ctxt->inputNr <= oldInputNr) {
8020
1.22k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8021
1.22k
                    return;
8022
1.22k
                }
8023
21.1k
                xmlPopPE(ctxt);
8024
140k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8025
1.97k
                NEXT;
8026
1.97k
                SKIP_BLANKS;
8027
1.97k
                break;
8028
138k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8029
138k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8030
                /*
8031
                 * Conditional sections are allowed in external entities
8032
                 * included by PE References in the internal subset.
8033
                 */
8034
0
                xmlParseConditionalSections(ctxt);
8035
138k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8036
105k
                xmlParseMarkupDecl(ctxt);
8037
105k
            } else if (RAW == '%') {
8038
30.0k
                xmlParsePERefInternal(ctxt, 1);
8039
30.0k
            } else {
8040
2.09k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8041
8042
2.16k
                while (ctxt->inputNr > oldInputNr)
8043
66
                    xmlPopPE(ctxt);
8044
2.09k
                return;
8045
2.09k
            }
8046
157k
            SKIP_BLANKS;
8047
157k
            SHRINK;
8048
157k
            GROW;
8049
157k
        }
8050
6.00k
    }
8051
8052
    /*
8053
     * We should be at the end of the DOCTYPE declaration.
8054
     */
8055
1.97k
    if (RAW != '>') {
8056
148
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8057
148
        return;
8058
148
    }
8059
1.82k
    NEXT;
8060
1.82k
}
8061
8062
#ifdef LIBXML_SAX1_ENABLED
8063
/**
8064
 * parse an attribute
8065
 *
8066
 * @deprecated Internal function, don't use.
8067
 *
8068
 *     [41] Attribute ::= Name Eq AttValue
8069
 *
8070
 * [ WFC: No External Entity References ]
8071
 * Attribute values cannot contain direct or indirect entity references
8072
 * to external entities.
8073
 *
8074
 * [ WFC: No < in Attribute Values ]
8075
 * The replacement text of any entity referred to directly or indirectly in
8076
 * an attribute value (other than "&lt;") must not contain a <.
8077
 *
8078
 * [ VC: Attribute Value Type ]
8079
 * The attribute must have been declared; the value must be of the type
8080
 * declared for it.
8081
 *
8082
 *     [25] Eq ::= S? '=' S?
8083
 *
8084
 * With namespace:
8085
 *
8086
 *     [NS 11] Attribute ::= QName Eq AttValue
8087
 *
8088
 * Also the case QName == xmlns:??? is handled independently as a namespace
8089
 * definition.
8090
 *
8091
 * @param ctxt  an XML parser context
8092
 * @param value  a xmlChar ** used to store the value of the attribute
8093
 * @returns the attribute name, and the value in *value.
8094
 */
8095
8096
const xmlChar *
8097
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8098
0
    const xmlChar *name;
8099
0
    xmlChar *val;
8100
8101
0
    *value = NULL;
8102
0
    GROW;
8103
0
    name = xmlParseName(ctxt);
8104
0
    if (name == NULL) {
8105
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106
0
                 "error parsing attribute name\n");
8107
0
        return(NULL);
8108
0
    }
8109
8110
    /*
8111
     * read the value
8112
     */
8113
0
    SKIP_BLANKS;
8114
0
    if (RAW == '=') {
8115
0
        NEXT;
8116
0
  SKIP_BLANKS;
8117
0
  val = xmlParseAttValue(ctxt);
8118
0
    } else {
8119
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8120
0
         "Specification mandates value for attribute %s\n", name);
8121
0
  return(name);
8122
0
    }
8123
8124
    /*
8125
     * Check that xml:lang conforms to the specification
8126
     * No more registered as an error, just generate a warning now
8127
     * since this was deprecated in XML second edition
8128
     */
8129
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8130
0
  if (!xmlCheckLanguageID(val)) {
8131
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8132
0
              "Malformed value for xml:lang : %s\n",
8133
0
        val, NULL);
8134
0
  }
8135
0
    }
8136
8137
    /*
8138
     * Check that xml:space conforms to the specification
8139
     */
8140
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8141
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8142
0
      *(ctxt->space) = 0;
8143
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8144
0
      *(ctxt->space) = 1;
8145
0
  else {
8146
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8147
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8148
0
                                 val, NULL);
8149
0
  }
8150
0
    }
8151
8152
0
    *value = val;
8153
0
    return(name);
8154
0
}
8155
8156
/**
8157
 * Parse a start tag. Always consumes '<'.
8158
 *
8159
 * @deprecated Internal function, don't use.
8160
 *
8161
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8162
 *
8163
 * [ WFC: Unique Att Spec ]
8164
 * No attribute name may appear more than once in the same start-tag or
8165
 * empty-element tag.
8166
 *
8167
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8168
 *
8169
 * [ WFC: Unique Att Spec ]
8170
 * No attribute name may appear more than once in the same start-tag or
8171
 * empty-element tag.
8172
 *
8173
 * With namespace:
8174
 *
8175
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8176
 *
8177
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8178
 *
8179
 * @param ctxt  an XML parser context
8180
 * @returns the element name parsed
8181
 */
8182
8183
const xmlChar *
8184
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8185
0
    const xmlChar *name;
8186
0
    const xmlChar *attname;
8187
0
    xmlChar *attvalue;
8188
0
    const xmlChar **atts = ctxt->atts;
8189
0
    int nbatts = 0;
8190
0
    int maxatts = ctxt->maxatts;
8191
0
    int i;
8192
8193
0
    if (RAW != '<') return(NULL);
8194
0
    NEXT1;
8195
8196
0
    name = xmlParseName(ctxt);
8197
0
    if (name == NULL) {
8198
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8199
0
       "xmlParseStartTag: invalid element name\n");
8200
0
        return(NULL);
8201
0
    }
8202
8203
    /*
8204
     * Now parse the attributes, it ends up with the ending
8205
     *
8206
     * (S Attribute)* S?
8207
     */
8208
0
    SKIP_BLANKS;
8209
0
    GROW;
8210
8211
0
    while (((RAW != '>') &&
8212
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8213
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8214
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8215
0
        if (attname == NULL)
8216
0
      break;
8217
0
        if (attvalue != NULL) {
8218
      /*
8219
       * [ WFC: Unique Att Spec ]
8220
       * No attribute name may appear more than once in the same
8221
       * start-tag or empty-element tag.
8222
       */
8223
0
      for (i = 0; i < nbatts;i += 2) {
8224
0
          if (xmlStrEqual(atts[i], attname)) {
8225
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8226
0
        goto failed;
8227
0
    }
8228
0
      }
8229
      /*
8230
       * Add the pair to atts
8231
       */
8232
0
      if (nbatts + 4 > maxatts) {
8233
0
          const xmlChar **n;
8234
0
                int newSize;
8235
8236
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8237
0
                                          11, XML_MAX_ATTRS);
8238
0
                if (newSize < 0) {
8239
0
        xmlErrMemory(ctxt);
8240
0
        goto failed;
8241
0
    }
8242
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8243
0
                if (newSize < 2)
8244
0
                    newSize = 2;
8245
0
#endif
8246
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8247
0
    if (n == NULL) {
8248
0
        xmlErrMemory(ctxt);
8249
0
        goto failed;
8250
0
    }
8251
0
    atts = n;
8252
0
                maxatts = newSize * 2;
8253
0
    ctxt->atts = atts;
8254
0
    ctxt->maxatts = maxatts;
8255
0
      }
8256
8257
0
      atts[nbatts++] = attname;
8258
0
      atts[nbatts++] = attvalue;
8259
0
      atts[nbatts] = NULL;
8260
0
      atts[nbatts + 1] = NULL;
8261
8262
0
            attvalue = NULL;
8263
0
  }
8264
8265
0
failed:
8266
8267
0
        if (attvalue != NULL)
8268
0
            xmlFree(attvalue);
8269
8270
0
  GROW
8271
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8272
0
      break;
8273
0
  if (SKIP_BLANKS == 0) {
8274
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8275
0
         "attributes construct error\n");
8276
0
  }
8277
0
  SHRINK;
8278
0
        GROW;
8279
0
    }
8280
8281
    /*
8282
     * SAX: Start of Element !
8283
     */
8284
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8285
0
  (!ctxt->disableSAX)) {
8286
0
  if (nbatts > 0)
8287
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8288
0
  else
8289
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8290
0
    }
8291
8292
0
    if (atts != NULL) {
8293
        /* Free only the content strings */
8294
0
        for (i = 1;i < nbatts;i+=2)
8295
0
      if (atts[i] != NULL)
8296
0
         xmlFree((xmlChar *) atts[i]);
8297
0
    }
8298
0
    return(name);
8299
0
}
8300
8301
/**
8302
 * Parse an end tag. Always consumes '</'.
8303
 *
8304
 *     [42] ETag ::= '</' Name S? '>'
8305
 *
8306
 * With namespace
8307
 *
8308
 *     [NS 9] ETag ::= '</' QName S? '>'
8309
 * @param ctxt  an XML parser context
8310
 * @param line  line of the start tag
8311
 */
8312
8313
static void
8314
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8315
0
    const xmlChar *name;
8316
8317
0
    GROW;
8318
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8319
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8320
0
           "xmlParseEndTag: '</' not found\n");
8321
0
  return;
8322
0
    }
8323
0
    SKIP(2);
8324
8325
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8326
8327
    /*
8328
     * We should definitely be at the ending "S? '>'" part
8329
     */
8330
0
    GROW;
8331
0
    SKIP_BLANKS;
8332
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8333
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8334
0
    } else
8335
0
  NEXT1;
8336
8337
    /*
8338
     * [ WFC: Element Type Match ]
8339
     * The Name in an element's end-tag must match the element type in the
8340
     * start-tag.
8341
     *
8342
     */
8343
0
    if (name != (xmlChar*)1) {
8344
0
        if (name == NULL) name = BAD_CAST "unparsable";
8345
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8346
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8347
0
                    ctxt->name, line, name);
8348
0
    }
8349
8350
    /*
8351
     * SAX: End of Tag
8352
     */
8353
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8354
0
  (!ctxt->disableSAX))
8355
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8356
8357
0
    namePop(ctxt);
8358
0
    spacePop(ctxt);
8359
0
}
8360
8361
/**
8362
 * parse an end of tag
8363
 *
8364
 * @deprecated Internal function, don't use.
8365
 *
8366
 *     [42] ETag ::= '</' Name S? '>'
8367
 *
8368
 * With namespace
8369
 *
8370
 *     [NS 9] ETag ::= '</' QName S? '>'
8371
 * @param ctxt  an XML parser context
8372
 */
8373
8374
void
8375
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8376
0
    xmlParseEndTag1(ctxt, 0);
8377
0
}
8378
#endif /* LIBXML_SAX1_ENABLED */
8379
8380
/************************************************************************
8381
 *                  *
8382
 *          SAX 2 specific operations       *
8383
 *                  *
8384
 ************************************************************************/
8385
8386
/**
8387
 * parse an XML Namespace QName
8388
 *
8389
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8390
 *     [7]  Prefix  ::= NCName
8391
 *     [8]  LocalPart  ::= NCName
8392
 *
8393
 * @param ctxt  an XML parser context
8394
 * @param prefix  pointer to store the prefix part
8395
 * @returns the Name parsed or NULL
8396
 */
8397
8398
static xmlHashedString
8399
2.29M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8400
2.29M
    xmlHashedString l, p;
8401
2.29M
    int start, isNCName = 0;
8402
8403
2.29M
    l.name = NULL;
8404
2.29M
    p.name = NULL;
8405
8406
2.29M
    GROW;
8407
2.29M
    start = CUR_PTR - BASE_PTR;
8408
8409
2.29M
    l = xmlParseNCName(ctxt);
8410
2.29M
    if (l.name != NULL) {
8411
1.17M
        isNCName = 1;
8412
1.17M
        if (CUR == ':') {
8413
106k
            NEXT;
8414
106k
            p = l;
8415
106k
            l = xmlParseNCName(ctxt);
8416
106k
        }
8417
1.17M
    }
8418
2.29M
    if ((l.name == NULL) || (CUR == ':')) {
8419
1.13M
        xmlChar *tmp;
8420
8421
1.13M
        l.name = NULL;
8422
1.13M
        p.name = NULL;
8423
1.13M
        if ((isNCName == 0) && (CUR != ':'))
8424
1.10M
            return(l);
8425
28.7k
        tmp = xmlParseNmtoken(ctxt);
8426
28.7k
        if (tmp != NULL)
8427
15.6k
            xmlFree(tmp);
8428
28.7k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8429
28.7k
                                CUR_PTR - (BASE_PTR + start));
8430
28.7k
        if (l.name == NULL) {
8431
0
            xmlErrMemory(ctxt);
8432
0
            return(l);
8433
0
        }
8434
28.7k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8435
28.7k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8436
28.7k
    }
8437
8438
1.18M
    *prefix = p;
8439
1.18M
    return(l);
8440
2.29M
}
8441
8442
/**
8443
 * parse an XML Namespace QName
8444
 *
8445
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8446
 *     [7]  Prefix  ::= NCName
8447
 *     [8]  LocalPart  ::= NCName
8448
 *
8449
 * @param ctxt  an XML parser context
8450
 * @param prefix  pointer to store the prefix part
8451
 * @returns the Name parsed or NULL
8452
 */
8453
8454
static const xmlChar *
8455
8.91k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8456
8.91k
    xmlHashedString n, p;
8457
8458
8.91k
    n = xmlParseQNameHashed(ctxt, &p);
8459
8.91k
    if (n.name == NULL)
8460
4.12k
        return(NULL);
8461
4.78k
    *prefix = p.name;
8462
4.78k
    return(n.name);
8463
8.91k
}
8464
8465
/**
8466
 * parse an XML name and compares for match
8467
 * (specialized for endtag parsing)
8468
 *
8469
 * @param ctxt  an XML parser context
8470
 * @param name  the localname
8471
 * @param prefix  the prefix, if any.
8472
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8473
 * and the name for mismatch
8474
 */
8475
8476
static const xmlChar *
8477
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8478
12.1k
                        xmlChar const *prefix) {
8479
12.1k
    const xmlChar *cmp;
8480
12.1k
    const xmlChar *in;
8481
12.1k
    const xmlChar *ret;
8482
12.1k
    const xmlChar *prefix2;
8483
8484
12.1k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8485
8486
12.1k
    GROW;
8487
12.1k
    in = ctxt->input->cur;
8488
8489
12.1k
    cmp = prefix;
8490
20.0k
    while (*in != 0 && *in == *cmp) {
8491
7.93k
  ++in;
8492
7.93k
  ++cmp;
8493
7.93k
    }
8494
12.1k
    if ((*cmp == 0) && (*in == ':')) {
8495
5.69k
        in++;
8496
5.69k
  cmp = name;
8497
9.95k
  while (*in != 0 && *in == *cmp) {
8498
4.25k
      ++in;
8499
4.25k
      ++cmp;
8500
4.25k
  }
8501
5.69k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8502
      /* success */
8503
3.21k
            ctxt->input->col += in - ctxt->input->cur;
8504
3.21k
      ctxt->input->cur = in;
8505
3.21k
      return((const xmlChar*) 1);
8506
3.21k
  }
8507
5.69k
    }
8508
    /*
8509
     * all strings coms from the dictionary, equality can be done directly
8510
     */
8511
8.91k
    ret = xmlParseQName (ctxt, &prefix2);
8512
8.91k
    if (ret == NULL)
8513
4.12k
        return(NULL);
8514
4.78k
    if ((ret == name) && (prefix == prefix2))
8515
868
  return((const xmlChar*) 1);
8516
3.91k
    return ret;
8517
4.78k
}
8518
8519
/**
8520
 * parse an attribute in the new SAX2 framework.
8521
 *
8522
 * @param ctxt  an XML parser context
8523
 * @param pref  the element prefix
8524
 * @param elem  the element name
8525
 * @param hprefix  resulting attribute prefix
8526
 * @param value  resulting value of the attribute
8527
 * @param len  resulting length of the attribute
8528
 * @param alloc  resulting indicator if the attribute was allocated
8529
 * @returns the attribute name, and the value in *value.
8530
 */
8531
8532
static xmlHashedString
8533
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8534
                   const xmlChar * pref, const xmlChar * elem,
8535
                   xmlHashedString * hprefix, xmlChar ** value,
8536
                   int *len, int *alloc)
8537
766k
{
8538
766k
    xmlHashedString hname;
8539
766k
    const xmlChar *prefix, *name;
8540
766k
    xmlChar *val = NULL, *internal_val = NULL;
8541
766k
    int special = 0;
8542
766k
    int isNamespace;
8543
766k
    int flags;
8544
8545
766k
    *value = NULL;
8546
766k
    GROW;
8547
766k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8548
766k
    if (hname.name == NULL) {
8549
74.8k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8550
74.8k
                       "error parsing attribute name\n");
8551
74.8k
        return(hname);
8552
74.8k
    }
8553
691k
    name = hname.name;
8554
691k
    prefix = hprefix->name;
8555
8556
    /*
8557
     * get the type if needed
8558
     */
8559
691k
    if (ctxt->attsSpecial != NULL) {
8560
140k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8561
140k
                                              prefix, name));
8562
140k
    }
8563
8564
    /*
8565
     * read the value
8566
     */
8567
691k
    SKIP_BLANKS;
8568
691k
    if (RAW != '=') {
8569
38.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8570
38.6k
                          "Specification mandates value for attribute %s\n",
8571
38.6k
                          name);
8572
38.6k
        goto error;
8573
38.6k
    }
8574
8575
8576
652k
    NEXT;
8577
652k
    SKIP_BLANKS;
8578
652k
    flags = 0;
8579
652k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8580
652k
                   (prefix == ctxt->str_xmlns));
8581
652k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8582
652k
                                   isNamespace);
8583
652k
    if (val == NULL)
8584
10.7k
        goto error;
8585
8586
642k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8587
8588
642k
#ifdef LIBXML_VALID_ENABLED
8589
642k
    if ((ctxt->validate) &&
8590
642k
        (ctxt->standalone) &&
8591
642k
        (special & XML_SPECIAL_EXTERNAL) &&
8592
642k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8593
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8594
0
                         "standalone: normalization of attribute %s on %s "
8595
0
                         "by external subset declaration\n",
8596
0
                         name, elem);
8597
0
    }
8598
642k
#endif
8599
8600
642k
    if (prefix == ctxt->str_xml) {
8601
        /*
8602
         * Check that xml:lang conforms to the specification
8603
         * No more registered as an error, just generate a warning now
8604
         * since this was deprecated in XML second edition
8605
         */
8606
11.8k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8607
0
            internal_val = xmlStrndup(val, *len);
8608
0
            if (internal_val == NULL)
8609
0
                goto mem_error;
8610
0
            if (!xmlCheckLanguageID(internal_val)) {
8611
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8612
0
                              "Malformed value for xml:lang : %s\n",
8613
0
                              internal_val, NULL);
8614
0
            }
8615
0
        }
8616
8617
        /*
8618
         * Check that xml:space conforms to the specification
8619
         */
8620
11.8k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8621
680
            internal_val = xmlStrndup(val, *len);
8622
680
            if (internal_val == NULL)
8623
0
                goto mem_error;
8624
680
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8625
337
                *(ctxt->space) = 0;
8626
343
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8627
100
                *(ctxt->space) = 1;
8628
243
            else {
8629
243
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8630
243
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8631
243
                              internal_val, NULL);
8632
243
            }
8633
680
        }
8634
11.8k
        if (internal_val) {
8635
680
            xmlFree(internal_val);
8636
680
        }
8637
11.8k
    }
8638
8639
642k
    *value = val;
8640
642k
    return (hname);
8641
8642
0
mem_error:
8643
0
    xmlErrMemory(ctxt);
8644
49.4k
error:
8645
49.4k
    if ((val != NULL) && (*alloc != 0))
8646
0
        xmlFree(val);
8647
49.4k
    return(hname);
8648
0
}
8649
8650
/**
8651
 * Inserts a new attribute into the hash table.
8652
 *
8653
 * @param ctxt  parser context
8654
 * @param size  size of the hash table
8655
 * @param name  attribute name
8656
 * @param uri  namespace uri
8657
 * @param hashValue  combined hash value of name and uri
8658
 * @param aindex  attribute index (this is a multiple of 5)
8659
 * @returns INT_MAX if no existing attribute was found, the attribute
8660
 * index if an attribute was found, -1 if a memory allocation failed.
8661
 */
8662
static int
8663
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8664
750k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8665
750k
    xmlAttrHashBucket *table = ctxt->attrHash;
8666
750k
    xmlAttrHashBucket *bucket;
8667
750k
    unsigned hindex;
8668
8669
750k
    hindex = hashValue & (size - 1);
8670
750k
    bucket = &table[hindex];
8671
8672
816k
    while (bucket->index >= 0) {
8673
531k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8674
8675
531k
        if (name == atts[0]) {
8676
487k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8677
8678
487k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8679
487k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8680
20.1k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8681
465k
                return(bucket->index);
8682
487k
        }
8683
8684
66.0k
        hindex++;
8685
66.0k
        bucket++;
8686
66.0k
        if (hindex >= size) {
8687
12.1k
            hindex = 0;
8688
12.1k
            bucket = table;
8689
12.1k
        }
8690
66.0k
    }
8691
8692
285k
    bucket->index = aindex;
8693
8694
285k
    return(INT_MAX);
8695
750k
}
8696
8697
static int
8698
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8699
                       const xmlChar *name, const xmlChar *prefix,
8700
8.76k
                       unsigned hashValue, int aindex) {
8701
8.76k
    xmlAttrHashBucket *table = ctxt->attrHash;
8702
8.76k
    xmlAttrHashBucket *bucket;
8703
8.76k
    unsigned hindex;
8704
8705
8.76k
    hindex = hashValue & (size - 1);
8706
8.76k
    bucket = &table[hindex];
8707
8708
14.8k
    while (bucket->index >= 0) {
8709
10.5k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8710
8711
10.5k
        if ((name == atts[0]) && (prefix == atts[1]))
8712
4.44k
            return(bucket->index);
8713
8714
6.13k
        hindex++;
8715
6.13k
        bucket++;
8716
6.13k
        if (hindex >= size) {
8717
328
            hindex = 0;
8718
328
            bucket = table;
8719
328
        }
8720
6.13k
    }
8721
8722
4.31k
    bucket->index = aindex;
8723
8724
4.31k
    return(INT_MAX);
8725
8.76k
}
8726
/**
8727
 * Parse a start tag. Always consumes '<'.
8728
 *
8729
 * This routine is called when running SAX2 parsing
8730
 *
8731
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8732
 *
8733
 * [ WFC: Unique Att Spec ]
8734
 * No attribute name may appear more than once in the same start-tag or
8735
 * empty-element tag.
8736
 *
8737
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8738
 *
8739
 * [ WFC: Unique Att Spec ]
8740
 * No attribute name may appear more than once in the same start-tag or
8741
 * empty-element tag.
8742
 *
8743
 * With namespace:
8744
 *
8745
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8746
 *
8747
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8748
 *
 * The function works in phases: parse the element QName, parse the
 * attributes (pushing namespace declarations and collecting the other
 * attributes in ctxt->atts), push namespaces from default attributes,
 * resolve attribute prefixes, check attribute uniqueness with a hash
 * table, append defaulted attributes, rebuild the value pointers and
 * finally invoke the startElementNs SAX callback.
 *
 * The pref and URI outputs are only written when processing reaches the
 * element namespace lookup; nbNsPtr is written whenever the "done" label
 * is reached. None of them is written on the early NULL returns.
 *
8749
 * @param ctxt  an XML parser context
8750
 * @param pref  resulting namespace prefix
8751
 * @param URI  resulting namespace URI
8752
 * @param nbNsPtr  resulting number of namespace declarations
8753
 * @returns the element name parsed, or NULL on error
8754
 */
8755
8756
static const xmlChar *
8757
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8758
1.51M
                  const xmlChar **URI, int *nbNsPtr) {
8759
1.51M
    xmlHashedString hlocalname;
8760
1.51M
    xmlHashedString hprefix;
8761
1.51M
    xmlHashedString hattname;
8762
1.51M
    xmlHashedString haprefix;
8763
1.51M
    const xmlChar *localname;
8764
1.51M
    const xmlChar *prefix;
8765
1.51M
    const xmlChar *attname;
8766
1.51M
    const xmlChar *aprefix;
8767
1.51M
    const xmlChar *uri;
8768
1.51M
    xmlChar *attvalue = NULL;
8769
1.51M
    const xmlChar **atts = ctxt->atts;
8770
1.51M
    unsigned attrHashSize = 0;
8771
1.51M
    int maxatts = ctxt->maxatts;
8772
1.51M
    int nratts, nbatts, nbdef;
8773
1.51M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8774
1.51M
    int alloc = 0;
8775
1.51M
    int numNsErr = 0;
8776
1.51M
    int numDupErr = 0;
8777
8778
1.51M
    if (RAW != '<') return(NULL);
8779
1.51M
    NEXT1;
8780
8781
1.51M
    nbatts = 0; /* number of slots used in atts (five per attribute) */
8782
1.51M
    nratts = 0; /* number of attributes parsed from the tag itself */
8783
1.51M
    nbdef = 0; /* number of defaulted attributes appended to atts */
8784
1.51M
    nbNs = 0; /* number of namespace declarations pushed */
8785
1.51M
    nbTotalDef = 0; /* number of non-namespace default attributes seen */
8786
1.51M
    attval = 0; /* set when at least one stored value must be freed */
8787
8788
1.51M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8789
0
        xmlErrMemory(ctxt);
8790
0
        return(NULL);
8791
0
    }
8792
8793
1.51M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8794
1.51M
    if (hlocalname.name == NULL) {
8795
1.02M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8796
1.02M
           "StartTag: invalid element name\n");
8797
1.02M
        return(NULL);
8798
1.02M
    }
8799
493k
    localname = hlocalname.name;
8800
493k
    prefix = hprefix.name;
8801
8802
    /*
8803
     * Now parse the attributes, it ends up with the ending
8804
     *
8805
     * (S Attribute)* S?
8806
     */
8807
493k
    SKIP_BLANKS;
8808
493k
    GROW;
8809
8810
    /*
8811
     * The ctxt->atts array will be ultimately passed to the SAX callback
8812
     * containing five xmlChar pointers for each attribute:
8813
     *
8814
     * [0] attribute name
8815
     * [1] attribute prefix
8816
     * [2] namespace URI
8817
     * [3] attribute value
8818
     * [4] end of attribute value
8819
     *
8820
     * To save memory, we reuse this array temporarily and store integers
8821
     * in these pointer variables.
8822
     *
8823
     * [0] attribute name
8824
     * [1] attribute prefix
8825
     * [2] hash value of attribute prefix, and later namespace index
8826
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8827
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8828
     *
8829
     * The ctxt->attallocs array contains an additional unsigned int for
8830
     * each attribute, containing the hash value of the attribute name
8831
     * and the alloc flag in bit 31.
8832
     */
8833
8834
985k
    while (((RAW != '>') &&
8835
985k
     ((RAW != '/') || (NXT(1) != '>')) &&
8836
985k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8837
766k
  int len = -1; /* if still negative after parsing, xmlStrlen is used below */
8838
8839
766k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8840
766k
                                          &haprefix, &attvalue, &len,
8841
766k
                                          &alloc);
8842
766k
        if (hattname.name == NULL)
8843
74.8k
      break;
8844
691k
        if (attvalue == NULL)
8845
49.4k
            goto next_attr;
8846
642k
        attname = hattname.name;
8847
642k
        aprefix = haprefix.name;
8848
642k
  if (len < 0) len = xmlStrlen(attvalue);
8849
8850
        /* Unprefixed attribute named str_xmlns: default namespace declaration */
642k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8851
127k
            xmlHashedString huri;
8852
127k
            xmlURIPtr parsedUri;
8853
8854
127k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8855
127k
            uri = huri.name;
8856
127k
            if (uri == NULL) {
8857
0
                xmlErrMemory(ctxt);
8858
0
                goto next_attr;
8859
0
            }
8860
127k
            if (*uri != 0) {
8861
123k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8862
0
                    xmlErrMemory(ctxt);
8863
0
                    goto next_attr;
8864
0
                }
8865
123k
                if (parsedUri == NULL) {
8866
76.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8867
76.3k
                             "xmlns: '%s' is not a valid URI\n",
8868
76.3k
                                       uri, NULL, NULL);
8869
76.3k
                } else {
8870
47.1k
                    if (parsedUri->scheme == NULL) {
8871
27.6k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8872
27.6k
                                  "xmlns: URI %s is not absolute\n",
8873
27.6k
                                  uri, NULL, NULL);
8874
27.6k
                    }
8875
47.1k
                    xmlFreeURI(parsedUri);
8876
47.1k
                }
8877
123k
                if (uri == ctxt->str_xml_ns) {
8878
                    /*
                     * NOTE(review): attname equals ctxt->str_xmlns in this
                     * branch, so this test looks always true unless str_xml
                     * and str_xmlns can alias - confirm.
                     */
475
                    if (attname != ctxt->str_xml) {
8879
475
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8880
475
                     "xml namespace URI cannot be the default namespace\n",
8881
475
                                 NULL, NULL, NULL);
8882
475
                    }
8883
475
                    goto next_attr;
8884
475
                }
8885
123k
                if ((len == 29) &&
8886
123k
                    (xmlStrEqual(uri,
8887
1.86k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8888
142
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8889
142
                         "reuse of the xmlns namespace name is forbidden\n",
8890
142
                             NULL, NULL, NULL);
8891
142
                    goto next_attr;
8892
142
                }
8893
123k
            }
8894
8895
127k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8896
108k
                nbNs++;
8897
        /* Attribute with prefix str_xmlns: prefixed namespace declaration */
514k
        } else if (aprefix == ctxt->str_xmlns) {
8898
19.8k
            xmlHashedString huri;
8899
19.8k
            xmlURIPtr parsedUri;
8900
8901
19.8k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8902
19.8k
            uri = huri.name;
8903
19.8k
            if (uri == NULL) {
8904
0
                xmlErrMemory(ctxt);
8905
0
                goto next_attr;
8906
0
            }
8907
8908
19.8k
            if (attname == ctxt->str_xml) {
8909
1.16k
                if (uri != ctxt->str_xml_ns) {
8910
691
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8911
691
                             "xml namespace prefix mapped to wrong URI\n",
8912
691
                             NULL, NULL, NULL);
8913
691
                }
8914
                /*
8915
                 * Do not keep a namespace definition node
8916
                 */
8917
1.16k
                goto next_attr;
8918
1.16k
            }
8919
18.7k
            if (uri == ctxt->str_xml_ns) {
8920
                /*
                 * The attname == ctxt->str_xml case already jumped to
                 * next_attr above, so this test is always true here.
                 */
1.10k
                if (attname != ctxt->str_xml) {
8921
1.10k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8922
1.10k
                             "xml namespace URI mapped to wrong prefix\n",
8923
1.10k
                             NULL, NULL, NULL);
8924
1.10k
                }
8925
1.10k
                goto next_attr;
8926
1.10k
            }
8927
17.6k
            if (attname == ctxt->str_xmlns) {
8928
222
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8929
222
                         "redefinition of the xmlns prefix is forbidden\n",
8930
222
                         NULL, NULL, NULL);
8931
222
                goto next_attr;
8932
222
            }
8933
17.3k
            if ((len == 29) &&
8934
17.3k
                (xmlStrEqual(uri,
8935
779
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8936
391
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8937
391
                         "reuse of the xmlns namespace name is forbidden\n",
8938
391
                         NULL, NULL, NULL);
8939
391
                goto next_attr;
8940
391
            }
8941
            /* uri == NULL was already handled above; only the empty check matters */
16.9k
            if ((uri == NULL) || (uri[0] == 0)) {
8942
269
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8943
269
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8944
269
                              attname, NULL, NULL);
8945
269
                goto next_attr;
8946
16.7k
            } else {
8947
16.7k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8948
0
                    xmlErrMemory(ctxt);
8949
0
                    goto next_attr;
8950
0
                }
8951
16.7k
                if (parsedUri == NULL) {
8952
7.36k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8953
7.36k
                         "xmlns:%s: '%s' is not a valid URI\n",
8954
7.36k
                                       attname, uri, NULL);
8955
9.35k
                } else {
8956
                    /*
                     * Unlike the default namespace branch above, the
                     * relative URI warning is only issued in pedantic mode.
                     */
9.35k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8957
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8958
0
                                  "xmlns:%s: URI %s is not absolute\n",
8959
0
                                  attname, uri, NULL);
8960
0
                    }
8961
9.35k
                    xmlFreeURI(parsedUri);
8962
9.35k
                }
8963
16.7k
            }
8964
8965
16.7k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8966
16.2k
                nbNs++;
8967
494k
        } else {
8968
            /*
8969
             * Populate attributes array, see above for repurposing
8970
             * of xmlChar pointers.
8971
             */
8972
494k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8973
3.97k
                int res = xmlCtxtGrowAttrs(ctxt);
8974
8975
                /* Reload the cached copies, the context fields may have changed */
3.97k
                maxatts = ctxt->maxatts;
8976
3.97k
                atts = ctxt->atts;
8977
8978
3.97k
                if (res < 0)
8979
0
                    goto next_attr;
8980
3.97k
            }
8981
494k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8982
494k
                                        ((unsigned) alloc << 31);
8983
494k
            atts[nbatts++] = attname;
8984
494k
            atts[nbatts++] = aprefix;
8985
494k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8986
494k
            if (alloc) {
8987
33.0k
                atts[nbatts++] = attvalue;
8988
33.0k
                attvalue += len;
8989
33.0k
                atts[nbatts++] = attvalue;
8990
461k
            } else {
8991
                /*
8992
                 * attvalue points into the input buffer which can be
8993
                 * reallocated. Store differences to input->base instead.
8994
                 * The pointers will be reconstructed later.
8995
                 */
8996
461k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8997
461k
                attvalue += len;
8998
461k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8999
461k
            }
9000
            /*
9001
             * tag if some deallocation is needed
9002
             */
9003
494k
            if (alloc != 0) attval = 1;
9004
494k
            attvalue = NULL; /* moved into atts */
9005
494k
        }
9006
9007
691k
next_attr:
9008
        /* A value that was not moved into atts is released here if allocated */
691k
        if ((attvalue != NULL) && (alloc != 0)) {
9009
43.0k
            xmlFree(attvalue);
9010
43.0k
            attvalue = NULL;
9011
43.0k
        }
9012
9013
691k
  GROW
9014
691k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9015
83.9k
      break;
9016
607k
  if (SKIP_BLANKS == 0) {
9017
115k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9018
115k
         "attributes construct error\n");
9019
115k
      break;
9020
115k
  }
9021
491k
        GROW;
9022
491k
    }
9023
9024
    /*
9025
     * Namespaces from default attributes
9026
     */
9027
493k
    if (ctxt->attsDefault != NULL) {
9028
196k
        xmlDefAttrsPtr defaults;
9029
9030
196k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9031
196k
  if (defaults != NULL) {
9032
1.16M
      for (i = 0; i < defaults->nbAttrs; i++) {
9033
1.06M
                xmlDefAttr *attr = &defaults->attrs[i];
9034
9035
1.06M
          attname = attr->name.name;
9036
1.06M
    aprefix = attr->prefix.name;
9037
9038
1.06M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9039
15.7k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9040
9041
15.7k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9042
15.2k
                        nbNs++;
9043
1.05M
    } else if (aprefix == ctxt->str_xmlns) {
9044
721k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9045
9046
721k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9047
721k
                                      NULL, 1) > 0)
9048
720k
                        nbNs++;
9049
721k
    } else {
9050
                    /* Only counted here; the attribute is appended further below */
332k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9051
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9052
0
                                    "Maximum number of attributes exceeded");
9053
0
                        break;
9054
0
                    }
9055
332k
                    nbTotalDef += 1;
9056
332k
                }
9057
1.06M
      }
9058
96.5k
  }
9059
196k
    }
9060
9061
    /*
9062
     * Resolve attribute namespaces
9063
     */
9064
987k
    for (i = 0; i < nbatts; i += 5) {
9065
494k
        attname = atts[i];
9066
494k
        aprefix = atts[i+1];
9067
9068
        /*
9069
  * The default namespace does not apply to attribute names.
9070
  */
9071
494k
  if (aprefix == NULL) {
9072
454k
            nsIndex = NS_INDEX_EMPTY;
9073
454k
        } else if (aprefix == ctxt->str_xml) {
9074
11.8k
            nsIndex = NS_INDEX_XML;
9075
27.8k
        } else {
9076
27.8k
            haprefix.name = aprefix;
9077
27.8k
            haprefix.hashValue = (size_t) atts[i+2]; /* prefix hash stored during parsing */
9078
27.8k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9079
9080
27.8k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9081
14.9k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9082
14.9k
        "Namespace prefix %s for %s on %s is not defined\n",
9083
14.9k
        aprefix, attname, localname);
9084
14.9k
                nsIndex = NS_INDEX_EMPTY;
9085
14.9k
            }
9086
27.8k
        }
9087
9088
494k
        atts[i+2] = XML_INT_TO_PTR(nsIndex); /* slot [2] now holds the namespace index */
9089
494k
    }
9090
9091
    /*
9092
     * Maximum number of attributes including default attributes.
9093
     */
9094
493k
    maxAtts = nratts + nbTotalDef;
9095
9096
    /*
9097
     * Verify that attribute names are unique.
9098
     */
9099
493k
    if (maxAtts > 1) {
9100
        /* Smallest power of two, at least 4, that is at least 2 * maxAtts */
69.9k
        attrHashSize = 4;
9101
146k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9102
76.5k
            attrHashSize *= 2;
9103
9104
69.9k
        if (attrHashSize > ctxt->attrHashMax) {
9105
1.33k
            xmlAttrHashBucket *tmp;
9106
9107
1.33k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9108
1.33k
            if (tmp == NULL) {
9109
0
                xmlErrMemory(ctxt);
9110
0
                goto done;
9111
0
            }
9112
9113
1.33k
            ctxt->attrHash = tmp;
9114
1.33k
            ctxt->attrHashMax = attrHashSize;
9115
1.33k
        }
9116
9117
        /* Fill the used part of the table with 0xFF bytes to reset it */
69.9k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9118
9119
        /* i walks the atts slots, j the matching attallocs entries */
531k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9120
461k
            const xmlChar *nsuri;
9121
461k
            unsigned hashValue, nameHashValue, uriHashValue;
9122
461k
            int res;
9123
9124
461k
            attname = atts[i];
9125
461k
            aprefix = atts[i+1];
9126
461k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9127
            /* Hash values always have bit 31 set, see dict.c */
9128
461k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9129
9130
461k
            if (nsIndex == NS_INDEX_EMPTY) {
9131
                /*
9132
                 * Prefix with empty namespace means an undeclared
9133
                 * prefix which was already reported above.
9134
                 */
9135
446k
                if (aprefix != NULL)
9136
12.5k
                    continue;
9137
434k
                nsuri = NULL;
9138
434k
                uriHashValue = URI_HASH_EMPTY;
9139
434k
            } else if (nsIndex == NS_INDEX_XML) {
9140
2.25k
                nsuri = ctxt->str_xml_ns;
9141
2.25k
                uriHashValue = URI_HASH_XML;
9142
12.3k
            } else {
9143
12.3k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9144
12.3k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9145
12.3k
            }
9146
9147
448k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9148
448k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9149
448k
                                    hashValue, i);
9150
448k
            if (res < 0)
9151
0
                continue;
9152
9153
            /*
9154
             * [ WFC: Unique Att Spec ]
9155
             * No attribute name may appear more than once in the same
9156
             * start-tag or empty-element tag.
9157
             * As extended by the Namespace in XML REC.
9158
             */
9159
448k
            if (res < INT_MAX) {
9160
                /* res is the atts offset of the earlier attribute that matched */
431k
                if (aprefix == atts[res+1]) {
9161
425k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9162
425k
                    numDupErr += 1;
9163
425k
                } else {
9164
5.67k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9165
5.67k
                             "Namespaced Attribute %s in '%s' redefined\n",
9166
5.67k
                             attname, nsuri, NULL);
9167
5.67k
                    numNsErr += 1;
9168
5.67k
                }
9169
431k
            }
9170
448k
        }
9171
69.9k
    }
9172
9173
    /*
9174
     * Default attributes
9175
     */
9176
493k
    if (ctxt->attsDefault != NULL) {
9177
196k
        xmlDefAttrsPtr defaults;
9178
9179
196k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9180
196k
  if (defaults != NULL) {
9181
1.16M
      for (i = 0; i < defaults->nbAttrs; i++) {
9182
1.06M
                xmlDefAttr *attr = &defaults->attrs[i];
9183
1.06M
                const xmlChar *nsuri = NULL;
9184
1.06M
                unsigned hashValue, uriHashValue = 0;
9185
1.06M
                int res;
9186
9187
1.06M
          attname = attr->name.name;
9188
1.06M
    aprefix = attr->prefix.name;
9189
9190
                /* Namespace declarations were already pushed in the earlier pass */
1.06M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9191
15.7k
                    continue;
9192
1.05M
    if (aprefix == ctxt->str_xmlns)
9193
721k
                    continue;
9194
9195
332k
                if (aprefix == NULL) {
9196
50.4k
                    nsIndex = NS_INDEX_EMPTY;
9197
50.4k
                    nsuri = NULL;
9198
50.4k
                    uriHashValue = URI_HASH_EMPTY;
9199
282k
                } else if (aprefix == ctxt->str_xml) {
9200
22.6k
                    nsIndex = NS_INDEX_XML;
9201
22.6k
                    nsuri = ctxt->str_xml_ns;
9202
22.6k
                    uriHashValue = URI_HASH_XML;
9203
259k
                } else {
9204
259k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9205
259k
                    if ((nsIndex == INT_MAX) ||
9206
259k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9207
232k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9208
232k
                                 "Namespace prefix %s for %s on %s is not "
9209
232k
                                 "defined\n",
9210
232k
                                 aprefix, attname, localname);
9211
232k
                        nsIndex = NS_INDEX_EMPTY;
9212
232k
                        nsuri = NULL;
9213
232k
                        uriHashValue = URI_HASH_EMPTY;
9214
232k
                    } else {
9215
26.4k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9216
26.4k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9217
26.4k
                    }
9218
259k
                }
9219
9220
                /*
9221
                 * Check whether the attribute exists
9222
                 */
9223
332k
                if (maxAtts > 1) {
9224
301k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9225
301k
                                                   uriHashValue);
9226
301k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9227
301k
                                            hashValue, nbatts);
9228
301k
                    if (res < 0)
9229
0
                        continue;
9230
301k
                    if (res < INT_MAX) {
9231
                        /* Same prefix: an existing attribute overrides the default */
34.0k
                        if (aprefix == atts[res+1])
9232
1.34k
                            continue;
9233
32.6k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9234
32.6k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9235
32.6k
                                 attname, nsuri, NULL);
9236
32.6k
                    }
9237
301k
                }
9238
9239
331k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9240
9241
331k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9242
1.77k
                    res = xmlCtxtGrowAttrs(ctxt);
9243
9244
1.77k
                    maxatts = ctxt->maxatts;
9245
1.77k
                    atts = ctxt->atts;
9246
9247
1.77k
                    if (res < 0) {
9248
0
                        localname = NULL; /* makes the function return NULL */
9249
0
                        goto done;
9250
0
                    }
9251
1.77k
                }
9252
9253
                /* Defaulted attributes store real pointers, not buffer offsets */
331k
                atts[nbatts++] = attname;
9254
331k
                atts[nbatts++] = aprefix;
9255
331k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9256
331k
                atts[nbatts++] = attr->value.name;
9257
331k
                atts[nbatts++] = attr->valueEnd;
9258
9259
331k
#ifdef LIBXML_VALID_ENABLED
9260
                /*
9261
                 * This should be moved to valid.c, but we don't keep track
9262
                 * whether an attribute was defaulted.
9263
                 */
9264
331k
                if ((ctxt->validate) &&
9265
331k
                    (ctxt->standalone == 1) &&
9266
331k
                    (attr->external != 0)) {
9267
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9268
0
                            "standalone: attribute %s on %s defaulted "
9269
0
                            "from external subset\n",
9270
0
                            attname, localname);
9271
0
                }
9272
331k
#endif
9273
331k
                nbdef++;
9274
331k
      }
9275
96.5k
  }
9276
196k
    }
9277
9278
    /*
9279
     * Using a single hash table for nsUri/localName pairs cannot
9280
     * detect duplicate QNames reliably. The following example will
9281
     * only result in two namespace errors.
9282
     *
9283
     * <doc xmlns:a="a" xmlns:b="a">
9284
     *   <elem a:a="" b:a="" b:a=""/>
9285
     * </doc>
9286
     *
9287
     * If we saw more than one namespace error but no duplicate QNames
9288
     * were found, we have to scan for duplicate QNames.
9289
     */
9290
493k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9291
        /* numNsErr is only incremented inside the maxAtts > 1 block, so the table exists */
1.11k
        memset(ctxt->attrHash, -1,
9292
1.11k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9293
9294
10.8k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9295
9.75k
            unsigned hashValue, nameHashValue, prefixHashValue;
9296
9.75k
            int res;
9297
9298
9.75k
            aprefix = atts[i+1];
9299
9.75k
            if (aprefix == NULL)
9300
988
                continue;
9301
9302
8.76k
            attname = atts[i];
9303
            /* Hash values always have bit 31 set, see dict.c */
9304
8.76k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9305
8.76k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9306
9307
8.76k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9308
8.76k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9309
8.76k
                                         aprefix, hashValue, i);
9310
8.76k
            if (res < INT_MAX)
9311
4.44k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9312
8.76k
        }
9313
1.11k
    }
9314
9315
    /*
9316
     * Reconstruct attribute pointers
9317
     */
9318
1.31M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9319
        /* namespace URI */
9320
825k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9321
        /*
         * NOTE(review): INT_MAX and INT_MAX - 1 are presumably the values
         * of NS_INDEX_EMPTY and NS_INDEX_XML stored earlier - confirm.
         */
825k
        if (nsIndex == INT_MAX)
9322
752k
            atts[i+2] = NULL;
9323
73.2k
        else if (nsIndex == INT_MAX - 1)
9324
34.2k
            atts[i+2] = ctxt->str_xml_ns;
9325
38.9k
        else
9326
38.9k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9327
9328
        /*
         * Only parsed attributes (j < nratts) without the alloc flag hold
         * offsets; defaulted and allocated values are already pointers.
         */
825k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9329
461k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9330
461k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9331
461k
        }
9332
825k
    }
9333
9334
493k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9335
493k
    if ((prefix != NULL) && (uri == NULL)) {
9336
20.4k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9337
20.4k
           "Namespace prefix %s on %s is not defined\n",
9338
20.4k
     prefix, localname, NULL);
9339
20.4k
    }
9340
493k
    *pref = prefix;
9341
493k
    *URI = uri;
9342
9343
    /*
9344
     * SAX callback
9345
     */
9346
493k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9347
493k
  (!ctxt->disableSAX)) {
9348
57.1k
  if (nbNs > 0)
9349
        /* Pass the last nbNs prefix/URI pairs of nsTab as the new declarations */
20.5k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9350
20.5k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9351
20.5k
        nbatts / 5, nbdef, atts);
9352
36.5k
  else
9353
36.5k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9354
36.5k
                          0, NULL, nbatts / 5, nbdef, atts);
9355
57.1k
    }
9356
9357
493k
done:
9358
    /*
9359
     * Free allocated attribute values
9360
     */
9361
493k
    if (attval != 0) {
9362
465k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9363
443k
      if (ctxt->attallocs[j] & 0x80000000) /* alloc flag in bit 31 */
9364
33.0k
          xmlFree((xmlChar *) atts[i+3]);
9365
21.4k
    }
9366
9367
493k
    *nbNsPtr = nbNs;
9368
493k
    return(localname);
9369
493k
}
9370
9371
/**
9372
 * Parse an end tag. Always consumes '</'.
9373
 *
9374
 *     [42] ETag ::= '</' Name S? '>'
9375
 *
9376
 * With namespace
9377
 *
9378
 *     [NS 9] ETag ::= '</' QName S? '>'
 *
 * The parsed name is compared against ctxt->name; a mismatch is reported
 * as XML_ERR_TAG_NAME_MISMATCH. The endElementNs SAX callback is invoked
 * and the space stack and namespaces of the element are popped even when
 * the name does not match.
 *
9379
 * @param ctxt  an XML parser context
9380
 * @param tag  the corresponding start tag
9381
 */
9382
9383
static void
9384
22.4k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9385
22.4k
    const xmlChar *name;
9386
9387
22.4k
    GROW;
9388
22.4k
    if ((RAW != '<') || (NXT(1) != '/')) {
9389
146
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9390
146
  return;
9391
146
    }
9392
22.3k
    SKIP(2);
9393
9394
    /* Unprefixed tags compare a plain name, prefixed ones a QName */
22.3k
    if (tag->prefix == NULL)
9395
10.2k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9396
12.1k
    else
9397
12.1k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9398
9399
    /*
9400
     * We should definitely be at the ending "S? '>'" part
9401
     */
9402
22.3k
    GROW;
9403
22.3k
    SKIP_BLANKS;
9404
22.3k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9405
17.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9406
17.5k
    } else
9407
4.74k
  NEXT1;
9408
9409
    /*
9410
     * [ WFC: Element Type Match ]
9411
     * The Name in an element's end-tag must match the element type in the
9412
     * start-tag.
9413
     *
9414
     */
9415
    /*
     * NOTE(review): (xmlChar*)1 is presumably the sentinel the compare
     * helpers return on a successful match - confirm against them.
     */
22.3k
    if (name != (xmlChar*)1) {
9416
14.3k
        if (name == NULL) name = BAD_CAST "unparsable";
9417
14.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9418
14.3k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9419
14.3k
                    ctxt->name, tag->line, name);
9420
14.3k
    }
9421
9422
    /*
9423
     * SAX: End of Tag
9424
     */
9425
22.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9426
22.3k
  (!ctxt->disableSAX))
9427
379
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9428
379
                                tag->URI);
9429
9430
22.3k
    spacePop(ctxt);
9431
22.3k
    if (tag->nsNr != 0) /* drop the namespaces declared on the start tag */
9432
2.17k
  xmlParserNsPop(ctxt, tag->nsNr);
9433
22.3k
}
9434
9435
/**
9436
 * Parse escaped pure raw content. Always consumes '<!['.
9437
 *
9438
 * @deprecated Internal function, don't use.
9439
 *
9440
 *     [18] CDSect ::= CDStart CData CDEnd
9441
 *
9442
 *     [19] CDStart ::= '<![CDATA['
9443
 *
9444
 *     [20] CData ::= (Char* - (Char* ']]>' Char*))
9445
 *
9446
 *     [21] CDEnd ::= ']]>'
 *
 * The content is accumulated in a temporary buffer and delivered through
 * the cdataBlock SAX callback, or through the characters callback when
 * cdataBlock is missing or XML_PARSE_NOCDATA is set. The buffer is freed
 * before returning.
 *
9447
 * @param ctxt  an XML parser context
9448
 */
9449
void
9450
22.6k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9451
22.6k
    xmlChar *buf = NULL;
9452
22.6k
    int len = 0;
9453
22.6k
    int size = XML_PARSER_BUFFER_SIZE;
9454
22.6k
    int r, rl;
9455
22.6k
    int s, sl;
9456
22.6k
    int cur, l;
9457
22.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9458
22.6k
                    XML_MAX_HUGE_LENGTH :
9459
22.6k
                    XML_MAX_TEXT_LENGTH;
9460
9461
22.6k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9462
0
        return;
9463
22.6k
    SKIP(3);
9464
9465
22.6k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9466
0
        return;
9467
22.6k
    SKIP(6);
9468
9469
    /*
     * r, s and cur form a sliding window of three consecutive characters
     * (rl, sl and l are their lengths) used to detect the closing "]]>".
     */
22.6k
    r = xmlCurrentCharRecover(ctxt, &rl);
9470
22.6k
    if (!IS_CHAR(r)) {
9471
6.34k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9472
6.34k
        goto out;
9473
6.34k
    }
9474
16.2k
    NEXTL(rl);
9475
16.2k
    s = xmlCurrentCharRecover(ctxt, &sl);
9476
16.2k
    if (!IS_CHAR(s)) {
9477
7.09k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9478
7.09k
        goto out;
9479
7.09k
    }
9480
9.17k
    NEXTL(sl);
9481
9.17k
    cur = xmlCurrentCharRecover(ctxt, &l);
9482
9.17k
    buf = xmlMalloc(size);
9483
9.17k
    if (buf == NULL) {
9484
0
  xmlErrMemory(ctxt);
9485
0
        goto out;
9486
0
    }
9487
3.92M
    while (IS_CHAR(cur) &&
9488
3.92M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9489
        /* Grow before copying so that a few bytes of headroom always remain */
3.91M
  if (len + 5 >= size) {
9490
6.41k
      xmlChar *tmp;
9491
6.41k
            int newSize;
9492
9493
6.41k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9494
6.41k
            if (newSize < 0) {
9495
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9496
0
                               "CData section too big found\n");
9497
0
                goto out;
9498
0
            }
9499
6.41k
      tmp = xmlRealloc(buf, newSize);
9500
6.41k
      if (tmp == NULL) {
9501
0
    xmlErrMemory(ctxt);
9502
0
                goto out;
9503
0
      }
9504
6.41k
      buf = tmp;
9505
6.41k
      size = newSize;
9506
6.41k
  }
9507
        /* Emit the oldest character, then shift the window by one */
3.91M
  COPY_BUF(buf, len, r);
9508
3.91M
  r = s;
9509
3.91M
  rl = sl;
9510
3.91M
  s = cur;
9511
3.91M
  sl = l;
9512
3.91M
  NEXTL(l);
9513
3.91M
  cur = xmlCurrentCharRecover(ctxt, &l);
9514
3.91M
    }
9515
9.17k
    buf[len] = 0;
9516
    /* The loop also ends on a non-Char character; then "]]>" was not found */
9.17k
    if (cur != '>') {
9517
6.01k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9518
6.01k
                       "CData section not finished\n%.50s\n", buf);
9519
6.01k
        goto out;
9520
6.01k
    }
9521
3.15k
    NEXTL(l);
9522
9523
    /*
9524
     * OK the buffer is to be consumed as cdata.
9525
     */
9526
3.15k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9527
2.73k
        if ((ctxt->sax->cdataBlock != NULL) &&
9528
2.73k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9529
2.73k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9530
2.73k
        } else if (ctxt->sax->characters != NULL) {
9531
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9532
0
        }
9533
2.73k
    }
9534
9535
22.6k
out:
9536
    /* Shared cleanup; buf is still NULL on the error paths taken before xmlMalloc */
22.6k
    xmlFree(buf);
9537
22.6k
}
9538
9539
/**
9540
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9541
 * unexpected EOF to the caller.
9542
 *
9543
 * @param ctxt  an XML parser context
9544
 */
9545
9546
static void
9547
9.62k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9548
9.62k
    int oldNameNr = ctxt->nameNr;
9549
9.62k
    int oldSpaceNr = ctxt->spaceNr;
9550
9.62k
    int oldNodeNr = ctxt->nodeNr;
9551
9552
9.62k
    GROW;
9553
5.36M
    while ((ctxt->input->cur < ctxt->input->end) &&
9554
5.36M
     (PARSER_STOPPED(ctxt) == 0)) {
9555
5.35M
  const xmlChar *cur = ctxt->input->cur;
9556
9557
  /*
9558
   * First case : a Processing Instruction.
9559
   */
9560
5.35M
  if ((*cur == '<') && (cur[1] == '?')) {
9561
27.8k
      xmlParsePI(ctxt);
9562
27.8k
  }
9563
9564
  /*
9565
   * Second case : a CDSection
9566
   */
9567
  /* 2.6.0 test was *cur not RAW */
9568
5.32M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9569
22.6k
      xmlParseCDSect(ctxt);
9570
22.6k
  }
9571
9572
  /*
9573
   * Third case :  a comment
9574
   */
9575
5.30M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9576
5.30M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9577
38.4k
      xmlParseComment(ctxt);
9578
38.4k
  }
9579
9580
  /*
9581
   * Fourth case :  a sub-element.
9582
   */
9583
5.26M
  else if (*cur == '<') {
9584
1.53M
            if (NXT(1) == '/') {
9585
22.3k
                if (ctxt->nameNr <= oldNameNr)
9586
217
                    break;
9587
22.1k
          xmlParseElementEnd(ctxt);
9588
1.50M
            } else {
9589
1.50M
          xmlParseElementStart(ctxt);
9590
1.50M
            }
9591
1.53M
  }
9592
9593
  /*
9594
   * Fifth case : a reference. If if has not been resolved,
9595
   *    parsing returns it's Name, create the node
9596
   */
9597
9598
3.73M
  else if (*cur == '&') {
9599
143k
      xmlParseReference(ctxt);
9600
143k
  }
9601
9602
  /*
9603
   * Last case, text. Note that References are handled directly.
9604
   */
9605
3.58M
  else {
9606
3.58M
      xmlParseCharDataInternal(ctxt, 0);
9607
3.58M
  }
9608
9609
5.35M
  SHRINK;
9610
5.35M
  GROW;
9611
5.35M
    }
9612
9613
9.62k
    if ((ctxt->nameNr > oldNameNr) &&
9614
9.62k
        (ctxt->input->cur >= ctxt->input->end) &&
9615
9.62k
        (ctxt->wellFormed)) {
9616
189
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9617
189
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9618
189
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9619
189
                "Premature end of data in tag %s line %d\n",
9620
189
                name, line, NULL);
9621
189
    }
9622
9623
    /*
9624
     * Clean up in error case
9625
     */
9626
9627
56.5k
    while (ctxt->nodeNr > oldNodeNr)
9628
46.9k
        nodePop(ctxt);
9629
9630
257k
    while (ctxt->nameNr > oldNameNr) {
9631
247k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9632
9633
247k
        if (tag->nsNr != 0)
9634
121k
            xmlParserNsPop(ctxt, tag->nsNr);
9635
9636
247k
        namePop(ctxt);
9637
247k
    }
9638
9639
257k
    while (ctxt->spaceNr > oldSpaceNr)
9640
247k
        spacePop(ctxt);
9641
9.62k
}
9642
9643
/**
9644
 * Parse XML element content. This is useful if you're only interested
9645
 * in custom SAX callbacks. If you want a node list, use
9646
 * #xmlCtxtParseContent.
9647
 *
9648
 * @param ctxt  an XML parser context
9649
 */
9650
void
9651
0
xmlParseContent(xmlParserCtxt *ctxt) {
9652
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9653
0
        return;
9654
9655
0
    xmlCtxtInitializeLate(ctxt);
9656
9657
0
    xmlParseContentInternal(ctxt);
9658
9659
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9660
0
}
9661
9662
/**
9663
 * parse an XML element
9664
 *
9665
 * @deprecated Internal function, don't use.
9666
 *
9667
 *     [39] element ::= EmptyElemTag | STag content ETag
9668
 *
9669
 * [ WFC: Element Type Match ]
9670
 * The Name in an element's end-tag must match the element type in the
9671
 * start-tag.
9672
 *
9673
 * @param ctxt  an XML parser context
9674
 */
9675
9676
void
9677
9.74k
xmlParseElement(xmlParserCtxt *ctxt) {
9678
9.74k
    if (xmlParseElementStart(ctxt) != 0)
9679
4.01k
        return;
9680
9681
5.73k
    xmlParseContentInternal(ctxt);
9682
9683
5.73k
    if (ctxt->input->cur >= ctxt->input->end) {
9684
5.37k
        if (ctxt->wellFormed) {
9685
510
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9686
510
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9687
510
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9688
510
                    "Premature end of data in tag %s line %d\n",
9689
510
                    name, line, NULL);
9690
510
        }
9691
5.37k
        return;
9692
5.37k
    }
9693
9694
360
    xmlParseElementEnd(ctxt);
9695
360
}
9696
9697
/**
9698
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9699
 * opening tag was parsed, 1 if an empty element was parsed.
9700
 *
9701
 * Always consumes '<'.
9702
 *
9703
 * @param ctxt  an XML parser context
9704
 */
9705
static int
9706
1.51M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9707
1.51M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9708
1.51M
    const xmlChar *name;
9709
1.51M
    const xmlChar *prefix = NULL;
9710
1.51M
    const xmlChar *URI = NULL;
9711
1.51M
    xmlParserNodeInfo node_info;
9712
1.51M
    int line;
9713
1.51M
    xmlNodePtr cur;
9714
1.51M
    int nbNs = 0;
9715
9716
1.51M
    if (ctxt->nameNr > maxDepth) {
9717
8
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9718
8
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9719
8
                ctxt->nameNr);
9720
8
  xmlHaltParser(ctxt);
9721
8
  return(-1);
9722
8
    }
9723
9724
    /* Capture start position */
9725
1.51M
    if (ctxt->record_info) {
9726
0
        node_info.begin_pos = ctxt->input->consumed +
9727
0
                          (CUR_PTR - ctxt->input->base);
9728
0
  node_info.begin_line = ctxt->input->line;
9729
0
    }
9730
9731
1.51M
    if (ctxt->spaceNr == 0)
9732
0
  spacePush(ctxt, -1);
9733
1.51M
    else if (*ctxt->space == -2)
9734
0
  spacePush(ctxt, -1);
9735
1.51M
    else
9736
1.51M
  spacePush(ctxt, *ctxt->space);
9737
9738
1.51M
    line = ctxt->input->line;
9739
1.51M
#ifdef LIBXML_SAX1_ENABLED
9740
1.51M
    if (ctxt->sax2)
9741
1.51M
#endif /* LIBXML_SAX1_ENABLED */
9742
1.51M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9743
0
#ifdef LIBXML_SAX1_ENABLED
9744
0
    else
9745
0
  name = xmlParseStartTag(ctxt);
9746
1.51M
#endif /* LIBXML_SAX1_ENABLED */
9747
1.51M
    if (name == NULL) {
9748
1.02M
  spacePop(ctxt);
9749
1.02M
        return(-1);
9750
1.02M
    }
9751
493k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9752
493k
    cur = ctxt->node;
9753
9754
493k
#ifdef LIBXML_VALID_ENABLED
9755
    /*
9756
     * [ VC: Root Element Type ]
9757
     * The Name in the document type declaration must match the element
9758
     * type of the root element.
9759
     */
9760
493k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9761
493k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9762
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9763
493k
#endif /* LIBXML_VALID_ENABLED */
9764
9765
    /*
9766
     * Check for an Empty Element.
9767
     */
9768
493k
    if ((RAW == '/') && (NXT(1) == '>')) {
9769
17.8k
        SKIP(2);
9770
17.8k
  if (ctxt->sax2) {
9771
17.8k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9772
17.8k
    (!ctxt->disableSAX))
9773
3.82k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9774
17.8k
#ifdef LIBXML_SAX1_ENABLED
9775
17.8k
  } else {
9776
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9777
0
    (!ctxt->disableSAX))
9778
0
    ctxt->sax->endElement(ctxt->userData, name);
9779
0
#endif /* LIBXML_SAX1_ENABLED */
9780
0
  }
9781
17.8k
  namePop(ctxt);
9782
17.8k
  spacePop(ctxt);
9783
17.8k
  if (nbNs > 0)
9784
1.97k
      xmlParserNsPop(ctxt, nbNs);
9785
17.8k
  if (cur != NULL && ctxt->record_info) {
9786
0
            node_info.node = cur;
9787
0
            node_info.end_pos = ctxt->input->consumed +
9788
0
                                (CUR_PTR - ctxt->input->base);
9789
0
            node_info.end_line = ctxt->input->line;
9790
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9791
0
  }
9792
17.8k
  return(1);
9793
17.8k
    }
9794
475k
    if (RAW == '>') {
9795
275k
        NEXT1;
9796
275k
        if (cur != NULL && ctxt->record_info) {
9797
0
            node_info.node = cur;
9798
0
            node_info.end_pos = 0;
9799
0
            node_info.end_line = 0;
9800
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9801
0
        }
9802
275k
    } else {
9803
200k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9804
200k
         "Couldn't find end of Start Tag %s line %d\n",
9805
200k
                    name, line, NULL);
9806
9807
  /*
9808
   * end of parsing of this node.
9809
   */
9810
200k
  nodePop(ctxt);
9811
200k
  namePop(ctxt);
9812
200k
  spacePop(ctxt);
9813
200k
  if (nbNs > 0)
9814
77.8k
      xmlParserNsPop(ctxt, nbNs);
9815
200k
  return(-1);
9816
200k
    }
9817
9818
275k
    return(0);
9819
475k
}
9820
9821
/**
9822
 * Parse the end of an XML element. Always consumes '</'.
9823
 *
9824
 * @param ctxt  an XML parser context
9825
 */
9826
static void
9827
22.4k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9828
22.4k
    xmlNodePtr cur = ctxt->node;
9829
9830
22.4k
    if (ctxt->nameNr <= 0) {
9831
0
        if ((RAW == '<') && (NXT(1) == '/'))
9832
0
            SKIP(2);
9833
0
        return;
9834
0
    }
9835
9836
    /*
9837
     * parse the end of tag: '</' should be here.
9838
     */
9839
22.4k
    if (ctxt->sax2) {
9840
22.4k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9841
22.4k
  namePop(ctxt);
9842
22.4k
    }
9843
0
#ifdef LIBXML_SAX1_ENABLED
9844
0
    else
9845
0
  xmlParseEndTag1(ctxt, 0);
9846
22.4k
#endif /* LIBXML_SAX1_ENABLED */
9847
9848
    /*
9849
     * Capture end position
9850
     */
9851
22.4k
    if (cur != NULL && ctxt->record_info) {
9852
0
        xmlParserNodeInfoPtr node_info;
9853
9854
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9855
0
        if (node_info != NULL) {
9856
0
            node_info->end_pos = ctxt->input->consumed +
9857
0
                                 (CUR_PTR - ctxt->input->base);
9858
0
            node_info->end_line = ctxt->input->line;
9859
0
        }
9860
0
    }
9861
22.4k
}
9862
9863
/**
9864
 * parse the XML version value.
9865
 *
9866
 * @deprecated Internal function, don't use.
9867
 *
9868
 *     [26] VersionNum ::= '1.' [0-9]+
9869
 *
9870
 * In practice allow [0-9].[0-9]+ at that level
9871
 *
9872
 * @param ctxt  an XML parser context
9873
 * @returns the string giving the XML version number, or NULL
9874
 */
9875
xmlChar *
9876
590
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9877
590
    xmlChar *buf = NULL;
9878
590
    int len = 0;
9879
590
    int size = 10;
9880
590
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9881
590
                    XML_MAX_TEXT_LENGTH :
9882
590
                    XML_MAX_NAME_LENGTH;
9883
590
    xmlChar cur;
9884
9885
590
    buf = xmlMalloc(size);
9886
590
    if (buf == NULL) {
9887
0
  xmlErrMemory(ctxt);
9888
0
  return(NULL);
9889
0
    }
9890
590
    cur = CUR;
9891
590
    if (!((cur >= '0') && (cur <= '9'))) {
9892
27
  xmlFree(buf);
9893
27
  return(NULL);
9894
27
    }
9895
563
    buf[len++] = cur;
9896
563
    NEXT;
9897
563
    cur=CUR;
9898
563
    if (cur != '.') {
9899
14
  xmlFree(buf);
9900
14
  return(NULL);
9901
14
    }
9902
549
    buf[len++] = cur;
9903
549
    NEXT;
9904
549
    cur=CUR;
9905
1.44M
    while ((cur >= '0') && (cur <= '9')) {
9906
1.44M
  if (len + 1 >= size) {
9907
320
      xmlChar *tmp;
9908
320
            int newSize;
9909
9910
320
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9911
320
            if (newSize < 0) {
9912
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9913
0
                xmlFree(buf);
9914
0
                return(NULL);
9915
0
            }
9916
320
      tmp = xmlRealloc(buf, newSize);
9917
320
      if (tmp == NULL) {
9918
0
    xmlErrMemory(ctxt);
9919
0
          xmlFree(buf);
9920
0
    return(NULL);
9921
0
      }
9922
320
      buf = tmp;
9923
320
            size = newSize;
9924
320
  }
9925
1.44M
  buf[len++] = cur;
9926
1.44M
  NEXT;
9927
1.44M
  cur=CUR;
9928
1.44M
    }
9929
549
    buf[len] = 0;
9930
549
    return(buf);
9931
549
}
9932
9933
/**
9934
 * parse the XML version.
9935
 *
9936
 * @deprecated Internal function, don't use.
9937
 *
9938
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9939
 *
9940
 *     [25] Eq ::= S? '=' S?
9941
 *
9942
 * @param ctxt  an XML parser context
9943
 * @returns the version string, e.g. "1.0"
9944
 */
9945
9946
xmlChar *
9947
2.71k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9948
2.71k
    xmlChar *version = NULL;
9949
9950
2.71k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9951
663
  SKIP(7);
9952
663
  SKIP_BLANKS;
9953
663
  if (RAW != '=') {
9954
11
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9955
11
      return(NULL);
9956
11
        }
9957
652
  NEXT;
9958
652
  SKIP_BLANKS;
9959
652
  if (RAW == '"') {
9960
142
      NEXT;
9961
142
      version = xmlParseVersionNum(ctxt);
9962
142
      if (RAW != '"') {
9963
27
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9964
27
      } else
9965
115
          NEXT;
9966
510
  } else if (RAW == '\''){
9967
448
      NEXT;
9968
448
      version = xmlParseVersionNum(ctxt);
9969
448
      if (RAW != '\'') {
9970
44
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9971
44
      } else
9972
404
          NEXT;
9973
448
  } else {
9974
62
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9975
62
  }
9976
652
    }
9977
2.70k
    return(version);
9978
2.71k
}
9979
9980
/**
9981
 * parse the XML encoding name
9982
 *
9983
 * @deprecated Internal function, don't use.
9984
 *
9985
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9986
 *
9987
 * @param ctxt  an XML parser context
9988
 * @returns the encoding name value or NULL
9989
 */
9990
xmlChar *
9991
1.11k
xmlParseEncName(xmlParserCtxt *ctxt) {
9992
1.11k
    xmlChar *buf = NULL;
9993
1.11k
    int len = 0;
9994
1.11k
    int size = 10;
9995
1.11k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9996
1.11k
                    XML_MAX_TEXT_LENGTH :
9997
1.11k
                    XML_MAX_NAME_LENGTH;
9998
1.11k
    xmlChar cur;
9999
10000
1.11k
    cur = CUR;
10001
1.11k
    if (((cur >= 'a') && (cur <= 'z')) ||
10002
1.11k
        ((cur >= 'A') && (cur <= 'Z'))) {
10003
1.09k
  buf = xmlMalloc(size);
10004
1.09k
  if (buf == NULL) {
10005
0
      xmlErrMemory(ctxt);
10006
0
      return(NULL);
10007
0
  }
10008
10009
1.09k
  buf[len++] = cur;
10010
1.09k
  NEXT;
10011
1.09k
  cur = CUR;
10012
393k
  while (((cur >= 'a') && (cur <= 'z')) ||
10013
393k
         ((cur >= 'A') && (cur <= 'Z')) ||
10014
393k
         ((cur >= '0') && (cur <= '9')) ||
10015
393k
         (cur == '.') || (cur == '_') ||
10016
393k
         (cur == '-')) {
10017
391k
      if (len + 1 >= size) {
10018
750
          xmlChar *tmp;
10019
750
                int newSize;
10020
10021
750
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10022
750
                if (newSize < 0) {
10023
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10024
0
                    xmlFree(buf);
10025
0
                    return(NULL);
10026
0
                }
10027
750
    tmp = xmlRealloc(buf, newSize);
10028
750
    if (tmp == NULL) {
10029
0
        xmlErrMemory(ctxt);
10030
0
        xmlFree(buf);
10031
0
        return(NULL);
10032
0
    }
10033
750
    buf = tmp;
10034
750
                size = newSize;
10035
750
      }
10036
391k
      buf[len++] = cur;
10037
391k
      NEXT;
10038
391k
      cur = CUR;
10039
391k
        }
10040
1.09k
  buf[len] = 0;
10041
1.09k
    } else {
10042
19
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10043
19
    }
10044
1.11k
    return(buf);
10045
1.11k
}
10046
10047
/**
10048
 * parse the XML encoding declaration
10049
 *
10050
 * @deprecated Internal function, don't use.
10051
 *
10052
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10053
 *                           "'" EncName "'")
10054
 *
10055
 * this setups the conversion filters.
10056
 *
10057
 * @param ctxt  an XML parser context
10058
 * @returns the encoding value or NULL
10059
 */
10060
10061
const xmlChar *
10062
2.70k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10063
2.70k
    xmlChar *encoding = NULL;
10064
10065
2.70k
    SKIP_BLANKS;
10066
2.70k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10067
1.57k
        return(NULL);
10068
10069
1.12k
    SKIP(8);
10070
1.12k
    SKIP_BLANKS;
10071
1.12k
    if (RAW != '=') {
10072
9
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10073
9
        return(NULL);
10074
9
    }
10075
1.11k
    NEXT;
10076
1.11k
    SKIP_BLANKS;
10077
1.11k
    if (RAW == '"') {
10078
681
        NEXT;
10079
681
        encoding = xmlParseEncName(ctxt);
10080
681
        if (RAW != '"') {
10081
80
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10082
80
            xmlFree((xmlChar *) encoding);
10083
80
            return(NULL);
10084
80
        } else
10085
601
            NEXT;
10086
681
    } else if (RAW == '\''){
10087
433
        NEXT;
10088
433
        encoding = xmlParseEncName(ctxt);
10089
433
        if (RAW != '\'') {
10090
18
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10091
18
            xmlFree((xmlChar *) encoding);
10092
18
            return(NULL);
10093
18
        } else
10094
415
            NEXT;
10095
433
    } else {
10096
4
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10097
4
    }
10098
10099
1.02k
    if (encoding == NULL)
10100
6
        return(NULL);
10101
10102
1.01k
    xmlSetDeclaredEncoding(ctxt, encoding);
10103
10104
1.01k
    return(ctxt->encoding);
10105
1.02k
}
10106
10107
/**
10108
 * parse the XML standalone declaration
10109
 *
10110
 * @deprecated Internal function, don't use.
10111
 *
10112
 *     [32] SDDecl ::= S 'standalone' Eq
10113
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10114
 *
10115
 * [ VC: Standalone Document Declaration ]
10116
 * TODO The standalone document declaration must have the value "no"
10117
 * if any external markup declarations contain declarations of:
10118
 *  - attributes with default values, if elements to which these
10119
 *    attributes apply appear in the document without specifications
10120
 *    of values for these attributes, or
10121
 *  - entities (other than amp, lt, gt, apos, quot), if references
10122
 *    to those entities appear in the document, or
10123
 *  - attributes with values subject to normalization, where the
10124
 *    attribute appears in the document with a value which will change
10125
 *    as a result of normalization, or
10126
 *  - element types with element content, if white space occurs directly
10127
 *    within any instance of those types.
10128
 *
10129
 * @param ctxt  an XML parser context
10130
 * @returns
10131
 *   1 if standalone="yes"
10132
 *   0 if standalone="no"
10133
 *  -2 if standalone attribute is missing or invalid
10134
 *    (A standalone value of -2 means that the XML declaration was found,
10135
 *     but no value was specified for the standalone attribute).
10136
 */
10137
10138
int
10139
2.65k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10140
2.65k
    int standalone = -2;
10141
10142
2.65k
    SKIP_BLANKS;
10143
2.65k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10144
359
  SKIP(10);
10145
359
        SKIP_BLANKS;
10146
359
  if (RAW != '=') {
10147
10
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10148
10
      return(standalone);
10149
10
        }
10150
349
  NEXT;
10151
349
  SKIP_BLANKS;
10152
349
        if (RAW == '\''){
10153
298
      NEXT;
10154
298
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10155
288
          standalone = 0;
10156
288
                SKIP(2);
10157
288
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10158
10
                 (NXT(2) == 's')) {
10159
2
          standalone = 1;
10160
2
    SKIP(3);
10161
8
            } else {
10162
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10163
8
      }
10164
298
      if (RAW != '\'') {
10165
11
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10166
11
      } else
10167
287
          NEXT;
10168
298
  } else if (RAW == '"'){
10169
43
      NEXT;
10170
43
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10171
2
          standalone = 0;
10172
2
    SKIP(2);
10173
41
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10174
41
                 (NXT(2) == 's')) {
10175
25
          standalone = 1;
10176
25
                SKIP(3);
10177
25
            } else {
10178
16
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10179
16
      }
10180
43
      if (RAW != '"') {
10181
23
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10182
23
      } else
10183
20
          NEXT;
10184
43
  } else {
10185
8
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10186
8
        }
10187
349
    }
10188
2.64k
    return(standalone);
10189
2.65k
}
10190
10191
/**
10192
 * parse an XML declaration header
10193
 *
10194
 * @deprecated Internal function, don't use.
10195
 *
10196
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10197
 * @param ctxt  an XML parser context
10198
 */
10199
10200
void
10201
2.71k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10202
2.71k
    xmlChar *version;
10203
10204
    /*
10205
     * This value for standalone indicates that the document has an
10206
     * XML declaration but it does not have a standalone attribute.
10207
     * It will be overwritten later if a standalone attribute is found.
10208
     */
10209
10210
2.71k
    ctxt->standalone = -2;
10211
10212
    /*
10213
     * We know that '<?xml' is here.
10214
     */
10215
2.71k
    SKIP(5);
10216
10217
2.71k
    if (!IS_BLANK_CH(RAW)) {
10218
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10219
0
                 "Blank needed after '<?xml'\n");
10220
0
    }
10221
2.71k
    SKIP_BLANKS;
10222
10223
    /*
10224
     * We must have the VersionInfo here.
10225
     */
10226
2.71k
    version = xmlParseVersionInfo(ctxt);
10227
2.71k
    if (version == NULL) {
10228
2.16k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10229
2.16k
    } else {
10230
549
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10231
      /*
10232
       * Changed here for XML-1.0 5th edition
10233
       */
10234
268
      if (ctxt->options & XML_PARSE_OLD10) {
10235
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10236
0
                "Unsupported version '%s'\n",
10237
0
                version);
10238
268
      } else {
10239
268
          if ((version[0] == '1') && ((version[1] == '.'))) {
10240
251
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10241
251
                      "Unsupported version '%s'\n",
10242
251
          version, NULL);
10243
251
    } else {
10244
17
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10245
17
              "Unsupported version '%s'\n",
10246
17
              version);
10247
17
    }
10248
268
      }
10249
268
  }
10250
549
  if (ctxt->version != NULL)
10251
0
      xmlFree((void *) ctxt->version);
10252
549
  ctxt->version = version;
10253
549
    }
10254
10255
    /*
10256
     * We may have the encoding declaration
10257
     */
10258
2.71k
    if (!IS_BLANK_CH(RAW)) {
10259
2.19k
        if ((RAW == '?') && (NXT(1) == '>')) {
10260
14
      SKIP(2);
10261
14
      return;
10262
14
  }
10263
2.17k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10264
2.17k
    }
10265
2.70k
    xmlParseEncodingDecl(ctxt);
10266
10267
    /*
10268
     * We may have the standalone status.
10269
     */
10270
2.70k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10271
605
        if ((RAW == '?') && (NXT(1) == '>')) {
10272
49
      SKIP(2);
10273
49
      return;
10274
49
  }
10275
556
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10276
556
    }
10277
10278
    /*
10279
     * We can grow the input buffer freely at that point
10280
     */
10281
2.65k
    GROW;
10282
10283
2.65k
    SKIP_BLANKS;
10284
2.65k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10285
10286
2.65k
    SKIP_BLANKS;
10287
2.65k
    if ((RAW == '?') && (NXT(1) == '>')) {
10288
349
        SKIP(2);
10289
2.30k
    } else if (RAW == '>') {
10290
        /* Deprecated old WD ... */
10291
535
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10292
535
  NEXT;
10293
1.77k
    } else {
10294
1.77k
        int c;
10295
10296
1.77k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10297
628k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10298
628k
               ((c = CUR) != 0)) {
10299
627k
            NEXT;
10300
627k
            if (c == '>')
10301
1.25k
                break;
10302
627k
        }
10303
1.77k
    }
10304
2.65k
}
10305
10306
/**
10307
 * @since 2.14.0
10308
 *
10309
 * @param ctxt  parser context
10310
 * @returns the version from the XML declaration.
10311
 */
10312
const xmlChar *
10313
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10314
0
    if (ctxt == NULL)
10315
0
        return(NULL);
10316
10317
0
    return(ctxt->version);
10318
0
}
10319
10320
/**
10321
 * @since 2.14.0
10322
 *
10323
 * @param ctxt  parser context
10324
 * @returns the value from the standalone document declaration.
10325
 */
10326
int
10327
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10328
0
    if (ctxt == NULL)
10329
0
        return(0);
10330
10331
0
    return(ctxt->standalone);
10332
0
}
10333
10334
/**
10335
 * parse an XML Misc* optional field.
10336
 *
10337
 * @deprecated Internal function, don't use.
10338
 *
10339
 *     [27] Misc ::= Comment | PI |  S
10340
 * @param ctxt  an XML parser context
10341
 */
10342
10343
void
10344
31.1k
xmlParseMisc(xmlParserCtxt *ctxt) {
10345
34.3k
    while (PARSER_STOPPED(ctxt) == 0) {
10346
33.4k
        SKIP_BLANKS;
10347
33.4k
        GROW;
10348
33.4k
        if ((RAW == '<') && (NXT(1) == '?')) {
10349
2.25k
      xmlParsePI(ctxt);
10350
31.2k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10351
1.02k
      xmlParseComment(ctxt);
10352
30.1k
        } else {
10353
30.1k
            break;
10354
30.1k
        }
10355
33.4k
    }
10356
31.1k
}
10357
10358
static void
10359
15.0k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10360
15.0k
    xmlDocPtr doc;
10361
10362
    /*
10363
     * SAX: end of the document processing.
10364
     */
10365
15.0k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10366
15.0k
        ctxt->sax->endDocument(ctxt->userData);
10367
10368
    /*
10369
     * Remove locally kept entity definitions if the tree was not built
10370
     */
10371
15.0k
    doc = ctxt->myDoc;
10372
15.0k
    if ((doc != NULL) &&
10373
15.0k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10374
1.26k
        xmlFreeDoc(doc);
10375
1.26k
        ctxt->myDoc = NULL;
10376
1.26k
    }
10377
15.0k
}
10378
10379
/**
10380
 * Parse an XML document and invoke the SAX handlers. This is useful
10381
 * if you're only interested in custom SAX callbacks. If you want a
10382
 * document tree, use #xmlCtxtParseDocument.
10383
 *
10384
 * @param ctxt  an XML parser context
10385
 * @returns 0, -1 in case of error.
10386
 */
10387
10388
int
10389
15.1k
xmlParseDocument(xmlParserCtxt *ctxt) {
10390
15.1k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10391
0
        return(-1);
10392
10393
15.1k
    GROW;
10394
10395
    /*
10396
     * SAX: detecting the level.
10397
     */
10398
15.1k
    xmlCtxtInitializeLate(ctxt);
10399
10400
15.1k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10401
15.1k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10402
15.1k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10403
15.1k
    }
10404
10405
15.1k
    xmlDetectEncoding(ctxt);
10406
10407
15.1k
    if (CUR == 0) {
10408
40
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10409
40
  return(-1);
10410
40
    }
10411
10412
15.0k
    GROW;
10413
15.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10414
10415
  /*
10416
   * Note that we will switch encoding on the fly.
10417
   */
10418
2.71k
  xmlParseXMLDecl(ctxt);
10419
2.71k
  SKIP_BLANKS;
10420
12.3k
    } else {
10421
12.3k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10422
12.3k
        if (ctxt->version == NULL) {
10423
0
            xmlErrMemory(ctxt);
10424
0
            return(-1);
10425
0
        }
10426
12.3k
    }
10427
15.0k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10428
12.6k
        ctxt->sax->startDocument(ctxt->userData);
10429
15.0k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10430
15.0k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10431
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10432
0
    }
10433
10434
    /*
10435
     * The Misc part of the Prolog
10436
     */
10437
15.0k
    xmlParseMisc(ctxt);
10438
10439
    /*
10440
     * Then possibly doc type declaration(s) and more Misc
10441
     * (doctypedecl Misc*)?
10442
     */
10443
15.0k
    GROW;
10444
15.0k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10445
10446
6.30k
  ctxt->inSubset = 1;
10447
6.30k
  xmlParseDocTypeDecl(ctxt);
10448
6.30k
  if (RAW == '[') {
10449
6.00k
      xmlParseInternalSubset(ctxt);
10450
6.00k
  } else if (RAW == '>') {
10451
97
            NEXT;
10452
97
        }
10453
10454
  /*
10455
   * Create and update the external subset.
10456
   */
10457
6.30k
  ctxt->inSubset = 2;
10458
6.30k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10459
6.30k
      (!ctxt->disableSAX))
10460
837
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10461
837
                                ctxt->extSubSystem, ctxt->extSubURI);
10462
6.30k
  ctxt->inSubset = 0;
10463
10464
6.30k
        xmlCleanSpecialAttr(ctxt);
10465
10466
6.30k
  xmlParseMisc(ctxt);
10467
6.30k
    }
10468
10469
    /*
10470
     * Time to start parsing the tree itself
10471
     */
10472
15.0k
    GROW;
10473
15.0k
    if (RAW != '<') {
10474
5.32k
        if (ctxt->wellFormed)
10475
329
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10476
329
                           "Start tag expected, '<' not found\n");
10477
9.74k
    } else {
10478
9.74k
  xmlParseElement(ctxt);
10479
10480
  /*
10481
   * The Misc part at the end
10482
   */
10483
9.74k
  xmlParseMisc(ctxt);
10484
10485
9.74k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10486
9.74k
    }
10487
10488
15.0k
    ctxt->instate = XML_PARSER_EOF;
10489
15.0k
    xmlFinishDocument(ctxt);
10490
10491
15.0k
    if (! ctxt->wellFormed) {
10492
15.0k
  ctxt->valid = 0;
10493
15.0k
  return(-1);
10494
15.0k
    }
10495
10496
33
    return(0);
10497
15.0k
}
10498
10499
/**
10500
 * parse a general parsed entity
10501
 * An external general parsed entity is well-formed if it matches the
10502
 * production labeled extParsedEnt.
10503
 *
10504
 * @deprecated Internal function, don't use.
10505
 *
10506
 *     [78] extParsedEnt ::= TextDecl? content
10507
 *
10508
 * @param ctxt  an XML parser context
10509
 * @returns 0, -1 in case of error. the parser context is augmented
10510
 *                as a result of the parsing.
10511
 */
10512
10513
int
10514
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10515
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10516
0
        return(-1);
10517
10518
0
    xmlCtxtInitializeLate(ctxt);
10519
10520
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10521
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10522
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10523
0
    }
10524
10525
0
    xmlDetectEncoding(ctxt);
10526
10527
0
    if (CUR == 0) {
10528
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10529
0
    }
10530
10531
    /*
10532
     * Check for the XMLDecl in the Prolog.
10533
     */
10534
0
    GROW;
10535
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10536
10537
  /*
10538
   * Note that we will switch encoding on the fly.
10539
   */
10540
0
  xmlParseXMLDecl(ctxt);
10541
0
  SKIP_BLANKS;
10542
0
    } else {
10543
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10544
0
    }
10545
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10546
0
        ctxt->sax->startDocument(ctxt->userData);
10547
10548
    /*
10549
     * Doing validity checking on chunk doesn't make sense
10550
     */
10551
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10552
0
    ctxt->validate = 0;
10553
0
    ctxt->depth = 0;
10554
10555
0
    xmlParseContentInternal(ctxt);
10556
10557
0
    if (ctxt->input->cur < ctxt->input->end)
10558
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10559
10560
    /*
10561
     * SAX: end of the document processing.
10562
     */
10563
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10564
0
        ctxt->sax->endDocument(ctxt->userData);
10565
10566
0
    if (! ctxt->wellFormed) return(-1);
10567
0
    return(0);
10568
0
}
10569
10570
#ifdef LIBXML_PUSH_ENABLED
10571
/************************************************************************
10572
 *                  *
10573
 *    Progressive parsing interfaces        *
10574
 *                  *
10575
 ************************************************************************/
10576
10577
/**
10578
 * Check whether the input buffer contains a character.
10579
 *
10580
 * @param ctxt  an XML parser context
10581
 * @param c  character
10582
 */
10583
static int
10584
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10585
0
    const xmlChar *cur;
10586
10587
0
    if (ctxt->checkIndex == 0) {
10588
0
        cur = ctxt->input->cur + 1;
10589
0
    } else {
10590
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10591
0
    }
10592
10593
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10594
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10595
10596
0
        if (index > LONG_MAX) {
10597
0
            ctxt->checkIndex = 0;
10598
0
            return(1);
10599
0
        }
10600
0
        ctxt->checkIndex = index;
10601
0
        return(0);
10602
0
    } else {
10603
0
        ctxt->checkIndex = 0;
10604
0
        return(1);
10605
0
    }
10606
0
}
10607
10608
/**
10609
 * Check whether the input buffer contains a string.
10610
 *
10611
 * @param ctxt  an XML parser context
10612
 * @param startDelta  delta to apply at the start
10613
 * @param str  string
10614
 * @param strLen  length of string
10615
 */
10616
static const xmlChar *
10617
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10618
0
                     const char *str, size_t strLen) {
10619
0
    const xmlChar *cur, *term;
10620
10621
0
    if (ctxt->checkIndex == 0) {
10622
0
        cur = ctxt->input->cur + startDelta;
10623
0
    } else {
10624
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10625
0
    }
10626
10627
0
    term = BAD_CAST strstr((const char *) cur, str);
10628
0
    if (term == NULL) {
10629
0
        const xmlChar *end = ctxt->input->end;
10630
0
        size_t index;
10631
10632
        /* Rescan (strLen - 1) characters. */
10633
0
        if ((size_t) (end - cur) < strLen)
10634
0
            end = cur;
10635
0
        else
10636
0
            end -= strLen - 1;
10637
0
        index = end - ctxt->input->cur;
10638
0
        if (index > LONG_MAX) {
10639
0
            ctxt->checkIndex = 0;
10640
0
            return(ctxt->input->end - strLen);
10641
0
        }
10642
0
        ctxt->checkIndex = index;
10643
0
    } else {
10644
0
        ctxt->checkIndex = 0;
10645
0
    }
10646
10647
0
    return(term);
10648
0
}
10649
10650
/**
10651
 * Check whether the input buffer contains terminated char data.
10652
 *
10653
 * @param ctxt  an XML parser context
10654
 */
10655
static int
10656
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10657
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10658
0
    const xmlChar *end = ctxt->input->end;
10659
0
    size_t index;
10660
10661
0
    while (cur < end) {
10662
0
        if ((*cur == '<') || (*cur == '&')) {
10663
0
            ctxt->checkIndex = 0;
10664
0
            return(1);
10665
0
        }
10666
0
        cur++;
10667
0
    }
10668
10669
0
    index = cur - ctxt->input->cur;
10670
0
    if (index > LONG_MAX) {
10671
0
        ctxt->checkIndex = 0;
10672
0
        return(1);
10673
0
    }
10674
0
    ctxt->checkIndex = index;
10675
0
    return(0);
10676
0
}
10677
10678
/**
10679
 * Check whether there's enough data in the input buffer to finish parsing
10680
 * a start tag. This has to take quotes into account.
10681
 *
10682
 * @param ctxt  an XML parser context
10683
 */
10684
static int
10685
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10686
0
    const xmlChar *cur;
10687
0
    const xmlChar *end = ctxt->input->end;
10688
0
    int state = ctxt->endCheckState;
10689
0
    size_t index;
10690
10691
0
    if (ctxt->checkIndex == 0)
10692
0
        cur = ctxt->input->cur + 1;
10693
0
    else
10694
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10695
10696
0
    while (cur < end) {
10697
0
        if (state) {
10698
0
            if (*cur == state)
10699
0
                state = 0;
10700
0
        } else if (*cur == '\'' || *cur == '"') {
10701
0
            state = *cur;
10702
0
        } else if (*cur == '>') {
10703
0
            ctxt->checkIndex = 0;
10704
0
            ctxt->endCheckState = 0;
10705
0
            return(1);
10706
0
        }
10707
0
        cur++;
10708
0
    }
10709
10710
0
    index = cur - ctxt->input->cur;
10711
0
    if (index > LONG_MAX) {
10712
0
        ctxt->checkIndex = 0;
10713
0
        ctxt->endCheckState = 0;
10714
0
        return(1);
10715
0
    }
10716
0
    ctxt->checkIndex = index;
10717
0
    ctxt->endCheckState = state;
10718
0
    return(0);
10719
0
}
10720
10721
/**
10722
 * Check whether there's enough data in the input buffer to finish parsing
10723
 * the internal subset.
10724
 *
10725
 * @param ctxt  an XML parser context
10726
 */
10727
static int
10728
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10729
    /*
10730
     * Sorry, but progressive parsing of the internal subset is not
10731
     * supported. We first check that the full content of the internal
10732
     * subset is available and parsing is launched only at that point.
10733
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10734
     * not in a ']]>' sequence which are conditional sections.
10735
     */
10736
0
    const xmlChar *cur, *start;
10737
0
    const xmlChar *end = ctxt->input->end;
10738
0
    int state = ctxt->endCheckState;
10739
0
    size_t index;
10740
10741
0
    if (ctxt->checkIndex == 0) {
10742
0
        cur = ctxt->input->cur + 1;
10743
0
    } else {
10744
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10745
0
    }
10746
0
    start = cur;
10747
10748
0
    while (cur < end) {
10749
0
        if (state == '-') {
10750
0
            if ((*cur == '-') &&
10751
0
                (cur[1] == '-') &&
10752
0
                (cur[2] == '>')) {
10753
0
                state = 0;
10754
0
                cur += 3;
10755
0
                start = cur;
10756
0
                continue;
10757
0
            }
10758
0
        }
10759
0
        else if (state == ']') {
10760
0
            if (*cur == '>') {
10761
0
                ctxt->checkIndex = 0;
10762
0
                ctxt->endCheckState = 0;
10763
0
                return(1);
10764
0
            }
10765
0
            if (IS_BLANK_CH(*cur)) {
10766
0
                state = ' ';
10767
0
            } else if (*cur != ']') {
10768
0
                state = 0;
10769
0
                start = cur;
10770
0
                continue;
10771
0
            }
10772
0
        }
10773
0
        else if (state == ' ') {
10774
0
            if (*cur == '>') {
10775
0
                ctxt->checkIndex = 0;
10776
0
                ctxt->endCheckState = 0;
10777
0
                return(1);
10778
0
            }
10779
0
            if (!IS_BLANK_CH(*cur)) {
10780
0
                state = 0;
10781
0
                start = cur;
10782
0
                continue;
10783
0
            }
10784
0
        }
10785
0
        else if (state != 0) {
10786
0
            if (*cur == state) {
10787
0
                state = 0;
10788
0
                start = cur + 1;
10789
0
            }
10790
0
        }
10791
0
        else if (*cur == '<') {
10792
0
            if ((cur[1] == '!') &&
10793
0
                (cur[2] == '-') &&
10794
0
                (cur[3] == '-')) {
10795
0
                state = '-';
10796
0
                cur += 4;
10797
                /* Don't treat <!--> as comment */
10798
0
                start = cur;
10799
0
                continue;
10800
0
            }
10801
0
        }
10802
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10803
0
            state = *cur;
10804
0
        }
10805
10806
0
        cur++;
10807
0
    }
10808
10809
    /*
10810
     * Rescan the three last characters to detect "<!--" and "-->"
10811
     * split across chunks.
10812
     */
10813
0
    if ((state == 0) || (state == '-')) {
10814
0
        if (cur - start < 3)
10815
0
            cur = start;
10816
0
        else
10817
0
            cur -= 3;
10818
0
    }
10819
0
    index = cur - ctxt->input->cur;
10820
0
    if (index > LONG_MAX) {
10821
0
        ctxt->checkIndex = 0;
10822
0
        ctxt->endCheckState = 0;
10823
0
        return(1);
10824
0
    }
10825
0
    ctxt->checkIndex = index;
10826
0
    ctxt->endCheckState = state;
10827
0
    return(0);
10828
0
}
10829
10830
/**
10831
 * Try to progress on parsing
10832
 *
10833
 * @param ctxt  an XML parser context
10834
 * @param terminate  last chunk indicator
10835
 * @returns zero if no parsing was possible
10836
 */
10837
static int
10838
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    /*
     * Push parser state machine. Each pass of the loop below dispatches
     * on ctxt->instate. A state either consumes input and breaks out of
     * the switch to run another pass, or jumps to `done` when the input
     * is exhausted, when more data is needed (only checked while
     * `terminate` is zero) or after a fatal condition.
     * `ret` is only ever changed by the "ret += 5" in the XML
     * declaration branch.
     */
10839
0
    int ret = 0;
10840
0
    size_t avail;
10841
0
    xmlChar cur, next;
10842
10843
0
    if (ctxt->input == NULL)
10844
0
        return(0);
10845
    /*
     * More than 4096 bytes lie between the buffer base and the read
     * position: call xmlParserShrink (presumably to release consumed
     * input -- confirm in parserInternals.c). The NULL test repeats the
     * check made just above.
     */
10846
0
    if ((ctxt->input != NULL) &&
10847
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10848
0
        xmlParserShrink(ctxt);
10849
0
    }
10850
    /* The loop stops as soon as SAX processing gets disabled. */
10851
0
    while (ctxt->disableSAX == 0) {
10852
0
        avail = ctxt->input->end - ctxt->input->cur;
10853
0
        if (avail < 1)
10854
0
      goto done;
10855
0
        switch (ctxt->instate) {
10856
0
            case XML_PARSER_EOF:
10857
          /*
10858
     * Document parsing is done !
10859
     */
10860
0
          goto done;
10861
0
            case XML_PARSER_START:
10862
                /*
10863
                 * Very first chars read from the document flow.
10864
                 */
10865
0
                if ((!terminate) && (avail < 4))
10866
0
                    goto done;
10867
10868
                /*
10869
                 * We need more bytes to detect EBCDIC code pages.
10870
                 * See xmlDetectEBCDIC.
10871
                 */
10872
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10873
0
                    (!terminate) && (avail < 200))
10874
0
                    goto done;
10875
10876
0
                xmlDetectEncoding(ctxt);
10877
0
                ctxt->instate = XML_PARSER_XML_DECL;
10878
0
    break;
10879
10880
0
            case XML_PARSER_XML_DECL:
10881
0
    if ((!terminate) && (avail < 2))
10882
0
        goto done;
10883
0
    cur = ctxt->input->cur[0];
10884
0
    next = ctxt->input->cur[1];
10885
0
          if ((cur == '<') && (next == '?')) {
10886
        /* PI or XML decl */
                /* Unless terminating, wait until the closing "?>" is buffered. */
10887
0
        if ((!terminate) &&
10888
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10889
0
      goto done;
10890
0
        if ((ctxt->input->cur[2] == 'x') &&
10891
0
      (ctxt->input->cur[3] == 'm') &&
10892
0
      (ctxt->input->cur[4] == 'l') &&
10893
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10894
0
      ret += 5;
10895
0
      xmlParseXMLDecl(ctxt);
10896
0
        } else {
                /*
                 * "<?" not followed by "xml" and a blank: nothing is
                 * consumed here, only the default version is recorded.
                 */
10897
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10898
0
                        if (ctxt->version == NULL) {
10899
0
                            xmlErrMemory(ctxt);
10900
0
                            break;
10901
0
                        }
10902
0
        }
10903
0
    } else {
10904
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10905
0
        if (ctxt->version == NULL) {
10906
0
            xmlErrMemory(ctxt);
10907
0
      break;
10908
0
        }
10909
0
    }
10910
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10911
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10912
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10913
0
                }
10914
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10915
0
                    (!ctxt->disableSAX))
10916
0
                    ctxt->sax->startDocument(ctxt->userData);
10917
0
                ctxt->instate = XML_PARSER_MISC;
10918
0
    break;
10919
0
            case XML_PARSER_START_TAG: {
10920
0
          const xmlChar *name;
10921
0
    const xmlChar *prefix = NULL;
10922
0
    const xmlChar *URI = NULL;
10923
0
                int line = ctxt->input->line;
10924
0
    int nbNs = 0;
10925
10926
0
    if ((!terminate) && (avail < 2))
10927
0
        goto done;
10928
0
    cur = ctxt->input->cur[0];
10929
0
          if (cur != '<') {
10930
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10931
0
                                   "Start tag expected, '<' not found");
10932
0
                    ctxt->instate = XML_PARSER_EOF;
10933
0
                    xmlFinishDocument(ctxt);
10934
0
        goto done;
10935
0
    }
                /* Unless terminating, wait until the whole start tag is buffered. */
10936
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10937
0
                    goto done;
                /*
                 * Push an entry on the space stack: -1 when the stack is
                 * empty or its top is -2, otherwise a copy of the top.
                 */
10938
0
    if (ctxt->spaceNr == 0)
10939
0
        spacePush(ctxt, -1);
10940
0
    else if (*ctxt->space == -2)
10941
0
        spacePush(ctxt, -1);
10942
0
    else
10943
0
        spacePush(ctxt, *ctxt->space);
                /*
                 * With LIBXML_SAX1_ENABLED the preprocessor assembles an
                 * if/else on ctxt->sax2 choosing between xmlParseStartTag2
                 * and xmlParseStartTag; without it xmlParseStartTag2 is
                 * called unconditionally.
                 */
10944
0
#ifdef LIBXML_SAX1_ENABLED
10945
0
    if (ctxt->sax2)
10946
0
#endif /* LIBXML_SAX1_ENABLED */
10947
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10948
0
#ifdef LIBXML_SAX1_ENABLED
10949
0
    else
10950
0
        name = xmlParseStartTag(ctxt);
10951
0
#endif /* LIBXML_SAX1_ENABLED */
                /* No element name: undo the space push and end the document. */
10952
0
    if (name == NULL) {
10953
0
        spacePop(ctxt);
10954
0
                    ctxt->instate = XML_PARSER_EOF;
10955
0
                    xmlFinishDocument(ctxt);
10956
0
        goto done;
10957
0
    }
10958
0
#ifdef LIBXML_VALID_ENABLED
10959
    /*
10960
     * [ VC: Root Element Type ]
10961
     * The Name in the document type declaration must match
10962
     * the element type of the root element.
10963
     */
10964
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10965
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10966
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10967
0
#endif /* LIBXML_VALID_ENABLED */
10968
10969
    /*
10970
     * Check for an Empty Element.
10971
     */
10972
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10973
0
        SKIP(2);
10974
                /* Empty element: the end callback is issued right away. */
10975
0
        if (ctxt->sax2) {
10976
0
      if ((ctxt->sax != NULL) &&
10977
0
          (ctxt->sax->endElementNs != NULL) &&
10978
0
          (!ctxt->disableSAX))
10979
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10980
0
                                  prefix, URI);
10981
0
      if (nbNs > 0)
10982
0
          xmlParserNsPop(ctxt, nbNs);
10983
0
#ifdef LIBXML_SAX1_ENABLED
10984
0
        } else {
10985
0
      if ((ctxt->sax != NULL) &&
10986
0
          (ctxt->sax->endElement != NULL) &&
10987
0
          (!ctxt->disableSAX))
10988
0
          ctxt->sax->endElement(ctxt->userData, name);
10989
0
#endif /* LIBXML_SAX1_ENABLED */
10990
0
        }
10991
0
        spacePop(ctxt);
10992
0
    } else if (RAW == '>') {
10993
0
        NEXT;
                /* Open element: record it, using the line saved on entry to this state. */
10994
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10995
0
    } else {
10996
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10997
0
           "Couldn't find end of Start Tag %s\n",
10998
0
           name);
10999
0
        nodePop(ctxt);
11000
0
        spacePop(ctxt);
11001
0
                    if (nbNs > 0)
11002
0
                        xmlParserNsPop(ctxt, nbNs);
11003
0
    }
11004
                /* An empty name stack means the root element is closed. */
11005
0
                if (ctxt->nameNr == 0)
11006
0
                    ctxt->instate = XML_PARSER_EPILOG;
11007
0
                else
11008
0
                    ctxt->instate = XML_PARSER_CONTENT;
11009
0
                break;
11010
0
      }
11011
0
            case XML_PARSER_CONTENT: {
11012
0
    cur = ctxt->input->cur[0];
11013
11014
0
    if (cur == '<') {
11015
0
                    if ((!terminate) && (avail < 2))
11016
0
                        goto done;
11017
0
        next = ctxt->input->cur[1];
11018
11019
0
                    if (next == '/') {
11020
0
                        ctxt->instate = XML_PARSER_END_TAG;
11021
0
                        break;
11022
0
                    } else if (next == '?') {
11023
0
                        if ((!terminate) &&
11024
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11025
0
                            goto done;
11026
0
                        xmlParsePI(ctxt);
11027
0
                        ctxt->instate = XML_PARSER_CONTENT;
11028
0
                        break;
11029
0
                    } else if (next == '!') {
11030
0
                        if ((!terminate) && (avail < 3))
11031
0
                            goto done;
11032
0
                        next = ctxt->input->cur[2];
11033
11034
0
                        if (next == '-') {
11035
0
                            if ((!terminate) && (avail < 4))
11036
0
                                goto done;
11037
0
                            if (ctxt->input->cur[3] == '-') {
11038
0
                                if ((!terminate) &&
11039
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11040
0
                                    goto done;
11041
0
                                xmlParseComment(ctxt);
11042
0
                                ctxt->instate = XML_PARSER_CONTENT;
11043
0
                                break;
11044
0
                            }
11045
0
                        } else if (next == '[') {
                            /* "<![CDATA[" is 9 bytes long. */
11046
0
                            if ((!terminate) && (avail < 9))
11047
0
                                goto done;
11048
0
                            if ((ctxt->input->cur[2] == '[') &&
11049
0
                                (ctxt->input->cur[3] == 'C') &&
11050
0
                                (ctxt->input->cur[4] == 'D') &&
11051
0
                                (ctxt->input->cur[5] == 'A') &&
11052
0
                                (ctxt->input->cur[6] == 'T') &&
11053
0
                                (ctxt->input->cur[7] == 'A') &&
11054
0
                                (ctxt->input->cur[8] == '[')) {
11055
0
                                if ((!terminate) &&
11056
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11057
0
                                    goto done;
11058
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11059
0
                                xmlParseCDSect(ctxt);
11060
0
                                ctxt->instate = XML_PARSER_CONTENT;
11061
0
                                break;
11062
0
                            }
11063
0
                        }
11064
0
                    }
11065
0
    } else if (cur == '&') {
11066
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11067
0
      goto done;
11068
0
        xmlParseReference(ctxt);
11069
0
                    break;
11070
0
    } else {
11071
        /* TODO Avoid the extra copy, handle directly !!! */
11072
        /*
11073
         * Goal of the following test is:
11074
         *  - minimize calls to the SAX 'character' callback
11075
         *    when they are mergeable
11076
         *  - handle an problem for isBlank when we only parse
11077
         *    a sequence of blank chars and the next one is
11078
         *    not available to check against '<' presence.
11079
         *  - tries to homogenize the differences in SAX
11080
         *    callbacks between the push and pull versions
11081
         *    of the parser.
11082
         */
11083
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11084
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11085
0
          goto done;
11086
0
                    }
                    /* Forget the lookup position before parsing the text. */
11087
0
                    ctxt->checkIndex = 0;
11088
0
        xmlParseCharDataInternal(ctxt, !terminate);
11089
0
                    break;
11090
0
    }
11091
                /*
                 * Only reached for a '<' that starts none of the constructs
                 * handled above: let the START_TAG state deal with it.
                 */
11092
0
                ctxt->instate = XML_PARSER_START_TAG;
11093
0
    break;
11094
0
      }
11095
0
            case XML_PARSER_END_TAG:
11096
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11097
0
        goto done;
11098
0
    if (ctxt->sax2) {
11099
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11100
0
        nameNsPop(ctxt);
11101
0
    }
11102
0
#ifdef LIBXML_SAX1_ENABLED
11103
0
      else
11104
0
        xmlParseEndTag1(ctxt, 0);
11105
0
#endif /* LIBXML_SAX1_ENABLED */
11106
0
    if (ctxt->nameNr == 0) {
11107
0
        ctxt->instate = XML_PARSER_EPILOG;
11108
0
    } else {
11109
0
        ctxt->instate = XML_PARSER_CONTENT;
11110
0
    }
11111
0
    break;
            /*
             * The three states below share one handler; the differences
             * are the DOCTYPE branch (MISC only) and the fallback at the
             * end (EPILOG vs. the others).
             */
11112
0
            case XML_PARSER_MISC:
11113
0
            case XML_PARSER_PROLOG:
11114
0
            case XML_PARSER_EPILOG:
11115
0
    SKIP_BLANKS;
                /* Blanks were consumed, so recompute the available input. */
11116
0
                avail = ctxt->input->end - ctxt->input->cur;
11117
0
    if (avail < 1)
11118
0
        goto done;
11119
0
    if (ctxt->input->cur[0] == '<') {
11120
0
                    if ((!terminate) && (avail < 2))
11121
0
                        goto done;
11122
0
                    next = ctxt->input->cur[1];
11123
0
                    if (next == '?') {
11124
0
                        if ((!terminate) &&
11125
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11126
0
                            goto done;
11127
0
                        xmlParsePI(ctxt);
11128
0
                        break;
11129
0
                    } else if (next == '!') {
11130
0
                        if ((!terminate) && (avail < 3))
11131
0
                            goto done;
11132
11133
0
                        if (ctxt->input->cur[2] == '-') {
11134
0
                            if ((!terminate) && (avail < 4))
11135
0
                                goto done;
11136
0
                            if (ctxt->input->cur[3] == '-') {
11137
0
                                if ((!terminate) &&
11138
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11139
0
                                    goto done;
11140
0
                                xmlParseComment(ctxt);
11141
0
                                break;
11142
0
                            }
11143
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
                            /* "<!DOCTYPE" is 9 bytes long. */
11144
0
                            if ((!terminate) && (avail < 9))
11145
0
                                goto done;
11146
0
                            if ((ctxt->input->cur[2] == 'D') &&
11147
0
                                (ctxt->input->cur[3] == 'O') &&
11148
0
                                (ctxt->input->cur[4] == 'C') &&
11149
0
                                (ctxt->input->cur[5] == 'T') &&
11150
0
                                (ctxt->input->cur[6] == 'Y') &&
11151
0
                                (ctxt->input->cur[7] == 'P') &&
11152
0
                                (ctxt->input->cur[8] == 'E')) {
11153
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11154
0
                                    goto done;
11155
0
                                ctxt->inSubset = 1;
11156
0
                                xmlParseDocTypeDecl(ctxt);
                                /* A '[' opens an internal subset, handled by the DTD state. */
11157
0
                                if (RAW == '[') {
11158
0
                                    ctxt->instate = XML_PARSER_DTD;
11159
0
                                } else {
11160
0
                                    if (RAW == '>')
11161
0
                                        NEXT;
11162
                                    /*
11163
                                     * Create and update the external subset.
11164
                                     */
11165
0
                                    ctxt->inSubset = 2;
11166
0
                                    if ((ctxt->sax != NULL) &&
11167
0
                                        (!ctxt->disableSAX) &&
11168
0
                                        (ctxt->sax->externalSubset != NULL))
11169
0
                                        ctxt->sax->externalSubset(
11170
0
                                                ctxt->userData,
11171
0
                                                ctxt->intSubName,
11172
0
                                                ctxt->extSubSystem,
11173
0
                                                ctxt->extSubURI);
11174
0
                                    ctxt->inSubset = 0;
11175
0
                                    xmlCleanSpecialAttr(ctxt);
11176
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11177
0
                                }
11178
0
                                break;
11179
0
                            }
11180
0
                        }
11181
0
                    }
11182
0
                }
11183
                /*
                 * Nothing above matched. In the epilog this ends the
                 * document, raising XML_ERR_DOCUMENT_END only if no error
                 * was recorded so far; in the other two states the input
                 * is handed to the START_TAG state.
                 */
11184
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11185
0
                    if (ctxt->errNo == XML_ERR_OK)
11186
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11187
0
        ctxt->instate = XML_PARSER_EOF;
11188
0
                    xmlFinishDocument(ctxt);
11189
0
                } else {
11190
0
        ctxt->instate = XML_PARSER_START_TAG;
11191
0
    }
11192
0
    break;
11193
0
            case XML_PARSER_DTD: {
                /* Unless terminating, the internal subset is only parsed once fully buffered. */
11194
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11195
0
                    goto done;
11196
0
    xmlParseInternalSubset(ctxt);
11197
0
    ctxt->inSubset = 2;
11198
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11199
0
        (ctxt->sax->externalSubset != NULL))
11200
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11201
0
          ctxt->extSubSystem, ctxt->extSubURI);
11202
0
    ctxt->inSubset = 0;
11203
0
    xmlCleanSpecialAttr(ctxt);
11204
0
    ctxt->instate = XML_PARSER_PROLOG;
11205
0
                break;
11206
0
      }
            /* Any state not listed above is reported as an internal error. */
11207
0
            default:
11208
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11209
0
      "PP: internal error\n");
11210
0
    ctxt->instate = XML_PARSER_EOF;
11211
0
    break;
11212
0
  }
11213
0
    }
11214
0
done:
11215
0
    return(ret);
11216
0
}
11217
11218
/**
11219
 * Parse a chunk of memory in push parser mode.
11220
 *
11221
 * Assumes that the parser context was initialized with
11222
 * #xmlCreatePushParserCtxt.
11223
 *
11224
 * The last chunk, which will often be empty, must be marked with
11225
 * the `terminate` flag. With the default SAX callbacks, the resulting
11226
 * document will be available in ctxt->myDoc. This pointer will not
11227
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11228
 * caller. If the document isn't well-formed, it will still be returned
11229
 * in ctxt->myDoc.
11230
 *
11231
 * As an exception, #xmlCtxtResetPush will free the document in
11232
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11233
 * the document.
11234
 *
11235
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11236
 * result document.
11237
 *
11238
 * @param ctxt  an XML parser context
11239
 * @param chunk  chunk of memory
11240
 * @param size  size of chunk in bytes
11241
 * @param terminate  last chunk indicator
11242
 * @returns an xmlParserErrors code (0 on success).
11243
 */
11244
int
11245
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11246
0
              int terminate) {
11247
0
    size_t curBase;
11248
0
    size_t maxLength;
11249
0
    size_t pos;
11250
0
    int end_in_lf = 0;
11251
0
    int res;
11252
11253
0
    if ((ctxt == NULL) || (size < 0))
11254
0
        return(XML_ERR_ARGUMENT);
11255
0
    if ((chunk == NULL) && (size > 0))
11256
0
        return(XML_ERR_ARGUMENT);
11257
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11258
0
        return(XML_ERR_ARGUMENT);
11259
0
    if (ctxt->disableSAX != 0)
11260
0
        return(ctxt->errNo);
11261
11262
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11263
0
    if (ctxt->instate == XML_PARSER_START)
11264
0
        xmlCtxtInitializeLate(ctxt);
11265
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11266
0
        (chunk[size - 1] == '\r')) {
11267
0
  end_in_lf = 1;
11268
0
  size--;
11269
0
    }
11270
11271
    /*
11272
     * Also push an empty chunk to make sure that the raw buffer
11273
     * will be flushed if there is an encoder.
11274
     */
11275
0
    pos = ctxt->input->cur - ctxt->input->base;
11276
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11277
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11278
0
    if (res < 0) {
11279
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11280
0
        xmlHaltParser(ctxt);
11281
0
        return(ctxt->errNo);
11282
0
    }
11283
11284
0
    xmlParseTryOrFinish(ctxt, terminate);
11285
11286
0
    curBase = ctxt->input->cur - ctxt->input->base;
11287
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11288
0
                XML_MAX_HUGE_LENGTH :
11289
0
                XML_MAX_LOOKUP_LIMIT;
11290
0
    if (curBase > maxLength) {
11291
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11292
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11293
0
        xmlHaltParser(ctxt);
11294
0
    }
11295
11296
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11297
0
        return(ctxt->errNo);
11298
11299
0
    if (end_in_lf == 1) {
11300
0
  pos = ctxt->input->cur - ctxt->input->base;
11301
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11302
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11303
0
        if (res < 0) {
11304
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11305
0
            xmlHaltParser(ctxt);
11306
0
            return(ctxt->errNo);
11307
0
        }
11308
0
    }
11309
0
    if (terminate) {
11310
  /*
11311
   * Check for termination
11312
   */
11313
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11314
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11315
0
            if (ctxt->nameNr > 0) {
11316
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11317
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11318
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11319
0
                        "Premature end of data in tag %s line %d\n",
11320
0
                        name, line, NULL);
11321
0
            } else if (ctxt->instate == XML_PARSER_START) {
11322
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11323
0
            } else {
11324
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11325
0
                               "Start tag expected, '<' not found\n");
11326
0
            }
11327
0
        } else {
11328
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11329
0
        }
11330
0
  if (ctxt->instate != XML_PARSER_EOF) {
11331
0
            ctxt->instate = XML_PARSER_EOF;
11332
0
            xmlFinishDocument(ctxt);
11333
0
  }
11334
0
    }
11335
0
    if (ctxt->wellFormed == 0)
11336
0
  return((xmlParserErrors) ctxt->errNo);
11337
0
    else
11338
0
        return(0);
11339
0
}
11340
11341
/************************************************************************
11342
 *                  *
11343
 *    I/O front end functions to the parser     *
11344
 *                  *
11345
 ************************************************************************/
11346
11347
/**
11348
 * Create a parser context for using the XML parser in push mode.
11349
 * See #xmlParseChunk.
11350
 *
11351
 * Passing an initial chunk is useless and deprecated.
11352
 *
11353
 * The push parser doesn't support recovery mode or the
11354
 * XML_PARSE_NOBLANKS option.
11355
 *
11356
 * `filename` is used as base URI to fetch external entities and for
11357
 * error reports.
11358
 *
11359
 * @param sax  a SAX handler (optional)
11360
 * @param user_data  user data for SAX callbacks (optional)
11361
 * @param chunk  initial chunk (optional, deprecated)
11362
 * @param size  size of initial chunk in bytes
11363
 * @param filename  file name or URI (optional)
11364
 * @returns the new parser context or NULL if a memory allocation
11365
 * failed.
11366
 */
11367
11368
xmlParserCtxt *
11369
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11370
0
                        const char *chunk, int size, const char *filename) {
11371
0
    xmlParserCtxtPtr ctxt;
11372
0
    xmlParserInputPtr input;
11373
11374
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11375
0
    if (ctxt == NULL)
11376
0
  return(NULL);
11377
11378
0
    ctxt->options &= ~XML_PARSE_NODICT;
11379
0
    ctxt->dictNames = 1;
11380
11381
0
    input = xmlNewPushInput(filename, chunk, size);
11382
0
    if (input == NULL) {
11383
0
  xmlFreeParserCtxt(ctxt);
11384
0
  return(NULL);
11385
0
    }
11386
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11387
0
        xmlFreeInputStream(input);
11388
0
        xmlFreeParserCtxt(ctxt);
11389
0
        return(NULL);
11390
0
    }
11391
11392
0
    return(ctxt);
11393
0
}
11394
#endif /* LIBXML_PUSH_ENABLED */
11395
11396
/**
11397
 * Blocks further parser processing
11398
 *
11399
 * @param ctxt  an XML parser context
11400
 */
11401
void
11402
0
xmlStopParser(xmlParserCtxt *ctxt) {
11403
0
    if (ctxt == NULL)
11404
0
        return;
11405
0
    xmlHaltParser(ctxt);
11406
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11407
0
        ctxt->errNo = XML_ERR_USER_STOP;
11408
0
}
11409
11410
/**
11411
 * Create a parser context for using the XML parser with an existing
11412
 * I/O stream
11413
 *
11414
 * @param sax  a SAX handler (optional)
11415
 * @param user_data  user data for SAX callbacks (optional)
11416
 * @param ioread  an I/O read function
11417
 * @param ioclose  an I/O close function (optional)
11418
 * @param ioctx  an I/O handler
11419
 * @param enc  the charset encoding if known (deprecated)
11420
 * @returns the new parser context or NULL
11421
 */
11422
xmlParserCtxt *
11423
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11424
                      xmlInputReadCallback ioread,
11425
                      xmlInputCloseCallback ioclose,
11426
0
                      void *ioctx, xmlCharEncoding enc) {
11427
0
    xmlParserCtxtPtr ctxt;
11428
0
    xmlParserInputPtr input;
11429
0
    const char *encoding;
11430
11431
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11432
0
    if (ctxt == NULL)
11433
0
  return(NULL);
11434
11435
0
    encoding = xmlGetCharEncodingName(enc);
11436
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11437
0
                                  encoding, 0);
11438
0
    if (input == NULL) {
11439
0
  xmlFreeParserCtxt(ctxt);
11440
0
        return (NULL);
11441
0
    }
11442
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11443
0
        xmlFreeInputStream(input);
11444
0
        xmlFreeParserCtxt(ctxt);
11445
0
        return(NULL);
11446
0
    }
11447
11448
0
    return(ctxt);
11449
0
}
11450
11451
#ifdef LIBXML_VALID_ENABLED
11452
/************************************************************************
11453
 *                  *
11454
 *    Front ends when parsing a DTD       *
11455
 *                  *
11456
 ************************************************************************/
11457
11458
/**
11459
 * Parse a DTD.
11460
 *
11461
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11462
 * to make external entities work.
11463
 *
11464
 * @since 2.14.0
11465
 *
11466
 * @param ctxt  a parser context
11467
 * @param input  a parser input
11468
 * @param publicId  public ID of the DTD (optional)
11469
 * @param systemId  system ID of the DTD (optional)
11470
 * @returns the resulting xmlDtd or NULL in case of error.
11471
 * `input` will be freed by the function in any case.
11472
 */
11473
xmlDtd *
11474
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11475
0
                const xmlChar *publicId, const xmlChar *systemId) {
11476
0
    xmlDtdPtr ret = NULL;
11477
11478
0
    if ((ctxt == NULL) || (input == NULL)) {
11479
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11480
0
        xmlFreeInputStream(input);
11481
0
        return(NULL);
11482
0
    }
11483
11484
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11485
0
        xmlFreeInputStream(input);
11486
0
        return(NULL);
11487
0
    }
11488
11489
0
    if (publicId == NULL)
11490
0
        publicId = BAD_CAST "none";
11491
0
    if (systemId == NULL)
11492
0
        systemId = BAD_CAST "none";
11493
11494
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11495
0
    if (ctxt->myDoc == NULL) {
11496
0
        xmlErrMemory(ctxt);
11497
0
        goto error;
11498
0
    }
11499
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11500
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11501
0
                                       publicId, systemId);
11502
0
    if (ctxt->myDoc->extSubset == NULL) {
11503
0
        xmlErrMemory(ctxt);
11504
0
        xmlFreeDoc(ctxt->myDoc);
11505
0
        goto error;
11506
0
    }
11507
11508
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11509
11510
0
    if (ctxt->wellFormed) {
11511
0
        ret = ctxt->myDoc->extSubset;
11512
0
        ctxt->myDoc->extSubset = NULL;
11513
0
        if (ret != NULL) {
11514
0
            xmlNodePtr tmp;
11515
11516
0
            ret->doc = NULL;
11517
0
            tmp = ret->children;
11518
0
            while (tmp != NULL) {
11519
0
                tmp->doc = NULL;
11520
0
                tmp = tmp->next;
11521
0
            }
11522
0
        }
11523
0
    } else {
11524
0
        ret = NULL;
11525
0
    }
11526
0
    xmlFreeDoc(ctxt->myDoc);
11527
0
    ctxt->myDoc = NULL;
11528
11529
0
error:
11530
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11531
11532
0
    return(ret);
11533
0
}
11534
11535
/**
11536
 * Load and parse a DTD
11537
 *
11538
 * @deprecated Use #xmlCtxtParseDtd.
11539
 *
11540
 * @param sax  the SAX handler block or NULL
11541
 * @param input  an Input Buffer
11542
 * @param enc  the charset encoding if known
11543
 * @returns the resulting xmlDtd or NULL in case of error.
11544
 * `input` will be freed by the function in any case.
11545
 */
11546
11547
xmlDtd *
11548
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11549
0
        xmlCharEncoding enc) {
11550
0
    xmlDtdPtr ret = NULL;
11551
0
    xmlParserCtxtPtr ctxt;
11552
0
    xmlParserInputPtr pinput = NULL;
11553
11554
0
    if (input == NULL)
11555
0
  return(NULL);
11556
11557
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11558
0
    if (ctxt == NULL) {
11559
0
        xmlFreeParserInputBuffer(input);
11560
0
  return(NULL);
11561
0
    }
11562
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11563
11564
    /*
11565
     * generate a parser input from the I/O handler
11566
     */
11567
11568
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11569
0
    if (pinput == NULL) {
11570
0
        xmlFreeParserInputBuffer(input);
11571
0
  xmlFreeParserCtxt(ctxt);
11572
0
  return(NULL);
11573
0
    }
11574
11575
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11576
0
        xmlSwitchEncoding(ctxt, enc);
11577
0
    }
11578
11579
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11580
11581
0
    xmlFreeParserCtxt(ctxt);
11582
0
    return(ret);
11583
0
}
11584
11585
/**
11586
 * Load and parse an external subset.
11587
 *
11588
 * @deprecated Use #xmlCtxtParseDtd.
11589
 *
11590
 * @param sax  the SAX handler block
11591
 * @param publicId  public identifier of the DTD (optional)
11592
 * @param systemId  system identifier (URL) of the DTD
11593
 * @returns the resulting xmlDtd or NULL in case of error.
11594
 */
11595
11596
xmlDtd *
11597
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11598
0
               const xmlChar *systemId) {
11599
0
    xmlDtdPtr ret = NULL;
11600
0
    xmlParserCtxtPtr ctxt;
11601
0
    xmlParserInputPtr input = NULL;
11602
0
    xmlChar* systemIdCanonic;
11603
11604
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11605
11606
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11607
0
    if (ctxt == NULL) {
11608
0
  return(NULL);
11609
0
    }
11610
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11611
11612
    /*
11613
     * Canonicalise the system ID
11614
     */
11615
0
    systemIdCanonic = xmlCanonicPath(systemId);
11616
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11617
0
  xmlFreeParserCtxt(ctxt);
11618
0
  return(NULL);
11619
0
    }
11620
11621
    /*
11622
     * Ask the Entity resolver to load the damn thing
11623
     */
11624
11625
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11626
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11627
0
                                   systemIdCanonic);
11628
0
    if (input == NULL) {
11629
0
  xmlFreeParserCtxt(ctxt);
11630
0
  if (systemIdCanonic != NULL)
11631
0
      xmlFree(systemIdCanonic);
11632
0
  return(NULL);
11633
0
    }
11634
11635
0
    if (input->filename == NULL)
11636
0
  input->filename = (char *) systemIdCanonic;
11637
0
    else
11638
0
  xmlFree(systemIdCanonic);
11639
11640
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11641
11642
0
    xmlFreeParserCtxt(ctxt);
11643
0
    return(ret);
11644
0
}
11645
11646
11647
/**
11648
 * Load and parse an external subset.
11649
 *
11650
 * @param publicId  public identifier of the DTD (optional)
11651
 * @param systemId  system identifier (URL) of the DTD
11652
 * @returns the resulting xmlDtd or NULL in case of error.
11653
 */
11654
11655
xmlDtd *
11656
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11657
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11658
0
}
11659
#endif /* LIBXML_VALID_ENABLED */
11660
11661
/************************************************************************
11662
 *                  *
11663
 *    Front ends when parsing an Entity     *
11664
 *                  *
11665
 ************************************************************************/
11666
11667
static xmlNodePtr
11668
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11669
3.89k
                            int hasTextDecl, int buildTree) {
11670
3.89k
    xmlNodePtr root = NULL;
11671
3.89k
    xmlNodePtr list = NULL;
11672
3.89k
    xmlChar *rootName = BAD_CAST "#root";
11673
3.89k
    int result;
11674
11675
3.89k
    if (buildTree) {
11676
3.89k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11677
3.89k
        if (root == NULL) {
11678
0
            xmlErrMemory(ctxt);
11679
0
            goto error;
11680
0
        }
11681
3.89k
    }
11682
11683
3.89k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11684
0
        goto error;
11685
11686
3.89k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11687
3.89k
    spacePush(ctxt, -1);
11688
11689
3.89k
    if (buildTree)
11690
3.89k
        nodePush(ctxt, root);
11691
11692
3.89k
    if (hasTextDecl) {
11693
0
        xmlDetectEncoding(ctxt);
11694
11695
        /*
11696
         * Parse a possible text declaration first
11697
         */
11698
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11699
0
            (IS_BLANK_CH(NXT(5)))) {
11700
0
            xmlParseTextDecl(ctxt);
11701
            /*
11702
             * An XML-1.0 document can't reference an entity not XML-1.0
11703
             */
11704
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11705
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11706
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11707
0
                               "Version mismatch between document and "
11708
0
                               "entity\n");
11709
0
            }
11710
0
        }
11711
0
    }
11712
11713
3.89k
    xmlParseContentInternal(ctxt);
11714
11715
3.89k
    if (ctxt->input->cur < ctxt->input->end)
11716
34
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11717
11718
3.89k
    if ((ctxt->wellFormed) ||
11719
3.89k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11720
3.75k
        if (root != NULL) {
11721
3.75k
            xmlNodePtr cur;
11722
11723
            /*
11724
             * Unlink newly created node list.
11725
             */
11726
3.75k
            list = root->children;
11727
3.75k
            root->children = NULL;
11728
3.75k
            root->last = NULL;
11729
4.61k
            for (cur = list; cur != NULL; cur = cur->next)
11730
863
                cur->parent = NULL;
11731
3.75k
        }
11732
3.75k
    }
11733
11734
    /*
11735
     * Read the rest of the stream in case of errors. We want
11736
     * to account for the whole entity size.
11737
     */
11738
3.89k
    do {
11739
3.89k
        ctxt->input->cur = ctxt->input->end;
11740
3.89k
        xmlParserShrink(ctxt);
11741
3.89k
        result = xmlParserGrow(ctxt);
11742
3.89k
    } while (result > 0);
11743
11744
3.89k
    if (buildTree)
11745
3.89k
        nodePop(ctxt);
11746
11747
3.89k
    namePop(ctxt);
11748
3.89k
    spacePop(ctxt);
11749
11750
3.89k
    xmlCtxtPopInput(ctxt);
11751
11752
3.89k
error:
11753
3.89k
    xmlFreeNode(root);
11754
11755
3.89k
    return(list);
11756
3.89k
}
11757
11758
static void
11759
3.90k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11760
3.90k
    xmlParserInputPtr input;
11761
3.90k
    xmlNodePtr list;
11762
3.90k
    unsigned long consumed;
11763
3.90k
    int isExternal;
11764
3.90k
    int buildTree;
11765
3.90k
    int oldMinNsIndex;
11766
3.90k
    int oldNodelen, oldNodemem;
11767
11768
3.90k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11769
3.90k
    buildTree = (ctxt->node != NULL);
11770
11771
    /*
11772
     * Recursion check
11773
     */
11774
3.90k
    if (ent->flags & XML_ENT_EXPANDING) {
11775
10
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11776
10
        xmlHaltParser(ctxt);
11777
10
        goto error;
11778
10
    }
11779
11780
    /*
11781
     * Load entity
11782
     */
11783
3.89k
    input = xmlNewEntityInputStream(ctxt, ent);
11784
3.89k
    if (input == NULL)
11785
0
        goto error;
11786
11787
    /*
11788
     * When building a tree, we need to limit the scope of namespace
11789
     * declarations, so that entities don't reference xmlNs structs
11790
     * from the parent of a reference.
11791
     */
11792
3.89k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11793
3.89k
    if (buildTree)
11794
3.89k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11795
11796
3.89k
    oldNodelen = ctxt->nodelen;
11797
3.89k
    oldNodemem = ctxt->nodemem;
11798
3.89k
    ctxt->nodelen = 0;
11799
3.89k
    ctxt->nodemem = 0;
11800
11801
    /*
11802
     * Parse content
11803
     *
11804
     * This initiates a recursive call chain:
11805
     *
11806
     * - xmlCtxtParseContentInternal
11807
     * - xmlParseContentInternal
11808
     * - xmlParseReference
11809
     * - xmlCtxtParseEntity
11810
     *
11811
     * The nesting depth is limited by the maximum number of inputs,
11812
     * see xmlCtxtPushInput.
11813
     *
11814
     * It's possible to make this non-recursive (minNsIndex must be
11815
     * stored in the input struct) at the expense of code readability.
11816
     */
11817
11818
3.89k
    ent->flags |= XML_ENT_EXPANDING;
11819
11820
3.89k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11821
11822
3.89k
    ent->flags &= ~XML_ENT_EXPANDING;
11823
11824
3.89k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11825
3.89k
    ctxt->nodelen = oldNodelen;
11826
3.89k
    ctxt->nodemem = oldNodemem;
11827
11828
    /*
11829
     * Entity size accounting
11830
     */
11831
3.89k
    consumed = input->consumed;
11832
3.89k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11833
11834
3.89k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11835
535
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11836
11837
3.89k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11838
535
        if (isExternal)
11839
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11840
11841
535
        ent->children = list;
11842
11843
1.39k
        while (list != NULL) {
11844
863
            list->parent = (xmlNodePtr) ent;
11845
11846
            /*
11847
             * Downstream code like the nginx xslt module can set
11848
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11849
             * might have a different or a NULL document.
11850
             */
11851
863
            if (list->doc != ent->doc)
11852
0
                xmlSetTreeDoc(list, ent->doc);
11853
11854
863
            if (list->next == NULL)
11855
360
                ent->last = list;
11856
863
            list = list->next;
11857
863
        }
11858
3.35k
    } else {
11859
3.35k
        xmlFreeNodeList(list);
11860
3.35k
    }
11861
11862
3.89k
    xmlFreeInputStream(input);
11863
11864
3.90k
error:
11865
3.90k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11866
3.90k
}
11867
11868
/**
11869
 * Parse an external general entity within an existing parsing context
11870
 * An external general parsed entity is well-formed if it matches the
11871
 * production labeled extParsedEnt.
11872
 *
11873
 *     [78] extParsedEnt ::= TextDecl? content
11874
 *
11875
 * @param ctxt  the existing parsing context
11876
 * @param URL  the URL for the entity to load
11877
 * @param ID  the System ID for the entity to load
11878
 * @param listOut  the return value for the set of parsed nodes
11879
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if `ctxt` is NULL and
11880
 *    the parser error code otherwise
11881
 */
11882
11883
int
11884
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11885
0
                           const xmlChar *ID, xmlNode **listOut) {
11886
0
    xmlParserInputPtr input;
11887
0
    xmlNodePtr list;
11888
11889
0
    if (listOut != NULL)
11890
0
        *listOut = NULL;
11891
11892
0
    if (ctxt == NULL)
11893
0
        return(XML_ERR_ARGUMENT);
11894
11895
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11896
0
                            XML_RESOURCE_GENERAL_ENTITY);
11897
0
    if (input == NULL)
11898
0
        return(ctxt->errNo);
11899
11900
0
    xmlCtxtInitializeLate(ctxt);
11901
11902
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11903
0
    if (listOut != NULL)
11904
0
        *listOut = list;
11905
0
    else
11906
0
        xmlFreeNodeList(list);
11907
11908
0
    xmlFreeInputStream(input);
11909
0
    return(ctxt->errNo);
11910
0
}
11911
11912
#ifdef LIBXML_SAX1_ENABLED
11913
/**
11914
 * Parse an external general entity
11915
 * An external general parsed entity is well-formed if it matches the
11916
 * production labeled extParsedEnt.
11917
 *
11918
 * @deprecated Use #xmlParseCtxtExternalEntity.
11919
 *
11920
 *     [78] extParsedEnt ::= TextDecl? content
11921
 *
11922
 * @param doc  the document the chunk pertains to
11923
 * @param sax  the SAX handler block (possibly NULL)
11924
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11925
 * @param depth  Used for loop detection, use 0
11926
 * @param URL  the URL for the entity to load
11927
 * @param ID  the System ID for the entity to load
11928
 * @param list  the return value for the set of parsed nodes
11929
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if `doc` is NULL and
11930
 *    the parser error code otherwise
11931
 */
11932
11933
int
11934
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11935
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11936
0
    xmlParserCtxtPtr ctxt;
11937
0
    int ret;
11938
11939
0
    if (list != NULL)
11940
0
        *list = NULL;
11941
11942
0
    if (doc == NULL)
11943
0
        return(XML_ERR_ARGUMENT);
11944
11945
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11946
0
    if (ctxt == NULL)
11947
0
        return(XML_ERR_NO_MEMORY);
11948
11949
0
    ctxt->depth = depth;
11950
0
    ctxt->myDoc = doc;
11951
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11952
11953
0
    xmlFreeParserCtxt(ctxt);
11954
0
    return(ret);
11955
0
}
11956
11957
/**
11958
 * Parse a well-balanced chunk of an XML document
11959
 * called by the parser
11960
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11961
 * the content production in the XML grammar:
11962
 *
11963
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11964
 *                       Comment)*
11965
 *
11966
 * @param doc  the document the chunk pertains to (must not be NULL)
11967
 * @param sax  the SAX handler block (possibly NULL)
11968
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11969
 * @param depth  Used for loop detection, use 0
11970
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11971
 * @param lst  the return value for the set of parsed nodes
11972
 * @returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if `string` is NULL and
11973
 *    the parser error code otherwise
11974
 */
11975
11976
int
11977
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11978
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11979
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11980
0
                                                depth, string, lst, 0 );
11981
0
}
11982
#endif /* LIBXML_SAX1_ENABLED */
11983
11984
/**
11985
 * Parse a well-balanced chunk of XML matching the 'content' production.
11986
 *
11987
 * Namespaces in scope of `node` and entities of `node`'s document are
11988
 * recognized. When validating, the DTD of `node`'s document is used.
11989
 *
11990
 * Always consumes `input` even in error case.
11991
 *
11992
 * @since 2.14.0
11993
 *
11994
 * @param ctxt  parser context
11995
 * @param input  parser input
11996
 * @param node  target node or document
11997
 * @param hasTextDecl  whether to parse text declaration
11998
 * @returns a node list or NULL in case of error.
11999
 */
12000
xmlNode *
12001
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12002
0
                    xmlNode *node, int hasTextDecl) {
12003
0
    xmlDocPtr doc;
12004
0
    xmlNodePtr cur, list = NULL;
12005
0
    int nsnr = 0;
12006
0
    xmlDictPtr oldDict;
12007
0
    int oldOptions, oldDictNames, oldLoadSubset;
12008
12009
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12010
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12011
0
        goto exit;
12012
0
    }
12013
12014
0
    doc = node->doc;
12015
0
    if (doc == NULL) {
12016
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12017
0
        goto exit;
12018
0
    }
12019
12020
0
    switch (node->type) {
12021
0
        case XML_ELEMENT_NODE:
12022
0
        case XML_DOCUMENT_NODE:
12023
0
        case XML_HTML_DOCUMENT_NODE:
12024
0
            break;
12025
12026
0
        case XML_ATTRIBUTE_NODE:
12027
0
        case XML_TEXT_NODE:
12028
0
        case XML_CDATA_SECTION_NODE:
12029
0
        case XML_ENTITY_REF_NODE:
12030
0
        case XML_PI_NODE:
12031
0
        case XML_COMMENT_NODE:
12032
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12033
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12034
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12035
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12036
0
                    node = cur;
12037
0
                    break;
12038
0
                }
12039
0
            }
12040
0
            break;
12041
12042
0
        default:
12043
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12044
0
            goto exit;
12045
0
    }
12046
12047
0
    xmlCtxtReset(ctxt);
12048
12049
0
    oldDict = ctxt->dict;
12050
0
    oldOptions = ctxt->options;
12051
0
    oldDictNames = ctxt->dictNames;
12052
0
    oldLoadSubset = ctxt->loadsubset;
12053
12054
    /*
12055
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12056
     */
12057
0
    if (doc->dict != NULL) {
12058
0
        ctxt->dict = doc->dict;
12059
0
    } else {
12060
0
        ctxt->options |= XML_PARSE_NODICT;
12061
0
        ctxt->dictNames = 0;
12062
0
    }
12063
12064
    /*
12065
     * Disable IDs
12066
     */
12067
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12068
12069
0
    ctxt->myDoc = doc;
12070
12071
0
#ifdef LIBXML_HTML_ENABLED
12072
0
    if (ctxt->html) {
12073
        /*
12074
         * When parsing in context, it makes no sense to add implied
12075
         * elements like html/body/etc...
12076
         */
12077
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12078
12079
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12080
0
    } else
12081
0
#endif
12082
0
    {
12083
0
        xmlCtxtInitializeLate(ctxt);
12084
12085
        /*
12086
         * initialize the SAX2 namespaces stack
12087
         */
12088
0
        cur = node;
12089
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12090
0
            xmlNsPtr ns = cur->nsDef;
12091
0
            xmlHashedString hprefix, huri;
12092
12093
0
            while (ns != NULL) {
12094
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12095
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12096
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12097
0
                    nsnr++;
12098
0
                ns = ns->next;
12099
0
            }
12100
0
            cur = cur->parent;
12101
0
        }
12102
12103
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12104
12105
0
        if (nsnr > 0)
12106
0
            xmlParserNsPop(ctxt, nsnr);
12107
0
    }
12108
12109
0
    ctxt->dict = oldDict;
12110
0
    ctxt->options = oldOptions;
12111
0
    ctxt->dictNames = oldDictNames;
12112
0
    ctxt->loadsubset = oldLoadSubset;
12113
0
    ctxt->myDoc = NULL;
12114
0
    ctxt->node = NULL;
12115
12116
0
exit:
12117
0
    xmlFreeInputStream(input);
12118
0
    return(list);
12119
0
}
12120
12121
/**
12122
 * Parse a well-balanced chunk of an XML document
12123
 * within the context (DTD, namespaces, etc ...) of the given node.
12124
 *
12125
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12126
 * the content production in the XML grammar:
12127
 *
12128
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12129
 *                       Comment)*
12130
 *
12131
 * This function assumes the encoding of `node`'s document which is
12132
 * typically not what you want. A better alternative is
12133
 * #xmlCtxtParseContent.
12134
 *
12135
 * @param node  the context node
12136
 * @param data  the input string
12137
 * @param datalen  the input string length in bytes
12138
 * @param options  a combination of xmlParserOption
12139
 * @param listOut  the return value for the set of parsed nodes
12140
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12141
 * error code otherwise
12142
 */
12143
xmlParserErrors
12144
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12145
0
                      int options, xmlNode **listOut) {
12146
0
    xmlParserCtxtPtr ctxt;
12147
0
    xmlParserInputPtr input;
12148
0
    xmlDocPtr doc;
12149
0
    xmlNodePtr list;
12150
0
    xmlParserErrors ret;
12151
12152
0
    if (listOut == NULL)
12153
0
        return(XML_ERR_INTERNAL_ERROR);
12154
0
    *listOut = NULL;
12155
12156
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12157
0
        return(XML_ERR_INTERNAL_ERROR);
12158
12159
0
    doc = node->doc;
12160
0
    if (doc == NULL)
12161
0
        return(XML_ERR_INTERNAL_ERROR);
12162
12163
0
#ifdef LIBXML_HTML_ENABLED
12164
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12165
0
        ctxt = htmlNewParserCtxt();
12166
0
    }
12167
0
    else
12168
0
#endif
12169
0
        ctxt = xmlNewParserCtxt();
12170
12171
0
    if (ctxt == NULL)
12172
0
        return(XML_ERR_NO_MEMORY);
12173
12174
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12175
0
                                      (const char *) doc->encoding,
12176
0
                                      XML_INPUT_BUF_STATIC);
12177
0
    if (input == NULL) {
12178
0
        xmlFreeParserCtxt(ctxt);
12179
0
        return(XML_ERR_NO_MEMORY);
12180
0
    }
12181
12182
0
    xmlCtxtUseOptions(ctxt, options);
12183
12184
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12185
12186
0
    if (list == NULL) {
12187
0
        ret = ctxt->errNo;
12188
0
        if (ret == XML_ERR_ARGUMENT)
12189
0
            ret = XML_ERR_INTERNAL_ERROR;
12190
0
    } else {
12191
0
        ret = XML_ERR_OK;
12192
0
        *listOut = list;
12193
0
    }
12194
12195
0
    xmlFreeParserCtxt(ctxt);
12196
12197
0
    return(ret);
12198
0
}
12199
12200
#ifdef LIBXML_SAX1_ENABLED
12201
/**
12202
 * Parse a well-balanced chunk of an XML document
12203
 *
12204
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12205
 * the content production in the XML grammar:
12206
 *
12207
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12208
 *                       Comment)*
12209
 *
12210
 * In case recover is set to 1, the nodelist will not be empty even if
12211
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12212
 * some extent.
12213
 *
12214
 * @param doc  the document the chunk pertains to (must not be NULL)
12215
 * @param sax  the SAX handler block (possibly NULL)
12216
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12217
 * @param depth  Used for loop detection, use 0
12218
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12219
 * @param listOut  the return value for the set of parsed nodes
12220
 * @param recover  return nodes even if the data is broken (use 0)
12221
 * @returns 0 if the chunk is well balanced, or the parser error code
12222
 * otherwise.
12223
 */
12224
int
12225
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12226
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12227
0
     int recover) {
12228
0
    xmlParserCtxtPtr ctxt;
12229
0
    xmlParserInputPtr input;
12230
0
    xmlNodePtr list;
12231
0
    int ret;
12232
12233
0
    if (listOut != NULL)
12234
0
        *listOut = NULL;
12235
12236
0
    if (string == NULL)
12237
0
        return(XML_ERR_ARGUMENT);
12238
12239
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12240
0
    if (ctxt == NULL)
12241
0
        return(XML_ERR_NO_MEMORY);
12242
12243
0
    xmlCtxtInitializeLate(ctxt);
12244
12245
0
    ctxt->depth = depth;
12246
0
    ctxt->myDoc = doc;
12247
0
    if (recover) {
12248
0
        ctxt->options |= XML_PARSE_RECOVER;
12249
0
        ctxt->recovery = 1;
12250
0
    }
12251
12252
0
    input = xmlNewStringInputStream(ctxt, string);
12253
0
    if (input == NULL) {
12254
0
        ret = ctxt->errNo;
12255
0
        goto error;
12256
0
    }
12257
12258
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12259
0
    if (listOut != NULL)
12260
0
        *listOut = list;
12261
0
    else
12262
0
        xmlFreeNodeList(list);
12263
12264
0
    if (!ctxt->wellFormed)
12265
0
        ret = ctxt->errNo;
12266
0
    else
12267
0
        ret = XML_ERR_OK;
12268
12269
0
error:
12270
0
    xmlFreeInputStream(input);
12271
0
    xmlFreeParserCtxt(ctxt);
12272
0
    return(ret);
12273
0
}
12274
12275
/**
12276
 * parse an XML external entity out of context and build a tree.
12277
 * It uses the given SAX function block to handle the parsing callbacks.
12278
 * If sax is NULL, fallback to the default DOM tree building routines.
12279
 *
12280
 * @deprecated Don't use.
12281
 *
12282
 *     [78] extParsedEnt ::= TextDecl? content
12283
 *
12284
 * This corresponds to a "Well Balanced" chunk
12285
 *
12286
 * @param sax  the SAX handler block
12287
 * @param filename  the filename
12288
 * @returns the resulting document tree
12289
 */
12290
12291
xmlDoc *
12292
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12293
0
    xmlDocPtr ret;
12294
0
    xmlParserCtxtPtr ctxt;
12295
12296
0
    ctxt = xmlCreateFileParserCtxt(filename);
12297
0
    if (ctxt == NULL) {
12298
0
  return(NULL);
12299
0
    }
12300
0
    if (sax != NULL) {
12301
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12302
0
            *ctxt->sax = *sax;
12303
0
        } else {
12304
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12305
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12306
0
        }
12307
0
        ctxt->userData = NULL;
12308
0
    }
12309
12310
0
    xmlParseExtParsedEnt(ctxt);
12311
12312
0
    if (ctxt->wellFormed) {
12313
0
  ret = ctxt->myDoc;
12314
0
    } else {
12315
0
        ret = NULL;
12316
0
        xmlFreeDoc(ctxt->myDoc);
12317
0
    }
12318
12319
0
    xmlFreeParserCtxt(ctxt);
12320
12321
0
    return(ret);
12322
0
}
12323
12324
/**
12325
 * parse an XML external entity out of context and build a tree.
12326
 *
12327
 *     [78] extParsedEnt ::= TextDecl? content
12328
 *
12329
 * This corresponds to a "Well Balanced" chunk
12330
 *
12331
 * @param filename  the filename
12332
 * @returns the resulting document tree
12333
 */
12334
12335
xmlDoc *
12336
0
xmlParseEntity(const char *filename) {
12337
0
    return(xmlSAXParseEntity(NULL, filename));
12338
0
}
12339
#endif /* LIBXML_SAX1_ENABLED */
12340
12341
/**
12342
 * Create a parser context for an external entity
12343
 * Automatic support for ZLIB/Compress compressed document is provided
12344
 * by default if found at compile-time.
12345
 *
12346
 * @deprecated Don't use.
12347
 *
12348
 * @param URL  the entity URL
12349
 * @param ID  the entity PUBLIC ID
12350
 * @param base  a possible base for the target URI
12351
 * @returns the new parser context or NULL
12352
 */
12353
xmlParserCtxt *
12354
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12355
0
                    const xmlChar *base) {
12356
0
    xmlParserCtxtPtr ctxt;
12357
0
    xmlParserInputPtr input;
12358
0
    xmlChar *uri = NULL;
12359
12360
0
    ctxt = xmlNewParserCtxt();
12361
0
    if (ctxt == NULL)
12362
0
  return(NULL);
12363
12364
0
    if (base != NULL) {
12365
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12366
0
            goto error;
12367
0
        if (uri != NULL)
12368
0
            URL = uri;
12369
0
    }
12370
12371
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12372
0
                            XML_RESOURCE_UNKNOWN);
12373
0
    if (input == NULL)
12374
0
        goto error;
12375
12376
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12377
0
        xmlFreeInputStream(input);
12378
0
        goto error;
12379
0
    }
12380
12381
0
    xmlFree(uri);
12382
0
    return(ctxt);
12383
12384
0
error:
12385
0
    xmlFree(uri);
12386
0
    xmlFreeParserCtxt(ctxt);
12387
0
    return(NULL);
12388
0
}
12389
12390
/************************************************************************
12391
 *                  *
12392
 *    Front ends when parsing from a file     *
12393
 *                  *
12394
 ************************************************************************/
12395
12396
/**
12397
 * Create a parser context for a file or URL content.
12398
 * Automatic support for ZLIB/Compress compressed document is provided
12399
 * by default if found at compile-time and for file accesses
12400
 *
12401
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12402
 *
12403
 * @param filename  the filename or URL
12404
 * @param options  a combination of xmlParserOption
12405
 * @returns the new parser context or NULL
12406
 */
12407
xmlParserCtxt *
12408
xmlCreateURLParserCtxt(const char *filename, int options)
12409
0
{
12410
0
    xmlParserCtxtPtr ctxt;
12411
0
    xmlParserInputPtr input;
12412
12413
0
    ctxt = xmlNewParserCtxt();
12414
0
    if (ctxt == NULL)
12415
0
  return(NULL);
12416
12417
0
    options |= XML_PARSE_UNZIP;
12418
12419
0
    xmlCtxtUseOptions(ctxt, options);
12420
12421
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12422
0
    if (input == NULL) {
12423
0
  xmlFreeParserCtxt(ctxt);
12424
0
  return(NULL);
12425
0
    }
12426
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12427
0
        xmlFreeInputStream(input);
12428
0
        xmlFreeParserCtxt(ctxt);
12429
0
        return(NULL);
12430
0
    }
12431
12432
0
    return(ctxt);
12433
0
}
12434
12435
/**
12436
 * Create a parser context for a file content.
12437
 * Automatic support for ZLIB/Compress compressed document is provided
12438
 * by default if found at compile-time.
12439
 *
12440
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12441
 *
12442
 * @param filename  the filename
12443
 * @returns the new parser context or NULL
12444
 */
12445
xmlParserCtxt *
12446
xmlCreateFileParserCtxt(const char *filename)
12447
0
{
12448
0
    return(xmlCreateURLParserCtxt(filename, 0));
12449
0
}
12450
12451
#ifdef LIBXML_SAX1_ENABLED
12452
/**
12453
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12454
 * compressed document is provided by default if found at compile-time.
12455
 * It use the given SAX function block to handle the parsing callback.
12456
 * If sax is NULL, fallback to the default DOM tree building routines.
12457
 *
12458
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12459
 *
12460
 * User data (void *) is stored within the parser context in the
12461
 * context's _private member, so it is available nearly everywhere in libxml
12462
 *
12463
 * @param sax  the SAX handler block
12464
 * @param filename  the filename
12465
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12466
 *             documents
12467
 * @param data  the userdata
12468
 * @returns the resulting document tree
12469
 */
12470
12471
xmlDoc *
12472
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12473
0
                        int recovery, void *data) {
12474
0
    xmlDocPtr ret = NULL;
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr input;
12477
12478
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12479
0
    if (ctxt == NULL)
12480
0
  return(NULL);
12481
12482
0
    if (data != NULL)
12483
0
  ctxt->_private = data;
12484
12485
0
    if (recovery) {
12486
0
        ctxt->options |= XML_PARSE_RECOVER;
12487
0
        ctxt->recovery = 1;
12488
0
    }
12489
12490
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12491
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12492
0
    else
12493
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12494
12495
0
    if (input != NULL)
12496
0
        ret = xmlCtxtParseDocument(ctxt, input);
12497
12498
0
    xmlFreeParserCtxt(ctxt);
12499
0
    return(ret);
12500
0
}
12501
12502
/**
12503
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12504
 * compressed document is provided by default if found at compile-time.
12505
 * It use the given SAX function block to handle the parsing callback.
12506
 * If sax is NULL, fallback to the default DOM tree building routines.
12507
 *
12508
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12509
 *
12510
 * @param sax  the SAX handler block
12511
 * @param filename  the filename
12512
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12513
 *             documents
12514
 * @returns the resulting document tree
12515
 */
12516
12517
xmlDoc *
12518
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12519
0
                          int recovery) {
12520
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12521
0
}
12522
12523
/**
12524
 * parse an XML in-memory document and build a tree.
12525
 * In the case the document is not Well Formed, a attempt to build a
12526
 * tree is tried anyway
12527
 *
12528
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12529
 *
12530
 * @param cur  a pointer to an array of xmlChar
12531
 * @returns the resulting document tree or NULL in case of failure
12532
 */
12533
12534
xmlDoc *
12535
0
xmlRecoverDoc(const xmlChar *cur) {
12536
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12537
0
}
12538
12539
/**
12540
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12541
 * compressed document is provided by default if found at compile-time.
12542
 *
12543
 * @deprecated Use #xmlReadFile.
12544
 *
12545
 * @param filename  the filename
12546
 * @returns the resulting document tree if the file was wellformed,
12547
 * NULL otherwise.
12548
 */
12549
12550
xmlDoc *
12551
0
xmlParseFile(const char *filename) {
12552
0
    return(xmlSAXParseFile(NULL, filename, 0));
12553
0
}
12554
12555
/**
12556
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12557
 * compressed document is provided by default if found at compile-time.
12558
 * In the case the document is not Well Formed, it attempts to build
12559
 * a tree anyway
12560
 *
12561
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12562
 *
12563
 * @param filename  the filename
12564
 * @returns the resulting document tree or NULL in case of failure
12565
 */
12566
12567
xmlDoc *
12568
0
xmlRecoverFile(const char *filename) {
12569
0
    return(xmlSAXParseFile(NULL, filename, 1));
12570
0
}
12571
12572
12573
/**
12574
 * Setup the parser context to parse a new buffer; Clears any prior
12575
 * contents from the parser context. The buffer parameter must not be
12576
 * NULL, but the filename parameter can be
12577
 *
12578
 * @deprecated Don't use.
12579
 *
12580
 * @param ctxt  an XML parser context
12581
 * @param buffer  a xmlChar * buffer
12582
 * @param filename  a file name
12583
 */
12584
void
12585
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12586
                             const char* filename)
12587
0
{
12588
0
    xmlParserInputPtr input;
12589
12590
0
    if ((ctxt == NULL) || (buffer == NULL))
12591
0
        return;
12592
12593
0
    xmlCtxtReset(ctxt);
12594
12595
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12596
0
                                      NULL, 0);
12597
0
    if (input == NULL)
12598
0
        return;
12599
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12600
0
        xmlFreeInputStream(input);
12601
0
}
12602
12603
/**
12604
 * parse an XML file and call the given SAX handler routines.
12605
 * Automatic support for ZLIB/Compress compressed document is provided
12606
 *
12607
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12608
 *
12609
 * @param sax  a SAX handler
12610
 * @param user_data  The user data returned on SAX callbacks
12611
 * @param filename  a file name
12612
 * @returns 0 in case of success or a error number otherwise
12613
 */
12614
int
12615
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12616
0
                    const char *filename) {
12617
0
    int ret = 0;
12618
0
    xmlParserCtxtPtr ctxt;
12619
12620
0
    ctxt = xmlCreateFileParserCtxt(filename);
12621
0
    if (ctxt == NULL) return -1;
12622
0
    if (sax != NULL) {
12623
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12624
0
            *ctxt->sax = *sax;
12625
0
        } else {
12626
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12627
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12628
0
        }
12629
0
  ctxt->userData = user_data;
12630
0
    }
12631
12632
0
    xmlParseDocument(ctxt);
12633
12634
0
    if (ctxt->wellFormed)
12635
0
  ret = 0;
12636
0
    else {
12637
0
        if (ctxt->errNo != 0)
12638
0
      ret = ctxt->errNo;
12639
0
  else
12640
0
      ret = -1;
12641
0
    }
12642
0
    if (ctxt->myDoc != NULL) {
12643
0
        xmlFreeDoc(ctxt->myDoc);
12644
0
  ctxt->myDoc = NULL;
12645
0
    }
12646
0
    xmlFreeParserCtxt(ctxt);
12647
12648
0
    return ret;
12649
0
}
12650
#endif /* LIBXML_SAX1_ENABLED */
12651
12652
/************************************************************************
12653
 *                  *
12654
 *    Front ends when parsing from memory     *
12655
 *                  *
12656
 ************************************************************************/
12657
12658
/**
12659
 * Create a parser context for an XML in-memory document. The input buffer
12660
 * must not contain a terminating null byte.
12661
 *
12662
 * @param buffer  a pointer to a char array
12663
 * @param size  the size of the array
12664
 * @returns the new parser context or NULL
12665
 */
12666
xmlParserCtxt *
12667
15.1k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12668
15.1k
    xmlParserCtxtPtr ctxt;
12669
15.1k
    xmlParserInputPtr input;
12670
12671
15.1k
    if (size < 0)
12672
0
  return(NULL);
12673
12674
15.1k
    ctxt = xmlNewParserCtxt();
12675
15.1k
    if (ctxt == NULL)
12676
0
  return(NULL);
12677
12678
15.1k
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12679
15.1k
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  return(NULL);
12682
0
    }
12683
15.1k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12684
0
        xmlFreeInputStream(input);
12685
0
        xmlFreeParserCtxt(ctxt);
12686
0
        return(NULL);
12687
0
    }
12688
12689
15.1k
    return(ctxt);
12690
15.1k
}
12691
12692
#ifdef LIBXML_SAX1_ENABLED
12693
/**
12694
 * parse an XML in-memory block and use the given SAX function block
12695
 * to handle the parsing callback. If sax is NULL, fallback to the default
12696
 * DOM tree building routines.
12697
 *
12698
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12699
 *
12700
 * User data (void *) is stored within the parser context in the
12701
 * context's _private member, so it is available nearly everywhere in libxml
12702
 *
12703
 * @param sax  the SAX handler block
12704
 * @param buffer  an pointer to a char array
12705
 * @param size  the size of the array
12706
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12707
 *             documents
12708
 * @param data  the userdata
12709
 * @returns the resulting document tree
12710
 */
12711
12712
xmlDoc *
12713
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12714
0
                          int size, int recovery, void *data) {
12715
0
    xmlDocPtr ret = NULL;
12716
0
    xmlParserCtxtPtr ctxt;
12717
0
    xmlParserInputPtr input;
12718
12719
0
    if (size < 0)
12720
0
        return(NULL);
12721
12722
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12723
0
    if (ctxt == NULL)
12724
0
        return(NULL);
12725
12726
0
    if (data != NULL)
12727
0
  ctxt->_private=data;
12728
12729
0
    if (recovery) {
12730
0
        ctxt->options |= XML_PARSE_RECOVER;
12731
0
        ctxt->recovery = 1;
12732
0
    }
12733
12734
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12735
0
                                      XML_INPUT_BUF_STATIC);
12736
12737
0
    if (input != NULL)
12738
0
        ret = xmlCtxtParseDocument(ctxt, input);
12739
12740
0
    xmlFreeParserCtxt(ctxt);
12741
0
    return(ret);
12742
0
}
12743
12744
/**
12745
 * parse an XML in-memory block and use the given SAX function block
12746
 * to handle the parsing callback. If sax is NULL, fallback to the default
12747
 * DOM tree building routines.
12748
 *
12749
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12750
 *
12751
 * @param sax  the SAX handler block
12752
 * @param buffer  an pointer to a char array
12753
 * @param size  the size of the array
12754
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12755
 *             documents
12756
 * @returns the resulting document tree
12757
 */
12758
xmlDoc *
12759
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12760
0
            int size, int recovery) {
12761
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12762
0
}
12763
12764
/**
12765
 * parse an XML in-memory block and build a tree.
12766
 *
12767
 * @deprecated Use #xmlReadMemory.
12768
 *
12769
 * @param buffer  an pointer to a char array
12770
 * @param size  the size of the array
12771
 * @returns the resulting document tree
12772
 */
12773
12774
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12775
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12776
0
}
12777
12778
/**
12779
 * parse an XML in-memory block and build a tree.
12780
 * In the case the document is not Well Formed, an attempt to
12781
 * build a tree is tried anyway
12782
 *
12783
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12784
 *
12785
 * @param buffer  an pointer to a char array
12786
 * @param size  the size of the array
12787
 * @returns the resulting document tree or NULL in case of error
12788
 */
12789
12790
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12791
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12792
0
}
12793
12794
/**
12795
 * parse an XML in-memory buffer and call the given SAX handler routines.
12796
 *
12797
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12798
 *
12799
 * @param sax  a SAX handler
12800
 * @param user_data  The user data returned on SAX callbacks
12801
 * @param buffer  an in-memory XML document input
12802
 * @param size  the length of the XML document in bytes
12803
 * @returns 0 in case of success or a error number otherwise
12804
 */
12805
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12806
0
        const char *buffer, int size) {
12807
0
    int ret = 0;
12808
0
    xmlParserCtxtPtr ctxt;
12809
12810
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12811
0
    if (ctxt == NULL) return -1;
12812
0
    if (sax != NULL) {
12813
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12814
0
            *ctxt->sax = *sax;
12815
0
        } else {
12816
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12817
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12818
0
        }
12819
0
  ctxt->userData = user_data;
12820
0
    }
12821
12822
0
    xmlParseDocument(ctxt);
12823
12824
0
    if (ctxt->wellFormed)
12825
0
  ret = 0;
12826
0
    else {
12827
0
        if (ctxt->errNo != 0)
12828
0
      ret = ctxt->errNo;
12829
0
  else
12830
0
      ret = -1;
12831
0
    }
12832
0
    if (ctxt->myDoc != NULL) {
12833
0
        xmlFreeDoc(ctxt->myDoc);
12834
0
  ctxt->myDoc = NULL;
12835
0
    }
12836
0
    xmlFreeParserCtxt(ctxt);
12837
12838
0
    return ret;
12839
0
}
12840
#endif /* LIBXML_SAX1_ENABLED */
12841
12842
/**
12843
 * Creates a parser context for an XML in-memory document.
12844
 *
12845
 * @param str  a pointer to an array of xmlChar
12846
 * @returns the new parser context or NULL
12847
 */
12848
xmlParserCtxt *
12849
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12850
0
    xmlParserCtxtPtr ctxt;
12851
0
    xmlParserInputPtr input;
12852
12853
0
    ctxt = xmlNewParserCtxt();
12854
0
    if (ctxt == NULL)
12855
0
  return(NULL);
12856
12857
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12858
0
    if (input == NULL) {
12859
0
  xmlFreeParserCtxt(ctxt);
12860
0
  return(NULL);
12861
0
    }
12862
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12863
0
        xmlFreeInputStream(input);
12864
0
        xmlFreeParserCtxt(ctxt);
12865
0
        return(NULL);
12866
0
    }
12867
12868
0
    return(ctxt);
12869
0
}
12870
12871
#ifdef LIBXML_SAX1_ENABLED
12872
/**
12873
 * parse an XML in-memory document and build a tree.
12874
 * It use the given SAX function block to handle the parsing callback.
12875
 * If sax is NULL, fallback to the default DOM tree building routines.
12876
 *
12877
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12878
 *
12879
 * @param sax  the SAX handler block
12880
 * @param cur  a pointer to an array of xmlChar
12881
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12882
 *             documents
12883
 * @returns the resulting document tree
12884
 */
12885
12886
xmlDoc *
12887
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12888
0
    xmlDocPtr ret;
12889
0
    xmlParserCtxtPtr ctxt;
12890
0
    xmlSAXHandlerPtr oldsax = NULL;
12891
12892
0
    if (cur == NULL) return(NULL);
12893
12894
12895
0
    ctxt = xmlCreateDocParserCtxt(cur);
12896
0
    if (ctxt == NULL) return(NULL);
12897
0
    if (sax != NULL) {
12898
0
        oldsax = ctxt->sax;
12899
0
        ctxt->sax = sax;
12900
0
        ctxt->userData = NULL;
12901
0
    }
12902
12903
0
    xmlParseDocument(ctxt);
12904
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12905
0
    else {
12906
0
       ret = NULL;
12907
0
       xmlFreeDoc(ctxt->myDoc);
12908
0
       ctxt->myDoc = NULL;
12909
0
    }
12910
0
    if (sax != NULL)
12911
0
  ctxt->sax = oldsax;
12912
0
    xmlFreeParserCtxt(ctxt);
12913
12914
0
    return(ret);
12915
0
}
12916
12917
/**
12918
 * parse an XML in-memory document and build a tree.
12919
 *
12920
 * @deprecated Use #xmlReadDoc.
12921
 *
12922
 * @param cur  a pointer to an array of xmlChar
12923
 * @returns the resulting document tree
12924
 */
12925
12926
xmlDoc *
12927
0
xmlParseDoc(const xmlChar *cur) {
12928
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12929
0
}
12930
#endif /* LIBXML_SAX1_ENABLED */
12931
12932
/************************************************************************
12933
 *                  *
12934
 *  New set (2.6.0) of simpler and more flexible APIs   *
12935
 *                  *
12936
 ************************************************************************/
12937
12938
/**
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named `dict` must be visible where the macro
 * is expanded. The expansion is a single statement (do/while(0)), so the
 * macro can be used safely in unbraced if/else bodies; callers supply the
 * trailing semicolon.
 *
 * @param str  a string
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12948
12949
/**
12950
 * Reset a parser context
12951
 *
12952
 * @param ctxt  an XML parser context
12953
 */
12954
void
12955
xmlCtxtReset(xmlParserCtxt *ctxt)
12956
0
{
12957
0
    xmlParserInputPtr input;
12958
0
    xmlDictPtr dict;
12959
12960
0
    if (ctxt == NULL)
12961
0
        return;
12962
12963
0
    dict = ctxt->dict;
12964
12965
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12966
0
        xmlFreeInputStream(input);
12967
0
    }
12968
0
    ctxt->inputNr = 0;
12969
0
    ctxt->input = NULL;
12970
12971
0
    ctxt->spaceNr = 0;
12972
0
    if (ctxt->spaceTab != NULL) {
12973
0
  ctxt->spaceTab[0] = -1;
12974
0
  ctxt->space = &ctxt->spaceTab[0];
12975
0
    } else {
12976
0
        ctxt->space = NULL;
12977
0
    }
12978
12979
12980
0
    ctxt->nodeNr = 0;
12981
0
    ctxt->node = NULL;
12982
12983
0
    ctxt->nameNr = 0;
12984
0
    ctxt->name = NULL;
12985
12986
0
    ctxt->nsNr = 0;
12987
0
    xmlParserNsReset(ctxt->nsdb);
12988
12989
0
    DICT_FREE(ctxt->version);
12990
0
    ctxt->version = NULL;
12991
0
    DICT_FREE(ctxt->encoding);
12992
0
    ctxt->encoding = NULL;
12993
0
    DICT_FREE(ctxt->extSubURI);
12994
0
    ctxt->extSubURI = NULL;
12995
0
    DICT_FREE(ctxt->extSubSystem);
12996
0
    ctxt->extSubSystem = NULL;
12997
12998
0
    if (ctxt->directory != NULL) {
12999
0
        xmlFree(ctxt->directory);
13000
0
        ctxt->directory = NULL;
13001
0
    }
13002
13003
0
    if (ctxt->myDoc != NULL)
13004
0
        xmlFreeDoc(ctxt->myDoc);
13005
0
    ctxt->myDoc = NULL;
13006
13007
0
    ctxt->standalone = -1;
13008
0
    ctxt->hasExternalSubset = 0;
13009
0
    ctxt->hasPErefs = 0;
13010
0
    ctxt->html = ctxt->html ? 1 : 0;
13011
0
    ctxt->instate = XML_PARSER_START;
13012
13013
0
    ctxt->wellFormed = 1;
13014
0
    ctxt->nsWellFormed = 1;
13015
0
    ctxt->disableSAX = 0;
13016
0
    ctxt->valid = 1;
13017
0
    ctxt->record_info = 0;
13018
0
    ctxt->checkIndex = 0;
13019
0
    ctxt->endCheckState = 0;
13020
0
    ctxt->inSubset = 0;
13021
0
    ctxt->errNo = XML_ERR_OK;
13022
0
    ctxt->depth = 0;
13023
0
    ctxt->catalogs = NULL;
13024
0
    ctxt->sizeentities = 0;
13025
0
    ctxt->sizeentcopy = 0;
13026
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13027
13028
0
    if (ctxt->attsDefault != NULL) {
13029
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13030
0
        ctxt->attsDefault = NULL;
13031
0
    }
13032
0
    if (ctxt->attsSpecial != NULL) {
13033
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13034
0
        ctxt->attsSpecial = NULL;
13035
0
    }
13036
13037
0
#ifdef LIBXML_CATALOG_ENABLED
13038
0
    if (ctxt->catalogs != NULL)
13039
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13040
0
#endif
13041
0
    ctxt->nbErrors = 0;
13042
0
    ctxt->nbWarnings = 0;
13043
0
    if (ctxt->lastError.code != XML_ERR_OK)
13044
0
        xmlResetError(&ctxt->lastError);
13045
0
}
13046
13047
/**
13048
 * Reset a push parser context
13049
 *
13050
 * @param ctxt  an XML parser context
13051
 * @param chunk  a pointer to an array of chars
13052
 * @param size  number of chars in the array
13053
 * @param filename  an optional file name or URI
13054
 * @param encoding  the document encoding, or NULL
13055
 * @returns 0 in case of success and 1 in case of error
13056
 */
13057
int
13058
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13059
                 int size, const char *filename, const char *encoding)
13060
0
{
13061
0
    xmlParserInputPtr input;
13062
13063
0
    if (ctxt == NULL)
13064
0
        return(1);
13065
13066
0
    xmlCtxtReset(ctxt);
13067
13068
0
    input = xmlNewPushInput(filename, chunk, size);
13069
0
    if (input == NULL)
13070
0
        return(1);
13071
13072
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13073
0
        xmlFreeInputStream(input);
13074
0
        return(1);
13075
0
    }
13076
13077
0
    if (encoding != NULL)
13078
0
        xmlSwitchEncodingName(ctxt, encoding);
13079
13080
0
    return(0);
13081
0
}
13082
13083
static int
13084
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13085
30.2k
{
13086
30.2k
    int allMask;
13087
13088
30.2k
    if (ctxt == NULL)
13089
0
        return(-1);
13090
13091
    /*
13092
     * XInclude options aren't handled by the parser.
13093
     *
13094
     * XML_PARSE_XINCLUDE
13095
     * XML_PARSE_NOXINCNODE
13096
     * XML_PARSE_NOBASEFIX
13097
     */
13098
30.2k
    allMask = XML_PARSE_RECOVER |
13099
30.2k
              XML_PARSE_NOENT |
13100
30.2k
              XML_PARSE_DTDLOAD |
13101
30.2k
              XML_PARSE_DTDATTR |
13102
30.2k
              XML_PARSE_DTDVALID |
13103
30.2k
              XML_PARSE_NOERROR |
13104
30.2k
              XML_PARSE_NOWARNING |
13105
30.2k
              XML_PARSE_PEDANTIC |
13106
30.2k
              XML_PARSE_NOBLANKS |
13107
30.2k
#ifdef LIBXML_SAX1_ENABLED
13108
30.2k
              XML_PARSE_SAX1 |
13109
30.2k
#endif
13110
30.2k
              XML_PARSE_NONET |
13111
30.2k
              XML_PARSE_NODICT |
13112
30.2k
              XML_PARSE_NSCLEAN |
13113
30.2k
              XML_PARSE_NOCDATA |
13114
30.2k
              XML_PARSE_COMPACT |
13115
30.2k
              XML_PARSE_OLD10 |
13116
30.2k
              XML_PARSE_HUGE |
13117
30.2k
              XML_PARSE_OLDSAX |
13118
30.2k
              XML_PARSE_IGNORE_ENC |
13119
30.2k
              XML_PARSE_BIG_LINES |
13120
30.2k
              XML_PARSE_NO_XXE |
13121
30.2k
              XML_PARSE_UNZIP |
13122
30.2k
              XML_PARSE_NO_SYS_CATALOG |
13123
30.2k
              XML_PARSE_CATALOG_PI;
13124
13125
30.2k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13126
13127
    /*
13128
     * For some options, struct members are historically the source
13129
     * of truth. The values are initalized from global variables and
13130
     * old code could also modify them directly. Several older API
13131
     * functions that don't take an options argument rely on these
13132
     * deprecated mechanisms.
13133
     *
13134
     * Once public access to struct members and the globals are
13135
     * disabled, we can use the options bitmask as source of
13136
     * truth, making all these struct members obsolete.
13137
     *
13138
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13139
     * loading of the external subset.
13140
     */
13141
30.2k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13142
30.2k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13143
30.2k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13144
30.2k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13145
30.2k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13146
30.2k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13147
30.2k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13148
30.2k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13149
13150
30.2k
    if (options & XML_PARSE_HUGE) {
13151
30.2k
        if (ctxt->dict != NULL)
13152
30.2k
            xmlDictSetLimit(ctxt->dict, 0);
13153
30.2k
    }
13154
13155
30.2k
    return(options & ~allMask);
13156
30.2k
}
13157
13158
/**
13159
 * Applies the options to the parser context. Unset options are
13160
 * cleared.
13161
 *
13162
 * @since 2.13.0
13163
 *
13164
 * With older versions, you can use #xmlCtxtUseOptions.
13165
 *
13166
 * @param ctxt  an XML parser context
13167
 * @param options  a bitmask of xmlParserOption values
13168
 * @returns 0 in case of success, the set of unknown or unimplemented options
13169
 *         in case of error.
13170
 */
13171
int
13172
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13173
15.1k
{
13174
15.1k
#ifdef LIBXML_HTML_ENABLED
13175
15.1k
    if ((ctxt != NULL) && (ctxt->html))
13176
0
        return(htmlCtxtSetOptions(ctxt, options));
13177
15.1k
#endif
13178
13179
15.1k
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13180
15.1k
}
13181
13182
/**
13183
 * Get the current options of the parser context.
13184
 *
13185
 * @since 2.14.0
13186
 *
13187
 * @param ctxt  an XML parser context
13188
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13189
 */
13190
int
13191
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13192
0
{
13193
0
    if (ctxt == NULL)
13194
0
        return(-1);
13195
13196
0
    return(ctxt->options);
13197
0
}
13198
13199
/**
13200
 * Applies the options to the parser context. The following options
13201
 * are never cleared and can only be enabled:
13202
 *
13203
 * @deprecated Use #xmlCtxtSetOptions.
13204
 *
13205
 * - XML_PARSE_NOERROR
13206
 * - XML_PARSE_NOWARNING
13207
 * - XML_PARSE_NONET
13208
 * - XML_PARSE_NSCLEAN
13209
 * - XML_PARSE_NOCDATA
13210
 * - XML_PARSE_COMPACT
13211
 * - XML_PARSE_OLD10
13212
 * - XML_PARSE_HUGE
13213
 * - XML_PARSE_OLDSA- X
13214
 * - XML_PARSE_IGNORE_ENC
13215
 * - XML_PARSE_BIG_LINES
13216
 *
13217
 * @param ctxt  an XML parser context
13218
 * @param options  a combination of xmlParserOption
13219
 * @returns 0 in case of success, the set of unknown or unimplemented options
13220
 *         in case of error.
13221
 */
13222
int
13223
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13224
15.1k
{
13225
15.1k
    int keepMask;
13226
13227
15.1k
#ifdef LIBXML_HTML_ENABLED
13228
15.1k
    if ((ctxt != NULL) && (ctxt->html))
13229
0
        return(htmlCtxtUseOptions(ctxt, options));
13230
15.1k
#endif
13231
13232
    /*
13233
     * For historic reasons, some options can only be enabled.
13234
     */
13235
15.1k
    keepMask = XML_PARSE_NOERROR |
13236
15.1k
               XML_PARSE_NOWARNING |
13237
15.1k
               XML_PARSE_NONET |
13238
15.1k
               XML_PARSE_NSCLEAN |
13239
15.1k
               XML_PARSE_NOCDATA |
13240
15.1k
               XML_PARSE_COMPACT |
13241
15.1k
               XML_PARSE_OLD10 |
13242
15.1k
               XML_PARSE_HUGE |
13243
15.1k
               XML_PARSE_OLDSAX |
13244
15.1k
               XML_PARSE_IGNORE_ENC |
13245
15.1k
               XML_PARSE_BIG_LINES;
13246
13247
15.1k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13248
15.1k
}
13249
13250
/**
13251
 * To protect against exponential entity expansion ("billion laughs"), the
13252
 * size of serialized output is (roughly) limited to the input size
13253
 * multiplied by this factor. The default value is 5.
13254
 *
13255
 * When working with documents making heavy use of entity expansion, it can
13256
 * be necessary to increase the value. For security reasons, this should only
13257
 * be considered when processing trusted input.
13258
 *
13259
 * @param ctxt  an XML parser context
13260
 * @param maxAmpl  maximum amplification factor
13261
 */
13262
void
13263
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13264
0
{
13265
0
    if (ctxt == NULL)
13266
0
        return;
13267
0
    ctxt->maxAmpl = maxAmpl;
13268
0
}
13269
13270
/**
13271
 * Parse an XML document and return the resulting document tree.
13272
 * Takes ownership of the input object.
13273
 *
13274
 * @since 2.13.0
13275
 *
13276
 * @param ctxt  an XML parser context
13277
 * @param input  parser input
13278
 * @returns the resulting document tree or NULL
13279
 */
13280
xmlDoc *
13281
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13282
0
{
13283
0
    xmlDocPtr ret = NULL;
13284
13285
0
    if ((ctxt == NULL) || (input == NULL)) {
13286
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13287
0
        xmlFreeInputStream(input);
13288
0
        return(NULL);
13289
0
    }
13290
13291
    /* assert(ctxt->inputNr == 0); */
13292
0
    while (ctxt->inputNr > 0)
13293
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13294
13295
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13296
0
        xmlFreeInputStream(input);
13297
0
        return(NULL);
13298
0
    }
13299
13300
0
    xmlParseDocument(ctxt);
13301
13302
0
    ret = xmlCtxtGetDocument(ctxt);
13303
13304
    /* assert(ctxt->inputNr == 1); */
13305
0
    while (ctxt->inputNr > 0)
13306
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13307
13308
0
    return(ret);
13309
0
}
13310
13311
/**
13312
 * Convenience function to parse an XML document from a
13313
 * zero-terminated string.
13314
 *
13315
 * See #xmlCtxtReadDoc for details.
13316
 *
13317
 * @param cur  a pointer to a zero terminated string
13318
 * @param URL  base URL (optional)
13319
 * @param encoding  the document encoding (optional)
13320
 * @param options  a combination of xmlParserOption
13321
 * @returns the resulting document tree
13322
 */
13323
xmlDoc *
13324
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13325
           int options)
13326
0
{
13327
0
    xmlParserCtxtPtr ctxt;
13328
0
    xmlParserInputPtr input;
13329
0
    xmlDocPtr doc = NULL;
13330
13331
0
    ctxt = xmlNewParserCtxt();
13332
0
    if (ctxt == NULL)
13333
0
        return(NULL);
13334
13335
0
    xmlCtxtUseOptions(ctxt, options);
13336
13337
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13338
0
                                      XML_INPUT_BUF_STATIC);
13339
13340
0
    if (input != NULL)
13341
0
        doc = xmlCtxtParseDocument(ctxt, input);
13342
13343
0
    xmlFreeParserCtxt(ctxt);
13344
0
    return(doc);
13345
0
}
13346
13347
/**
13348
 * Convenience function to parse an XML file from the filesystem,
13349
 * the network or a global user-define resource loader.
13350
 *
13351
 * This function always enables the XML_PARSE_UNZIP option for
13352
 * backward compatibility. If a "-" filename is passed, it will
13353
 * read from stdin. Both of these features are potentially
13354
 * insecure and might be removed from later versions.
13355
 *
13356
 * See #xmlCtxtReadFile for details.
13357
 *
13358
 * @param filename  a file or URL
13359
 * @param encoding  the document encoding (optional)
13360
 * @param options  a combination of xmlParserOption
13361
 * @returns the resulting document tree
13362
 */
13363
xmlDoc *
13364
xmlReadFile(const char *filename, const char *encoding, int options)
13365
0
{
13366
0
    xmlParserCtxtPtr ctxt;
13367
0
    xmlParserInputPtr input;
13368
0
    xmlDocPtr doc = NULL;
13369
13370
0
    ctxt = xmlNewParserCtxt();
13371
0
    if (ctxt == NULL)
13372
0
        return(NULL);
13373
13374
0
    options |= XML_PARSE_UNZIP;
13375
13376
0
    xmlCtxtUseOptions(ctxt, options);
13377
13378
    /*
13379
     * Backward compatibility for users of command line utilities like
13380
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13381
     * should be removed at some point.
13382
     */
13383
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13384
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13385
0
                                      encoding, 0);
13386
0
    else
13387
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13388
13389
0
    if (input != NULL)
13390
0
        doc = xmlCtxtParseDocument(ctxt, input);
13391
13392
0
    xmlFreeParserCtxt(ctxt);
13393
0
    return(doc);
13394
0
}
13395
13396
/**
13397
 * Parse an XML in-memory document and build a tree. The input buffer must
13398
 * not contain a terminating null byte.
13399
 *
13400
 * See #xmlCtxtReadMemory for details.
13401
 *
13402
 * @param buffer  a pointer to a char array
13403
 * @param size  the size of the array
13404
 * @param url  base URL (optional)
13405
 * @param encoding  the document encoding (optional)
13406
 * @param options  a combination of xmlParserOption
13407
 * @returns the resulting document tree
13408
 */
13409
xmlDoc *
13410
xmlReadMemory(const char *buffer, int size, const char *url,
13411
              const char *encoding, int options)
13412
0
{
13413
0
    xmlParserCtxtPtr ctxt;
13414
0
    xmlParserInputPtr input;
13415
0
    xmlDocPtr doc = NULL;
13416
13417
0
    if (size < 0)
13418
0
  return(NULL);
13419
13420
0
    ctxt = xmlNewParserCtxt();
13421
0
    if (ctxt == NULL)
13422
0
        return(NULL);
13423
13424
0
    xmlCtxtUseOptions(ctxt, options);
13425
13426
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13427
0
                                      XML_INPUT_BUF_STATIC);
13428
13429
0
    if (input != NULL)
13430
0
        doc = xmlCtxtParseDocument(ctxt, input);
13431
13432
0
    xmlFreeParserCtxt(ctxt);
13433
0
    return(doc);
13434
0
}
13435
13436
/**
13437
 * Parse an XML from a file descriptor and build a tree.
13438
 *
13439
 * See #xmlCtxtReadFd for details.
13440
 *
13441
 * NOTE that the file descriptor will not be closed when the
13442
 * context is freed or reset.
13443
 *
13444
 * @param fd  an open file descriptor
13445
 * @param URL  base URL (optional)
13446
 * @param encoding  the document encoding (optional)
13447
 * @param options  a combination of xmlParserOption
13448
 * @returns the resulting document tree
13449
 */
13450
xmlDoc *
13451
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13452
0
{
13453
0
    xmlParserCtxtPtr ctxt;
13454
0
    xmlParserInputPtr input;
13455
0
    xmlDocPtr doc = NULL;
13456
13457
0
    ctxt = xmlNewParserCtxt();
13458
0
    if (ctxt == NULL)
13459
0
        return(NULL);
13460
13461
0
    xmlCtxtUseOptions(ctxt, options);
13462
13463
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13464
13465
0
    if (input != NULL)
13466
0
        doc = xmlCtxtParseDocument(ctxt, input);
13467
13468
0
    xmlFreeParserCtxt(ctxt);
13469
0
    return(doc);
13470
0
}
13471
13472
/**
13473
 * Parse an XML document from I/O functions and context and build a tree.
13474
 *
13475
 * See #xmlCtxtReadIO for details.
13476
 *
13477
 * @param ioread  an I/O read function
13478
 * @param ioclose  an I/O close function (optional)
13479
 * @param ioctx  an I/O handler
13480
 * @param URL  base URL (optional)
13481
 * @param encoding  the document encoding (optional)
13482
 * @param options  a combination of xmlParserOption
13483
 * @returns the resulting document tree
13484
 */
13485
xmlDoc *
13486
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13487
          void *ioctx, const char *URL, const char *encoding, int options)
13488
0
{
13489
0
    xmlParserCtxtPtr ctxt;
13490
0
    xmlParserInputPtr input;
13491
0
    xmlDocPtr doc = NULL;
13492
13493
0
    ctxt = xmlNewParserCtxt();
13494
0
    if (ctxt == NULL)
13495
0
        return(NULL);
13496
13497
0
    xmlCtxtUseOptions(ctxt, options);
13498
13499
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13500
0
                                  encoding, 0);
13501
13502
0
    if (input != NULL)
13503
0
        doc = xmlCtxtParseDocument(ctxt, input);
13504
13505
0
    xmlFreeParserCtxt(ctxt);
13506
0
    return(doc);
13507
0
}
13508
13509
/**
13510
 * Parse an XML in-memory document and build a tree.
13511
 *
13512
 * `URL` is used as base to resolve external entities and for error
13513
 * reporting.
13514
 *
13515
 * See #xmlCtxtUseOptions for details.
13516
 *
13517
 * @param ctxt  an XML parser context
13518
 * @param str  a pointer to a zero terminated string
13519
 * @param URL  base URL (optional)
13520
 * @param encoding  the document encoding (optional)
13521
 * @param options  a combination of xmlParserOption
13522
 * @returns the resulting document tree
13523
 */
13524
xmlDoc *
13525
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13526
               const char *URL, const char *encoding, int options)
13527
0
{
13528
0
    xmlParserInputPtr input;
13529
13530
0
    if (ctxt == NULL)
13531
0
        return(NULL);
13532
13533
0
    xmlCtxtReset(ctxt);
13534
0
    xmlCtxtUseOptions(ctxt, options);
13535
13536
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13537
0
                                      XML_INPUT_BUF_STATIC);
13538
0
    if (input == NULL)
13539
0
        return(NULL);
13540
13541
0
    return(xmlCtxtParseDocument(ctxt, input));
13542
0
}
13543
13544
/**
13545
 * Parse an XML file from the filesystem, the network or a user-defined
13546
 * resource loader.
13547
 *
13548
 * This function always enables the XML_PARSE_UNZIP option for
13549
 * backward compatibility. This feature is potentially insecure
13550
 * and might be removed from later versions.
13551
 *
13552
 * @param ctxt  an XML parser context
13553
 * @param filename  a file or URL
13554
 * @param encoding  the document encoding (optional)
13555
 * @param options  a combination of xmlParserOption
13556
 * @returns the resulting document tree
13557
 */
13558
xmlDoc *
13559
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13560
                const char *encoding, int options)
13561
0
{
13562
0
    xmlParserInputPtr input;
13563
13564
0
    if (ctxt == NULL)
13565
0
        return(NULL);
13566
13567
0
    options |= XML_PARSE_UNZIP;
13568
13569
0
    xmlCtxtReset(ctxt);
13570
0
    xmlCtxtUseOptions(ctxt, options);
13571
13572
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13573
0
    if (input == NULL)
13574
0
        return(NULL);
13575
13576
0
    return(xmlCtxtParseDocument(ctxt, input));
13577
0
}
13578
13579
/**
13580
 * Parse an XML in-memory document and build a tree. The input buffer must
13581
 * not contain a terminating null byte.
13582
 *
13583
 * `URL` is used as base to resolve external entities and for error
13584
 * reporting.
13585
 *
13586
 * See #xmlCtxtUseOptions for details.
13587
 *
13588
 * @param ctxt  an XML parser context
13589
 * @param buffer  a pointer to a char array
13590
 * @param size  the size of the array
13591
 * @param URL  base URL (optional)
13592
 * @param encoding  the document encoding (optional)
13593
 * @param options  a combination of xmlParserOption
13594
 * @returns the resulting document tree
13595
 */
13596
xmlDoc *
13597
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13598
                  const char *URL, const char *encoding, int options)
13599
0
{
13600
0
    xmlParserInputPtr input;
13601
13602
0
    if ((ctxt == NULL) || (size < 0))
13603
0
        return(NULL);
13604
13605
0
    xmlCtxtReset(ctxt);
13606
0
    xmlCtxtUseOptions(ctxt, options);
13607
13608
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13609
0
                                      XML_INPUT_BUF_STATIC);
13610
0
    if (input == NULL)
13611
0
        return(NULL);
13612
13613
0
    return(xmlCtxtParseDocument(ctxt, input));
13614
0
}
13615
13616
/**
13617
 * Parse an XML document from a file descriptor and build a tree.
13618
 *
13619
 * NOTE that the file descriptor will not be closed when the
13620
 * context is freed or reset.
13621
 *
13622
 * `URL` is used as base to resolve external entities and for error
13623
 * reporting.
13624
 *
13625
 * See #xmlCtxtUseOptions for details.
13626
 *
13627
 * @param ctxt  an XML parser context
13628
 * @param fd  an open file descriptor
13629
 * @param URL  base URL (optional)
13630
 * @param encoding  the document encoding (optional)
13631
 * @param options  a combination of xmlParserOption
13632
 * @returns the resulting document tree
13633
 */
13634
xmlDoc *
13635
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13636
              const char *URL, const char *encoding, int options)
13637
0
{
13638
0
    xmlParserInputPtr input;
13639
13640
0
    if (ctxt == NULL)
13641
0
        return(NULL);
13642
13643
0
    xmlCtxtReset(ctxt);
13644
0
    xmlCtxtUseOptions(ctxt, options);
13645
13646
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13647
0
    if (input == NULL)
13648
0
        return(NULL);
13649
13650
0
    return(xmlCtxtParseDocument(ctxt, input));
13651
0
}
13652
13653
/**
13654
 * parse an XML document from I/O functions and source and build a tree.
13655
 * This reuses the existing `ctxt` parser context
13656
 *
13657
 * `URL` is used as base to resolve external entities and for error
13658
 * reporting.
13659
 *
13660
 * See #xmlCtxtUseOptions for details.
13661
 *
13662
 * @param ctxt  an XML parser context
13663
 * @param ioread  an I/O read function
13664
 * @param ioclose  an I/O close function
13665
 * @param ioctx  an I/O handler
13666
 * @param URL  the base URL to use for the document
13667
 * @param encoding  the document encoding, or NULL
13668
 * @param options  a combination of xmlParserOption
13669
 * @returns the resulting document tree
13670
 */
13671
xmlDoc *
13672
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13673
              xmlInputCloseCallback ioclose, void *ioctx,
13674
        const char *URL,
13675
              const char *encoding, int options)
13676
0
{
13677
0
    xmlParserInputPtr input;
13678
13679
0
    if (ctxt == NULL)
13680
0
        return(NULL);
13681
13682
0
    xmlCtxtReset(ctxt);
13683
0
    xmlCtxtUseOptions(ctxt, options);
13684
13685
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13686
0
                                  encoding, 0);
13687
0
    if (input == NULL)
13688
0
        return(NULL);
13689
13690
0
    return(xmlCtxtParseDocument(ctxt, input));
13691
0
}
13692