Coverage Report

Created: 2025-08-26 07:06

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
1.47M
#define NS_INDEX_EMPTY  INT_MAX
81
39.1k
#define NS_INDEX_XML    (INT_MAX - 1)
82
591k
#define URI_HASH_EMPTY  0xD943A04E
83
10.4k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
221k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
582k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
582k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
621k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
96.7k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
struct _xmlStartTag { /* per-start-tag record; NOTE(review): its users are outside this view, field notes are inferred from names — confirm */
102
    const xmlChar *prefix; /* presumably the element's namespace prefix */
103
    const xmlChar *URI; /* presumably the element's namespace URI */
104
    int line; /* presumably the input line of the start tag */
105
    int nsNr; /* presumably a namespace count tied to this tag — TODO confirm */
106
};
107
108
typedef struct { /* extra per-namespace data; NOTE(review): usage is outside this view, field notes are inferred from names — confirm */
109
    void *saxData; /* opaque pointer; presumably user data handed to SAX */
110
    unsigned prefixHashValue; /* presumably the hash of the prefix string */
111
    unsigned uriHashValue; /* presumably the hash of the namespace URI */
112
    unsigned elementId; /* presumably identifies the declaring element */
113
    int oldIndex; /* presumably the index of a shadowed binding — TODO confirm */
114
} xmlParserNsExtra;
115
116
typedef struct { /* one slot of the namespace hash table (see the "hash" member of struct _xmlParserNsData) */
117
    unsigned hashValue; /* presumably the hash of the stored key — TODO confirm */
118
    int index; /* presumably an index into the namespace arrays — TODO confirm */
119
} xmlParserNsBucket;
120
121
struct _xmlParserNsData { /* namespace bookkeeping; NOTE(review): usage is outside this view, field notes are inferred from names — confirm */
122
    xmlParserNsExtra *extra; /* presumably an array with one entry per namespace */
123
124
    unsigned hashSize; /* presumably the number of slots in "hash" */
125
    unsigned hashElems; /* presumably the number of occupied slots */
126
    xmlParserNsBucket *hash; /* hash table of xmlParserNsBucket slots */
127
128
    unsigned elementId; /* presumably a running element counter — TODO confirm */
129
    int defaultNsIndex; /* presumably the index of the default namespace */
130
    int minNsIndex; /* presumably a lower bound for visible indices — TODO confirm */
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
1.82M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
1.82M
#define XML_ENT_FIXED_COST 20
170
171
31.0M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
186k
#define XML_PARSER_BUFFER_SIZE 100
173
39.2k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expected to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time. Built by compile-time concatenation of the adjacent string macros LIBXML_VERSION_STRING and LIBXML_VERSION_EXTRA.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs. The array is terminated by a NULL entry so it can be walked without a separate length.
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL /* end-of-list sentinel */
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
0
    xmlCtxtErrMemory(ctxt);
226
0
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
427k
{
239
427k
    if (prefix == NULL)
240
423k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
423k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
423k
                   "Attribute %s redefined\n", localname);
243
4.26k
    else
244
4.26k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
4.26k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
4.26k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
427k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
4.96M
{
260
4.96M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
4.96M
               NULL, NULL, NULL, 0, "%s", msg);
262
4.96M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
3.18k
{
277
3.18k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
3.18k
               str1, str2, NULL, 0, msg, str1, str2);
279
3.18k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
2.70M
{
314
2.70M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
2.70M
               NULL, NULL, NULL, val, msg, val);
316
2.70M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
183k
{
333
183k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
183k
               str1, str2, NULL, val, msg, str1, val, str2);
335
183k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
436k
{
349
436k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
436k
               val, NULL, NULL, 0, msg, val);
351
436k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
17.3k
{
365
17.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
17.3k
               val, NULL, NULL, 0, msg, val);
367
17.3k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
240k
{
385
240k
    ctxt->nsWellFormed = 0;
386
387
240k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
240k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
240k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
22.0k
{
407
22.0k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
22.0k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
22.0k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
1.82M
{
436
1.82M
    unsigned long consumed;
437
1.82M
    unsigned long *expandedSize;
438
1.82M
    xmlParserInputPtr input = ctxt->input;
439
1.82M
    xmlEntityPtr entity = input->entity;
440
441
1.82M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
1.21k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
1.82M
    consumed = input->consumed;
449
1.82M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
1.82M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
1.82M
    if (entity)
453
2.03k
        expandedSize = &entity->expandedSize;
454
1.81M
    else
455
1.81M
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
1.82M
    xmlSaturatedAdd(expandedSize, extra);
461
1.82M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
1.82M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
1.82M
        ((*expandedSize >= ULONG_MAX) ||
470
470k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
378
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
378
                       "Maximum entity amplification factor exceeded, see "
473
378
                       "xmlCtxtSetMaxAmplification.\n");
474
378
        return(1);
475
378
    }
476
477
1.82M
    return(0);
478
1.82M
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
0
#ifdef LIBXML_DEBUG_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
#ifdef LIBXML_ZLIB_ENABLED
649
            return(1);
650
#else
651
0
            return(0);
652
0
#endif
653
0
        case XML_WITH_LZMA:
654
#ifdef LIBXML_LZMA_ENABLED
655
            return(1);
656
#else
657
0
            return(0);
658
0
#endif
659
0
        case XML_WITH_ICU:
660
#ifdef LIBXML_ICU_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        default:
666
0
      break;
667
0
     }
668
0
     return(0);
669
0
}
670
671
/************************************************************************
672
 *                  *
673
 *      Simple string buffer        *
674
 *                  *
675
 ************************************************************************/
676
677
typedef struct { /* growable byte buffer; errors are latched in "code" and reported later by xmlSBufFinish/xmlSBufCleanup */
678
    xmlChar *mem; /* heap storage; NULL until xmlSBufGrow first allocates */
679
    unsigned size; /* number of bytes currently stored in mem */
680
    unsigned cap; /* allocated bytes in mem; size < cap once mem is allocated, leaving room for the terminator */
681
    unsigned max; /* size <= max; appends that would exceed it latch XML_ERR_RESOURCE_LIMIT */
682
    xmlParserErrors code; /* latched error, XML_ERR_OK while no append has failed */
683
} xmlSBuf;
684
685
static void
686
606k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
687
606k
    buf->mem = NULL;
688
606k
    buf->size = 0;
689
606k
    buf->cap = 0;
690
606k
    buf->max = max;
691
606k
    buf->code = XML_ERR_OK;
692
606k
}
693
694
static int
695
116k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
696
116k
    xmlChar *mem;
697
116k
    unsigned cap;
698
699
116k
    if (len >= UINT_MAX / 2 - buf->size) {
700
0
        if (buf->code == XML_ERR_OK)
701
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
702
0
        return(-1);
703
0
    }
704
705
116k
    cap = (buf->size + len) * 2;
706
116k
    if (cap < 240)
707
77.8k
        cap = 240;
708
709
116k
    mem = xmlRealloc(buf->mem, cap);
710
116k
    if (mem == NULL) {
711
0
        buf->code = XML_ERR_NO_MEMORY;
712
0
        return(-1);
713
0
    }
714
715
116k
    buf->mem = mem;
716
116k
    buf->cap = cap;
717
718
116k
    return(0);
719
116k
}
720
721
static void
722
78.0M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
723
78.0M
    if (buf->max - buf->size < len) {
724
0
        if (buf->code == XML_ERR_OK)
725
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
726
0
        return;
727
0
    }
728
729
78.0M
    if (buf->cap - buf->size <= len) {
730
109k
        if (xmlSBufGrow(buf, len) < 0)
731
0
            return;
732
109k
    }
733
734
78.0M
    if (len > 0)
735
78.0M
        memcpy(buf->mem + buf->size, str, len);
736
78.0M
    buf->size += len;
737
78.0M
}
738
739
static void
740
74.2M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
741
74.2M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
742
74.2M
}
743
744
static void
745
97.7k
xmlSBufAddChar(xmlSBuf *buf, int c) {
746
97.7k
    xmlChar *end;
747
748
97.7k
    if (buf->max - buf->size < 4) {
749
0
        if (buf->code == XML_ERR_OK)
750
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
751
0
        return;
752
0
    }
753
754
97.7k
    if (buf->cap - buf->size <= 4) {
755
6.94k
        if (xmlSBufGrow(buf, 4) < 0)
756
0
            return;
757
6.94k
    }
758
759
97.7k
    end = buf->mem + buf->size;
760
761
97.7k
    if (c < 0x80) {
762
30.0k
        *end = (xmlChar) c;
763
30.0k
        buf->size += 1;
764
67.7k
    } else {
765
67.7k
        buf->size += xmlCopyCharMultiByte(end, c);
766
67.7k
    }
767
97.7k
}
768
769
static void
770
52.9M
xmlSBufAddReplChar(xmlSBuf *buf) {
771
52.9M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
772
52.9M
}
773
774
static void
775
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
776
0
    if (buf->code == XML_ERR_NO_MEMORY)
777
0
        xmlCtxtErrMemory(ctxt);
778
0
    else
779
0
        xmlFatalErr(ctxt, buf->code, errMsg);
780
0
}
781
782
static xmlChar *
783
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
784
94.3k
              const char *errMsg) {
785
94.3k
    if (buf->mem == NULL) {
786
16.6k
        buf->mem = xmlMalloc(1);
787
16.6k
        if (buf->mem == NULL) {
788
0
            buf->code = XML_ERR_NO_MEMORY;
789
16.6k
        } else {
790
16.6k
            buf->mem[0] = 0;
791
16.6k
        }
792
77.6k
    } else {
793
77.6k
        buf->mem[buf->size] = 0;
794
77.6k
    }
795
796
94.3k
    if (buf->code == XML_ERR_OK) {
797
94.3k
        if (sizeOut != NULL)
798
50.8k
            *sizeOut = buf->size;
799
94.3k
        return(buf->mem);
800
94.3k
    }
801
802
0
    xmlSBufReportError(buf, ctxt, errMsg);
803
804
0
    xmlFree(buf->mem);
805
806
0
    if (sizeOut != NULL)
807
0
        *sizeOut = 0;
808
0
    return(NULL);
809
94.3k
}
810
811
static void
812
505k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
813
505k
    if (buf->code != XML_ERR_OK)
814
0
        xmlSBufReportError(buf, ctxt, errMsg);
815
816
505k
    xmlFree(buf->mem);
817
505k
}
818
819
static int
820
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
821
103M
                    const char *errMsg) {
822
103M
    int c = str[0];
823
103M
    int c1 = str[1];
824
825
103M
    if ((c1 & 0xC0) != 0x80)
826
3.96M
        goto encoding_error;
827
828
99.7M
    if (c < 0xE0) {
829
        /* 2-byte sequence */
830
96.5M
        if (c < 0xC2)
831
44.5M
            goto encoding_error;
832
833
51.9M
        return(2);
834
96.5M
    } else {
835
3.17M
        int c2 = str[2];
836
837
3.17M
        if ((c2 & 0xC0) != 0x80)
838
6.30k
            goto encoding_error;
839
840
3.16M
        if (c < 0xF0) {
841
            /* 3-byte sequence */
842
3.15M
            if (c == 0xE0) {
843
                /* overlong */
844
22.9k
                if (c1 < 0xA0)
845
513
                    goto encoding_error;
846
3.13M
            } else if (c == 0xED) {
847
                /* surrogate */
848
529
                if (c1 >= 0xA0)
849
244
                    goto encoding_error;
850
3.12M
            } else if (c == 0xEF) {
851
                /* U+FFFE and U+FFFF are invalid Chars */
852
3.11M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
853
518
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
854
3.11M
            }
855
856
3.15M
            return(3);
857
3.15M
        } else {
858
            /* 4-byte sequence */
859
11.9k
            if ((str[3] & 0xC0) != 0x80)
860
1.33k
                goto encoding_error;
861
10.6k
            if (c == 0xF0) {
862
                /* overlong */
863
747
                if (c1 < 0x90)
864
235
                    goto encoding_error;
865
9.87k
            } else if (c >= 0xF4) {
866
                /* greater than 0x10FFFF */
867
2.96k
                if ((c > 0xF4) || (c1 >= 0x90))
868
2.62k
                    goto encoding_error;
869
2.96k
            }
870
871
7.76k
            return(4);
872
10.6k
        }
873
3.16M
    }
874
875
48.5M
encoding_error:
876
    /* Only report the first error */
877
48.5M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
878
1.98k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
879
1.98k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
880
1.98k
    }
881
882
48.5M
    return(0);
883
99.7M
}
884
885
/************************************************************************
886
 *                  *
887
 *    SAX2 defaulted attributes handling      *
888
 *                  *
889
 ************************************************************************/
890
891
/**
892
 * Final initialization of the parser context before starting to parse.
893
 *
894
 * This accounts for users modifying struct members of parser context
895
 * directly.
896
 *
897
 * @param ctxt  an XML parser context
898
 */
899
static void
900
18.0k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
901
18.0k
    xmlSAXHandlerPtr sax;
902
903
    /* Avoid unused variable warning if features are disabled. */
904
18.0k
    (void) sax;
905
906
    /*
907
     * Changing the SAX struct directly is still widespread practice
908
     * in internal and external code.
909
     */
910
18.0k
    if (ctxt == NULL) return;
911
18.0k
    sax = ctxt->sax;
912
18.0k
#ifdef LIBXML_SAX1_ENABLED
913
    /*
914
     * Only enable SAX2 if there SAX2 element handlers, except when there
915
     * are no element handlers at all.
916
     */
917
18.0k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
918
18.0k
        (sax) &&
919
18.0k
        (sax->initialized == XML_SAX2_MAGIC) &&
920
18.0k
        ((sax->startElementNs != NULL) ||
921
18.0k
         (sax->endElementNs != NULL) ||
922
18.0k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
923
18.0k
        ctxt->sax2 = 1;
924
#else
925
    ctxt->sax2 = 1;
926
#endif /* LIBXML_SAX1_ENABLED */
927
928
    /*
929
     * Some users replace the dictionary directly in the context struct.
930
     * We really need an API function to do that cleanly.
931
     */
932
18.0k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
933
18.0k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
934
18.0k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
935
18.0k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
936
18.0k
    (ctxt->str_xml_ns == NULL)) {
937
0
        xmlErrMemory(ctxt);
938
0
    }
939
940
18.0k
    xmlDictSetLimit(ctxt->dict,
941
18.0k
                    (ctxt->options & XML_PARSE_HUGE) ?
942
18.0k
                        0 :
943
18.0k
                        XML_MAX_DICTIONARY_LIMIT);
944
945
18.0k
#ifdef LIBXML_VALID_ENABLED
946
18.0k
    if (ctxt->validate)
947
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
948
18.0k
    else
949
18.0k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
950
18.0k
#endif /* LIBXML_VALID_ENABLED */
951
18.0k
}
952
953
typedef struct { /* one defaulted attribute, stored in the attrs array of struct _xmlDefAttrs; NOTE(review): the code filling it is outside this view, field notes are inferred from names — confirm */
954
    xmlHashedString prefix; /* presumably the attribute's prefix */
955
    xmlHashedString name; /* presumably the attribute's local name */
956
    xmlHashedString value; /* presumably the default value */
957
    const xmlChar *valueEnd; /* presumably points just past the end of value */
958
    int external; /* presumably non-zero if declared in an external subset — TODO confirm */
959
    int expandedSize; /* presumably an entity expansion cost for the value — TODO confirm */
960
} xmlDefAttr;
961
962
typedef struct _xmlDefAttrs xmlDefAttrs;
963
typedef xmlDefAttrs *xmlDefAttrsPtr;
964
struct _xmlDefAttrs { /* variable-length list of defaulted attributes for one element */
965
    int nbAttrs;  /* number of defaulted attributes on that element */
966
    int maxAttrs;       /* the size of the array */
967
#if __STDC_VERSION__ >= 199901L
968
    /* Using a C99 flexible array member avoids UBSan errors. */
969
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
970
#else
971
    xmlDefAttr attrs[1]; /* pre-C99 fallback: one-element array standing in for the trailing array */
972
#endif
973
};
974
975
/**
976
 * Normalize the space in non CDATA attribute values:
977
 * If the attribute type is not CDATA, then the XML processor MUST further
978
 * process the normalized attribute value by discarding any leading and
979
 * trailing space (\#x20) characters, and by replacing sequences of space
980
 * (\#x20) characters by a single space (\#x20) character.
981
 * Note that the size of dst need to be at least src, and if one doesn't need
982
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
983
 * passing src as dst is just fine.
984
 *
985
 * @param src  the source string
986
 * @param dst  the target string
987
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
988
 *         is needed.
989
 */
990
static xmlChar *
991
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
992
18.5k
{
993
18.5k
    if ((src == NULL) || (dst == NULL))
994
0
        return(NULL);
995
996
19.3k
    while (*src == 0x20) src++;
997
10.8M
    while (*src != 0) {
998
10.7M
  if (*src == 0x20) {
999
150k
      while (*src == 0x20) src++;
1000
8.11k
      if (*src != 0)
1001
7.89k
    *dst++ = 0x20;
1002
10.7M
  } else {
1003
10.7M
      *dst++ = *src++;
1004
10.7M
  }
1005
10.7M
    }
1006
18.5k
    *dst = 0;
1007
18.5k
    if (dst == src)
1008
18.0k
       return(NULL);
1009
548
    return(dst);
1010
18.5k
}
1011
1012
/**
1013
 * Add a defaulted attribute for an element
1014
 *
1015
 * @param ctxt  an XML parser context
1016
 * @param fullname  the element fullname
1017
 * @param fullattr  the attribute fullname
1018
 * @param value  the attribute value
1019
 */
1020
static void
1021
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1022
               const xmlChar *fullname,
1023
               const xmlChar *fullattr,
1024
19.5k
               const xmlChar *value) {
1025
19.5k
    xmlDefAttrsPtr defaults;
1026
19.5k
    xmlDefAttr *attr;
1027
19.5k
    int len, expandedSize;
1028
19.5k
    xmlHashedString name;
1029
19.5k
    xmlHashedString prefix;
1030
19.5k
    xmlHashedString hvalue;
1031
19.5k
    const xmlChar *localname;
1032
1033
    /*
1034
     * Allows to detect attribute redefinitions
1035
     */
1036
19.5k
    if (ctxt->attsSpecial != NULL) {
1037
17.6k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1038
4.67k
      return;
1039
17.6k
    }
1040
1041
14.8k
    if (ctxt->attsDefault == NULL) {
1042
1.86k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1043
1.86k
  if (ctxt->attsDefault == NULL)
1044
0
      goto mem_error;
1045
1.86k
    }
1046
1047
    /*
1048
     * split the element name into prefix:localname , the string found
1049
     * are within the DTD and then not associated to namespace names.
1050
     */
1051
14.8k
    localname = xmlSplitQName3(fullname, &len);
1052
14.8k
    if (localname == NULL) {
1053
9.15k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1054
9.15k
  prefix.name = NULL;
1055
9.15k
    } else {
1056
5.71k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1057
5.71k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1058
5.71k
        if (prefix.name == NULL)
1059
0
            goto mem_error;
1060
5.71k
    }
1061
14.8k
    if (name.name == NULL)
1062
0
        goto mem_error;
1063
1064
    /*
1065
     * make sure there is some storage
1066
     */
1067
14.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1068
14.8k
    if ((defaults == NULL) ||
1069
14.8k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1070
4.79k
        xmlDefAttrsPtr temp;
1071
4.79k
        int newSize;
1072
1073
4.79k
        if (defaults == NULL) {
1074
2.95k
            newSize = 4;
1075
2.95k
        } else {
1076
1.84k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1077
1.84k
                ((size_t) defaults->maxAttrs >
1078
1.84k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1079
0
                goto mem_error;
1080
1081
1.84k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1082
0
                newSize = XML_MAX_ATTRS;
1083
1.84k
            else
1084
1.84k
                newSize = defaults->maxAttrs * 2;
1085
1.84k
        }
1086
4.79k
        temp = xmlRealloc(defaults,
1087
4.79k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1088
4.79k
  if (temp == NULL)
1089
0
      goto mem_error;
1090
4.79k
        if (defaults == NULL)
1091
2.95k
            temp->nbAttrs = 0;
1092
4.79k
  temp->maxAttrs = newSize;
1093
4.79k
        defaults = temp;
1094
4.79k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1095
4.79k
                          defaults, NULL) < 0) {
1096
0
      xmlFree(defaults);
1097
0
      goto mem_error;
1098
0
  }
1099
4.79k
    }
1100
1101
    /*
1102
     * Split the attribute name into prefix:localname , the string found
1103
     * are within the DTD and hen not associated to namespace names.
1104
     */
1105
14.8k
    localname = xmlSplitQName3(fullattr, &len);
1106
14.8k
    if (localname == NULL) {
1107
9.06k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1108
9.06k
  prefix.name = NULL;
1109
9.06k
    } else {
1110
5.81k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1111
5.81k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1112
5.81k
        if (prefix.name == NULL)
1113
0
            goto mem_error;
1114
5.81k
    }
1115
14.8k
    if (name.name == NULL)
1116
0
        goto mem_error;
1117
1118
    /* intern the string and precompute the end */
1119
14.8k
    len = strlen((const char *) value);
1120
14.8k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1121
14.8k
    if (hvalue.name == NULL)
1122
0
        goto mem_error;
1123
1124
14.8k
    expandedSize = strlen((const char *) name.name);
1125
14.8k
    if (prefix.name != NULL)
1126
5.81k
        expandedSize += strlen((const char *) prefix.name);
1127
14.8k
    expandedSize += len;
1128
1129
14.8k
    attr = &defaults->attrs[defaults->nbAttrs++];
1130
14.8k
    attr->name = name;
1131
14.8k
    attr->prefix = prefix;
1132
14.8k
    attr->value = hvalue;
1133
14.8k
    attr->valueEnd = hvalue.name + len;
1134
14.8k
    attr->external = PARSER_EXTERNAL(ctxt);
1135
14.8k
    attr->expandedSize = expandedSize;
1136
1137
14.8k
    return;
1138
1139
0
mem_error:
1140
0
    xmlErrMemory(ctxt);
1141
0
}
1142
1143
/**
1144
 * Register this attribute type
1145
 *
1146
 * @param ctxt  an XML parser context
1147
 * @param fullname  the element fullname
1148
 * @param fullattr  the attribute fullname
1149
 * @param type  the attribute type
1150
 */
1151
static void
1152
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1153
      const xmlChar *fullname,
1154
      const xmlChar *fullattr,
1155
      int type)
1156
21.4k
{
1157
21.4k
    if (ctxt->attsSpecial == NULL) {
1158
2.22k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1159
2.22k
  if (ctxt->attsSpecial == NULL)
1160
0
      goto mem_error;
1161
2.22k
    }
1162
1163
21.4k
    if (PARSER_EXTERNAL(ctxt))
1164
0
        type |= XML_SPECIAL_EXTERNAL;
1165
1166
21.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1167
21.4k
                    XML_INT_TO_PTR(type)) < 0)
1168
0
        goto mem_error;
1169
21.4k
    return;
1170
1171
21.4k
mem_error:
1172
0
    xmlErrMemory(ctxt);
1173
0
}
1174
1175
/**
1176
 * Removes CDATA attributes from the special attribute table
1177
 */
1178
static void
1179
xmlCleanSpecialAttrCallback(void *payload, void *data,
1180
                            const xmlChar *fullname, const xmlChar *fullattr,
1181
15.3k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1182
15.3k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1183
1184
15.3k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1185
1.12k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1186
1.12k
    }
1187
15.3k
}
1188
1189
/**
1190
 * Trim the list of attributes defined to remove all those of type
1191
 * CDATA as they are not special. This call should be done when finishing
1192
 * to parse the DTD and before starting to parse the document root.
1193
 *
1194
 * @param ctxt  an XML parser context
1195
 */
1196
static void
1197
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1198
7.78k
{
1199
7.78k
    if (ctxt->attsSpecial == NULL)
1200
5.56k
        return;
1201
1202
2.22k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1203
1204
2.22k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1205
64
        xmlHashFree(ctxt->attsSpecial, NULL);
1206
64
        ctxt->attsSpecial = NULL;
1207
64
    }
1208
2.22k
}
1209
1210
/**
1211
 * Checks that the value conforms to the LanguageID production:
1212
 *
1213
 * @deprecated Internal function, do not use.
1214
 *
1215
 * NOTE: this is somewhat deprecated, those productions were removed from
1216
 * the XML Second edition.
1217
 *
1218
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1219
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1220
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1221
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1222
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1223
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1224
 *
1225
 * The current REC reference the successors of RFC 1766, currently 5646
1226
 *
1227
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1228
 *
1229
 *     langtag       = language
1230
 *                     ["-" script]
1231
 *                     ["-" region]
1232
 *                     *("-" variant)
1233
 *                     *("-" extension)
1234
 *                     ["-" privateuse]
1235
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1236
 *                     ["-" extlang]       ; sometimes followed by
1237
 *                                         ; extended language subtags
1238
 *                   / 4ALPHA              ; or reserved for future use
1239
 *                   / 5*8ALPHA            ; or registered language subtag
1240
 *
1241
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1242
 *                     *2("-" 3ALPHA)      ; permanently reserved
1243
 *
1244
 *     script        = 4ALPHA              ; ISO 15924 code
1245
 *
1246
 *     region        = 2ALPHA              ; ISO 3166-1 code
1247
 *                   / 3DIGIT              ; UN M.49 code
1248
 *
1249
 *     variant       = 5*8alphanum         ; registered variants
1250
 *                   / (DIGIT 3alphanum)
1251
 *
1252
 *     extension     = singleton 1*("-" (2*8alphanum))
1253
 *
1254
 *                                         ; Single alphanumerics
1255
 *                                         ; "x" reserved for private use
1256
 *     singleton     = DIGIT               ; 0 - 9
1257
 *                   / %x41-57             ; A - W
1258
 *                   / %x59-5A             ; Y - Z
1259
 *                   / %x61-77             ; a - w
1260
 *                   / %x79-7A             ; y - z
1261
 *
1262
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1263
 * The parser below doesn't try to cope with extension or privateuse
1264
 * that could be added but that's not interoperable anyway
1265
 *
1266
 * @param lang  pointer to the string value
1267
 * @returns 1 if correct 0 otherwise
1268
 **/
1269
int
1270
xmlCheckLanguageID(const xmlChar * lang)
1271
0
{
1272
0
    const xmlChar *cur = lang, *nxt;
1273
1274
0
    if (cur == NULL)
1275
0
        return (0);
1276
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1277
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1278
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1279
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1280
        /*
1281
         * Still allow IANA code and user code which were coming
1282
         * from the previous version of the XML-1.0 specification
1283
         * it's deprecated but we should not fail
1284
         */
1285
0
        cur += 2;
1286
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1287
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1288
0
            cur++;
1289
0
        return(cur[0] == 0);
1290
0
    }
1291
0
    nxt = cur;
1292
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1293
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1294
0
           nxt++;
1295
0
    if (nxt - cur >= 4) {
1296
        /*
1297
         * Reserved
1298
         */
1299
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1300
0
            return(0);
1301
0
        return(1);
1302
0
    }
1303
0
    if (nxt - cur < 2)
1304
0
        return(0);
1305
    /* we got an ISO 639 code */
1306
0
    if (nxt[0] == 0)
1307
0
        return(1);
1308
0
    if (nxt[0] != '-')
1309
0
        return(0);
1310
1311
0
    nxt++;
1312
0
    cur = nxt;
1313
    /* now we can have extlang or script or region or variant */
1314
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1315
0
        goto region_m49;
1316
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur == 4)
1321
0
        goto script;
1322
0
    if (nxt - cur == 2)
1323
0
        goto region;
1324
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1325
0
        goto variant;
1326
0
    if (nxt - cur != 3)
1327
0
        return(0);
1328
    /* we parsed an extlang */
1329
0
    if (nxt[0] == 0)
1330
0
        return(1);
1331
0
    if (nxt[0] != '-')
1332
0
        return(0);
1333
1334
0
    nxt++;
1335
0
    cur = nxt;
1336
    /* now we can have script or region or variant */
1337
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
0
        goto region_m49;
1339
1340
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
0
           nxt++;
1343
0
    if (nxt - cur == 2)
1344
0
        goto region;
1345
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1346
0
        goto variant;
1347
0
    if (nxt - cur != 4)
1348
0
        return(0);
1349
    /* we parsed a script */
1350
0
script:
1351
0
    if (nxt[0] == 0)
1352
0
        return(1);
1353
0
    if (nxt[0] != '-')
1354
0
        return(0);
1355
1356
0
    nxt++;
1357
0
    cur = nxt;
1358
    /* now we can have region or variant */
1359
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1360
0
        goto region_m49;
1361
1362
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1363
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1364
0
           nxt++;
1365
1366
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1367
0
        goto variant;
1368
0
    if (nxt - cur != 2)
1369
0
        return(0);
1370
    /* we parsed a region */
1371
0
region:
1372
0
    if (nxt[0] == 0)
1373
0
        return(1);
1374
0
    if (nxt[0] != '-')
1375
0
        return(0);
1376
1377
0
    nxt++;
1378
0
    cur = nxt;
1379
    /* now we can just have a variant */
1380
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1381
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1382
0
           nxt++;
1383
1384
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1385
0
        return(0);
1386
1387
    /* we parsed a variant */
1388
0
variant:
1389
0
    if (nxt[0] == 0)
1390
0
        return(1);
1391
0
    if (nxt[0] != '-')
1392
0
        return(0);
1393
    /* extensions and private use subtags not checked */
1394
0
    return (1);
1395
1396
0
region_m49:
1397
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1398
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1399
0
        nxt += 3;
1400
0
        goto region;
1401
0
    }
1402
0
    return(0);
1403
0
}
1404
1405
/************************************************************************
1406
 *                  *
1407
 *    Parser stacks related functions and macros    *
1408
 *                  *
1409
 ************************************************************************/
1410
1411
static xmlChar *
1412
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1413
1414
/**
1415
 * Create a new namespace database.
1416
 *
1417
 * @returns the new obejct.
1418
 */
1419
xmlParserNsData *
1420
18.0k
xmlParserNsCreate(void) {
1421
18.0k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1422
1423
18.0k
    if (nsdb == NULL)
1424
0
        return(NULL);
1425
18.0k
    memset(nsdb, 0, sizeof(*nsdb));
1426
18.0k
    nsdb->defaultNsIndex = INT_MAX;
1427
1428
18.0k
    return(nsdb);
1429
18.0k
}
1430
1431
/**
1432
 * Free a namespace database.
1433
 *
1434
 * @param nsdb  namespace database
1435
 */
1436
void
1437
18.0k
xmlParserNsFree(xmlParserNsData *nsdb) {
1438
18.0k
    if (nsdb == NULL)
1439
0
        return;
1440
1441
18.0k
    xmlFree(nsdb->extra);
1442
18.0k
    xmlFree(nsdb->hash);
1443
18.0k
    xmlFree(nsdb);
1444
18.0k
}
1445
1446
/**
1447
 * Reset a namespace database.
1448
 *
1449
 * @param nsdb  namespace database
1450
 */
1451
static void
1452
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1453
0
    if (nsdb == NULL)
1454
0
        return;
1455
1456
0
    nsdb->hashElems = 0;
1457
0
    nsdb->elementId = 0;
1458
0
    nsdb->defaultNsIndex = INT_MAX;
1459
1460
0
    if (nsdb->hash)
1461
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1462
0
}
1463
1464
/**
1465
 * Signal that a new element has started.
1466
 *
1467
 * @param nsdb  namespace database
1468
 * @returns 0 on success, -1 if the element counter overflowed.
1469
 */
1470
static int
1471
642k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1472
642k
    if (nsdb->elementId == UINT_MAX)
1473
0
        return(-1);
1474
642k
    nsdb->elementId++;
1475
1476
642k
    return(0);
1477
642k
}
1478
1479
/**
1480
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1481
 * be set to the matching bucket, or the first empty bucket if no match
1482
 * was found.
1483
 *
1484
 * @param ctxt  parser context
1485
 * @param prefix  namespace prefix
1486
 * @param bucketPtr  optional bucket (return value)
1487
 * @returns the namespace index on success, INT_MAX if no namespace was
1488
 * found.
1489
 */
1490
static int
1491
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1492
2.44M
                  xmlParserNsBucket **bucketPtr) {
1493
2.44M
    xmlParserNsBucket *bucket, *tombstone;
1494
2.44M
    unsigned index, hashValue;
1495
1496
2.44M
    if (prefix->name == NULL)
1497
407k
        return(ctxt->nsdb->defaultNsIndex);
1498
1499
2.03M
    if (ctxt->nsdb->hashSize == 0)
1500
19.0k
        return(INT_MAX);
1501
1502
2.02M
    hashValue = prefix->hashValue;
1503
2.02M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1504
2.02M
    bucket = &ctxt->nsdb->hash[index];
1505
2.02M
    tombstone = NULL;
1506
1507
2.76M
    while (bucket->hashValue) {
1508
2.47M
        if (bucket->index == INT_MAX) {
1509
179k
            if (tombstone == NULL)
1510
174k
                tombstone = bucket;
1511
2.29M
        } else if (bucket->hashValue == hashValue) {
1512
1.72M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1513
1.72M
                if (bucketPtr != NULL)
1514
1.54M
                    *bucketPtr = bucket;
1515
1.72M
                return(bucket->index);
1516
1.72M
            }
1517
1.72M
        }
1518
1519
747k
        index++;
1520
747k
        bucket++;
1521
747k
        if (index == ctxt->nsdb->hashSize) {
1522
46.5k
            index = 0;
1523
46.5k
            bucket = ctxt->nsdb->hash;
1524
46.5k
        }
1525
747k
    }
1526
1527
296k
    if (bucketPtr != NULL)
1528
169k
        *bucketPtr = tombstone ? tombstone : bucket;
1529
296k
    return(INT_MAX);
1530
2.02M
}
1531
1532
/**
1533
 * Lookup namespace URI with given prefix.
1534
 *
1535
 * @param ctxt  parser context
1536
 * @param prefix  namespace prefix
1537
 * @returns the namespace URI on success, NULL if no namespace was found.
1538
 */
1539
static const xmlChar *
1540
426k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1541
426k
    const xmlChar *ret;
1542
426k
    int nsIndex;
1543
1544
426k
    if (prefix->name == ctxt->str_xml)
1545
704
        return(ctxt->str_xml_ns);
1546
1547
    /*
1548
     * minNsIndex is used when building an entity tree. We must
1549
     * ignore namespaces declared outside the entity.
1550
     */
1551
426k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1552
426k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1553
191k
        return(NULL);
1554
1555
234k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1556
234k
    if (ret[0] == 0)
1557
14.0k
        ret = NULL;
1558
234k
    return(ret);
1559
426k
}
1560
1561
/**
1562
 * Lookup extra data for the given prefix. This returns data stored
1563
 * with xmlParserNsUdpateSax.
1564
 *
1565
 * @param ctxt  parser context
1566
 * @param prefix  namespace prefix
1567
 * @returns the data on success, NULL if no namespace was found.
1568
 */
1569
void *
1570
13.8k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1571
13.8k
    xmlHashedString hprefix;
1572
13.8k
    int nsIndex;
1573
1574
13.8k
    if (prefix == ctxt->str_xml)
1575
6.92k
        return(NULL);
1576
1577
6.95k
    hprefix.name = prefix;
1578
6.95k
    if (prefix != NULL)
1579
2.73k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1580
4.21k
    else
1581
4.21k
        hprefix.hashValue = 0;
1582
6.95k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1583
6.95k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584
0
        return(NULL);
1585
1586
6.95k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1587
6.95k
}
1588
1589
/**
1590
 * Sets or updates extra data for the given prefix. This value will be
1591
 * returned by xmlParserNsLookupSax as long as the namespace with the
1592
 * given prefix is in scope.
1593
 *
1594
 * @param ctxt  parser context
1595
 * @param prefix  namespace prefix
1596
 * @param saxData  extra data for SAX handler
1597
 * @returns the data on success, NULL if no namespace was found.
1598
 */
1599
int
1600
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1601
141k
                     void *saxData) {
1602
141k
    xmlHashedString hprefix;
1603
141k
    int nsIndex;
1604
1605
141k
    if (prefix == ctxt->str_xml)
1606
0
        return(-1);
1607
1608
141k
    hprefix.name = prefix;
1609
141k
    if (prefix != NULL)
1610
140k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1611
1.25k
    else
1612
1.25k
        hprefix.hashValue = 0;
1613
141k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1614
141k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1615
0
        return(-1);
1616
1617
141k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1618
141k
    return(0);
1619
141k
}
1620
1621
/**
1622
 * Grows the namespace tables.
1623
 *
1624
 * @param ctxt  parser context
1625
 * @returns 0 on success, -1 if a memory allocation failed.
1626
 */
1627
static int
1628
13.5k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1629
13.5k
    const xmlChar **table;
1630
13.5k
    xmlParserNsExtra *extra;
1631
13.5k
    int newSize;
1632
1633
13.5k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1634
13.5k
                              sizeof(table[0]) + sizeof(extra[0]),
1635
13.5k
                              16, XML_MAX_ITEMS);
1636
13.5k
    if (newSize < 0)
1637
0
        goto error;
1638
1639
13.5k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1640
13.5k
    if (table == NULL)
1641
0
        goto error;
1642
13.5k
    ctxt->nsTab = table;
1643
1644
13.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1645
13.5k
    if (extra == NULL)
1646
0
        goto error;
1647
13.5k
    ctxt->nsdb->extra = extra;
1648
1649
13.5k
    ctxt->nsMax = newSize;
1650
13.5k
    return(0);
1651
1652
0
error:
1653
0
    xmlErrMemory(ctxt);
1654
0
    return(-1);
1655
13.5k
}
1656
1657
/**
1658
 * Push a new namespace on the table.
1659
 *
1660
 * @param ctxt  parser context
1661
 * @param prefix  prefix with hash value
1662
 * @param uri  uri with hash value
1663
 * @param saxData  extra data for SAX handler
1664
 * @param defAttr  whether the namespace comes from a default attribute
1665
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1666
 * -1 if a memory allocation failed.
1667
 */
1668
static int
1669
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1670
940k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1671
940k
    xmlParserNsBucket *bucket = NULL;
1672
940k
    xmlParserNsExtra *extra;
1673
940k
    const xmlChar **ns;
1674
940k
    unsigned hashValue, nsIndex, oldIndex;
1675
1676
940k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1677
233
        return(0);
1678
1679
939k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1680
0
        xmlErrMemory(ctxt);
1681
0
        return(-1);
1682
0
    }
1683
1684
    /*
1685
     * Default namespace and 'xml' namespace
1686
     */
1687
939k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1688
79.6k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1689
1690
79.6k
        if (oldIndex != INT_MAX) {
1691
73.2k
            extra = &ctxt->nsdb->extra[oldIndex];
1692
1693
73.2k
            if (extra->elementId == ctxt->nsdb->elementId) {
1694
16.6k
                if (defAttr == 0)
1695
15.7k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1696
16.6k
                return(0);
1697
16.6k
            }
1698
1699
56.6k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1700
56.6k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1701
0
                return(0);
1702
56.6k
        }
1703
1704
62.9k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1705
62.9k
        goto populate_entry;
1706
79.6k
    }
1707
1708
    /*
1709
     * Hash table lookup
1710
     */
1711
860k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1712
860k
    if (oldIndex != INT_MAX) {
1713
689k
        extra = &ctxt->nsdb->extra[oldIndex];
1714
1715
        /*
1716
         * Check for duplicate definitions on the same element.
1717
         */
1718
689k
        if (extra->elementId == ctxt->nsdb->elementId) {
1719
817
            if (defAttr == 0)
1720
596
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1721
817
            return(0);
1722
817
        }
1723
1724
688k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1725
688k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1726
0
            return(0);
1727
1728
688k
        bucket->index = ctxt->nsNr;
1729
688k
        goto populate_entry;
1730
688k
    }
1731
1732
    /*
1733
     * Insert new bucket
1734
     */
1735
1736
171k
    hashValue = prefix->hashValue;
1737
1738
    /*
1739
     * Grow hash table, 50% fill factor
1740
     */
1741
171k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1742
2.03k
        xmlParserNsBucket *newHash;
1743
2.03k
        unsigned newSize, i, index;
1744
1745
2.03k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1746
0
            xmlErrMemory(ctxt);
1747
0
            return(-1);
1748
0
        }
1749
2.03k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1750
2.03k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1751
2.03k
        if (newHash == NULL) {
1752
0
            xmlErrMemory(ctxt);
1753
0
            return(-1);
1754
0
        }
1755
2.03k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1756
1757
661k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1758
659k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1759
659k
            unsigned newIndex;
1760
1761
659k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1762
655k
                continue;
1763
3.74k
            newIndex = hv & (newSize - 1);
1764
1765
4.58k
            while (newHash[newIndex].hashValue != 0) {
1766
838
                newIndex++;
1767
838
                if (newIndex == newSize)
1768
97
                    newIndex = 0;
1769
838
            }
1770
1771
3.74k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1772
3.74k
        }
1773
1774
2.03k
        xmlFree(ctxt->nsdb->hash);
1775
2.03k
        ctxt->nsdb->hash = newHash;
1776
2.03k
        ctxt->nsdb->hashSize = newSize;
1777
1778
        /*
1779
         * Relookup
1780
         */
1781
2.03k
        index = hashValue & (newSize - 1);
1782
1783
2.33k
        while (newHash[index].hashValue != 0) {
1784
300
            index++;
1785
300
            if (index == newSize)
1786
27
                index = 0;
1787
300
        }
1788
1789
2.03k
        bucket = &newHash[index];
1790
2.03k
    }
1791
1792
171k
    bucket->hashValue = hashValue;
1793
171k
    bucket->index = ctxt->nsNr;
1794
171k
    ctxt->nsdb->hashElems++;
1795
171k
    oldIndex = INT_MAX;
1796
1797
922k
populate_entry:
1798
922k
    nsIndex = ctxt->nsNr;
1799
1800
922k
    ns = &ctxt->nsTab[nsIndex * 2];
1801
922k
    ns[0] = prefix ? prefix->name : NULL;
1802
922k
    ns[1] = uri->name;
1803
1804
922k
    extra = &ctxt->nsdb->extra[nsIndex];
1805
922k
    extra->saxData = saxData;
1806
922k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1807
922k
    extra->uriHashValue = uri->hashValue;
1808
922k
    extra->elementId = ctxt->nsdb->elementId;
1809
922k
    extra->oldIndex = oldIndex;
1810
1811
922k
    ctxt->nsNr++;
1812
1813
922k
    return(1);
1814
171k
}
1815
1816
/**
1817
 * Pops the top `nr` namespaces and restores the hash table.
1818
 *
1819
 * @param ctxt  an XML parser context
1820
 * @param nr  the number to pop
1821
 * @returns the number of namespaces popped.
1822
 */
1823
static int
1824
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1825
170k
{
1826
170k
    int i;
1827
1828
    /* assert(nr <= ctxt->nsNr); */
1829
1830
1.08M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1831
919k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1832
919k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1833
1834
919k
        if (prefix == NULL) {
1835
62.4k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1836
857k
        } else {
1837
857k
            xmlHashedString hprefix;
1838
857k
            xmlParserNsBucket *bucket = NULL;
1839
1840
857k
            hprefix.name = prefix;
1841
857k
            hprefix.hashValue = extra->prefixHashValue;
1842
857k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1843
            /* assert(bucket && bucket->hashValue); */
1844
857k
            bucket->index = extra->oldIndex;
1845
857k
        }
1846
919k
    }
1847
1848
170k
    ctxt->nsNr -= nr;
1849
170k
    return(nr);
1850
170k
}
1851
1852
static int
1853
6.38k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1854
6.38k
    const xmlChar **atts;
1855
6.38k
    unsigned *attallocs;
1856
6.38k
    int newSize;
1857
1858
6.38k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1859
6.38k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1860
6.38k
                              10, XML_MAX_ATTRS);
1861
6.38k
    if (newSize < 0) {
1862
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1863
0
                    "Maximum number of attributes exceeded");
1864
0
        return(-1);
1865
0
    }
1866
1867
6.38k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1868
6.38k
    if (atts == NULL)
1869
0
        goto mem_error;
1870
6.38k
    ctxt->atts = atts;
1871
1872
6.38k
    attallocs = xmlRealloc(ctxt->attallocs,
1873
6.38k
                           newSize * sizeof(attallocs[0]));
1874
6.38k
    if (attallocs == NULL)
1875
0
        goto mem_error;
1876
6.38k
    ctxt->attallocs = attallocs;
1877
1878
6.38k
    ctxt->maxatts = newSize * 5;
1879
1880
6.38k
    return(0);
1881
1882
0
mem_error:
1883
0
    xmlErrMemory(ctxt);
1884
0
    return(-1);
1885
6.38k
}
1886
1887
/**
1888
 * Pushes a new parser input on top of the input stack
1889
 *
1890
 * @param ctxt  an XML parser context
1891
 * @param value  the parser input
1892
 * @returns -1 in case of error, the index in the stack otherwise
1893
 */
1894
int
1895
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1896
67.7k
{
1897
67.7k
    char *directory = NULL;
1898
67.7k
    int maxDepth;
1899
1900
67.7k
    if ((ctxt == NULL) || (value == NULL))
1901
0
        return(-1);
1902
1903
67.7k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1904
1905
67.7k
    if (ctxt->inputNr >= ctxt->inputMax) {
1906
2.43k
        xmlParserInputPtr *tmp;
1907
2.43k
        int newSize;
1908
1909
2.43k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1910
2.43k
                                  5, maxDepth);
1911
2.43k
        if (newSize < 0) {
1912
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1913
0
                           "Maximum entity nesting depth exceeded");
1914
0
            return(-1);
1915
0
        }
1916
2.43k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1917
2.43k
        if (tmp == NULL) {
1918
0
            xmlErrMemory(ctxt);
1919
0
            return(-1);
1920
0
        }
1921
2.43k
        ctxt->inputTab = tmp;
1922
2.43k
        ctxt->inputMax = newSize;
1923
2.43k
    }
1924
1925
67.7k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1926
0
        directory = xmlParserGetDirectory(value->filename);
1927
0
        if (directory == NULL) {
1928
0
            xmlErrMemory(ctxt);
1929
0
            return(-1);
1930
0
        }
1931
0
    }
1932
1933
67.7k
    if (ctxt->input_id >= INT_MAX) {
1934
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1935
0
        return(-1);
1936
0
    }
1937
1938
67.7k
    ctxt->inputTab[ctxt->inputNr] = value;
1939
67.7k
    ctxt->input = value;
1940
1941
67.7k
    if (ctxt->inputNr == 0) {
1942
18.0k
        xmlFree(ctxt->directory);
1943
18.0k
        ctxt->directory = directory;
1944
18.0k
    }
1945
1946
    /*
1947
     * The input ID is unused internally, but there are entity
1948
     * loaders in downstream code that detect the main document
1949
     * by checking for "input_id == 1".
1950
     */
1951
67.7k
    value->id = ctxt->input_id++;
1952
1953
67.7k
    return(ctxt->inputNr++);
1954
67.7k
}
1955
1956
/**
1957
 * Pops the top parser input from the input stack
1958
 *
1959
 * @param ctxt  an XML parser context
1960
 * @returns the input just removed
1961
 */
1962
xmlParserInput *
1963
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1964
103k
{
1965
103k
    xmlParserInputPtr ret;
1966
1967
103k
    if (ctxt == NULL)
1968
0
        return(NULL);
1969
103k
    if (ctxt->inputNr <= 0)
1970
36.1k
        return (NULL);
1971
67.7k
    ctxt->inputNr--;
1972
67.7k
    if (ctxt->inputNr > 0)
1973
49.6k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1974
18.0k
    else
1975
18.0k
        ctxt->input = NULL;
1976
67.7k
    ret = ctxt->inputTab[ctxt->inputNr];
1977
67.7k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1978
67.7k
    return (ret);
1979
103k
}
1980
1981
/**
1982
 * Pushes a new element node on top of the node stack
1983
 *
1984
 * @deprecated Internal function, do not use.
1985
 *
1986
 * @param ctxt  an XML parser context
1987
 * @param value  the element node
1988
 * @returns -1 in case of error, the index in the stack otherwise
1989
 */
1990
int
1991
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1992
67.6k
{
1993
67.6k
    if (ctxt == NULL)
1994
0
        return(0);
1995
1996
67.6k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1997
6.21k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998
6.21k
        xmlNodePtr *tmp;
1999
6.21k
        int newSize;
2000
2001
6.21k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2002
6.21k
                                  10, maxDepth);
2003
6.21k
        if (newSize < 0) {
2004
8
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2005
8
                    "Excessive depth in document: %d,"
2006
8
                    " use XML_PARSE_HUGE option\n",
2007
8
                    ctxt->nodeNr);
2008
8
            return(-1);
2009
8
        }
2010
2011
6.20k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2012
6.20k
        if (tmp == NULL) {
2013
0
            xmlErrMemory(ctxt);
2014
0
            return (-1);
2015
0
        }
2016
6.20k
        ctxt->nodeTab = tmp;
2017
6.20k
  ctxt->nodeMax = newSize;
2018
6.20k
    }
2019
2020
67.6k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2021
67.6k
    ctxt->node = value;
2022
67.6k
    return (ctxt->nodeNr++);
2023
67.6k
}
2024
2025
/**
2026
 * Pops the top element node from the node stack
2027
 *
2028
 * @deprecated Internal function, do not use.
2029
 *
2030
 * @param ctxt  an XML parser context
2031
 * @returns the node just removed
2032
 */
2033
xmlNode *
2034
nodePop(xmlParserCtxt *ctxt)
2035
228k
{
2036
228k
    xmlNodePtr ret;
2037
2038
228k
    if (ctxt == NULL) return(NULL);
2039
228k
    if (ctxt->nodeNr <= 0)
2040
163k
        return (NULL);
2041
64.2k
    ctxt->nodeNr--;
2042
64.2k
    if (ctxt->nodeNr > 0)
2043
62.5k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044
1.71k
    else
2045
1.71k
        ctxt->node = NULL;
2046
64.2k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2047
64.2k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048
64.2k
    return (ret);
2049
228k
}
2050
2051
/**
2052
 * Pushes a new element name/prefix/URL on top of the name stack
2053
 *
2054
 * @param ctxt  an XML parser context
2055
 * @param value  the element name
2056
 * @param prefix  the element prefix
2057
 * @param URI  the element namespace name
2058
 * @param line  the current line number for error messages
2059
 * @param nsNr  the number of namespaces pushed on the namespace table
2060
 * @returns -1 in case of error, the index in the stack otherwise
2061
 */
2062
static int
2063
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2064
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2065
428k
{
2066
428k
    xmlStartTag *tag;
2067
2068
428k
    if (ctxt->nameNr >= ctxt->nameMax) {
2069
18.2k
        const xmlChar **tmp;
2070
18.2k
        xmlStartTag *tmp2;
2071
18.2k
        int newSize;
2072
2073
18.2k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2074
18.2k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2075
18.2k
                                  10, XML_MAX_ITEMS);
2076
18.2k
        if (newSize < 0)
2077
0
            goto mem_error;
2078
2079
18.2k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2080
18.2k
        if (tmp == NULL)
2081
0
      goto mem_error;
2082
18.2k
  ctxt->nameTab = tmp;
2083
2084
18.2k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2085
18.2k
        if (tmp2 == NULL)
2086
0
      goto mem_error;
2087
18.2k
  ctxt->pushTab = tmp2;
2088
2089
18.2k
        ctxt->nameMax = newSize;
2090
410k
    } else if (ctxt->pushTab == NULL) {
2091
11.1k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2092
11.1k
        if (ctxt->pushTab == NULL)
2093
0
            goto mem_error;
2094
11.1k
    }
2095
428k
    ctxt->nameTab[ctxt->nameNr] = value;
2096
428k
    ctxt->name = value;
2097
428k
    tag = &ctxt->pushTab[ctxt->nameNr];
2098
428k
    tag->prefix = prefix;
2099
428k
    tag->URI = URI;
2100
428k
    tag->line = line;
2101
428k
    tag->nsNr = nsNr;
2102
428k
    return (ctxt->nameNr++);
2103
0
mem_error:
2104
0
    xmlErrMemory(ctxt);
2105
0
    return (-1);
2106
428k
}
2107
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost entry from the name stack and makes the entry
 * below it (or NULL if none remains) the current name.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    /* Clear the vacated slot so the table holds no stale pointer. */
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2131
2132
/**
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * @deprecated Internal function, do not use.
2136
 *
2137
 * @param ctxt  an XML parser context
2138
 * @returns the name just removed
2139
 */
2140
static const xmlChar *
2141
namePop(xmlParserCtxtPtr ctxt)
2142
422k
{
2143
422k
    const xmlChar *ret;
2144
2145
422k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2146
0
        return (NULL);
2147
422k
    ctxt->nameNr--;
2148
422k
    if (ctxt->nameNr > 0)
2149
417k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2150
4.88k
    else
2151
4.88k
        ctxt->name = NULL;
2152
422k
    ret = ctxt->nameTab[ctxt->nameNr];
2153
422k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2154
422k
    return (ret);
2155
422k
}
2156
2157
643k
/**
 * Pushes a value on the `spaceTab` stack and points `ctxt->space` at
 * the new top entry.
 *
 * @param ctxt  an XML parser context
 * @param val  the value to push
 * @returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2182
2183
637k
/**
 * Pops the top value from the `spaceTab` stack. `ctxt->space` is moved
 * to the entry below, or to slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * @param ctxt  an XML parser context
 * @returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2195
2196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   BASE_PTR the base pointer of the current input buffer.
 *   CUR      the current xmlChar value, i.e. an 8 bit value.
 *            This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      expands to the same expression as CUR.
 *   NXT(n)   the n'th next xmlChar. Like CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   CMPn(s, c1, ..., cn)
 *            true if the first n bytes at s equal c1 ... cn.
 *   SKIP(n)  Skip n xmlChars, and must be used only to skip ASCII defined
 *            strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChars one at a time, updating line and column
 *            when a newline is crossed.
 *   NEXT1    Skip 1 xmlChar, and must be used only to skip 1 non-newline
 *            ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT     Skip to the next character by calling xmlNextChar.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW and COPY_BUF expand to bare `if` statements and NEXT1 to a
 * bare block, so none of them may be used as the unbraced body of an `if`
 * that has an `else`.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);						\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2302
2303
static int
2304
50.1M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2305
50.1M
    int c = xmlCurrentChar(ctxt, len);
2306
2307
50.1M
    if (c == XML_INVALID_CHAR)
2308
13.0M
        c = 0xFFFD; /* replacement character */
2309
2310
50.1M
    return(c);
2311
50.1M
}
2312
2313
/**
2314
 * Skip whitespace in the input stream.
2315
 *
2316
 * @deprecated Internal function, do not use.
2317
 *
2318
 * @param ctxt  the XML parser context
2319
 * @returns the number of space chars skipped
2320
 */
2321
int
2322
2.74M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2323
2.74M
    const xmlChar *cur;
2324
2.74M
    int res = 0;
2325
2326
2.74M
    cur = ctxt->input->cur;
2327
2.74M
    while (IS_BLANK_CH(*cur)) {
2328
2.49M
        if (*cur == '\n') {
2329
1.53M
            ctxt->input->line++; ctxt->input->col = 1;
2330
1.53M
        } else {
2331
956k
            ctxt->input->col++;
2332
956k
        }
2333
2.49M
        cur++;
2334
2.49M
        if (res < INT_MAX)
2335
2.49M
            res++;
2336
2.49M
        if (*cur == 0) {
2337
1.57k
            ctxt->input->cur = cur;
2338
1.57k
            xmlParserGrow(ctxt);
2339
1.57k
            cur = ctxt->input->cur;
2340
1.57k
        }
2341
2.49M
    }
2342
2.74M
    ctxt->input->cur = cur;
2343
2344
2.74M
    if (res > 4)
2345
6.91k
        GROW;
2346
2347
2.74M
    return(res);
2348
2.74M
}
2349
2350
static void
2351
48.0k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2352
48.0k
    unsigned long consumed;
2353
48.0k
    xmlEntityPtr ent;
2354
2355
48.0k
    ent = ctxt->input->entity;
2356
2357
48.0k
    ent->flags &= ~XML_ENT_EXPANDING;
2358
2359
48.0k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2360
1.55k
        int result;
2361
2362
        /*
2363
         * Read the rest of the stream in case of errors. We want
2364
         * to account for the whole entity size.
2365
         */
2366
1.55k
        do {
2367
1.55k
            ctxt->input->cur = ctxt->input->end;
2368
1.55k
            xmlParserShrink(ctxt);
2369
1.55k
            result = xmlParserGrow(ctxt);
2370
1.55k
        } while (result > 0);
2371
2372
1.55k
        consumed = ctxt->input->consumed;
2373
1.55k
        xmlSaturatedAddSizeT(&consumed,
2374
1.55k
                             ctxt->input->end - ctxt->input->base);
2375
2376
1.55k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2377
2378
        /*
2379
         * Add to sizeentities when parsing an external entity
2380
         * for the first time.
2381
         */
2382
1.55k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2383
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2384
0
        }
2385
2386
1.55k
        ent->flags |= XML_ENT_CHECKED;
2387
1.55k
    }
2388
2389
48.0k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2390
2391
48.0k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2392
2393
48.0k
    GROW;
2394
48.0k
}
2395
2396
/**
2397
 * Skip whitespace in the input stream, also handling parameter
2398
 * entities.
2399
 *
2400
 * @param ctxt  the XML parser context
2401
 * @returns the number of space chars skipped
2402
 */
2403
static int
2404
269k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2405
269k
    int res = 0;
2406
269k
    int inParam;
2407
269k
    int expandParam;
2408
2409
269k
    inParam = PARSER_IN_PE(ctxt);
2410
269k
    expandParam = PARSER_EXTERNAL(ctxt);
2411
2412
269k
    if (!inParam && !expandParam)
2413
133k
        return(xmlSkipBlankChars(ctxt));
2414
2415
    /*
2416
     * It's Okay to use CUR/NEXT here since all the blanks are on
2417
     * the ASCII range.
2418
     */
2419
2.66M
    while (PARSER_STOPPED(ctxt) == 0) {
2420
2.66M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2421
2.52M
            NEXT;
2422
2.52M
        } else if (CUR == '%') {
2423
956
            if ((expandParam == 0) ||
2424
956
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2425
956
                break;
2426
2427
            /*
2428
             * Expand parameter entity. We continue to consume
2429
             * whitespace at the start of the entity and possible
2430
             * even consume the whole entity and pop it. We might
2431
             * even pop multiple PEs in this loop.
2432
             */
2433
0
            xmlParsePERefInternal(ctxt, 0);
2434
2435
0
            inParam = PARSER_IN_PE(ctxt);
2436
0
            expandParam = PARSER_EXTERNAL(ctxt);
2437
135k
        } else if (CUR == 0) {
2438
17.6k
            if (inParam == 0)
2439
3
                break;
2440
2441
            /*
2442
             * Don't pop parameter entities that start a markup
2443
             * declaration to detect Well-formedness constraint:
2444
             * PE Between Declarations.
2445
             */
2446
17.6k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2447
17.1k
                break;
2448
2449
518
            xmlPopPE(ctxt);
2450
2451
518
            inParam = PARSER_IN_PE(ctxt);
2452
518
            expandParam = PARSER_EXTERNAL(ctxt);
2453
118k
        } else {
2454
118k
            break;
2455
118k
        }
2456
2457
        /*
2458
         * Also increase the counter when entering or exiting a PERef.
2459
         * The spec says: "When a parameter-entity reference is recognized
2460
         * in the DTD and included, its replacement text MUST be enlarged
2461
         * by the attachment of one leading and one following space (#x20)
2462
         * character."
2463
         */
2464
2.53M
        if (res < INT_MAX)
2465
2.53M
            res++;
2466
2.53M
    }
2467
2468
136k
    return(res);
2469
269k
}
2470
2471
/************************************************************************
2472
 *                  *
2473
 *    Commodity functions to handle entities      *
2474
 *                  *
2475
 ************************************************************************/
2476
2477
/**
2478
 * @deprecated Internal function, don't use.
2479
 *
2480
 * @param ctxt  an XML parser context
2481
 * @returns the current xmlChar in the parser context
2482
 */
2483
xmlChar
2484
0
xmlPopInput(xmlParserCtxt *ctxt) {
2485
0
    xmlParserInputPtr input;
2486
2487
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2488
0
    input = xmlCtxtPopInput(ctxt);
2489
0
    xmlFreeInputStream(input);
2490
0
    if (*ctxt->input->cur == 0)
2491
0
        xmlParserGrow(ctxt);
2492
0
    return(CUR);
2493
0
}
2494
2495
/**
2496
 * Push an input stream onto the stack.
2497
 *
2498
 * @deprecated Internal function, don't use.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2502
 * @returns -1 in case of error or the index in the input stack
2503
 */
2504
int
2505
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2506
0
    int ret;
2507
2508
0
    if ((ctxt == NULL) || (input == NULL))
2509
0
        return(-1);
2510
2511
0
    ret = xmlCtxtPushInput(ctxt, input);
2512
0
    if (ret >= 0)
2513
0
        GROW;
2514
0
    return(ret);
2515
0
}
2516
2517
/**
2518
 * Parse a numeric character reference. Always consumes '&'.
2519
 *
2520
 * @deprecated Internal function, don't use.
2521
 *
2522
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2523
 *                      '&#x' [0-9a-fA-F]+ ';'
2524
 *
2525
 * [ WFC: Legal Character ]
2526
 * Characters referred to using character references must match the
2527
 * production for Char.
2528
 *
2529
 * @param ctxt  an XML parser context
2530
 * @returns the value parsed (as an int), 0 in case of error
2531
 */
2532
int
2533
73.0k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2534
73.0k
    int val = 0;
2535
73.0k
    int count = 0;
2536
2537
    /*
2538
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2539
     */
2540
73.0k
    if ((RAW == '&') && (NXT(1) == '#') &&
2541
73.0k
        (NXT(2) == 'x')) {
2542
19.3k
  SKIP(3);
2543
19.3k
  GROW;
2544
66.4k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2545
49.9k
      if (count++ > 20) {
2546
320
    count = 0;
2547
320
    GROW;
2548
320
      }
2549
49.9k
      if ((RAW >= '0') && (RAW <= '9'))
2550
8.58k
          val = val * 16 + (CUR - '0');
2551
41.3k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2552
27.6k
          val = val * 16 + (CUR - 'a') + 10;
2553
13.6k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2554
10.8k
          val = val * 16 + (CUR - 'A') + 10;
2555
2.86k
      else {
2556
2.86k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2557
2.86k
    val = 0;
2558
2.86k
    break;
2559
2.86k
      }
2560
47.0k
      if (val > 0x110000)
2561
5.97k
          val = 0x110000;
2562
2563
47.0k
      NEXT;
2564
47.0k
      count++;
2565
47.0k
  }
2566
19.3k
  if (RAW == ';') {
2567
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2568
16.4k
      ctxt->input->col++;
2569
16.4k
      ctxt->input->cur++;
2570
16.4k
  }
2571
53.7k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2572
53.7k
  SKIP(2);
2573
53.7k
  GROW;
2574
176k
  while (RAW != ';') { /* loop blocked by count */
2575
127k
      if (count++ > 20) {
2576
770
    count = 0;
2577
770
    GROW;
2578
770
      }
2579
127k
      if ((RAW >= '0') && (RAW <= '9'))
2580
122k
          val = val * 10 + (CUR - '0');
2581
5.04k
      else {
2582
5.04k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2583
5.04k
    val = 0;
2584
5.04k
    break;
2585
5.04k
      }
2586
122k
      if (val > 0x110000)
2587
7.83k
          val = 0x110000;
2588
2589
122k
      NEXT;
2590
122k
      count++;
2591
122k
  }
2592
53.7k
  if (RAW == ';') {
2593
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2594
48.7k
      ctxt->input->col++;
2595
48.7k
      ctxt->input->cur++;
2596
48.7k
  }
2597
53.7k
    } else {
2598
0
        if (RAW == '&')
2599
0
            SKIP(1);
2600
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2601
0
    }
2602
2603
    /*
2604
     * [ WFC: Legal Character ]
2605
     * Characters referred to using character references must match the
2606
     * production for Char.
2607
     */
2608
73.0k
    if (val >= 0x110000) {
2609
335
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2610
335
                "xmlParseCharRef: character reference out of bounds\n",
2611
335
          val);
2612
335
        val = 0xFFFD;
2613
72.7k
    } else if (!IS_CHAR(val)) {
2614
15.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2615
15.2k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2616
15.2k
                    val);
2617
15.2k
    }
2618
73.0k
    return(val);
2619
73.0k
}
2620
2621
/**
2622
 * Parse Reference declarations, variant parsing from a string rather
2623
 * than an an input flow.
2624
 *
2625
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2626
 *                      '&#x' [0-9a-fA-F]+ ';'
2627
 *
2628
 * [ WFC: Legal Character ]
2629
 * Characters referred to using character references must match the
2630
 * production for Char.
2631
 *
2632
 * @param ctxt  an XML parser context
2633
 * @param str  a pointer to an index in the string
2634
 * @returns the value parsed (as an int), 0 in case of error, str will be
2635
 *         updated to the current value of the index
2636
 */
2637
static int
2638
507k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2639
507k
    const xmlChar *ptr;
2640
507k
    xmlChar cur;
2641
507k
    int val = 0;
2642
2643
507k
    if ((str == NULL) || (*str == NULL)) return(0);
2644
507k
    ptr = *str;
2645
507k
    cur = *ptr;
2646
507k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2647
29.1k
  ptr += 3;
2648
29.1k
  cur = *ptr;
2649
376k
  while (cur != ';') { /* Non input consuming loop */
2650
348k
      if ((cur >= '0') && (cur <= '9'))
2651
306k
          val = val * 16 + (cur - '0');
2652
41.7k
      else if ((cur >= 'a') && (cur <= 'f'))
2653
14.8k
          val = val * 16 + (cur - 'a') + 10;
2654
26.8k
      else if ((cur >= 'A') && (cur <= 'F'))
2655
25.8k
          val = val * 16 + (cur - 'A') + 10;
2656
991
      else {
2657
991
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2658
991
    val = 0;
2659
991
    break;
2660
991
      }
2661
347k
      if (val > 0x110000)
2662
3.40k
          val = 0x110000;
2663
2664
347k
      ptr++;
2665
347k
      cur = *ptr;
2666
347k
  }
2667
29.1k
  if (cur == ';')
2668
28.1k
      ptr++;
2669
478k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2670
478k
  ptr += 2;
2671
478k
  cur = *ptr;
2672
1.48M
  while (cur != ';') { /* Non input consuming loops */
2673
1.00M
      if ((cur >= '0') && (cur <= '9'))
2674
1.00M
          val = val * 10 + (cur - '0');
2675
2.54k
      else {
2676
2.54k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2677
2.54k
    val = 0;
2678
2.54k
    break;
2679
2.54k
      }
2680
1.00M
      if (val > 0x110000)
2681
982
          val = 0x110000;
2682
2683
1.00M
      ptr++;
2684
1.00M
      cur = *ptr;
2685
1.00M
  }
2686
478k
  if (cur == ';')
2687
476k
      ptr++;
2688
478k
    } else {
2689
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2690
0
  return(0);
2691
0
    }
2692
507k
    *str = ptr;
2693
2694
    /*
2695
     * [ WFC: Legal Character ]
2696
     * Characters referred to using character references must match the
2697
     * production for Char.
2698
     */
2699
507k
    if (val >= 0x110000) {
2700
228
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2701
228
                "xmlParseStringCharRef: character reference out of bounds\n",
2702
228
                val);
2703
507k
    } else if (IS_CHAR(val)) {
2704
502k
        return(val);
2705
502k
    } else {
2706
4.71k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2707
4.71k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2708
4.71k
        val);
2709
4.71k
    }
2710
4.94k
    return(0);
2711
507k
}
2712
2713
/**
2714
 *     [69] PEReference ::= '%' Name ';'
2715
 *
2716
 * @deprecated Internal function, do not use.
2717
 *
2718
 * [ WFC: No Recursion ]
2719
 * A parsed entity must not contain a recursive
2720
 * reference to itself, either directly or indirectly.
2721
 *
2722
 * [ WFC: Entity Declared ]
2723
 * In a document without any DTD, a document with only an internal DTD
2724
 * subset which contains no parameter entity references, or a document
2725
 * with "standalone='yes'", ...  ... The declaration of a parameter
2726
 * entity must precede any reference to it...
2727
 *
2728
 * [ VC: Entity Declared ]
2729
 * In a document with an external subset or external parameter entities
2730
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2731
 * must precede any reference to it...
2732
 *
2733
 * [ WFC: In DTD ]
2734
 * Parameter-entity references may only appear in the DTD.
2735
 * NOTE: misleading but this is handled.
2736
 *
2737
 * A PEReference may have been detected in the current input stream
2738
 * the handling is done accordingly to
2739
 *      http://www.w3.org/TR/REC-xml#entproc
2740
 * i.e.
2741
 *   - Included in literal in entity values
2742
 *   - Included as Parameter Entity reference within DTDs
2743
 * @param ctxt  the parser context
2744
 */
2745
void
2746
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2747
0
    xmlParsePERefInternal(ctxt, 0);
2748
0
}
2749
2750
/**
2751
 * @deprecated Internal function, don't use.
2752
 *
2753
 * @param ctxt  the parser context
2754
 * @param str  the input string
2755
 * @param len  the string length
2756
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2757
 * @param end  an end marker xmlChar, 0 if none
2758
 * @param end2  an end marker xmlChar, 0 if none
2759
 * @param end3  an end marker xmlChar, 0 if none
2760
 * @returns A newly allocated string with the substitution done. The caller
2761
 *      must deallocate it !
2762
 */
2763
xmlChar *
2764
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2765
                           int what ATTRIBUTE_UNUSED,
2766
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2767
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2768
0
        return(NULL);
2769
2770
0
    if ((str[len] != 0) ||
2771
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2772
0
        return(NULL);
2773
2774
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2775
0
}
2776
2777
/**
2778
 * @deprecated Internal function, don't use.
2779
 *
2780
 * @param ctxt  the parser context
2781
 * @param str  the input string
2782
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2783
 * @param end  an end marker xmlChar, 0 if none
2784
 * @param end2  an end marker xmlChar, 0 if none
2785
 * @param end3  an end marker xmlChar, 0 if none
2786
 * @returns A newly allocated string with the substitution done. The caller
2787
 *      must deallocate it !
2788
 */
2789
xmlChar *
2790
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2791
                        int what ATTRIBUTE_UNUSED,
2792
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2793
0
    if ((ctxt == NULL) || (str == NULL))
2794
0
        return(NULL);
2795
2796
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2797
0
        return(NULL);
2798
2799
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2800
0
}
2801
2802
/************************************************************************
2803
 *                  *
2804
 *    Commodity functions, cleanup needed ?     *
2805
 *                  *
2806
 ************************************************************************/
2807
2808
/**
2809
 * Is this a sequence of blank chars that one can ignore ?
2810
 *
2811
 * @param ctxt  an XML parser context
2812
 * @param str  a xmlChar *
2813
 * @param len  the size of `str`
2814
 * @param blank_chars  we know the chars are blanks
2815
 * @returns 1 if ignorable 0 otherwise.
2816
 */
2817
2818
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2819
0
                     int blank_chars) {
2820
0
    int i;
2821
0
    xmlNodePtr lastChild;
2822
2823
    /*
2824
     * Check for xml:space value.
2825
     */
2826
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2827
0
        (*(ctxt->space) == -2))
2828
0
  return(0);
2829
2830
    /*
2831
     * Check that the string is made of blanks
2832
     */
2833
0
    if (blank_chars == 0) {
2834
0
  for (i = 0;i < len;i++)
2835
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2836
0
    }
2837
2838
    /*
2839
     * Look if the element is mixed content in the DTD if available
2840
     */
2841
0
    if (ctxt->node == NULL) return(0);
2842
0
    if (ctxt->myDoc != NULL) {
2843
0
        xmlElementPtr elemDecl = NULL;
2844
0
        xmlDocPtr doc = ctxt->myDoc;
2845
0
        const xmlChar *prefix = NULL;
2846
2847
0
        if (ctxt->node->ns)
2848
0
            prefix = ctxt->node->ns->prefix;
2849
0
        if (doc->intSubset != NULL)
2850
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2851
0
                                      prefix);
2852
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2853
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2854
0
                                      prefix);
2855
0
        if (elemDecl != NULL) {
2856
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2857
0
                return(1);
2858
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2859
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2860
0
                return(0);
2861
0
        }
2862
0
    }
2863
2864
    /*
2865
     * Otherwise, heuristic :-\
2866
     *
2867
     * When push parsing, we could be at the end of a chunk.
2868
     * This makes the look-ahead and consequently the NOBLANKS
2869
     * option unreliable.
2870
     */
2871
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2872
0
    if ((ctxt->node->children == NULL) &&
2873
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2874
2875
0
    lastChild = xmlGetLastChild(ctxt->node);
2876
0
    if (lastChild == NULL) {
2877
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2878
0
            (ctxt->node->content != NULL)) return(0);
2879
0
    } else if (xmlNodeIsText(lastChild))
2880
0
        return(0);
2881
0
    else if ((ctxt->node->children != NULL) &&
2882
0
             (xmlNodeIsText(ctxt->node->children)))
2883
0
        return(0);
2884
0
    return(1);
2885
0
}
2886
2887
/************************************************************************
2888
 *                  *
2889
 *    Extra stuff for namespace support     *
2890
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2891
 *                  *
2892
 ************************************************************************/
2893
2894
/**
2895
 * Parse an UTF8 encoded XML qualified name string
2896
 *
2897
 * @deprecated Don't use.
2898
 *
2899
 * @param ctxt  an XML parser context
2900
 * @param name  an XML parser context
2901
 * @param prefixOut  a xmlChar **
2902
 * @returns the local part, and prefix is updated
2903
 *   to get the Prefix if any.
2904
 */
2905
2906
xmlChar *
2907
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2908
0
    xmlChar *ret;
2909
0
    const xmlChar *localname;
2910
2911
0
    localname = xmlSplitQName4(name, prefixOut);
2912
0
    if (localname == NULL) {
2913
0
        xmlCtxtErrMemory(ctxt);
2914
0
        return(NULL);
2915
0
    }
2916
2917
0
    ret = xmlStrdup(localname);
2918
0
    if (ret == NULL) {
2919
0
        xmlCtxtErrMemory(ctxt);
2920
0
        xmlFree(*prefixOut);
2921
0
    }
2922
2923
0
    return(ret);
2924
0
}
2925
2926
/************************************************************************
2927
 *                  *
2928
 *      The parser itself       *
2929
 *  Relates to http://www.w3.org/TR/REC-xml       *
2930
 *                  *
2931
 ************************************************************************/
2932
2933
/************************************************************************
2934
 *                  *
2935
 *  Routines to parse Name, NCName and NmToken      *
2936
 *                  *
2937
 ************************************************************************/
2938
2939
/*
2940
 * The two following functions are related to the change of accepted
2941
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2942
 * They correspond to the modified production [4] and the new production [4a]
2943
 * changes in that revision. Also note that the macros used for the
2944
 * productions Letter, Digit, CombiningChar and Extender are not needed
2945
 * anymore.
2946
 * We still keep compatibility with pre-revision 5 parsing semantics if the
2947
 * new XML_PARSE_OLD10 option is given to the parser.
2948
 */
2949
2950
/*
 * Check whether a code point may start a Name according to the
 * productions [4], [4a] and [5] of the Update 5 of XML-1.0.
 *
 * Returns 1 if `c` is a NameStartChar, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    /* Non-ASCII NameStartChar ranges, sorted by increasing code point */
    static const int ranges[][2] = {
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    size_t i;

    /* ASCII: letters, '_' and ':' only */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        return((c == '_') || (c == ':'));
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted: below this range means in no range */
        if (c < ranges[i][0])
            return(0);
        if (c <= ranges[i][1])
            return(1);
    }
    return(0);
}
2975
2976
/*
 * Check whether a code point may appear inside a Name according to the
 * productions [4], [4a] and [5] of the Update 5 of XML-1.0.
 *
 * Returns 1 if `c` is a NameChar, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    /*
     * Non-ASCII NameChar ranges, sorted by increasing code point.
     * Entries marked "!start" are not valid as first character.
     */
    static const int ranges[][2] = {
        { 0xB7, 0xB7 },         /* !start */
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x300, 0x36F },       /* !start */
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x203F, 0x2040 },     /* !start */
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    size_t i;

    /* ASCII: letters, digits, '_', ':', '-' and '.' only */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        if ((c >= '0') && (c <= '9'))
            return(1);
        return((c == '_') || (c == ':') || (c == '-') || (c == '.'));
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted: below this range means in no range */
        if (c < ranges[i][0])
            return(0);
        if (c <= ranges[i][1])
            return(1);
    }
    return(0);
}
3005
3006
/*
 * Pre-revision 5 check for the first character of a Name:
 * a Letter, '_' or ':'.
 *
 * Returns 1 if `c` is accepted, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: frequent delimiters are rejected right away */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((c == '_') || (c == ':'))
        return(1);
    if (IS_LETTER(c))
        return(1);
    return(0);
}
3013
3014
/*
 * Pre-revision 5 check for a character inside a Name: a Letter, a
 * Digit, '.', '-', '_', ':', a CombiningChar or an Extender.
 *
 * Returns 1 if `c` is accepted, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: frequent delimiters are rejected right away */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);
    return(0);
}
3025
3026
/*
 * Check whether `c` may start a Name. A non-zero `old10` selects the
 * pre-revision 5 rules, zero selects the current ones.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3033
3034
/*
 * Check whether `c` may appear inside a Name. A non-zero `old10`
 * selects the pre-revision 5 rules, zero selects the current ones.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3041
3042
/*
3043
 * Scan an XML Name, NCName or Nmtoken.
3044
 *
3045
 * Returns a pointer to the end of the name on success. If the
3046
 * name is invalid, returns `ptr`. If the name is longer than
3047
 * `maxSize` bytes, returns NULL.
3048
 *
3049
 * @param ptr  pointer to the start of the name
3050
 * @param maxSize  maximum size in bytes
3051
 * @param flags  XML_SCAN_* flags
3052
 * @returns a pointer to the end of the name or NULL
3053
 */
3054
const xmlChar *
3055
754k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3056
754k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3057
754k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3058
3059
4.63M
    while (1) {
3060
4.63M
        int c, len;
3061
3062
4.63M
        c = *ptr;
3063
4.63M
        if (c < 0x80) {
3064
2.97M
            if (c == stop)
3065
251
                break;
3066
2.97M
            len = 1;
3067
2.97M
        } else {
3068
1.65M
            len = 4;
3069
1.65M
            c = xmlGetUTF8Char(ptr, &len);
3070
1.65M
            if (c < 0)
3071
565
                break;
3072
1.65M
        }
3073
3074
4.62M
        if (flags & XML_SCAN_NMTOKEN ?
3075
3.87M
                !xmlIsNameChar(c, old10) :
3076
4.62M
                !xmlIsNameStartChar(c, old10))
3077
753k
            break;
3078
3079
3.87M
        if ((size_t) len > maxSize)
3080
0
            return(NULL);
3081
3.87M
        ptr += len;
3082
3.87M
        maxSize -= len;
3083
3.87M
        flags |= XML_SCAN_NMTOKEN;
3084
3.87M
    }
3085
3086
754k
    return(ptr);
3087
754k
}
3088
3089
static const xmlChar *
3090
84.8k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3091
84.8k
    const xmlChar *ret;
3092
84.8k
    int len = 0, l;
3093
84.8k
    int c;
3094
84.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3095
84.8k
                    XML_MAX_TEXT_LENGTH :
3096
84.8k
                    XML_MAX_NAME_LENGTH;
3097
84.8k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3098
3099
    /*
3100
     * Handler for more complex cases
3101
     */
3102
84.8k
    c = xmlCurrentChar(ctxt, &l);
3103
84.8k
    if (!xmlIsNameStartChar(c, old10))
3104
59.4k
        return(NULL);
3105
25.4k
    len += l;
3106
25.4k
    NEXTL(l);
3107
25.4k
    c = xmlCurrentChar(ctxt, &l);
3108
4.99M
    while (xmlIsNameChar(c, old10)) {
3109
4.97M
        if (len <= INT_MAX - l)
3110
4.97M
            len += l;
3111
4.97M
        NEXTL(l);
3112
4.97M
        c = xmlCurrentChar(ctxt, &l);
3113
4.97M
    }
3114
25.4k
    if (len > maxLength) {
3115
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3116
0
        return(NULL);
3117
0
    }
3118
25.4k
    if (ctxt->input->cur - ctxt->input->base < len) {
3119
        /*
3120
         * There were a couple of bugs where PERefs lead to to a change
3121
         * of the buffer. Check the buffer size to avoid passing an invalid
3122
         * pointer to xmlDictLookup.
3123
         */
3124
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3125
0
                    "unexpected change of input buffer");
3126
0
        return (NULL);
3127
0
    }
3128
25.4k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3129
234
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3130
25.1k
    else
3131
25.1k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3132
25.4k
    if (ret == NULL)
3133
0
        xmlErrMemory(ctxt);
3134
25.4k
    return(ret);
3135
25.4k
}
3136
3137
/**
3138
 * Parse an XML name.
3139
 *
3140
 * @deprecated Internal function, don't use.
3141
 *
3142
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3143
 *                      CombiningChar | Extender
3144
 *
3145
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3146
 *
3147
 *     [6] Names ::= Name (#x20 Name)*
3148
 *
3149
 * @param ctxt  an XML parser context
3150
 * @returns the Name parsed or NULL
3151
 */
3152
3153
const xmlChar *
3154
517k
xmlParseName(xmlParserCtxt *ctxt) {
3155
517k
    const xmlChar *in;
3156
517k
    const xmlChar *ret;
3157
517k
    size_t count = 0;
3158
517k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3159
517k
                       XML_MAX_TEXT_LENGTH :
3160
517k
                       XML_MAX_NAME_LENGTH;
3161
3162
517k
    GROW;
3163
3164
    /*
3165
     * Accelerator for simple ASCII names
3166
     */
3167
517k
    in = ctxt->input->cur;
3168
517k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
517k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
517k
  (*in == '_') || (*in == ':')) {
3171
450k
  in++;
3172
3.67M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3173
3.67M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3174
3.67M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3175
3.67M
         (*in == '_') || (*in == '-') ||
3176
3.67M
         (*in == ':') || (*in == '.'))
3177
3.22M
      in++;
3178
450k
  if ((*in > 0) && (*in < 0x80)) {
3179
432k
      count = in - ctxt->input->cur;
3180
432k
            if (count > maxLength) {
3181
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3182
0
                return(NULL);
3183
0
            }
3184
432k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3185
432k
      ctxt->input->cur = in;
3186
432k
      ctxt->input->col += count;
3187
432k
      if (ret == NULL)
3188
0
          xmlErrMemory(ctxt);
3189
432k
      return(ret);
3190
432k
  }
3191
450k
    }
3192
    /* accelerator for special cases */
3193
84.8k
    return(xmlParseNameComplex(ctxt));
3194
517k
}
3195
3196
static xmlHashedString
3197
356k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3198
356k
    xmlHashedString ret;
3199
356k
    int len = 0, l;
3200
356k
    int c;
3201
356k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3202
356k
                    XML_MAX_TEXT_LENGTH :
3203
356k
                    XML_MAX_NAME_LENGTH;
3204
356k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3205
356k
    size_t startPosition = 0;
3206
3207
356k
    ret.name = NULL;
3208
356k
    ret.hashValue = 0;
3209
3210
    /*
3211
     * Handler for more complex cases
3212
     */
3213
356k
    startPosition = CUR_PTR - BASE_PTR;
3214
356k
    c = xmlCurrentChar(ctxt, &l);
3215
356k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3216
356k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3217
330k
  return(ret);
3218
330k
    }
3219
3220
5.34M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3221
5.34M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3222
5.32M
        if (len <= INT_MAX - l)
3223
5.32M
      len += l;
3224
5.32M
  NEXTL(l);
3225
5.32M
  c = xmlCurrentChar(ctxt, &l);
3226
5.32M
    }
3227
26.5k
    if (len > maxLength) {
3228
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3229
0
        return(ret);
3230
0
    }
3231
26.5k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3232
26.5k
    if (ret.name == NULL)
3233
0
        xmlErrMemory(ctxt);
3234
26.5k
    return(ret);
3235
26.5k
}
3236
3237
/**
3238
 * Parse an XML name.
3239
 *
3240
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3241
 *                          CombiningChar | Extender
3242
 *
3243
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3244
 *
3245
 * @param ctxt  an XML parser context
3246
 * @returns the Name parsed or NULL
3247
 */
3248
3249
static xmlHashedString
3250
1.42M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3251
1.42M
    const xmlChar *in, *e;
3252
1.42M
    xmlHashedString ret;
3253
1.42M
    size_t count = 0;
3254
1.42M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3255
1.42M
                       XML_MAX_TEXT_LENGTH :
3256
1.42M
                       XML_MAX_NAME_LENGTH;
3257
3258
1.42M
    ret.name = NULL;
3259
3260
    /*
3261
     * Accelerator for simple ASCII names
3262
     */
3263
1.42M
    in = ctxt->input->cur;
3264
1.42M
    e = ctxt->input->end;
3265
1.42M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
1.42M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
1.42M
   (*in == '_')) && (in < e)) {
3268
1.08M
  in++;
3269
3.91M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3270
3.91M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3271
3.91M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3272
3.91M
          (*in == '_') || (*in == '-') ||
3273
3.91M
          (*in == '.')) && (in < e))
3274
2.82M
      in++;
3275
1.08M
  if (in >= e)
3276
1.14k
      goto complex;
3277
1.08M
  if ((*in > 0) && (*in < 0x80)) {
3278
1.06M
      count = in - ctxt->input->cur;
3279
1.06M
            if (count > maxLength) {
3280
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3281
0
                return(ret);
3282
0
            }
3283
1.06M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3284
1.06M
      ctxt->input->cur = in;
3285
1.06M
      ctxt->input->col += count;
3286
1.06M
      if (ret.name == NULL) {
3287
0
          xmlErrMemory(ctxt);
3288
0
      }
3289
1.06M
      return(ret);
3290
1.06M
  }
3291
1.08M
    }
3292
356k
complex:
3293
356k
    return(xmlParseNCNameComplex(ctxt));
3294
1.42M
}
3295
3296
/**
3297
 * Parse an XML name and compares for match
3298
 * (specialized for endtag parsing)
3299
 *
3300
 * @param ctxt  an XML parser context
3301
 * @param other  the name to compare with
3302
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3303
 * and the name for mismatch
3304
 */
3305
3306
static const xmlChar *
3307
10.3k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3308
10.3k
    register const xmlChar *cmp = other;
3309
10.3k
    register const xmlChar *in;
3310
10.3k
    const xmlChar *ret;
3311
3312
10.3k
    GROW;
3313
3314
10.3k
    in = ctxt->input->cur;
3315
17.1k
    while (*in != 0 && *in == *cmp) {
3316
6.83k
  ++in;
3317
6.83k
  ++cmp;
3318
6.83k
    }
3319
10.3k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3320
  /* success */
3321
2.75k
  ctxt->input->col += in - ctxt->input->cur;
3322
2.75k
  ctxt->input->cur = in;
3323
2.75k
  return (const xmlChar*) 1;
3324
2.75k
    }
3325
    /* failure (or end of input buffer), check with full function */
3326
7.57k
    ret = xmlParseName (ctxt);
3327
    /* strings coming from the dictionary direct compare possible */
3328
7.57k
    if (ret == other) {
3329
2.30k
  return (const xmlChar*) 1;
3330
2.30k
    }
3331
5.26k
    return ret;
3332
7.57k
}
3333
3334
/**
3335
 * Parse an XML name.
3336
 *
3337
 * @param ctxt  an XML parser context
3338
 * @param str  a pointer to the string pointer (IN/OUT)
3339
 * @returns the Name parsed or NULL. The `str` pointer
3340
 * is updated to the current location in the string.
3341
 */
3342
3343
static xmlChar *
3344
750k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3345
750k
    xmlChar *ret;
3346
750k
    const xmlChar *cur = *str;
3347
750k
    int flags = 0;
3348
750k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349
750k
                    XML_MAX_TEXT_LENGTH :
3350
750k
                    XML_MAX_NAME_LENGTH;
3351
3352
750k
    if (ctxt->options & XML_PARSE_OLD10)
3353
0
        flags |= XML_SCAN_OLD10;
3354
3355
750k
    cur = xmlScanName(*str, maxLength, flags);
3356
750k
    if (cur == NULL) {
3357
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3358
0
        return(NULL);
3359
0
    }
3360
750k
    if (cur == *str)
3361
1.77k
        return(NULL);
3362
3363
749k
    ret = xmlStrndup(*str, cur - *str);
3364
749k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
749k
    *str = cur;
3367
749k
    return(ret);
3368
750k
}
3369
3370
/**
3371
 * Parse an XML Nmtoken.
3372
 *
3373
 * @deprecated Internal function, don't use.
3374
 *
3375
 *     [7] Nmtoken ::= (NameChar)+
3376
 *
3377
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378
 *
3379
 * @param ctxt  an XML parser context
3380
 * @returns the Nmtoken parsed or NULL
3381
 */
3382
3383
xmlChar *
3384
36.4k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3385
36.4k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3386
36.4k
    xmlChar *ret;
3387
36.4k
    int len = 0, l;
3388
36.4k
    int c;
3389
36.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3390
36.4k
                    XML_MAX_TEXT_LENGTH :
3391
36.4k
                    XML_MAX_NAME_LENGTH;
3392
36.4k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3393
3394
36.4k
    c = xmlCurrentChar(ctxt, &l);
3395
3396
155k
    while (xmlIsNameChar(c, old10)) {
3397
120k
  COPY_BUF(buf, len, c);
3398
120k
  NEXTL(l);
3399
120k
  c = xmlCurrentChar(ctxt, &l);
3400
120k
  if (len >= XML_MAX_NAMELEN) {
3401
      /*
3402
       * Okay someone managed to make a huge token, so he's ready to pay
3403
       * for the processing speed.
3404
       */
3405
1.15k
      xmlChar *buffer;
3406
1.15k
      int max = len * 2;
3407
3408
1.15k
      buffer = xmlMalloc(max);
3409
1.15k
      if (buffer == NULL) {
3410
0
          xmlErrMemory(ctxt);
3411
0
    return(NULL);
3412
0
      }
3413
1.15k
      memcpy(buffer, buf, len);
3414
4.35M
      while (xmlIsNameChar(c, old10)) {
3415
4.35M
    if (len + 10 > max) {
3416
2.78k
        xmlChar *tmp;
3417
2.78k
                    int newSize;
3418
3419
2.78k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3420
2.78k
                    if (newSize < 0) {
3421
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3422
0
                        xmlFree(buffer);
3423
0
                        return(NULL);
3424
0
                    }
3425
2.78k
        tmp = xmlRealloc(buffer, newSize);
3426
2.78k
        if (tmp == NULL) {
3427
0
      xmlErrMemory(ctxt);
3428
0
      xmlFree(buffer);
3429
0
      return(NULL);
3430
0
        }
3431
2.78k
        buffer = tmp;
3432
2.78k
                    max = newSize;
3433
2.78k
    }
3434
4.35M
    COPY_BUF(buffer, len, c);
3435
4.35M
    NEXTL(l);
3436
4.35M
    c = xmlCurrentChar(ctxt, &l);
3437
4.35M
      }
3438
1.15k
      buffer[len] = 0;
3439
1.15k
      return(buffer);
3440
1.15k
  }
3441
120k
    }
3442
35.2k
    if (len == 0)
3443
18.0k
        return(NULL);
3444
17.1k
    if (len > maxLength) {
3445
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3446
0
        return(NULL);
3447
0
    }
3448
17.1k
    ret = xmlStrndup(buf, len);
3449
17.1k
    if (ret == NULL)
3450
0
        xmlErrMemory(ctxt);
3451
17.1k
    return(ret);
3452
17.1k
}
3453
3454
/**
3455
 * Validate an entity value and expand parameter entities.
3456
 *
3457
 * @param ctxt  parser context
3458
 * @param buf  string buffer
3459
 * @param str  entity value
3460
 * @param length  size of entity value
3461
 * @param depth  nesting depth
3462
 */
3463
static void
3464
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3465
23.5k
                          const xmlChar *str, int length, int depth) {
3466
23.5k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3467
23.5k
    const xmlChar *end, *chunk;
3468
23.5k
    int c, l;
3469
3470
23.5k
    if (str == NULL)
3471
0
        return;
3472
3473
23.5k
    depth += 1;
3474
23.5k
    if (depth > maxDepth) {
3475
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3476
0
                       "Maximum entity nesting depth exceeded");
3477
0
  return;
3478
0
    }
3479
3480
23.5k
    end = str + length;
3481
23.5k
    chunk = str;
3482
3483
84.5M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3484
84.5M
        c = *str;
3485
3486
84.5M
        if (c >= 0x80) {
3487
67.6M
            l = xmlUTF8MultibyteLen(ctxt, str,
3488
67.6M
                    "invalid character in entity value\n");
3489
67.6M
            if (l == 0) {
3490
19.1M
                if (chunk < str)
3491
142k
                    xmlSBufAddString(buf, chunk, str - chunk);
3492
19.1M
                xmlSBufAddReplChar(buf);
3493
19.1M
                str += 1;
3494
19.1M
                chunk = str;
3495
48.4M
            } else {
3496
48.4M
                str += l;
3497
48.4M
            }
3498
67.6M
        } else if (c == '&') {
3499
56.8k
            if (str[1] == '#') {
3500
30.3k
                if (chunk < str)
3501
13.0k
                    xmlSBufAddString(buf, chunk, str - chunk);
3502
3503
30.3k
                c = xmlParseStringCharRef(ctxt, &str);
3504
30.3k
                if (c == 0)
3505
4.93k
                    return;
3506
3507
25.3k
                xmlSBufAddChar(buf, c);
3508
3509
25.3k
                chunk = str;
3510
26.5k
            } else {
3511
26.5k
                xmlChar *name;
3512
3513
                /*
3514
                 * General entity references are checked for
3515
                 * syntactic validity.
3516
                 */
3517
26.5k
                str++;
3518
26.5k
                name = xmlParseStringName(ctxt, &str);
3519
3520
26.5k
                if ((name == NULL) || (*str++ != ';')) {
3521
1.51k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3522
1.51k
                            "EntityValue: '&' forbidden except for entities "
3523
1.51k
                            "references\n");
3524
1.51k
                    xmlFree(name);
3525
1.51k
                    return;
3526
1.51k
                }
3527
3528
25.0k
                xmlFree(name);
3529
25.0k
            }
3530
16.8M
        } else if (c == '%') {
3531
2.47k
            xmlEntityPtr ent;
3532
3533
2.47k
            if (chunk < str)
3534
2.01k
                xmlSBufAddString(buf, chunk, str - chunk);
3535
3536
2.47k
            ent = xmlParseStringPEReference(ctxt, &str);
3537
2.47k
            if (ent == NULL)
3538
1.89k
                return;
3539
3540
581
            if (!PARSER_EXTERNAL(ctxt)) {
3541
581
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3542
581
                return;
3543
581
            }
3544
3545
0
            if (ent->content == NULL) {
3546
                /*
3547
                 * Note: external parsed entities will not be loaded,
3548
                 * it is not required for a non-validating parser to
3549
                 * complete external PEReferences coming from the
3550
                 * internal subset
3551
                 */
3552
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3553
0
                    ((ctxt->replaceEntities) ||
3554
0
                     (ctxt->validate))) {
3555
0
                    xmlLoadEntityContent(ctxt, ent);
3556
0
                } else {
3557
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3558
0
                                  "not validating will not read content for "
3559
0
                                  "PE entity %s\n", ent->name, NULL);
3560
0
                }
3561
0
            }
3562
3563
            /*
3564
             * TODO: Skip if ent->content is still NULL.
3565
             */
3566
3567
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3568
0
                return;
3569
3570
0
            if (ent->flags & XML_ENT_EXPANDING) {
3571
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3572
0
                return;
3573
0
            }
3574
3575
0
            ent->flags |= XML_ENT_EXPANDING;
3576
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3577
0
                                      depth);
3578
0
            ent->flags &= ~XML_ENT_EXPANDING;
3579
3580
0
            chunk = str;
3581
16.8M
        } else {
3582
            /* Normal ASCII char */
3583
16.8M
            if (!IS_BYTE_CHAR(c)) {
3584
2.82M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3585
2.82M
                        "invalid character in entity value\n");
3586
2.82M
                if (chunk < str)
3587
74.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3588
2.82M
                xmlSBufAddReplChar(buf);
3589
2.82M
                str += 1;
3590
2.82M
                chunk = str;
3591
13.9M
            } else {
3592
13.9M
                str += 1;
3593
13.9M
            }
3594
16.8M
        }
3595
84.5M
    }
3596
3597
14.6k
    if (chunk < str)
3598
12.4k
        xmlSBufAddString(buf, chunk, str - chunk);
3599
14.6k
}
3600
3601
/**
3602
 * Parse a value for ENTITY declarations
3603
 *
3604
 * @deprecated Internal function, don't use.
3605
 *
3606
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3607
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3608
 *
3609
 * @param ctxt  an XML parser context
3610
 * @param orig  if non-NULL store a copy of the original entity value
3611
 * @returns the EntityValue parsed with reference substituted or NULL
3612
 */
3613
xmlChar *
3614
24.0k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3615
24.0k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3616
24.0k
                         XML_MAX_HUGE_LENGTH :
3617
24.0k
                         XML_MAX_TEXT_LENGTH;
3618
24.0k
    xmlSBuf buf;
3619
24.0k
    const xmlChar *start;
3620
24.0k
    int quote, length;
3621
3622
24.0k
    xmlSBufInit(&buf, maxLength);
3623
3624
24.0k
    GROW;
3625
3626
24.0k
    quote = CUR;
3627
24.0k
    if ((quote != '"') && (quote != '\'')) {
3628
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3629
0
  return(NULL);
3630
0
    }
3631
24.0k
    CUR_PTR++;
3632
3633
24.0k
    length = 0;
3634
3635
    /*
3636
     * Copy raw content of the entity into a buffer
3637
     */
3638
133M
    while (1) {
3639
133M
        int c;
3640
3641
133M
        if (PARSER_STOPPED(ctxt))
3642
0
            goto error;
3643
3644
133M
        if (CUR_PTR >= ctxt->input->end) {
3645
406
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3646
406
            goto error;
3647
406
        }
3648
3649
133M
        c = CUR;
3650
3651
133M
        if (c == 0) {
3652
14
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3653
14
                    "invalid character in entity value\n");
3654
14
            goto error;
3655
14
        }
3656
133M
        if (c == quote)
3657
23.5k
            break;
3658
133M
        NEXTL(1);
3659
133M
        length += 1;
3660
3661
        /*
3662
         * TODO: Check growth threshold
3663
         */
3664
133M
        if (ctxt->input->end - CUR_PTR < 10)
3665
60.2k
            GROW;
3666
133M
    }
3667
3668
23.5k
    start = CUR_PTR - length;
3669
3670
23.5k
    if (orig != NULL) {
3671
23.5k
        *orig = xmlStrndup(start, length);
3672
23.5k
        if (*orig == NULL)
3673
0
            xmlErrMemory(ctxt);
3674
23.5k
    }
3675
3676
23.5k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3677
3678
23.5k
    NEXTL(1);
3679
3680
23.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3681
3682
420
error:
3683
420
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3684
420
    return(NULL);
3685
24.0k
}
3686
3687
/**
3688
 * Check an entity reference in an attribute value for validity
3689
 * without expanding it.
3690
 *
3691
 * @param ctxt  parser context
3692
 * @param pent  entity
3693
 * @param depth  nesting depth
3694
 */
3695
static void
3696
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3697
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3698
0
    const xmlChar *str;
3699
0
    unsigned long expandedSize = pent->length;
3700
0
    int c, flags;
3701
3702
0
    depth += 1;
3703
0
    if (depth > maxDepth) {
3704
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3705
0
                       "Maximum entity nesting depth exceeded");
3706
0
  return;
3707
0
    }
3708
3709
0
    if (pent->flags & XML_ENT_EXPANDING) {
3710
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3711
0
        return;
3712
0
    }
3713
3714
    /*
3715
     * If we're parsing a default attribute value in DTD content,
3716
     * the entity might reference other entities which weren't
3717
     * defined yet, so the check isn't reliable.
3718
     */
3719
0
    if (ctxt->inSubset == 0)
3720
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3721
0
    else
3722
0
        flags = XML_ENT_VALIDATED;
3723
3724
0
    str = pent->content;
3725
0
    if (str == NULL)
3726
0
        goto done;
3727
3728
    /*
3729
     * Note that entity values are already validated. We only check
3730
     * for illegal less-than signs and compute the expanded size
3731
     * of the entity. No special handling for multi-byte characters
3732
     * is needed.
3733
     */
3734
0
    while (!PARSER_STOPPED(ctxt)) {
3735
0
        c = *str;
3736
3737
0
  if (c != '&') {
3738
0
            if (c == 0)
3739
0
                break;
3740
3741
0
            if (c == '<')
3742
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3743
0
                        "'<' in entity '%s' is not allowed in attributes "
3744
0
                        "values\n", pent->name);
3745
3746
0
            str += 1;
3747
0
        } else if (str[1] == '#') {
3748
0
            int val;
3749
3750
0
      val = xmlParseStringCharRef(ctxt, &str);
3751
0
      if (val == 0) {
3752
0
                pent->content[0] = 0;
3753
0
                break;
3754
0
            }
3755
0
  } else {
3756
0
            xmlChar *name;
3757
0
            xmlEntityPtr ent;
3758
3759
0
      name = xmlParseStringEntityRef(ctxt, &str);
3760
0
      if (name == NULL) {
3761
0
                pent->content[0] = 0;
3762
0
                break;
3763
0
            }
3764
3765
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3766
0
            xmlFree(name);
3767
3768
0
            if ((ent != NULL) &&
3769
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3770
0
                if ((ent->flags & flags) != flags) {
3771
0
                    pent->flags |= XML_ENT_EXPANDING;
3772
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3773
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3774
0
                }
3775
3776
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3777
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3778
0
            }
3779
0
        }
3780
0
    }
3781
3782
0
done:
3783
0
    if (ctxt->inSubset == 0)
3784
0
        pent->expandedSize = expandedSize;
3785
3786
0
    pent->flags |= flags;
3787
0
}
3788
3789
/**
3790
 * Expand general entity references in an entity or attribute value.
3791
 * Perform attribute value normalization.
3792
 *
3793
 * @param ctxt  parser context
3794
 * @param buf  string buffer
3795
 * @param str  entity or attribute value
3796
 * @param pent  entity for entity value, NULL for attribute values
3797
 * @param normalize  whether to collapse whitespace
3798
 * @param inSpace  whitespace state
3799
 * @param depth  nesting depth
3800
 * @param check  whether to check for amplification
3801
 * @returns  whether there was a normalization change
3802
 */
3803
static int
3804
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3805
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3806
709k
                          int *inSpace, int depth, int check) {
3807
709k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3808
709k
    int c, chunkSize;
3809
709k
    int normChange = 0;
3810
3811
709k
    if (str == NULL)
3812
0
        return(0);
3813
3814
709k
    depth += 1;
3815
709k
    if (depth > maxDepth) {
3816
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3817
0
                       "Maximum entity nesting depth exceeded");
3818
0
  return(0);
3819
0
    }
3820
3821
709k
    if (pent != NULL) {
3822
709k
        if (pent->flags & XML_ENT_EXPANDING) {
3823
19
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3824
19
            return(0);
3825
19
        }
3826
3827
709k
        if (check) {
3828
709k
            if (xmlParserEntityCheck(ctxt, pent->length))
3829
66
                return(0);
3830
709k
        }
3831
709k
    }
3832
3833
709k
    chunkSize = 0;
3834
3835
    /*
3836
     * Note that entity values are already validated. No special
3837
     * handling for multi-byte characters is needed.
3838
     */
3839
562M
    while (!PARSER_STOPPED(ctxt)) {
3840
562M
        c = *str;
3841
3842
562M
  if (c != '&') {
3843
561M
            if (c == 0)
3844
690k
                break;
3845
3846
            /*
3847
             * If this function is called without an entity, it is used to
3848
             * expand entities in an attribute content where less-than was
3849
             * already unscaped and is allowed.
3850
             */
3851
560M
            if ((pent != NULL) && (c == '<')) {
3852
18.7k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3853
18.7k
                        "'<' in entity '%s' is not allowed in attributes "
3854
18.7k
                        "values\n", pent->name);
3855
18.7k
                break;
3856
18.7k
            }
3857
3858
560M
            if (c <= 0x20) {
3859
15.9M
                if ((normalize) && (*inSpace)) {
3860
                    /* Skip char */
3861
62.2k
                    if (chunkSize > 0) {
3862
17.6k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3863
17.6k
                        chunkSize = 0;
3864
17.6k
                    }
3865
62.2k
                    normChange = 1;
3866
15.9M
                } else if (c < 0x20) {
3867
14.7M
                    if (chunkSize > 0) {
3868
1.32M
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3869
1.32M
                        chunkSize = 0;
3870
1.32M
                    }
3871
3872
14.7M
                    xmlSBufAddCString(buf, " ", 1);
3873
14.7M
                } else {
3874
1.18M
                    chunkSize += 1;
3875
1.18M
                }
3876
3877
15.9M
                *inSpace = 1;
3878
544M
            } else {
3879
544M
                chunkSize += 1;
3880
544M
                *inSpace = 0;
3881
544M
            }
3882
3883
560M
            str += 1;
3884
560M
        } else if (str[1] == '#') {
3885
477k
            int val;
3886
3887
477k
            if (chunkSize > 0) {
3888
475k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3889
475k
                chunkSize = 0;
3890
475k
            }
3891
3892
477k
      val = xmlParseStringCharRef(ctxt, &str);
3893
477k
      if (val == 0) {
3894
7
                if (pent != NULL)
3895
7
                    pent->content[0] = 0;
3896
7
                break;
3897
7
            }
3898
3899
477k
            if (val == ' ') {
3900
425k
                if ((normalize) && (*inSpace))
3901
253
                    normChange = 1;
3902
425k
                else
3903
425k
                    xmlSBufAddCString(buf, " ", 1);
3904
425k
                *inSpace = 1;
3905
425k
            } else {
3906
51.8k
                xmlSBufAddChar(buf, val);
3907
51.8k
                *inSpace = 0;
3908
51.8k
            }
3909
721k
  } else {
3910
721k
            xmlChar *name;
3911
721k
            xmlEntityPtr ent;
3912
3913
721k
            if (chunkSize > 0) {
3914
655k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3915
655k
                chunkSize = 0;
3916
655k
            }
3917
3918
721k
      name = xmlParseStringEntityRef(ctxt, &str);
3919
721k
            if (name == NULL) {
3920
14
                if (pent != NULL)
3921
14
                    pent->content[0] = 0;
3922
14
                break;
3923
14
            }
3924
3925
721k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3926
721k
            xmlFree(name);
3927
3928
721k
      if ((ent != NULL) &&
3929
721k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3930
60.9k
    if (ent->content == NULL) {
3931
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3932
0
          "predefined entity has no content\n");
3933
0
                    break;
3934
0
                }
3935
3936
60.9k
                xmlSBufAddString(buf, ent->content, ent->length);
3937
3938
60.9k
                *inSpace = 0;
3939
660k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3940
615k
                if (pent != NULL)
3941
615k
                    pent->flags |= XML_ENT_EXPANDING;
3942
615k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3943
615k
                        ent->content, ent, normalize, inSpace, depth, check);
3944
615k
                if (pent != NULL)
3945
615k
                    pent->flags &= ~XML_ENT_EXPANDING;
3946
615k
      }
3947
721k
        }
3948
562M
    }
3949
3950
709k
    if (chunkSize > 0)
3951
659k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3952
3953
709k
    return(normChange);
3954
709k
}
3955
3956
/**
3957
 * Expand general entity references in an entity or attribute value.
3958
 * Perform attribute value normalization.
3959
 *
3960
 * @param ctxt  parser context
3961
 * @param str  entity or attribute value
3962
 * @param normalize  whether to collapse whitespace
3963
 * @returns the expanded attribtue value.
3964
 */
3965
xmlChar *
3966
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3967
0
                            int normalize) {
3968
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3969
0
                         XML_MAX_HUGE_LENGTH :
3970
0
                         XML_MAX_TEXT_LENGTH;
3971
0
    xmlSBuf buf;
3972
0
    int inSpace = 1;
3973
3974
0
    xmlSBufInit(&buf, maxLength);
3975
3976
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3977
0
                              ctxt->inputNr, /* check */ 0);
3978
3979
0
    if ((normalize) && (inSpace) && (buf.size > 0))
3980
0
        buf.size--;
3981
3982
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3983
0
}
3984
3985
/**
3986
 * Parse a value for an attribute.
3987
 *
3988
 * NOTE: if no normalization is needed, the routine will return pointers
3989
 * directly from the data buffer.
3990
 *
3991
 * 3.3.3 Attribute-Value Normalization:
3992
 *
3993
 * Before the value of an attribute is passed to the application or
3994
 * checked for validity, the XML processor must normalize it as follows:
3995
 *
3996
 * - a character reference is processed by appending the referenced
3997
 *   character to the attribute value
3998
 * - an entity reference is processed by recursively processing the
3999
 *   replacement text of the entity
4000
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4001
 *   appending \#x20 to the normalized value, except that only a single
4002
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4003
 *   parsed entity or the literal entity value of an internal parsed entity
4004
 * - other characters are processed by appending them to the normalized value
4005
 *
4006
 * If the declared value is not CDATA, then the XML processor must further
4007
 * process the normalized attribute value by discarding any leading and
4008
 * trailing space (\#x20) characters, and by replacing sequences of space
4009
 * (\#x20) characters by a single space (\#x20) character.
4010
 * All attributes for which no declaration has been read should be treated
4011
 * by a non-validating parser as if declared CDATA.
4012
 *
4013
 * @param ctxt  an XML parser context
4014
 * @param attlen  attribute len result
4015
 * @param outFlags  resulting XML_ATTVAL_* flags
4016
 * @param special  value from attsSpecial
4017
 * @param isNamespace  whether this is a namespace declaration
4018
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4019
 *     caller if it was copied, this can be detected by val[*len] == 0.
4020
 */
4021
static xmlChar *
4022
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4023
582k
                         int special, int isNamespace) {
4024
582k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4025
582k
                         XML_MAX_HUGE_LENGTH :
4026
582k
                         XML_MAX_TEXT_LENGTH;
4027
582k
    xmlSBuf buf;
4028
582k
    xmlChar *ret;
4029
582k
    int c, l, quote, entFlags, chunkSize;
4030
582k
    int inSpace = 1;
4031
582k
    int replaceEntities;
4032
582k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4033
582k
    int attvalFlags = 0;
4034
4035
    /* Always expand namespace URIs */
4036
582k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4037
4038
582k
    xmlSBufInit(&buf, maxLength);
4039
4040
582k
    GROW;
4041
4042
582k
    quote = CUR;
4043
582k
    if ((quote != '"') && (quote != '\'')) {
4044
7.02k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4045
7.02k
  return(NULL);
4046
7.02k
    }
4047
575k
    NEXTL(1);
4048
4049
575k
    if (ctxt->inSubset == 0)
4050
553k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4051
22.3k
    else
4052
22.3k
        entFlags = XML_ENT_VALIDATED;
4053
4054
575k
    inSpace = 1;
4055
575k
    chunkSize = 0;
4056
4057
52.3M
    while (1) {
4058
52.3M
        if (PARSER_STOPPED(ctxt))
4059
85
            goto error;
4060
4061
52.3M
        if (CUR_PTR >= ctxt->input->end) {
4062
3.50k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4063
3.50k
                           "AttValue: ' expected\n");
4064
3.50k
            goto error;
4065
3.50k
        }
4066
4067
        /*
4068
         * TODO: Check growth threshold
4069
         */
4070
52.3M
        if (ctxt->input->end - CUR_PTR < 10)
4071
44.4k
            GROW;
4072
4073
52.3M
        c = CUR;
4074
4075
52.3M
        if (c >= 0x80) {
4076
36.0M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4077
36.0M
                    "invalid character in attribute value\n");
4078
36.0M
            if (l == 0) {
4079
29.3M
                if (chunkSize > 0) {
4080
113k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4081
113k
                    chunkSize = 0;
4082
113k
                }
4083
29.3M
                xmlSBufAddReplChar(&buf);
4084
29.3M
                NEXTL(1);
4085
29.3M
            } else {
4086
6.66M
                chunkSize += l;
4087
6.66M
                NEXTL(l);
4088
6.66M
            }
4089
4090
36.0M
            inSpace = 0;
4091
36.0M
        } else if (c != '&') {
4092
16.1M
            if (c > 0x20) {
4093
7.83M
                if (c == quote)
4094
570k
                    break;
4095
4096
7.26M
                if (c == '<')
4097
216k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4098
4099
7.26M
                chunkSize += 1;
4100
7.26M
                inSpace = 0;
4101
8.26M
            } else if (!IS_BYTE_CHAR(c)) {
4102
1.63M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4103
1.63M
                        "invalid character in attribute value\n");
4104
1.63M
                if (chunkSize > 0) {
4105
35.0k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4106
35.0k
                    chunkSize = 0;
4107
35.0k
                }
4108
1.63M
                xmlSBufAddReplChar(&buf);
4109
1.63M
                inSpace = 0;
4110
6.63M
            } else {
4111
                /* Whitespace */
4112
6.63M
                if ((normalize) && (inSpace)) {
4113
                    /* Skip char */
4114
92.1k
                    if (chunkSize > 0) {
4115
2.36k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4116
2.36k
                        chunkSize = 0;
4117
2.36k
                    }
4118
92.1k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4119
6.54M
                } else if (c < 0x20) {
4120
                    /* Convert to space */
4121
6.09M
                    if (chunkSize > 0) {
4122
59.9k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4123
59.9k
                        chunkSize = 0;
4124
59.9k
                    }
4125
4126
6.09M
                    xmlSBufAddCString(&buf, " ", 1);
4127
6.09M
                } else {
4128
451k
                    chunkSize += 1;
4129
451k
                }
4130
4131
6.63M
                inSpace = 1;
4132
4133
6.63M
                if ((c == 0xD) && (NXT(1) == 0xA))
4134
776
                    CUR_PTR++;
4135
6.63M
            }
4136
4137
15.5M
            NEXTL(1);
4138
15.5M
        } else if (NXT(1) == '#') {
4139
38.4k
            int val;
4140
4141
38.4k
            if (chunkSize > 0) {
4142
19.7k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4143
19.7k
                chunkSize = 0;
4144
19.7k
            }
4145
4146
38.4k
            val = xmlParseCharRef(ctxt);
4147
38.4k
            if (val == 0)
4148
1.61k
                goto error;
4149
4150
36.8k
            if ((val == '&') && (!replaceEntities)) {
4151
                /*
4152
                 * The reparsing will be done in xmlNodeParseContent()
4153
                 * called from SAX2.c
4154
                 */
4155
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4156
0
                inSpace = 0;
4157
36.8k
            } else if (val == ' ') {
4158
16.2k
                if ((normalize) && (inSpace))
4159
1.03k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4160
15.2k
                else
4161
15.2k
                    xmlSBufAddCString(&buf, " ", 1);
4162
16.2k
                inSpace = 1;
4163
20.5k
            } else {
4164
20.5k
                xmlSBufAddChar(&buf, val);
4165
20.5k
                inSpace = 0;
4166
20.5k
            }
4167
170k
        } else {
4168
170k
            const xmlChar *name;
4169
170k
            xmlEntityPtr ent;
4170
4171
170k
            if (chunkSize > 0) {
4172
76.4k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4173
76.4k
                chunkSize = 0;
4174
76.4k
            }
4175
4176
170k
            name = xmlParseEntityRefInternal(ctxt);
4177
170k
            if (name == NULL) {
4178
                /*
4179
                 * Probably a literal '&' which wasn't escaped.
4180
                 * TODO: Handle gracefully in recovery mode.
4181
                 */
4182
31.9k
                continue;
4183
31.9k
            }
4184
4185
138k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4186
138k
            if (ent == NULL)
4187
40.4k
                continue;
4188
4189
98.3k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4190
3.99k
                if ((ent->content[0] == '&') && (!replaceEntities))
4191
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4192
3.99k
                else
4193
3.99k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4194
3.99k
                inSpace = 0;
4195
94.3k
            } else if (replaceEntities) {
4196
94.3k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4197
94.3k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4198
94.3k
                        /* check */ 1) > 0)
4199
2.84k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4200
94.3k
            } else {
4201
0
                if ((ent->flags & entFlags) != entFlags)
4202
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4203
4204
0
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4205
0
                    ent->content[0] = 0;
4206
0
                    goto error;
4207
0
                }
4208
4209
                /*
4210
                 * Just output the reference
4211
                 */
4212
0
                xmlSBufAddCString(&buf, "&", 1);
4213
0
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4214
0
                xmlSBufAddCString(&buf, ";", 1);
4215
4216
0
                inSpace = 0;
4217
0
            }
4218
98.3k
  }
4219
52.3M
    }
4220
4221
570k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4222
499k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4223
4224
499k
        if (attlen != NULL)
4225
499k
            *attlen = chunkSize;
4226
499k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4227
250
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4228
250
            *attlen -= 1;
4229
250
        }
4230
4231
        /* Report potential error */
4232
499k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4233
499k
    } else {
4234
70.7k
        if (chunkSize > 0)
4235
42.5k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4236
4237
70.7k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4238
464
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4239
464
            buf.size--;
4240
464
        }
4241
4242
70.7k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4243
70.7k
        attvalFlags |= XML_ATTVAL_ALLOC;
4244
4245
70.7k
        if (ret != NULL) {
4246
70.7k
            if (attlen != NULL)
4247
50.8k
                *attlen = buf.size;
4248
70.7k
        }
4249
70.7k
    }
4250
4251
570k
    if (outFlags != NULL)
4252
550k
        *outFlags = attvalFlags;
4253
4254
570k
    NEXTL(1);
4255
4256
570k
    return(ret);
4257
4258
5.19k
error:
4259
5.19k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4260
5.19k
    return(NULL);
4261
575k
}
4262
4263
/**
4264
 * Parse a value for an attribute
4265
 * Note: the parser won't do substitution of entities here, this
4266
 * will be handled later in #xmlStringGetNodeList
4267
 *
4268
 * @deprecated Internal function, don't use.
4269
 *
4270
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4271
 *                       "'" ([^<&'] | Reference)* "'"
4272
 *
4273
 * 3.3.3 Attribute-Value Normalization:
4274
 *
4275
 * Before the value of an attribute is passed to the application or
4276
 * checked for validity, the XML processor must normalize it as follows:
4277
 *
4278
 * - a character reference is processed by appending the referenced
4279
 *   character to the attribute value
4280
 * - an entity reference is processed by recursively processing the
4281
 *   replacement text of the entity
4282
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4283
 *   appending \#x20 to the normalized value, except that only a single
4284
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4285
 *   parsed entity or the literal entity value of an internal parsed entity
4286
 * - other characters are processed by appending them to the normalized value
4287
 *
4288
 * If the declared value is not CDATA, then the XML processor must further
4289
 * process the normalized attribute value by discarding any leading and
4290
 * trailing space (\#x20) characters, and by replacing sequences of space
4291
 * (\#x20) characters by a single space (\#x20) character.
4292
 * All attributes for which no declaration has been read should be treated
4293
 * by a non-validating parser as if declared CDATA.
4294
 *
4295
 * @param ctxt  an XML parser context
4296
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4297
 * caller.
4298
 */
4299
xmlChar *
4300
23.8k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4301
23.8k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4302
23.8k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4303
23.8k
}
4304
4305
/**
4306
 * Parse an XML Literal
4307
 *
4308
 * @deprecated Internal function, don't use.
4309
 *
4310
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4311
 *
4312
 * @param ctxt  an XML parser context
4313
 * @returns the SystemLiteral parsed or NULL
4314
 */
4315
4316
xmlChar *
4317
5.10k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4318
5.10k
    xmlChar *buf = NULL;
4319
5.10k
    int len = 0;
4320
5.10k
    int size = XML_PARSER_BUFFER_SIZE;
4321
5.10k
    int cur, l;
4322
5.10k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4323
5.10k
                    XML_MAX_TEXT_LENGTH :
4324
5.10k
                    XML_MAX_NAME_LENGTH;
4325
5.10k
    xmlChar stop;
4326
4327
5.10k
    if (RAW == '"') {
4328
2.09k
        NEXT;
4329
2.09k
  stop = '"';
4330
3.00k
    } else if (RAW == '\'') {
4331
1.63k
        NEXT;
4332
1.63k
  stop = '\'';
4333
1.63k
    } else {
4334
1.36k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4335
1.36k
  return(NULL);
4336
1.36k
    }
4337
4338
3.73k
    buf = xmlMalloc(size);
4339
3.73k
    if (buf == NULL) {
4340
0
        xmlErrMemory(ctxt);
4341
0
  return(NULL);
4342
0
    }
4343
3.73k
    cur = xmlCurrentCharRecover(ctxt, &l);
4344
4.96M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4345
4.95M
  if (len + 5 >= size) {
4346
2.42k
      xmlChar *tmp;
4347
2.42k
            int newSize;
4348
4349
2.42k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4350
2.42k
            if (newSize < 0) {
4351
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4352
0
                xmlFree(buf);
4353
0
                return(NULL);
4354
0
            }
4355
2.42k
      tmp = xmlRealloc(buf, newSize);
4356
2.42k
      if (tmp == NULL) {
4357
0
          xmlFree(buf);
4358
0
    xmlErrMemory(ctxt);
4359
0
    return(NULL);
4360
0
      }
4361
2.42k
      buf = tmp;
4362
2.42k
            size = newSize;
4363
2.42k
  }
4364
4.95M
  COPY_BUF(buf, len, cur);
4365
4.95M
  NEXTL(l);
4366
4.95M
  cur = xmlCurrentCharRecover(ctxt, &l);
4367
4.95M
    }
4368
3.73k
    buf[len] = 0;
4369
3.73k
    if (!IS_CHAR(cur)) {
4370
735
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371
3.00k
    } else {
4372
3.00k
  NEXT;
4373
3.00k
    }
4374
3.73k
    return(buf);
4375
3.73k
}
4376
4377
/**
4378
 * Parse an XML public literal
4379
 *
4380
 * @deprecated Internal function, don't use.
4381
 *
4382
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4383
 *
4384
 * @param ctxt  an XML parser context
4385
 * @returns the PubidLiteral parsed or NULL.
4386
 */
4387
4388
xmlChar *
4389
2.88k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4390
2.88k
    xmlChar *buf = NULL;
4391
2.88k
    int len = 0;
4392
2.88k
    int size = XML_PARSER_BUFFER_SIZE;
4393
2.88k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4394
2.88k
                    XML_MAX_TEXT_LENGTH :
4395
2.88k
                    XML_MAX_NAME_LENGTH;
4396
2.88k
    xmlChar cur;
4397
2.88k
    xmlChar stop;
4398
4399
2.88k
    if (RAW == '"') {
4400
1.83k
        NEXT;
4401
1.83k
  stop = '"';
4402
1.83k
    } else if (RAW == '\'') {
4403
532
        NEXT;
4404
532
  stop = '\'';
4405
532
    } else {
4406
519
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4407
519
  return(NULL);
4408
519
    }
4409
2.36k
    buf = xmlMalloc(size);
4410
2.36k
    if (buf == NULL) {
4411
0
  xmlErrMemory(ctxt);
4412
0
  return(NULL);
4413
0
    }
4414
2.36k
    cur = CUR;
4415
18.7M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4416
18.7M
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4417
18.7M
  if (len + 1 >= size) {
4418
1.75k
      xmlChar *tmp;
4419
1.75k
            int newSize;
4420
4421
1.75k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4422
1.75k
            if (newSize < 0) {
4423
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424
0
                xmlFree(buf);
4425
0
                return(NULL);
4426
0
            }
4427
1.75k
      tmp = xmlRealloc(buf, newSize);
4428
1.75k
      if (tmp == NULL) {
4429
0
    xmlErrMemory(ctxt);
4430
0
    xmlFree(buf);
4431
0
    return(NULL);
4432
0
      }
4433
1.75k
      buf = tmp;
4434
1.75k
            size = newSize;
4435
1.75k
  }
4436
18.7M
  buf[len++] = cur;
4437
18.7M
  NEXT;
4438
18.7M
  cur = CUR;
4439
18.7M
    }
4440
2.36k
    buf[len] = 0;
4441
2.36k
    if (cur != stop) {
4442
913
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4443
1.45k
    } else {
4444
1.45k
  NEXTL(1);
4445
1.45k
    }
4446
2.36k
    return(buf);
4447
2.36k
}
4448
4449
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4450
4451
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. An entry is non-zero (and equal to its own index) for the
 * bytes the fast loop may skip over; it is zero for every other byte.
 *
 * Only the ASCII half is spelled out: the entries for 0x80-0xFF
 * (non-ascii) are not listed and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only 0x9 is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
4488
4489
static void
4490
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4491
644k
              int isBlank) {
4492
644k
    int checkBlanks;
4493
4494
644k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4495
531k
        return;
4496
4497
113k
    checkBlanks = (!ctxt->keepBlanks) ||
4498
113k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4499
4500
    /*
4501
     * Calling areBlanks with only parts of a text node
4502
     * is fundamentally broken, making the NOBLANKS option
4503
     * essentially unusable.
4504
     */
4505
113k
    if ((checkBlanks) &&
4506
113k
        (areBlanks(ctxt, buf, size, isBlank))) {
4507
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4508
0
            (ctxt->keepBlanks))
4509
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4510
113k
    } else {
4511
113k
        if (ctxt->sax->characters != NULL)
4512
113k
            ctxt->sax->characters(ctxt->userData, buf, size);
4513
4514
        /*
4515
         * The old code used to update this value for "complex" data
4516
         * even if checkBlanks was false. This was probably a bug.
4517
         */
4518
113k
        if ((checkBlanks) && (*ctxt->space == -1))
4519
0
            *ctxt->space = -2;
4520
113k
    }
4521
113k
}
4522
4523
/**
4524
 * Parse character data. Always makes progress if the first char isn't
4525
 * '<' or '&'.
4526
 *
 * This is the fast path: bytes are scanned in place in the input buffer
 * and passed to xmlCharacters() without copying. When the scan stops on
 * a byte that the loop condition at the bottom does not accept, the line
 * and column recorded at the last flush are restored and
 * xmlParseCharDataComplex() takes over.
 *
4527
 * The right angle bracket (>) may be represented using the string "&gt;",
4528
 * and must, for compatibility, be escaped using "&gt;" or a character
4529
 * reference when it appears in the string "]]>" in content, when that
4530
 * string is not marking the end of a CDATA section.
4531
 *
4532
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4533
 * @param ctxt  an XML parser context
4534
 * @param partial  non-zero if the buffer may end in the middle of the
 *                 data; the scan then stops early when too few bytes
 *                 remain to check for "]]>", and the flag is forwarded
 *                 to xmlParseCharDataComplex()
4535
 */
4536
static void
4537
2.94M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4538
2.94M
    const xmlChar *in;
4539
2.94M
    int line = ctxt->input->line;
4540
2.94M
    int col = ctxt->input->col;
4541
2.94M
    int ccol;
4542
2.94M
    int terminate = 0;
4543
4544
2.94M
    GROW;
4545
    /*
4546
     * Accelerated common case where input don't need to be
4547
     * modified before passing it to the handler.
4548
     */
4549
2.94M
    in = ctxt->input->cur;
4550
2.94M
    do {
4551
2.96M
get_more_space:
        /* Skip leading spaces and line feeds, keeping line/col current. */
4552
2.97M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4553
2.96M
        if (*in == 0xA) {
4554
628k
            do {
4555
628k
                ctxt->input->line++; ctxt->input->col = 1;
4556
628k
                in++;
4557
628k
            } while (*in == 0xA);
4558
16.9k
            goto get_more_space;
4559
16.9k
        }
4560
2.94M
        if (*in == '<') {
            /*
             * Only spaces and line feeds precede the '<': flush them
             * with isBlank set to 1, at most XML_MAX_ITEMS bytes per
             * callback, then return.
             */
4561
4.90k
            while (in > ctxt->input->cur) {
4562
2.45k
                const xmlChar *tmp = ctxt->input->cur;
4563
2.45k
                size_t nbchar = in - tmp;
4564
4565
2.45k
                if (nbchar > XML_MAX_ITEMS)
4566
0
                    nbchar = XML_MAX_ITEMS;
4567
2.45k
                ctxt->input->cur += nbchar;
4568
4569
2.45k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4570
2.45k
            }
4571
2.45k
            return;
4572
2.45k
        }
4573
4574
2.98M
get_more:
        /*
         * Advance over bytes flagged in test_char_data, counting columns
         * in a local and storing the result once the run ends.
         */
4575
2.98M
        ccol = ctxt->input->col;
4576
7.32M
        while (test_char_data[*in]) {
4577
4.34M
            in++;
4578
4.34M
            ccol++;
4579
4.34M
        }
4580
2.98M
        ctxt->input->col = ccol;
4581
2.98M
        if (*in == 0xA) {
4582
552k
            do {
4583
552k
                ctxt->input->line++; ctxt->input->col = 1;
4584
552k
                in++;
4585
552k
            } while (*in == 0xA);
4586
15.8k
            goto get_more;
4587
15.8k
        }
4588
2.96M
        if (*in == ']') {
            /*
             * A ']' may start "]]>", which is an error in content.
             * In partial mode, flush and stop when too few bytes remain
             * to tell.
             */
4589
19.8k
            size_t avail = ctxt->input->end - in;
4590
4591
19.8k
            if (partial && avail < 2) {
4592
0
                terminate = 1;
4593
0
                goto invoke_callback;
4594
0
            }
4595
19.8k
            if (in[1] == ']') {
4596
5.71k
                if (partial && avail < 3) {
4597
0
                    terminate = 1;
4598
0
                    goto invoke_callback;
4599
0
                }
4600
5.71k
                if (in[2] == '>')
4601
4.87k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4602
5.71k
            }
4603
4604
19.8k
            in++;
4605
19.8k
            ctxt->input->col++;
4606
19.8k
            goto get_more;
4607
19.8k
        }
4608
4609
2.94M
invoke_callback:
        /*
         * Flush everything scanned so far (isBlank set to 0), at most
         * XML_MAX_ITEMS bytes per callback, and snapshot line/col after
         * each callback.
         */
4610
3.22M
        while (in > ctxt->input->cur) {
4611
273k
            const xmlChar *tmp = ctxt->input->cur;
4612
273k
            size_t nbchar = in - tmp;
4613
4614
273k
            if (nbchar > XML_MAX_ITEMS)
4615
0
                nbchar = XML_MAX_ITEMS;
4616
273k
            ctxt->input->cur += nbchar;
4617
4618
273k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4619
4620
273k
            line = ctxt->input->line;
4621
273k
            col = ctxt->input->col;
4622
273k
        }
4623
2.94M
        ctxt->input->cur = in;
4624
2.94M
        if (*in == 0xD) {
4625
15.5k
            in++;
4626
15.5k
            if (*in == 0xA) {
                /*
                 * CR LF: move cur onto the LF so the CR is left out of
                 * the next chunk.
                 */
4627
892
                ctxt->input->cur = in;
4628
892
                in++;
4629
892
                ctxt->input->line++; ctxt->input->col = 1;
4630
892
                continue; /* while */
4631
892
            }
            /* Lone CR: step back and leave it to the checks below. */
4632
14.7k
            in--;
4633
14.7k
        }
4634
2.94M
        if (*in == '<') {
4635
123k
            return;
4636
123k
        }
4637
2.82M
        if (*in == '&') {
4638
43.7k
            return;
4639
43.7k
        }
        /* Partial input ran out while checking for "]]>". */
4640
2.77M
        if (terminate) {
4641
0
            return;
4642
0
        }
4643
2.77M
        SHRINK;
4644
2.77M
        GROW;
4645
2.77M
        in = ctxt->input->cur;
4646
2.77M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4647
2.77M
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The fast path cannot continue: restore the position info saved at
     * the last flush and switch to the general routine.
     */
4648
2.77M
    ctxt->input->line = line;
4649
2.77M
    ctxt->input->col = col;
4650
2.77M
    xmlParseCharDataComplex(ctxt, partial);
4651
2.77M
}
4652
4653
/**
4654
 * Always makes progress if the first char isn't '<' or '&'.
4655
 *
4656
 * Parse a CharData section. This is the fallback routine that
4657
 * xmlParseCharDataInternal() calls when its byte-level fast path
4658
 * cannot continue, for example on non-ASCII characters.
4659
 *
 * Characters are decoded one at a time with xmlCurrentCharRecover(),
 * copied into a local buffer and delivered through xmlCharacters()
 * whenever the buffer fills up and once more at the end.
 *
4660
 * @param ctxt  an XML parser context
4661
 * @param partial  whether the input can end with truncated UTF-8
4662
 */
4663
static void
4664
2.77M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
    /* NOTE(review): the extra 5 bytes presumably leave room for one
     * multi-byte COPY_BUF append plus the terminator - confirm against
     * the COPY_BUF definition. */
4665
2.77M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4666
2.77M
    int nbchar = 0;
4667
2.77M
    int cur, l;
4668
4669
2.77M
    cur = xmlCurrentCharRecover(ctxt, &l);
4670
33.8M
    while ((cur != '<') && /* checked */
4671
33.8M
           (cur != '&') &&
4672
33.8M
     (IS_CHAR(cur))) {
4673
31.0M
        if (cur == ']') {
            /*
             * "]]>" is an error in content. In partial mode, stop when
             * too few bytes remain to tell whether it is present.
             */
4674
13.9k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4675
4676
13.9k
            if (partial && avail < 2)
4677
0
                break;
4678
13.9k
            if (NXT(1) == ']') {
4679
5.33k
                if (partial && avail < 3)
4680
0
                    break;
4681
5.33k
                if (NXT(2) == '>')
4682
4.30k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4683
5.33k
            }
4684
13.9k
        }
4685
4686
31.0M
  COPY_BUF(buf, nbchar, cur);
4687
  /* move current position before possible calling of ctxt->sax->characters */
4688
31.0M
  NEXTL(l);
4689
31.0M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
      /* Buffer full: terminate it, deliver it and start over. */
4690
210k
      buf[nbchar] = 0;
4691
4692
210k
            xmlCharacters(ctxt, buf, nbchar, 0);
4693
210k
      nbchar = 0;
4694
210k
            SHRINK;
4695
210k
  }
4696
31.0M
  cur = xmlCurrentCharRecover(ctxt, &l);
4697
31.0M
    }
    /* Deliver whatever is left in the buffer. */
4698
2.77M
    if (nbchar != 0) {
4699
157k
        buf[nbchar] = 0;
4700
4701
157k
        xmlCharacters(ctxt, buf, nbchar, 0);
4702
157k
    }
4703
    /*
4704
     * cur == 0 can mean
4705
     *
4706
     * - End of buffer.
4707
     * - An actual 0 character.
4708
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4709
     */
4710
2.77M
    if (ctxt->input->cur < ctxt->input->end) {
4711
2.77M
        if ((cur == 0) && (CUR != 0)) {
            /*
             * Decoder returned 0 but the byte at the cursor is not 0:
             * report it and skip one byte unless partial input is
             * allowed.
             */
4712
458
            if (partial == 0) {
4713
458
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714
458
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4715
458
                NEXTL(1);
4716
458
            }
4717
2.77M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4718
            /* Generate the error and skip the offending character */
4719
2.67M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4720
2.67M
                              "PCDATA invalid Char value %d\n", cur);
4721
2.67M
            NEXTL(l);
4722
2.67M
        }
4723
2.77M
    }
4724
2.77M
}
4725
4726
/**
4727
 * Parse character data by calling xmlParseCharDataInternal() with
 * partial set to 0.
 *
 * @deprecated Internal function, don't use.
4728
 * @param ctxt  an XML parser context
4729
 * @param cdata  unused
4730
 */
4731
void
4732
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4733
0
    xmlParseCharDataInternal(ctxt, 0);
4734
0
}
4735
4736
/**
4737
 * Parse an External ID or a Public ID
4738
 *
4739
 * @deprecated Internal function, don't use.
4740
 *
4741
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4742
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4743
 *
4744
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4745
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4746
 *
4747
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4748
 *
 * If the input starts with neither 'SYSTEM' nor 'PUBLIC', nothing is
 * consumed, *publicId is set to NULL and NULL is returned. Missing
 * blanks or literals are reported as errors but parsing goes on.
 *
4749
 * @param ctxt  an XML parser context
4750
 * @param publicId  a xmlChar** receiving PubidLiteral
4751
 * @param strict  indicate whether we should restrict parsing to only
4752
 *          production [75], see NOTE above
4753
 * @returns the function returns SystemLiteral and in the second
4754
 *                case publicID receives PubidLiteral, if strict is off
4755
 *                it is possible to return NULL and have publicID set.
4756
 */
4757
4758
xmlChar *
4759
16.7k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4760
16.7k
    xmlChar *URI = NULL;
4761
4762
16.7k
    *publicId = NULL;
4763
16.7k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
        /* 'SYSTEM' S SystemLiteral */
4764
3.28k
        SKIP(6);
4765
3.28k
  if (SKIP_BLANKS == 0) {
4766
1.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4767
1.12k
                     "Space required after 'SYSTEM'\n");
4768
1.12k
  }
4769
3.28k
  URI = xmlParseSystemLiteral(ctxt);
4770
3.28k
  if (URI == NULL) {
4771
243
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4772
243
        }
4773
13.4k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
        /* 'PUBLIC' S PubidLiteral, optionally followed by S SystemLiteral */
4774
2.88k
        SKIP(6);
4775
2.88k
  if (SKIP_BLANKS == 0) {
4776
2.42k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4777
2.42k
        "Space required after 'PUBLIC'\n");
4778
2.42k
  }
4779
2.88k
  *publicId = xmlParsePubidLiteral(ctxt);
4780
2.88k
  if (*publicId == NULL) {
4781
519
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4782
519
  }
4783
2.88k
  if (strict) {
4784
      /*
4785
       * We don't handle [83] so "S SystemLiteral" is required.
4786
       */
4787
1.22k
      if (SKIP_BLANKS == 0) {
4788
922
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
922
      "Space required after the Public Identifier\n");
4790
922
      }
4791
1.66k
  } else {
4792
      /*
4793
       * We handle [83] so we return immediately, if
4794
       * "S SystemLiteral" is not detected. We skip blanks if no
4795
             * system literal was found, but this is harmless since we must
4796
             * be at the end of a NotationDecl.
4797
       */
4798
1.66k
      if (SKIP_BLANKS == 0) return(NULL);
4799
935
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4800
935
  }
4801
1.81k
  URI = xmlParseSystemLiteral(ctxt);
4802
1.81k
  if (URI == NULL) {
4803
1.12k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4804
1.12k
        }
4805
1.81k
    }
4806
15.6k
    return(URI);
4807
16.7k
}
4808
4809
/**
4810
 * Parse the remainder of an XML (SGML) comment <!-- .... --> and report
 * it through the SAX comment callback.
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 * This is the slow routine in case the accelerator for ascii didn't work
4814
 *
 * The buffer is released with xmlFree() on every path that returns,
 * whether it was passed in or allocated here.
 *
4815
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4816
 * @param ctxt  an XML parser context
4817
 * @param buf  the already parsed part of the buffer, or NULL to have a
 *             new buffer allocated here
4818
 * @param len  number of bytes in the buffer
4819
 * @param size  allocated size of the buffer
4820
 */
4821
static void
4822
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4823
40.2k
                       size_t len, size_t size) {
4824
40.2k
    int q, ql;
4825
40.2k
    int r, rl;
4826
40.2k
    int cur, l;
4827
40.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4828
40.2k
                    XML_MAX_HUGE_LENGTH :
4829
40.2k
                    XML_MAX_TEXT_LENGTH;
4830
4831
40.2k
    if (buf == NULL) {
        /* Nothing collected by the caller: start with a fresh buffer. */
4832
31.9k
        len = 0;
4833
31.9k
  size = XML_PARSER_BUFFER_SIZE;
4834
31.9k
  buf = xmlMalloc(size);
4835
31.9k
  if (buf == NULL) {
4836
0
      xmlErrMemory(ctxt);
4837
0
      return;
4838
0
  }
4839
31.9k
    }
    /*
     * Prime a three-character window: q is the oldest character, r the
     * next one and cur the newest.
     */
4840
40.2k
    q = xmlCurrentCharRecover(ctxt, &ql);
4841
40.2k
    if (q == 0)
4842
1.34k
        goto not_terminated;
4843
38.9k
    if (!IS_CHAR(q)) {
4844
1.05k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845
1.05k
                          "xmlParseComment: invalid xmlChar value %d\n",
4846
1.05k
                    q);
4847
1.05k
  xmlFree (buf);
4848
1.05k
  return;
4849
1.05k
    }
4850
37.8k
    NEXTL(ql);
4851
37.8k
    r = xmlCurrentCharRecover(ctxt, &rl);
4852
37.8k
    if (r == 0)
4853
390
        goto not_terminated;
4854
37.4k
    if (!IS_CHAR(r)) {
4855
4.24k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4856
4.24k
                          "xmlParseComment: invalid xmlChar value %d\n",
4857
4.24k
                    r);
4858
4.24k
  xmlFree (buf);
4859
4.24k
  return;
4860
4.24k
    }
4861
33.2k
    NEXTL(rl);
4862
33.2k
    cur = xmlCurrentCharRecover(ctxt, &l);
4863
33.2k
    if (cur == 0)
4864
686
        goto not_terminated;
    /*
     * Run until the window holds "-->" (q == '-', r == '-', cur == '>')
     * or cur is not a valid Char.
     */
4865
3.22M
    while (IS_CHAR(cur) && /* checked */
4866
3.22M
           ((cur != '>') ||
4867
3.21M
      (r != '-') || (q != '-'))) {
  /* "--" not followed by '>': report it and keep parsing. */
4868
3.19M
  if ((r == '-') && (q == '-')) {
4869
36.4k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4870
36.4k
  }
  /* Grow the buffer before appending when fewer than 6 bytes are free. */
4871
3.19M
  if (len + 5 >= size) {
4872
2.08k
      xmlChar *tmp;
4873
2.08k
            int newSize;
4874
4875
2.08k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4876
2.08k
            if (newSize < 0) {
4877
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878
0
                             "Comment too big found", NULL);
4879
0
                xmlFree (buf);
4880
0
                return;
4881
0
            }
4882
2.08k
      tmp = xmlRealloc(buf, newSize);
4883
2.08k
      if (tmp == NULL) {
4884
0
    xmlErrMemory(ctxt);
4885
0
    xmlFree(buf);
4886
0
    return;
4887
0
      }
4888
2.08k
      buf = tmp;
4889
2.08k
            size = newSize;
4890
2.08k
  }
  /*
   * Only the oldest character is stored, so when the loop stops on
   * "-->" the two hyphens still held in q and r never reach buf.
   */
4891
3.19M
  COPY_BUF(buf, len, q);
4892
4893
  /* Slide the window by one character. */
3.19M
  q = r;
4894
3.19M
  ql = rl;
4895
3.19M
  r = cur;
4896
3.19M
  rl = l;
4897
4898
3.19M
  NEXTL(l);
4899
3.19M
  cur = xmlCurrentCharRecover(ctxt, &l);
4900
4901
3.19M
    }
4902
32.5k
    buf[len] = 0;
4903
32.5k
    if (cur == 0) {
4904
6.11k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4905
6.11k
                       "Comment not terminated \n<!--%.50s\n", buf);
4906
26.4k
    } else if (!IS_CHAR(cur)) {
4907
3.63k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4908
3.63k
                          "xmlParseComment: invalid xmlChar value %d\n",
4909
3.63k
                    cur);
4910
22.8k
    } else {
        /* cur is the closing '>': consume it and report the comment. */
4911
22.8k
        NEXT;
4912
22.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4913
22.8k
      (!ctxt->disableSAX))
4914
1.29k
      ctxt->sax->comment(ctxt->userData, buf);
4915
22.8k
    }
4916
32.5k
    xmlFree(buf);
4917
32.5k
    return;
4918
2.41k
not_terminated:
4919
2.41k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4920
2.41k
       "Comment not terminated\n", NULL);
4921
2.41k
    xmlFree(buf);
4922
2.41k
}
4923
4924
/**
4925
 * Parse an XML (SGML) comment. Always consumes '<!'.
4926
 *
4927
 * @deprecated Internal function, don't use.
4928
 *
4929
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4930
 *  must not occur within comments. "
4931
 *
4932
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4933
 * @param ctxt  an XML parser context
4934
 */
4935
void
4936
52.9k
xmlParseComment(xmlParserCtxt *ctxt) {
4937
52.9k
    xmlChar *buf = NULL;
4938
52.9k
    size_t size = XML_PARSER_BUFFER_SIZE;
4939
52.9k
    size_t len = 0;
4940
52.9k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4941
52.9k
                       XML_MAX_HUGE_LENGTH :
4942
52.9k
                       XML_MAX_TEXT_LENGTH;
4943
52.9k
    const xmlChar *in;
4944
52.9k
    size_t nbchar = 0;
4945
52.9k
    int ccol;
4946
4947
    /*
4948
     * Check that there is a comment right here.
4949
     */
4950
52.9k
    if ((RAW != '<') || (NXT(1) != '!'))
4951
0
        return;
4952
52.9k
    SKIP(2);
4953
52.9k
    if ((RAW != '-') || (NXT(1) != '-'))
4954
5
        return;
4955
52.9k
    SKIP(2);
4956
52.9k
    GROW;
4957
4958
    /*
4959
     * Accelerated common case where input don't need to be
4960
     * modified before passing it to the handler.
4961
     */
4962
52.9k
    in = ctxt->input->cur;
4963
53.4k
    do {
4964
53.4k
  if (*in == 0xA) {
4965
1.16k
      do {
4966
1.16k
    ctxt->input->line++; ctxt->input->col = 1;
4967
1.16k
    in++;
4968
1.16k
      } while (*in == 0xA);
4969
416
  }
4970
271k
get_more:
4971
271k
        ccol = ctxt->input->col;
4972
2.30M
  while (((*in > '-') && (*in <= 0x7F)) ||
4973
2.30M
         ((*in >= 0x20) && (*in < '-')) ||
4974
2.30M
         (*in == 0x09)) {
4975
2.02M
        in++;
4976
2.02M
        ccol++;
4977
2.02M
  }
4978
271k
  ctxt->input->col = ccol;
4979
271k
  if (*in == 0xA) {
4980
70.9k
      do {
4981
70.9k
    ctxt->input->line++; ctxt->input->col = 1;
4982
70.9k
    in++;
4983
70.9k
      } while (*in == 0xA);
4984
4.60k
      goto get_more;
4985
4.60k
  }
4986
267k
  nbchar = in - ctxt->input->cur;
4987
  /*
4988
   * save current set of data
4989
   */
4990
267k
  if (nbchar > 0) {
4991
223k
            if (nbchar > maxLength - len) {
4992
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4993
0
                                  "Comment too big found", NULL);
4994
0
                xmlFree(buf);
4995
0
                return;
4996
0
            }
4997
223k
            if (buf == NULL) {
4998
12.2k
                if ((*in == '-') && (in[1] == '-'))
4999
4.90k
                    size = nbchar + 1;
5000
7.31k
                else
5001
7.31k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5002
12.2k
                buf = xmlMalloc(size);
5003
12.2k
                if (buf == NULL) {
5004
0
                    xmlErrMemory(ctxt);
5005
0
                    return;
5006
0
                }
5007
12.2k
                len = 0;
5008
210k
            } else if (len + nbchar + 1 >= size) {
5009
1.80k
                xmlChar *new_buf;
5010
1.80k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5011
1.80k
                new_buf = xmlRealloc(buf, size);
5012
1.80k
                if (new_buf == NULL) {
5013
0
                    xmlErrMemory(ctxt);
5014
0
                    xmlFree(buf);
5015
0
                    return;
5016
0
                }
5017
1.80k
                buf = new_buf;
5018
1.80k
            }
5019
223k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5020
223k
            len += nbchar;
5021
223k
            buf[len] = 0;
5022
223k
  }
5023
267k
  ctxt->input->cur = in;
5024
267k
  if (*in == 0xA) {
5025
0
      in++;
5026
0
      ctxt->input->line++; ctxt->input->col = 1;
5027
0
  }
5028
267k
  if (*in == 0xD) {
5029
19.9k
      in++;
5030
19.9k
      if (*in == 0xA) {
5031
2.63k
    ctxt->input->cur = in;
5032
2.63k
    in++;
5033
2.63k
    ctxt->input->line++; ctxt->input->col = 1;
5034
2.63k
    goto get_more;
5035
2.63k
      }
5036
17.3k
      in--;
5037
17.3k
  }
5038
264k
  SHRINK;
5039
264k
  GROW;
5040
264k
  in = ctxt->input->cur;
5041
264k
  if (*in == '-') {
5042
223k
      if (in[1] == '-') {
5043
217k
          if (in[2] == '>') {
5044
12.6k
        SKIP(3);
5045
12.6k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5046
12.6k
            (!ctxt->disableSAX)) {
5047
1.00k
      if (buf != NULL)
5048
384
          ctxt->sax->comment(ctxt->userData, buf);
5049
617
      else
5050
617
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5051
1.00k
        }
5052
12.6k
        if (buf != NULL)
5053
3.84k
            xmlFree(buf);
5054
12.6k
        return;
5055
12.6k
    }
5056
204k
    if (buf != NULL) {
5057
204k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5058
204k
                          "Double hyphen within comment: "
5059
204k
                                      "<!--%.50s\n",
5060
204k
              buf);
5061
204k
    } else
5062
750
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5063
750
                          "Double hyphen within comment\n", NULL);
5064
204k
    in++;
5065
204k
    ctxt->input->col++;
5066
204k
      }
5067
211k
      in++;
5068
211k
      ctxt->input->col++;
5069
211k
      goto get_more;
5070
223k
  }
5071
264k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5072
40.2k
    xmlParseCommentComplex(ctxt, buf, len, size);
5073
40.2k
}
5074
5075
5076
/**
5077
 * Parse the name of a PI
5078
 *
5079
 * @deprecated Internal function, don't use.
5080
 *
5081
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5082
 *
5083
 * @param ctxt  an XML parser context
5084
 * @returns the PITarget name or NULL
5085
 */
5086
5087
const xmlChar *
5088
69.5k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5089
69.5k
    const xmlChar *name;
5090
5091
69.5k
    name = xmlParseName(ctxt);
5092
69.5k
    if ((name != NULL) &&
5093
69.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5094
69.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5095
69.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5096
15.1k
  int i;
5097
15.1k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5098
15.1k
      (name[2] == 'l') && (name[3] == 0)) {
5099
3.39k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100
3.39k
     "XML declaration allowed only at the start of the document\n");
5101
3.39k
      return(name);
5102
11.7k
  } else if (name[3] == 0) {
5103
565
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5104
565
      return(name);
5105
565
  }
5106
24.8k
  for (i = 0;;i++) {
5107
24.8k
      if (xmlW3CPIs[i] == NULL) break;
5108
22.2k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5109
8.51k
          return(name);
5110
22.2k
  }
5111
2.62k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5112
2.62k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5113
2.62k
          NULL, NULL);
5114
2.62k
    }
5115
57.0k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5116
1.71k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5117
1.71k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5118
1.71k
    }
5119
57.0k
    return(name);
5120
69.5k
}
5121
5122
#ifdef LIBXML_CATALOG_ENABLED
/**
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a quoted value, optional
 * blanks. Any deviation produces a warning, except a missing '=' which
 * makes the function return silently.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): unlike the other checks this one does not warn -
     * confirm whether that is intentional. */
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (uri != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5188
5189
/**
5190
 * Parse an XML Processing Instruction.
5191
 *
5192
 * @deprecated Internal function, don't use.
5193
 *
5194
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5195
 *
5196
 * The processing is transferred to SAX once parsed.
5197
 *
 * Does nothing if the input does not start with "<?". A PI without
 * content is reported with a NULL data argument. The content buffer is
 * allocated here and freed before returning.
 *
5198
 * @param ctxt  an XML parser context
5199
 */
5200
5201
void
5202
69.5k
xmlParsePI(xmlParserCtxt *ctxt) {
5203
69.5k
    xmlChar *buf = NULL;
5204
69.5k
    size_t len = 0;
5205
69.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5206
69.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5207
69.5k
                       XML_MAX_HUGE_LENGTH :
5208
69.5k
                       XML_MAX_TEXT_LENGTH;
5209
69.5k
    int cur, l;
5210
69.5k
    const xmlChar *target;
5211
5212
69.5k
    if ((RAW == '<') && (NXT(1) == '?')) {
5213
  /*
5214
   * this is a Processing Instruction.
5215
   */
5216
69.5k
  SKIP(2);
5217
5218
  /*
5219
   * Parse the target name and check for special support like
5220
   * namespace.
5221
   */
5222
69.5k
        target = xmlParsePITarget(ctxt);
5223
69.5k
  if (target != NULL) {
      /* "?>" right after the target: a PI with no content. */
5224
63.5k
      if ((RAW == '?') && (NXT(1) == '>')) {
5225
7.94k
    SKIP(2);
5226
5227
    /*
5228
     * SAX: PI detected.
5229
     */
5230
7.94k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5231
7.94k
        (ctxt->sax->processingInstruction != NULL))
5232
1.70k
        ctxt->sax->processingInstruction(ctxt->userData,
5233
1.70k
                                         target, NULL);
5234
7.94k
    return;
5235
7.94k
      }
5236
55.6k
      buf = xmlMalloc(size);
5237
55.6k
      if (buf == NULL) {
5238
0
    xmlErrMemory(ctxt);
5239
0
    return;
5240
0
      }
      /* Missing blank after the target is an error, parsing continues. */
5241
55.6k
      if (SKIP_BLANKS == 0) {
5242
31.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5243
31.0k
        "ParsePI: PI %s space expected\n", target);
5244
31.0k
      }
      /* Collect the content until "?>" or a character that is not a Char. */
5245
55.6k
      cur = xmlCurrentCharRecover(ctxt, &l);
5246
3.87M
      while (IS_CHAR(cur) && /* checked */
5247
3.87M
       ((cur != '?') || (NXT(1) != '>'))) {
    /* Grow the buffer before appending when fewer than 6 bytes are free. */
5248
3.81M
    if (len + 5 >= size) {
5249
8.92k
        xmlChar *tmp;
5250
8.92k
                    int newSize;
5251
5252
8.92k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5253
8.92k
                    if (newSize < 0) {
5254
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5255
0
                                          "PI %s too big found", target);
5256
0
                        xmlFree(buf);
5257
0
                        return;
5258
0
                    }
5259
8.92k
        tmp = xmlRealloc(buf, newSize);
5260
8.92k
        if (tmp == NULL) {
5261
0
      xmlErrMemory(ctxt);
5262
0
      xmlFree(buf);
5263
0
      return;
5264
0
        }
5265
8.92k
        buf = tmp;
5266
8.92k
                    size = newSize;
5267
8.92k
    }
5268
3.81M
    COPY_BUF(buf, len, cur);
5269
3.81M
    NEXTL(l);
5270
3.81M
    cur = xmlCurrentCharRecover(ctxt, &l);
5271
3.81M
      }
5272
55.6k
      buf[len] = 0;
      /* The loop stopped on something other than the closing "?>". */
5273
55.6k
      if (cur != '?') {
5274
22.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5275
22.6k
          "ParsePI: PI %s never end ...\n", target);
5276
32.9k
      } else {
5277
32.9k
    SKIP(2);
5278
5279
32.9k
#ifdef LIBXML_CATALOG_ENABLED
    /*
     * Catalog PI: handled only outside of any subset, when the
     * XML_PARSE_CATALOG_PI option is set and the catalog defaults
     * allow document or all catalogs.
     */
5280
32.9k
    if ((ctxt->inSubset == 0) &&
5281
32.9k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5282
15.4k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5283
5284
15.4k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5285
15.4k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5286
0
       (allow == XML_CATA_ALLOW_ALL)))
5287
0
      xmlParseCatalogPI(ctxt, buf);
5288
15.4k
    }
5289
32.9k
#endif
5290
5291
    /*
5292
     * SAX: PI detected.
5293
     */
5294
32.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295
32.9k
        (ctxt->sax->processingInstruction != NULL))
5296
1.43k
        ctxt->sax->processingInstruction(ctxt->userData,
5297
1.43k
                                         target, buf);
5298
32.9k
      }
5299
55.6k
      xmlFree(buf);
5300
55.6k
  } else {
5301
5.97k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302
5.97k
  }
5303
69.5k
    }
5304
69.5k
}
5305
5306
/**
5307
 * Parse a notation declaration. Always consumes '<!'.
5308
 *
5309
 * @deprecated Internal function, don't use.
5310
 *
5311
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5312
 *                           S? '>'
5313
 *
5314
 * Hence there is actually 3 choices:
5315
 *
5316
 *     'PUBLIC' S PubidLiteral
5317
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5318
 *     'SYSTEM' S SystemLiteral
5319
 *
5320
 * See the NOTE on #xmlParseExternalID.
5321
 *
 * Returns without doing anything if the input does not start with "<!".
 * If 'NOTATION' does not follow, only "<!" is consumed. A complete
 * declaration is reported through the SAX notationDecl callback. The
 * two identifier strings are freed here before returning.
 *
5322
 * @param ctxt  an XML parser context
5323
 */
5324
5325
void
5326
4.81k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5327
4.81k
    const xmlChar *name;
5328
4.81k
    xmlChar *Pubid;
5329
4.81k
    xmlChar *Systemid;
5330
5331
4.81k
    if ((CUR != '<') || (NXT(1) != '!'))
5332
0
        return;
5333
4.81k
    SKIP(2);
5334
5335
4.81k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5336
4.78k
#ifdef LIBXML_VALID_ENABLED
  /* Remembered to detect a declaration that ends in another input. */
5337
4.78k
  int oldInputNr = ctxt->inputNr;
5338
4.78k
#endif
5339
5340
4.78k
  SKIP(8);
5341
4.78k
  if (SKIP_BLANKS_PE == 0) {
5342
428
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5343
428
         "Space required after '<!NOTATION'\n");
5344
428
      return;
5345
428
  }
5346
5347
4.35k
        name = xmlParseName(ctxt);
5348
4.35k
  if (name == NULL) {
5349
484
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5350
484
      return;
5351
484
  }
  /* A colon in the name is reported but does not stop parsing. */
5352
3.87k
  if (xmlStrchr(name, ':') != NULL) {
5353
318
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5354
318
         "colons are forbidden from notation names '%s'\n",
5355
318
         name, NULL, NULL);
5356
318
  }
5357
3.87k
  if (SKIP_BLANKS_PE == 0) {
5358
449
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
449
         "Space required after the NOTATION name'\n");
5360
449
      return;
5361
449
  }
5362
5363
  /*
5364
   * Parse the IDs.
   * strict is 0 so that a bare PublicID is accepted; Pubid is always
   * initialized by xmlParseExternalID.
5365
   */
5366
3.42k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5367
3.42k
  SKIP_BLANKS_PE;
5368
5369
3.42k
  if (RAW == '>') {
5370
986
#ifdef LIBXML_VALID_ENABLED
5371
986
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5372
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5373
0
                           "Notation declaration doesn't start and stop"
5374
0
                                 " in the same entity\n",
5375
0
                                 NULL, NULL);
5376
0
      }
5377
986
#endif
5378
986
      NEXT;
5379
986
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5380
986
    (ctxt->sax->notationDecl != NULL))
5381
472
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5382
2.43k
  } else {
5383
2.43k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5384
2.43k
  }
  /* Both identifier strings are released here on success and on error. */
5385
3.42k
  if (Systemid != NULL) xmlFree(Systemid);
5386
3.42k
  if (Pubid != NULL) xmlFree(Pubid);
5387
3.42k
    }
5388
4.81k
}
5389
5390
/**
5391
 * Parse an entity declaration. Always consumes '<!'.
5392
 *
5393
 * @deprecated Internal function, don't use.
5394
 *
5395
 *     [70] EntityDecl ::= GEDecl | PEDecl
5396
 *
5397
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5398
 *
5399
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5400
 *
5401
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5402
 *
5403
 *     [74] PEDef ::= EntityValue | ExternalID
5404
 *
5405
 *     [76] NDataDecl ::= S 'NDATA' S Name
5406
 *
5407
 * [ VC: Notation Declared ]
5408
 * The Name must match the declared name of a notation.
5409
 *
5410
 * @param ctxt  an XML parser context
5411
 */
5412
5413
void
5414
30.2k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5415
30.2k
    const xmlChar *name = NULL;
5416
30.2k
    xmlChar *value = NULL;
5417
30.2k
    xmlChar *URI = NULL, *literal = NULL;
5418
30.2k
    const xmlChar *ndata = NULL;
5419
30.2k
    int isParameter = 0;
5420
30.2k
    xmlChar *orig = NULL;
5421
5422
30.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5423
0
        return;
5424
30.2k
    SKIP(2);
5425
5426
    /* GROW; done in the caller */
5427
30.2k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5428
30.2k
#ifdef LIBXML_VALID_ENABLED
5429
30.2k
  int oldInputNr = ctxt->inputNr;
5430
30.2k
#endif
5431
5432
30.2k
  SKIP(6);
5433
30.2k
  if (SKIP_BLANKS_PE == 0) {
5434
14.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5435
14.2k
         "Space required after '<!ENTITY'\n");
5436
14.2k
  }
5437
5438
30.2k
  if (RAW == '%') {
5439
5.86k
      NEXT;
5440
5.86k
      if (SKIP_BLANKS_PE == 0) {
5441
952
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442
952
             "Space required after '%%'\n");
5443
952
      }
5444
5.86k
      isParameter = 1;
5445
5.86k
  }
5446
5447
30.2k
        name = xmlParseName(ctxt);
5448
30.2k
  if (name == NULL) {
5449
661
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5450
661
                     "xmlParseEntityDecl: no name\n");
5451
661
            return;
5452
661
  }
5453
29.5k
  if (xmlStrchr(name, ':') != NULL) {
5454
1.06k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5455
1.06k
         "colons are forbidden from entities names '%s'\n",
5456
1.06k
         name, NULL, NULL);
5457
1.06k
  }
5458
29.5k
  if (SKIP_BLANKS_PE == 0) {
5459
12.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
12.0k
         "Space required after the entity name\n");
5461
12.0k
  }
5462
5463
  /*
5464
   * handle the various case of definitions...
5465
   */
5466
29.5k
  if (isParameter) {
5467
5.82k
      if ((RAW == '"') || (RAW == '\'')) {
5468
4.74k
          value = xmlParseEntityValue(ctxt, &orig);
5469
4.74k
    if (value) {
5470
4.55k
        if ((ctxt->sax != NULL) &&
5471
4.55k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472
2.03k
      ctxt->sax->entityDecl(ctxt->userData, name,
5473
2.03k
                        XML_INTERNAL_PARAMETER_ENTITY,
5474
2.03k
            NULL, NULL, value);
5475
4.55k
    }
5476
4.74k
      } else {
5477
1.07k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5478
1.07k
    if ((URI == NULL) && (literal == NULL)) {
5479
223
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480
223
    }
5481
1.07k
    if (URI) {
5482
790
                    if (xmlStrchr(URI, '#')) {
5483
197
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484
593
                    } else {
5485
593
                        if ((ctxt->sax != NULL) &&
5486
593
                            (!ctxt->disableSAX) &&
5487
593
                            (ctxt->sax->entityDecl != NULL))
5488
332
                            ctxt->sax->entityDecl(ctxt->userData, name,
5489
332
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5490
332
                                        literal, URI, NULL);
5491
593
                    }
5492
790
    }
5493
1.07k
      }
5494
23.7k
  } else {
5495
23.7k
      if ((RAW == '"') || (RAW == '\'')) {
5496
19.2k
          value = xmlParseEntityValue(ctxt, &orig);
5497
19.2k
    if ((ctxt->sax != NULL) &&
5498
19.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499
1.49k
        ctxt->sax->entityDecl(ctxt->userData, name,
5500
1.49k
        XML_INTERNAL_GENERAL_ENTITY,
5501
1.49k
        NULL, NULL, value);
5502
    /*
5503
     * For expat compatibility in SAX mode.
5504
     */
5505
19.2k
    if ((ctxt->myDoc == NULL) ||
5506
19.2k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5507
10.0k
        if (ctxt->myDoc == NULL) {
5508
1.53k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5509
1.53k
      if (ctxt->myDoc == NULL) {
5510
0
          xmlErrMemory(ctxt);
5511
0
          goto done;
5512
0
      }
5513
1.53k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5514
1.53k
        }
5515
10.0k
        if (ctxt->myDoc->intSubset == NULL) {
5516
1.53k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5517
1.53k
              BAD_CAST "fake", NULL, NULL);
5518
1.53k
                        if (ctxt->myDoc->intSubset == NULL) {
5519
0
                            xmlErrMemory(ctxt);
5520
0
                            goto done;
5521
0
                        }
5522
1.53k
                    }
5523
5524
10.0k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5525
10.0k
                    NULL, NULL, value);
5526
10.0k
    }
5527
19.2k
      } else {
5528
4.47k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5529
4.47k
    if ((URI == NULL) && (literal == NULL)) {
5530
2.18k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531
2.18k
    }
5532
4.47k
    if (URI) {
5533
1.48k
                    if (xmlStrchr(URI, '#')) {
5534
239
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5535
239
                    }
5536
1.48k
    }
5537
4.47k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5538
2.29k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5539
2.29k
           "Space required before 'NDATA'\n");
5540
2.29k
    }
5541
4.47k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5542
1.10k
        SKIP(5);
5543
1.10k
        if (SKIP_BLANKS_PE == 0) {
5544
314
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5545
314
               "Space required after 'NDATA'\n");
5546
314
        }
5547
1.10k
        ndata = xmlParseName(ctxt);
5548
1.10k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5549
1.10k
            (ctxt->sax->unparsedEntityDecl != NULL))
5550
388
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5551
388
            literal, URI, ndata);
5552
3.37k
    } else {
5553
3.37k
        if ((ctxt->sax != NULL) &&
5554
3.37k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555
79
      ctxt->sax->entityDecl(ctxt->userData, name,
5556
79
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5557
79
            literal, URI, NULL);
5558
        /*
5559
         * For expat compatibility in SAX mode.
5560
         * assuming the entity replacement was asked for
5561
         */
5562
3.37k
        if ((ctxt->replaceEntities != 0) &&
5563
3.37k
      ((ctxt->myDoc == NULL) ||
5564
3.37k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5565
1.11k
      if (ctxt->myDoc == NULL) {
5566
95
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5567
95
          if (ctxt->myDoc == NULL) {
5568
0
              xmlErrMemory(ctxt);
5569
0
        goto done;
5570
0
          }
5571
95
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5572
95
      }
5573
5574
1.11k
      if (ctxt->myDoc->intSubset == NULL) {
5575
95
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5576
95
            BAD_CAST "fake", NULL, NULL);
5577
95
                            if (ctxt->myDoc->intSubset == NULL) {
5578
0
                                xmlErrMemory(ctxt);
5579
0
                                goto done;
5580
0
                            }
5581
95
                        }
5582
1.11k
      xmlSAX2EntityDecl(ctxt, name,
5583
1.11k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584
1.11k
                  literal, URI, NULL);
5585
1.11k
        }
5586
3.37k
    }
5587
4.47k
      }
5588
23.7k
  }
5589
29.5k
  SKIP_BLANKS_PE;
5590
29.5k
  if (RAW != '>') {
5591
6.08k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5592
6.08k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5593
23.4k
  } else {
5594
23.4k
#ifdef LIBXML_VALID_ENABLED
5595
23.4k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5596
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5597
0
                           "Entity declaration doesn't start and stop in"
5598
0
                                 " the same entity\n",
5599
0
                                 NULL, NULL);
5600
0
      }
5601
23.4k
#endif
5602
23.4k
      NEXT;
5603
23.4k
  }
5604
29.5k
  if (orig != NULL) {
5605
      /*
5606
       * Ugly mechanism to save the raw entity value.
5607
       */
5608
23.5k
      xmlEntityPtr cur = NULL;
5609
5610
23.5k
      if (isParameter) {
5611
4.55k
          if ((ctxt->sax != NULL) &&
5612
4.55k
        (ctxt->sax->getParameterEntity != NULL))
5613
4.55k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5614
19.0k
      } else {
5615
19.0k
          if ((ctxt->sax != NULL) &&
5616
19.0k
        (ctxt->sax->getEntity != NULL))
5617
19.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5618
19.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5619
6.60k
        cur = xmlSAX2GetEntity(ctxt, name);
5620
6.60k
    }
5621
19.0k
      }
5622
23.5k
            if ((cur != NULL) && (cur->orig == NULL)) {
5623
5.51k
    cur->orig = orig;
5624
5.51k
                orig = NULL;
5625
5.51k
      }
5626
23.5k
  }
5627
5628
29.5k
done:
5629
29.5k
  if (value != NULL) xmlFree(value);
5630
29.5k
  if (URI != NULL) xmlFree(URI);
5631
29.5k
  if (literal != NULL) xmlFree(literal);
5632
29.5k
        if (orig != NULL) xmlFree(orig);
5633
29.5k
    }
5634
30.2k
}
5635
5636
/**
5637
 * Parse an attribute default declaration
5638
 *
5639
 * @deprecated Internal function, don't use.
5640
 *
5641
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5642
 *
5643
 * [ VC: Required Attribute ]
5644
 * if the default declaration is the keyword \#REQUIRED, then the
5645
 * attribute must be specified for all elements of the type in the
5646
 * attribute-list declaration.
5647
 *
5648
 * [ VC: Attribute Default Legal ]
5649
 * The declared default value must meet the lexical constraints of
5650
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5651
 *
5652
 * [ VC: Fixed Attribute Default ]
5653
 * if an attribute has a default value declared with the \#FIXED
5654
 * keyword, instances of that attribute must match the default value.
5655
 *
5656
 * [ WFC: No < in Attribute Values ]
5657
 * handled in #xmlParseAttValue
5658
 *
5659
 * @param ctxt  an XML parser context
5660
 * @param value  Receive a possible fixed default value for the attribute
5661
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5662
 *          or XML_ATTRIBUTE_FIXED.
5663
 */
5664
5665
int
5666
25.6k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5667
25.6k
    int val;
5668
25.6k
    xmlChar *ret;
5669
5670
25.6k
    *value = NULL;
5671
25.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5672
1.37k
  SKIP(9);
5673
1.37k
  return(XML_ATTRIBUTE_REQUIRED);
5674
1.37k
    }
5675
24.2k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5676
428
  SKIP(8);
5677
428
  return(XML_ATTRIBUTE_IMPLIED);
5678
428
    }
5679
23.8k
    val = XML_ATTRIBUTE_NONE;
5680
23.8k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5681
416
  SKIP(6);
5682
416
  val = XML_ATTRIBUTE_FIXED;
5683
416
  if (SKIP_BLANKS_PE == 0) {
5684
222
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5685
222
         "Space required after '#FIXED'\n");
5686
222
  }
5687
416
    }
5688
23.8k
    ret = xmlParseAttValue(ctxt);
5689
23.8k
    if (ret == NULL) {
5690
3.92k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5691
3.92k
           "Attribute default value declaration error\n");
5692
3.92k
    } else
5693
19.8k
        *value = ret;
5694
23.8k
    return(val);
5695
24.2k
}
5696
5697
/**
5698
 * Parse an Notation attribute type.
5699
 *
5700
 * @deprecated Internal function, don't use.
5701
 *
5702
 * Note: the leading 'NOTATION' S part has already being parsed...
5703
 *
5704
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5705
 *
5706
 * [ VC: Notation Attributes ]
5707
 * Values of this type must match one of the notation names included
5708
 * in the declaration; all notation names in the declaration must be declared.
5709
 *
5710
 * @param ctxt  an XML parser context
5711
 * @returns the notation attribute tree built while parsing
5712
 */
5713
5714
xmlEnumeration *
5715
1.51k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5716
1.51k
    const xmlChar *name;
5717
1.51k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5718
5719
1.51k
    if (RAW != '(') {
5720
286
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5721
286
  return(NULL);
5722
286
    }
5723
2.04k
    do {
5724
2.04k
        NEXT;
5725
2.04k
  SKIP_BLANKS_PE;
5726
2.04k
        name = xmlParseName(ctxt);
5727
2.04k
  if (name == NULL) {
5728
405
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5729
405
         "Name expected in NOTATION declaration\n");
5730
405
            xmlFreeEnumeration(ret);
5731
405
      return(NULL);
5732
405
  }
5733
1.64k
        tmp = NULL;
5734
1.64k
#ifdef LIBXML_VALID_ENABLED
5735
1.64k
        if (ctxt->validate) {
5736
0
            tmp = ret;
5737
0
            while (tmp != NULL) {
5738
0
                if (xmlStrEqual(name, tmp->name)) {
5739
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5740
0
              "standalone: attribute notation value token %s duplicated\n",
5741
0
                                     name, NULL);
5742
0
                    if (!xmlDictOwns(ctxt->dict, name))
5743
0
                        xmlFree((xmlChar *) name);
5744
0
                    break;
5745
0
                }
5746
0
                tmp = tmp->next;
5747
0
            }
5748
0
        }
5749
1.64k
#endif /* LIBXML_VALID_ENABLED */
5750
1.64k
  if (tmp == NULL) {
5751
1.64k
      cur = xmlCreateEnumeration(name);
5752
1.64k
      if (cur == NULL) {
5753
0
                xmlErrMemory(ctxt);
5754
0
                xmlFreeEnumeration(ret);
5755
0
                return(NULL);
5756
0
            }
5757
1.64k
      if (last == NULL) ret = last = cur;
5758
791
      else {
5759
791
    last->next = cur;
5760
791
    last = cur;
5761
791
      }
5762
1.64k
  }
5763
1.64k
  SKIP_BLANKS_PE;
5764
1.64k
    } while (RAW == '|');
5765
824
    if (RAW != ')') {
5766
97
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5767
97
        xmlFreeEnumeration(ret);
5768
97
  return(NULL);
5769
97
    }
5770
727
    NEXT;
5771
727
    return(ret);
5772
824
}
5773
5774
/**
5775
 * Parse an Enumeration attribute type.
5776
 *
5777
 * @deprecated Internal function, don't use.
5778
 *
5779
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5780
 *
5781
 * [ VC: Enumeration ]
5782
 * Values of this type must match one of the Nmtoken tokens in
5783
 * the declaration
5784
 *
5785
 * @param ctxt  an XML parser context
5786
 * @returns the enumeration attribute tree built while parsing
5787
 */
5788
5789
xmlEnumeration *
5790
4.21k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5791
4.21k
    xmlChar *name;
5792
4.21k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5793
5794
4.21k
    if (RAW != '(') {
5795
636
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5796
636
  return(NULL);
5797
636
    }
5798
6.82k
    do {
5799
6.82k
        NEXT;
5800
6.82k
  SKIP_BLANKS_PE;
5801
6.82k
        name = xmlParseNmtoken(ctxt);
5802
6.82k
  if (name == NULL) {
5803
390
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5804
390
      return(ret);
5805
390
  }
5806
6.43k
        tmp = NULL;
5807
6.43k
#ifdef LIBXML_VALID_ENABLED
5808
6.43k
        if (ctxt->validate) {
5809
0
            tmp = ret;
5810
0
            while (tmp != NULL) {
5811
0
                if (xmlStrEqual(name, tmp->name)) {
5812
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813
0
              "standalone: attribute enumeration value token %s duplicated\n",
5814
0
                                     name, NULL);
5815
0
                    if (!xmlDictOwns(ctxt->dict, name))
5816
0
                        xmlFree(name);
5817
0
                    break;
5818
0
                }
5819
0
                tmp = tmp->next;
5820
0
            }
5821
0
        }
5822
6.43k
#endif /* LIBXML_VALID_ENABLED */
5823
6.43k
  if (tmp == NULL) {
5824
6.43k
      cur = xmlCreateEnumeration(name);
5825
6.43k
      if (!xmlDictOwns(ctxt->dict, name))
5826
6.43k
    xmlFree(name);
5827
6.43k
      if (cur == NULL) {
5828
0
                xmlErrMemory(ctxt);
5829
0
                xmlFreeEnumeration(ret);
5830
0
                return(NULL);
5831
0
            }
5832
6.43k
      if (last == NULL) ret = last = cur;
5833
2.92k
      else {
5834
2.92k
    last->next = cur;
5835
2.92k
    last = cur;
5836
2.92k
      }
5837
6.43k
  }
5838
6.43k
  SKIP_BLANKS_PE;
5839
6.43k
    } while (RAW == '|');
5840
3.19k
    if (RAW != ')') {
5841
978
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5842
978
  return(ret);
5843
978
    }
5844
2.21k
    NEXT;
5845
2.21k
    return(ret);
5846
3.19k
}
5847
5848
/**
5849
 * Parse an Enumerated attribute type.
5850
 *
5851
 * @deprecated Internal function, don't use.
5852
 *
5853
 *     [57] EnumeratedType ::= NotationType | Enumeration
5854
 *
5855
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5856
 *
5857
 * @param ctxt  an XML parser context
5858
 * @param tree  the enumeration tree built while parsing
5859
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5860
 */
5861
5862
int
5863
5.92k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5864
5.92k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5865
1.71k
  SKIP(8);
5866
1.71k
  if (SKIP_BLANKS_PE == 0) {
5867
195
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5868
195
         "Space required after 'NOTATION'\n");
5869
195
      return(0);
5870
195
  }
5871
1.51k
  *tree = xmlParseNotationType(ctxt);
5872
1.51k
  if (*tree == NULL) return(0);
5873
727
  return(XML_ATTRIBUTE_NOTATION);
5874
1.51k
    }
5875
4.21k
    *tree = xmlParseEnumerationType(ctxt);
5876
4.21k
    if (*tree == NULL) return(0);
5877
3.50k
    return(XML_ATTRIBUTE_ENUMERATION);
5878
4.21k
}
5879
5880
/**
5881
 * Parse the Attribute list def for an element
5882
 *
5883
 * @deprecated Internal function, don't use.
5884
 *
5885
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886
 *
5887
 *     [55] StringType ::= 'CDATA'
5888
 *
5889
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891
 *
5892
 * Validity constraints for attribute values syntax are checked in
5893
 * #xmlValidateAttributeValue
5894
 *
5895
 * [ VC: ID ]
5896
 * Values of type ID must match the Name production. A name must not
5897
 * appear more than once in an XML document as a value of this type;
5898
 * i.e., ID values must uniquely identify the elements which bear them.
5899
 *
5900
 * [ VC: One ID per Element Type ]
5901
 * No element type may have more than one ID attribute specified.
5902
 *
5903
 * [ VC: ID Attribute Default ]
5904
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5905
 *
5906
 * [ VC: IDREF ]
5907
 * Values of type IDREF must match the Name production, and values
5908
 * of type IDREFS must match Names; each IDREF Name must match the value
5909
 * of an ID attribute on some element in the XML document; i.e. IDREF
5910
 * values must match the value of some ID attribute.
5911
 *
5912
 * [ VC: Entity Name ]
5913
 * Values of type ENTITY must match the Name production, values
5914
 * of type ENTITIES must match Names; each Entity Name must match the
5915
 * name of an unparsed entity declared in the DTD.
5916
 *
5917
 * [ VC: Name Token ]
5918
 * Values of type NMTOKEN must match the Nmtoken production; values
5919
 * of type NMTOKENS must match Nmtokens.
5920
 *
5921
 * @param ctxt  an XML parser context
5922
 * @param tree  the enumeration tree built while parsing
5923
 * @returns the attribute type
5924
 */
5925
int
5926
30.4k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5927
30.4k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5928
1.61k
  SKIP(5);
5929
1.61k
  return(XML_ATTRIBUTE_CDATA);
5930
28.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5931
452
  SKIP(6);
5932
452
  return(XML_ATTRIBUTE_IDREFS);
5933
28.4k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5934
1.41k
  SKIP(5);
5935
1.41k
  return(XML_ATTRIBUTE_IDREF);
5936
27.0k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5937
16.7k
        SKIP(2);
5938
16.7k
  return(XML_ATTRIBUTE_ID);
5939
16.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5940
457
  SKIP(6);
5941
457
  return(XML_ATTRIBUTE_ENTITY);
5942
9.79k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5943
2.81k
  SKIP(8);
5944
2.81k
  return(XML_ATTRIBUTE_ENTITIES);
5945
6.98k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5946
520
  SKIP(8);
5947
520
  return(XML_ATTRIBUTE_NMTOKENS);
5948
6.46k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5949
533
  SKIP(7);
5950
533
  return(XML_ATTRIBUTE_NMTOKEN);
5951
533
     }
5952
5.92k
     return(xmlParseEnumeratedType(ctxt, tree));
5953
30.4k
}
5954
5955
/**
5956
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5957
 *
5958
 * @deprecated Internal function, don't use.
5959
 *
5960
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961
 *
5962
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5963
 * @param ctxt  an XML parser context
5964
 */
5965
void
5966
16.2k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5967
16.2k
    const xmlChar *elemName;
5968
16.2k
    const xmlChar *attrName;
5969
16.2k
    xmlEnumerationPtr tree;
5970
5971
16.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5972
0
        return;
5973
16.2k
    SKIP(2);
5974
5975
16.2k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976
16.2k
#ifdef LIBXML_VALID_ENABLED
5977
16.2k
  int oldInputNr = ctxt->inputNr;
5978
16.2k
#endif
5979
5980
16.2k
  SKIP(7);
5981
16.2k
  if (SKIP_BLANKS_PE == 0) {
5982
4.96k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5983
4.96k
                     "Space required after '<!ATTLIST'\n");
5984
4.96k
  }
5985
16.2k
        elemName = xmlParseName(ctxt);
5986
16.2k
  if (elemName == NULL) {
5987
648
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988
648
         "ATTLIST: no name for Element\n");
5989
648
      return;
5990
648
  }
5991
15.5k
  SKIP_BLANKS_PE;
5992
15.5k
  GROW;
5993
37.0k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5994
33.4k
      int type;
5995
33.4k
      int def;
5996
33.4k
      xmlChar *defaultValue = NULL;
5997
5998
33.4k
      GROW;
5999
33.4k
            tree = NULL;
6000
33.4k
      attrName = xmlParseName(ctxt);
6001
33.4k
      if (attrName == NULL) {
6002
2.04k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
2.04k
             "ATTLIST: no name for Attribute\n");
6004
2.04k
    break;
6005
2.04k
      }
6006
31.4k
      GROW;
6007
31.4k
      if (SKIP_BLANKS_PE == 0) {
6008
939
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
939
            "Space required after the attribute name\n");
6010
939
    break;
6011
939
      }
6012
6013
30.4k
      type = xmlParseAttributeType(ctxt, &tree);
6014
30.4k
      if (type <= 0) {
6015
1.69k
          break;
6016
1.69k
      }
6017
6018
28.8k
      GROW;
6019
28.8k
      if (SKIP_BLANKS_PE == 0) {
6020
3.19k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6021
3.19k
             "Space required after the attribute type\n");
6022
3.19k
          if (tree != NULL)
6023
1.29k
        xmlFreeEnumeration(tree);
6024
3.19k
    break;
6025
3.19k
      }
6026
6027
25.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6028
25.6k
      if (def <= 0) {
6029
0
                if (defaultValue != NULL)
6030
0
        xmlFree(defaultValue);
6031
0
          if (tree != NULL)
6032
0
        xmlFreeEnumeration(tree);
6033
0
          break;
6034
0
      }
6035
25.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6036
18.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6037
6038
25.6k
      GROW;
6039
25.6k
            if (RAW != '>') {
6040
22.1k
    if (SKIP_BLANKS_PE == 0) {
6041
4.16k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6042
4.16k
      "Space required after the attribute default value\n");
6043
4.16k
        if (defaultValue != NULL)
6044
334
      xmlFree(defaultValue);
6045
4.16k
        if (tree != NULL)
6046
859
      xmlFreeEnumeration(tree);
6047
4.16k
        break;
6048
4.16k
    }
6049
22.1k
      }
6050
21.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6051
21.4k
    (ctxt->sax->attributeDecl != NULL))
6052
11.9k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6053
11.9k
                          type, def, defaultValue, tree);
6054
9.53k
      else if (tree != NULL)
6055
1.51k
    xmlFreeEnumeration(tree);
6056
6057
21.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6058
21.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6059
21.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6060
19.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6061
19.5k
      }
6062
21.4k
      if (ctxt->sax2) {
6063
21.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6064
21.4k
      }
6065
21.4k
      if (defaultValue != NULL)
6066
19.5k
          xmlFree(defaultValue);
6067
21.4k
      GROW;
6068
21.4k
  }
6069
15.5k
  if (RAW == '>') {
6070
3.63k
#ifdef LIBXML_VALID_ENABLED
6071
3.63k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6072
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6073
0
                                 "Attribute list declaration doesn't start and"
6074
0
                                 " stop in the same entity\n",
6075
0
                                 NULL, NULL);
6076
0
      }
6077
3.63k
#endif
6078
3.63k
      NEXT;
6079
3.63k
  }
6080
15.5k
    }
6081
16.2k
}
6082
6083
/**
6084
 * Handle PEs and check that we don't pop the entity that started
6085
 * a balanced group.
6086
 *
6087
 * @param ctxt  parser context
6088
 * @param openInputNr  input nr of the entity with opening '('
6089
 */
6090
static void
6091
103k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6092
103k
    SKIP_BLANKS;
6093
103k
    GROW;
6094
6095
103k
    (void) openInputNr;
6096
6097
103k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6098
46.2k
        return;
6099
6100
63.7k
    while (!PARSER_STOPPED(ctxt)) {
6101
63.5k
        if (ctxt->input->cur >= ctxt->input->end) {
6102
2.40k
#ifdef LIBXML_VALID_ENABLED
6103
2.40k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6104
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105
0
                                 "Element content declaration doesn't start "
6106
0
                                 "and stop in the same entity\n",
6107
0
                                 NULL, NULL);
6108
0
            }
6109
2.40k
#endif
6110
2.40k
            if (PARSER_IN_PE(ctxt))
6111
2.32k
                xmlPopPE(ctxt);
6112
89
            else
6113
89
                break;
6114
61.1k
        } else if (RAW == '%') {
6115
3.74k
            xmlParsePERefInternal(ctxt, 0);
6116
57.4k
        } else {
6117
57.4k
            break;
6118
57.4k
        }
6119
6120
6.06k
        SKIP_BLANKS;
6121
6.06k
        GROW;
6122
6.06k
    }
6123
57.7k
}
6124
6125
/**
6126
 * Parse the declaration for a Mixed Element content
6127
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6128
 *
6129
 * @deprecated Internal function, don't use.
6130
 *
6131
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6132
 *                    '(' S? '#PCDATA' S? ')'
6133
 *
6134
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6135
 *
6136
 * [ VC: No Duplicate Types ]
6137
 * The same name must not appear more than once in a single
6138
 * mixed-content declaration.
6139
 *
6140
 * @param ctxt  an XML parser context
6141
 * @param openInputNr  the input used for the current entity, needed for
6142
 * boundary checks
6143
 * @returns the list of the xmlElementContent describing the element choices
6144
 */
6145
xmlElementContent *
6146
2.41k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6147
2.41k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6148
2.41k
    const xmlChar *elem = NULL;
6149
6150
2.41k
    GROW;
6151
2.41k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6152
2.41k
  SKIP(7);
6153
2.41k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6154
2.41k
  if (RAW == ')') {
6155
1.13k
#ifdef LIBXML_VALID_ENABLED
6156
1.13k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6157
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6158
0
                                 "Element content declaration doesn't start "
6159
0
                                 "and stop in the same entity\n",
6160
0
                                 NULL, NULL);
6161
0
      }
6162
1.13k
#endif
6163
1.13k
      NEXT;
6164
1.13k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6165
1.13k
      if (ret == NULL)
6166
0
                goto mem_error;
6167
1.13k
      if (RAW == '*') {
6168
216
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169
216
    NEXT;
6170
216
      }
6171
1.13k
      return(ret);
6172
1.13k
  }
6173
1.28k
  if ((RAW == '(') || (RAW == '|')) {
6174
623
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6175
623
      if (ret == NULL)
6176
0
                goto mem_error;
6177
623
  }
6178
2.08k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6179
1.00k
      NEXT;
6180
1.00k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6181
1.00k
            if (n == NULL)
6182
0
                goto mem_error;
6183
1.00k
      if (elem == NULL) {
6184
622
    n->c1 = cur;
6185
622
    if (cur != NULL)
6186
622
        cur->parent = n;
6187
622
    ret = cur = n;
6188
622
      } else {
6189
384
          cur->c2 = n;
6190
384
    n->parent = cur;
6191
384
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6192
384
                if (n->c1 == NULL)
6193
0
                    goto mem_error;
6194
384
    n->c1->parent = n;
6195
384
    cur = n;
6196
384
      }
6197
1.00k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6198
1.00k
      elem = xmlParseName(ctxt);
6199
1.00k
      if (elem == NULL) {
6200
201
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6201
201
      "xmlParseElementMixedContentDecl : Name expected\n");
6202
201
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6203
201
    return(NULL);
6204
201
      }
6205
805
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6206
805
  }
6207
1.08k
  if ((RAW == ')') && (NXT(1) == '*')) {
6208
396
      if (elem != NULL) {
6209
396
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6210
396
                                   XML_ELEMENT_CONTENT_ELEMENT);
6211
396
    if (cur->c2 == NULL)
6212
0
                    goto mem_error;
6213
396
    cur->c2->parent = cur;
6214
396
            }
6215
396
            if (ret != NULL)
6216
396
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6217
396
#ifdef LIBXML_VALID_ENABLED
6218
396
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6219
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6220
0
                                 "Element content declaration doesn't start "
6221
0
                                 "and stop in the same entity\n",
6222
0
                                 NULL, NULL);
6223
0
      }
6224
396
#endif
6225
396
      SKIP(2);
6226
685
  } else {
6227
685
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6228
685
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6229
685
      return(NULL);
6230
685
  }
6231
6232
1.08k
    } else {
6233
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6234
0
    }
6235
396
    return(ret);
6236
6237
0
mem_error:
6238
0
    xmlErrMemory(ctxt);
6239
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6240
0
    return(NULL);
6241
2.41k
}
6242
6243
/**
6244
 * Parse the declaration for a Mixed Element content
6245
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6246
 *
6247
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6248
 *
6249
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6250
 *
6251
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6252
 *
6253
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6254
 *
6255
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6256
 * TODO Parameter-entity replacement text must be properly nested
6257
 *  with parenthesized groups. That is to say, if either of the
6258
 *  opening or closing parentheses in a choice, seq, or Mixed
6259
 *  construct is contained in the replacement text for a parameter
6260
 *  entity, both must be contained in the same replacement text. For
6261
 *  interoperability, if a parameter-entity reference appears in a
6262
 *  choice, seq, or Mixed construct, its replacement text should not
6263
 *  be empty, and neither the first nor last non-blank character of
6264
 *  the replacement text should be a connector (| or ,).
6265
 *
6266
 * @param ctxt  an XML parser context
6267
 * @param openInputNr  the input used for the current entity, needed for
6268
 * boundary checks
6269
 * @param depth  the level of recursion
6270
 * @returns the tree of xmlElementContent describing the element
6271
 *          hierarchy.
6272
 */
6273
static xmlElementContentPtr
6274
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6275
49.4k
                                       int depth) {
6276
49.4k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6277
49.4k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6278
49.4k
    const xmlChar *elem;
6279
49.4k
    xmlChar type = 0;
6280
6281
49.4k
    if (depth > maxDepth) {
6282
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6283
1
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6284
1
                "use XML_PARSE_HUGE\n", depth);
6285
1
  return(NULL);
6286
1
    }
6287
49.4k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6288
49.4k
    if (RAW == '(') {
6289
35.9k
        int newInputNr = ctxt->inputNr;
6290
6291
        /* Recurse on first child */
6292
35.9k
  NEXT;
6293
35.9k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6294
35.9k
                                                           depth + 1);
6295
35.9k
        if (cur == NULL)
6296
32.4k
            return(NULL);
6297
35.9k
    } else {
6298
13.5k
  elem = xmlParseName(ctxt);
6299
13.5k
  if (elem == NULL) {
6300
1.11k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6301
1.11k
      return(NULL);
6302
1.11k
  }
6303
12.4k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6304
12.4k
  if (cur == NULL) {
6305
0
      xmlErrMemory(ctxt);
6306
0
      return(NULL);
6307
0
  }
6308
12.4k
  GROW;
6309
12.4k
  if (RAW == '?') {
6310
3.76k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6311
3.76k
      NEXT;
6312
8.66k
  } else if (RAW == '*') {
6313
1.00k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6314
1.00k
      NEXT;
6315
7.65k
  } else if (RAW == '+') {
6316
833
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6317
833
      NEXT;
6318
6.82k
  } else {
6319
6.82k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6320
6.82k
  }
6321
12.4k
  GROW;
6322
12.4k
    }
6323
28.0k
    while (!PARSER_STOPPED(ctxt)) {
6324
27.4k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6325
27.4k
        if (RAW == ')')
6326
9.50k
            break;
6327
        /*
6328
   * Each loop we parse one separator and one element.
6329
   */
6330
17.9k
        if (RAW == ',') {
6331
1.05k
      if (type == 0) type = CUR;
6332
6333
      /*
6334
       * Detect "Name | Name , Name" error
6335
       */
6336
266
      else if (type != CUR) {
6337
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339
1
                      type);
6340
1
    if ((last != NULL) && (last != ret))
6341
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6342
1
    if (ret != NULL)
6343
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6344
1
    return(NULL);
6345
1
      }
6346
1.05k
      NEXT;
6347
6348
1.05k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6349
1.05k
      if (op == NULL) {
6350
0
                xmlErrMemory(ctxt);
6351
0
    if ((last != NULL) && (last != ret))
6352
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6353
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6354
0
    return(NULL);
6355
0
      }
6356
1.05k
      if (last == NULL) {
6357
785
    op->c1 = ret;
6358
785
    if (ret != NULL)
6359
785
        ret->parent = op;
6360
785
    ret = cur = op;
6361
785
      } else {
6362
265
          cur->c2 = op;
6363
265
    if (op != NULL)
6364
265
        op->parent = cur;
6365
265
    op->c1 = last;
6366
265
    if (last != NULL)
6367
265
        last->parent = op;
6368
265
    cur =op;
6369
265
    last = NULL;
6370
265
      }
6371
16.9k
  } else if (RAW == '|') {
6372
15.0k
      if (type == 0) type = CUR;
6373
6374
      /*
6375
       * Detect "Name , Name | Name" error
6376
       */
6377
6.00k
      else if (type != CUR) {
6378
2
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6379
2
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6380
2
          type);
6381
2
    if ((last != NULL) && (last != ret))
6382
2
        xmlFreeDocElementContent(ctxt->myDoc, last);
6383
2
    if (ret != NULL)
6384
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6385
2
    return(NULL);
6386
2
      }
6387
15.0k
      NEXT;
6388
6389
15.0k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6390
15.0k
      if (op == NULL) {
6391
0
                xmlErrMemory(ctxt);
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
15.0k
      if (last == NULL) {
6399
9.07k
    op->c1 = ret;
6400
9.07k
    if (ret != NULL)
6401
9.07k
        ret->parent = op;
6402
9.07k
    ret = cur = op;
6403
9.07k
      } else {
6404
6.00k
          cur->c2 = op;
6405
6.00k
    if (op != NULL)
6406
6.00k
        op->parent = cur;
6407
6.00k
    op->c1 = last;
6408
6.00k
    if (last != NULL)
6409
6.00k
        last->parent = op;
6410
6.00k
    cur =op;
6411
6.00k
    last = NULL;
6412
6.00k
      }
6413
15.0k
  } else {
6414
1.86k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
1.86k
      if ((last != NULL) && (last != ret))
6416
939
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
1.86k
      if (ret != NULL)
6418
1.86k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
1.86k
      return(NULL);
6420
1.86k
  }
6421
16.1k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6422
16.1k
        if (RAW == '(') {
6423
9.29k
            int newInputNr = ctxt->inputNr;
6424
6425
      /* Recurse on second child */
6426
9.29k
      NEXT;
6427
9.29k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6428
9.29k
                                                          depth + 1);
6429
9.29k
            if (last == NULL) {
6430
3.29k
    if (ret != NULL)
6431
3.29k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
3.29k
    return(NULL);
6433
3.29k
            }
6434
9.29k
  } else {
6435
6.82k
      elem = xmlParseName(ctxt);
6436
6.82k
      if (elem == NULL) {
6437
654
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6438
654
    if (ret != NULL)
6439
654
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6440
654
    return(NULL);
6441
654
      }
6442
6.17k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6443
6.17k
      if (last == NULL) {
6444
0
                xmlErrMemory(ctxt);
6445
0
    if (ret != NULL)
6446
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6447
0
    return(NULL);
6448
0
      }
6449
6.17k
      if (RAW == '?') {
6450
1.22k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6451
1.22k
    NEXT;
6452
4.94k
      } else if (RAW == '*') {
6453
951
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6454
951
    NEXT;
6455
3.99k
      } else if (RAW == '+') {
6456
382
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6457
382
    NEXT;
6458
3.61k
      } else {
6459
3.61k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6460
3.61k
      }
6461
6.17k
  }
6462
16.1k
    }
6463
10.0k
    if ((cur != NULL) && (last != NULL)) {
6464
4.96k
        cur->c2 = last;
6465
4.96k
  if (last != NULL)
6466
4.96k
      last->parent = cur;
6467
4.96k
    }
6468
10.0k
#ifdef LIBXML_VALID_ENABLED
6469
10.0k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6470
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6471
0
                         "Element content declaration doesn't start "
6472
0
                         "and stop in the same entity\n",
6473
0
                         NULL, NULL);
6474
0
    }
6475
10.0k
#endif
6476
10.0k
    NEXT;
6477
10.0k
    if (RAW == '?') {
6478
1.78k
  if (ret != NULL) {
6479
1.78k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6480
1.78k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481
716
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482
1.07k
      else
6483
1.07k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6484
1.78k
  }
6485
1.78k
  NEXT;
6486
8.25k
    } else if (RAW == '*') {
6487
1.25k
  if (ret != NULL) {
6488
1.25k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6489
1.25k
      cur = ret;
6490
      /*
6491
       * Some normalization:
6492
       * (a | b* | c?)* == (a | b | c)*
6493
       */
6494
3.94k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6495
2.68k
    if ((cur->c1 != NULL) &&
6496
2.68k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
2.68k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
1.51k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
2.68k
    if ((cur->c2 != NULL) &&
6500
2.68k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6501
2.68k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6502
1.10k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
2.68k
    cur = cur->c2;
6504
2.68k
      }
6505
1.25k
  }
6506
1.25k
  NEXT;
6507
6.99k
    } else if (RAW == '+') {
6508
3.09k
  if (ret != NULL) {
6509
3.09k
      int found = 0;
6510
6511
3.09k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6512
3.09k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6513
1.65k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6514
1.44k
      else
6515
1.44k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6516
      /*
6517
       * Some normalization:
6518
       * (a | b*)+ == (a | b)*
6519
       * (a | b?)+ == (a | b)*
6520
       */
6521
7.24k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6522
4.14k
    if ((cur->c1 != NULL) &&
6523
4.14k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
4.14k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6525
1.50k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
1.50k
        found = 1;
6527
1.50k
    }
6528
4.14k
    if ((cur->c2 != NULL) &&
6529
4.14k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6530
4.14k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6531
979
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6532
979
        found = 1;
6533
979
    }
6534
4.14k
    cur = cur->c2;
6535
4.14k
      }
6536
3.09k
      if (found)
6537
1.19k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6538
3.09k
  }
6539
3.09k
  NEXT;
6540
3.09k
    }
6541
10.0k
    return(ret);
6542
15.8k
}
6543
6544
/**
6545
 * Parse the declaration for a Mixed Element content
6546
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6547
 *
6548
 * @deprecated Internal function, don't use.
6549
 *
6550
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6551
 *
6552
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6553
 *
6554
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6555
 *
6556
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6557
 *
6558
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6559
 * TODO Parameter-entity replacement text must be properly nested
6560
 *  with parenthesized groups. That is to say, if either of the
6561
 *  opening or closing parentheses in a choice, seq, or Mixed
6562
 *  construct is contained in the replacement text for a parameter
6563
 *  entity, both must be contained in the same replacement text. For
6564
 *  interoperability, if a parameter-entity reference appears in a
6565
 *  choice, seq, or Mixed construct, its replacement text should not
6566
 *  be empty, and neither the first nor last non-blank character of
6567
 *  the replacement text should be a connector (| or ,).
6568
 *
6569
 * @param ctxt  an XML parser context
6570
 * @param inputchk  the input used for the current entity, needed for boundary checks
6571
 * @returns the tree of xmlElementContent describing the element
6572
 *          hierarchy.
6573
 */
6574
xmlElementContent *
6575
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6576
    /* stub left for API/ABI compat */
6577
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6578
0
}
6579
6580
/**
6581
 * Parse the declaration for an Element content either Mixed or Children,
6582
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6583
 *
6584
 * @deprecated Internal function, don't use.
6585
 *
6586
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6587
 *
6588
 * @param ctxt  an XML parser context
6589
 * @param name  the name of the element being defined.
6590
 * @param result  the Element Content pointer will be stored here if any
6591
 * @returns an xmlElementTypeVal value or -1 on error
6592
 */
6593
6594
int
6595
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6596
6.65k
                           xmlElementContent **result) {
6597
6598
6.65k
    xmlElementContentPtr tree = NULL;
6599
6.65k
    int openInputNr = ctxt->inputNr;
6600
6.65k
    int res;
6601
6602
6.65k
    *result = NULL;
6603
6604
6.65k
    if (RAW != '(') {
6605
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6606
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6607
0
  return(-1);
6608
0
    }
6609
6.65k
    NEXT;
6610
6.65k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6611
6.65k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6612
2.41k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6613
2.41k
  res = XML_ELEMENT_TYPE_MIXED;
6614
4.23k
    } else {
6615
4.23k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6616
4.23k
  res = XML_ELEMENT_TYPE_ELEMENT;
6617
4.23k
    }
6618
6.65k
    if (tree == NULL)
6619
4.51k
        return(-1);
6620
2.13k
    SKIP_BLANKS_PE;
6621
2.13k
    *result = tree;
6622
2.13k
    return(res);
6623
6.65k
}
6624
6625
/**
6626
 * Parse an element declaration. Always consumes '<!'.
6627
 *
6628
 * @deprecated Internal function, don't use.
6629
 *
6630
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6631
 *
6632
 * [ VC: Unique Element Type Declaration ]
6633
 * No element type may be declared more than once
6634
 *
6635
 * @param ctxt  an XML parser context
6636
 * @returns the type of the element, or -1 in case of error
6637
 */
6638
int
6639
10.1k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6640
10.1k
    const xmlChar *name;
6641
10.1k
    int ret = -1;
6642
10.1k
    xmlElementContentPtr content  = NULL;
6643
6644
10.1k
    if ((CUR != '<') || (NXT(1) != '!'))
6645
0
        return(ret);
6646
10.1k
    SKIP(2);
6647
6648
    /* GROW; done in the caller */
6649
10.1k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6650
10.1k
#ifdef LIBXML_VALID_ENABLED
6651
10.1k
  int oldInputNr = ctxt->inputNr;
6652
10.1k
#endif
6653
6654
10.1k
  SKIP(7);
6655
10.1k
  if (SKIP_BLANKS_PE == 0) {
6656
1.01k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6657
1.01k
               "Space required after 'ELEMENT'\n");
6658
1.01k
      return(-1);
6659
1.01k
  }
6660
9.15k
        name = xmlParseName(ctxt);
6661
9.15k
  if (name == NULL) {
6662
353
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6663
353
         "xmlParseElementDecl: no name for Element\n");
6664
353
      return(-1);
6665
353
  }
6666
8.80k
  if (SKIP_BLANKS_PE == 0) {
6667
6.03k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6668
6.03k
         "Space required after the element name\n");
6669
6.03k
  }
6670
8.80k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6671
990
      SKIP(5);
6672
      /*
6673
       * Element must always be empty.
6674
       */
6675
990
      ret = XML_ELEMENT_TYPE_EMPTY;
6676
7.81k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6677
7.81k
             (NXT(2) == 'Y')) {
6678
527
      SKIP(3);
6679
      /*
6680
       * Element is a generic container.
6681
       */
6682
527
      ret = XML_ELEMENT_TYPE_ANY;
6683
7.28k
  } else if (RAW == '(') {
6684
6.65k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6685
6.65k
            if (ret <= 0)
6686
4.51k
                return(-1);
6687
6.65k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
630
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6692
630
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6693
630
      return(-1);
6694
630
  }
6695
6696
3.65k
  SKIP_BLANKS_PE;
6697
6698
3.65k
  if (RAW != '>') {
6699
1.33k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6700
1.33k
      if (content != NULL) {
6701
733
    xmlFreeDocElementContent(ctxt->myDoc, content);
6702
733
      }
6703
2.32k
  } else {
6704
2.32k
#ifdef LIBXML_VALID_ENABLED
6705
2.32k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6706
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6707
0
                                 "Element declaration doesn't start and stop in"
6708
0
                                 " the same entity\n",
6709
0
                                 NULL, NULL);
6710
0
      }
6711
2.32k
#endif
6712
6713
2.32k
      NEXT;
6714
2.32k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6715
2.32k
    (ctxt->sax->elementDecl != NULL)) {
6716
1.87k
    if (content != NULL)
6717
1.20k
        content->parent = NULL;
6718
1.87k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6719
1.87k
                           content);
6720
1.87k
    if ((content != NULL) && (content->parent == NULL)) {
6721
        /*
6722
         * this is a trick: if xmlAddElementDecl is called,
6723
         * instead of copying the full tree it is plugged directly
6724
         * if called from the parser. Avoid duplicating the
6725
         * interfaces or change the API/ABI
6726
         */
6727
1.04k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6728
1.04k
    }
6729
1.87k
      } else if (content != NULL) {
6730
198
    xmlFreeDocElementContent(ctxt->myDoc, content);
6731
198
      }
6732
2.32k
  }
6733
3.65k
    }
6734
3.67k
    return(ret);
6735
10.1k
}
6736
6737
/**
6738
 * Parse a conditional section. Always consumes '<!['.
6739
 *
6740
 *     [61] conditionalSect ::= includeSect | ignoreSect
6741
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6742
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6743
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6744
 *                                 Ignore)*
6745
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6746
 * @param ctxt  an XML parser context
6747
 */
6748
6749
static void
6750
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6751
0
    size_t depth = 0;
6752
0
    int isFreshPE = 0;
6753
0
    int oldInputNr = ctxt->inputNr;
6754
0
    int declInputNr = ctxt->inputNr;
6755
6756
0
    while (!PARSER_STOPPED(ctxt)) {
6757
0
        if (ctxt->input->cur >= ctxt->input->end) {
6758
0
            if (ctxt->inputNr <= oldInputNr) {
6759
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6760
0
                return;
6761
0
            }
6762
6763
0
            xmlPopPE(ctxt);
6764
0
            declInputNr = ctxt->inputNr;
6765
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766
0
            SKIP(3);
6767
0
            SKIP_BLANKS_PE;
6768
6769
0
            isFreshPE = 0;
6770
6771
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6772
0
                SKIP(7);
6773
0
                SKIP_BLANKS_PE;
6774
0
                if (RAW != '[') {
6775
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6776
0
                    return;
6777
0
                }
6778
0
#ifdef LIBXML_VALID_ENABLED
6779
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6780
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6781
0
                                     "All markup of the conditional section is"
6782
0
                                     " not in the same entity\n",
6783
0
                                     NULL, NULL);
6784
0
                }
6785
0
#endif
6786
0
                NEXT;
6787
6788
0
                depth++;
6789
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6790
0
                size_t ignoreDepth = 0;
6791
6792
0
                SKIP(6);
6793
0
                SKIP_BLANKS_PE;
6794
0
                if (RAW != '[') {
6795
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6796
0
                    return;
6797
0
                }
6798
0
#ifdef LIBXML_VALID_ENABLED
6799
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6800
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
0
                                     "All markup of the conditional section is"
6802
0
                                     " not in the same entity\n",
6803
0
                                     NULL, NULL);
6804
0
                }
6805
0
#endif
6806
0
                NEXT;
6807
6808
0
                while (PARSER_STOPPED(ctxt) == 0) {
6809
0
                    if (RAW == 0) {
6810
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6811
0
                        return;
6812
0
                    }
6813
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6814
0
                        SKIP(3);
6815
0
                        ignoreDepth++;
6816
                        /* Check for integer overflow */
6817
0
                        if (ignoreDepth == 0) {
6818
0
                            xmlErrMemory(ctxt);
6819
0
                            return;
6820
0
                        }
6821
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6822
0
                               (NXT(2) == '>')) {
6823
0
                        SKIP(3);
6824
0
                        if (ignoreDepth == 0)
6825
0
                            break;
6826
0
                        ignoreDepth--;
6827
0
                    } else {
6828
0
                        NEXT;
6829
0
                    }
6830
0
                }
6831
6832
0
#ifdef LIBXML_VALID_ENABLED
6833
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6834
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835
0
                                     "All markup of the conditional section is"
6836
0
                                     " not in the same entity\n",
6837
0
                                     NULL, NULL);
6838
0
                }
6839
0
#endif
6840
0
            } else {
6841
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6842
0
                return;
6843
0
            }
6844
0
        } else if ((depth > 0) &&
6845
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6846
0
            if (isFreshPE) {
6847
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6848
0
                               "Parameter entity must match "
6849
0
                               "extSubsetDecl\n");
6850
0
                return;
6851
0
            }
6852
6853
0
            depth--;
6854
0
#ifdef LIBXML_VALID_ENABLED
6855
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6856
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                 "All markup of the conditional section is not"
6858
0
                                 " in the same entity\n",
6859
0
                                 NULL, NULL);
6860
0
            }
6861
0
#endif
6862
0
            SKIP(3);
6863
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6864
0
            isFreshPE = 0;
6865
0
            xmlParseMarkupDecl(ctxt);
6866
0
        } else if (RAW == '%') {
6867
0
            xmlParsePERefInternal(ctxt, 1);
6868
0
            if (ctxt->inputNr > declInputNr) {
6869
0
                isFreshPE = 1;
6870
0
                declInputNr = ctxt->inputNr;
6871
0
            }
6872
0
        } else {
6873
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874
0
            return;
6875
0
        }
6876
6877
0
        if (depth == 0)
6878
0
            break;
6879
6880
0
        SKIP_BLANKS;
6881
0
        SHRINK;
6882
0
        GROW;
6883
0
    }
6884
0
}
6885
6886
/**
6887
 * Parse markup declarations. Always consumes '<!' or '<?'.
6888
 *
6889
 * @deprecated Internal function, don't use.
6890
 *
6891
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6892
 *                         NotationDecl | PI | Comment
6893
 *
6894
 * [ VC: Proper Declaration/PE Nesting ]
6895
 * Parameter-entity replacement text must be properly nested with
6896
 * markup declarations. That is to say, if either the first character
6897
 * or the last character of a markup declaration (markupdecl above) is
6898
 * contained in the replacement text for a parameter-entity reference,
6899
 * both must be contained in the same replacement text.
6900
 *
6901
 * [ WFC: PEs in Internal Subset ]
6902
 * In the internal DTD subset, parameter-entity references can occur
6903
 * only where markup declarations can occur, not within markup declarations.
6904
 * (This does not apply to references that occur in external parameter
6905
 * entities or to the external subset.)
6906
 *
6907
 * @param ctxt  an XML parser context
6908
 */
6909
void
6910
104k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6911
104k
    GROW;
6912
104k
    if (CUR == '<') {
6913
104k
        if (NXT(1) == '!') {
6914
79.8k
      switch (NXT(2)) {
6915
40.4k
          case 'E':
6916
40.4k
        if (NXT(3) == 'L')
6917
10.1k
      xmlParseElementDecl(ctxt);
6918
30.2k
        else if (NXT(3) == 'N')
6919
30.2k
      xmlParseEntityDecl(ctxt);
6920
18
                    else
6921
18
                        SKIP(2);
6922
40.4k
        break;
6923
16.2k
          case 'A':
6924
16.2k
        xmlParseAttributeListDecl(ctxt);
6925
16.2k
        break;
6926
4.81k
          case 'N':
6927
4.81k
        xmlParseNotationDecl(ctxt);
6928
4.81k
        break;
6929
13.4k
          case '-':
6930
13.4k
        xmlParseComment(ctxt);
6931
13.4k
        break;
6932
4.74k
    default:
6933
4.74k
                    xmlFatalErr(ctxt,
6934
4.74k
                                ctxt->inSubset == 2 ?
6935
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6936
4.74k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6937
4.74k
                                NULL);
6938
4.74k
                    SKIP(2);
6939
4.74k
        break;
6940
79.8k
      }
6941
79.8k
  } else if (NXT(1) == '?') {
6942
25.0k
      xmlParsePI(ctxt);
6943
25.0k
  }
6944
104k
    }
6945
104k
}
6946
6947
/**
6948
 * Parse an XML declaration header for external entities
6949
 *
6950
 * @deprecated Internal function, don't use.
6951
 *
6952
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6953
 * @param ctxt  an XML parser context
6954
 */
6955
6956
void
6957
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6958
0
    xmlChar *version;
6959
6960
    /*
6961
     * We know that '<?xml' is here.
6962
     */
6963
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6964
0
  SKIP(5);
6965
0
    } else {
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6967
0
  return;
6968
0
    }
6969
6970
0
    if (SKIP_BLANKS == 0) {
6971
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972
0
           "Space needed after '<?xml'\n");
6973
0
    }
6974
6975
    /*
6976
     * We may have the VersionInfo here.
6977
     */
6978
0
    version = xmlParseVersionInfo(ctxt);
6979
0
    if (version == NULL) {
6980
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6981
0
        if (version == NULL) {
6982
0
            xmlErrMemory(ctxt);
6983
0
            return;
6984
0
        }
6985
0
    } else {
6986
0
  if (SKIP_BLANKS == 0) {
6987
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6988
0
               "Space needed here\n");
6989
0
  }
6990
0
    }
6991
0
    ctxt->input->version = version;
6992
6993
    /*
6994
     * We must have the encoding declaration
6995
     */
6996
0
    xmlParseEncodingDecl(ctxt);
6997
6998
0
    SKIP_BLANKS;
6999
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7000
0
        SKIP(2);
7001
0
    } else if (RAW == '>') {
7002
        /* Deprecated old WD ... */
7003
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7004
0
  NEXT;
7005
0
    } else {
7006
0
        int c;
7007
7008
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7009
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7010
0
            NEXT;
7011
0
            if (c == '>')
7012
0
                break;
7013
0
        }
7014
0
    }
7015
0
}
7016
7017
/**
7018
 * Parse Markup declarations from an external subset
7019
 *
7020
 * @deprecated Internal function, don't use.
7021
 *
7022
 *     [30] extSubset ::= textDecl? extSubsetDecl
7023
 *
7024
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7025
 *                             PEReference | S) *
7026
 * @param ctxt  an XML parser context
7027
 * @param publicId  the public identifier
7028
 * @param systemId  the system identifier (URL)
7029
 */
7030
void
7031
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7032
0
                       const xmlChar *systemId) {
7033
0
    int oldInputNr;
7034
7035
0
    xmlCtxtInitializeLate(ctxt);
7036
7037
0
    xmlDetectEncoding(ctxt);
7038
7039
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7040
0
  xmlParseTextDecl(ctxt);
7041
0
    }
7042
0
    if (ctxt->myDoc == NULL) {
7043
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7044
0
  if (ctxt->myDoc == NULL) {
7045
0
      xmlErrMemory(ctxt);
7046
0
      return;
7047
0
  }
7048
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7049
0
    }
7050
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7051
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7052
0
        xmlErrMemory(ctxt);
7053
0
    }
7054
7055
0
    ctxt->inSubset = 2;
7056
0
    oldInputNr = ctxt->inputNr;
7057
7058
0
    SKIP_BLANKS;
7059
0
    while (!PARSER_STOPPED(ctxt)) {
7060
0
        if (ctxt->input->cur >= ctxt->input->end) {
7061
0
            if (ctxt->inputNr <= oldInputNr) {
7062
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7063
0
                break;
7064
0
            }
7065
7066
0
            xmlPopPE(ctxt);
7067
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7068
0
            xmlParseConditionalSections(ctxt);
7069
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7070
0
            xmlParseMarkupDecl(ctxt);
7071
0
        } else if (RAW == '%') {
7072
0
            xmlParsePERefInternal(ctxt, 1);
7073
0
        } else {
7074
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7075
7076
0
            while (ctxt->inputNr > oldInputNr)
7077
0
                xmlPopPE(ctxt);
7078
0
            break;
7079
0
        }
7080
0
        SKIP_BLANKS;
7081
0
        SHRINK;
7082
0
        GROW;
7083
0
    }
7084
0
}
7085
7086
/**
7087
 * Parse and handle entity references in content, depending on the SAX
7088
 * interface, this may end-up in a call to character() if this is a
7089
 * CharRef, a predefined entity, if there is no reference() callback.
7090
 * or if the parser was asked to switch to that mode.
7091
 *
7092
 * @deprecated Internal function, don't use.
7093
 *
7094
 * Always consumes '&'.
7095
 *
7096
 *     [67] Reference ::= EntityRef | CharRef
7097
 * @param ctxt  an XML parser context
7098
 */
7099
void
7100
117k
xmlParseReference(xmlParserCtxt *ctxt) {
7101
117k
    xmlEntityPtr ent = NULL;
7102
117k
    const xmlChar *name;
7103
117k
    xmlChar *val;
7104
7105
117k
    if (RAW != '&')
7106
0
        return;
7107
7108
    /*
7109
     * Simple case of a CharRef
7110
     */
7111
117k
    if (NXT(1) == '#') {
7112
34.6k
  int i = 0;
7113
34.6k
  xmlChar out[16];
7114
34.6k
  int value = xmlParseCharRef(ctxt);
7115
7116
34.6k
  if (value == 0)
7117
7.03k
      return;
7118
7119
        /*
7120
         * Just encode the value in UTF-8
7121
         */
7122
27.6k
        COPY_BUF(out, i, value);
7123
27.6k
        out[i] = 0;
7124
27.6k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7125
27.6k
            (!ctxt->disableSAX))
7126
609
            ctxt->sax->characters(ctxt->userData, out, i);
7127
27.6k
  return;
7128
34.6k
    }
7129
7130
    /*
7131
     * We are seeing an entity reference
7132
     */
7133
82.3k
    name = xmlParseEntityRefInternal(ctxt);
7134
82.3k
    if (name == NULL)
7135
36.4k
        return;
7136
45.8k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7137
45.8k
    if (ent == NULL) {
7138
        /*
7139
         * Create a reference for undeclared entities.
7140
         */
7141
32.7k
        if ((ctxt->replaceEntities == 0) &&
7142
32.7k
            (ctxt->sax != NULL) &&
7143
32.7k
            (ctxt->disableSAX == 0) &&
7144
32.7k
            (ctxt->sax->reference != NULL)) {
7145
0
            ctxt->sax->reference(ctxt->userData, name);
7146
0
        }
7147
32.7k
        return;
7148
32.7k
    }
7149
13.1k
    if (!ctxt->wellFormed)
7150
6.11k
  return;
7151
7152
    /* special case of predefined entities */
7153
7.01k
    if ((ent->name == NULL) ||
7154
7.01k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7155
473
  val = ent->content;
7156
473
  if (val == NULL) return;
7157
  /*
7158
   * inline the entity.
7159
   */
7160
473
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7161
473
      (!ctxt->disableSAX))
7162
473
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7163
473
  return;
7164
473
    }
7165
7166
    /*
7167
     * Some users try to parse entities on their own and used to set
7168
     * the renamed "checked" member. Fix the flags to cover this
7169
     * case.
7170
     */
7171
6.53k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7172
0
        ent->flags |= XML_ENT_PARSED;
7173
7174
    /*
7175
     * The first reference to the entity trigger a parsing phase
7176
     * where the ent->children is filled with the result from
7177
     * the parsing.
7178
     * Note: external parsed entities will not be loaded, it is not
7179
     * required for a non-validating parser, unless the parsing option
7180
     * of validating, or substituting entities were given. Doing so is
7181
     * far more secure as the parser will only process data coming from
7182
     * the document entity by default.
7183
     *
7184
     * FIXME: This doesn't work correctly since entities can be
7185
     * expanded with different namespace declarations in scope.
7186
     * For example:
7187
     *
7188
     * <!DOCTYPE doc [
7189
     *   <!ENTITY ent "<ns:elem/>">
7190
     * ]>
7191
     * <doc>
7192
     *   <decl1 xmlns:ns="urn:ns1">
7193
     *     &ent;
7194
     *   </decl1>
7195
     *   <decl2 xmlns:ns="urn:ns2">
7196
     *     &ent;
7197
     *   </decl2>
7198
     * </doc>
7199
     *
7200
     * Proposed fix:
7201
     *
7202
     * - Ignore current namespace declarations when parsing the
7203
     *   entity. If a prefix can't be resolved, don't report an error
7204
     *   but mark it as unresolved.
7205
     * - Try to resolve these prefixes when expanding the entity.
7206
     *   This will require a specialized version of xmlStaticCopyNode
7207
     *   which can also make use of the namespace hash table to avoid
7208
     *   quadratic behavior.
7209
     *
7210
     * Alternatively, we could simply reparse the entity on each
7211
     * expansion like we already do with custom SAX callbacks.
7212
     * External entity content should be cached in this case.
7213
     */
7214
6.53k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7215
6.53k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7216
100
         ((ctxt->replaceEntities) ||
7217
6.43k
          (ctxt->validate)))) {
7218
6.43k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7219
1.01k
            xmlCtxtParseEntity(ctxt, ent);
7220
5.42k
        } else if (ent->children == NULL) {
7221
            /*
7222
             * Probably running in SAX mode and the callbacks don't
7223
             * build the entity content. Parse the entity again.
7224
             *
7225
             * This will also be triggered in normal tree builder mode
7226
             * if an entity happens to be empty, causing unnecessary
7227
             * reloads. It's hard to come up with a reliable check in
7228
             * which mode we're running.
7229
             */
7230
622
            xmlCtxtParseEntity(ctxt, ent);
7231
622
        }
7232
6.43k
    }
7233
7234
    /*
7235
     * We also check for amplification if entities aren't substituted.
7236
     * They might be expanded later.
7237
     */
7238
6.53k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7239
64
        return;
7240
7241
6.47k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7242
187
        return;
7243
7244
6.28k
    if (ctxt->replaceEntities == 0) {
7245
  /*
7246
   * Create a reference
7247
   */
7248
0
        if (ctxt->sax->reference != NULL)
7249
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7250
6.28k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7251
5.49k
        xmlNodePtr copy, cur;
7252
7253
        /*
7254
         * Seems we are generating the DOM content, copy the tree
7255
   */
7256
5.49k
        cur = ent->children;
7257
7258
        /*
7259
         * Handle first text node with SAX to coalesce text efficiently
7260
         */
7261
5.49k
        if ((cur->type == XML_TEXT_NODE) ||
7262
5.49k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7263
4.45k
            int len = xmlStrlen(cur->content);
7264
7265
4.45k
            if ((cur->type == XML_TEXT_NODE) ||
7266
4.45k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7267
3.88k
                if (ctxt->sax->characters != NULL)
7268
3.88k
                    ctxt->sax->characters(ctxt, cur->content, len);
7269
3.88k
            } else {
7270
565
                if (ctxt->sax->cdataBlock != NULL)
7271
565
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7272
565
            }
7273
7274
4.45k
            cur = cur->next;
7275
4.45k
        }
7276
7277
11.4k
        while (cur != NULL) {
7278
8.17k
            xmlNodePtr last;
7279
7280
            /*
7281
             * Handle last text node with SAX to coalesce text efficiently
7282
             */
7283
8.17k
            if ((cur->next == NULL) &&
7284
8.17k
                ((cur->type == XML_TEXT_NODE) ||
7285
3.43k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7286
2.27k
                int len = xmlStrlen(cur->content);
7287
7288
2.27k
                if ((cur->type == XML_TEXT_NODE) ||
7289
2.27k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7290
1.37k
                    if (ctxt->sax->characters != NULL)
7291
1.37k
                        ctxt->sax->characters(ctxt, cur->content, len);
7292
1.37k
                } else {
7293
894
                    if (ctxt->sax->cdataBlock != NULL)
7294
894
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
894
                }
7296
7297
2.27k
                break;
7298
2.27k
            }
7299
7300
            /*
7301
             * Reset coalesce buffer stats only for non-text nodes.
7302
             */
7303
5.90k
            ctxt->nodemem = 0;
7304
5.90k
            ctxt->nodelen = 0;
7305
7306
5.90k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7307
7308
5.90k
            if (copy == NULL) {
7309
0
                xmlErrMemory(ctxt);
7310
0
                break;
7311
0
            }
7312
7313
5.90k
            if (ctxt->parseMode == XML_PARSE_READER) {
7314
                /* Needed for reader */
7315
0
                copy->extra = cur->extra;
7316
                /* Maybe needed for reader */
7317
0
                copy->_private = cur->_private;
7318
0
            }
7319
7320
5.90k
            copy->parent = ctxt->node;
7321
5.90k
            last = ctxt->node->last;
7322
5.90k
            if (last == NULL) {
7323
314
                ctxt->node->children = copy;
7324
5.59k
            } else {
7325
5.59k
                last->next = copy;
7326
5.59k
                copy->prev = last;
7327
5.59k
            }
7328
5.90k
            ctxt->node->last = copy;
7329
7330
5.90k
            cur = cur->next;
7331
5.90k
        }
7332
5.49k
    }
7333
6.28k
}
7334
7335
static void
7336
123k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7337
    /*
7338
     * [ WFC: Entity Declared ]
7339
     * In a document without any DTD, a document with only an
7340
     * internal DTD subset which contains no parameter entity
7341
     * references, or a document with "standalone='yes'", the
7342
     * Name given in the entity reference must match that in an
7343
     * entity declaration, except that well-formed documents
7344
     * need not declare any of the following entities: amp, lt,
7345
     * gt, apos, quot.
7346
     * The declaration of a parameter entity must precede any
7347
     * reference to it.
7348
     * Similarly, the declaration of a general entity must
7349
     * precede any reference to it which appears in a default
7350
     * value in an attribute-list declaration. Note that if
7351
     * entities are declared in the external subset or in
7352
     * external parameter entities, a non-validating processor
7353
     * is not obligated to read and process their declarations;
7354
     * for such documents, the rule that an entity must be
7355
     * declared is a well-formedness constraint only if
7356
     * standalone='yes'.
7357
     */
7358
123k
    if ((ctxt->standalone == 1) ||
7359
123k
        ((ctxt->hasExternalSubset == 0) &&
7360
123k
         (ctxt->hasPErefs == 0))) {
7361
106k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7362
106k
                          "Entity '%s' not defined\n", name);
7363
106k
#ifdef LIBXML_VALID_ENABLED
7364
106k
    } else if (ctxt->validate) {
7365
        /*
7366
         * [ VC: Entity Declared ]
7367
         * In a document with an external subset or external
7368
         * parameter entities with "standalone='no'", ...
7369
         * ... The declaration of a parameter entity must
7370
         * precede any reference to it...
7371
         */
7372
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7373
0
                         "Entity '%s' not defined\n", name, NULL);
7374
0
#endif
7375
17.3k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7376
17.3k
               ((ctxt->replaceEntities) &&
7377
17.3k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7378
        /*
7379
         * Also raise a non-fatal error
7380
         *
7381
         * - if the external subset is loaded and all entity declarations
7382
         *   should be available, or
7383
         * - entity substition was requested without restricting
7384
         *   external entity access.
7385
         */
7386
17.3k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387
17.3k
                     "Entity '%s' not defined\n", name);
7388
17.3k
    } else {
7389
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7390
0
                      "Entity '%s' not defined\n", name, NULL);
7391
0
    }
7392
7393
123k
    ctxt->valid = 0;
7394
123k
}
7395
7396
static xmlEntityPtr
7397
906k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7398
906k
    xmlEntityPtr ent = NULL;
7399
7400
    /*
7401
     * Predefined entities override any extra definition
7402
     */
7403
906k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7404
906k
        ent = xmlGetPredefinedEntity(name);
7405
906k
        if (ent != NULL)
7406
66.9k
            return(ent);
7407
906k
    }
7408
7409
    /*
7410
     * Ask first SAX for entity resolution, otherwise try the
7411
     * entities which may have stored in the parser context.
7412
     */
7413
839k
    if (ctxt->sax != NULL) {
7414
839k
  if (ctxt->sax->getEntity != NULL)
7415
839k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7416
839k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7417
839k
      (ctxt->options & XML_PARSE_OLDSAX))
7418
0
      ent = xmlGetPredefinedEntity(name);
7419
839k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7420
839k
      (ctxt->userData==ctxt)) {
7421
4.61k
      ent = xmlSAX2GetEntity(ctxt, name);
7422
4.61k
  }
7423
839k
    }
7424
7425
839k
    if (ent == NULL) {
7426
118k
        xmlHandleUndeclaredEntity(ctxt, name);
7427
118k
    }
7428
7429
    /*
7430
     * [ WFC: Parsed Entity ]
7431
     * An entity reference must not contain the name of an
7432
     * unparsed entity
7433
     */
7434
721k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7435
194
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7436
194
     "Entity reference to unparsed entity %s\n", name);
7437
194
        ent = NULL;
7438
194
    }
7439
7440
    /*
7441
     * [ WFC: No External Entity References ]
7442
     * Attribute values cannot contain direct or indirect
7443
     * entity references to external entities.
7444
     */
7445
720k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7446
495
        if (inAttr) {
7447
201
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7448
201
                 "Attribute references external entity '%s'\n", name);
7449
201
            ent = NULL;
7450
201
        }
7451
495
    }
7452
7453
839k
    return(ent);
7454
906k
}
7455
7456
/**
7457
 * Parse an entity reference. Always consumes '&'.
7458
 *
7459
 *     [68] EntityRef ::= '&' Name ';'
7460
 *
7461
 * @param ctxt  an XML parser context
7462
 * @returns the name, or NULL in case of error.
7463
 */
7464
static const xmlChar *
7465
253k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7466
253k
    const xmlChar *name;
7467
7468
253k
    GROW;
7469
7470
253k
    if (RAW != '&')
7471
0
        return(NULL);
7472
253k
    NEXT;
7473
253k
    name = xmlParseName(ctxt);
7474
253k
    if (name == NULL) {
7475
37.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7476
37.0k
           "xmlParseEntityRef: no name\n");
7477
37.0k
        return(NULL);
7478
37.0k
    }
7479
216k
    if (RAW != ';') {
7480
31.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7481
31.4k
  return(NULL);
7482
31.4k
    }
7483
184k
    NEXT;
7484
7485
184k
    return(name);
7486
216k
}
7487
7488
/**
7489
 * @deprecated Internal function, don't use.
7490
 *
7491
 * @param ctxt  an XML parser context
7492
 * @returns the xmlEntity if found, or NULL otherwise.
7493
 */
7494
xmlEntity *
7495
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7496
0
    const xmlChar *name;
7497
7498
0
    if (ctxt == NULL)
7499
0
        return(NULL);
7500
7501
0
    name = xmlParseEntityRefInternal(ctxt);
7502
0
    if (name == NULL)
7503
0
        return(NULL);
7504
7505
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7506
0
}
7507
7508
/**
7509
 * Parse ENTITY references declarations, but this version parses it from
7510
 * a string value.
7511
 *
7512
 *     [68] EntityRef ::= '&' Name ';'
7513
 *
7514
 * [ WFC: Entity Declared ]
7515
 * In a document without any DTD, a document with only an internal DTD
7516
 * subset which contains no parameter entity references, or a document
7517
 * with "standalone='yes'", the Name given in the entity reference
7518
 * must match that in an entity declaration, except that well-formed
7519
 * documents need not declare any of the following entities: amp, lt,
7520
 * gt, apos, quot.  The declaration of a parameter entity must precede
7521
 * any reference to it.  Similarly, the declaration of a general entity
7522
 * must precede any reference to it which appears in a default value in an
7523
 * attribute-list declaration. Note that if entities are declared in the
7524
 * external subset or in external parameter entities, a non-validating
7525
 * processor is not obligated to read and process their declarations;
7526
 * for such documents, the rule that an entity must be declared is a
7527
 * well-formedness constraint only if standalone='yes'.
7528
 *
7529
 * [ WFC: Parsed Entity ]
7530
 * An entity reference must not contain the name of an unparsed entity
7531
 *
7532
 * @param ctxt  an XML parser context
7533
 * @param str  a pointer to an index in the string
7534
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7535
 * is updated to the current location in the string.
7536
 */
7537
static xmlChar *
7538
721k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7539
721k
    xmlChar *name;
7540
721k
    const xmlChar *ptr;
7541
721k
    xmlChar cur;
7542
7543
721k
    if ((str == NULL) || (*str == NULL))
7544
0
        return(NULL);
7545
721k
    ptr = *str;
7546
721k
    cur = *ptr;
7547
721k
    if (cur != '&')
7548
0
  return(NULL);
7549
7550
721k
    ptr++;
7551
721k
    name = xmlParseStringName(ctxt, &ptr);
7552
721k
    if (name == NULL) {
7553
7
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554
7
           "xmlParseStringEntityRef: no name\n");
7555
7
  *str = ptr;
7556
7
  return(NULL);
7557
7
    }
7558
721k
    if (*ptr != ';') {
7559
7
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7560
7
        xmlFree(name);
7561
7
  *str = ptr;
7562
7
  return(NULL);
7563
7
    }
7564
721k
    ptr++;
7565
7566
721k
    *str = ptr;
7567
721k
    return(name);
7568
721k
}
7569
7570
/**
7571
 * Parse a parameter entity reference. Always consumes '%'.
7572
 *
7573
 * The entity content is handled directly by pushing it's content as
7574
 * a new input stream.
7575
 *
7576
 *     [69] PEReference ::= '%' Name ';'
7577
 *
7578
 * [ WFC: No Recursion ]
7579
 * A parsed entity must not contain a recursive
7580
 * reference to itself, either directly or indirectly.
7581
 *
7582
 * [ WFC: Entity Declared ]
7583
 * In a document without any DTD, a document with only an internal DTD
7584
 * subset which contains no parameter entity references, or a document
7585
 * with "standalone='yes'", ...  ... The declaration of a parameter
7586
 * entity must precede any reference to it...
7587
 *
7588
 * [ VC: Entity Declared ]
7589
 * In a document with an external subset or external parameter entities
7590
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7591
 * must precede any reference to it...
7592
 *
7593
 * [ WFC: In DTD ]
7594
 * Parameter-entity references may only appear in the DTD.
7595
 * NOTE: misleading but this is handled.
7596
 *
7597
 * @param ctxt  an XML parser context
7598
 * @param markupDecl  whether the PERef starts a markup declaration
7599
 */
7600
static void
7601
61.4k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7602
61.4k
    const xmlChar *name;
7603
61.4k
    xmlEntityPtr entity = NULL;
7604
61.4k
    xmlParserInputPtr input;
7605
7606
61.4k
    if (RAW != '%')
7607
0
        return;
7608
61.4k
    NEXT;
7609
61.4k
    name = xmlParseName(ctxt);
7610
61.4k
    if (name == NULL) {
7611
4.37k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7612
4.37k
  return;
7613
4.37k
    }
7614
57.1k
    if (RAW != ';') {
7615
3.47k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7616
3.47k
        return;
7617
3.47k
    }
7618
7619
53.6k
    NEXT;
7620
7621
    /* Must be set before xmlHandleUndeclaredEntity */
7622
53.6k
    ctxt->hasPErefs = 1;
7623
7624
    /*
7625
     * Request the entity from SAX
7626
     */
7627
53.6k
    if ((ctxt->sax != NULL) &&
7628
53.6k
  (ctxt->sax->getParameterEntity != NULL))
7629
53.6k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7630
7631
53.6k
    if (entity == NULL) {
7632
4.84k
        xmlHandleUndeclaredEntity(ctxt, name);
7633
48.7k
    } else {
7634
  /*
7635
   * Internal checking in case the entity quest barfed
7636
   */
7637
48.7k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7638
48.7k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7639
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7640
0
      "Internal: %%%s; is not a parameter entity\n",
7641
0
        name, NULL);
7642
48.7k
  } else {
7643
48.7k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7644
48.7k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7645
687
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7646
0
      (ctxt->replaceEntities == 0) &&
7647
0
      (ctxt->validate == 0))))
7648
687
    return;
7649
7650
48.0k
            if (entity->flags & XML_ENT_EXPANDING) {
7651
21
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7652
21
                return;
7653
21
            }
7654
7655
48.0k
      input = xmlNewEntityInputStream(ctxt, entity);
7656
48.0k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7657
0
                xmlFreeInputStream(input);
7658
0
    return;
7659
0
            }
7660
7661
48.0k
            entity->flags |= XML_ENT_EXPANDING;
7662
7663
48.0k
            if (markupDecl)
7664
45.6k
                input->flags |= XML_INPUT_MARKUP_DECL;
7665
7666
48.0k
            GROW;
7667
7668
48.0k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7669
0
                xmlDetectEncoding(ctxt);
7670
7671
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7672
0
                    (IS_BLANK_CH(NXT(5)))) {
7673
0
                    xmlParseTextDecl(ctxt);
7674
0
                }
7675
0
            }
7676
48.0k
  }
7677
48.7k
    }
7678
53.6k
}
7679
7680
/**
7681
 * Parse a parameter entity reference.
7682
 *
7683
 * @deprecated Internal function, don't use.
7684
 *
7685
 * @param ctxt  an XML parser context
7686
 */
7687
void
7688
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7689
0
    xmlParsePERefInternal(ctxt, 0);
7690
0
}
7691
7692
/**
7693
 * Load the content of an entity.
7694
 *
7695
 * @param ctxt  an XML parser context
7696
 * @param entity  an unloaded system entity
7697
 * @returns 0 in case of success and -1 in case of failure
7698
 */
7699
static int
7700
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7701
0
    xmlParserInputPtr oldinput, input = NULL;
7702
0
    xmlParserInputPtr *oldinputTab;
7703
0
    xmlChar *oldencoding;
7704
0
    xmlChar *content = NULL;
7705
0
    xmlResourceType rtype;
7706
0
    size_t length, i;
7707
0
    int oldinputNr, oldinputMax;
7708
0
    int ret = -1;
7709
0
    int res;
7710
7711
0
    if ((ctxt == NULL) || (entity == NULL) ||
7712
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7713
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7714
0
  (entity->content != NULL)) {
7715
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7716
0
              "xmlLoadEntityContent parameter error");
7717
0
        return(-1);
7718
0
    }
7719
7720
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7721
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7722
0
    else
7723
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7724
7725
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7726
0
                            (char *) entity->ExternalID, rtype);
7727
0
    if (input == NULL)
7728
0
        return(-1);
7729
7730
0
    oldinput = ctxt->input;
7731
0
    oldinputNr = ctxt->inputNr;
7732
0
    oldinputMax = ctxt->inputMax;
7733
0
    oldinputTab = ctxt->inputTab;
7734
0
    oldencoding = ctxt->encoding;
7735
7736
0
    ctxt->input = NULL;
7737
0
    ctxt->inputNr = 0;
7738
0
    ctxt->inputMax = 1;
7739
0
    ctxt->encoding = NULL;
7740
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7741
0
    if (ctxt->inputTab == NULL) {
7742
0
        xmlErrMemory(ctxt);
7743
0
        xmlFreeInputStream(input);
7744
0
        goto error;
7745
0
    }
7746
7747
0
    xmlBufResetInput(input->buf->buffer, input);
7748
7749
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7750
0
        xmlFreeInputStream(input);
7751
0
        goto error;
7752
0
    }
7753
7754
0
    xmlDetectEncoding(ctxt);
7755
7756
    /*
7757
     * Parse a possible text declaration first
7758
     */
7759
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7760
0
  xmlParseTextDecl(ctxt);
7761
        /*
7762
         * An XML-1.0 document can't reference an entity not XML-1.0
7763
         */
7764
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7765
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7766
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7767
0
                           "Version mismatch between document and entity\n");
7768
0
        }
7769
0
    }
7770
7771
0
    length = input->cur - input->base;
7772
0
    xmlBufShrink(input->buf->buffer, length);
7773
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7774
7775
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7776
0
        ;
7777
7778
0
    xmlBufResetInput(input->buf->buffer, input);
7779
7780
0
    if (res < 0) {
7781
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7782
0
        goto error;
7783
0
    }
7784
7785
0
    length = xmlBufUse(input->buf->buffer);
7786
0
    if (length > INT_MAX) {
7787
0
        xmlErrMemory(ctxt);
7788
0
        goto error;
7789
0
    }
7790
7791
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7792
0
    if (content == NULL) {
7793
0
        xmlErrMemory(ctxt);
7794
0
        goto error;
7795
0
    }
7796
7797
0
    for (i = 0; i < length; ) {
7798
0
        int clen = length - i;
7799
0
        int c = xmlGetUTF8Char(content + i, &clen);
7800
7801
0
        if ((c < 0) || (!IS_CHAR(c))) {
7802
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7803
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7804
0
                              content[i]);
7805
0
            goto error;
7806
0
        }
7807
0
        i += clen;
7808
0
    }
7809
7810
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7811
0
    entity->content = content;
7812
0
    entity->length = length;
7813
0
    content = NULL;
7814
0
    ret = 0;
7815
7816
0
error:
7817
0
    while (ctxt->inputNr > 0)
7818
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7819
0
    xmlFree(ctxt->inputTab);
7820
0
    xmlFree(ctxt->encoding);
7821
7822
0
    ctxt->input = oldinput;
7823
0
    ctxt->inputNr = oldinputNr;
7824
0
    ctxt->inputMax = oldinputMax;
7825
0
    ctxt->inputTab = oldinputTab;
7826
0
    ctxt->encoding = oldencoding;
7827
7828
0
    xmlFree(content);
7829
7830
0
    return(ret);
7831
0
}
7832
7833
/**
7834
 * Parse PEReference declarations
7835
 *
7836
 *     [69] PEReference ::= '%' Name ';'
7837
 *
7838
 * [ WFC: No Recursion ]
7839
 * A parsed entity must not contain a recursive
7840
 * reference to itself, either directly or indirectly.
7841
 *
7842
 * [ WFC: Entity Declared ]
7843
 * In a document without any DTD, a document with only an internal DTD
7844
 * subset which contains no parameter entity references, or a document
7845
 * with "standalone='yes'", ...  ... The declaration of a parameter
7846
 * entity must precede any reference to it...
7847
 *
7848
 * [ VC: Entity Declared ]
7849
 * In a document with an external subset or external parameter entities
7850
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7851
 * must precede any reference to it...
7852
 *
7853
 * [ WFC: In DTD ]
7854
 * Parameter-entity references may only appear in the DTD.
7855
 * NOTE: misleading but this is handled.
7856
 *
7857
 * @param ctxt  an XML parser context
7858
 * @param str  a pointer to an index in the string
7859
 * @returns the string of the entity content.
7860
 *         str is updated to the current value of the index
7861
 */
7862
static xmlEntityPtr
7863
2.47k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7864
2.47k
    const xmlChar *ptr;
7865
2.47k
    xmlChar cur;
7866
2.47k
    xmlChar *name;
7867
2.47k
    xmlEntityPtr entity = NULL;
7868
7869
2.47k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7870
2.47k
    ptr = *str;
7871
2.47k
    cur = *ptr;
7872
2.47k
    if (cur != '%')
7873
0
        return(NULL);
7874
2.47k
    ptr++;
7875
2.47k
    name = xmlParseStringName(ctxt, &ptr);
7876
2.47k
    if (name == NULL) {
7877
891
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878
891
           "xmlParseStringPEReference: no name\n");
7879
891
  *str = ptr;
7880
891
  return(NULL);
7881
891
    }
7882
1.58k
    cur = *ptr;
7883
1.58k
    if (cur != ';') {
7884
284
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7885
284
  xmlFree(name);
7886
284
  *str = ptr;
7887
284
  return(NULL);
7888
284
    }
7889
1.30k
    ptr++;
7890
7891
    /* Must be set before xmlHandleUndeclaredEntity */
7892
1.30k
    ctxt->hasPErefs = 1;
7893
7894
    /*
7895
     * Request the entity from SAX
7896
     */
7897
1.30k
    if ((ctxt->sax != NULL) &&
7898
1.30k
  (ctxt->sax->getParameterEntity != NULL))
7899
1.30k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7900
7901
1.30k
    if (entity == NULL) {
7902
721
        xmlHandleUndeclaredEntity(ctxt, name);
7903
721
    } else {
7904
  /*
7905
   * Sanity check: warn if the entity returned is not a parameter entity
7906
   */
7907
581
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
581
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
        "%%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
0
  }
7913
581
    }
7914
7915
1.30k
    xmlFree(name);
7916
1.30k
    *str = ptr;
7917
1.30k
    return(entity);
7918
1.58k
}
7919
7920
/**
7921
 * Parse a DOCTYPE declaration
7922
 *
7923
 * @deprecated Internal function, don't use.
7924
 *
7925
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7926
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7927
 *
7928
 * [ VC: Root Element Type ]
7929
 * The Name in the document type declaration must match the element
7930
 * type of the root element.
7931
 *
7932
 * @param ctxt  an XML parser context
7933
 */
7934
7935
void
7936
7.78k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7937
7.78k
    const xmlChar *name = NULL;
7938
7.78k
    xmlChar *publicId = NULL;
7939
7.78k
    xmlChar *URI = NULL;
7940
7941
    /*
7942
     * We know that '<!DOCTYPE' has been detected.
7943
     */
7944
7.78k
    SKIP(9);
7945
7946
7.78k
    if (SKIP_BLANKS == 0) {
7947
3.81k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7948
3.81k
                       "Space required after 'DOCTYPE'\n");
7949
3.81k
    }
7950
7951
    /*
7952
     * Parse the DOCTYPE name.
7953
     */
7954
7.78k
    name = xmlParseName(ctxt);
7955
7.78k
    if (name == NULL) {
7956
2.11k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7957
2.11k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7958
2.11k
    }
7959
7.78k
    ctxt->intSubName = name;
7960
7961
7.78k
    SKIP_BLANKS;
7962
7963
    /*
7964
     * Check for public and system identifier (URI)
7965
     */
7966
7.78k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7967
7968
7.78k
    if ((URI != NULL) || (publicId != NULL)) {
7969
343
        ctxt->hasExternalSubset = 1;
7970
343
    }
7971
7.78k
    ctxt->extSubURI = URI;
7972
7.78k
    ctxt->extSubSystem = publicId;
7973
7974
7.78k
    SKIP_BLANKS;
7975
7976
    /*
7977
     * Create and update the internal subset.
7978
     */
7979
7.78k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7980
7.78k
  (!ctxt->disableSAX))
7981
3.74k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7982
7983
7.78k
    if ((RAW != '[') && (RAW != '>')) {
7984
230
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7985
230
    }
7986
7.78k
}
7987
7988
/**
7989
 * Parse the internal subset declaration
7990
 *
7991
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7992
 * @param ctxt  an XML parser context
7993
 */
7994
7995
static void
7996
7.43k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7997
    /*
7998
     * Is there any DTD definition ?
7999
     */
8000
7.43k
    if (RAW == '[') {
8001
7.43k
        int oldInputNr = ctxt->inputNr;
8002
8003
7.43k
        NEXT;
8004
  /*
8005
   * Parse the succession of Markup declarations and
8006
   * PEReferences.
8007
   * Subsequence (markupdecl | PEReference | S)*
8008
   */
8009
7.43k
  SKIP_BLANKS;
8010
215k
        while (1) {
8011
215k
            if (PARSER_STOPPED(ctxt)) {
8012
39
                return;
8013
215k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8014
47.2k
                if (ctxt->inputNr <= oldInputNr) {
8015
2.11k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8016
2.11k
                    return;
8017
2.11k
                }
8018
45.1k
                xmlPopPE(ctxt);
8019
167k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8020
2.14k
                NEXT;
8021
2.14k
                SKIP_BLANKS;
8022
2.14k
                break;
8023
165k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8024
165k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8025
                /*
8026
                 * Conditional sections are allowed in external entities
8027
                 * included by PE References in the internal subset.
8028
                 */
8029
0
                xmlParseConditionalSections(ctxt);
8030
165k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8031
104k
                xmlParseMarkupDecl(ctxt);
8032
104k
            } else if (RAW == '%') {
8033
57.7k
                xmlParsePERefInternal(ctxt, 1);
8034
57.7k
            } else {
8035
3.12k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8036
8037
3.19k
                while (ctxt->inputNr > oldInputNr)
8038
66
                    xmlPopPE(ctxt);
8039
3.12k
                return;
8040
3.12k
            }
8041
207k
            SKIP_BLANKS;
8042
207k
            SHRINK;
8043
207k
            GROW;
8044
207k
        }
8045
7.43k
    }
8046
8047
    /*
8048
     * We should be at the end of the DOCTYPE declaration.
8049
     */
8050
2.14k
    if (RAW != '>') {
8051
180
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8052
180
        return;
8053
180
    }
8054
1.96k
    NEXT;
8055
1.96k
}
8056
8057
#ifdef LIBXML_SAX1_ENABLED
8058
/**
8059
 * Parse an attribute
8060
 *
8061
 * @deprecated Internal function, don't use.
8062
 *
8063
 *     [41] Attribute ::= Name Eq AttValue
8064
 *
8065
 * [ WFC: No External Entity References ]
8066
 * Attribute values cannot contain direct or indirect entity references
8067
 * to external entities.
8068
 *
8069
 * [ WFC: No < in Attribute Values ]
8070
 * The replacement text of any entity referred to directly or indirectly in
8071
 * an attribute value (other than "&lt;") must not contain a <.
8072
 *
8073
 * [ VC: Attribute Value Type ]
8074
 * The attribute must have been declared; the value must be of the type
8075
 * declared for it.
8076
 *
8077
 *     [25] Eq ::= S? '=' S?
8078
 *
8079
 * With namespace:
8080
 *
8081
 *     [NS 11] Attribute ::= QName Eq AttValue
8082
 *
8083
 * Also the case QName == xmlns:??? is handled independently as a namespace
8084
 * definition.
8085
 *
8086
 * @param ctxt  an XML parser context
8087
 * @param value  a xmlChar ** used to store the value of the attribute
8088
 * @returns the attribute name, and the value in *value.
8089
 */
8090
8091
const xmlChar *
8092
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8093
0
    const xmlChar *name;
8094
0
    xmlChar *val;
8095
8096
0
    *value = NULL;
8097
0
    GROW;
8098
0
    name = xmlParseName(ctxt);
8099
0
    if (name == NULL) {
8100
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8101
0
                 "error parsing attribute name\n");
8102
0
        return(NULL);
8103
0
    }
8104
8105
    /*
8106
     * read the value
8107
     */
8108
0
    SKIP_BLANKS;
8109
0
    if (RAW == '=') {
8110
0
        NEXT;
8111
0
  SKIP_BLANKS;
8112
0
  val = xmlParseAttValue(ctxt);
8113
0
    } else {
8114
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8115
0
         "Specification mandates value for attribute %s\n", name);
8116
0
  return(name);
8117
0
    }
8118
8119
    /*
8120
     * Check that xml:lang conforms to the specification
8121
     * No longer reported as an error, just generate a warning now
8122
     * since this was deprecated in XML second edition
8123
     */
8124
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8125
0
  if (!xmlCheckLanguageID(val)) {
8126
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8127
0
              "Malformed value for xml:lang : %s\n",
8128
0
        val, NULL);
8129
0
  }
8130
0
    }
8131
8132
    /*
8133
     * Check that xml:space conforms to the specification
8134
     */
8135
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8136
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8137
0
      *(ctxt->space) = 0;
8138
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8139
0
      *(ctxt->space) = 1;
8140
0
  else {
8141
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8142
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8143
0
                                 val, NULL);
8144
0
  }
8145
0
    }
8146
8147
0
    *value = val;
8148
0
    return(name);
8149
0
}
8150
8151
/**
8152
 * Parse a start tag. Always consumes '<'.
8153
 *
8154
 * @deprecated Internal function, don't use.
8155
 *
8156
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8157
 *
8158
 * [ WFC: Unique Att Spec ]
8159
 * No attribute name may appear more than once in the same start-tag or
8160
 * empty-element tag.
8161
 *
8162
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8163
 *
8164
 * [ WFC: Unique Att Spec ]
8165
 * No attribute name may appear more than once in the same start-tag or
8166
 * empty-element tag.
8167
 *
8168
 * With namespace:
8169
 *
8170
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8171
 *
8172
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8173
 *
8174
 * @param ctxt  an XML parser context
8175
 * @returns the element name parsed
8176
 */
8177
8178
const xmlChar *
8179
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8180
0
    const xmlChar *name;
8181
0
    const xmlChar *attname;
8182
0
    xmlChar *attvalue;
8183
0
    const xmlChar **atts = ctxt->atts;
8184
0
    int nbatts = 0;
8185
0
    int maxatts = ctxt->maxatts;
8186
0
    int i;
8187
8188
0
    if (RAW != '<') return(NULL);
8189
0
    NEXT1;
8190
8191
0
    name = xmlParseName(ctxt);
8192
0
    if (name == NULL) {
8193
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194
0
       "xmlParseStartTag: invalid element name\n");
8195
0
        return(NULL);
8196
0
    }
8197
8198
    /*
8199
     * Now parse the attributes, up to the end of the tag:
8200
     *
8201
     * (S Attribute)* S?
8202
     */
8203
0
    SKIP_BLANKS;
8204
0
    GROW;
8205
8206
0
    while (((RAW != '>') &&
8207
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8208
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8209
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8210
0
        if (attname == NULL)
8211
0
      break;
8212
0
        if (attvalue != NULL) {
8213
      /*
8214
       * [ WFC: Unique Att Spec ]
8215
       * No attribute name may appear more than once in the same
8216
       * start-tag or empty-element tag.
8217
       */
8218
0
      for (i = 0; i < nbatts;i += 2) {
8219
0
          if (xmlStrEqual(atts[i], attname)) {
8220
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8221
0
        goto failed;
8222
0
    }
8223
0
      }
8224
      /*
8225
       * Add the pair to atts
8226
       */
8227
0
      if (nbatts + 4 > maxatts) {
8228
0
          const xmlChar **n;
8229
0
                int newSize;
8230
8231
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8232
0
                                          11, XML_MAX_ATTRS);
8233
0
                if (newSize < 0) {
8234
0
        xmlErrMemory(ctxt);
8235
0
        goto failed;
8236
0
    }
8237
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8238
0
                if (newSize < 2)
8239
0
                    newSize = 2;
8240
0
#endif
8241
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8242
0
    if (n == NULL) {
8243
0
        xmlErrMemory(ctxt);
8244
0
        goto failed;
8245
0
    }
8246
0
    atts = n;
8247
0
                maxatts = newSize * 2;
8248
0
    ctxt->atts = atts;
8249
0
    ctxt->maxatts = maxatts;
8250
0
      }
8251
8252
0
      atts[nbatts++] = attname;
8253
0
      atts[nbatts++] = attvalue;
8254
0
      atts[nbatts] = NULL;
8255
0
      atts[nbatts + 1] = NULL;
8256
8257
0
            attvalue = NULL;
8258
0
  }
8259
8260
0
failed:
8261
8262
0
        if (attvalue != NULL)
8263
0
            xmlFree(attvalue);
8264
8265
0
  GROW
8266
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8267
0
      break;
8268
0
  if (SKIP_BLANKS == 0) {
8269
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8270
0
         "attributes construct error\n");
8271
0
  }
8272
0
  SHRINK;
8273
0
        GROW;
8274
0
    }
8275
8276
    /*
8277
     * SAX: Start of Element !
8278
     */
8279
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8280
0
  (!ctxt->disableSAX)) {
8281
0
  if (nbatts > 0)
8282
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8283
0
  else
8284
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8285
0
    }
8286
8287
0
    if (atts != NULL) {
8288
        /* Free only the content strings */
8289
0
        for (i = 1;i < nbatts;i+=2)
8290
0
      if (atts[i] != NULL)
8291
0
         xmlFree((xmlChar *) atts[i]);
8292
0
    }
8293
0
    return(name);
8294
0
}
8295
8296
/**
8297
 * Parse an end tag. Always consumes '</'.
8298
 *
8299
 *     [42] ETag ::= '</' Name S? '>'
8300
 *
8301
 * With namespace
8302
 *
8303
 *     [NS 9] ETag ::= '</' QName S? '>'
8304
 * @param ctxt  an XML parser context
8305
 * @param line  line of the start tag
8306
 */
8307
8308
static void
8309
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8310
0
    const xmlChar *name;
8311
8312
0
    GROW;
8313
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8314
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8315
0
           "xmlParseEndTag: '</' not found\n");
8316
0
  return;
8317
0
    }
8318
0
    SKIP(2);
8319
8320
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8321
8322
    /*
8323
     * We should definitely be at the ending "S? '>'" part
8324
     */
8325
0
    GROW;
8326
0
    SKIP_BLANKS;
8327
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8328
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8329
0
    } else
8330
0
  NEXT1;
8331
8332
    /*
8333
     * [ WFC: Element Type Match ]
8334
     * The Name in an element's end-tag must match the element type in the
8335
     * start-tag.
8336
     *
8337
     */
8338
0
    if (name != (xmlChar*)1) {
8339
0
        if (name == NULL) name = BAD_CAST "unparsable";
8340
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8341
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8342
0
                    ctxt->name, line, name);
8343
0
    }
8344
8345
    /*
8346
     * SAX: End of Tag
8347
     */
8348
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8349
0
  (!ctxt->disableSAX))
8350
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8351
8352
0
    namePop(ctxt);
8353
0
    spacePop(ctxt);
8354
0
}
8355
8356
/**
8357
 * Parse an end tag
8358
 *
8359
 * @deprecated Internal function, don't use.
8360
 *
8361
 *     [42] ETag ::= '</' Name S? '>'
8362
 *
8363
 * With namespace
8364
 *
8365
 *     [NS 9] ETag ::= '</' QName S? '>'
8366
 * @param ctxt  an XML parser context
8367
 */
8368
8369
void
8370
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8371
0
    xmlParseEndTag1(ctxt, 0);
8372
0
}
8373
#endif /* LIBXML_SAX1_ENABLED */
8374
8375
/************************************************************************
8376
 *                  *
8377
 *          SAX 2 specific operations       *
8378
 *                  *
8379
 ************************************************************************/
8380
8381
/**
8382
 * Parse an XML Namespace QName
8383
 *
8384
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8385
 *     [7]  Prefix  ::= NCName
8386
 *     [8]  LocalPart  ::= NCName
8387
 *
8388
 * @param ctxt  an XML parser context
8389
 * @param prefix  pointer to store the prefix part
8390
 * @returns the parsed name as a hashed string; its name field is NULL on failure
8391
 */
8392
8393
static xmlHashedString
8394
1.32M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8395
1.32M
    xmlHashedString l, p;
8396
1.32M
    int start, isNCName = 0;
8397
8398
1.32M
    l.name = NULL;
8399
1.32M
    p.name = NULL;
8400
8401
1.32M
    GROW;
8402
1.32M
    start = CUR_PTR - BASE_PTR;
8403
8404
1.32M
    l = xmlParseNCName(ctxt);
8405
1.32M
    if (l.name != NULL) {
8406
1.01M
        isNCName = 1;
8407
1.01M
        if (CUR == ':') {
8408
98.3k
            NEXT;
8409
98.3k
            p = l;
8410
98.3k
            l = xmlParseNCName(ctxt);
8411
98.3k
        }
8412
1.01M
    }
8413
1.32M
    if ((l.name == NULL) || (CUR == ':')) {
8414
332k
        xmlChar *tmp;
8415
8416
332k
        l.name = NULL;
8417
332k
        p.name = NULL;
8418
332k
        if ((isNCName == 0) && (CUR != ':'))
8419
303k
            return(l);
8420
29.5k
        tmp = xmlParseNmtoken(ctxt);
8421
29.5k
        if (tmp != NULL)
8422
11.8k
            xmlFree(tmp);
8423
29.5k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8424
29.5k
                                CUR_PTR - (BASE_PTR + start));
8425
29.5k
        if (l.name == NULL) {
8426
0
            xmlErrMemory(ctxt);
8427
0
            return(l);
8428
0
        }
8429
29.5k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8430
29.5k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8431
29.5k
    }
8432
8433
1.02M
    *prefix = p;
8434
1.02M
    return(l);
8435
1.32M
}
8436
8437
/**
8438
 * Parse an XML Namespace QName
8439
 *
8440
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8441
 *     [7]  Prefix  ::= NCName
8442
 *     [8]  LocalPart  ::= NCName
8443
 *
8444
 * @param ctxt  an XML parser context
8445
 * @param prefix  pointer to store the prefix part
8446
 * @returns the Name parsed or NULL
8447
 */
8448
8449
static const xmlChar *
8450
11.4k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8451
11.4k
    xmlHashedString n, p;
8452
8453
11.4k
    n = xmlParseQNameHashed(ctxt, &p);
8454
11.4k
    if (n.name == NULL)
8455
1.63k
        return(NULL);
8456
9.81k
    *prefix = p.name;
8457
9.81k
    return(n.name);
8458
11.4k
}
8459
8460
/**
8461
 * Parse an XML name and compares for match
8462
 * (specialized for endtag parsing)
8463
 *
8464
 * @param ctxt  an XML parser context
8465
 * @param name  the localname
8466
 * @param prefix  the prefix, if any.
8467
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8468
 * and the name for mismatch
8469
 */
8470
8471
static const xmlChar *
8472
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8473
13.8k
                        xmlChar const *prefix) {
8474
13.8k
    const xmlChar *cmp;
8475
13.8k
    const xmlChar *in;
8476
13.8k
    const xmlChar *ret;
8477
13.8k
    const xmlChar *prefix2;
8478
8479
13.8k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8480
8481
13.8k
    GROW;
8482
13.8k
    in = ctxt->input->cur;
8483
8484
13.8k
    cmp = prefix;
8485
26.0k
    while (*in != 0 && *in == *cmp) {
8486
12.1k
  ++in;
8487
12.1k
  ++cmp;
8488
12.1k
    }
8489
13.8k
    if ((*cmp == 0) && (*in == ':')) {
8490
10.5k
        in++;
8491
10.5k
  cmp = name;
8492
14.2k
  while (*in != 0 && *in == *cmp) {
8493
3.70k
      ++in;
8494
3.70k
      ++cmp;
8495
3.70k
  }
8496
10.5k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8497
      /* success */
8498
2.37k
            ctxt->input->col += in - ctxt->input->cur;
8499
2.37k
      ctxt->input->cur = in;
8500
2.37k
      return((const xmlChar*) 1);
8501
2.37k
  }
8502
10.5k
    }
8503
    /*
8504
     * all strings come from the dictionary, so pointers can be compared directly
8505
     */
8506
11.4k
    ret = xmlParseQName (ctxt, &prefix2);
8507
11.4k
    if (ret == NULL)
8508
1.63k
        return(NULL);
8509
9.81k
    if ((ret == name) && (prefix == prefix2))
8510
856
  return((const xmlChar*) 1);
8511
8.96k
    return ret;
8512
9.81k
}
8513
8514
/**
8515
 * Parse an attribute in the new SAX2 framework.
8516
 *
8517
 * @param ctxt  an XML parser context
8518
 * @param pref  the element prefix
8519
 * @param elem  the element name
8520
 * @param hprefix  resulting attribute prefix
8521
 * @param value  resulting value of the attribute
8522
 * @param len  resulting length of the attribute value
8523
 * @param alloc  resulting flag, non-zero if the attribute value was allocated
8524
 * @returns the attribute name, and the value in *value.
8525
 */
8526
8527
static xmlHashedString
8528
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8529
                   const xmlChar * pref, const xmlChar * elem,
8530
                   xmlHashedString * hprefix, xmlChar ** value,
8531
                   int *len, int *alloc)
8532
672k
{
8533
672k
    xmlHashedString hname;
8534
672k
    const xmlChar *prefix, *name;
8535
672k
    xmlChar *val = NULL, *internal_val = NULL;
8536
672k
    int special = 0;
8537
672k
    int isNamespace;
8538
672k
    int flags;
8539
8540
672k
    *value = NULL;
8541
672k
    GROW;
8542
672k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8543
672k
    if (hname.name == NULL) {
8544
86.1k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8545
86.1k
                       "error parsing attribute name\n");
8546
86.1k
        return(hname);
8547
86.1k
    }
8548
586k
    name = hname.name;
8549
586k
    prefix = hprefix->name;
8550
8551
    /*
8552
     * get the type if needed
8553
     */
8554
586k
    if (ctxt->attsSpecial != NULL) {
8555
63.7k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8556
63.7k
                                              prefix, name));
8557
63.7k
    }
8558
8559
    /*
8560
     * read the value
8561
     */
8562
586k
    SKIP_BLANKS;
8563
586k
    if (RAW != '=') {
8564
28.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8565
28.0k
                          "Specification mandates value for attribute %s\n",
8566
28.0k
                          name);
8567
28.0k
        goto error;
8568
28.0k
    }
8569
8570
8571
558k
    NEXT;
8572
558k
    SKIP_BLANKS;
8573
558k
    flags = 0;
8574
558k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8575
558k
                   (prefix == ctxt->str_xmlns));
8576
558k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8577
558k
                                   isNamespace);
8578
558k
    if (val == NULL)
8579
8.30k
        goto error;
8580
8581
550k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8582
8583
550k
#ifdef LIBXML_VALID_ENABLED
8584
550k
    if ((ctxt->validate) &&
8585
550k
        (ctxt->standalone) &&
8586
550k
        (special & XML_SPECIAL_EXTERNAL) &&
8587
550k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8588
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8589
0
                         "standalone: normalization of attribute %s on %s "
8590
0
                         "by external subset declaration\n",
8591
0
                         name, elem);
8592
0
    }
8593
550k
#endif
8594
8595
550k
    if (prefix == ctxt->str_xml) {
8596
        /*
8597
         * Check that xml:lang conforms to the specification
8598
         * No longer reported as an error, just generate a warning now
8599
         * since this was deprecated in XML second edition
8600
         */
8601
10.3k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8602
0
            internal_val = xmlStrndup(val, *len);
8603
0
            if (internal_val == NULL)
8604
0
                goto mem_error;
8605
0
            if (!xmlCheckLanguageID(internal_val)) {
8606
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8607
0
                              "Malformed value for xml:lang : %s\n",
8608
0
                              internal_val, NULL);
8609
0
            }
8610
0
        }
8611
8612
        /*
8613
         * Check that xml:space conforms to the specification
8614
         */
8615
10.3k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8616
652
            internal_val = xmlStrndup(val, *len);
8617
652
            if (internal_val == NULL)
8618
0
                goto mem_error;
8619
652
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8620
221
                *(ctxt->space) = 0;
8621
431
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8622
196
                *(ctxt->space) = 1;
8623
235
            else {
8624
235
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8625
235
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8626
235
                              internal_val, NULL);
8627
235
            }
8628
652
        }
8629
10.3k
        if (internal_val) {
8630
652
            xmlFree(internal_val);
8631
652
        }
8632
10.3k
    }
8633
8634
550k
    *value = val;
8635
550k
    return (hname);
8636
8637
0
mem_error:
8638
0
    xmlErrMemory(ctxt);
8639
36.3k
error:
8640
36.3k
    if ((val != NULL) && (*alloc != 0))
8641
0
        xmlFree(val);
8642
36.3k
    return(hname);
8643
0
}
8644
8645
/**
8646
 * Inserts a new attribute into the hash table.
8647
 *
8648
 * @param ctxt  parser context
8649
 * @param size  size of the hash table
8650
 * @param name  attribute name
8651
 * @param uri  namespace uri
8652
 * @param hashValue  combined hash value of name and uri
8653
 * @param aindex  attribute index (this is a multiple of 5)
8654
 * @returns INT_MAX if no existing attribute was found, the attribute
8655
 * index if an attribute was found, -1 if a memory allocation failed.
8656
 */
8657
static int
8658
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8659
600k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8660
600k
    xmlAttrHashBucket *table = ctxt->attrHash;
8661
600k
    xmlAttrHashBucket *bucket;
8662
600k
    unsigned hindex;
8663
8664
600k
    hindex = hashValue & (size - 1);
8665
600k
    bucket = &table[hindex];
8666
8667
668k
    while (bucket->index >= 0) {
8668
485k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8669
8670
485k
        if (name == atts[0]) {
8671
424k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8672
8673
424k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8674
424k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8675
11.6k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8676
417k
                return(bucket->index);
8677
424k
        }
8678
8679
68.3k
        hindex++;
8680
68.3k
        bucket++;
8681
68.3k
        if (hindex >= size) {
8682
26.8k
            hindex = 0;
8683
26.8k
            bucket = table;
8684
26.8k
        }
8685
68.3k
    }
8686
8687
182k
    bucket->index = aindex;
8688
8689
182k
    return(INT_MAX);
8690
600k
}
8691
8692
static int
8693
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8694
                       const xmlChar *name, const xmlChar *prefix,
8695
7.23k
                       unsigned hashValue, int aindex) {
8696
7.23k
    xmlAttrHashBucket *table = ctxt->attrHash;
8697
7.23k
    xmlAttrHashBucket *bucket;
8698
7.23k
    unsigned hindex;
8699
8700
7.23k
    hindex = hashValue & (size - 1);
8701
7.23k
    bucket = &table[hindex];
8702
8703
13.1k
    while (bucket->index >= 0) {
8704
8.34k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8705
8706
8.34k
        if ((name == atts[0]) && (prefix == atts[1]))
8707
2.42k
            return(bucket->index);
8708
8709
5.92k
        hindex++;
8710
5.92k
        bucket++;
8711
5.92k
        if (hindex >= size) {
8712
586
            hindex = 0;
8713
586
            bucket = table;
8714
586
        }
8715
5.92k
    }
8716
8717
4.80k
    bucket->index = aindex;
8718
8719
4.80k
    return(INT_MAX);
8720
7.23k
}
8721
/**
8722
 * Parse a start tag. Always consumes '<'.
8723
 *
8724
 * This routine is called when running SAX2 parsing
8725
 *
8726
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8727
 *
8728
 * [ WFC: Unique Att Spec ]
8729
 * No attribute name may appear more than once in the same start-tag or
8730
 * empty-element tag.
8731
 *
8732
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8733
 *
8734
 * [ WFC: Unique Att Spec ]
8735
 * No attribute name may appear more than once in the same start-tag or
8736
 * empty-element tag.
8737
 *
8738
 * With namespace:
8739
 *
8740
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8741
 *
8742
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8743
 *
8744
 * @param ctxt  an XML parser context
8745
 * @param pref  resulting namespace prefix
8746
 * @param URI  resulting namespace URI
8747
 * @param nbNsPtr  resulting number of namespace declarations
8748
 * @returns the element name parsed
 *
 * NULL is returned if the input is not positioned at '<', if
 * xmlParserNsStartElement fails, if no valid element name follows, or
 * if the attribute array cannot be grown while default attributes are
 * added. The first three cases return before any output parameter is
 * written. *pref and *URI are only written on the path that reaches
 * the SAX callback stage; *nbNsPtr is written on every path that gets
 * past the element name.
 *
 * The function stops after the attributes; the closing '>' or '/>' is
 * not consumed here.
8749
 */
8750
8751
static const xmlChar *
8752
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8753
642k
                  const xmlChar **URI, int *nbNsPtr) {
8754
642k
    xmlHashedString hlocalname;
8755
642k
    xmlHashedString hprefix;
8756
642k
    xmlHashedString hattname;
8757
642k
    xmlHashedString haprefix;
8758
642k
    const xmlChar *localname;
8759
642k
    const xmlChar *prefix;
8760
642k
    const xmlChar *attname;
8761
642k
    const xmlChar *aprefix;
8762
642k
    const xmlChar *uri;
8763
642k
    xmlChar *attvalue = NULL;
8764
642k
    const xmlChar **atts = ctxt->atts;
8765
642k
    unsigned attrHashSize = 0;
8766
642k
    int maxatts = ctxt->maxatts;
8767
642k
    int nratts, nbatts, nbdef;
8768
642k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8769
642k
    int alloc = 0;
8770
642k
    int numNsErr = 0;
8771
642k
    int numDupErr = 0;
8772
8773
642k
    if (RAW != '<') return(NULL);
8774
642k
    NEXT1;
8775
    /*
     * Counters:
     *   nbatts      number of used slots in atts (five per attribute)
     *   nratts      number of attributes read from the tag itself
     *   nbdef       number of default attributes appended to atts
     *   nbNs        number of namespace declarations pushed
     *   nbTotalDef  number of non-namespace default attributes declared
     *   attval      set if at least one stored value was allocated
     */
8776
642k
    nbatts = 0;
8777
642k
    nratts = 0;
8778
642k
    nbdef = 0;
8779
642k
    nbNs = 0;
8780
642k
    nbTotalDef = 0;
8781
642k
    attval = 0;
8782
8783
642k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8784
0
        xmlErrMemory(ctxt);
8785
0
        return(NULL);
8786
0
    }
8787
8788
642k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8789
642k
    if (hlocalname.name == NULL) {
8790
215k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8791
215k
           "StartTag: invalid element name\n");
8792
215k
        return(NULL);
8793
215k
    }
8794
426k
    localname = hlocalname.name;
8795
426k
    prefix = hprefix.name;
8796
8797
    /*
8798
     * Now parse the attributes, it ends up with the ending
8799
     *
8800
     * (S Attribute)* S?
8801
     */
8802
426k
    SKIP_BLANKS;
8803
426k
    GROW;
8804
8805
    /*
8806
     * The ctxt->atts array will be ultimately passed to the SAX callback
8807
     * containing five xmlChar pointers for each attribute:
8808
     *
8809
     * [0] attribute name
8810
     * [1] attribute prefix
8811
     * [2] namespace URI
8812
     * [3] attribute value
8813
     * [4] end of attribute value
8814
     *
8815
     * To save memory, we reuse this array temporarily and store integers
8816
     * in these pointer variables.
8817
     *
8818
     * [0] attribute name
8819
     * [1] attribute prefix
8820
     * [2] hash value of attribute prefix, and later namespace index
8821
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8822
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8823
     *
8824
     * The ctxt->attallocs array contains an additional unsigned int for
8825
     * each attribute, containing the hash value of the attribute name
8826
     * and the alloc flag in bit 31.
8827
     */
8828
8829
884k
    while (((RAW != '>') &&
8830
884k
     ((RAW != '/') || (NXT(1) != '>')) &&
8831
884k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8832
        /* Stays negative unless xmlParseAttribute2 reports a length. */
672k
  int len = -1;
8833
8834
672k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8835
672k
                                          &haprefix, &attvalue, &len,
8836
672k
                                          &alloc);
8837
672k
        if (hattname.name == NULL)
8838
86.1k
      break;
        /* A name without a value: only handle the separator below. */
8839
586k
        if (attvalue == NULL)
8840
36.3k
            goto next_attr;
8841
550k
        attname = hattname.name;
8842
550k
        aprefix = haprefix.name;
8843
550k
  if (len < 0) len = xmlStrlen(attvalue);
8844
        /*
         * Names and prefixes are compared by pointer against the
         * strings cached in the parser context.
         *
         * First case: default namespace declaration (xmlns="...").
         */
8845
550k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8846
75.9k
            xmlHashedString huri;
8847
75.9k
            xmlURIPtr parsedUri;
8848
8849
75.9k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8850
75.9k
            uri = huri.name;
8851
75.9k
            if (uri == NULL) {
8852
0
                xmlErrMemory(ctxt);
8853
0
                goto next_attr;
8854
0
            }
            /* An empty value skips all URI checks and is pushed as is. */
8855
75.9k
            if (*uri != 0) {
8856
73.5k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8857
0
                    xmlErrMemory(ctxt);
8858
0
                    goto next_attr;
8859
0
                }
8860
73.5k
                if (parsedUri == NULL) {
8861
43.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8862
43.7k
                             "xmlns: '%s' is not a valid URI\n",
8863
43.7k
                                       uri, NULL, NULL);
8864
43.7k
                } else {
8865
29.7k
                    if (parsedUri->scheme == NULL) {
8866
22.0k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8867
22.0k
                                  "xmlns: URI %s is not absolute\n",
8868
22.0k
                                  uri, NULL, NULL);
8869
22.0k
                    }
8870
29.7k
                    xmlFreeURI(parsedUri);
8871
29.7k
                }
8872
73.5k
                if (uri == ctxt->str_xml_ns) {
8873
223
                    if (attname != ctxt->str_xml) {
8874
223
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8875
223
                     "xml namespace URI cannot be the default namespace\n",
8876
223
                                 NULL, NULL, NULL);
8877
223
                    }
8878
223
                    goto next_attr;
8879
223
                }
                /* 29 is the length of the xmlns namespace name below. */
8880
73.2k
                if ((len == 29) &&
8881
73.2k
                    (xmlStrEqual(uri,
8882
710
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8883
216
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8884
216
                         "reuse of the xmlns namespace name is forbidden\n",
8885
216
                             NULL, NULL, NULL);
8886
216
                    goto next_attr;
8887
216
                }
8888
73.2k
            }
8889
            /* Only a positive result counts as a new declaration. */
8890
75.4k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8891
59.7k
                nbNs++;
        /*
         * Second case: prefixed namespace declaration (xmlns:attname="...").
         */
8892
474k
        } else if (aprefix == ctxt->str_xmlns) {
8893
19.2k
            xmlHashedString huri;
8894
19.2k
            xmlURIPtr parsedUri;
8895
8896
19.2k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8897
19.2k
            uri = huri.name;
8898
19.2k
            if (uri == NULL) {
8899
0
                xmlErrMemory(ctxt);
8900
0
                goto next_attr;
8901
0
            }
8902
8903
19.2k
            if (attname == ctxt->str_xml) {
8904
437
                if (uri != ctxt->str_xml_ns) {
8905
243
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8906
243
                             "xml namespace prefix mapped to wrong URI\n",
8907
243
                             NULL, NULL, NULL);
8908
243
                }
8909
                /*
8910
                 * Do not keep a namespace definition node
8911
                 */
8912
437
                goto next_attr;
8913
437
            }
8914
18.8k
            if (uri == ctxt->str_xml_ns) {
8915
702
                if (attname != ctxt->str_xml) {
8916
702
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8917
702
                             "xml namespace URI mapped to wrong prefix\n",
8918
702
                             NULL, NULL, NULL);
8919
702
                }
8920
702
                goto next_attr;
8921
702
            }
8922
18.1k
            if (attname == ctxt->str_xmlns) {
8923
359
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8924
359
                         "redefinition of the xmlns prefix is forbidden\n",
8925
359
                         NULL, NULL, NULL);
8926
359
                goto next_attr;
8927
359
            }
8928
17.7k
            if ((len == 29) &&
8929
17.7k
                (xmlStrEqual(uri,
8930
634
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8931
333
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8932
333
                         "reuse of the xmlns namespace name is forbidden\n",
8933
333
                         NULL, NULL, NULL);
8934
333
                goto next_attr;
8935
333
            }
            /* Unlike the default namespace, a prefix needs a non-empty URI. */
8936
17.4k
            if ((uri == NULL) || (uri[0] == 0)) {
8937
292
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8938
292
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8939
292
                              attname, NULL, NULL);
8940
292
                goto next_attr;
8941
17.1k
            } else {
8942
17.1k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8943
0
                    xmlErrMemory(ctxt);
8944
0
                    goto next_attr;
8945
0
                }
8946
17.1k
                if (parsedUri == NULL) {
8947
9.92k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8948
9.92k
                         "xmlns:%s: '%s' is not a valid URI\n",
8949
9.92k
                                       attname, uri, NULL);
8950
9.92k
                } else {
                    /* Here the relative-URI warning is pedantic-only. */
8951
7.23k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8952
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8953
0
                                  "xmlns:%s: URI %s is not absolute\n",
8954
0
                                  attname, uri, NULL);
8955
0
                    }
8956
7.23k
                    xmlFreeURI(parsedUri);
8957
7.23k
                }
8958
17.1k
            }
8959
8960
17.1k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8961
16.5k
                nbNs++;
        /*
         * Third case: a regular attribute.
         */
8962
455k
        } else {
8963
            /*
8964
             * Populate attributes array, see above for repurposing
8965
             * of xmlChar pointers.
8966
             */
8967
455k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8968
4.77k
                int res = xmlCtxtGrowAttrs(ctxt);
8969
                /* Reload the local copies from the context after the call. */
8970
4.77k
                maxatts = ctxt->maxatts;
8971
4.77k
                atts = ctxt->atts;
8972
                /* On failure the attribute is dropped; parsing continues. */
8973
4.77k
                if (res < 0)
8974
0
                    goto next_attr;
8975
4.77k
            }
            /* Low 31 bits: name hash value. Bit 31: alloc flag. */
8976
455k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8977
455k
                                        ((unsigned) alloc << 31);
8978
455k
            atts[nbatts++] = attname;
8979
455k
            atts[nbatts++] = aprefix;
8980
455k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8981
455k
            if (alloc) {
                /* Allocated value: store start and end pointers directly. */
8982
27.8k
                atts[nbatts++] = attvalue;
8983
27.8k
                attvalue += len;
8984
27.8k
                atts[nbatts++] = attvalue;
8985
427k
            } else {
8986
                /*
8987
                 * attvalue points into the input buffer which can be
8988
                 * reallocated. Store differences to input->base instead.
8989
                 * The pointers will be reconstructed later.
8990
                 */
8991
427k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
427k
                attvalue += len;
8993
427k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8994
427k
            }
8995
            /*
8996
             * tag if some deallocation is needed
8997
             */
8998
455k
            if (alloc != 0) attval = 1;
8999
455k
            attvalue = NULL; /* moved into atts */
9000
455k
        }
9001
9002
586k
next_attr:
        /*
         * An allocated value that was not moved into atts (namespace
         * declarations and all error paths) is released here.
         */
9003
586k
        if ((attvalue != NULL) && (alloc != 0)) {
9004
23.0k
            xmlFree(attvalue);
9005
23.0k
            attvalue = NULL;
9006
23.0k
        }
9007
9008
586k
  GROW
9009
586k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9010
54.0k
      break;
        /* Another attribute must be separated by at least one blank. */
9011
532k
  if (SKIP_BLANKS == 0) {
9012
74.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9013
74.6k
         "attributes construct error\n");
9014
74.6k
      break;
9015
74.6k
  }
9016
458k
        GROW;
9017
458k
    }
9018
9019
    /*
9020
     * Namespaces from default attributes
     *
     * Declarations are pushed now so that they are available when
     * attribute prefixes are resolved below. Other default attributes
     * are only counted (nbTotalDef) at this point; they are appended
     * to atts in the "Default attributes" pass further down.
9021
     */
9022
426k
    if (ctxt->attsDefault != NULL) {
9023
184k
        xmlDefAttrsPtr defaults;
9024
9025
184k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9026
184k
  if (defaults != NULL) {
9027
1.17M
      for (i = 0; i < defaults->nbAttrs; i++) {
9028
1.05M
                xmlDefAttr *attr = &defaults->attrs[i];
9029
9030
1.05M
          attname = attr->name.name;
9031
1.05M
    aprefix = attr->prefix.name;
9032
9033
1.05M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9034
4.13k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9035
9036
4.13k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9037
3.22k
                        nbNs++;
9038
1.05M
    } else if (aprefix == ctxt->str_xmlns) {
9039
843k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9040
9041
843k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9042
843k
                                      NULL, 1) > 0)
9043
842k
                        nbNs++;
9044
843k
    } else {
9045
211k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9046
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9047
0
                                    "Maximum number of attributes exceeded");
9048
0
                        break;
9049
0
                    }
9050
211k
                    nbTotalDef += 1;
9051
211k
                }
9052
1.05M
      }
9053
112k
  }
9054
184k
    }
9055
9056
    /*
9057
     * Resolve attribute namespaces
     *
     * Slot [2] of each attribute still holds the prefix hash value and
     * is overwritten with the namespace index.
9058
     */
9059
881k
    for (i = 0; i < nbatts; i += 5) {
9060
455k
        attname = atts[i];
9061
455k
        aprefix = atts[i+1];
9062
9063
        /*
9064
  * The default namespace does not apply to attribute names.
9065
  */
9066
455k
  if (aprefix == NULL) {
9067
427k
            nsIndex = NS_INDEX_EMPTY;
9068
427k
        } else if (aprefix == ctxt->str_xml) {
9069
10.3k
            nsIndex = NS_INDEX_XML;
9070
17.6k
        } else {
9071
17.6k
            haprefix.name = aprefix;
9072
17.6k
            haprefix.hashValue = (size_t) atts[i+2];
9073
17.6k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9074
            /*
             * Undeclared prefix: report it and fall back to the empty
             * namespace. NOTE(review): the minNsIndex comparison
             * presumably rejects bindings that are not valid for this
             * parsing scope - confirm in xmlParserNsLookup.
             */
9075
17.6k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9076
10.8k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9077
10.8k
        "Namespace prefix %s for %s on %s is not defined\n",
9078
10.8k
        aprefix, attname, localname);
9079
10.8k
                nsIndex = NS_INDEX_EMPTY;
9080
10.8k
            }
9081
17.6k
        }
9082
9083
455k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9084
455k
    }
9085
9086
    /*
9087
     * Maximum number of attributes including default attributes.
9088
     */
9089
426k
    maxAtts = nratts + nbTotalDef;
9090
9091
    /*
9092
     * Verify that attribute names are unique.
     *
     * With at most one attribute there is nothing to compare, so the
     * hash table is not set up at all.
9093
     */
9094
426k
    if (maxAtts > 1) {
        /*
         * Pick a power of two, at least 4, that is at least twice
         * maxAtts.
         */
9095
43.9k
        attrHashSize = 4;
9096
92.7k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9097
48.7k
            attrHashSize *= 2;
9098
9099
43.9k
        if (attrHashSize > ctxt->attrHashMax) {
9100
1.31k
            xmlAttrHashBucket *tmp;
9101
9102
1.31k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9103
1.31k
            if (tmp == NULL) {
9104
0
                xmlErrMemory(ctxt);
                /*
                 * Skips the remaining checks and the SAX callback, but
                 * localname is still returned.
                 */
9105
0
                goto done;
9106
0
            }
9107
9108
1.31k
            ctxt->attrHash = tmp;
9109
1.31k
            ctxt->attrHashMax = attrHashSize;
9110
1.31k
        }
9111
        /*
         * Setting every byte to 0xFF gives each bucket a negative
         * index, which the insert routines treat as a free slot.
         */
9112
43.9k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9113
9114
477k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9115
433k
            const xmlChar *nsuri;
9116
433k
            unsigned hashValue, nameHashValue, uriHashValue;
9117
433k
            int res;
9118
9119
433k
            attname = atts[i];
9120
433k
            aprefix = atts[i+1];
9121
433k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9122
            /* Hash values always have bit 31 set, see dict.c */
9123
433k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9124
9125
433k
            if (nsIndex == NS_INDEX_EMPTY) {
9126
                /*
9127
                 * Prefix with empty namespace means an undeclared
9128
                 * prefix which was already reported above.
9129
                 */
9130
425k
                if (aprefix != NULL)
9131
8.80k
                    continue;
9132
416k
                nsuri = NULL;
9133
416k
                uriHashValue = URI_HASH_EMPTY;
9134
416k
            } else if (nsIndex == NS_INDEX_XML) {
9135
1.56k
                nsuri = ctxt->str_xml_ns;
9136
1.56k
                uriHashValue = URI_HASH_XML;
9137
6.74k
            } else {
9138
6.74k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9139
6.74k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9140
6.74k
            }
9141
            /* The table is keyed by local name and namespace URI. */
9142
424k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9143
424k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9144
424k
                                    hashValue, i);
9145
424k
            if (res < 0)
9146
0
                continue;
9147
9148
            /*
9149
             * [ WFC: Unique Att Spec ]
9150
             * No attribute name may appear more than once in the same
9151
             * start-tag or empty-element tag.
9152
             * As extended by the Namespace in XML REC.
9153
             */
            /*
             * A result below INT_MAX is used as the atts index of an
             * earlier attribute with the same name and URI. The same
             * prefix means a duplicate QName; a different prefix means
             * two prefixes bound to one namespace.
             */
9154
424k
            if (res < INT_MAX) {
9155
412k
                if (aprefix == atts[res+1]) {
9156
408k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9157
408k
                    numDupErr += 1;
9158
408k
                } else {
9159
3.59k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9160
3.59k
                             "Namespaced Attribute %s in '%s' redefined\n",
9161
3.59k
                             attname, nsuri, NULL);
9162
3.59k
                    numNsErr += 1;
9163
3.59k
                }
9164
412k
            }
9165
424k
        }
9166
43.9k
    }
9167
9168
    /*
9169
     * Default attributes
     *
     * Namespace declarations were handled in the earlier pass and are
     * skipped here. Every other default is appended to atts unless an
     * attribute with the same name, namespace and prefix is already
     * in the hash table.
9170
     */
9171
426k
    if (ctxt->attsDefault != NULL) {
9172
184k
        xmlDefAttrsPtr defaults;
9173
9174
184k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9175
184k
  if (defaults != NULL) {
9176
1.17M
      for (i = 0; i < defaults->nbAttrs; i++) {
9177
1.05M
                xmlDefAttr *attr = &defaults->attrs[i];
9178
1.05M
                const xmlChar *nsuri = NULL;
9179
1.05M
                unsigned hashValue, uriHashValue = 0;
9180
1.05M
                int res;
9181
9182
1.05M
          attname = attr->name.name;
9183
1.05M
    aprefix = attr->prefix.name;
9184
9185
1.05M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9186
4.13k
                    continue;
9187
1.05M
    if (aprefix == ctxt->str_xmlns)
9188
843k
                    continue;
9189
9190
211k
                if (aprefix == NULL) {
9191
65.1k
                    nsIndex = NS_INDEX_EMPTY;
9192
65.1k
                    nsuri = NULL;
9193
65.1k
                    uriHashValue = URI_HASH_EMPTY;
9194
146k
                } else if (aprefix == ctxt->str_xml) {
9195
8.88k
                    nsIndex = NS_INDEX_XML;
9196
8.88k
                    nsuri = ctxt->str_xml_ns;
9197
8.88k
                    uriHashValue = URI_HASH_XML;
9198
137k
                } else {
9199
137k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9200
137k
                    if ((nsIndex == INT_MAX) ||
9201
137k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9202
110k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9203
110k
                                 "Namespace prefix %s for %s on %s is not "
9204
110k
                                 "defined\n",
9205
110k
                                 aprefix, attname, localname);
9206
110k
                        nsIndex = NS_INDEX_EMPTY;
9207
110k
                        nsuri = NULL;
9208
110k
                        uriHashValue = URI_HASH_EMPTY;
9209
110k
                    } else {
9210
26.6k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9211
26.6k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9212
26.6k
                    }
9213
137k
                }
9214
9215
                /*
9216
                 * Check whether the attribute exists
                 *
                 * The hash table only exists if maxAtts > 1. nbatts is
                 * the atts index this default will occupy if it is
                 * appended below.
9217
                 */
9218
211k
                if (maxAtts > 1) {
9219
175k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9220
175k
                                                   uriHashValue);
9221
175k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9222
175k
                                            hashValue, nbatts);
9223
175k
                    if (res < 0)
9224
0
                        continue;
9225
175k
                    if (res < INT_MAX) {
                        /* Same QName already present: drop the default. */
9226
4.96k
                        if (aprefix == atts[res+1])
9227
919
                            continue;
                        /*
                         * Same name and URI under another prefix: the
                         * error is reported and the default is still
                         * appended.
                         */
9228
4.04k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9229
4.04k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9230
4.04k
                                 attname, nsuri, NULL);
9231
4.04k
                    }
9232
175k
                }
9233
9234
210k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9235
9236
210k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9237
1.61k
                    res = xmlCtxtGrowAttrs(ctxt);
9238
9239
1.61k
                    maxatts = ctxt->maxatts;
9240
1.61k
                    atts = ctxt->atts;
9241
9242
1.61k
                    if (res < 0) {
                        /*
                         * Make the function return NULL, but still go
                         * through the cleanup at the done label.
                         */
9243
0
                        localname = NULL;
9244
0
                        goto done;
9245
0
                    }
9246
1.61k
                }
9247
                /*
                 * Defaults store the namespace index in slot [2] like
                 * parsed attributes, but their value pointers are
                 * stored directly and have no attallocs entry.
                 */
9248
210k
                atts[nbatts++] = attname;
9249
210k
                atts[nbatts++] = aprefix;
9250
210k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9251
210k
                atts[nbatts++] = attr->value.name;
9252
210k
                atts[nbatts++] = attr->valueEnd;
9253
9254
210k
#ifdef LIBXML_VALID_ENABLED
9255
                /*
9256
                 * This should be moved to valid.c, but we don't keep track
9257
                 * whether an attribute was defaulted.
9258
                 */
9259
210k
                if ((ctxt->validate) &&
9260
210k
                    (ctxt->standalone == 1) &&
9261
210k
                    (attr->external != 0)) {
9262
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9263
0
                            "standalone: attribute %s on %s defaulted "
9264
0
                            "from external subset\n",
9265
0
                            attname, localname);
9266
0
                }
9267
210k
#endif
9268
210k
                nbdef++;
9269
210k
      }
9270
112k
  }
9271
184k
    }
9272
9273
    /*
9274
     * Using a single hash table for nsUri/localName pairs cannot
9275
     * detect duplicate QNames reliably. The following example will
9276
     * only result in two namespace errors.
9277
     *
9278
     * <doc xmlns:a="a" xmlns:b="a">
9279
     *   <elem a:a="" b:a="" b:a=""/>
9280
     * </doc>
9281
     *
9282
     * If we saw more than one namespace error but no duplicate QNames
9283
     * were found, we have to scan for duplicate QNames.
9284
     */
9285
426k
    if ((numDupErr == 0) && (numNsErr > 1)) {
        /* Reuse the same table, now keyed by local name and prefix. */
9286
1.09k
        memset(ctxt->attrHash, -1,
9287
1.09k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9288
        /* Only attributes parsed from the tag are rescanned. */
9289
9.17k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9290
8.08k
            unsigned hashValue, nameHashValue, prefixHashValue;
9291
8.08k
            int res;
9292
9293
8.08k
            aprefix = atts[i+1];
9294
8.08k
            if (aprefix == NULL)
9295
853
                continue;
9296
9297
7.23k
            attname = atts[i];
9298
            /* Hash values always have bit 31 set, see dict.c */
9299
7.23k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
            /*
             * Slot [2] holds the namespace index by now, so the prefix
             * hash is computed again.
             */
9300
7.23k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9301
9302
7.23k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9303
7.23k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9304
7.23k
                                         aprefix, hashValue, i);
9305
7.23k
            if (res < INT_MAX)
9306
2.42k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9307
7.23k
        }
9308
1.09k
    }
9309
9310
    /*
9311
     * Reconstruct attribute pointers
     *
     * Turns the temporary integers back into the pointers the SAX
     * callback expects. NOTE(review): INT_MAX and INT_MAX - 1 are
     * presumably the values of NS_INDEX_EMPTY and NS_INDEX_XML -
     * confirm against their definitions.
9312
     */
9313
1.09M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9314
        /* namespace URI */
9315
665k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9316
665k
        if (nsIndex == INT_MAX)
9317
612k
            atts[i+2] = NULL;
9318
52.4k
        else if (nsIndex == INT_MAX - 1)
9319
18.9k
            atts[i+2] = ctxt->str_xml_ns;
9320
33.4k
        else
9321
33.4k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9322
        /*
         * Only the first nratts entries have an attallocs word.
         * Entries without the alloc flag hold offsets from the input
         * base and are converted back to pointers here.
         */
9323
665k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9324
427k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9325
427k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9326
427k
        }
9327
665k
    }
9328
    /* Resolve the element's own prefix. */
9329
426k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9330
426k
    if ((prefix != NULL) && (uri == NULL)) {
9331
23.1k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9332
23.1k
           "Namespace prefix %s on %s is not defined\n",
9333
23.1k
     prefix, localname, NULL);
9334
23.1k
    }
9335
426k
    *pref = prefix;
9336
426k
    *URI = uri;
9337
9338
    /*
9339
     * SAX callback
     *
     * The namespace pointer passed to the callback addresses the last
     * nbNs prefix/URI pairs of ctxt->nsTab.
9340
     */
9341
426k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9342
426k
  (!ctxt->disableSAX)) {
9343
65.9k
  if (nbNs > 0)
9344
25.8k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
25.8k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9346
25.8k
        nbatts / 5, nbdef, atts);
9347
40.1k
  else
9348
40.1k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9349
40.1k
                          0, NULL, nbatts / 5, nbdef, atts);
9350
65.9k
    }
9351
9352
426k
done:
9353
    /*
9354
     * Free allocated attribute values
     *
     * Slot [3] of an attribute with the alloc flag set still holds
     * the start of its allocated value.
9355
     */
9356
426k
    if (attval != 0) {
9357
437k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9358
422k
      if (ctxt->attallocs[j] & 0x80000000)
9359
27.8k
          xmlFree((xmlChar *) atts[i+3]);
9360
15.5k
    }
9361
9362
426k
    *nbNsPtr = nbNs;
9363
426k
    return(localname);
9364
426k
}
9365
9366
/**
9367
 * Parse an end tag. Always consumes '</'.
9368
 *
9369
 *     [42] ETag ::= '</' Name S? '>'
9370
 *
9371
 * With namespace
9372
 *
9373
 *     [NS 9] ETag ::= '</' QName S? '>'
9374
 * @param ctxt  an XML parser context
9375
 * @param tag  the corresponding start tag
9376
 */
9377
9378
static void
9379
24.3k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9380
24.3k
    const xmlChar *name;
9381
9382
24.3k
    GROW;
9383
24.3k
    if ((RAW != '<') || (NXT(1) != '/')) {
9384
188
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9385
188
  return;
9386
188
    }
9387
24.1k
    SKIP(2);
9388
9389
24.1k
    if (tag->prefix == NULL)
9390
10.3k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9391
13.8k
    else
9392
13.8k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9393
9394
    /*
9395
     * We should definitely be at the ending "S? '>'" part
9396
     */
9397
24.1k
    GROW;
9398
24.1k
    SKIP_BLANKS;
9399
24.1k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9400
19.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9401
19.5k
    } else
9402
4.55k
  NEXT1;
9403
9404
    /*
9405
     * [ WFC: Element Type Match ]
9406
     * The Name in an element's end-tag must match the element type in the
9407
     * start-tag.
9408
     *
9409
     */
9410
24.1k
    if (name != (xmlChar*)1) {
9411
15.8k
        if (name == NULL) name = BAD_CAST "unparsable";
9412
15.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9413
15.8k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9414
15.8k
                    ctxt->name, tag->line, name);
9415
15.8k
    }
9416
9417
    /*
9418
     * SAX: End of Tag
9419
     */
9420
24.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9421
24.1k
  (!ctxt->disableSAX))
9422
460
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9423
460
                                tag->URI);
9424
9425
24.1k
    spacePop(ctxt);
9426
24.1k
    if (tag->nsNr != 0)
9427
1.66k
  xmlParserNsPop(ctxt, tag->nsNr);
9428
24.1k
}
9429
9430
/**
9431
 * Parse escaped pure raw content. Always consumes '<!['.
9432
 *
9433
 * @deprecated Internal function, don't use.
9434
 *
9435
 *     [18] CDSect ::= CDStart CData CDEnd
9436
 *
9437
 *     [19] CDStart ::= '<![CDATA['
9438
 *
9439
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9440
 *
9441
 *     [21] CDEnd ::= ']]>'
9442
 * @param ctxt  an XML parser context
9443
 */
9444
void
9445
15.3k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9446
15.3k
    xmlChar *buf = NULL;
9447
15.3k
    int len = 0;
9448
15.3k
    int size = XML_PARSER_BUFFER_SIZE;
9449
15.3k
    int r, rl;
9450
15.3k
    int s, sl;
9451
15.3k
    int cur, l;
9452
15.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9453
15.3k
                    XML_MAX_HUGE_LENGTH :
9454
15.3k
                    XML_MAX_TEXT_LENGTH;
9455
9456
15.3k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9457
0
        return;
9458
15.3k
    SKIP(3);
9459
9460
15.3k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9461
0
        return;
9462
15.3k
    SKIP(6);
9463
9464
15.3k
    r = xmlCurrentCharRecover(ctxt, &rl);
9465
15.3k
    if (!IS_CHAR(r)) {
9466
1.37k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9467
1.37k
        goto out;
9468
1.37k
    }
9469
13.9k
    NEXTL(rl);
9470
13.9k
    s = xmlCurrentCharRecover(ctxt, &sl);
9471
13.9k
    if (!IS_CHAR(s)) {
9472
2.41k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9473
2.41k
        goto out;
9474
2.41k
    }
9475
11.5k
    NEXTL(sl);
9476
11.5k
    cur = xmlCurrentCharRecover(ctxt, &l);
9477
11.5k
    buf = xmlMalloc(size);
9478
11.5k
    if (buf == NULL) {
9479
0
  xmlErrMemory(ctxt);
9480
0
        goto out;
9481
0
    }
9482
4.15M
    while (IS_CHAR(cur) &&
9483
4.15M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9484
4.14M
  if (len + 5 >= size) {
9485
4.47k
      xmlChar *tmp;
9486
4.47k
            int newSize;
9487
9488
4.47k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9489
4.47k
            if (newSize < 0) {
9490
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9491
0
                               "CData section too big found\n");
9492
0
                goto out;
9493
0
            }
9494
4.47k
      tmp = xmlRealloc(buf, newSize);
9495
4.47k
      if (tmp == NULL) {
9496
0
    xmlErrMemory(ctxt);
9497
0
                goto out;
9498
0
      }
9499
4.47k
      buf = tmp;
9500
4.47k
      size = newSize;
9501
4.47k
  }
9502
4.14M
  COPY_BUF(buf, len, r);
9503
4.14M
  r = s;
9504
4.14M
  rl = sl;
9505
4.14M
  s = cur;
9506
4.14M
  sl = l;
9507
4.14M
  NEXTL(l);
9508
4.14M
  cur = xmlCurrentCharRecover(ctxt, &l);
9509
4.14M
    }
9510
11.5k
    buf[len] = 0;
9511
11.5k
    if (cur != '>') {
9512
9.95k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9513
9.95k
                       "CData section not finished\n%.50s\n", buf);
9514
9.95k
        goto out;
9515
9.95k
    }
9516
1.60k
    NEXTL(l);
9517
9518
    /*
9519
     * OK the buffer is to be consumed as cdata.
9520
     */
9521
1.60k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9522
1.00k
        if ((ctxt->sax->cdataBlock != NULL) &&
9523
1.00k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9524
1.00k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9525
1.00k
        } else if (ctxt->sax->characters != NULL) {
9526
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9527
0
        }
9528
1.00k
    }
9529
9530
15.3k
out:
9531
15.3k
    xmlFree(buf);
9532
15.3k
}
9533
9534
/**
9535
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9536
 * unexpected EOF to the caller.
9537
 *
9538
 * @param ctxt  an XML parser context
9539
 */
9540
9541
static void
9542
8.36k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9543
8.36k
    int oldNameNr = ctxt->nameNr;
9544
8.36k
    int oldSpaceNr = ctxt->spaceNr;
9545
8.36k
    int oldNodeNr = ctxt->nodeNr;
9546
9547
8.36k
    GROW;
9548
3.82M
    while ((ctxt->input->cur < ctxt->input->end) &&
9549
3.82M
     (PARSER_STOPPED(ctxt) == 0)) {
9550
3.81M
  const xmlChar *cur = ctxt->input->cur;
9551
9552
  /*
9553
   * First case : a Processing Instruction.
9554
   */
9555
3.81M
  if ((*cur == '<') && (cur[1] == '?')) {
9556
41.9k
      xmlParsePI(ctxt);
9557
41.9k
  }
9558
9559
  /*
9560
   * Second case : a CDSection
9561
   */
9562
  /* 2.6.0 test was *cur not RAW */
9563
3.77M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9564
15.3k
      xmlParseCDSect(ctxt);
9565
15.3k
  }
9566
9567
  /*
9568
   * Third case :  a comment
9569
   */
9570
3.75M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9571
3.75M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9572
38.4k
      xmlParseComment(ctxt);
9573
38.4k
  }
9574
9575
  /*
9576
   * Fourth case :  a sub-element.
9577
   */
9578
3.72M
  else if (*cur == '<') {
9579
654k
            if (NXT(1) == '/') {
9580
24.1k
                if (ctxt->nameNr <= oldNameNr)
9581
268
                    break;
9582
23.8k
          xmlParseElementEnd(ctxt);
9583
630k
            } else {
9584
630k
          xmlParseElementStart(ctxt);
9585
630k
            }
9586
654k
  }
9587
9588
  /*
9589
   * Fifth case : a reference. If if has not been resolved,
9590
   *    parsing returns it's Name, create the node
9591
   */
9592
9593
3.06M
  else if (*cur == '&') {
9594
117k
      xmlParseReference(ctxt);
9595
117k
  }
9596
9597
  /*
9598
   * Last case, text. Note that References are handled directly.
9599
   */
9600
2.94M
  else {
9601
2.94M
      xmlParseCharDataInternal(ctxt, 0);
9602
2.94M
  }
9603
9604
3.81M
  SHRINK;
9605
3.81M
  GROW;
9606
3.81M
    }
9607
9608
8.36k
    if ((ctxt->nameNr > oldNameNr) &&
9609
8.36k
        (ctxt->input->cur >= ctxt->input->end) &&
9610
8.36k
        (ctxt->wellFormed)) {
9611
281
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9612
281
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9613
281
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9614
281
                "Premature end of data in tag %s line %d\n",
9615
281
                name, line, NULL);
9616
281
    }
9617
9618
    /*
9619
     * Clean up in error case
9620
     */
9621
9622
61.6k
    while (ctxt->nodeNr > oldNodeNr)
9623
53.3k
        nodePop(ctxt);
9624
9625
217k
    while (ctxt->nameNr > oldNameNr) {
9626
208k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9627
9628
208k
        if (tag->nsNr != 0)
9629
89.9k
            xmlParserNsPop(ctxt, tag->nsNr);
9630
9631
208k
        namePop(ctxt);
9632
208k
    }
9633
9634
217k
    while (ctxt->spaceNr > oldSpaceNr)
9635
208k
        spacePop(ctxt);
9636
8.36k
}
9637
9638
/**
9639
 * Parse XML element content. This is useful if you're only interested
9640
 * in custom SAX callbacks. If you want a node list, use
9641
 * #xmlCtxtParseContent.
9642
 *
9643
 * @param ctxt  an XML parser context
9644
 */
9645
void
9646
0
xmlParseContent(xmlParserCtxt *ctxt) {
9647
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9648
0
        return;
9649
9650
0
    xmlCtxtInitializeLate(ctxt);
9651
9652
0
    xmlParseContentInternal(ctxt);
9653
9654
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9655
0
}
9656
9657
/**
9658
 * Parse an XML element
9659
 *
9660
 * @deprecated Internal function, don't use.
9661
 *
9662
 *     [39] element ::= EmptyElemTag | STag content ETag
9663
 *
9664
 * [ WFC: Element Type Match ]
9665
 * The Name in an element's end-tag must match the element type in the
9666
 * start-tag.
9667
 *
9668
 * @param ctxt  an XML parser context
9669
 */
9670
9671
void
9672
11.6k
xmlParseElement(xmlParserCtxt *ctxt) {
9673
11.6k
    if (xmlParseElementStart(ctxt) != 0)
9674
4.90k
        return;
9675
9676
6.74k
    xmlParseContentInternal(ctxt);
9677
9678
6.74k
    if (ctxt->input->cur >= ctxt->input->end) {
9679
6.30k
        if (ctxt->wellFormed) {
9680
656
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9681
656
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9682
656
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9683
656
                    "Premature end of data in tag %s line %d\n",
9684
656
                    name, line, NULL);
9685
656
        }
9686
6.30k
        return;
9687
6.30k
    }
9688
9689
446
    xmlParseElementEnd(ctxt);
9690
446
}
9691
9692
/**
9693
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9694
 * opening tag was parsed, 1 if an empty element was parsed.
9695
 *
9696
 * Always consumes '<'.
9697
 *
9698
 * @param ctxt  an XML parser context
9699
 */
9700
static int
9701
642k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9702
642k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9703
642k
    const xmlChar *name;
9704
642k
    const xmlChar *prefix = NULL;
9705
642k
    const xmlChar *URI = NULL;
9706
642k
    xmlParserNodeInfo node_info;
9707
642k
    int line;
9708
642k
    xmlNodePtr cur;
9709
642k
    int nbNs = 0;
9710
9711
642k
    if (ctxt->nameNr > maxDepth) {
9712
10
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9713
10
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9714
10
                ctxt->nameNr);
9715
10
  return(-1);
9716
10
    }
9717
9718
    /* Capture start position */
9719
642k
    if (ctxt->record_info) {
9720
0
        node_info.begin_pos = ctxt->input->consumed +
9721
0
                          (CUR_PTR - ctxt->input->base);
9722
0
  node_info.begin_line = ctxt->input->line;
9723
0
    }
9724
9725
642k
    if (ctxt->spaceNr == 0)
9726
0
  spacePush(ctxt, -1);
9727
642k
    else if (*ctxt->space == -2)
9728
0
  spacePush(ctxt, -1);
9729
642k
    else
9730
642k
  spacePush(ctxt, *ctxt->space);
9731
9732
642k
    line = ctxt->input->line;
9733
642k
#ifdef LIBXML_SAX1_ENABLED
9734
642k
    if (ctxt->sax2)
9735
642k
#endif /* LIBXML_SAX1_ENABLED */
9736
642k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9737
0
#ifdef LIBXML_SAX1_ENABLED
9738
0
    else
9739
0
  name = xmlParseStartTag(ctxt);
9740
642k
#endif /* LIBXML_SAX1_ENABLED */
9741
642k
    if (name == NULL) {
9742
215k
  spacePop(ctxt);
9743
215k
        return(-1);
9744
215k
    }
9745
426k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9746
426k
    cur = ctxt->node;
9747
9748
426k
#ifdef LIBXML_VALID_ENABLED
9749
    /*
9750
     * [ VC: Root Element Type ]
9751
     * The Name in the document type declaration must match the element
9752
     * type of the root element.
9753
     */
9754
426k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9755
426k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9756
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9757
426k
#endif /* LIBXML_VALID_ENABLED */
9758
9759
    /*
9760
     * Check for an Empty Element.
9761
     */
9762
426k
    if ((RAW == '/') && (NXT(1) == '>')) {
9763
20.2k
        SKIP(2);
9764
20.2k
  if (ctxt->sax2) {
9765
20.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9766
20.2k
    (!ctxt->disableSAX))
9767
5.53k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9768
20.2k
#ifdef LIBXML_SAX1_ENABLED
9769
20.2k
  } else {
9770
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9771
0
    (!ctxt->disableSAX))
9772
0
    ctxt->sax->endElement(ctxt->userData, name);
9773
0
#endif /* LIBXML_SAX1_ENABLED */
9774
0
  }
9775
20.2k
  namePop(ctxt);
9776
20.2k
  spacePop(ctxt);
9777
20.2k
  if (nbNs > 0)
9778
1.24k
      xmlParserNsPop(ctxt, nbNs);
9779
20.2k
  if (cur != NULL && ctxt->record_info) {
9780
0
            node_info.node = cur;
9781
0
            node_info.end_pos = ctxt->input->consumed +
9782
0
                                (CUR_PTR - ctxt->input->base);
9783
0
            node_info.end_line = ctxt->input->line;
9784
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9785
0
  }
9786
20.2k
  return(1);
9787
20.2k
    }
9788
406k
    if (RAW == '>') {
9789
239k
        NEXT1;
9790
239k
        if (cur != NULL && ctxt->record_info) {
9791
0
            node_info.node = cur;
9792
0
            node_info.end_pos = 0;
9793
0
            node_info.end_line = 0;
9794
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9795
0
        }
9796
239k
    } else {
9797
167k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9798
167k
         "Couldn't find end of Start Tag %s line %d\n",
9799
167k
                    name, line, NULL);
9800
9801
  /*
9802
   * end of parsing of this node.
9803
   */
9804
167k
  nodePop(ctxt);
9805
167k
  namePop(ctxt);
9806
167k
  spacePop(ctxt);
9807
167k
  if (nbNs > 0)
9808
77.3k
      xmlParserNsPop(ctxt, nbNs);
9809
167k
  return(-1);
9810
167k
    }
9811
9812
239k
    return(0);
9813
406k
}
9814
9815
/**
9816
 * Parse the end of an XML element. Always consumes '</'.
9817
 *
9818
 * @param ctxt  an XML parser context
9819
 */
9820
static void
9821
24.3k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9822
24.3k
    xmlNodePtr cur = ctxt->node;
9823
9824
24.3k
    if (ctxt->nameNr <= 0) {
9825
0
        if ((RAW == '<') && (NXT(1) == '/'))
9826
0
            SKIP(2);
9827
0
        return;
9828
0
    }
9829
9830
    /*
9831
     * parse the end of tag: '</' should be here.
9832
     */
9833
24.3k
    if (ctxt->sax2) {
9834
24.3k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9835
24.3k
  namePop(ctxt);
9836
24.3k
    }
9837
0
#ifdef LIBXML_SAX1_ENABLED
9838
0
    else
9839
0
  xmlParseEndTag1(ctxt, 0);
9840
24.3k
#endif /* LIBXML_SAX1_ENABLED */
9841
9842
    /*
9843
     * Capture end position
9844
     */
9845
24.3k
    if (cur != NULL && ctxt->record_info) {
9846
0
        xmlParserNodeInfoPtr node_info;
9847
9848
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9849
0
        if (node_info != NULL) {
9850
0
            node_info->end_pos = ctxt->input->consumed +
9851
0
                                 (CUR_PTR - ctxt->input->base);
9852
0
            node_info->end_line = ctxt->input->line;
9853
0
        }
9854
0
    }
9855
24.3k
}
9856
9857
/**
9858
 * Parse the XML version value.
9859
 *
9860
 * @deprecated Internal function, don't use.
9861
 *
9862
 *     [26] VersionNum ::= '1.' [0-9]+
9863
 *
9864
 * In practice allow [0-9].[0-9]+ at that level
9865
 *
9866
 * @param ctxt  an XML parser context
9867
 * @returns the string giving the XML version number, or NULL
9868
 */
9869
xmlChar *
9870
716
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9871
716
    xmlChar *buf = NULL;
9872
716
    int len = 0;
9873
716
    int size = 10;
9874
716
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9875
716
                    XML_MAX_TEXT_LENGTH :
9876
716
                    XML_MAX_NAME_LENGTH;
9877
716
    xmlChar cur;
9878
9879
716
    buf = xmlMalloc(size);
9880
716
    if (buf == NULL) {
9881
0
  xmlErrMemory(ctxt);
9882
0
  return(NULL);
9883
0
    }
9884
716
    cur = CUR;
9885
716
    if (!((cur >= '0') && (cur <= '9'))) {
9886
21
  xmlFree(buf);
9887
21
  return(NULL);
9888
21
    }
9889
695
    buf[len++] = cur;
9890
695
    NEXT;
9891
695
    cur=CUR;
9892
695
    if (cur != '.') {
9893
18
  xmlFree(buf);
9894
18
  return(NULL);
9895
18
    }
9896
677
    buf[len++] = cur;
9897
677
    NEXT;
9898
677
    cur=CUR;
9899
1.48M
    while ((cur >= '0') && (cur <= '9')) {
9900
1.48M
  if (len + 1 >= size) {
9901
358
      xmlChar *tmp;
9902
358
            int newSize;
9903
9904
358
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9905
358
            if (newSize < 0) {
9906
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9907
0
                xmlFree(buf);
9908
0
                return(NULL);
9909
0
            }
9910
358
      tmp = xmlRealloc(buf, newSize);
9911
358
      if (tmp == NULL) {
9912
0
    xmlErrMemory(ctxt);
9913
0
          xmlFree(buf);
9914
0
    return(NULL);
9915
0
      }
9916
358
      buf = tmp;
9917
358
            size = newSize;
9918
358
  }
9919
1.48M
  buf[len++] = cur;
9920
1.48M
  NEXT;
9921
1.48M
  cur=CUR;
9922
1.48M
    }
9923
677
    buf[len] = 0;
9924
677
    return(buf);
9925
677
}
9926
9927
/**
9928
 * Parse the XML version.
9929
 *
9930
 * @deprecated Internal function, don't use.
9931
 *
9932
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9933
 *
9934
 *     [25] Eq ::= S? '=' S?
9935
 *
9936
 * @param ctxt  an XML parser context
9937
 * @returns the version string, e.g. "1.0"
9938
 */
9939
9940
xmlChar *
9941
3.45k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9942
3.45k
    xmlChar *version = NULL;
9943
9944
3.45k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9945
759
  SKIP(7);
9946
759
  SKIP_BLANKS;
9947
759
  if (RAW != '=') {
9948
15
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9949
15
      return(NULL);
9950
15
        }
9951
744
  NEXT;
9952
744
  SKIP_BLANKS;
9953
744
  if (RAW == '"') {
9954
123
      NEXT;
9955
123
      version = xmlParseVersionNum(ctxt);
9956
123
      if (RAW != '"') {
9957
33
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9958
33
      } else
9959
90
          NEXT;
9960
621
  } else if (RAW == '\''){
9961
593
      NEXT;
9962
593
      version = xmlParseVersionNum(ctxt);
9963
593
      if (RAW != '\'') {
9964
44
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9965
44
      } else
9966
549
          NEXT;
9967
593
  } else {
9968
28
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9969
28
  }
9970
744
    }
9971
3.44k
    return(version);
9972
3.45k
}
9973
9974
/**
9975
 * Parse the XML encoding name
9976
 *
9977
 * @deprecated Internal function, don't use.
9978
 *
9979
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9980
 *
9981
 * @param ctxt  an XML parser context
9982
 * @returns the encoding name value or NULL
9983
 */
9984
xmlChar *
9985
1.44k
xmlParseEncName(xmlParserCtxt *ctxt) {
9986
1.44k
    xmlChar *buf = NULL;
9987
1.44k
    int len = 0;
9988
1.44k
    int size = 10;
9989
1.44k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9990
1.44k
                    XML_MAX_TEXT_LENGTH :
9991
1.44k
                    XML_MAX_NAME_LENGTH;
9992
1.44k
    xmlChar cur;
9993
9994
1.44k
    cur = CUR;
9995
1.44k
    if (((cur >= 'a') && (cur <= 'z')) ||
9996
1.44k
        ((cur >= 'A') && (cur <= 'Z'))) {
9997
1.43k
  buf = xmlMalloc(size);
9998
1.43k
  if (buf == NULL) {
9999
0
      xmlErrMemory(ctxt);
10000
0
      return(NULL);
10001
0
  }
10002
10003
1.43k
  buf[len++] = cur;
10004
1.43k
  NEXT;
10005
1.43k
  cur = CUR;
10006
2.44M
  while (((cur >= 'a') && (cur <= 'z')) ||
10007
2.44M
         ((cur >= 'A') && (cur <= 'Z')) ||
10008
2.44M
         ((cur >= '0') && (cur <= '9')) ||
10009
2.44M
         (cur == '.') || (cur == '_') ||
10010
2.44M
         (cur == '-')) {
10011
2.44M
      if (len + 1 >= size) {
10012
1.07k
          xmlChar *tmp;
10013
1.07k
                int newSize;
10014
10015
1.07k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10016
1.07k
                if (newSize < 0) {
10017
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10018
0
                    xmlFree(buf);
10019
0
                    return(NULL);
10020
0
                }
10021
1.07k
    tmp = xmlRealloc(buf, newSize);
10022
1.07k
    if (tmp == NULL) {
10023
0
        xmlErrMemory(ctxt);
10024
0
        xmlFree(buf);
10025
0
        return(NULL);
10026
0
    }
10027
1.07k
    buf = tmp;
10028
1.07k
                size = newSize;
10029
1.07k
      }
10030
2.44M
      buf[len++] = cur;
10031
2.44M
      NEXT;
10032
2.44M
      cur = CUR;
10033
2.44M
        }
10034
1.43k
  buf[len] = 0;
10035
1.43k
    } else {
10036
14
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10037
14
    }
10038
1.44k
    return(buf);
10039
1.44k
}
10040
10041
/**
10042
 * Parse the XML encoding declaration
10043
 *
10044
 * @deprecated Internal function, don't use.
10045
 *
10046
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10047
 *                           "'" EncName "'")
10048
 *
10049
 * this setups the conversion filters.
10050
 *
10051
 * @param ctxt  an XML parser context
10052
 * @returns the encoding value or NULL
10053
 */
10054
10055
const xmlChar *
10056
3.42k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10057
3.42k
    xmlChar *encoding = NULL;
10058
10059
3.42k
    SKIP_BLANKS;
10060
3.42k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10061
1.96k
        return(NULL);
10062
10063
1.46k
    SKIP(8);
10064
1.46k
    SKIP_BLANKS;
10065
1.46k
    if (RAW != '=') {
10066
11
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10067
11
        return(NULL);
10068
11
    }
10069
1.45k
    NEXT;
10070
1.45k
    SKIP_BLANKS;
10071
1.45k
    if (RAW == '"') {
10072
881
        NEXT;
10073
881
        encoding = xmlParseEncName(ctxt);
10074
881
        if (RAW != '"') {
10075
100
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10076
100
            xmlFree(encoding);
10077
100
            return(NULL);
10078
100
        } else
10079
781
            NEXT;
10080
881
    } else if (RAW == '\''){
10081
566
        NEXT;
10082
566
        encoding = xmlParseEncName(ctxt);
10083
566
        if (RAW != '\'') {
10084
12
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10085
12
            xmlFree(encoding);
10086
12
            return(NULL);
10087
12
        } else
10088
554
            NEXT;
10089
566
    } else {
10090
4
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10091
4
    }
10092
10093
1.33k
    if (encoding == NULL)
10094
6
        return(NULL);
10095
10096
1.33k
    xmlSetDeclaredEncoding(ctxt, encoding);
10097
10098
1.33k
    return(ctxt->encoding);
10099
1.33k
}
10100
10101
/**
10102
 * Parse the XML standalone declaration
10103
 *
10104
 * @deprecated Internal function, don't use.
10105
 *
10106
 *     [32] SDDecl ::= S 'standalone' Eq
10107
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10108
 *
10109
 * [ VC: Standalone Document Declaration ]
10110
 * TODO The standalone document declaration must have the value "no"
10111
 * if any external markup declarations contain declarations of:
10112
 *  - attributes with default values, if elements to which these
10113
 *    attributes apply appear in the document without specifications
10114
 *    of values for these attributes, or
10115
 *  - entities (other than amp, lt, gt, apos, quot), if references
10116
 *    to those entities appear in the document, or
10117
 *  - attributes with values subject to normalization, where the
10118
 *    attribute appears in the document with a value which will change
10119
 *    as a result of normalization, or
10120
 *  - element types with element content, if white space occurs directly
10121
 *    within any instance of those types.
10122
 *
10123
 * @param ctxt  an XML parser context
10124
 * @returns
10125
 *   1 if standalone="yes"
10126
 *   0 if standalone="no"
10127
 *  -2 if standalone attribute is missing or invalid
10128
 *    (A standalone value of -2 means that the XML declaration was found,
10129
 *     but no value was specified for the standalone attribute).
10130
 */
10131
10132
int
10133
3.38k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10134
3.38k
    int standalone = -2;
10135
10136
3.38k
    SKIP_BLANKS;
10137
3.38k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10138
481
  SKIP(10);
10139
481
        SKIP_BLANKS;
10140
481
  if (RAW != '=') {
10141
4
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
4
      return(standalone);
10143
4
        }
10144
477
  NEXT;
10145
477
  SKIP_BLANKS;
10146
477
        if (RAW == '\''){
10147
419
      NEXT;
10148
419
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10149
407
          standalone = 0;
10150
407
                SKIP(2);
10151
407
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10152
12
                 (NXT(2) == 's')) {
10153
2
          standalone = 1;
10154
2
    SKIP(3);
10155
10
            } else {
10156
10
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10157
10
      }
10158
419
      if (RAW != '\'') {
10159
13
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10160
13
      } else
10161
406
          NEXT;
10162
419
  } else if (RAW == '"'){
10163
48
      NEXT;
10164
48
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10165
2
          standalone = 0;
10166
2
    SKIP(2);
10167
46
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10168
46
                 (NXT(2) == 's')) {
10169
33
          standalone = 1;
10170
33
                SKIP(3);
10171
33
            } else {
10172
13
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10173
13
      }
10174
48
      if (RAW != '"') {
10175
21
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10176
21
      } else
10177
27
          NEXT;
10178
48
  } else {
10179
10
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10180
10
        }
10181
477
    }
10182
3.38k
    return(standalone);
10183
3.38k
}
10184
10185
/**
10186
 * Parse an XML declaration header
10187
 *
10188
 * @deprecated Internal function, don't use.
10189
 *
10190
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10191
 * @param ctxt  an XML parser context
10192
 */
10193
10194
void
10195
3.45k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10196
3.45k
    xmlChar *version;
10197
10198
    /*
10199
     * This value for standalone indicates that the document has an
10200
     * XML declaration but it does not have a standalone attribute.
10201
     * It will be overwritten later if a standalone attribute is found.
10202
     */
10203
10204
3.45k
    ctxt->standalone = -2;
10205
10206
    /*
10207
     * We know that '<?xml' is here.
10208
     */
10209
3.45k
    SKIP(5);
10210
10211
3.45k
    if (!IS_BLANK_CH(RAW)) {
10212
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10213
0
                 "Blank needed after '<?xml'\n");
10214
0
    }
10215
3.45k
    SKIP_BLANKS;
10216
10217
    /*
10218
     * We must have the VersionInfo here.
10219
     */
10220
3.45k
    version = xmlParseVersionInfo(ctxt);
10221
3.45k
    if (version == NULL) {
10222
2.78k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10223
2.78k
    } else {
10224
677
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10225
      /*
10226
       * Changed here for XML-1.0 5th edition
10227
       */
10228
348
      if (ctxt->options & XML_PARSE_OLD10) {
10229
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10230
0
                "Unsupported version '%s'\n",
10231
0
                version);
10232
348
      } else {
10233
348
          if ((version[0] == '1') && ((version[1] == '.'))) {
10234
315
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10235
315
                      "Unsupported version '%s'\n",
10236
315
          version, NULL);
10237
315
    } else {
10238
33
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10239
33
              "Unsupported version '%s'\n",
10240
33
              version);
10241
33
    }
10242
348
      }
10243
348
  }
10244
677
  if (ctxt->version != NULL)
10245
0
      xmlFree(ctxt->version);
10246
677
  ctxt->version = version;
10247
677
    }
10248
10249
    /*
10250
     * We may have the encoding declaration
10251
     */
10252
3.45k
    if (!IS_BLANK_CH(RAW)) {
10253
2.81k
        if ((RAW == '?') && (NXT(1) == '>')) {
10254
34
      SKIP(2);
10255
34
      return;
10256
34
  }
10257
2.78k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10258
2.78k
    }
10259
3.42k
    xmlParseEncodingDecl(ctxt);
10260
10261
    /*
10262
     * We may have the standalone status.
10263
     */
10264
3.42k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10265
814
        if ((RAW == '?') && (NXT(1) == '>')) {
10266
38
      SKIP(2);
10267
38
      return;
10268
38
  }
10269
776
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10270
776
    }
10271
10272
    /*
10273
     * We can grow the input buffer freely at that point
10274
     */
10275
3.38k
    GROW;
10276
10277
3.38k
    SKIP_BLANKS;
10278
3.38k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10279
10280
3.38k
    SKIP_BLANKS;
10281
3.38k
    if ((RAW == '?') && (NXT(1) == '>')) {
10282
453
        SKIP(2);
10283
2.93k
    } else if (RAW == '>') {
10284
        /* Deprecated old WD ... */
10285
926
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10286
926
  NEXT;
10287
2.00k
    } else {
10288
2.00k
        int c;
10289
10290
2.00k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10291
359k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10292
359k
               ((c = CUR) != 0)) {
10293
358k
            NEXT;
10294
358k
            if (c == '>')
10295
1.43k
                break;
10296
358k
        }
10297
2.00k
    }
10298
3.38k
}
10299
10300
/**
10301
 * @since 2.14.0
10302
 *
10303
 * @param ctxt  parser context
10304
 * @returns the version from the XML declaration.
10305
 */
10306
const xmlChar *
10307
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10308
0
    if (ctxt == NULL)
10309
0
        return(NULL);
10310
10311
0
    return(ctxt->version);
10312
0
}
10313
10314
/**
10315
 * @since 2.14.0
10316
 *
10317
 * @param ctxt  parser context
10318
 * @returns the value from the standalone document declaration.
10319
 */
10320
int
10321
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10322
0
    if (ctxt == NULL)
10323
0
        return(0);
10324
10325
0
    return(ctxt->standalone);
10326
0
}
10327
10328
/**
10329
 * Parse an XML Misc* optional field.
10330
 *
10331
 * @deprecated Internal function, don't use.
10332
 *
10333
 *     [27] Misc ::= Comment | PI |  S
10334
 * @param ctxt  an XML parser context
10335
 */
10336
10337
void
10338
37.4k
xmlParseMisc(xmlParserCtxt *ctxt) {
10339
41.0k
    while (PARSER_STOPPED(ctxt) == 0) {
10340
40.8k
        SKIP_BLANKS;
10341
40.8k
        GROW;
10342
40.8k
        if ((RAW == '<') && (NXT(1) == '?')) {
10343
2.55k
      xmlParsePI(ctxt);
10344
38.2k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10345
1.06k
      xmlParseComment(ctxt);
10346
37.1k
        } else {
10347
37.1k
            break;
10348
37.1k
        }
10349
40.8k
    }
10350
37.4k
}
10351
10352
static void
10353
18.0k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10354
18.0k
    xmlDocPtr doc;
10355
10356
    /*
10357
     * SAX: end of the document processing.
10358
     */
10359
18.0k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10360
18.0k
        ctxt->sax->endDocument(ctxt->userData);
10361
10362
    /*
10363
     * Remove locally kept entity definitions if the tree was not built
10364
     */
10365
18.0k
    doc = ctxt->myDoc;
10366
18.0k
    if ((doc != NULL) &&
10367
18.0k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10368
1.62k
        xmlFreeDoc(doc);
10369
1.62k
        ctxt->myDoc = NULL;
10370
1.62k
    }
10371
18.0k
}
10372
10373
/**
10374
 * Parse an XML document and invoke the SAX handlers. This is useful
10375
 * if you're only interested in custom SAX callbacks. If you want a
10376
 * document tree, use #xmlCtxtParseDocument.
10377
 *
10378
 * @param ctxt  an XML parser context
10379
 * @returns 0, -1 in case of error.
10380
 */
10381
10382
int
10383
18.0k
xmlParseDocument(xmlParserCtxt *ctxt) {
10384
18.0k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10385
0
        return(-1);
10386
10387
18.0k
    GROW;
10388
10389
    /*
10390
     * SAX: detecting the level.
10391
     */
10392
18.0k
    xmlCtxtInitializeLate(ctxt);
10393
10394
18.0k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10395
18.0k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10396
18.0k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10397
18.0k
    }
10398
10399
18.0k
    xmlDetectEncoding(ctxt);
10400
10401
18.0k
    if (CUR == 0) {
10402
51
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10403
51
  return(-1);
10404
51
    }
10405
10406
18.0k
    GROW;
10407
18.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10408
10409
  /*
10410
   * Note that we will switch encoding on the fly.
10411
   */
10412
3.45k
  xmlParseXMLDecl(ctxt);
10413
3.45k
  SKIP_BLANKS;
10414
14.5k
    } else {
10415
14.5k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10416
14.5k
        if (ctxt->version == NULL) {
10417
0
            xmlErrMemory(ctxt);
10418
0
            return(-1);
10419
0
        }
10420
14.5k
    }
10421
18.0k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10422
14.9k
        ctxt->sax->startDocument(ctxt->userData);
10423
18.0k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10424
18.0k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10425
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10426
0
    }
10427
10428
    /*
10429
     * The Misc part of the Prolog
10430
     */
10431
18.0k
    xmlParseMisc(ctxt);
10432
10433
    /*
10434
     * Then possibly doc type declaration(s) and more Misc
10435
     * (doctypedecl Misc*)?
10436
     */
10437
18.0k
    GROW;
10438
18.0k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10439
10440
7.78k
  ctxt->inSubset = 1;
10441
7.78k
  xmlParseDocTypeDecl(ctxt);
10442
7.78k
  if (RAW == '[') {
10443
7.43k
      xmlParseInternalSubset(ctxt);
10444
7.43k
  } else if (RAW == '>') {
10445
123
            NEXT;
10446
123
        }
10447
10448
  /*
10449
   * Create and update the external subset.
10450
   */
10451
7.78k
  ctxt->inSubset = 2;
10452
7.78k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10453
7.78k
      (!ctxt->disableSAX))
10454
1.16k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10455
1.16k
                                ctxt->extSubSystem, ctxt->extSubURI);
10456
7.78k
  ctxt->inSubset = 0;
10457
10458
7.78k
        xmlCleanSpecialAttr(ctxt);
10459
10460
7.78k
  xmlParseMisc(ctxt);
10461
7.78k
    }
10462
10463
    /*
10464
     * Time to start parsing the tree itself
10465
     */
10466
18.0k
    GROW;
10467
18.0k
    if (RAW != '<') {
10468
6.36k
        if (ctxt->wellFormed)
10469
387
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10470
387
                           "Start tag expected, '<' not found\n");
10471
11.6k
    } else {
10472
11.6k
  xmlParseElement(ctxt);
10473
10474
  /*
10475
   * The Misc part at the end
10476
   */
10477
11.6k
  xmlParseMisc(ctxt);
10478
10479
11.6k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10480
11.6k
    }
10481
10482
18.0k
    ctxt->instate = XML_PARSER_EOF;
10483
18.0k
    xmlFinishDocument(ctxt);
10484
10485
18.0k
    if (! ctxt->wellFormed) {
10486
17.9k
  ctxt->valid = 0;
10487
17.9k
  return(-1);
10488
17.9k
    }
10489
10490
49
    return(0);
10491
18.0k
}
10492
10493
/**
10494
 * Parse a general parsed entity
10495
 * An external general parsed entity is well-formed if it matches the
10496
 * production labeled extParsedEnt.
10497
 *
10498
 * @deprecated Internal function, don't use.
10499
 *
10500
 *     [78] extParsedEnt ::= TextDecl? content
10501
 *
10502
 * @param ctxt  an XML parser context
10503
 * @returns 0, -1 in case of error. the parser context is augmented
10504
 *                as a result of the parsing.
10505
 */
10506
10507
int
10508
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10509
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10510
0
        return(-1);
10511
10512
0
    xmlCtxtInitializeLate(ctxt);
10513
10514
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10515
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10516
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10517
0
    }
10518
10519
0
    xmlDetectEncoding(ctxt);
10520
10521
0
    if (CUR == 0) {
10522
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10523
0
    }
10524
10525
    /*
10526
     * Check for the XMLDecl in the Prolog.
10527
     */
10528
0
    GROW;
10529
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10530
10531
  /*
10532
   * Note that we will switch encoding on the fly.
10533
   */
10534
0
  xmlParseXMLDecl(ctxt);
10535
0
  SKIP_BLANKS;
10536
0
    } else {
10537
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10538
0
    }
10539
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10540
0
        ctxt->sax->startDocument(ctxt->userData);
10541
10542
    /*
10543
     * Doing validity checking on chunk doesn't make sense
10544
     */
10545
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10546
0
    ctxt->validate = 0;
10547
0
    ctxt->depth = 0;
10548
10549
0
    xmlParseContentInternal(ctxt);
10550
10551
0
    if (ctxt->input->cur < ctxt->input->end)
10552
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10553
10554
    /*
10555
     * SAX: end of the document processing.
10556
     */
10557
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10558
0
        ctxt->sax->endDocument(ctxt->userData);
10559
10560
0
    if (! ctxt->wellFormed) return(-1);
10561
0
    return(0);
10562
0
}
10563
10564
#ifdef LIBXML_PUSH_ENABLED
10565
/************************************************************************
10566
 *                  *
10567
 *    Progressive parsing interfaces        *
10568
 *                  *
10569
 ************************************************************************/
10570
10571
/**
10572
 * Check whether the input buffer contains a character.
10573
 *
10574
 * @param ctxt  an XML parser context
10575
 * @param c  character
10576
 */
10577
static int
10578
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10579
0
    const xmlChar *cur;
10580
10581
0
    if (ctxt->checkIndex == 0) {
10582
0
        cur = ctxt->input->cur + 1;
10583
0
    } else {
10584
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10585
0
    }
10586
10587
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10588
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10589
10590
0
        if (index > LONG_MAX) {
10591
0
            ctxt->checkIndex = 0;
10592
0
            return(1);
10593
0
        }
10594
0
        ctxt->checkIndex = index;
10595
0
        return(0);
10596
0
    } else {
10597
0
        ctxt->checkIndex = 0;
10598
0
        return(1);
10599
0
    }
10600
0
}
10601
10602
/**
10603
 * Check whether the input buffer contains a string.
10604
 *
10605
 * @param ctxt  an XML parser context
10606
 * @param startDelta  delta to apply at the start
10607
 * @param str  string
10608
 * @param strLen  length of string
10609
 */
10610
static const xmlChar *
10611
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10612
0
                     const char *str, size_t strLen) {
10613
0
    const xmlChar *cur, *term;
10614
10615
0
    if (ctxt->checkIndex == 0) {
10616
0
        cur = ctxt->input->cur + startDelta;
10617
0
    } else {
10618
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10619
0
    }
10620
10621
0
    term = BAD_CAST strstr((const char *) cur, str);
10622
0
    if (term == NULL) {
10623
0
        const xmlChar *end = ctxt->input->end;
10624
0
        size_t index;
10625
10626
        /* Rescan (strLen - 1) characters. */
10627
0
        if ((size_t) (end - cur) < strLen)
10628
0
            end = cur;
10629
0
        else
10630
0
            end -= strLen - 1;
10631
0
        index = end - ctxt->input->cur;
10632
0
        if (index > LONG_MAX) {
10633
0
            ctxt->checkIndex = 0;
10634
0
            return(ctxt->input->end - strLen);
10635
0
        }
10636
0
        ctxt->checkIndex = index;
10637
0
    } else {
10638
0
        ctxt->checkIndex = 0;
10639
0
    }
10640
10641
0
    return(term);
10642
0
}
10643
10644
/**
10645
 * Check whether the input buffer contains terminated char data.
10646
 *
10647
 * @param ctxt  an XML parser context
10648
 */
10649
static int
10650
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10651
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10652
0
    const xmlChar *end = ctxt->input->end;
10653
0
    size_t index;
10654
10655
0
    while (cur < end) {
10656
0
        if ((*cur == '<') || (*cur == '&')) {
10657
0
            ctxt->checkIndex = 0;
10658
0
            return(1);
10659
0
        }
10660
0
        cur++;
10661
0
    }
10662
10663
0
    index = cur - ctxt->input->cur;
10664
0
    if (index > LONG_MAX) {
10665
0
        ctxt->checkIndex = 0;
10666
0
        return(1);
10667
0
    }
10668
0
    ctxt->checkIndex = index;
10669
0
    return(0);
10670
0
}
10671
10672
/**
10673
 * Check whether there's enough data in the input buffer to finish parsing
10674
 * a start tag. This has to take quotes into account.
10675
 *
10676
 * @param ctxt  an XML parser context
10677
 */
10678
static int
10679
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10680
0
    const xmlChar *cur;
10681
0
    const xmlChar *end = ctxt->input->end;
10682
0
    int state = ctxt->endCheckState;
10683
0
    size_t index;
10684
10685
0
    if (ctxt->checkIndex == 0)
10686
0
        cur = ctxt->input->cur + 1;
10687
0
    else
10688
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10689
10690
0
    while (cur < end) {
10691
0
        if (state) {
10692
0
            if (*cur == state)
10693
0
                state = 0;
10694
0
        } else if (*cur == '\'' || *cur == '"') {
10695
0
            state = *cur;
10696
0
        } else if (*cur == '>') {
10697
0
            ctxt->checkIndex = 0;
10698
0
            ctxt->endCheckState = 0;
10699
0
            return(1);
10700
0
        }
10701
0
        cur++;
10702
0
    }
10703
10704
0
    index = cur - ctxt->input->cur;
10705
0
    if (index > LONG_MAX) {
10706
0
        ctxt->checkIndex = 0;
10707
0
        ctxt->endCheckState = 0;
10708
0
        return(1);
10709
0
    }
10710
0
    ctxt->checkIndex = index;
10711
0
    ctxt->endCheckState = state;
10712
0
    return(0);
10713
0
}
10714
10715
/**
10716
 * Check whether there's enough data in the input buffer to finish parsing
10717
 * the internal subset.
10718
 *
10719
 * @param ctxt  an XML parser context
10720
 */
10721
static int
10722
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10723
    /*
10724
     * Sorry, but progressive parsing of the internal subset is not
10725
     * supported. We first check that the full content of the internal
10726
     * subset is available and parsing is launched only at that point.
10727
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10728
     * not in a ']]>' sequence which are conditional sections.
10729
     */
10730
0
    const xmlChar *cur, *start;
10731
0
    const xmlChar *end = ctxt->input->end;
10732
0
    int state = ctxt->endCheckState;
10733
0
    size_t index;
10734
10735
0
    if (ctxt->checkIndex == 0) {
10736
0
        cur = ctxt->input->cur + 1;
10737
0
    } else {
10738
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10739
0
    }
10740
0
    start = cur;
10741
10742
0
    while (cur < end) {
10743
0
        if (state == '-') {
10744
0
            if ((*cur == '-') &&
10745
0
                (cur[1] == '-') &&
10746
0
                (cur[2] == '>')) {
10747
0
                state = 0;
10748
0
                cur += 3;
10749
0
                start = cur;
10750
0
                continue;
10751
0
            }
10752
0
        }
10753
0
        else if (state == ']') {
10754
0
            if (*cur == '>') {
10755
0
                ctxt->checkIndex = 0;
10756
0
                ctxt->endCheckState = 0;
10757
0
                return(1);
10758
0
            }
10759
0
            if (IS_BLANK_CH(*cur)) {
10760
0
                state = ' ';
10761
0
            } else if (*cur != ']') {
10762
0
                state = 0;
10763
0
                start = cur;
10764
0
                continue;
10765
0
            }
10766
0
        }
10767
0
        else if (state == ' ') {
10768
0
            if (*cur == '>') {
10769
0
                ctxt->checkIndex = 0;
10770
0
                ctxt->endCheckState = 0;
10771
0
                return(1);
10772
0
            }
10773
0
            if (!IS_BLANK_CH(*cur)) {
10774
0
                state = 0;
10775
0
                start = cur;
10776
0
                continue;
10777
0
            }
10778
0
        }
10779
0
        else if (state != 0) {
10780
0
            if (*cur == state) {
10781
0
                state = 0;
10782
0
                start = cur + 1;
10783
0
            }
10784
0
        }
10785
0
        else if (*cur == '<') {
10786
0
            if ((cur[1] == '!') &&
10787
0
                (cur[2] == '-') &&
10788
0
                (cur[3] == '-')) {
10789
0
                state = '-';
10790
0
                cur += 4;
10791
                /* Don't treat <!--> as comment */
10792
0
                start = cur;
10793
0
                continue;
10794
0
            }
10795
0
        }
10796
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10797
0
            state = *cur;
10798
0
        }
10799
10800
0
        cur++;
10801
0
    }
10802
10803
    /*
10804
     * Rescan the three last characters to detect "<!--" and "-->"
10805
     * split across chunks.
10806
     */
10807
0
    if ((state == 0) || (state == '-')) {
10808
0
        if (cur - start < 3)
10809
0
            cur = start;
10810
0
        else
10811
0
            cur -= 3;
10812
0
    }
10813
0
    index = cur - ctxt->input->cur;
10814
0
    if (index > LONG_MAX) {
10815
0
        ctxt->checkIndex = 0;
10816
0
        ctxt->endCheckState = 0;
10817
0
        return(1);
10818
0
    }
10819
0
    ctxt->checkIndex = index;
10820
0
    ctxt->endCheckState = state;
10821
0
    return(0);
10822
0
}
10823
10824
/**
10825
 * Try to progress on parsing
10826
 *
10827
 * @param ctxt  an XML parser context
10828
 * @param terminate  last chunk indicator
10829
 * @returns zero if no parsing was possible
10830
 */
10831
static int
10832
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10833
0
    int ret = 0;
10834
0
    size_t avail;
10835
0
    xmlChar cur, next;
10836
10837
0
    if (ctxt->input == NULL)
10838
0
        return(0);
10839
10840
0
    if ((ctxt->input != NULL) &&
10841
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10842
0
        xmlParserShrink(ctxt);
10843
0
    }
10844
10845
0
    while (ctxt->disableSAX == 0) {
10846
0
        avail = ctxt->input->end - ctxt->input->cur;
10847
0
        if (avail < 1)
10848
0
      goto done;
10849
0
        switch (ctxt->instate) {
10850
0
            case XML_PARSER_EOF:
10851
          /*
10852
     * Document parsing is done !
10853
     */
10854
0
          goto done;
10855
0
            case XML_PARSER_START:
10856
                /*
10857
                 * Very first chars read from the document flow.
10858
                 */
10859
0
                if ((!terminate) && (avail < 4))
10860
0
                    goto done;
10861
10862
                /*
10863
                 * We need more bytes to detect EBCDIC code pages.
10864
                 * See xmlDetectEBCDIC.
10865
                 */
10866
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10867
0
                    (!terminate) && (avail < 200))
10868
0
                    goto done;
10869
10870
0
                xmlDetectEncoding(ctxt);
10871
0
                ctxt->instate = XML_PARSER_XML_DECL;
10872
0
    break;
10873
10874
0
            case XML_PARSER_XML_DECL:
10875
0
    if ((!terminate) && (avail < 2))
10876
0
        goto done;
10877
0
    cur = ctxt->input->cur[0];
10878
0
    next = ctxt->input->cur[1];
10879
0
          if ((cur == '<') && (next == '?')) {
10880
        /* PI or XML decl */
10881
0
        if ((!terminate) &&
10882
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10883
0
      goto done;
10884
0
        if ((ctxt->input->cur[2] == 'x') &&
10885
0
      (ctxt->input->cur[3] == 'm') &&
10886
0
      (ctxt->input->cur[4] == 'l') &&
10887
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10888
0
      ret += 5;
10889
0
      xmlParseXMLDecl(ctxt);
10890
0
        } else {
10891
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10892
0
                        if (ctxt->version == NULL) {
10893
0
                            xmlErrMemory(ctxt);
10894
0
                            break;
10895
0
                        }
10896
0
        }
10897
0
    } else {
10898
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10899
0
        if (ctxt->version == NULL) {
10900
0
            xmlErrMemory(ctxt);
10901
0
      break;
10902
0
        }
10903
0
    }
10904
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10905
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10906
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10907
0
                }
10908
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10909
0
                    (!ctxt->disableSAX))
10910
0
                    ctxt->sax->startDocument(ctxt->userData);
10911
0
                ctxt->instate = XML_PARSER_MISC;
10912
0
    break;
10913
0
            case XML_PARSER_START_TAG: {
10914
0
          const xmlChar *name;
10915
0
    const xmlChar *prefix = NULL;
10916
0
    const xmlChar *URI = NULL;
10917
0
                int line = ctxt->input->line;
10918
0
    int nbNs = 0;
10919
10920
0
    if ((!terminate) && (avail < 2))
10921
0
        goto done;
10922
0
    cur = ctxt->input->cur[0];
10923
0
          if (cur != '<') {
10924
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10925
0
                                   "Start tag expected, '<' not found");
10926
0
                    ctxt->instate = XML_PARSER_EOF;
10927
0
                    xmlFinishDocument(ctxt);
10928
0
        goto done;
10929
0
    }
10930
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10931
0
                    goto done;
10932
0
    if (ctxt->spaceNr == 0)
10933
0
        spacePush(ctxt, -1);
10934
0
    else if (*ctxt->space == -2)
10935
0
        spacePush(ctxt, -1);
10936
0
    else
10937
0
        spacePush(ctxt, *ctxt->space);
10938
0
#ifdef LIBXML_SAX1_ENABLED
10939
0
    if (ctxt->sax2)
10940
0
#endif /* LIBXML_SAX1_ENABLED */
10941
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10942
0
#ifdef LIBXML_SAX1_ENABLED
10943
0
    else
10944
0
        name = xmlParseStartTag(ctxt);
10945
0
#endif /* LIBXML_SAX1_ENABLED */
10946
0
    if (name == NULL) {
10947
0
        spacePop(ctxt);
10948
0
                    ctxt->instate = XML_PARSER_EOF;
10949
0
                    xmlFinishDocument(ctxt);
10950
0
        goto done;
10951
0
    }
10952
0
#ifdef LIBXML_VALID_ENABLED
10953
    /*
10954
     * [ VC: Root Element Type ]
10955
     * The Name in the document type declaration must match
10956
     * the element type of the root element.
10957
     */
10958
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10959
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10960
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10961
0
#endif /* LIBXML_VALID_ENABLED */
10962
10963
    /*
10964
     * Check for an Empty Element.
10965
     */
10966
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10967
0
        SKIP(2);
10968
10969
0
        if (ctxt->sax2) {
10970
0
      if ((ctxt->sax != NULL) &&
10971
0
          (ctxt->sax->endElementNs != NULL) &&
10972
0
          (!ctxt->disableSAX))
10973
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10974
0
                                  prefix, URI);
10975
0
      if (nbNs > 0)
10976
0
          xmlParserNsPop(ctxt, nbNs);
10977
0
#ifdef LIBXML_SAX1_ENABLED
10978
0
        } else {
10979
0
      if ((ctxt->sax != NULL) &&
10980
0
          (ctxt->sax->endElement != NULL) &&
10981
0
          (!ctxt->disableSAX))
10982
0
          ctxt->sax->endElement(ctxt->userData, name);
10983
0
#endif /* LIBXML_SAX1_ENABLED */
10984
0
        }
10985
0
        spacePop(ctxt);
10986
0
    } else if (RAW == '>') {
10987
0
        NEXT;
10988
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10989
0
    } else {
10990
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10991
0
           "Couldn't find end of Start Tag %s\n",
10992
0
           name);
10993
0
        nodePop(ctxt);
10994
0
        spacePop(ctxt);
10995
0
                    if (nbNs > 0)
10996
0
                        xmlParserNsPop(ctxt, nbNs);
10997
0
    }
10998
10999
0
                if (ctxt->nameNr == 0)
11000
0
                    ctxt->instate = XML_PARSER_EPILOG;
11001
0
                else
11002
0
                    ctxt->instate = XML_PARSER_CONTENT;
11003
0
                break;
11004
0
      }
11005
0
            case XML_PARSER_CONTENT: {
11006
0
    cur = ctxt->input->cur[0];
11007
11008
0
    if (cur == '<') {
11009
0
                    if ((!terminate) && (avail < 2))
11010
0
                        goto done;
11011
0
        next = ctxt->input->cur[1];
11012
11013
0
                    if (next == '/') {
11014
0
                        ctxt->instate = XML_PARSER_END_TAG;
11015
0
                        break;
11016
0
                    } else if (next == '?') {
11017
0
                        if ((!terminate) &&
11018
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11019
0
                            goto done;
11020
0
                        xmlParsePI(ctxt);
11021
0
                        ctxt->instate = XML_PARSER_CONTENT;
11022
0
                        break;
11023
0
                    } else if (next == '!') {
11024
0
                        if ((!terminate) && (avail < 3))
11025
0
                            goto done;
11026
0
                        next = ctxt->input->cur[2];
11027
11028
0
                        if (next == '-') {
11029
0
                            if ((!terminate) && (avail < 4))
11030
0
                                goto done;
11031
0
                            if (ctxt->input->cur[3] == '-') {
11032
0
                                if ((!terminate) &&
11033
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11034
0
                                    goto done;
11035
0
                                xmlParseComment(ctxt);
11036
0
                                ctxt->instate = XML_PARSER_CONTENT;
11037
0
                                break;
11038
0
                            }
11039
0
                        } else if (next == '[') {
11040
0
                            if ((!terminate) && (avail < 9))
11041
0
                                goto done;
11042
0
                            if ((ctxt->input->cur[2] == '[') &&
11043
0
                                (ctxt->input->cur[3] == 'C') &&
11044
0
                                (ctxt->input->cur[4] == 'D') &&
11045
0
                                (ctxt->input->cur[5] == 'A') &&
11046
0
                                (ctxt->input->cur[6] == 'T') &&
11047
0
                                (ctxt->input->cur[7] == 'A') &&
11048
0
                                (ctxt->input->cur[8] == '[')) {
11049
0
                                if ((!terminate) &&
11050
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11051
0
                                    goto done;
11052
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11053
0
                                xmlParseCDSect(ctxt);
11054
0
                                ctxt->instate = XML_PARSER_CONTENT;
11055
0
                                break;
11056
0
                            }
11057
0
                        }
11058
0
                    }
11059
0
    } else if (cur == '&') {
11060
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11061
0
      goto done;
11062
0
        xmlParseReference(ctxt);
11063
0
                    break;
11064
0
    } else {
11065
        /* TODO Avoid the extra copy, handle directly !!! */
11066
        /*
11067
         * Goal of the following test is:
11068
         *  - minimize calls to the SAX 'character' callback
11069
         *    when they are mergeable
11070
         *  - handle an problem for isBlank when we only parse
11071
         *    a sequence of blank chars and the next one is
11072
         *    not available to check against '<' presence.
11073
         *  - tries to homogenize the differences in SAX
11074
         *    callbacks between the push and pull versions
11075
         *    of the parser.
11076
         */
11077
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11078
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11079
0
          goto done;
11080
0
                    }
11081
0
                    ctxt->checkIndex = 0;
11082
0
        xmlParseCharDataInternal(ctxt, !terminate);
11083
0
                    break;
11084
0
    }
11085
11086
0
                ctxt->instate = XML_PARSER_START_TAG;
11087
0
    break;
11088
0
      }
11089
0
            case XML_PARSER_END_TAG:
11090
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11091
0
        goto done;
11092
0
    if (ctxt->sax2) {
11093
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11094
0
        nameNsPop(ctxt);
11095
0
    }
11096
0
#ifdef LIBXML_SAX1_ENABLED
11097
0
      else
11098
0
        xmlParseEndTag1(ctxt, 0);
11099
0
#endif /* LIBXML_SAX1_ENABLED */
11100
0
    if (ctxt->nameNr == 0) {
11101
0
        ctxt->instate = XML_PARSER_EPILOG;
11102
0
    } else {
11103
0
        ctxt->instate = XML_PARSER_CONTENT;
11104
0
    }
11105
0
    break;
11106
0
            case XML_PARSER_MISC:
11107
0
            case XML_PARSER_PROLOG:
11108
0
            case XML_PARSER_EPILOG:
11109
0
    SKIP_BLANKS;
11110
0
                avail = ctxt->input->end - ctxt->input->cur;
11111
0
    if (avail < 1)
11112
0
        goto done;
11113
0
    if (ctxt->input->cur[0] == '<') {
11114
0
                    if ((!terminate) && (avail < 2))
11115
0
                        goto done;
11116
0
                    next = ctxt->input->cur[1];
11117
0
                    if (next == '?') {
11118
0
                        if ((!terminate) &&
11119
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11120
0
                            goto done;
11121
0
                        xmlParsePI(ctxt);
11122
0
                        break;
11123
0
                    } else if (next == '!') {
11124
0
                        if ((!terminate) && (avail < 3))
11125
0
                            goto done;
11126
11127
0
                        if (ctxt->input->cur[2] == '-') {
11128
0
                            if ((!terminate) && (avail < 4))
11129
0
                                goto done;
11130
0
                            if (ctxt->input->cur[3] == '-') {
11131
0
                                if ((!terminate) &&
11132
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11133
0
                                    goto done;
11134
0
                                xmlParseComment(ctxt);
11135
0
                                break;
11136
0
                            }
11137
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11138
0
                            if ((!terminate) && (avail < 9))
11139
0
                                goto done;
11140
0
                            if ((ctxt->input->cur[2] == 'D') &&
11141
0
                                (ctxt->input->cur[3] == 'O') &&
11142
0
                                (ctxt->input->cur[4] == 'C') &&
11143
0
                                (ctxt->input->cur[5] == 'T') &&
11144
0
                                (ctxt->input->cur[6] == 'Y') &&
11145
0
                                (ctxt->input->cur[7] == 'P') &&
11146
0
                                (ctxt->input->cur[8] == 'E')) {
11147
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11148
0
                                    goto done;
11149
0
                                ctxt->inSubset = 1;
11150
0
                                xmlParseDocTypeDecl(ctxt);
11151
0
                                if (RAW == '[') {
11152
0
                                    ctxt->instate = XML_PARSER_DTD;
11153
0
                                } else {
11154
0
                                    if (RAW == '>')
11155
0
                                        NEXT;
11156
                                    /*
11157
                                     * Create and update the external subset.
11158
                                     */
11159
0
                                    ctxt->inSubset = 2;
11160
0
                                    if ((ctxt->sax != NULL) &&
11161
0
                                        (!ctxt->disableSAX) &&
11162
0
                                        (ctxt->sax->externalSubset != NULL))
11163
0
                                        ctxt->sax->externalSubset(
11164
0
                                                ctxt->userData,
11165
0
                                                ctxt->intSubName,
11166
0
                                                ctxt->extSubSystem,
11167
0
                                                ctxt->extSubURI);
11168
0
                                    ctxt->inSubset = 0;
11169
0
                                    xmlCleanSpecialAttr(ctxt);
11170
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11171
0
                                }
11172
0
                                break;
11173
0
                            }
11174
0
                        }
11175
0
                    }
11176
0
                }
11177
11178
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11179
0
                    if (ctxt->errNo == XML_ERR_OK)
11180
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11181
0
        ctxt->instate = XML_PARSER_EOF;
11182
0
                    xmlFinishDocument(ctxt);
11183
0
                } else {
11184
0
        ctxt->instate = XML_PARSER_START_TAG;
11185
0
    }
11186
0
    break;
11187
0
            case XML_PARSER_DTD: {
11188
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11189
0
                    goto done;
11190
0
    xmlParseInternalSubset(ctxt);
11191
0
    ctxt->inSubset = 2;
11192
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11193
0
        (ctxt->sax->externalSubset != NULL))
11194
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11195
0
          ctxt->extSubSystem, ctxt->extSubURI);
11196
0
    ctxt->inSubset = 0;
11197
0
    xmlCleanSpecialAttr(ctxt);
11198
0
    ctxt->instate = XML_PARSER_PROLOG;
11199
0
                break;
11200
0
      }
11201
0
            default:
11202
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11203
0
      "PP: internal error\n");
11204
0
    ctxt->instate = XML_PARSER_EOF;
11205
0
    break;
11206
0
  }
11207
0
    }
11208
0
done:
11209
0
    return(ret);
11210
0
}
11211
11212
/**
11213
 * Parse a chunk of memory in push parser mode.
11214
 *
11215
 * Assumes that the parser context was initialized with
11216
 * #xmlCreatePushParserCtxt.
11217
 *
11218
 * The last chunk, which will often be empty, must be marked with
11219
 * the `terminate` flag. With the default SAX callbacks, the resulting
11220
 * document will be available in ctxt->myDoc. This pointer will not
11221
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11222
 * caller. If the document isn't well-formed, it will still be returned
11223
 * in ctxt->myDoc.
11224
 *
11225
 * As an exception, #xmlCtxtResetPush will free the document in
11226
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11227
 * the document.
11228
 *
11229
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11230
 * result document.
11231
 *
11232
 * @param ctxt  an XML parser context
11233
 * @param chunk  chunk of memory
11234
 * @param size  size of chunk in bytes
11235
 * @param terminate  last chunk indicator
11236
 * @returns an xmlParserErrors code (0 on success).
11237
 */
11238
int
11239
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11240
0
              int terminate) {
11241
0
    size_t curBase;
11242
0
    size_t maxLength;
11243
0
    size_t pos;
11244
0
    int end_in_lf = 0;
11245
0
    int res;
11246
11247
0
    if ((ctxt == NULL) || (size < 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
0
    if ((chunk == NULL) && (size > 0))
11250
0
        return(XML_ERR_ARGUMENT);
11251
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11252
0
        return(XML_ERR_ARGUMENT);
11253
0
    if (ctxt->disableSAX != 0)
11254
0
        return(ctxt->errNo);
11255
11256
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11257
0
    if (ctxt->instate == XML_PARSER_START)
11258
0
        xmlCtxtInitializeLate(ctxt);
11259
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11260
0
        (chunk[size - 1] == '\r')) {
11261
0
  end_in_lf = 1;
11262
0
  size--;
11263
0
    }
11264
11265
    /*
11266
     * Also push an empty chunk to make sure that the raw buffer
11267
     * will be flushed if there is an encoder.
11268
     */
11269
0
    pos = ctxt->input->cur - ctxt->input->base;
11270
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11271
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11272
0
    if (res < 0) {
11273
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11274
0
        return(ctxt->errNo);
11275
0
    }
11276
11277
0
    xmlParseTryOrFinish(ctxt, terminate);
11278
11279
0
    curBase = ctxt->input->cur - ctxt->input->base;
11280
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11281
0
                XML_MAX_HUGE_LENGTH :
11282
0
                XML_MAX_LOOKUP_LIMIT;
11283
0
    if (curBase > maxLength) {
11284
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11285
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11286
0
    }
11287
11288
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11289
0
        return(ctxt->errNo);
11290
11291
0
    if (end_in_lf == 1) {
11292
0
  pos = ctxt->input->cur - ctxt->input->base;
11293
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11294
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11295
0
        if (res < 0) {
11296
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11297
0
            return(ctxt->errNo);
11298
0
        }
11299
0
    }
11300
0
    if (terminate) {
11301
  /*
11302
   * Check for termination
11303
   */
11304
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11305
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11306
0
            if (ctxt->nameNr > 0) {
11307
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11308
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11309
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11310
0
                        "Premature end of data in tag %s line %d\n",
11311
0
                        name, line, NULL);
11312
0
            } else if (ctxt->instate == XML_PARSER_START) {
11313
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11314
0
            } else {
11315
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11316
0
                               "Start tag expected, '<' not found\n");
11317
0
            }
11318
0
        } else {
11319
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11320
0
        }
11321
0
  if (ctxt->instate != XML_PARSER_EOF) {
11322
0
            ctxt->instate = XML_PARSER_EOF;
11323
0
            xmlFinishDocument(ctxt);
11324
0
  }
11325
0
    }
11326
0
    if (ctxt->wellFormed == 0)
11327
0
  return((xmlParserErrors) ctxt->errNo);
11328
0
    else
11329
0
        return(0);
11330
0
}
11331
11332
/************************************************************************
11333
 *                  *
11334
 *    I/O front end functions to the parser     *
11335
 *                  *
11336
 ************************************************************************/
11337
11338
/**
11339
 * Create a parser context for using the XML parser in push mode.
11340
 * See #xmlParseChunk.
11341
 *
11342
 * Passing an initial chunk is useless and deprecated.
11343
 *
11344
 * The push parser doesn't support recovery mode or the
11345
 * XML_PARSE_NOBLANKS option.
11346
 *
11347
 * `filename` is used as base URI to fetch external entities and for
11348
 * error reports.
11349
 *
11350
 * @param sax  a SAX handler (optional)
11351
 * @param user_data  user data for SAX callbacks (optional)
11352
 * @param chunk  initial chunk (optional, deprecated)
11353
 * @param size  size of initial chunk in bytes
11354
 * @param filename  file name or URI (optional)
11355
 * @returns the new parser context or NULL if a memory allocation
11356
 * failed.
11357
 */
11358
11359
xmlParserCtxt *
11360
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11361
0
                        const char *chunk, int size, const char *filename) {
11362
0
    xmlParserCtxtPtr ctxt;
11363
0
    xmlParserInputPtr input;
11364
11365
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11366
0
    if (ctxt == NULL)
11367
0
  return(NULL);
11368
11369
0
    ctxt->options &= ~XML_PARSE_NODICT;
11370
0
    ctxt->dictNames = 1;
11371
11372
0
    input = xmlNewPushInput(filename, chunk, size);
11373
0
    if (input == NULL) {
11374
0
  xmlFreeParserCtxt(ctxt);
11375
0
  return(NULL);
11376
0
    }
11377
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11378
0
        xmlFreeInputStream(input);
11379
0
        xmlFreeParserCtxt(ctxt);
11380
0
        return(NULL);
11381
0
    }
11382
11383
0
    return(ctxt);
11384
0
}
11385
#endif /* LIBXML_PUSH_ENABLED */
11386
11387
/**
11388
 * Blocks further parser processing
11389
 *
11390
 * @param ctxt  an XML parser context
11391
 */
11392
void
11393
0
xmlStopParser(xmlParserCtxt *ctxt) {
11394
0
    if (ctxt == NULL)
11395
0
        return;
11396
11397
    /* This stops the parser */
11398
0
    ctxt->disableSAX = 2;
11399
11400
    /*
11401
     * xmlStopParser is often called from error handlers,
11402
     * so we can't raise an error here to avoid infinite
11403
     * loops. Just make sure that an error condition is
11404
     * reported.
11405
     */
11406
0
    if (ctxt->errNo == XML_ERR_OK) {
11407
0
        ctxt->errNo = XML_ERR_USER_STOP;
11408
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11409
0
        ctxt->wellFormed = 0;
11410
0
    }
11411
0
}
11412
11413
/**
11414
 * Create a parser context for using the XML parser with an existing
11415
 * I/O stream
11416
 *
11417
 * @param sax  a SAX handler (optional)
11418
 * @param user_data  user data for SAX callbacks (optional)
11419
 * @param ioread  an I/O read function
11420
 * @param ioclose  an I/O close function (optional)
11421
 * @param ioctx  an I/O handler
11422
 * @param enc  the charset encoding if known (deprecated)
11423
 * @returns the new parser context or NULL
11424
 */
11425
xmlParserCtxt *
11426
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11427
                      xmlInputReadCallback ioread,
11428
                      xmlInputCloseCallback ioclose,
11429
0
                      void *ioctx, xmlCharEncoding enc) {
11430
0
    xmlParserCtxtPtr ctxt;
11431
0
    xmlParserInputPtr input;
11432
0
    const char *encoding;
11433
11434
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11435
0
    if (ctxt == NULL)
11436
0
  return(NULL);
11437
11438
0
    encoding = xmlGetCharEncodingName(enc);
11439
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11440
0
                                  encoding, 0);
11441
0
    if (input == NULL) {
11442
0
  xmlFreeParserCtxt(ctxt);
11443
0
        return (NULL);
11444
0
    }
11445
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11446
0
        xmlFreeInputStream(input);
11447
0
        xmlFreeParserCtxt(ctxt);
11448
0
        return(NULL);
11449
0
    }
11450
11451
0
    return(ctxt);
11452
0
}
11453
11454
#ifdef LIBXML_VALID_ENABLED
11455
/************************************************************************
11456
 *                  *
11457
 *    Front ends when parsing a DTD       *
11458
 *                  *
11459
 ************************************************************************/
11460
11461
/**
11462
 * Parse a DTD.
11463
 *
11464
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11465
 * to make external entities work.
11466
 *
11467
 * @since 2.14.0
11468
 *
11469
 * @param ctxt  a parser context
11470
 * @param input  a parser input
11471
 * @param publicId  public ID of the DTD (optional)
11472
 * @param systemId  system ID of the DTD (optional)
11473
 * @returns the resulting xmlDtd or NULL in case of error.
11474
 * `input` will be freed by the function in any case.
11475
 */
11476
xmlDtd *
11477
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11478
0
                const xmlChar *publicId, const xmlChar *systemId) {
11479
0
    xmlDtdPtr ret = NULL;
11480
11481
0
    if ((ctxt == NULL) || (input == NULL)) {
11482
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11483
0
        xmlFreeInputStream(input);
11484
0
        return(NULL);
11485
0
    }
11486
11487
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11488
0
        xmlFreeInputStream(input);
11489
0
        return(NULL);
11490
0
    }
11491
11492
0
    if (publicId == NULL)
11493
0
        publicId = BAD_CAST "none";
11494
0
    if (systemId == NULL)
11495
0
        systemId = BAD_CAST "none";
11496
11497
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11498
0
    if (ctxt->myDoc == NULL) {
11499
0
        xmlErrMemory(ctxt);
11500
0
        goto error;
11501
0
    }
11502
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11503
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11504
0
                                       publicId, systemId);
11505
0
    if (ctxt->myDoc->extSubset == NULL) {
11506
0
        xmlErrMemory(ctxt);
11507
0
        xmlFreeDoc(ctxt->myDoc);
11508
0
        goto error;
11509
0
    }
11510
11511
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11512
11513
0
    if (ctxt->wellFormed) {
11514
0
        ret = ctxt->myDoc->extSubset;
11515
0
        ctxt->myDoc->extSubset = NULL;
11516
0
        if (ret != NULL) {
11517
0
            xmlNodePtr tmp;
11518
11519
0
            ret->doc = NULL;
11520
0
            tmp = ret->children;
11521
0
            while (tmp != NULL) {
11522
0
                tmp->doc = NULL;
11523
0
                tmp = tmp->next;
11524
0
            }
11525
0
        }
11526
0
    } else {
11527
0
        ret = NULL;
11528
0
    }
11529
0
    xmlFreeDoc(ctxt->myDoc);
11530
0
    ctxt->myDoc = NULL;
11531
11532
0
error:
11533
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11534
11535
0
    return(ret);
11536
0
}
11537
11538
/**
11539
 * Load and parse a DTD
11540
 *
11541
 * @deprecated Use #xmlCtxtParseDtd.
11542
 *
11543
 * @param sax  the SAX handler block or NULL
11544
 * @param input  an Input Buffer
11545
 * @param enc  the charset encoding if known
11546
 * @returns the resulting xmlDtd or NULL in case of error.
11547
 * `input` will be freed by the function in any case.
11548
 */
11549
11550
xmlDtd *
11551
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11552
0
        xmlCharEncoding enc) {
11553
0
    xmlDtdPtr ret = NULL;
11554
0
    xmlParserCtxtPtr ctxt;
11555
0
    xmlParserInputPtr pinput = NULL;
11556
11557
0
    if (input == NULL)
11558
0
  return(NULL);
11559
11560
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11561
0
    if (ctxt == NULL) {
11562
0
        xmlFreeParserInputBuffer(input);
11563
0
  return(NULL);
11564
0
    }
11565
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11566
11567
    /*
11568
     * generate a parser input from the I/O handler
11569
     */
11570
11571
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11572
0
    if (pinput == NULL) {
11573
0
        xmlFreeParserInputBuffer(input);
11574
0
  xmlFreeParserCtxt(ctxt);
11575
0
  return(NULL);
11576
0
    }
11577
11578
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11579
0
        xmlSwitchEncoding(ctxt, enc);
11580
0
    }
11581
11582
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11583
11584
0
    xmlFreeParserCtxt(ctxt);
11585
0
    return(ret);
11586
0
}
11587
11588
/**
11589
 * Load and parse an external subset.
11590
 *
11591
 * @deprecated Use #xmlCtxtParseDtd.
11592
 *
11593
 * @param sax  the SAX handler block
11594
 * @param publicId  public identifier of the DTD (optional)
11595
 * @param systemId  system identifier (URL) of the DTD
11596
 * @returns the resulting xmlDtd or NULL in case of error.
11597
 */
11598
11599
xmlDtd *
11600
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11601
0
               const xmlChar *systemId) {
11602
0
    xmlDtdPtr ret = NULL;
11603
0
    xmlParserCtxtPtr ctxt;
11604
0
    xmlParserInputPtr input = NULL;
11605
0
    xmlChar* systemIdCanonic;
11606
11607
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11608
11609
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11610
0
    if (ctxt == NULL) {
11611
0
  return(NULL);
11612
0
    }
11613
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11614
11615
    /*
11616
     * Canonicalise the system ID
11617
     */
11618
0
    systemIdCanonic = xmlCanonicPath(systemId);
11619
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11620
0
  xmlFreeParserCtxt(ctxt);
11621
0
  return(NULL);
11622
0
    }
11623
11624
    /*
11625
     * Ask the Entity resolver to load the damn thing
11626
     */
11627
11628
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11629
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11630
0
                                   systemIdCanonic);
11631
0
    if (input == NULL) {
11632
0
  xmlFreeParserCtxt(ctxt);
11633
0
  if (systemIdCanonic != NULL)
11634
0
      xmlFree(systemIdCanonic);
11635
0
  return(NULL);
11636
0
    }
11637
11638
0
    if (input->filename == NULL)
11639
0
  input->filename = (char *) systemIdCanonic;
11640
0
    else
11641
0
  xmlFree(systemIdCanonic);
11642
11643
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11644
11645
0
    xmlFreeParserCtxt(ctxt);
11646
0
    return(ret);
11647
0
}
11648
11649
11650
/**
11651
 * Load and parse an external subset.
11652
 *
11653
 * @param publicId  public identifier of the DTD (optional)
11654
 * @param systemId  system identifier (URL) of the DTD
11655
 * @returns the resulting xmlDtd or NULL in case of error.
11656
 */
11657
11658
xmlDtd *
11659
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11660
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11661
0
}
11662
#endif /* LIBXML_VALID_ENABLED */
11663
11664
/************************************************************************
11665
 *                  *
11666
 *    Front ends when parsing an Entity     *
11667
 *                  *
11668
 ************************************************************************/
11669
11670
static xmlNodePtr
11671
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11672
1.62k
                            int hasTextDecl, int buildTree) {
11673
1.62k
    xmlNodePtr root = NULL;
11674
1.62k
    xmlNodePtr list = NULL;
11675
1.62k
    xmlChar *rootName = BAD_CAST "#root";
11676
1.62k
    int result;
11677
11678
1.62k
    if (buildTree) {
11679
1.62k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11680
1.62k
        if (root == NULL) {
11681
0
            xmlErrMemory(ctxt);
11682
0
            goto error;
11683
0
        }
11684
1.62k
    }
11685
11686
1.62k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11687
0
        goto error;
11688
11689
1.62k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11690
1.62k
    spacePush(ctxt, -1);
11691
11692
1.62k
    if (buildTree)
11693
1.62k
        nodePush(ctxt, root);
11694
11695
1.62k
    if (hasTextDecl) {
11696
0
        xmlDetectEncoding(ctxt);
11697
11698
        /*
11699
         * Parse a possible text declaration first
11700
         */
11701
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11702
0
            (IS_BLANK_CH(NXT(5)))) {
11703
0
            xmlParseTextDecl(ctxt);
11704
            /*
11705
             * An XML-1.0 document can't reference an entity not XML-1.0
11706
             */
11707
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11708
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11709
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11710
0
                               "Version mismatch between document and "
11711
0
                               "entity\n");
11712
0
            }
11713
0
        }
11714
0
    }
11715
11716
1.62k
    xmlParseContentInternal(ctxt);
11717
11718
1.62k
    if (ctxt->input->cur < ctxt->input->end)
11719
67
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11720
11721
1.62k
    if ((ctxt->wellFormed) ||
11722
1.62k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11723
1.43k
        if (root != NULL) {
11724
1.43k
            xmlNodePtr cur;
11725
11726
            /*
11727
             * Unlink newly created node list.
11728
             */
11729
1.43k
            list = root->children;
11730
1.43k
            root->children = NULL;
11731
1.43k
            root->last = NULL;
11732
3.09k
            for (cur = list; cur != NULL; cur = cur->next)
11733
1.66k
                cur->parent = NULL;
11734
1.43k
        }
11735
1.43k
    }
11736
11737
    /*
11738
     * Read the rest of the stream in case of errors. We want
11739
     * to account for the whole entity size.
11740
     */
11741
1.62k
    do {
11742
1.62k
        ctxt->input->cur = ctxt->input->end;
11743
1.62k
        xmlParserShrink(ctxt);
11744
1.62k
        result = xmlParserGrow(ctxt);
11745
1.62k
    } while (result > 0);
11746
11747
1.62k
    if (buildTree)
11748
1.62k
        nodePop(ctxt);
11749
11750
1.62k
    namePop(ctxt);
11751
1.62k
    spacePop(ctxt);
11752
11753
1.62k
    xmlCtxtPopInput(ctxt);
11754
11755
1.62k
error:
11756
1.62k
    xmlFreeNode(root);
11757
11758
1.62k
    return(list);
11759
1.62k
}
11760
11761
static void
11762
1.63k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11763
1.63k
    xmlParserInputPtr input;
11764
1.63k
    xmlNodePtr list;
11765
1.63k
    unsigned long consumed;
11766
1.63k
    int isExternal;
11767
1.63k
    int buildTree;
11768
1.63k
    int oldMinNsIndex;
11769
1.63k
    int oldNodelen, oldNodemem;
11770
11771
1.63k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11772
1.63k
    buildTree = (ctxt->node != NULL);
11773
11774
    /*
11775
     * Recursion check
11776
     */
11777
1.63k
    if (ent->flags & XML_ENT_EXPANDING) {
11778
15
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11779
15
        goto error;
11780
15
    }
11781
11782
    /*
11783
     * Load entity
11784
     */
11785
1.62k
    input = xmlNewEntityInputStream(ctxt, ent);
11786
1.62k
    if (input == NULL)
11787
0
        goto error;
11788
11789
    /*
11790
     * When building a tree, we need to limit the scope of namespace
11791
     * declarations, so that entities don't reference xmlNs structs
11792
     * from the parent of a reference.
11793
     */
11794
1.62k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11795
1.62k
    if (buildTree)
11796
1.62k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11797
11798
1.62k
    oldNodelen = ctxt->nodelen;
11799
1.62k
    oldNodemem = ctxt->nodemem;
11800
1.62k
    ctxt->nodelen = 0;
11801
1.62k
    ctxt->nodemem = 0;
11802
11803
    /*
11804
     * Parse content
11805
     *
11806
     * This initiates a recursive call chain:
11807
     *
11808
     * - xmlCtxtParseContentInternal
11809
     * - xmlParseContentInternal
11810
     * - xmlParseReference
11811
     * - xmlCtxtParseEntity
11812
     *
11813
     * The nesting depth is limited by the maximum number of inputs,
11814
     * see xmlCtxtPushInput.
11815
     *
11816
     * It's possible to make this non-recursive (minNsIndex must be
11817
     * stored in the input struct) at the expense of code readability.
11818
     */
11819
11820
1.62k
    ent->flags |= XML_ENT_EXPANDING;
11821
11822
1.62k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11823
11824
1.62k
    ent->flags &= ~XML_ENT_EXPANDING;
11825
11826
1.62k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11827
1.62k
    ctxt->nodelen = oldNodelen;
11828
1.62k
    ctxt->nodemem = oldNodemem;
11829
11830
    /*
11831
     * Entity size accounting
11832
     */
11833
1.62k
    consumed = input->consumed;
11834
1.62k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11835
11836
1.62k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11837
983
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11838
11839
1.62k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11840
983
        if (isExternal)
11841
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11842
11843
983
        ent->children = list;
11844
11845
2.64k
        while (list != NULL) {
11846
1.66k
            list->parent = (xmlNodePtr) ent;
11847
11848
            /*
11849
             * Downstream code like the nginx xslt module can set
11850
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11851
             * might have a different or a NULL document.
11852
             */
11853
1.66k
            if (list->doc != ent->doc)
11854
0
                xmlSetTreeDoc(list, ent->doc);
11855
11856
1.66k
            if (list->next == NULL)
11857
740
                ent->last = list;
11858
1.66k
            list = list->next;
11859
1.66k
        }
11860
983
    } else {
11861
637
        xmlFreeNodeList(list);
11862
637
    }
11863
11864
1.62k
    xmlFreeInputStream(input);
11865
11866
1.63k
error:
11867
1.63k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11868
1.63k
}
11869
11870
/**
11871
 * Parse an external general entity within an existing parsing context
11872
 * An external general parsed entity is well-formed if it matches the
11873
 * production labeled extParsedEnt.
11874
 *
11875
 *     [78] extParsedEnt ::= TextDecl? content
11876
 *
11877
 * @param ctxt  the existing parsing context
11878
 * @param URL  the URL for the entity to load
11879
 * @param ID  the System ID for the entity to load
11880
 * @param listOut  the return value for the set of parsed nodes
11881
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11882
 *    the parser error code otherwise
11883
 */
11884
11885
int
11886
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11887
0
                           const xmlChar *ID, xmlNode **listOut) {
11888
0
    xmlParserInputPtr input;
11889
0
    xmlNodePtr list;
11890
11891
0
    if (listOut != NULL)
11892
0
        *listOut = NULL;
11893
11894
0
    if (ctxt == NULL)
11895
0
        return(XML_ERR_ARGUMENT);
11896
11897
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11898
0
                            XML_RESOURCE_GENERAL_ENTITY);
11899
0
    if (input == NULL)
11900
0
        return(ctxt->errNo);
11901
11902
0
    xmlCtxtInitializeLate(ctxt);
11903
11904
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11905
0
    if (listOut != NULL)
11906
0
        *listOut = list;
11907
0
    else
11908
0
        xmlFreeNodeList(list);
11909
11910
0
    xmlFreeInputStream(input);
11911
0
    return(ctxt->errNo);
11912
0
}
11913
11914
#ifdef LIBXML_SAX1_ENABLED
11915
/**
11916
 * Parse an external general entity
11917
 * An external general parsed entity is well-formed if it matches the
11918
 * production labeled extParsedEnt.
11919
 *
11920
 * This function uses deprecated global variables to set parser options
11921
 * which default to XML_PARSE_NODICT.
11922
 *
11923
 * @deprecated Use #xmlParseCtxtExternalEntity.
11924
 *
11925
 *     [78] extParsedEnt ::= TextDecl? content
11926
 *
11927
 * @param doc  the document the chunk pertains to
11928
 * @param sax  the SAX handler block (possibly NULL)
11929
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11930
 * @param depth  Used for loop detection, use 0
11931
 * @param URL  the URL for the entity to load
11932
 * @param ID  the System ID for the entity to load
11933
 * @param list  the return value for the set of parsed nodes
11934
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11935
 *    the parser error code otherwise
11936
 */
11937
11938
int
11939
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11940
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11941
0
    xmlParserCtxtPtr ctxt;
11942
0
    int ret;
11943
11944
0
    if (list != NULL)
11945
0
        *list = NULL;
11946
11947
0
    if (doc == NULL)
11948
0
        return(XML_ERR_ARGUMENT);
11949
11950
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11951
0
    if (ctxt == NULL)
11952
0
        return(XML_ERR_NO_MEMORY);
11953
11954
0
    ctxt->depth = depth;
11955
0
    ctxt->myDoc = doc;
11956
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11957
11958
0
    xmlFreeParserCtxt(ctxt);
11959
0
    return(ret);
11960
0
}
11961
11962
/**
11963
 * Parse a well-balanced chunk of an XML document
11964
 * called by the parser
11965
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11966
 * the content production in the XML grammar:
11967
 *
11968
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11969
 *                       Comment)*
11970
 *
11971
 * This function uses deprecated global variables to set parser options
11972
 * which default to XML_PARSE_NODICT.
11973
 *
11974
 * @param doc  the document the chunk pertains to (must not be NULL)
11975
 * @param sax  the SAX handler block (possibly NULL)
11976
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11977
 * @param depth  Used for loop detection, use 0
11978
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11979
 * @param lst  the return value for the set of parsed nodes
11980
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11981
 *    the parser error code otherwise
11982
 */
11983
11984
int
11985
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11986
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11987
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11988
0
                                                depth, string, lst, 0 );
11989
0
}
11990
#endif /* LIBXML_SAX1_ENABLED */
11991
11992
/**
11993
 * Parse a well-balanced chunk of XML matching the 'content' production.
11994
 *
11995
 * Namespaces in scope of `node` and entities of `node`'s document are
11996
 * recognized. When validating, the DTD of `node`'s document is used.
11997
 *
11998
 * Always consumes `input` even in error case.
11999
 *
12000
 * @since 2.14.0
12001
 *
12002
 * @param ctxt  parser context
12003
 * @param input  parser input
12004
 * @param node  target node or document
12005
 * @param hasTextDecl  whether to parse text declaration
12006
 * @returns a node list or NULL in case of error.
12007
 */
12008
xmlNode *
12009
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12010
0
                    xmlNode *node, int hasTextDecl) {
12011
0
    xmlDocPtr doc;
12012
0
    xmlNodePtr cur, list = NULL;
12013
0
    int nsnr = 0;
12014
0
    xmlDictPtr oldDict;
12015
0
    int oldOptions, oldDictNames, oldLoadSubset;
12016
12017
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12018
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12019
0
        goto exit;
12020
0
    }
12021
12022
0
    doc = node->doc;
12023
0
    if (doc == NULL) {
12024
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12025
0
        goto exit;
12026
0
    }
12027
12028
0
    switch (node->type) {
12029
0
        case XML_ELEMENT_NODE:
12030
0
        case XML_DOCUMENT_NODE:
12031
0
        case XML_HTML_DOCUMENT_NODE:
12032
0
            break;
12033
12034
0
        case XML_ATTRIBUTE_NODE:
12035
0
        case XML_TEXT_NODE:
12036
0
        case XML_CDATA_SECTION_NODE:
12037
0
        case XML_ENTITY_REF_NODE:
12038
0
        case XML_PI_NODE:
12039
0
        case XML_COMMENT_NODE:
12040
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12041
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12042
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12043
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12044
0
                    node = cur;
12045
0
                    break;
12046
0
                }
12047
0
            }
12048
0
            break;
12049
12050
0
        default:
12051
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12052
0
            goto exit;
12053
0
    }
12054
12055
0
    xmlCtxtReset(ctxt);
12056
12057
0
    oldDict = ctxt->dict;
12058
0
    oldOptions = ctxt->options;
12059
0
    oldDictNames = ctxt->dictNames;
12060
0
    oldLoadSubset = ctxt->loadsubset;
12061
12062
    /*
12063
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12064
     */
12065
0
    if (doc->dict != NULL) {
12066
0
        ctxt->dict = doc->dict;
12067
0
    } else {
12068
0
        ctxt->options |= XML_PARSE_NODICT;
12069
0
        ctxt->dictNames = 0;
12070
0
    }
12071
12072
    /*
12073
     * Disable IDs
12074
     */
12075
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12076
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12077
12078
0
    ctxt->myDoc = doc;
12079
12080
0
#ifdef LIBXML_HTML_ENABLED
12081
0
    if (ctxt->html) {
12082
        /*
12083
         * When parsing in context, it makes no sense to add implied
12084
         * elements like html/body/etc...
12085
         */
12086
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12087
12088
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12089
0
    } else
12090
0
#endif
12091
0
    {
12092
0
        xmlCtxtInitializeLate(ctxt);
12093
12094
        /*
12095
         * initialize the SAX2 namespaces stack
12096
         */
12097
0
        cur = node;
12098
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12099
0
            xmlNsPtr ns = cur->nsDef;
12100
0
            xmlHashedString hprefix, huri;
12101
12102
0
            while (ns != NULL) {
12103
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12104
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12105
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12106
0
                    nsnr++;
12107
0
                ns = ns->next;
12108
0
            }
12109
0
            cur = cur->parent;
12110
0
        }
12111
12112
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12113
12114
0
        if (nsnr > 0)
12115
0
            xmlParserNsPop(ctxt, nsnr);
12116
0
    }
12117
12118
0
    ctxt->dict = oldDict;
12119
0
    ctxt->options = oldOptions;
12120
0
    ctxt->dictNames = oldDictNames;
12121
0
    ctxt->loadsubset = oldLoadSubset;
12122
0
    ctxt->myDoc = NULL;
12123
0
    ctxt->node = NULL;
12124
12125
0
exit:
12126
0
    xmlFreeInputStream(input);
12127
0
    return(list);
12128
0
}
12129
12130
/**
12131
 * Parse a well-balanced chunk of an XML document
12132
 * within the context (DTD, namespaces, etc ...) of the given node.
12133
 *
12134
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12135
 * the content production in the XML grammar:
12136
 *
12137
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12138
 *                       Comment)*
12139
 *
12140
 * This function assumes the encoding of `node`'s document which is
12141
 * typically not what you want. A better alternative is
12142
 * #xmlCtxtParseContent.
12143
 *
12144
 * @param node  the context node
12145
 * @param data  the input string
12146
 * @param datalen  the input string length in bytes
12147
 * @param options  a combination of xmlParserOption
12148
 * @param listOut  the return value for the set of parsed nodes
12149
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12150
 * error code otherwise
12151
 */
12152
xmlParserErrors
12153
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12154
0
                      int options, xmlNode **listOut) {
12155
0
    xmlParserCtxtPtr ctxt;
12156
0
    xmlParserInputPtr input;
12157
0
    xmlDocPtr doc;
12158
0
    xmlNodePtr list;
12159
0
    xmlParserErrors ret;
12160
12161
0
    if (listOut == NULL)
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
0
    *listOut = NULL;
12164
12165
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
    doc = node->doc;
12169
0
    if (doc == NULL)
12170
0
        return(XML_ERR_INTERNAL_ERROR);
12171
12172
0
#ifdef LIBXML_HTML_ENABLED
12173
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12174
0
        ctxt = htmlNewParserCtxt();
12175
0
    }
12176
0
    else
12177
0
#endif
12178
0
        ctxt = xmlNewParserCtxt();
12179
12180
0
    if (ctxt == NULL)
12181
0
        return(XML_ERR_NO_MEMORY);
12182
12183
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12184
0
                                      (const char *) doc->encoding,
12185
0
                                      XML_INPUT_BUF_STATIC);
12186
0
    if (input == NULL) {
12187
0
        xmlFreeParserCtxt(ctxt);
12188
0
        return(XML_ERR_NO_MEMORY);
12189
0
    }
12190
12191
0
    xmlCtxtUseOptions(ctxt, options);
12192
12193
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12194
12195
0
    if (list == NULL) {
12196
0
        ret = ctxt->errNo;
12197
0
        if (ret == XML_ERR_ARGUMENT)
12198
0
            ret = XML_ERR_INTERNAL_ERROR;
12199
0
    } else {
12200
0
        ret = XML_ERR_OK;
12201
0
        *listOut = list;
12202
0
    }
12203
12204
0
    xmlFreeParserCtxt(ctxt);
12205
12206
0
    return(ret);
12207
0
}
12208
12209
#ifdef LIBXML_SAX1_ENABLED
12210
/**
12211
 * Parse a well-balanced chunk of an XML document
12212
 *
12213
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12214
 * the content production in the XML grammar:
12215
 *
12216
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12217
 *                       Comment)*
12218
 *
12219
 * In case recover is set to 1, the nodelist will not be empty even if
12220
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12221
 * some extent.
12222
 *
12223
 * This function uses deprecated global variables to set parser options
12224
 * which default to XML_PARSE_NODICT.
12225
 *
12226
 * @param doc  the document the chunk pertains to (must not be NULL)
12227
 * @param sax  the SAX handler block (possibly NULL)
12228
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12229
 * @param depth  Used for loop detection, use 0
12230
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12231
 * @param listOut  the return value for the set of parsed nodes
12232
 * @param recover  return nodes even if the data is broken (use 0)
12233
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12234
 * otherwise.
12235
 */
12236
int
12237
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12238
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12239
0
     int recover) {
12240
0
    xmlParserCtxtPtr ctxt;
12241
0
    xmlParserInputPtr input;
12242
0
    xmlNodePtr list;
12243
0
    int ret;
12244
12245
0
    if (listOut != NULL)
12246
0
        *listOut = NULL;
12247
12248
0
    if (string == NULL)
12249
0
        return(XML_ERR_ARGUMENT);
12250
12251
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12252
0
    if (ctxt == NULL)
12253
0
        return(XML_ERR_NO_MEMORY);
12254
12255
0
    xmlCtxtInitializeLate(ctxt);
12256
12257
0
    ctxt->depth = depth;
12258
0
    ctxt->myDoc = doc;
12259
0
    if (recover) {
12260
0
        ctxt->options |= XML_PARSE_RECOVER;
12261
0
        ctxt->recovery = 1;
12262
0
    }
12263
12264
0
    input = xmlNewStringInputStream(ctxt, string);
12265
0
    if (input == NULL) {
12266
0
        ret = ctxt->errNo;
12267
0
        goto error;
12268
0
    }
12269
12270
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12271
0
    if (listOut != NULL)
12272
0
        *listOut = list;
12273
0
    else
12274
0
        xmlFreeNodeList(list);
12275
12276
0
    if (!ctxt->wellFormed)
12277
0
        ret = ctxt->errNo;
12278
0
    else
12279
0
        ret = XML_ERR_OK;
12280
12281
0
error:
12282
0
    xmlFreeInputStream(input);
12283
0
    xmlFreeParserCtxt(ctxt);
12284
0
    return(ret);
12285
0
}
12286
12287
/**
12288
 * Parse an XML external entity out of context and build a tree.
12289
 * It use the given SAX function block to handle the parsing callback.
12290
 * If sax is NULL, fallback to the default DOM tree building routines.
12291
 *
12292
 * @deprecated Don't use.
12293
 *
12294
 *     [78] extParsedEnt ::= TextDecl? content
12295
 *
12296
 * This correspond to a "Well Balanced" chunk
12297
 *
12298
 * This function uses deprecated global variables to set parser options
12299
 * which default to XML_PARSE_NODICT.
12300
 *
12301
 * @param sax  the SAX handler block
12302
 * @param filename  the filename
12303
 * @returns the resulting document tree
12304
 */
12305
12306
xmlDoc *
12307
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12308
0
    xmlDocPtr ret;
12309
0
    xmlParserCtxtPtr ctxt;
12310
12311
0
    ctxt = xmlCreateFileParserCtxt(filename);
12312
0
    if (ctxt == NULL) {
12313
0
  return(NULL);
12314
0
    }
12315
0
    if (sax != NULL) {
12316
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12317
0
            *ctxt->sax = *sax;
12318
0
        } else {
12319
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12320
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12321
0
        }
12322
0
        ctxt->userData = NULL;
12323
0
    }
12324
12325
0
    xmlParseExtParsedEnt(ctxt);
12326
12327
0
    if (ctxt->wellFormed) {
12328
0
  ret = ctxt->myDoc;
12329
0
    } else {
12330
0
        ret = NULL;
12331
0
        xmlFreeDoc(ctxt->myDoc);
12332
0
    }
12333
12334
0
    xmlFreeParserCtxt(ctxt);
12335
12336
0
    return(ret);
12337
0
}
12338
12339
/**
12340
 * Parse an XML external entity out of context and build a tree.
12341
 *
12342
 *     [78] extParsedEnt ::= TextDecl? content
12343
 *
12344
 * This correspond to a "Well Balanced" chunk
12345
 *
12346
 * This function uses deprecated global variables to set parser options
12347
 * which default to XML_PARSE_NODICT.
12348
 *
12349
 * @deprecated Don't use.
12350
 *
12351
 * @param filename  the filename
12352
 * @returns the resulting document tree
12353
 */
12354
12355
xmlDoc *
12356
0
xmlParseEntity(const char *filename) {
12357
0
    return(xmlSAXParseEntity(NULL, filename));
12358
0
}
12359
#endif /* LIBXML_SAX1_ENABLED */
12360
12361
/**
12362
 * Create a parser context for an external entity
12363
 * Automatic support for ZLIB/Compress compressed document is provided
12364
 * by default if found at compile-time.
12365
 *
12366
 * @deprecated Don't use.
12367
 *
12368
 * @param URL  the entity URL
12369
 * @param ID  the entity PUBLIC ID
12370
 * @param base  a possible base for the target URI
12371
 * @returns the new parser context or NULL
12372
 */
12373
xmlParserCtxt *
12374
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12375
0
                    const xmlChar *base) {
12376
0
    xmlParserCtxtPtr ctxt;
12377
0
    xmlParserInputPtr input;
12378
0
    xmlChar *uri = NULL;
12379
12380
0
    ctxt = xmlNewParserCtxt();
12381
0
    if (ctxt == NULL)
12382
0
  return(NULL);
12383
12384
0
    if (base != NULL) {
12385
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12386
0
            goto error;
12387
0
        if (uri != NULL)
12388
0
            URL = uri;
12389
0
    }
12390
12391
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12392
0
                            XML_RESOURCE_UNKNOWN);
12393
0
    if (input == NULL)
12394
0
        goto error;
12395
12396
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12397
0
        xmlFreeInputStream(input);
12398
0
        goto error;
12399
0
    }
12400
12401
0
    xmlFree(uri);
12402
0
    return(ctxt);
12403
12404
0
error:
12405
0
    xmlFree(uri);
12406
0
    xmlFreeParserCtxt(ctxt);
12407
0
    return(NULL);
12408
0
}
12409
12410
/************************************************************************
12411
 *                  *
12412
 *    Front ends when parsing from a file     *
12413
 *                  *
12414
 ************************************************************************/
12415
12416
/**
12417
 * Create a parser context for a file or URL content.
12418
 * Automatic support for ZLIB/Compress compressed document is provided
12419
 * by default if found at compile-time and for file accesses
12420
 *
12421
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12422
 *
12423
 * @param filename  the filename or URL
12424
 * @param options  a combination of xmlParserOption
12425
 * @returns the new parser context or NULL
12426
 */
12427
xmlParserCtxt *
12428
xmlCreateURLParserCtxt(const char *filename, int options)
12429
0
{
12430
0
    xmlParserCtxtPtr ctxt;
12431
0
    xmlParserInputPtr input;
12432
12433
0
    ctxt = xmlNewParserCtxt();
12434
0
    if (ctxt == NULL)
12435
0
  return(NULL);
12436
12437
0
    options |= XML_PARSE_UNZIP;
12438
12439
0
    xmlCtxtUseOptions(ctxt, options);
12440
12441
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12442
0
    if (input == NULL) {
12443
0
  xmlFreeParserCtxt(ctxt);
12444
0
  return(NULL);
12445
0
    }
12446
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12447
0
        xmlFreeInputStream(input);
12448
0
        xmlFreeParserCtxt(ctxt);
12449
0
        return(NULL);
12450
0
    }
12451
12452
0
    return(ctxt);
12453
0
}
12454
12455
/**
12456
 * Create a parser context for a file content.
12457
 * Automatic support for ZLIB/Compress compressed document is provided
12458
 * by default if found at compile-time.
12459
 *
12460
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12461
 *
12462
 * @param filename  the filename
12463
 * @returns the new parser context or NULL
12464
 */
12465
xmlParserCtxt *
12466
xmlCreateFileParserCtxt(const char *filename)
12467
0
{
12468
0
    return(xmlCreateURLParserCtxt(filename, 0));
12469
0
}
12470
12471
#ifdef LIBXML_SAX1_ENABLED
12472
/**
12473
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12474
 * compressed document is provided by default if found at compile-time.
12475
 * It use the given SAX function block to handle the parsing callback.
12476
 * If sax is NULL, fallback to the default DOM tree building routines.
12477
 *
12478
 * This function uses deprecated global variables to set parser options
12479
 * which default to XML_PARSE_NODICT.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It use the given SAX function block to handle the parsing callback.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * This function uses deprecated global variables to set parser options
12532
 * which default to XML_PARSE_NODICT.
12533
 *
12534
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12535
 *
12536
 * @param sax  the SAX handler block
12537
 * @param filename  the filename
12538
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12539
 *             documents
12540
 * @returns the resulting document tree
12541
 */
12542
12543
xmlDoc *
12544
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12545
0
                          int recovery) {
12546
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12547
0
}
12548
12549
/**
12550
 * Parse an XML in-memory document and build a tree.
12551
 * In the case the document is not Well Formed, a attempt to build a
12552
 * tree is tried anyway
12553
 *
12554
 * This function uses deprecated global variables to set parser options
12555
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12556
 *
12557
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12558
 *
12559
 * @param cur  a pointer to an array of xmlChar
12560
 * @returns the resulting document tree or NULL in case of failure
12561
 */
12562
12563
xmlDoc *
12564
0
xmlRecoverDoc(const xmlChar *cur) {
12565
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12566
0
}
12567
12568
/**
12569
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12570
 * compressed document is provided by default if found at compile-time.
12571
 *
12572
 * This function uses deprecated global variables to set parser options
12573
 * which default to XML_PARSE_NODICT.
12574
 *
12575
 * @deprecated Use #xmlReadFile.
12576
 *
12577
 * @param filename  the filename
12578
 * @returns the resulting document tree if the file was wellformed,
12579
 * NULL otherwise.
12580
 */
12581
12582
xmlDoc *
12583
0
xmlParseFile(const char *filename) {
12584
0
    return(xmlSAXParseFile(NULL, filename, 0));
12585
0
}
12586
12587
/**
12588
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12589
 * compressed document is provided by default if found at compile-time.
12590
 * In the case the document is not Well Formed, it attempts to build
12591
 * a tree anyway
12592
 *
12593
 * This function uses deprecated global variables to set parser options
12594
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12595
 *
12596
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12597
 *
12598
 * @param filename  the filename
12599
 * @returns the resulting document tree or NULL in case of failure
12600
 */
12601
12602
xmlDoc *
12603
0
xmlRecoverFile(const char *filename) {
12604
0
    return(xmlSAXParseFile(NULL, filename, 1));
12605
0
}
12606
12607
12608
/**
12609
 * Setup the parser context to parse a new buffer; Clears any prior
12610
 * contents from the parser context. The buffer parameter must not be
12611
 * NULL, but the filename parameter can be
12612
 *
12613
 * @deprecated Don't use.
12614
 *
12615
 * @param ctxt  an XML parser context
12616
 * @param buffer  a xmlChar * buffer
12617
 * @param filename  a file name
12618
 */
12619
void
12620
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12621
                             const char* filename)
12622
0
{
12623
0
    xmlParserInputPtr input;
12624
12625
0
    if ((ctxt == NULL) || (buffer == NULL))
12626
0
        return;
12627
12628
0
    xmlCtxtReset(ctxt);
12629
12630
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12631
0
                                      NULL, 0);
12632
0
    if (input == NULL)
12633
0
        return;
12634
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12635
0
        xmlFreeInputStream(input);
12636
0
}
12637
12638
/**
12639
 * Parse an XML file and call the given SAX handler routines.
12640
 * Automatic support for ZLIB/Compress compressed document is provided
12641
 *
12642
 * This function uses deprecated global variables to set parser options
12643
 * which default to XML_PARSE_NODICT.
12644
 *
12645
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12646
 *
12647
 * @param sax  a SAX handler
12648
 * @param user_data  The user data returned on SAX callbacks
12649
 * @param filename  a file name
12650
 * @returns 0 in case of success or a error number otherwise
12651
 */
12652
int
12653
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12654
0
                    const char *filename) {
12655
0
    int ret = 0;
12656
0
    xmlParserCtxtPtr ctxt;
12657
12658
0
    ctxt = xmlCreateFileParserCtxt(filename);
12659
0
    if (ctxt == NULL) return -1;
12660
0
    if (sax != NULL) {
12661
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12662
0
            *ctxt->sax = *sax;
12663
0
        } else {
12664
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12665
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12666
0
        }
12667
0
  ctxt->userData = user_data;
12668
0
    }
12669
12670
0
    xmlParseDocument(ctxt);
12671
12672
0
    if (ctxt->wellFormed)
12673
0
  ret = 0;
12674
0
    else {
12675
0
        if (ctxt->errNo != 0)
12676
0
      ret = ctxt->errNo;
12677
0
  else
12678
0
      ret = -1;
12679
0
    }
12680
0
    if (ctxt->myDoc != NULL) {
12681
0
        xmlFreeDoc(ctxt->myDoc);
12682
0
  ctxt->myDoc = NULL;
12683
0
    }
12684
0
    xmlFreeParserCtxt(ctxt);
12685
12686
0
    return ret;
12687
0
}
12688
#endif /* LIBXML_SAX1_ENABLED */
12689
12690
/************************************************************************
12691
 *                  *
12692
 *    Front ends when parsing from memory     *
12693
 *                  *
12694
 ************************************************************************/
12695
12696
/**
12697
 * Create a parser context for an XML in-memory document. The input buffer
12698
 * must not contain a terminating null byte.
12699
 *
12700
 * @param buffer  a pointer to a char array
12701
 * @param size  the size of the array
12702
 * @returns the new parser context or NULL
12703
 */
12704
xmlParserCtxt *
12705
18.0k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12706
18.0k
    xmlParserCtxtPtr ctxt;
12707
18.0k
    xmlParserInputPtr input;
12708
12709
18.0k
    if (size < 0)
12710
0
  return(NULL);
12711
12712
18.0k
    ctxt = xmlNewParserCtxt();
12713
18.0k
    if (ctxt == NULL)
12714
0
  return(NULL);
12715
12716
18.0k
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12717
18.0k
    if (input == NULL) {
12718
0
  xmlFreeParserCtxt(ctxt);
12719
0
  return(NULL);
12720
0
    }
12721
18.0k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12722
0
        xmlFreeInputStream(input);
12723
0
        xmlFreeParserCtxt(ctxt);
12724
0
        return(NULL);
12725
0
    }
12726
12727
18.0k
    return(ctxt);
12728
18.0k
}
12729
12730
#ifdef LIBXML_SAX1_ENABLED
12731
/**
12732
 * Parse an XML in-memory block and use the given SAX function block
12733
 * to handle the parsing callback. If sax is NULL, fallback to the default
12734
 * DOM tree building routines.
12735
 *
12736
 * This function uses deprecated global variables to set parser options
12737
 * which default to XML_PARSE_NODICT.
12738
 *
12739
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12740
 *
12741
 * User data (void *) is stored within the parser context in the
12742
 * context's _private member, so it is available nearly everywhere in libxml
12743
 *
12744
 * @param sax  the SAX handler block
12745
 * @param buffer  an pointer to a char array
12746
 * @param size  the size of the array
12747
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12748
 *             documents
12749
 * @param data  the userdata
12750
 * @returns the resulting document tree
12751
 */
12752
12753
xmlDoc *
12754
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12755
0
                          int size, int recovery, void *data) {
12756
0
    xmlDocPtr ret = NULL;
12757
0
    xmlParserCtxtPtr ctxt;
12758
0
    xmlParserInputPtr input;
12759
12760
0
    if (size < 0)
12761
0
        return(NULL);
12762
12763
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12764
0
    if (ctxt == NULL)
12765
0
        return(NULL);
12766
12767
0
    if (data != NULL)
12768
0
  ctxt->_private=data;
12769
12770
0
    if (recovery) {
12771
0
        ctxt->options |= XML_PARSE_RECOVER;
12772
0
        ctxt->recovery = 1;
12773
0
    }
12774
12775
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12776
0
                                      XML_INPUT_BUF_STATIC);
12777
12778
0
    if (input != NULL)
12779
0
        ret = xmlCtxtParseDocument(ctxt, input);
12780
12781
0
    xmlFreeParserCtxt(ctxt);
12782
0
    return(ret);
12783
0
}
12784
12785
/**
12786
 * Parse an XML in-memory block and use the given SAX function block
12787
 * to handle the parsing callback. If sax is NULL, fallback to the default
12788
 * DOM tree building routines.
12789
 *
12790
 * This function uses deprecated global variables to set parser options
12791
 * which default to XML_PARSE_NODICT.
12792
 *
12793
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12794
 *
12795
 * @param sax  the SAX handler block
12796
 * @param buffer  an pointer to a char array
12797
 * @param size  the size of the array
12798
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12799
 *             documents
12800
 * @returns the resulting document tree
12801
 */
12802
xmlDoc *
12803
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12804
0
            int size, int recovery) {
12805
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12806
0
}
12807
12808
/**
12809
 * Parse an XML in-memory block and build a tree.
12810
 *
12811
 * This function uses deprecated global variables to set parser options
12812
 * which default to XML_PARSE_NODICT.
12813
 *
12814
 * @deprecated Use #xmlReadMemory.
12815
 *
12816
 * @param buffer  an pointer to a char array
12817
 * @param size  the size of the array
12818
 * @returns the resulting document tree
12819
 */
12820
12821
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12822
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12823
0
}
12824
12825
/**
12826
 * Parse an XML in-memory block and build a tree.
12827
 * In the case the document is not Well Formed, an attempt to
12828
 * build a tree is tried anyway
12829
 *
12830
 * This function uses deprecated global variables to set parser options
12831
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12832
 *
12833
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12834
 *
12835
 * @param buffer  an pointer to a char array
12836
 * @param size  the size of the array
12837
 * @returns the resulting document tree or NULL in case of error
12838
 */
12839
12840
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12841
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12842
0
}
12843
12844
/**
12845
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12846
 *
12847
 * This function uses deprecated global variables to set parser options
12848
 * which default to XML_PARSE_NODICT.
12849
 *
12850
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12851
 *
12852
 * @param sax  a SAX handler
12853
 * @param user_data  The user data returned on SAX callbacks
12854
 * @param buffer  an in-memory XML document input
12855
 * @param size  the length of the XML document in bytes
12856
 * @returns 0 in case of success or a error number otherwise
12857
 */
12858
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12859
0
        const char *buffer, int size) {
12860
0
    int ret = 0;
12861
0
    xmlParserCtxtPtr ctxt;
12862
12863
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12864
0
    if (ctxt == NULL) return -1;
12865
0
    if (sax != NULL) {
12866
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12867
0
            *ctxt->sax = *sax;
12868
0
        } else {
12869
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12870
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12871
0
        }
12872
0
  ctxt->userData = user_data;
12873
0
    }
12874
12875
0
    xmlParseDocument(ctxt);
12876
12877
0
    if (ctxt->wellFormed)
12878
0
  ret = 0;
12879
0
    else {
12880
0
        if (ctxt->errNo != 0)
12881
0
      ret = ctxt->errNo;
12882
0
  else
12883
0
      ret = -1;
12884
0
    }
12885
0
    if (ctxt->myDoc != NULL) {
12886
0
        xmlFreeDoc(ctxt->myDoc);
12887
0
  ctxt->myDoc = NULL;
12888
0
    }
12889
0
    xmlFreeParserCtxt(ctxt);
12890
12891
0
    return ret;
12892
0
}
12893
#endif /* LIBXML_SAX1_ENABLED */
12894
12895
/**
12896
 * Creates a parser context for an XML in-memory document.
12897
 *
12898
 * @param str  a pointer to an array of xmlChar
12899
 * @returns the new parser context or NULL
12900
 */
12901
xmlParserCtxt *
12902
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12903
0
    xmlParserCtxtPtr ctxt;
12904
0
    xmlParserInputPtr input;
12905
12906
0
    ctxt = xmlNewParserCtxt();
12907
0
    if (ctxt == NULL)
12908
0
  return(NULL);
12909
12910
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12911
0
    if (input == NULL) {
12912
0
  xmlFreeParserCtxt(ctxt);
12913
0
  return(NULL);
12914
0
    }
12915
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12916
0
        xmlFreeInputStream(input);
12917
0
        xmlFreeParserCtxt(ctxt);
12918
0
        return(NULL);
12919
0
    }
12920
12921
0
    return(ctxt);
12922
0
}
12923
12924
#ifdef LIBXML_SAX1_ENABLED
12925
/**
12926
 * Parse an XML in-memory document and build a tree.
12927
 * It use the given SAX function block to handle the parsing callback.
12928
 * If sax is NULL, fallback to the default DOM tree building routines.
12929
 *
12930
 * This function uses deprecated global variables to set parser options
12931
 * which default to XML_PARSE_NODICT.
12932
 *
12933
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12934
 *
12935
 * @param sax  the SAX handler block
12936
 * @param cur  a pointer to an array of xmlChar
12937
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12938
 *             documents
12939
 * @returns the resulting document tree
12940
 */
12941
12942
xmlDoc *
12943
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12944
0
    xmlDocPtr ret;
12945
0
    xmlParserCtxtPtr ctxt;
12946
0
    xmlSAXHandlerPtr oldsax = NULL;
12947
12948
0
    if (cur == NULL) return(NULL);
12949
12950
12951
0
    ctxt = xmlCreateDocParserCtxt(cur);
12952
0
    if (ctxt == NULL) return(NULL);
12953
0
    if (sax != NULL) {
12954
0
        oldsax = ctxt->sax;
12955
0
        ctxt->sax = sax;
12956
0
        ctxt->userData = NULL;
12957
0
    }
12958
12959
0
    xmlParseDocument(ctxt);
12960
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12961
0
    else {
12962
0
       ret = NULL;
12963
0
       xmlFreeDoc(ctxt->myDoc);
12964
0
       ctxt->myDoc = NULL;
12965
0
    }
12966
0
    if (sax != NULL)
12967
0
  ctxt->sax = oldsax;
12968
0
    xmlFreeParserCtxt(ctxt);
12969
12970
0
    return(ret);
12971
0
}
12972
12973
/**
12974
 * Parse an XML in-memory document and build a tree.
12975
 *
12976
 * This function uses deprecated global variables to set parser options
12977
 * which default to XML_PARSE_NODICT.
12978
 *
12979
 * @deprecated Use #xmlReadDoc.
12980
 *
12981
 * @param cur  a pointer to an array of xmlChar
12982
 * @returns the resulting document tree
12983
 */
12984
12985
xmlDoc *
12986
0
xmlParseDoc(const xmlChar *cur) {
12987
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12988
0
}
12989
#endif /* LIBXML_SAX1_ENABLED */
12990
12991
/************************************************************************
12992
 *                  *
12993
 *  New set (2.6.0) of simpler and more flexible APIs   *
12994
 *                  *
12995
 ************************************************************************/
12996
12997
/**
12998
 * Reset a parser context
12999
 *
13000
 * @param ctxt  an XML parser context
13001
 */
13002
void
13003
xmlCtxtReset(xmlParserCtxt *ctxt)
13004
0
{
13005
0
    xmlParserInputPtr input;
13006
13007
0
    if (ctxt == NULL)
13008
0
        return;
13009
13010
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13011
0
        xmlFreeInputStream(input);
13012
0
    }
13013
0
    ctxt->inputNr = 0;
13014
0
    ctxt->input = NULL;
13015
13016
0
    ctxt->spaceNr = 0;
13017
0
    if (ctxt->spaceTab != NULL) {
13018
0
  ctxt->spaceTab[0] = -1;
13019
0
  ctxt->space = &ctxt->spaceTab[0];
13020
0
    } else {
13021
0
        ctxt->space = NULL;
13022
0
    }
13023
13024
13025
0
    ctxt->nodeNr = 0;
13026
0
    ctxt->node = NULL;
13027
13028
0
    ctxt->nameNr = 0;
13029
0
    ctxt->name = NULL;
13030
13031
0
    ctxt->nsNr = 0;
13032
0
    xmlParserNsReset(ctxt->nsdb);
13033
13034
0
    if (ctxt->version != NULL) {
13035
0
        xmlFree(ctxt->version);
13036
0
        ctxt->version = NULL;
13037
0
    }
13038
0
    if (ctxt->encoding != NULL) {
13039
0
        xmlFree(ctxt->encoding);
13040
0
        ctxt->encoding = NULL;
13041
0
    }
13042
0
    if (ctxt->extSubURI != NULL) {
13043
0
        xmlFree(ctxt->extSubURI);
13044
0
        ctxt->extSubURI = NULL;
13045
0
    }
13046
0
    if (ctxt->extSubSystem != NULL) {
13047
0
        xmlFree(ctxt->extSubSystem);
13048
0
        ctxt->extSubSystem = NULL;
13049
0
    }
13050
0
    if (ctxt->directory != NULL) {
13051
0
        xmlFree(ctxt->directory);
13052
0
        ctxt->directory = NULL;
13053
0
    }
13054
13055
0
    if (ctxt->myDoc != NULL)
13056
0
        xmlFreeDoc(ctxt->myDoc);
13057
0
    ctxt->myDoc = NULL;
13058
13059
0
    ctxt->standalone = -1;
13060
0
    ctxt->hasExternalSubset = 0;
13061
0
    ctxt->hasPErefs = 0;
13062
0
    ctxt->html = ctxt->html ? 1 : 0;
13063
0
    ctxt->instate = XML_PARSER_START;
13064
13065
0
    ctxt->wellFormed = 1;
13066
0
    ctxt->nsWellFormed = 1;
13067
0
    ctxt->disableSAX = 0;
13068
0
    ctxt->valid = 1;
13069
0
    ctxt->record_info = 0;
13070
0
    ctxt->checkIndex = 0;
13071
0
    ctxt->endCheckState = 0;
13072
0
    ctxt->inSubset = 0;
13073
0
    ctxt->errNo = XML_ERR_OK;
13074
0
    ctxt->depth = 0;
13075
0
    ctxt->catalogs = NULL;
13076
0
    ctxt->sizeentities = 0;
13077
0
    ctxt->sizeentcopy = 0;
13078
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13079
13080
0
    if (ctxt->attsDefault != NULL) {
13081
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13082
0
        ctxt->attsDefault = NULL;
13083
0
    }
13084
0
    if (ctxt->attsSpecial != NULL) {
13085
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13086
0
        ctxt->attsSpecial = NULL;
13087
0
    }
13088
13089
0
#ifdef LIBXML_CATALOG_ENABLED
13090
0
    if (ctxt->catalogs != NULL)
13091
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13092
0
#endif
13093
0
    ctxt->nbErrors = 0;
13094
0
    ctxt->nbWarnings = 0;
13095
0
    if (ctxt->lastError.code != XML_ERR_OK)
13096
0
        xmlResetError(&ctxt->lastError);
13097
0
}
13098
13099
/**
13100
 * Reset a push parser context
13101
 *
13102
 * @param ctxt  an XML parser context
13103
 * @param chunk  a pointer to an array of chars
13104
 * @param size  number of chars in the array
13105
 * @param filename  an optional file name or URI
13106
 * @param encoding  the document encoding, or NULL
13107
 * @returns 0 in case of success and 1 in case of error
13108
 */
13109
int
13110
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13111
                 int size, const char *filename, const char *encoding)
13112
0
{
13113
0
    xmlParserInputPtr input;
13114
13115
0
    if (ctxt == NULL)
13116
0
        return(1);
13117
13118
0
    xmlCtxtReset(ctxt);
13119
13120
0
    input = xmlNewPushInput(filename, chunk, size);
13121
0
    if (input == NULL)
13122
0
        return(1);
13123
13124
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13125
0
        xmlFreeInputStream(input);
13126
0
        return(1);
13127
0
    }
13128
13129
0
    if (encoding != NULL)
13130
0
        xmlSwitchEncodingName(ctxt, encoding);
13131
13132
0
    return(0);
13133
0
}
13134
13135
static int
13136
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13137
36.1k
{
13138
36.1k
    int allMask;
13139
13140
36.1k
    if (ctxt == NULL)
13141
0
        return(-1);
13142
13143
    /*
13144
     * XInclude options aren't handled by the parser.
13145
     *
13146
     * XML_PARSE_XINCLUDE
13147
     * XML_PARSE_NOXINCNODE
13148
     * XML_PARSE_NOBASEFIX
13149
     */
13150
36.1k
    allMask = XML_PARSE_RECOVER |
13151
36.1k
              XML_PARSE_NOENT |
13152
36.1k
              XML_PARSE_DTDLOAD |
13153
36.1k
              XML_PARSE_DTDATTR |
13154
36.1k
              XML_PARSE_DTDVALID |
13155
36.1k
              XML_PARSE_NOERROR |
13156
36.1k
              XML_PARSE_NOWARNING |
13157
36.1k
              XML_PARSE_PEDANTIC |
13158
36.1k
              XML_PARSE_NOBLANKS |
13159
36.1k
#ifdef LIBXML_SAX1_ENABLED
13160
36.1k
              XML_PARSE_SAX1 |
13161
36.1k
#endif
13162
36.1k
              XML_PARSE_NONET |
13163
36.1k
              XML_PARSE_NODICT |
13164
36.1k
              XML_PARSE_NSCLEAN |
13165
36.1k
              XML_PARSE_NOCDATA |
13166
36.1k
              XML_PARSE_COMPACT |
13167
36.1k
              XML_PARSE_OLD10 |
13168
36.1k
              XML_PARSE_HUGE |
13169
36.1k
              XML_PARSE_OLDSAX |
13170
36.1k
              XML_PARSE_IGNORE_ENC |
13171
36.1k
              XML_PARSE_BIG_LINES |
13172
36.1k
              XML_PARSE_NO_XXE |
13173
36.1k
              XML_PARSE_UNZIP |
13174
36.1k
              XML_PARSE_NO_SYS_CATALOG |
13175
36.1k
              XML_PARSE_CATALOG_PI;
13176
13177
36.1k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13178
13179
    /*
13180
     * For some options, struct members are historically the source
13181
     * of truth. The values are initalized from global variables and
13182
     * old code could also modify them directly. Several older API
13183
     * functions that don't take an options argument rely on these
13184
     * deprecated mechanisms.
13185
     *
13186
     * Once public access to struct members and the globals are
13187
     * disabled, we can use the options bitmask as source of
13188
     * truth, making all these struct members obsolete.
13189
     *
13190
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13191
     * loading of the external subset.
13192
     */
13193
36.1k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13194
36.1k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13195
36.1k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13196
36.1k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13197
36.1k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13198
36.1k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13199
36.1k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13200
36.1k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13201
36.1k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13202
13203
36.1k
    return(options & ~allMask);
13204
36.1k
}
13205
13206
/**
13207
 * Applies the options to the parser context. Unset options are
13208
 * cleared.
13209
 *
13210
 * @since 2.13.0
13211
 *
13212
 * With older versions, you can use #xmlCtxtUseOptions.
13213
 *
13214
 * @param ctxt  an XML parser context
13215
 * @param options  a bitmask of xmlParserOption values
13216
 * @returns 0 in case of success, the set of unknown or unimplemented options
13217
 *         in case of error.
13218
 */
13219
int
13220
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13221
18.0k
{
13222
18.0k
#ifdef LIBXML_HTML_ENABLED
13223
18.0k
    if ((ctxt != NULL) && (ctxt->html))
13224
0
        return(htmlCtxtSetOptions(ctxt, options));
13225
18.0k
#endif
13226
13227
18.0k
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13228
18.0k
}
13229
13230
/**
13231
 * Get the current options of the parser context.
13232
 *
13233
 * @since 2.14.0
13234
 *
13235
 * @param ctxt  an XML parser context
13236
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13237
 */
13238
int
13239
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13240
0
{
13241
0
    if (ctxt == NULL)
13242
0
        return(-1);
13243
13244
0
    return(ctxt->options);
13245
0
}
13246
13247
/**
13248
 * Applies the options to the parser context. The following options
13249
 * are never cleared and can only be enabled:
13250
 *
13251
 * - XML_PARSE_NOERROR
13252
 * - XML_PARSE_NOWARNING
13253
 * - XML_PARSE_NONET
13254
 * - XML_PARSE_NSCLEAN
13255
 * - XML_PARSE_NOCDATA
13256
 * - XML_PARSE_COMPACT
13257
 * - XML_PARSE_OLD10
13258
 * - XML_PARSE_HUGE
13259
 * - XML_PARSE_OLDSAX
13260
 * - XML_PARSE_IGNORE_ENC
13261
 * - XML_PARSE_BIG_LINES
13262
 *
13263
 * @deprecated Use #xmlCtxtSetOptions.
13264
 *
13265
 * @param ctxt  an XML parser context
13266
 * @param options  a combination of xmlParserOption
13267
 * @returns 0 in case of success, the set of unknown or unimplemented options
13268
 *         in case of error.
13269
 */
13270
int
13271
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13272
18.0k
{
13273
18.0k
    int keepMask;
13274
13275
18.0k
#ifdef LIBXML_HTML_ENABLED
13276
18.0k
    if ((ctxt != NULL) && (ctxt->html))
13277
0
        return(htmlCtxtUseOptions(ctxt, options));
13278
18.0k
#endif
13279
13280
    /*
13281
     * For historic reasons, some options can only be enabled.
13282
     */
13283
18.0k
    keepMask = XML_PARSE_NOERROR |
13284
18.0k
               XML_PARSE_NOWARNING |
13285
18.0k
               XML_PARSE_NONET |
13286
18.0k
               XML_PARSE_NSCLEAN |
13287
18.0k
               XML_PARSE_NOCDATA |
13288
18.0k
               XML_PARSE_COMPACT |
13289
18.0k
               XML_PARSE_OLD10 |
13290
18.0k
               XML_PARSE_HUGE |
13291
18.0k
               XML_PARSE_OLDSAX |
13292
18.0k
               XML_PARSE_IGNORE_ENC |
13293
18.0k
               XML_PARSE_BIG_LINES;
13294
13295
18.0k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13296
18.0k
}
13297
13298
/**
13299
 * To protect against exponential entity expansion ("billion laughs"), the
13300
 * size of serialized output is (roughly) limited to the input size
13301
 * multiplied by this factor. The default value is 5.
13302
 *
13303
 * When working with documents making heavy use of entity expansion, it can
13304
 * be necessary to increase the value. For security reasons, this should only
13305
 * be considered when processing trusted input.
13306
 *
13307
 * @param ctxt  an XML parser context
13308
 * @param maxAmpl  maximum amplification factor
13309
 */
13310
void
13311
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13312
0
{
13313
0
    if (ctxt == NULL)
13314
0
        return;
13315
0
    ctxt->maxAmpl = maxAmpl;
13316
0
}
13317
13318
/**
13319
 * Parse an XML document and return the resulting document tree.
13320
 * Takes ownership of the input object.
13321
 *
13322
 * @since 2.13.0
13323
 *
13324
 * @param ctxt  an XML parser context
13325
 * @param input  parser input
13326
 * @returns the resulting document tree or NULL
13327
 */
13328
xmlDoc *
13329
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13330
0
{
13331
0
    xmlDocPtr ret = NULL;
13332
13333
0
    if ((ctxt == NULL) || (input == NULL)) {
13334
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13335
0
        xmlFreeInputStream(input);
13336
0
        return(NULL);
13337
0
    }
13338
13339
    /* assert(ctxt->inputNr == 0); */
13340
0
    while (ctxt->inputNr > 0)
13341
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13342
13343
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13344
0
        xmlFreeInputStream(input);
13345
0
        return(NULL);
13346
0
    }
13347
13348
0
    xmlParseDocument(ctxt);
13349
13350
0
    ret = xmlCtxtGetDocument(ctxt);
13351
13352
    /* assert(ctxt->inputNr == 1); */
13353
0
    while (ctxt->inputNr > 0)
13354
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13355
13356
0
    return(ret);
13357
0
}
13358
13359
/**
13360
 * Convenience function to parse an XML document from a
13361
 * zero-terminated string.
13362
 *
13363
 * See #xmlCtxtReadDoc for details.
13364
 *
13365
 * @param cur  a pointer to a zero terminated string
13366
 * @param URL  base URL (optional)
13367
 * @param encoding  the document encoding (optional)
13368
 * @param options  a combination of xmlParserOption
13369
 * @returns the resulting document tree
13370
 */
13371
xmlDoc *
13372
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13373
           int options)
13374
0
{
13375
0
    xmlParserCtxtPtr ctxt;
13376
0
    xmlParserInputPtr input;
13377
0
    xmlDocPtr doc = NULL;
13378
13379
0
    ctxt = xmlNewParserCtxt();
13380
0
    if (ctxt == NULL)
13381
0
        return(NULL);
13382
13383
0
    xmlCtxtUseOptions(ctxt, options);
13384
13385
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13386
0
                                      XML_INPUT_BUF_STATIC);
13387
13388
0
    if (input != NULL)
13389
0
        doc = xmlCtxtParseDocument(ctxt, input);
13390
13391
0
    xmlFreeParserCtxt(ctxt);
13392
0
    return(doc);
13393
0
}
13394
13395
/**
13396
 * Convenience function to parse an XML file from the filesystem
13397
 * or a global, user-defined resource loader.
13398
 *
13399
 * This function always enables the XML_PARSE_UNZIP option for
13400
 * backward compatibility. If a "-" filename is passed, it will
13401
 * read from stdin. Both of these features are potentially
13402
 * insecure and might be removed from later versions.
13403
 *
13404
 * See #xmlCtxtReadFile for details.
13405
 *
13406
 * @param filename  a file or URL
13407
 * @param encoding  the document encoding (optional)
13408
 * @param options  a combination of xmlParserOption
13409
 * @returns the resulting document tree
13410
 */
13411
xmlDoc *
13412
xmlReadFile(const char *filename, const char *encoding, int options)
13413
0
{
13414
0
    xmlParserCtxtPtr ctxt;
13415
0
    xmlParserInputPtr input;
13416
0
    xmlDocPtr doc = NULL;
13417
13418
0
    ctxt = xmlNewParserCtxt();
13419
0
    if (ctxt == NULL)
13420
0
        return(NULL);
13421
13422
0
    options |= XML_PARSE_UNZIP;
13423
13424
0
    xmlCtxtUseOptions(ctxt, options);
13425
13426
    /*
13427
     * Backward compatibility for users of command line utilities like
13428
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13429
     * should be removed at some point.
13430
     */
13431
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13432
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13433
0
                                      encoding, 0);
13434
0
    else
13435
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13436
13437
0
    if (input != NULL)
13438
0
        doc = xmlCtxtParseDocument(ctxt, input);
13439
13440
0
    xmlFreeParserCtxt(ctxt);
13441
0
    return(doc);
13442
0
}
13443
13444
/**
13445
 * Parse an XML in-memory document and build a tree. The input buffer must
13446
 * not contain a terminating null byte.
13447
 *
13448
 * See #xmlCtxtReadMemory for details.
13449
 *
13450
 * @param buffer  a pointer to a char array
13451
 * @param size  the size of the array
13452
 * @param url  base URL (optional)
13453
 * @param encoding  the document encoding (optional)
13454
 * @param options  a combination of xmlParserOption
13455
 * @returns the resulting document tree
13456
 */
13457
xmlDoc *
13458
xmlReadMemory(const char *buffer, int size, const char *url,
13459
              const char *encoding, int options)
13460
0
{
13461
0
    xmlParserCtxtPtr ctxt;
13462
0
    xmlParserInputPtr input;
13463
0
    xmlDocPtr doc = NULL;
13464
13465
0
    if (size < 0)
13466
0
  return(NULL);
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL)
13470
0
        return(NULL);
13471
13472
0
    xmlCtxtUseOptions(ctxt, options);
13473
13474
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13475
0
                                      XML_INPUT_BUF_STATIC);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML from a file descriptor and build a tree.
13486
 *
13487
 * See #xmlCtxtReadFd for details.
13488
 *
13489
 * NOTE that the file descriptor will not be closed when the
13490
 * context is freed or reset.
13491
 *
13492
 * @param fd  an open file descriptor
13493
 * @param URL  base URL (optional)
13494
 * @param encoding  the document encoding (optional)
13495
 * @param options  a combination of xmlParserOption
13496
 * @returns the resulting document tree
13497
 */
13498
xmlDoc *
13499
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13512
13513
0
    if (input != NULL)
13514
0
        doc = xmlCtxtParseDocument(ctxt, input);
13515
13516
0
    xmlFreeParserCtxt(ctxt);
13517
0
    return(doc);
13518
0
}
13519
13520
/**
13521
 * Parse an XML document from I/O functions and context and build a tree.
13522
 *
13523
 * See #xmlCtxtReadIO for details.
13524
 *
13525
 * @param ioread  an I/O read function
13526
 * @param ioclose  an I/O close function (optional)
13527
 * @param ioctx  an I/O handler
13528
 * @param URL  base URL (optional)
13529
 * @param encoding  the document encoding (optional)
13530
 * @param options  a combination of xmlParserOption
13531
 * @returns the resulting document tree
13532
 */
13533
xmlDoc *
13534
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13535
          void *ioctx, const char *URL, const char *encoding, int options)
13536
0
{
13537
0
    xmlParserCtxtPtr ctxt;
13538
0
    xmlParserInputPtr input;
13539
0
    xmlDocPtr doc = NULL;
13540
13541
0
    ctxt = xmlNewParserCtxt();
13542
0
    if (ctxt == NULL)
13543
0
        return(NULL);
13544
13545
0
    xmlCtxtUseOptions(ctxt, options);
13546
13547
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13548
0
                                  encoding, 0);
13549
13550
0
    if (input != NULL)
13551
0
        doc = xmlCtxtParseDocument(ctxt, input);
13552
13553
0
    xmlFreeParserCtxt(ctxt);
13554
0
    return(doc);
13555
0
}
13556
13557
/**
13558
 * Parse an XML in-memory document and build a tree.
13559
 *
13560
 * `URL` is used as base to resolve external entities and for error
13561
 * reporting.
13562
 *
13563
 * @param ctxt  an XML parser context
13564
 * @param str  a pointer to a zero terminated string
13565
 * @param URL  base URL (optional)
13566
 * @param encoding  the document encoding (optional)
13567
 * @param options  a combination of xmlParserOption
13568
 * @returns the resulting document tree
13569
 */
13570
xmlDoc *
13571
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13572
               const char *URL, const char *encoding, int options)
13573
0
{
13574
0
    xmlParserInputPtr input;
13575
13576
0
    if (ctxt == NULL)
13577
0
        return(NULL);
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13583
0
                                      XML_INPUT_BUF_STATIC);
13584
0
    if (input == NULL)
13585
0
        return(NULL);
13586
13587
0
    return(xmlCtxtParseDocument(ctxt, input));
13588
0
}
13589
13590
/**
13591
 * Parse an XML file from the filesystem or a global, user-defined
13592
 * resource loader.
13593
 *
13594
 * This function always enables the XML_PARSE_UNZIP option for
13595
 * backward compatibility. This feature is potentially insecure
13596
 * and might be removed from later versions.
13597
 *
13598
 * @param ctxt  an XML parser context
13599
 * @param filename  a file or URL
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13606
                const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if (ctxt == NULL)
13611
0
        return(NULL);
13612
13613
0
    options |= XML_PARSE_UNZIP;
13614
13615
0
    xmlCtxtReset(ctxt);
13616
0
    xmlCtxtUseOptions(ctxt, options);
13617
13618
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13619
0
    if (input == NULL)
13620
0
        return(NULL);
13621
13622
0
    return(xmlCtxtParseDocument(ctxt, input));
13623
0
}
13624
13625
/**
13626
 * Parse an XML in-memory document and build a tree. The input buffer must
13627
 * not contain a terminating null byte.
13628
 *
13629
 * `URL` is used as base to resolve external entities and for error
13630
 * reporting.
13631
 *
13632
 * @param ctxt  an XML parser context
13633
 * @param buffer  a pointer to a char array
13634
 * @param size  the size of the array
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13642
                  const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if ((ctxt == NULL) || (size < 0))
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13653
0
                                      XML_INPUT_BUF_STATIC);
13654
0
    if (input == NULL)
13655
0
        return(NULL);
13656
13657
0
    return(xmlCtxtParseDocument(ctxt, input));
13658
0
}
13659
13660
/**
13661
 * Parse an XML document from a file descriptor and build a tree.
13662
 *
13663
 * NOTE that the file descriptor will not be closed when the
13664
 * context is freed or reset.
13665
 *
13666
 * `URL` is used as base to resolve external entities and for error
13667
 * reporting.
13668
 *
13669
 * @param ctxt  an XML parser context
13670
 * @param fd  an open file descriptor
13671
 * @param URL  base URL (optional)
13672
 * @param encoding  the document encoding (optional)
13673
 * @param options  a combination of xmlParserOption
13674
 * @returns the resulting document tree
13675
 */
13676
xmlDoc *
13677
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13678
              const char *URL, const char *encoding, int options)
13679
0
{
13680
0
    xmlParserInputPtr input;
13681
13682
0
    if (ctxt == NULL)
13683
0
        return(NULL);
13684
13685
0
    xmlCtxtReset(ctxt);
13686
0
    xmlCtxtUseOptions(ctxt, options);
13687
13688
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13689
0
    if (input == NULL)
13690
0
        return(NULL);
13691
13692
0
    return(xmlCtxtParseDocument(ctxt, input));
13693
0
}
13694
13695
/**
13696
 * Parse an XML document from I/O functions and source and build a tree.
13697
 * This reuses the existing `ctxt` parser context
13698
 *
13699
 * `URL` is used as base to resolve external entities and for error
13700
 * reporting.
13701
 *
13702
 * @param ctxt  an XML parser context
13703
 * @param ioread  an I/O read function
13704
 * @param ioclose  an I/O close function
13705
 * @param ioctx  an I/O handler
13706
 * @param URL  the base URL to use for the document
13707
 * @param encoding  the document encoding, or NULL
13708
 * @param options  a combination of xmlParserOption
13709
 * @returns the resulting document tree
13710
 */
13711
xmlDoc *
13712
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13713
              xmlInputCloseCallback ioclose, void *ioctx,
13714
        const char *URL,
13715
              const char *encoding, int options)
13716
0
{
13717
0
    xmlParserInputPtr input;
13718
13719
0
    if (ctxt == NULL)
13720
0
        return(NULL);
13721
13722
0
    xmlCtxtReset(ctxt);
13723
0
    xmlCtxtUseOptions(ctxt, options);
13724
13725
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13726
0
                                  encoding, 0);
13727
0
    if (input == NULL)
13728
0
        return(NULL);
13729
13730
0
    return(xmlCtxtParseDocument(ctxt, input));
13731
0
}
13732