Coverage Report

Created: 2025-08-28 07:12

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
478k
#define NS_INDEX_EMPTY  INT_MAX
81
81.9k
#define NS_INDEX_XML    (INT_MAX - 1)
82
184k
#define URI_HASH_EMPTY  0xD943A04E
83
9.29k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
135k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
539k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
539k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
633k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
192k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record describing one start tag: a prefix, a URI and two integers.
 * NOTE(review): how the parser fills and consumes these fields is not
 * visible in this section; the per-field notes below are inferred from
 * the names only and should be confirmed against the element stack code.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix; /* presumably the namespace prefix of the tag */
103
    const xmlChar *URI; /* presumably the namespace URI of the tag */
104
    int line; /* presumably the input line of the tag - confirm */
105
    int nsNr; /* presumably a namespace count for the tag - confirm */
106
};
107
108
/*
 * Extra per-namespace data: an opaque SAX pointer, two hash values,
 * an element id and an index.
 * NOTE(review): the code using this struct is outside this section;
 * field meanings beyond their types are not established here.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * Hash table slot pairing a hash value with an integer index.
 * NOTE(review): presumably the index refers to the namespace table;
 * confirm against the lookup code, which is not visible here.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping of a parser context: an array of
 * xmlParserNsExtra records, a hash table of xmlParserNsBucket slots
 * with its size and element count, and a few counters/indices.
 * NOTE(review): the code maintaining these fields is outside this
 * section; confirm exact semantics there.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
2.02M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
2.02M
#define XML_ENT_FIXED_COST 20
170
171
113M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
190k
#define XML_PARSER_BUFFER_SIZE 100
173
106k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
0
    xmlCtxtErrMemory(ctxt);
226
0
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
55.2k
{
239
55.2k
    if (prefix == NULL)
240
40.6k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
40.6k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
40.6k
                   "Attribute %s redefined\n", localname);
243
14.6k
    else
244
14.6k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
14.6k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
14.6k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
55.2k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
18.6M
{
260
18.6M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
18.6M
               NULL, NULL, NULL, 0, "%s", msg);
262
18.6M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
78.6k
{
277
78.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
78.6k
               str1, str2, NULL, 0, msg, str1, str2);
279
78.6k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
35.0M
{
314
35.0M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
35.0M
               NULL, NULL, NULL, val, msg, val);
316
35.0M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
416k
{
333
416k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
416k
               str1, str2, NULL, val, msg, str1, val, str2);
335
416k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
915k
{
349
915k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
915k
               val, NULL, NULL, 0, msg, val);
351
915k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
0
{
365
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
0
               val, NULL, NULL, 0, msg, val);
367
0
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
407k
{
385
407k
    ctxt->nsWellFormed = 0;
386
387
407k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
407k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
407k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
15.7k
{
407
15.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
15.7k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
15.7k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
2.02M
{
436
2.02M
    unsigned long consumed;
437
2.02M
    unsigned long *expandedSize;
438
2.02M
    xmlParserInputPtr input = ctxt->input;
439
2.02M
    xmlEntityPtr entity = input->entity;
440
441
2.02M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
1.58k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
2.02M
    consumed = input->consumed;
449
2.02M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
2.02M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
2.02M
    if (entity)
453
2.29k
        expandedSize = &entity->expandedSize;
454
2.01M
    else
455
2.01M
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
2.02M
    xmlSaturatedAdd(expandedSize, extra);
461
2.02M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
2.02M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
2.02M
        ((*expandedSize >= ULONG_MAX) ||
470
1.04M
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
304
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
304
                       "Maximum entity amplification factor exceeded, see "
473
304
                       "xmlCtxtSetMaxAmplification.\n");
474
304
        return(1);
475
304
    }
476
477
2.01M
    return(0);
478
2.02M
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
#ifdef LIBXML_ZLIB_ENABLED
649
            return(1);
650
#else
651
0
            return(0);
652
0
#endif
653
0
        case XML_WITH_LZMA:
654
#ifdef LIBXML_LZMA_ENABLED
655
            return(1);
656
#else
657
0
            return(0);
658
0
#endif
659
0
        case XML_WITH_ICU:
660
#ifdef LIBXML_ICU_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        default:
666
0
      break;
667
0
     }
668
0
     return(0);
669
0
}
670
671
/************************************************************************
672
 *                  *
673
 *      Simple string buffer        *
674
 *                  *
675
 ************************************************************************/
676
677
/*
 * Growable byte buffer with a hard size limit and an error state.
 * The xmlSBuf* helpers below allocate `mem` lazily, refuse additions
 * that would exceed `max`, and record failures in `code` instead of
 * returning them from every append.
 */
typedef struct {
678
    xmlChar *mem; /* storage, NULL until the buffer first grows */
679
    unsigned size; /* number of bytes currently stored */
680
    unsigned cap; /* bytes allocated for mem; size < cap */
681
    unsigned max; /* upper limit for size; size <= max */
682
    xmlParserErrors code; /* XML_ERR_OK while no error was recorded */
683
} xmlSBuf;
684
685
static void
686
624k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
687
624k
    buf->mem = NULL;
688
624k
    buf->size = 0;
689
624k
    buf->cap = 0;
690
624k
    buf->max = max;
691
624k
    buf->code = XML_ERR_OK;
692
624k
}
693
694
static int
695
294k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
696
294k
    xmlChar *mem;
697
294k
    unsigned cap;
698
699
294k
    if (len >= UINT_MAX / 2 - buf->size) {
700
0
        if (buf->code == XML_ERR_OK)
701
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
702
0
        return(-1);
703
0
    }
704
705
294k
    cap = (buf->size + len) * 2;
706
294k
    if (cap < 240)
707
210k
        cap = 240;
708
709
294k
    mem = xmlRealloc(buf->mem, cap);
710
294k
    if (mem == NULL) {
711
0
        buf->code = XML_ERR_NO_MEMORY;
712
0
        return(-1);
713
0
    }
714
715
294k
    buf->mem = mem;
716
294k
    buf->cap = cap;
717
718
294k
    return(0);
719
294k
}
720
721
static void
722
115M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
723
115M
    if (buf->max - buf->size < len) {
724
350k
        if (buf->code == XML_ERR_OK)
725
82
            buf->code = XML_ERR_RESOURCE_LIMIT;
726
350k
        return;
727
350k
    }
728
729
114M
    if (buf->cap - buf->size <= len) {
730
253k
        if (xmlSBufGrow(buf, len) < 0)
731
0
            return;
732
253k
    }
733
734
114M
    if (len > 0)
735
114M
        memcpy(buf->mem + buf->size, str, len);
736
114M
    buf->size += len;
737
114M
}
738
739
static void
740
91.0M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
741
91.0M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
742
91.0M
}
743
744
static void
745
483k
xmlSBufAddChar(xmlSBuf *buf, int c) {
746
483k
    xmlChar *end;
747
748
483k
    if (buf->max - buf->size < 4) {
749
208
        if (buf->code == XML_ERR_OK)
750
1
            buf->code = XML_ERR_RESOURCE_LIMIT;
751
208
        return;
752
208
    }
753
754
483k
    if (buf->cap - buf->size <= 4) {
755
40.6k
        if (xmlSBufGrow(buf, 4) < 0)
756
0
            return;
757
40.6k
    }
758
759
483k
    end = buf->mem + buf->size;
760
761
483k
    if (c < 0x80) {
762
209k
        *end = (xmlChar) c;
763
209k
        buf->size += 1;
764
274k
    } else {
765
274k
        buf->size += xmlCopyCharMultiByte(end, c);
766
274k
    }
767
483k
}
768
769
static void
770
67.6M
xmlSBufAddReplChar(xmlSBuf *buf) {
771
67.6M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
772
67.6M
}
773
774
static void
775
83
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
776
83
    if (buf->code == XML_ERR_NO_MEMORY)
777
0
        xmlCtxtErrMemory(ctxt);
778
83
    else
779
83
        xmlFatalErr(ctxt, buf->code, errMsg);
780
83
}
781
782
static xmlChar *
783
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
784
229k
              const char *errMsg) {
785
229k
    if (buf->mem == NULL) {
786
20.6k
        buf->mem = xmlMalloc(1);
787
20.6k
        if (buf->mem == NULL) {
788
0
            buf->code = XML_ERR_NO_MEMORY;
789
20.6k
        } else {
790
20.6k
            buf->mem[0] = 0;
791
20.6k
        }
792
209k
    } else {
793
209k
        buf->mem[buf->size] = 0;
794
209k
    }
795
796
229k
    if (buf->code == XML_ERR_OK) {
797
229k
        if (sizeOut != NULL)
798
113k
            *sizeOut = buf->size;
799
229k
        return(buf->mem);
800
229k
    }
801
802
5
    xmlSBufReportError(buf, ctxt, errMsg);
803
804
5
    xmlFree(buf->mem);
805
806
5
    if (sizeOut != NULL)
807
5
        *sizeOut = 0;
808
5
    return(NULL);
809
229k
}
810
811
static void
812
383k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
813
383k
    if (buf->code != XML_ERR_OK)
814
78
        xmlSBufReportError(buf, ctxt, errMsg);
815
816
383k
    xmlFree(buf->mem);
817
383k
}
818
819
static int
820
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
821
171M
                    const char *errMsg) {
822
171M
    int c = str[0];
823
171M
    int c1 = str[1];
824
825
171M
    if ((c1 & 0xC0) != 0x80)
826
11.1M
        goto encoding_error;
827
828
160M
    if (c < 0xE0) {
829
        /* 2-byte sequence */
830
78.1M
        if (c < 0xC2)
831
39.6M
            goto encoding_error;
832
833
38.4M
        return(2);
834
82.0M
    } else {
835
82.0M
        int c2 = str[2];
836
837
82.0M
        if ((c2 & 0xC0) != 0x80)
838
40.0k
            goto encoding_error;
839
840
81.9M
        if (c < 0xF0) {
841
            /* 3-byte sequence */
842
81.9M
            if (c == 0xE0) {
843
                /* overlong */
844
41.4k
                if (c1 < 0xA0)
845
5.24k
                    goto encoding_error;
846
81.9M
            } else if (c == 0xED) {
847
                /* surrogate */
848
2.20k
                if (c1 >= 0xA0)
849
949
                    goto encoding_error;
850
81.9M
            } else if (c == 0xEF) {
851
                /* U+FFFE and U+FFFF are invalid Chars */
852
5.18k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
853
1.12k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
854
5.18k
            }
855
856
81.9M
            return(3);
857
81.9M
        } else {
858
            /* 4-byte sequence */
859
22.7k
            if ((str[3] & 0xC0) != 0x80)
860
4.25k
                goto encoding_error;
861
18.4k
            if (c == 0xF0) {
862
                /* overlong */
863
2.48k
                if (c1 < 0x90)
864
1.81k
                    goto encoding_error;
865
15.9k
            } else if (c >= 0xF4) {
866
                /* greater than 0x10FFFF */
867
7.94k
                if ((c > 0xF4) || (c1 >= 0x90))
868
5.86k
                    goto encoding_error;
869
7.94k
            }
870
871
10.7k
            return(4);
872
18.4k
        }
873
81.9M
    }
874
875
50.9M
encoding_error:
876
    /* Only report the first error */
877
50.9M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
878
2.83k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
879
2.83k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
880
2.83k
    }
881
882
50.9M
    return(0);
883
160M
}
884
885
/************************************************************************
886
 *                  *
887
 *    SAX2 defaulted attributes handling      *
888
 *                  *
889
 ************************************************************************/
890
891
/**
892
 * Final initialization of the parser context before starting to parse.
893
 *
894
 * This accounts for users modifying struct members of parser context
895
 * directly.
896
 *
897
 * @param ctxt  an XML parser context
898
 */
899
static void
900
39.9k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
901
39.9k
    xmlSAXHandlerPtr sax;
902
903
    /* Avoid unused variable warning if features are disabled. */
904
39.9k
    (void) sax;
905
906
    /*
907
     * Changing the SAX struct directly is still widespread practice
908
     * in internal and external code.
909
     */
910
39.9k
    if (ctxt == NULL) return;
911
39.9k
    sax = ctxt->sax;
912
39.9k
#ifdef LIBXML_SAX1_ENABLED
913
    /*
914
     * Only enable SAX2 if there SAX2 element handlers, except when there
915
     * are no element handlers at all.
916
     */
917
39.9k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
918
39.9k
        (sax) &&
919
39.9k
        (sax->initialized == XML_SAX2_MAGIC) &&
920
39.9k
        ((sax->startElementNs != NULL) ||
921
39.9k
         (sax->endElementNs != NULL) ||
922
39.9k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
923
39.9k
        ctxt->sax2 = 1;
924
#else
925
    ctxt->sax2 = 1;
926
#endif /* LIBXML_SAX1_ENABLED */
927
928
    /*
929
     * Some users replace the dictionary directly in the context struct.
930
     * We really need an API function to do that cleanly.
931
     */
932
39.9k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
933
39.9k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
934
39.9k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
935
39.9k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
936
39.9k
    (ctxt->str_xml_ns == NULL)) {
937
0
        xmlErrMemory(ctxt);
938
0
    }
939
940
39.9k
    xmlDictSetLimit(ctxt->dict,
941
39.9k
                    (ctxt->options & XML_PARSE_HUGE) ?
942
0
                        0 :
943
39.9k
                        XML_MAX_DICTIONARY_LIMIT);
944
945
39.9k
#ifdef LIBXML_VALID_ENABLED
946
39.9k
    if (ctxt->validate)
947
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
948
39.9k
    else
949
39.9k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
950
39.9k
#endif /* LIBXML_VALID_ENABLED */
951
39.9k
}
952
953
/*
 * One defaulted attribute: hashed prefix, name and value strings plus
 * a pointer and two integers.
 * NOTE(review): the code filling these fields lies mostly outside this
 * section; the per-field notes are inferred from names and should be
 * confirmed there.
 */
typedef struct {
954
    xmlHashedString prefix;
955
    xmlHashedString name;
956
    xmlHashedString value;
957
    const xmlChar *valueEnd; /* presumably the end of the value string */
958
    int external; /* presumably set for externally declared defaults */
959
    int expandedSize;
960
} xmlDefAttr;
961
962
/*
 * Set of defaulted attributes of one element, stored as a header
 * followed by a trailing array of xmlDefAttr entries. The array is a
 * flexible array member when compiling as C99 or later, and a
 * one-element array otherwise.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
963
typedef xmlDefAttrs *xmlDefAttrsPtr;
964
struct _xmlDefAttrs {
965
    int nbAttrs;  /* number of defaulted attributes on that element */
966
    int maxAttrs;       /* the size of the array */
967
#if __STDC_VERSION__ >= 199901L
968
    /* Using a C99 flexible array member avoids UBSan errors. */
969
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
970
#else
971
    xmlDefAttr attrs[1];
972
#endif
973
};
974
975
/**
976
 * Normalize the space in non CDATA attribute values:
977
 * If the attribute type is not CDATA, then the XML processor MUST further
978
 * process the normalized attribute value by discarding any leading and
979
 * trailing space (\#x20) characters, and by replacing sequences of space
980
 * (\#x20) characters by a single space (\#x20) character.
981
 * Note that the size of dst need to be at least src, and if one doesn't need
982
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
983
 * passing src as dst is just fine.
984
 *
985
 * @param src  the source string
986
 * @param dst  the target string
987
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
988
 *         is needed.
989
 */
990
static xmlChar *
991
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
992
29.9k
{
993
29.9k
    if ((src == NULL) || (dst == NULL))
994
0
        return(NULL);
995
996
34.3k
    while (*src == 0x20) src++;
997
2.15M
    while (*src != 0) {
998
2.12M
  if (*src == 0x20) {
999
1.46M
      while (*src == 0x20) src++;
1000
27.1k
      if (*src != 0)
1001
25.0k
    *dst++ = 0x20;
1002
2.09M
  } else {
1003
2.09M
      *dst++ = *src++;
1004
2.09M
  }
1005
2.12M
    }
1006
29.9k
    *dst = 0;
1007
29.9k
    if (dst == src)
1008
25.6k
       return(NULL);
1009
4.35k
    return(dst);
1010
29.9k
}
1011
1012
/**
1013
 * Add a defaulted attribute for an element
1014
 *
1015
 * @param ctxt  an XML parser context
1016
 * @param fullname  the element fullname
1017
 * @param fullattr  the attribute fullname
1018
 * @param value  the attribute value
1019
 */
1020
static void
1021
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1022
               const xmlChar *fullname,
1023
               const xmlChar *fullattr,
1024
31.3k
               const xmlChar *value) {
1025
31.3k
    xmlDefAttrsPtr defaults;
1026
31.3k
    xmlDefAttr *attr;
1027
31.3k
    int len, expandedSize;
1028
31.3k
    xmlHashedString name;
1029
31.3k
    xmlHashedString prefix;
1030
31.3k
    xmlHashedString hvalue;
1031
31.3k
    const xmlChar *localname;
1032
1033
    /*
1034
     * Allows to detect attribute redefinitions
1035
     */
1036
31.3k
    if (ctxt->attsSpecial != NULL) {
1037
28.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1038
13.7k
      return;
1039
28.5k
    }
1040
1041
17.6k
    if (ctxt->attsDefault == NULL) {
1042
2.81k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1043
2.81k
  if (ctxt->attsDefault == NULL)
1044
0
      goto mem_error;
1045
2.81k
    }
1046
1047
    /*
1048
     * split the element name into prefix:localname , the string found
1049
     * are within the DTD and then not associated to namespace names.
1050
     */
1051
17.6k
    localname = xmlSplitQName3(fullname, &len);
1052
17.6k
    if (localname == NULL) {
1053
13.1k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1054
13.1k
  prefix.name = NULL;
1055
13.1k
    } else {
1056
4.50k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1057
4.50k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1058
4.50k
        if (prefix.name == NULL)
1059
0
            goto mem_error;
1060
4.50k
    }
1061
17.6k
    if (name.name == NULL)
1062
0
        goto mem_error;
1063
1064
    /*
1065
     * make sure there is some storage
1066
     */
1067
17.6k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1068
17.6k
    if ((defaults == NULL) ||
1069
17.6k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1070
6.18k
        xmlDefAttrsPtr temp;
1071
6.18k
        int newSize;
1072
1073
6.18k
        if (defaults == NULL) {
1074
3.95k
            newSize = 4;
1075
3.95k
        } else {
1076
2.22k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1077
2.22k
                ((size_t) defaults->maxAttrs >
1078
2.22k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1079
0
                goto mem_error;
1080
1081
2.22k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1082
0
                newSize = XML_MAX_ATTRS;
1083
2.22k
            else
1084
2.22k
                newSize = defaults->maxAttrs * 2;
1085
2.22k
        }
1086
6.18k
        temp = xmlRealloc(defaults,
1087
6.18k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1088
6.18k
  if (temp == NULL)
1089
0
      goto mem_error;
1090
6.18k
        if (defaults == NULL)
1091
3.95k
            temp->nbAttrs = 0;
1092
6.18k
  temp->maxAttrs = newSize;
1093
6.18k
        defaults = temp;
1094
6.18k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1095
6.18k
                          defaults, NULL) < 0) {
1096
0
      xmlFree(defaults);
1097
0
      goto mem_error;
1098
0
  }
1099
6.18k
    }
1100
1101
    /*
1102
     * Split the attribute name into prefix:localname , the string found
1103
     * are within the DTD and hen not associated to namespace names.
1104
     */
1105
17.6k
    localname = xmlSplitQName3(fullattr, &len);
1106
17.6k
    if (localname == NULL) {
1107
10.6k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1108
10.6k
  prefix.name = NULL;
1109
10.6k
    } else {
1110
6.94k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1111
6.94k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1112
6.94k
        if (prefix.name == NULL)
1113
0
            goto mem_error;
1114
6.94k
    }
1115
17.6k
    if (name.name == NULL)
1116
0
        goto mem_error;
1117
1118
    /* intern the string and precompute the end */
1119
17.6k
    len = strlen((const char *) value);
1120
17.6k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1121
17.6k
    if (hvalue.name == NULL)
1122
0
        goto mem_error;
1123
1124
17.6k
    expandedSize = strlen((const char *) name.name);
1125
17.6k
    if (prefix.name != NULL)
1126
6.94k
        expandedSize += strlen((const char *) prefix.name);
1127
17.6k
    expandedSize += len;
1128
1129
17.6k
    attr = &defaults->attrs[defaults->nbAttrs++];
1130
17.6k
    attr->name = name;
1131
17.6k
    attr->prefix = prefix;
1132
17.6k
    attr->value = hvalue;
1133
17.6k
    attr->valueEnd = hvalue.name + len;
1134
17.6k
    attr->external = PARSER_EXTERNAL(ctxt);
1135
17.6k
    attr->expandedSize = expandedSize;
1136
1137
17.6k
    return;
1138
1139
0
mem_error:
1140
0
    xmlErrMemory(ctxt);
1141
0
}
1142
1143
/**
1144
 * Register this attribute type
1145
 *
1146
 * @param ctxt  an XML parser context
1147
 * @param fullname  the element fullname
1148
 * @param fullattr  the attribute fullname
1149
 * @param type  the attribute type
1150
 */
1151
static void
1152
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1153
      const xmlChar *fullname,
1154
      const xmlChar *fullattr,
1155
      int type)
1156
34.6k
{
1157
34.6k
    if (ctxt->attsSpecial == NULL) {
1158
2.95k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1159
2.95k
  if (ctxt->attsSpecial == NULL)
1160
0
      goto mem_error;
1161
2.95k
    }
1162
1163
34.6k
    if (PARSER_EXTERNAL(ctxt))
1164
0
        type |= XML_SPECIAL_EXTERNAL;
1165
1166
34.6k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1167
34.6k
                    XML_INT_TO_PTR(type)) < 0)
1168
0
        goto mem_error;
1169
34.6k
    return;
1170
1171
34.6k
mem_error:
1172
0
    xmlErrMemory(ctxt);
1173
0
}
1174
1175
/**
1176
 * Removes CDATA attributes from the special attribute table
1177
 */
1178
static void
1179
xmlCleanSpecialAttrCallback(void *payload, void *data,
1180
                            const xmlChar *fullname, const xmlChar *fullattr,
1181
19.6k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1182
19.6k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1183
1184
19.6k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1185
1.04k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1186
1.04k
    }
1187
19.6k
}
1188
1189
/**
1190
 * Trim the list of attributes defined to remove all those of type
1191
 * CDATA as they are not special. This call should be done when finishing
1192
 * to parse the DTD and before starting to parse the document root.
1193
 *
1194
 * @param ctxt  an XML parser context
1195
 */
1196
static void
1197
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1198
11.3k
{
1199
11.3k
    if (ctxt->attsSpecial == NULL)
1200
8.40k
        return;
1201
1202
2.95k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1203
1204
2.95k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1205
72
        xmlHashFree(ctxt->attsSpecial, NULL);
1206
72
        ctxt->attsSpecial = NULL;
1207
72
    }
1208
2.95k
}
1209
1210
/**
1211
 * Checks that the value conforms to the LanguageID production:
1212
 *
1213
 * @deprecated Internal function, do not use.
1214
 *
1215
 * NOTE: this is somewhat deprecated, those productions were removed from
1216
 * the XML Second edition.
1217
 *
1218
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1219
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1220
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1221
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1222
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1223
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1224
 *
1225
 * The current REC reference the successors of RFC 1766, currently 5646
1226
 *
1227
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1228
 *
1229
 *     langtag       = language
1230
 *                     ["-" script]
1231
 *                     ["-" region]
1232
 *                     *("-" variant)
1233
 *                     *("-" extension)
1234
 *                     ["-" privateuse]
1235
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1236
 *                     ["-" extlang]       ; sometimes followed by
1237
 *                                         ; extended language subtags
1238
 *                   / 4ALPHA              ; or reserved for future use
1239
 *                   / 5*8ALPHA            ; or registered language subtag
1240
 *
1241
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1242
 *                     *2("-" 3ALPHA)      ; permanently reserved
1243
 *
1244
 *     script        = 4ALPHA              ; ISO 15924 code
1245
 *
1246
 *     region        = 2ALPHA              ; ISO 3166-1 code
1247
 *                   / 3DIGIT              ; UN M.49 code
1248
 *
1249
 *     variant       = 5*8alphanum         ; registered variants
1250
 *                   / (DIGIT 3alphanum)
1251
 *
1252
 *     extension     = singleton 1*("-" (2*8alphanum))
1253
 *
1254
 *                                         ; Single alphanumerics
1255
 *                                         ; "x" reserved for private use
1256
 *     singleton     = DIGIT               ; 0 - 9
1257
 *                   / %x41-57             ; A - W
1258
 *                   / %x59-5A             ; Y - Z
1259
 *                   / %x61-77             ; a - w
1260
 *                   / %x79-7A             ; y - z
1261
 *
1262
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1263
 * The parser below doesn't try to cope with extension or privateuse
1264
 * that could be added but that's not interoperable anyway
1265
 *
1266
 * @param lang  pointer to the string value
1267
 * @returns 1 if correct 0 otherwise
1268
 **/
1269
int
1270
xmlCheckLanguageID(const xmlChar * lang)
1271
0
{
1272
0
    const xmlChar *cur = lang, *nxt;
1273
1274
0
    if (cur == NULL)
1275
0
        return (0);
1276
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1277
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1278
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1279
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1280
        /*
1281
         * Still allow IANA code and user code which were coming
1282
         * from the previous version of the XML-1.0 specification
1283
         * it's deprecated but we should not fail
1284
         */
1285
0
        cur += 2;
1286
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1287
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1288
0
            cur++;
1289
0
        return(cur[0] == 0);
1290
0
    }
1291
0
    nxt = cur;
1292
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1293
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1294
0
           nxt++;
1295
0
    if (nxt - cur >= 4) {
1296
        /*
1297
         * Reserved
1298
         */
1299
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1300
0
            return(0);
1301
0
        return(1);
1302
0
    }
1303
0
    if (nxt - cur < 2)
1304
0
        return(0);
1305
    /* we got an ISO 639 code */
1306
0
    if (nxt[0] == 0)
1307
0
        return(1);
1308
0
    if (nxt[0] != '-')
1309
0
        return(0);
1310
1311
0
    nxt++;
1312
0
    cur = nxt;
1313
    /* now we can have extlang or script or region or variant */
1314
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1315
0
        goto region_m49;
1316
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur == 4)
1321
0
        goto script;
1322
0
    if (nxt - cur == 2)
1323
0
        goto region;
1324
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1325
0
        goto variant;
1326
0
    if (nxt - cur != 3)
1327
0
        return(0);
1328
    /* we parsed an extlang */
1329
0
    if (nxt[0] == 0)
1330
0
        return(1);
1331
0
    if (nxt[0] != '-')
1332
0
        return(0);
1333
1334
0
    nxt++;
1335
0
    cur = nxt;
1336
    /* now we can have script or region or variant */
1337
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
0
        goto region_m49;
1339
1340
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
0
           nxt++;
1343
0
    if (nxt - cur == 2)
1344
0
        goto region;
1345
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1346
0
        goto variant;
1347
0
    if (nxt - cur != 4)
1348
0
        return(0);
1349
    /* we parsed a script */
1350
0
script:
1351
0
    if (nxt[0] == 0)
1352
0
        return(1);
1353
0
    if (nxt[0] != '-')
1354
0
        return(0);
1355
1356
0
    nxt++;
1357
0
    cur = nxt;
1358
    /* now we can have region or variant */
1359
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1360
0
        goto region_m49;
1361
1362
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1363
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1364
0
           nxt++;
1365
1366
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1367
0
        goto variant;
1368
0
    if (nxt - cur != 2)
1369
0
        return(0);
1370
    /* we parsed a region */
1371
0
region:
1372
0
    if (nxt[0] == 0)
1373
0
        return(1);
1374
0
    if (nxt[0] != '-')
1375
0
        return(0);
1376
1377
0
    nxt++;
1378
0
    cur = nxt;
1379
    /* now we can just have a variant */
1380
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1381
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1382
0
           nxt++;
1383
1384
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1385
0
        return(0);
1386
1387
    /* we parsed a variant */
1388
0
variant:
1389
0
    if (nxt[0] == 0)
1390
0
        return(1);
1391
0
    if (nxt[0] != '-')
1392
0
        return(0);
1393
    /* extensions and private use subtags not checked */
1394
0
    return (1);
1395
1396
0
region_m49:
1397
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1398
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1399
0
        nxt += 3;
1400
0
        goto region;
1401
0
    }
1402
0
    return(0);
1403
0
}
1404
1405
/************************************************************************
1406
 *                  *
1407
 *    Parser stacks related functions and macros    *
1408
 *                  *
1409
 ************************************************************************/
1410
1411
static xmlChar *
1412
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1413
1414
/**
1415
 * Create a new namespace database.
1416
 *
1417
 * @returns the new obejct.
1418
 */
1419
xmlParserNsData *
1420
39.9k
xmlParserNsCreate(void) {
1421
39.9k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1422
1423
39.9k
    if (nsdb == NULL)
1424
0
        return(NULL);
1425
39.9k
    memset(nsdb, 0, sizeof(*nsdb));
1426
39.9k
    nsdb->defaultNsIndex = INT_MAX;
1427
1428
39.9k
    return(nsdb);
1429
39.9k
}
1430
1431
/**
1432
 * Free a namespace database.
1433
 *
1434
 * @param nsdb  namespace database
1435
 */
1436
void
1437
39.9k
xmlParserNsFree(xmlParserNsData *nsdb) {
1438
39.9k
    if (nsdb == NULL)
1439
0
        return;
1440
1441
39.9k
    xmlFree(nsdb->extra);
1442
39.9k
    xmlFree(nsdb->hash);
1443
39.9k
    xmlFree(nsdb);
1444
39.9k
}
1445
1446
/**
1447
 * Reset a namespace database.
1448
 *
1449
 * @param nsdb  namespace database
1450
 */
1451
static void
1452
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1453
0
    if (nsdb == NULL)
1454
0
        return;
1455
1456
0
    nsdb->hashElems = 0;
1457
0
    nsdb->elementId = 0;
1458
0
    nsdb->defaultNsIndex = INT_MAX;
1459
1460
0
    if (nsdb->hash)
1461
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1462
0
}
1463
1464
/**
1465
 * Signal that a new element has started.
1466
 *
1467
 * @param nsdb  namespace database
1468
 * @returns 0 on success, -1 if the element counter overflowed.
1469
 */
1470
static int
1471
1.99M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1472
1.99M
    if (nsdb->elementId == UINT_MAX)
1473
0
        return(-1);
1474
1.99M
    nsdb->elementId++;
1475
1476
1.99M
    return(0);
1477
1.99M
}
1478
1479
/**
1480
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1481
 * be set to the matching bucket, or the first empty bucket if no match
1482
 * was found.
1483
 *
1484
 * @param ctxt  parser context
1485
 * @param prefix  namespace prefix
1486
 * @param bucketPtr  optional bucket (return value)
1487
 * @returns the namespace index on success, INT_MAX if no namespace was
1488
 * found.
1489
 */
1490
static int
1491
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1492
1.62M
                  xmlParserNsBucket **bucketPtr) {
1493
1.62M
    xmlParserNsBucket *bucket, *tombstone;
1494
1.62M
    unsigned index, hashValue;
1495
1496
1.62M
    if (prefix->name == NULL)
1497
759k
        return(ctxt->nsdb->defaultNsIndex);
1498
1499
865k
    if (ctxt->nsdb->hashSize == 0)
1500
33.1k
        return(INT_MAX);
1501
1502
832k
    hashValue = prefix->hashValue;
1503
832k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1504
832k
    bucket = &ctxt->nsdb->hash[index];
1505
832k
    tombstone = NULL;
1506
1507
1.29M
    while (bucket->hashValue) {
1508
1.04M
        if (bucket->index == INT_MAX) {
1509
112k
            if (tombstone == NULL)
1510
100k
                tombstone = bucket;
1511
935k
        } else if (bucket->hashValue == hashValue) {
1512
587k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1513
587k
                if (bucketPtr != NULL)
1514
452k
                    *bucketPtr = bucket;
1515
587k
                return(bucket->index);
1516
587k
            }
1517
587k
        }
1518
1519
459k
        index++;
1520
459k
        bucket++;
1521
459k
        if (index == ctxt->nsdb->hashSize) {
1522
49.0k
            index = 0;
1523
49.0k
            bucket = ctxt->nsdb->hash;
1524
49.0k
        }
1525
459k
    }
1526
1527
245k
    if (bucketPtr != NULL)
1528
69.4k
        *bucketPtr = tombstone ? tombstone : bucket;
1529
245k
    return(INT_MAX);
1530
832k
}
1531
1532
/**
1533
 * Lookup namespace URI with given prefix.
1534
 *
1535
 * @param ctxt  parser context
1536
 * @param prefix  namespace prefix
1537
 * @returns the namespace URI on success, NULL if no namespace was found.
1538
 */
1539
static const xmlChar *
1540
891k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1541
891k
    const xmlChar *ret;
1542
891k
    int nsIndex;
1543
1544
891k
    if (prefix->name == ctxt->str_xml)
1545
1.48k
        return(ctxt->str_xml_ns);
1546
1547
    /*
1548
     * minNsIndex is used when building an entity tree. We must
1549
     * ignore namespaces declared outside the entity.
1550
     */
1551
890k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1552
890k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1553
480k
        return(NULL);
1554
1555
409k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1556
409k
    if (ret[0] == 0)
1557
4.11k
        ret = NULL;
1558
409k
    return(ret);
1559
890k
}
1560
1561
/**
1562
 * Lookup extra data for the given prefix. This returns data stored
1563
 * with xmlParserNsUdpateSax.
1564
 *
1565
 * @param ctxt  parser context
1566
 * @param prefix  namespace prefix
1567
 * @returns the data on success, NULL if no namespace was found.
1568
 */
1569
void *
1570
25.1k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1571
25.1k
    xmlHashedString hprefix;
1572
25.1k
    int nsIndex;
1573
1574
25.1k
    if (prefix == ctxt->str_xml)
1575
13.2k
        return(NULL);
1576
1577
11.8k
    hprefix.name = prefix;
1578
11.8k
    if (prefix != NULL)
1579
7.01k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1580
4.82k
    else
1581
4.82k
        hprefix.hashValue = 0;
1582
11.8k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1583
11.8k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584
0
        return(NULL);
1585
1586
11.8k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1587
11.8k
}
1588
1589
/**
1590
 * Sets or updates extra data for the given prefix. This value will be
1591
 * returned by xmlParserNsLookupSax as long as the namespace with the
1592
 * given prefix is in scope.
1593
 *
1594
 * @param ctxt  parser context
1595
 * @param prefix  namespace prefix
1596
 * @param saxData  extra data for SAX handler
1597
 * @returns the data on success, NULL if no namespace was found.
1598
 */
1599
int
1600
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1601
27.4k
                     void *saxData) {
1602
27.4k
    xmlHashedString hprefix;
1603
27.4k
    int nsIndex;
1604
1605
27.4k
    if (prefix == ctxt->str_xml)
1606
0
        return(-1);
1607
1608
27.4k
    hprefix.name = prefix;
1609
27.4k
    if (prefix != NULL)
1610
21.3k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1611
6.06k
    else
1612
6.06k
        hprefix.hashValue = 0;
1613
27.4k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1614
27.4k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1615
0
        return(-1);
1616
1617
27.4k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1618
27.4k
    return(0);
1619
27.4k
}
1620
1621
/**
1622
 * Grows the namespace tables.
1623
 *
1624
 * @param ctxt  parser context
1625
 * @returns 0 on success, -1 if a memory allocation failed.
1626
 */
1627
static int
1628
33.6k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1629
33.6k
    const xmlChar **table;
1630
33.6k
    xmlParserNsExtra *extra;
1631
33.6k
    int newSize;
1632
1633
33.6k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1634
33.6k
                              sizeof(table[0]) + sizeof(extra[0]),
1635
33.6k
                              16, XML_MAX_ITEMS);
1636
33.6k
    if (newSize < 0)
1637
0
        goto error;
1638
1639
33.6k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1640
33.6k
    if (table == NULL)
1641
0
        goto error;
1642
33.6k
    ctxt->nsTab = table;
1643
1644
33.6k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1645
33.6k
    if (extra == NULL)
1646
0
        goto error;
1647
33.6k
    ctxt->nsdb->extra = extra;
1648
1649
33.6k
    ctxt->nsMax = newSize;
1650
33.6k
    return(0);
1651
1652
0
error:
1653
0
    xmlErrMemory(ctxt);
1654
0
    return(-1);
1655
33.6k
}
1656
1657
/**
1658
 * Push a new namespace on the table.
1659
 *
1660
 * @param ctxt  parser context
1661
 * @param prefix  prefix with hash value
1662
 * @param uri  uri with hash value
1663
 * @param saxData  extra data for SAX handler
1664
 * @param defAttr  whether the namespace comes from a default attribute
1665
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1666
 * -1 if a memory allocation failed.
1667
 */
1668
static int
1669
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1670
390k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1671
390k
    xmlParserNsBucket *bucket = NULL;
1672
390k
    xmlParserNsExtra *extra;
1673
390k
    const xmlChar **ns;
1674
390k
    unsigned hashValue, nsIndex, oldIndex;
1675
1676
390k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1677
391
        return(0);
1678
1679
389k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1680
0
        xmlErrMemory(ctxt);
1681
0
        return(-1);
1682
0
    }
1683
1684
    /*
1685
     * Default namespace and 'xml' namespace
1686
     */
1687
389k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1688
119k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1689
1690
119k
        if (oldIndex != INT_MAX) {
1691
96.7k
            extra = &ctxt->nsdb->extra[oldIndex];
1692
1693
96.7k
            if (extra->elementId == ctxt->nsdb->elementId) {
1694
7.41k
                if (defAttr == 0)
1695
6.67k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1696
7.41k
                return(0);
1697
7.41k
            }
1698
1699
89.3k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1700
89.3k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1701
0
                return(0);
1702
89.3k
        }
1703
1704
112k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1705
112k
        goto populate_entry;
1706
119k
    }
1707
1708
    /*
1709
     * Hash table lookup
1710
     */
1711
270k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1712
270k
    if (oldIndex != INT_MAX) {
1713
196k
        extra = &ctxt->nsdb->extra[oldIndex];
1714
1715
        /*
1716
         * Check for duplicate definitions on the same element.
1717
         */
1718
196k
        if (extra->elementId == ctxt->nsdb->elementId) {
1719
9.41k
            if (defAttr == 0)
1720
8.58k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1721
9.41k
            return(0);
1722
9.41k
        }
1723
1724
187k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1725
187k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1726
0
            return(0);
1727
1728
187k
        bucket->index = ctxt->nsNr;
1729
187k
        goto populate_entry;
1730
187k
    }
1731
1732
    /*
1733
     * Insert new bucket
1734
     */
1735
1736
73.6k
    hashValue = prefix->hashValue;
1737
1738
    /*
1739
     * Grow hash table, 50% fill factor
1740
     */
1741
73.6k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1742
6.88k
        xmlParserNsBucket *newHash;
1743
6.88k
        unsigned newSize, i, index;
1744
1745
6.88k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1746
0
            xmlErrMemory(ctxt);
1747
0
            return(-1);
1748
0
        }
1749
6.88k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1750
6.88k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1751
6.88k
        if (newHash == NULL) {
1752
0
            xmlErrMemory(ctxt);
1753
0
            return(-1);
1754
0
        }
1755
6.88k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1756
1757
190k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1758
183k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1759
183k
            unsigned newIndex;
1760
1761
183k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1762
171k
                continue;
1763
11.9k
            newIndex = hv & (newSize - 1);
1764
1765
14.3k
            while (newHash[newIndex].hashValue != 0) {
1766
2.48k
                newIndex++;
1767
2.48k
                if (newIndex == newSize)
1768
331
                    newIndex = 0;
1769
2.48k
            }
1770
1771
11.9k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1772
11.9k
        }
1773
1774
6.88k
        xmlFree(ctxt->nsdb->hash);
1775
6.88k
        ctxt->nsdb->hash = newHash;
1776
6.88k
        ctxt->nsdb->hashSize = newSize;
1777
1778
        /*
1779
         * Relookup
1780
         */
1781
6.88k
        index = hashValue & (newSize - 1);
1782
1783
7.62k
        while (newHash[index].hashValue != 0) {
1784
735
            index++;
1785
735
            if (index == newSize)
1786
118
                index = 0;
1787
735
        }
1788
1789
6.88k
        bucket = &newHash[index];
1790
6.88k
    }
1791
1792
73.6k
    bucket->hashValue = hashValue;
1793
73.6k
    bucket->index = ctxt->nsNr;
1794
73.6k
    ctxt->nsdb->hashElems++;
1795
73.6k
    oldIndex = INT_MAX;
1796
1797
373k
populate_entry:
1798
373k
    nsIndex = ctxt->nsNr;
1799
1800
373k
    ns = &ctxt->nsTab[nsIndex * 2];
1801
373k
    ns[0] = prefix ? prefix->name : NULL;
1802
373k
    ns[1] = uri->name;
1803
1804
373k
    extra = &ctxt->nsdb->extra[nsIndex];
1805
373k
    extra->saxData = saxData;
1806
373k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1807
373k
    extra->uriHashValue = uri->hashValue;
1808
373k
    extra->elementId = ctxt->nsdb->elementId;
1809
373k
    extra->oldIndex = oldIndex;
1810
1811
373k
    ctxt->nsNr++;
1812
1813
373k
    return(1);
1814
73.6k
}
1815
1816
/**
1817
 * Pops the top `nr` namespaces and restores the hash table.
1818
 *
1819
 * @param ctxt  an XML parser context
1820
 * @param nr  the number to pop
1821
 * @returns the number of namespaces popped.
1822
 */
1823
static int
1824
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1825
185k
{
1826
185k
    int i;
1827
1828
    /* assert(nr <= ctxt->nsNr); */
1829
1830
551k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1831
366k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1832
366k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1833
1834
366k
        if (prefix == NULL) {
1835
110k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1836
256k
        } else {
1837
256k
            xmlHashedString hprefix;
1838
256k
            xmlParserNsBucket *bucket = NULL;
1839
1840
256k
            hprefix.name = prefix;
1841
256k
            hprefix.hashValue = extra->prefixHashValue;
1842
256k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1843
            /* assert(bucket && bucket->hashValue); */
1844
256k
            bucket->index = extra->oldIndex;
1845
256k
        }
1846
366k
    }
1847
1848
185k
    ctxt->nsNr -= nr;
1849
185k
    return(nr);
1850
185k
}
1851
1852
static int
1853
12.7k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1854
12.7k
    const xmlChar **atts;
1855
12.7k
    unsigned *attallocs;
1856
12.7k
    int newSize;
1857
1858
12.7k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1859
12.7k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1860
12.7k
                              10, XML_MAX_ATTRS);
1861
12.7k
    if (newSize < 0) {
1862
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1863
0
                    "Maximum number of attributes exceeded");
1864
0
        return(-1);
1865
0
    }
1866
1867
12.7k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1868
12.7k
    if (atts == NULL)
1869
0
        goto mem_error;
1870
12.7k
    ctxt->atts = atts;
1871
1872
12.7k
    attallocs = xmlRealloc(ctxt->attallocs,
1873
12.7k
                           newSize * sizeof(attallocs[0]));
1874
12.7k
    if (attallocs == NULL)
1875
0
        goto mem_error;
1876
12.7k
    ctxt->attallocs = attallocs;
1877
1878
12.7k
    ctxt->maxatts = newSize * 5;
1879
1880
12.7k
    return(0);
1881
1882
0
mem_error:
1883
0
    xmlErrMemory(ctxt);
1884
0
    return(-1);
1885
12.7k
}
1886
1887
/**
1888
 * Pushes a new parser input on top of the input stack
1889
 *
1890
 * @param ctxt  an XML parser context
1891
 * @param value  the parser input
1892
 * @returns -1 in case of error, the index in the stack otherwise
1893
 */
1894
int
1895
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1896
70.0k
{
1897
70.0k
    char *directory = NULL;
1898
70.0k
    int maxDepth;
1899
1900
70.0k
    if ((ctxt == NULL) || (value == NULL))
1901
0
        return(-1);
1902
1903
70.0k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1904
1905
70.0k
    if (ctxt->inputNr >= ctxt->inputMax) {
1906
2.45k
        xmlParserInputPtr *tmp;
1907
2.45k
        int newSize;
1908
1909
2.45k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1910
2.45k
                                  5, maxDepth);
1911
2.45k
        if (newSize < 0) {
1912
3
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1913
3
                           "Maximum entity nesting depth exceeded");
1914
3
            return(-1);
1915
3
        }
1916
2.45k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1917
2.45k
        if (tmp == NULL) {
1918
0
            xmlErrMemory(ctxt);
1919
0
            return(-1);
1920
0
        }
1921
2.45k
        ctxt->inputTab = tmp;
1922
2.45k
        ctxt->inputMax = newSize;
1923
2.45k
    }
1924
1925
70.0k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1926
18.3k
        directory = xmlParserGetDirectory(value->filename);
1927
18.3k
        if (directory == NULL) {
1928
0
            xmlErrMemory(ctxt);
1929
0
            return(-1);
1930
0
        }
1931
18.3k
    }
1932
1933
70.0k
    if (ctxt->input_id >= INT_MAX) {
1934
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1935
0
        return(-1);
1936
0
    }
1937
1938
70.0k
    ctxt->inputTab[ctxt->inputNr] = value;
1939
70.0k
    ctxt->input = value;
1940
1941
70.0k
    if (ctxt->inputNr == 0) {
1942
39.9k
        xmlFree(ctxt->directory);
1943
39.9k
        ctxt->directory = directory;
1944
39.9k
    }
1945
1946
    /*
1947
     * The input ID is unused internally, but there are entity
1948
     * loaders in downstream code that detect the main document
1949
     * by checking for "input_id == 1".
1950
     */
1951
70.0k
    value->id = ctxt->input_id++;
1952
1953
70.0k
    return(ctxt->inputNr++);
1954
70.0k
}
1955
1956
/**
1957
 * Pops the top parser input from the input stack
1958
 *
1959
 * @param ctxt  an XML parser context
1960
 * @returns the input just removed
1961
 */
1962
xmlParserInput *
1963
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1964
149k
{
1965
149k
    xmlParserInputPtr ret;
1966
1967
149k
    if (ctxt == NULL)
1968
0
        return(NULL);
1969
149k
    if (ctxt->inputNr <= 0)
1970
79.9k
        return (NULL);
1971
70.0k
    ctxt->inputNr--;
1972
70.0k
    if (ctxt->inputNr > 0)
1973
30.0k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1974
39.9k
    else
1975
39.9k
        ctxt->input = NULL;
1976
70.0k
    ret = ctxt->inputTab[ctxt->inputNr];
1977
70.0k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1978
70.0k
    return (ret);
1979
149k
}
1980
1981
/**
1982
 * Pushes a new element node on top of the node stack
1983
 *
1984
 * @deprecated Internal function, do not use.
1985
 *
1986
 * @param ctxt  an XML parser context
1987
 * @param value  the element node
1988
 * @returns -1 in case of error, the index in the stack otherwise
1989
 */
1990
int
1991
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1992
83.8k
{
1993
83.8k
    if (ctxt == NULL)
1994
0
        return(0);
1995
1996
83.8k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1997
15.7k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998
15.7k
        xmlNodePtr *tmp;
1999
15.7k
        int newSize;
2000
2001
15.7k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2002
15.7k
                                  10, maxDepth);
2003
15.7k
        if (newSize < 0) {
2004
14
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2005
14
                    "Excessive depth in document: %d,"
2006
14
                    " use XML_PARSE_HUGE option\n",
2007
14
                    ctxt->nodeNr);
2008
14
            return(-1);
2009
14
        }
2010
2011
15.7k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2012
15.7k
        if (tmp == NULL) {
2013
0
            xmlErrMemory(ctxt);
2014
0
            return (-1);
2015
0
        }
2016
15.7k
        ctxt->nodeTab = tmp;
2017
15.7k
  ctxt->nodeMax = newSize;
2018
15.7k
    }
2019
2020
83.8k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2021
83.8k
    ctxt->node = value;
2022
83.8k
    return (ctxt->nodeNr++);
2023
83.8k
}
2024
2025
/**
2026
 * Pops the top element node from the node stack
2027
 *
2028
 * @deprecated Internal function, do not use.
2029
 *
2030
 * @param ctxt  an XML parser context
2031
 * @returns the node just removed
2032
 */
2033
xmlNode *
2034
nodePop(xmlParserCtxt *ctxt)
2035
422k
{
2036
422k
    xmlNodePtr ret;
2037
2038
422k
    if (ctxt == NULL) return(NULL);
2039
422k
    if (ctxt->nodeNr <= 0)
2040
344k
        return (NULL);
2041
77.1k
    ctxt->nodeNr--;
2042
77.1k
    if (ctxt->nodeNr > 0)
2043
71.2k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044
5.86k
    else
2045
5.86k
        ctxt->node = NULL;
2046
77.1k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2047
77.1k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048
77.1k
    return (ret);
2049
422k
}
2050
2051
/**
2052
 * Pushes a new element name/prefix/URL on top of the name stack
2053
 *
2054
 * @param ctxt  an XML parser context
2055
 * @param value  the element name
2056
 * @param prefix  the element prefix
2057
 * @param URI  the element namespace name
2058
 * @param line  the current line number for error messages
2059
 * @param nsNr  the number of namespaces pushed on the namespace table
2060
 * @returns -1 in case of error, the index in the stack otherwise
2061
 */
2062
static int
2063
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2064
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2065
893k
{
2066
893k
    xmlStartTag *tag;
2067
2068
893k
    if (ctxt->nameNr >= ctxt->nameMax) {
2069
52.1k
        const xmlChar **tmp;
2070
52.1k
        xmlStartTag *tmp2;
2071
52.1k
        int newSize;
2072
2073
52.1k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2074
52.1k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2075
52.1k
                                  10, XML_MAX_ITEMS);
2076
52.1k
        if (newSize < 0)
2077
0
            goto mem_error;
2078
2079
52.1k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2080
52.1k
        if (tmp == NULL)
2081
0
      goto mem_error;
2082
52.1k
  ctxt->nameTab = tmp;
2083
2084
52.1k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2085
52.1k
        if (tmp2 == NULL)
2086
0
      goto mem_error;
2087
52.1k
  ctxt->pushTab = tmp2;
2088
2089
52.1k
        ctxt->nameMax = newSize;
2090
841k
    } else if (ctxt->pushTab == NULL) {
2091
21.4k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2092
21.4k
        if (ctxt->pushTab == NULL)
2093
0
            goto mem_error;
2094
21.4k
    }
2095
893k
    ctxt->nameTab[ctxt->nameNr] = value;
2096
893k
    ctxt->name = value;
2097
893k
    tag = &ctxt->pushTab[ctxt->nameNr];
2098
893k
    tag->prefix = prefix;
2099
893k
    tag->URI = URI;
2100
893k
    tag->line = line;
2101
893k
    tag->nsNr = nsNr;
2102
893k
    return (ctxt->nameNr++);
2103
0
mem_error:
2104
0
    xmlErrMemory(ctxt);
2105
0
    return (-1);
2106
893k
}
2107
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost element name from the name stack and makes the
 * name below it (if any) the current name.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;

    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2131
2132
/**
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * @deprecated Internal function, do not use.
2136
 *
2137
 * @param ctxt  an XML parser context
2138
 * @returns the name just removed
2139
 */
2140
static const xmlChar *
2141
namePop(xmlParserCtxtPtr ctxt)
2142
878k
{
2143
878k
    const xmlChar *ret;
2144
2145
878k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2146
0
        return (NULL);
2147
878k
    ctxt->nameNr--;
2148
878k
    if (ctxt->nameNr > 0)
2149
871k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2150
6.50k
    else
2151
6.50k
        ctxt->name = NULL;
2152
878k
    ret = ctxt->nameTab[ctxt->nameNr];
2153
878k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2154
878k
    return (ret);
2155
878k
}
2156
2157
1.99M
/*
 * Pushes `val` on the space stack, growing the stack when it is full,
 * and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 on error (a memory error
 * is raised on ctxt).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;

    return(slot);
}
2182
2183
1.97M
/*
 * Pops the top entry of the space stack and returns its value. The
 * vacated slot is overwritten with -1. Returns 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;

    /* ctxt->space keeps pointing at slot 0 once the stack is empty. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2195
2196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (currently defined
 *           identically to CUR)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser. Evaluates its
 *           argument twice.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW and NEXT1 expand to complete statements rather than
 * expressions, so they cannot be used as the body of an `if` that has
 * an `else` branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
2302
2303
static int
2304
177M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2305
177M
    int c = xmlCurrentChar(ctxt, len);
2306
2307
177M
    if (c == XML_INVALID_CHAR)
2308
34.0M
        c = 0xFFFD; /* replacement character */
2309
2310
177M
    return(c);
2311
177M
}
2312
2313
/**
2314
 * Skip whitespace in the input stream.
2315
 *
2316
 * @deprecated Internal function, do not use.
2317
 *
2318
 * @param ctxt  the XML parser context
2319
 * @returns the number of space chars skipped
2320
 */
2321
int
2322
3.67M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2323
3.67M
    const xmlChar *cur;
2324
3.67M
    int res = 0;
2325
2326
3.67M
    cur = ctxt->input->cur;
2327
5.23M
    while (IS_BLANK_CH(*cur)) {
2328
5.23M
        if (*cur == '\n') {
2329
1.42M
            ctxt->input->line++; ctxt->input->col = 1;
2330
3.81M
        } else {
2331
3.81M
            ctxt->input->col++;
2332
3.81M
        }
2333
5.23M
        cur++;
2334
5.23M
        if (res < INT_MAX)
2335
5.23M
            res++;
2336
5.23M
        if (*cur == 0) {
2337
4.95k
            ctxt->input->cur = cur;
2338
4.95k
            xmlParserGrow(ctxt);
2339
4.95k
            cur = ctxt->input->cur;
2340
4.95k
        }
2341
5.23M
    }
2342
3.67M
    ctxt->input->cur = cur;
2343
2344
3.67M
    if (res > 4)
2345
10.4k
        GROW;
2346
2347
3.67M
    return(res);
2348
3.67M
}
2349
2350
static void
2351
28.5k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2352
28.5k
    unsigned long consumed;
2353
28.5k
    xmlEntityPtr ent;
2354
2355
28.5k
    ent = ctxt->input->entity;
2356
2357
28.5k
    ent->flags &= ~XML_ENT_EXPANDING;
2358
2359
28.5k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2360
1.81k
        int result;
2361
2362
        /*
2363
         * Read the rest of the stream in case of errors. We want
2364
         * to account for the whole entity size.
2365
         */
2366
1.81k
        do {
2367
1.81k
            ctxt->input->cur = ctxt->input->end;
2368
1.81k
            xmlParserShrink(ctxt);
2369
1.81k
            result = xmlParserGrow(ctxt);
2370
1.81k
        } while (result > 0);
2371
2372
1.81k
        consumed = ctxt->input->consumed;
2373
1.81k
        xmlSaturatedAddSizeT(&consumed,
2374
1.81k
                             ctxt->input->end - ctxt->input->base);
2375
2376
1.81k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2377
2378
        /*
2379
         * Add to sizeentities when parsing an external entity
2380
         * for the first time.
2381
         */
2382
1.81k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2383
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2384
0
        }
2385
2386
1.81k
        ent->flags |= XML_ENT_CHECKED;
2387
1.81k
    }
2388
2389
28.5k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2390
2391
28.5k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2392
2393
28.5k
    GROW;
2394
28.5k
}
2395
2396
/**
2397
 * Skip whitespace in the input stream, also handling parameter
2398
 * entities.
2399
 *
2400
 * @param ctxt  the XML parser context
2401
 * @returns the number of space chars skipped
2402
 */
2403
static int
2404
554k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2405
554k
    int res = 0;
2406
554k
    int inParam;
2407
554k
    int expandParam;
2408
2409
554k
    inParam = PARSER_IN_PE(ctxt);
2410
554k
    expandParam = PARSER_EXTERNAL(ctxt);
2411
2412
554k
    if (!inParam && !expandParam)
2413
415k
        return(xmlSkipBlankChars(ctxt));
2414
2415
    /*
2416
     * It's Okay to use CUR/NEXT here since all the blanks are on
2417
     * the ASCII range.
2418
     */
2419
245k
    while (PARSER_STOPPED(ctxt) == 0) {
2420
245k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2421
106k
            NEXT;
2422
138k
        } else if (CUR == '%') {
2423
4
            if ((expandParam == 0) ||
2424
4
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2425
4
                break;
2426
2427
            /*
2428
             * Expand parameter entity. We continue to consume
2429
             * whitespace at the start of the entity and possible
2430
             * even consume the whole entity and pop it. We might
2431
             * even pop multiple PEs in this loop.
2432
             */
2433
0
            xmlParsePERefInternal(ctxt, 0);
2434
2435
0
            inParam = PARSER_IN_PE(ctxt);
2436
0
            expandParam = PARSER_EXTERNAL(ctxt);
2437
138k
        } else if (CUR == 0) {
2438
8.21k
            if (inParam == 0)
2439
0
                break;
2440
2441
            /*
2442
             * Don't pop parameter entities that start a markup
2443
             * declaration to detect Well-formedness constraint:
2444
             * PE Between Declarations.
2445
             */
2446
8.21k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2447
8.21k
                break;
2448
2449
0
            xmlPopPE(ctxt);
2450
2451
0
            inParam = PARSER_IN_PE(ctxt);
2452
0
            expandParam = PARSER_EXTERNAL(ctxt);
2453
130k
        } else {
2454
130k
            break;
2455
130k
        }
2456
2457
        /*
2458
         * Also increase the counter when entering or exiting a PERef.
2459
         * The spec says: "When a parameter-entity reference is recognized
2460
         * in the DTD and included, its replacement text MUST be enlarged
2461
         * by the attachment of one leading and one following space (#x20)
2462
         * character."
2463
         */
2464
106k
        if (res < INT_MAX)
2465
106k
            res++;
2466
106k
    }
2467
2468
138k
    return(res);
2469
554k
}
2470
2471
/************************************************************************
2472
 *                  *
2473
 *    Commodity functions to handle entities      *
2474
 *                  *
2475
 ************************************************************************/
2476
2477
/**
2478
 * @deprecated Internal function, don't use.
2479
 *
2480
 * @param ctxt  an XML parser context
2481
 * @returns the current xmlChar in the parser context
2482
 */
2483
xmlChar
2484
0
xmlPopInput(xmlParserCtxt *ctxt) {
2485
0
    xmlParserInputPtr input;
2486
2487
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2488
0
    input = xmlCtxtPopInput(ctxt);
2489
0
    xmlFreeInputStream(input);
2490
0
    if (*ctxt->input->cur == 0)
2491
0
        xmlParserGrow(ctxt);
2492
0
    return(CUR);
2493
0
}
2494
2495
/**
2496
 * Push an input stream onto the stack.
2497
 *
2498
 * @deprecated Internal function, don't use.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2502
 * @returns -1 in case of error or the index in the input stack
2503
 */
2504
int
2505
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2506
0
    int ret;
2507
2508
0
    if ((ctxt == NULL) || (input == NULL))
2509
0
        return(-1);
2510
2511
0
    ret = xmlCtxtPushInput(ctxt, input);
2512
0
    if (ret >= 0)
2513
0
        GROW;
2514
0
    return(ret);
2515
0
}
2516
2517
/**
2518
 * Parse a numeric character reference. Always consumes '&'.
2519
 *
2520
 * @deprecated Internal function, don't use.
2521
 *
2522
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2523
 *                      '&#x' [0-9a-fA-F]+ ';'
2524
 *
2525
 * [ WFC: Legal Character ]
2526
 * Characters referred to using character references must match the
2527
 * production for Char.
2528
 *
2529
 * @param ctxt  an XML parser context
2530
 * @returns the value parsed (as an int), 0 in case of error
2531
 */
2532
int
2533
162k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2534
162k
    int val = 0;
2535
162k
    int count = 0;
2536
2537
    /*
2538
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2539
     */
2540
162k
    if ((RAW == '&') && (NXT(1) == '#') &&
2541
162k
        (NXT(2) == 'x')) {
2542
93.0k
  SKIP(3);
2543
93.0k
  GROW;
2544
361k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2545
294k
      if (count++ > 20) {
2546
827
    count = 0;
2547
827
    GROW;
2548
827
      }
2549
294k
      if ((RAW >= '0') && (RAW <= '9'))
2550
105k
          val = val * 16 + (CUR - '0');
2551
188k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2552
31.0k
          val = val * 16 + (CUR - 'a') + 10;
2553
157k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2554
132k
          val = val * 16 + (CUR - 'A') + 10;
2555
25.3k
      else {
2556
25.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2557
25.3k
    val = 0;
2558
25.3k
    break;
2559
25.3k
      }
2560
268k
      if (val > 0x110000)
2561
61.8k
          val = 0x110000;
2562
2563
268k
      NEXT;
2564
268k
      count++;
2565
268k
  }
2566
93.0k
  if (RAW == ';') {
2567
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2568
67.7k
      ctxt->input->col++;
2569
67.7k
      ctxt->input->cur++;
2570
67.7k
  }
2571
93.0k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2572
69.7k
  SKIP(2);
2573
69.7k
  GROW;
2574
233k
  while (RAW != ';') { /* loop blocked by count */
2575
180k
      if (count++ > 20) {
2576
2.75k
    count = 0;
2577
2.75k
    GROW;
2578
2.75k
      }
2579
180k
      if ((RAW >= '0') && (RAW <= '9'))
2580
164k
          val = val * 10 + (CUR - '0');
2581
15.9k
      else {
2582
15.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2583
15.9k
    val = 0;
2584
15.9k
    break;
2585
15.9k
      }
2586
164k
      if (val > 0x110000)
2587
28.1k
          val = 0x110000;
2588
2589
164k
      NEXT;
2590
164k
      count++;
2591
164k
  }
2592
69.7k
  if (RAW == ';') {
2593
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2594
53.8k
      ctxt->input->col++;
2595
53.8k
      ctxt->input->cur++;
2596
53.8k
  }
2597
69.7k
    } else {
2598
0
        if (RAW == '&')
2599
0
            SKIP(1);
2600
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2601
0
    }
2602
2603
    /*
2604
     * [ WFC: Legal Character ]
2605
     * Characters referred to using character references must match the
2606
     * production for Char.
2607
     */
2608
162k
    if (val >= 0x110000) {
2609
1.50k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2610
1.50k
                "xmlParseCharRef: character reference out of bounds\n",
2611
1.50k
          val);
2612
1.50k
        val = 0xFFFD;
2613
161k
    } else if (!IS_CHAR(val)) {
2614
45.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2615
45.6k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2616
45.6k
                    val);
2617
45.6k
    }
2618
162k
    return(val);
2619
162k
}
2620
2621
/**
2622
 * Parse Reference declarations, variant parsing from a string rather
2623
 * than an an input flow.
2624
 *
2625
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2626
 *                      '&#x' [0-9a-fA-F]+ ';'
2627
 *
2628
 * [ WFC: Legal Character ]
2629
 * Characters referred to using character references must match the
2630
 * production for Char.
2631
 *
2632
 * @param ctxt  an XML parser context
2633
 * @param str  a pointer to an index in the string
2634
 * @returns the value parsed (as an int), 0 in case of error, str will be
2635
 *         updated to the current value of the index
2636
 */
2637
static int
2638
491k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2639
491k
    const xmlChar *ptr;
2640
491k
    xmlChar cur;
2641
491k
    int val = 0;
2642
2643
491k
    if ((str == NULL) || (*str == NULL)) return(0);
2644
491k
    ptr = *str;
2645
491k
    cur = *ptr;
2646
491k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2647
258k
  ptr += 3;
2648
258k
  cur = *ptr;
2649
866k
  while (cur != ';') { /* Non input consuming loop */
2650
612k
      if ((cur >= '0') && (cur <= '9'))
2651
420k
          val = val * 16 + (cur - '0');
2652
192k
      else if ((cur >= 'a') && (cur <= 'f'))
2653
26.1k
          val = val * 16 + (cur - 'a') + 10;
2654
165k
      else if ((cur >= 'A') && (cur <= 'F'))
2655
160k
          val = val * 16 + (cur - 'A') + 10;
2656
5.07k
      else {
2657
5.07k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2658
5.07k
    val = 0;
2659
5.07k
    break;
2660
5.07k
      }
2661
607k
      if (val > 0x110000)
2662
9.09k
          val = 0x110000;
2663
2664
607k
      ptr++;
2665
607k
      cur = *ptr;
2666
607k
  }
2667
258k
  if (cur == ';')
2668
253k
      ptr++;
2669
258k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2670
232k
  ptr += 2;
2671
232k
  cur = *ptr;
2672
902k
  while (cur != ';') { /* Non input consuming loops */
2673
676k
      if ((cur >= '0') && (cur <= '9'))
2674
669k
          val = val * 10 + (cur - '0');
2675
7.09k
      else {
2676
7.09k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2677
7.09k
    val = 0;
2678
7.09k
    break;
2679
7.09k
      }
2680
669k
      if (val > 0x110000)
2681
11.5k
          val = 0x110000;
2682
2683
669k
      ptr++;
2684
669k
      cur = *ptr;
2685
669k
  }
2686
232k
  if (cur == ';')
2687
225k
      ptr++;
2688
232k
    } else {
2689
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2690
0
  return(0);
2691
0
    }
2692
491k
    *str = ptr;
2693
2694
    /*
2695
     * [ WFC: Legal Character ]
2696
     * Characters referred to using character references must match the
2697
     * production for Char.
2698
     */
2699
491k
    if (val >= 0x110000) {
2700
1.55k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2701
1.55k
                "xmlParseStringCharRef: character reference out of bounds\n",
2702
1.55k
                val);
2703
489k
    } else if (IS_CHAR(val)) {
2704
474k
        return(val);
2705
474k
    } else {
2706
14.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2707
14.8k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2708
14.8k
        val);
2709
14.8k
    }
2710
16.3k
    return(0);
2711
491k
}
2712
2713
/**
2714
 *     [69] PEReference ::= '%' Name ';'
2715
 *
2716
 * @deprecated Internal function, do not use.
2717
 *
2718
 * [ WFC: No Recursion ]
2719
 * A parsed entity must not contain a recursive
2720
 * reference to itself, either directly or indirectly.
2721
 *
2722
 * [ WFC: Entity Declared ]
2723
 * In a document without any DTD, a document with only an internal DTD
2724
 * subset which contains no parameter entity references, or a document
2725
 * with "standalone='yes'", ...  ... The declaration of a parameter
2726
 * entity must precede any reference to it...
2727
 *
2728
 * [ VC: Entity Declared ]
2729
 * In a document with an external subset or external parameter entities
2730
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2731
 * must precede any reference to it...
2732
 *
2733
 * [ WFC: In DTD ]
2734
 * Parameter-entity references may only appear in the DTD.
2735
 * NOTE: misleading but this is handled.
2736
 *
2737
 * A PEReference may have been detected in the current input stream
2738
 * the handling is done accordingly to
2739
 *      http://www.w3.org/TR/REC-xml#entproc
2740
 * i.e.
2741
 *   - Included in literal in entity values
2742
 *   - Included as Parameter Entity reference within DTDs
2743
 * @param ctxt  the parser context
2744
 */
2745
void
2746
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2747
0
    xmlParsePERefInternal(ctxt, 0);
2748
0
}
2749
2750
/**
2751
 * @deprecated Internal function, don't use.
2752
 *
2753
 * @param ctxt  the parser context
2754
 * @param str  the input string
2755
 * @param len  the string length
2756
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2757
 * @param end  an end marker xmlChar, 0 if none
2758
 * @param end2  an end marker xmlChar, 0 if none
2759
 * @param end3  an end marker xmlChar, 0 if none
2760
 * @returns A newly allocated string with the substitution done. The caller
2761
 *      must deallocate it !
2762
 */
2763
xmlChar *
2764
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2765
                           int what ATTRIBUTE_UNUSED,
2766
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2767
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2768
0
        return(NULL);
2769
2770
0
    if ((str[len] != 0) ||
2771
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2772
0
        return(NULL);
2773
2774
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2775
0
}
2776
2777
/**
2778
 * @deprecated Internal function, don't use.
2779
 *
2780
 * @param ctxt  the parser context
2781
 * @param str  the input string
2782
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2783
 * @param end  an end marker xmlChar, 0 if none
2784
 * @param end2  an end marker xmlChar, 0 if none
2785
 * @param end3  an end marker xmlChar, 0 if none
2786
 * @returns A newly allocated string with the substitution done. The caller
2787
 *      must deallocate it !
2788
 */
2789
xmlChar *
2790
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2791
                        int what ATTRIBUTE_UNUSED,
2792
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2793
0
    if ((ctxt == NULL) || (str == NULL))
2794
0
        return(NULL);
2795
2796
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2797
0
        return(NULL);
2798
2799
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2800
0
}
2801
2802
/************************************************************************
2803
 *                  *
2804
 *    Commodity functions, cleanup needed ?     *
2805
 *                  *
2806
 ************************************************************************/
2807
2808
/**
2809
 * Is this a sequence of blank chars that one can ignore ?
2810
 *
2811
 * @param ctxt  an XML parser context
2812
 * @param str  a xmlChar *
2813
 * @param len  the size of `str`
2814
 * @param blank_chars  we know the chars are blanks
2815
 * @returns 1 if ignorable 0 otherwise.
2816
 */
2817
2818
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2819
0
                     int blank_chars) {
2820
0
    int i;
2821
0
    xmlNodePtr lastChild;
2822
2823
    /*
2824
     * Check for xml:space value.
2825
     */
2826
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2827
0
        (*(ctxt->space) == -2))
2828
0
  return(0);
2829
2830
    /*
2831
     * Check that the string is made of blanks
2832
     */
2833
0
    if (blank_chars == 0) {
2834
0
  for (i = 0;i < len;i++)
2835
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2836
0
    }
2837
2838
    /*
2839
     * Look if the element is mixed content in the DTD if available
2840
     */
2841
0
    if (ctxt->node == NULL) return(0);
2842
0
    if (ctxt->myDoc != NULL) {
2843
0
        xmlElementPtr elemDecl = NULL;
2844
0
        xmlDocPtr doc = ctxt->myDoc;
2845
0
        const xmlChar *prefix = NULL;
2846
2847
0
        if (ctxt->node->ns)
2848
0
            prefix = ctxt->node->ns->prefix;
2849
0
        if (doc->intSubset != NULL)
2850
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2851
0
                                      prefix);
2852
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2853
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2854
0
                                      prefix);
2855
0
        if (elemDecl != NULL) {
2856
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2857
0
                return(1);
2858
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2859
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2860
0
                return(0);
2861
0
        }
2862
0
    }
2863
2864
    /*
2865
     * Otherwise, heuristic :-\
2866
     *
2867
     * When push parsing, we could be at the end of a chunk.
2868
     * This makes the look-ahead and consequently the NOBLANKS
2869
     * option unreliable.
2870
     */
2871
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2872
0
    if ((ctxt->node->children == NULL) &&
2873
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2874
2875
0
    lastChild = xmlGetLastChild(ctxt->node);
2876
0
    if (lastChild == NULL) {
2877
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2878
0
            (ctxt->node->content != NULL)) return(0);
2879
0
    } else if (xmlNodeIsText(lastChild))
2880
0
        return(0);
2881
0
    else if ((ctxt->node->children != NULL) &&
2882
0
             (xmlNodeIsText(ctxt->node->children)))
2883
0
        return(0);
2884
0
    return(1);
2885
0
}
2886
2887
/************************************************************************
2888
 *                  *
2889
 *    Extra stuff for namespace support     *
2890
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2891
 *                  *
2892
 ************************************************************************/
2893
2894
/**
2895
 * Parse an UTF8 encoded XML qualified name string
2896
 *
2897
 * @deprecated Don't use.
2898
 *
2899
 * @param ctxt  an XML parser context
2900
 * @param name  an XML parser context
2901
 * @param prefixOut  a xmlChar **
2902
 * @returns the local part, and prefix is updated
2903
 *   to get the Prefix if any.
2904
 */
2905
2906
xmlChar *
2907
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2908
0
    xmlChar *ret;
2909
0
    const xmlChar *localname;
2910
2911
0
    localname = xmlSplitQName4(name, prefixOut);
2912
0
    if (localname == NULL) {
2913
0
        xmlCtxtErrMemory(ctxt);
2914
0
        return(NULL);
2915
0
    }
2916
2917
0
    ret = xmlStrdup(localname);
2918
0
    if (ret == NULL) {
2919
0
        xmlCtxtErrMemory(ctxt);
2920
0
        xmlFree(*prefixOut);
2921
0
    }
2922
2923
0
    return(ret);
2924
0
}
2925
2926
/************************************************************************
2927
 *                  *
2928
 *      The parser itself       *
2929
 *  Relates to http://www.w3.org/TR/REC-xml       *
2930
 *                  *
2931
 ************************************************************************/
2932
2933
/************************************************************************
2934
 *                  *
2935
 *  Routines to parse Name, NCName and NmToken      *
2936
 *                  *
2937
 ************************************************************************/
2938
2939
/*
2940
 * The two following functions are related to the change of accepted
2941
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2942
 * They correspond to the modified production [4] and the new production [4a]
2943
 * changes in that revision. Also note that the macros used for the
2944
 * productions Letter, Digit, CombiningChar and Extender are not needed
2945
 * anymore.
2946
 * We still keep compatibility to pre-revision5 parsing semantic if the
2947
 * new XML_PARSE_OLD10 option is given to the parser.
2948
 */
2949
2950
/*
 * Check whether a code point is a NameStartChar according to
 * productions [4], [4a] and [5] of the fifth edition of XML 1.0.
 *
 * Returns 1 if `c` may start a Name, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    /* Non-ASCII ranges, sorted in ascending order. */
    static const int ranges[][2] = {
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    unsigned int i;

    /* ASCII (and negative values): only letters, '_' and ':' qualify. */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return 1;
        if ((c >= 'A') && (c <= 'Z'))
            return 1;
        return ((c == '_') || (c == ':')) ? 1 : 0;
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted, so a value below the range is in a gap. */
        if (c < ranges[i][0])
            return 0;
        if (c <= ranges[i][1])
            return 1;
    }

    return 0;
}
2975
2976
/*
 * Check whether a code point is a NameChar according to
 * productions [4], [4a] and [5] of the fifth edition of XML 1.0.
 *
 * Returns 1 if `c` may appear inside a Name, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    /*
     * Non-ASCII ranges, sorted in ascending order. Compared to the
     * NameStartChar set this adds 0xB7, 0x300-0x36F and 0x203F-0x2040.
     */
    static const int ranges[][2] = {
        { 0xB7, 0xB7 },
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x300, 0x36F },
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x203F, 0x2040 },
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    unsigned int i;

    /*
     * ASCII (and negative values): letters, digits and the four
     * punctuation characters '_', ':', '-' and '.'.
     */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return 1;
        if ((c >= 'A') && (c <= 'Z'))
            return 1;
        if ((c >= '0') && (c <= '9'))
            return 1;
        switch (c) {
            case '_':
            case ':':
            case '-':
            case '.':
                return 1;
            default:
                return 0;
        }
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted, so a value below the range is in a gap. */
        if (c < ranges[i][0])
            return 0;
        if (c <= ranges[i][1])
            return 1;
    }

    return 0;
}
3005
3006
/*
 * Pre-revision-5 variant of the NameStartChar check: a Letter,
 * '_' or ':'.
 *
 * Returns 1 if `c` may start a Name, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: the most frequent terminators are rejected first */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return 0;

    if ((c == '_') || (c == ':'))
        return 1;

    return (IS_LETTER(c)) ? 1 : 0;
}
3013
3014
/*
 * Pre-revision-5 variant of the NameChar check: a Letter, Digit,
 * CombiningChar, Extender or one of '.', '-', '_' and ':'.
 *
 * Returns 1 if `c` may appear inside a Name, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: the most frequent terminators are rejected first */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return 0;

    switch (c) {
        case '.':
        case '-':
        case '_':
        case ':':
            return 1;
        default:
            break;
    }

    if (IS_LETTER(c))
        return 1;
    if (IS_DIGIT(c))
        return 1;
    if (IS_COMBINING(c))
        return 1;
    if (IS_EXTENDER(c))
        return 1;

    return 0;
}
3025
3026
/*
 * Dispatch the NameStartChar check to the pre-revision-5 rules when
 * `old10` is non-zero and to the current rules otherwise.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c);
}
3033
3034
/*
 * Dispatch the NameChar check to the pre-revision-5 rules when
 * `old10` is non-zero and to the current rules otherwise.
 */
static int
xmlIsNameChar(int c, int old10) {
    return old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c);
}
3041
3042
/*
3043
 * Scan an XML Name, NCName or Nmtoken.
3044
 *
3045
 * Returns a pointer to the end of the name on success. If the
3046
 * name is invalid, returns `ptr`. If the name is longer than
3047
 * `maxSize` bytes, returns NULL.
3048
 *
3049
 * @param ptr  pointer to the start of the name
3050
 * @param maxSize  maximum size in bytes
3051
 * @param flags  XML_SCAN_* flags
3052
 * @returns a pointer to the end of the name or NULL
3053
 */
3054
const xmlChar *
3055
8.38M
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3056
8.38M
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3057
8.38M
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3058
3059
27.1M
    while (1) {
3060
27.1M
        int c, len;
3061
3062
27.1M
        c = *ptr;
3063
27.1M
        if (c < 0x80) {
3064
25.0M
            if (c == stop)
3065
155
                break;
3066
25.0M
            len = 1;
3067
25.0M
        } else {
3068
2.09M
            len = 4;
3069
2.09M
            c = xmlGetUTF8Char(ptr, &len);
3070
2.09M
            if (c < 0)
3071
1.87k
                break;
3072
2.09M
        }
3073
3074
27.1M
        if (flags & XML_SCAN_NMTOKEN ?
3075
18.8M
                !xmlIsNameChar(c, old10) :
3076
27.1M
                !xmlIsNameStartChar(c, old10))
3077
8.38M
            break;
3078
3079
18.8M
        if ((size_t) len > maxSize)
3080
71
            return(NULL);
3081
18.8M
        ptr += len;
3082
18.8M
        maxSize -= len;
3083
18.8M
        flags |= XML_SCAN_NMTOKEN;
3084
18.8M
    }
3085
3086
8.38M
    return(ptr);
3087
8.38M
}
3088
3089
static const xmlChar *
3090
550k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3091
550k
    const xmlChar *ret;
3092
550k
    int len = 0, l;
3093
550k
    int c;
3094
550k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3095
0
                    XML_MAX_TEXT_LENGTH :
3096
550k
                    XML_MAX_NAME_LENGTH;
3097
550k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3098
3099
    /*
3100
     * Handler for more complex cases
3101
     */
3102
550k
    c = xmlCurrentChar(ctxt, &l);
3103
550k
    if (!xmlIsNameStartChar(c, old10))
3104
423k
        return(NULL);
3105
126k
    len += l;
3106
126k
    NEXTL(l);
3107
126k
    c = xmlCurrentChar(ctxt, &l);
3108
8.48M
    while (xmlIsNameChar(c, old10)) {
3109
8.35M
        if (len <= INT_MAX - l)
3110
8.35M
            len += l;
3111
8.35M
        NEXTL(l);
3112
8.35M
        c = xmlCurrentChar(ctxt, &l);
3113
8.35M
    }
3114
126k
    if (len > maxLength) {
3115
137
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3116
137
        return(NULL);
3117
137
    }
3118
126k
    if (ctxt->input->cur - ctxt->input->base < len) {
3119
        /*
3120
         * There were a couple of bugs where PERefs lead to to a change
3121
         * of the buffer. Check the buffer size to avoid passing an invalid
3122
         * pointer to xmlDictLookup.
3123
         */
3124
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3125
0
                    "unexpected change of input buffer");
3126
0
        return (NULL);
3127
0
    }
3128
126k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3129
533
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3130
126k
    else
3131
126k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3132
126k
    if (ret == NULL)
3133
0
        xmlErrMemory(ctxt);
3134
126k
    return(ret);
3135
126k
}
3136
3137
/**
3138
 * Parse an XML name.
3139
 *
3140
 * @deprecated Internal function, don't use.
3141
 *
3142
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3143
 *                      CombiningChar | Extender
3144
 *
3145
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3146
 *
3147
 *     [6] Names ::= Name (#x20 Name)*
3148
 *
3149
 * @param ctxt  an XML parser context
3150
 * @returns the Name parsed or NULL
3151
 */
3152
3153
const xmlChar *
3154
2.85M
xmlParseName(xmlParserCtxt *ctxt) {
3155
2.85M
    const xmlChar *in;
3156
2.85M
    const xmlChar *ret;
3157
2.85M
    size_t count = 0;
3158
2.85M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3159
0
                       XML_MAX_TEXT_LENGTH :
3160
2.85M
                       XML_MAX_NAME_LENGTH;
3161
3162
2.85M
    GROW;
3163
3164
    /*
3165
     * Accelerator for simple ASCII names
3166
     */
3167
2.85M
    in = ctxt->input->cur;
3168
2.85M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
2.85M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
2.85M
  (*in == '_') || (*in == ':')) {
3171
2.38M
  in++;
3172
8.98M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3173
8.98M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3174
8.98M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3175
8.98M
         (*in == '_') || (*in == '-') ||
3176
8.98M
         (*in == ':') || (*in == '.'))
3177
6.59M
      in++;
3178
2.38M
  if ((*in > 0) && (*in < 0x80)) {
3179
2.30M
      count = in - ctxt->input->cur;
3180
2.30M
            if (count > maxLength) {
3181
5
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3182
5
                return(NULL);
3183
5
            }
3184
2.30M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3185
2.30M
      ctxt->input->cur = in;
3186
2.30M
      ctxt->input->col += count;
3187
2.30M
      if (ret == NULL)
3188
0
          xmlErrMemory(ctxt);
3189
2.30M
      return(ret);
3190
2.30M
  }
3191
2.38M
    }
3192
    /* accelerator for special cases */
3193
550k
    return(xmlParseNameComplex(ctxt));
3194
2.85M
}
3195
3196
static xmlHashedString
3197
1.45M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3198
1.45M
    xmlHashedString ret;
3199
1.45M
    int len = 0, l;
3200
1.45M
    int c;
3201
1.45M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3202
0
                    XML_MAX_TEXT_LENGTH :
3203
1.45M
                    XML_MAX_NAME_LENGTH;
3204
1.45M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3205
1.45M
    size_t startPosition = 0;
3206
3207
1.45M
    ret.name = NULL;
3208
1.45M
    ret.hashValue = 0;
3209
3210
    /*
3211
     * Handler for more complex cases
3212
     */
3213
1.45M
    startPosition = CUR_PTR - BASE_PTR;
3214
1.45M
    c = xmlCurrentChar(ctxt, &l);
3215
1.45M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3216
1.45M
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3217
1.31M
  return(ret);
3218
1.31M
    }
3219
3220
7.34M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3221
7.34M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3222
7.20M
        if (len <= INT_MAX - l)
3223
7.20M
      len += l;
3224
7.20M
  NEXTL(l);
3225
7.20M
  c = xmlCurrentChar(ctxt, &l);
3226
7.20M
    }
3227
137k
    if (len > maxLength) {
3228
149
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3229
149
        return(ret);
3230
149
    }
3231
136k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3232
136k
    if (ret.name == NULL)
3233
0
        xmlErrMemory(ctxt);
3234
136k
    return(ret);
3235
137k
}
3236
3237
/**
3238
 * Parse an XML name.
3239
 *
3240
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3241
 *                          CombiningChar | Extender
3242
 *
3243
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3244
 *
3245
 * @param ctxt  an XML parser context
3246
 * @returns the Name parsed or NULL
3247
 */
3248
3249
static xmlHashedString
3250
3.20M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3251
3.20M
    const xmlChar *in, *e;
3252
3.20M
    xmlHashedString ret;
3253
3.20M
    size_t count = 0;
3254
3.20M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3255
0
                       XML_MAX_TEXT_LENGTH :
3256
3.20M
                       XML_MAX_NAME_LENGTH;
3257
3258
3.20M
    ret.name = NULL;
3259
3260
    /*
3261
     * Accelerator for simple ASCII names
3262
     */
3263
3.20M
    in = ctxt->input->cur;
3264
3.20M
    e = ctxt->input->end;
3265
3.20M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
3.20M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
3.20M
   (*in == '_')) && (in < e)) {
3268
1.85M
  in++;
3269
8.53M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3270
8.53M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3271
8.53M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3272
8.53M
          (*in == '_') || (*in == '-') ||
3273
8.53M
          (*in == '.')) && (in < e))
3274
6.68M
      in++;
3275
1.85M
  if (in >= e)
3276
1.82k
      goto complex;
3277
1.84M
  if ((*in > 0) && (*in < 0x80)) {
3278
1.75M
      count = in - ctxt->input->cur;
3279
1.75M
            if (count > maxLength) {
3280
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3281
0
                return(ret);
3282
0
            }
3283
1.75M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3284
1.75M
      ctxt->input->cur = in;
3285
1.75M
      ctxt->input->col += count;
3286
1.75M
      if (ret.name == NULL) {
3287
0
          xmlErrMemory(ctxt);
3288
0
      }
3289
1.75M
      return(ret);
3290
1.75M
  }
3291
1.84M
    }
3292
1.45M
complex:
3293
1.45M
    return(xmlParseNCNameComplex(ctxt));
3294
3.20M
}
3295
3296
/**
3297
 * Parse an XML name and compares for match
3298
 * (specialized for endtag parsing)
3299
 *
3300
 * @param ctxt  an XML parser context
3301
 * @param other  the name to compare with
3302
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3303
 * and the name for mismatch
3304
 */
3305
3306
static const xmlChar *
3307
155k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3308
155k
    register const xmlChar *cmp = other;
3309
155k
    register const xmlChar *in;
3310
155k
    const xmlChar *ret;
3311
3312
155k
    GROW;
3313
3314
155k
    in = ctxt->input->cur;
3315
885k
    while (*in != 0 && *in == *cmp) {
3316
729k
  ++in;
3317
729k
  ++cmp;
3318
729k
    }
3319
155k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3320
  /* success */
3321
116k
  ctxt->input->col += in - ctxt->input->cur;
3322
116k
  ctxt->input->cur = in;
3323
116k
  return (const xmlChar*) 1;
3324
116k
    }
3325
    /* failure (or end of input buffer), check with full function */
3326
38.7k
    ret = xmlParseName (ctxt);
3327
    /* strings coming from the dictionary direct compare possible */
3328
38.7k
    if (ret == other) {
3329
1.94k
  return (const xmlChar*) 1;
3330
1.94k
    }
3331
36.7k
    return ret;
3332
38.7k
}
3333
3334
/**
3335
 * Parse an XML name.
3336
 *
3337
 * @param ctxt  an XML parser context
3338
 * @param str  a pointer to the string pointer (IN/OUT)
3339
 * @returns the Name parsed or NULL. The `str` pointer
3340
 * is updated to the current location in the string.
3341
 */
3342
3343
static xmlChar *
3344
8.37M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3345
8.37M
    xmlChar *ret;
3346
8.37M
    const xmlChar *cur = *str;
3347
8.37M
    int flags = 0;
3348
8.37M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349
0
                    XML_MAX_TEXT_LENGTH :
3350
8.37M
                    XML_MAX_NAME_LENGTH;
3351
3352
8.37M
    if (ctxt->options & XML_PARSE_OLD10)
3353
0
        flags |= XML_SCAN_OLD10;
3354
3355
8.37M
    cur = xmlScanName(*str, maxLength, flags);
3356
8.37M
    if (cur == NULL) {
3357
71
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3358
71
        return(NULL);
3359
71
    }
3360
8.37M
    if (cur == *str)
3361
4.72k
        return(NULL);
3362
3363
8.37M
    ret = xmlStrndup(*str, cur - *str);
3364
8.37M
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
8.37M
    *str = cur;
3367
8.37M
    return(ret);
3368
8.37M
}
3369
3370
/**
3371
 * Parse an XML Nmtoken.
3372
 *
3373
 * @deprecated Internal function, don't use.
3374
 *
3375
 *     [7] Nmtoken ::= (NameChar)+
3376
 *
3377
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378
 *
3379
 * @param ctxt  an XML parser context
3380
 * @returns the Nmtoken parsed or NULL
3381
 */
3382
3383
xmlChar *
3384
50.0k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3385
50.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3386
50.0k
    xmlChar *ret;
3387
50.0k
    int len = 0, l;
3388
50.0k
    int c;
3389
50.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3390
0
                    XML_MAX_TEXT_LENGTH :
3391
50.0k
                    XML_MAX_NAME_LENGTH;
3392
50.0k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3393
3394
50.0k
    c = xmlCurrentChar(ctxt, &l);
3395
3396
342k
    while (xmlIsNameChar(c, old10)) {
3397
294k
  COPY_BUF(buf, len, c);
3398
294k
  NEXTL(l);
3399
294k
  c = xmlCurrentChar(ctxt, &l);
3400
294k
  if (len >= XML_MAX_NAMELEN) {
3401
      /*
3402
       * Okay someone managed to make a huge token, so he's ready to pay
3403
       * for the processing speed.
3404
       */
3405
2.06k
      xmlChar *buffer;
3406
2.06k
      int max = len * 2;
3407
3408
2.06k
      buffer = xmlMalloc(max);
3409
2.06k
      if (buffer == NULL) {
3410
0
          xmlErrMemory(ctxt);
3411
0
    return(NULL);
3412
0
      }
3413
2.06k
      memcpy(buffer, buf, len);
3414
5.62M
      while (xmlIsNameChar(c, old10)) {
3415
5.62M
    if (len + 10 > max) {
3416
5.63k
        xmlChar *tmp;
3417
5.63k
                    int newSize;
3418
3419
5.63k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3420
5.63k
                    if (newSize < 0) {
3421
173
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3422
173
                        xmlFree(buffer);
3423
173
                        return(NULL);
3424
173
                    }
3425
5.46k
        tmp = xmlRealloc(buffer, newSize);
3426
5.46k
        if (tmp == NULL) {
3427
0
      xmlErrMemory(ctxt);
3428
0
      xmlFree(buffer);
3429
0
      return(NULL);
3430
0
        }
3431
5.46k
        buffer = tmp;
3432
5.46k
                    max = newSize;
3433
5.46k
    }
3434
5.62M
    COPY_BUF(buffer, len, c);
3435
5.62M
    NEXTL(l);
3436
5.62M
    c = xmlCurrentChar(ctxt, &l);
3437
5.62M
      }
3438
1.89k
      buffer[len] = 0;
3439
1.89k
      return(buffer);
3440
2.06k
  }
3441
294k
    }
3442
47.9k
    if (len == 0)
3443
13.9k
        return(NULL);
3444
34.0k
    if (len > maxLength) {
3445
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3446
0
        return(NULL);
3447
0
    }
3448
34.0k
    ret = xmlStrndup(buf, len);
3449
34.0k
    if (ret == NULL)
3450
0
        xmlErrMemory(ctxt);
3451
34.0k
    return(ret);
3452
34.0k
}
3453
3454
/**
3455
 * Validate an entity value and expand parameter entities.
3456
 *
3457
 * @param ctxt  parser context
3458
 * @param buf  string buffer
3459
 * @param str  entity value
3460
 * @param length  size of entity value
3461
 * @param depth  nesting depth
3462
 */
3463
static void
3464
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3465
85.0k
                          const xmlChar *str, int length, int depth) {
3466
85.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3467
85.0k
    const xmlChar *end, *chunk;
3468
85.0k
    int c, l;
3469
3470
85.0k
    if (str == NULL)
3471
0
        return;
3472
3473
85.0k
    depth += 1;
3474
85.0k
    if (depth > maxDepth) {
3475
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3476
0
                       "Maximum entity nesting depth exceeded");
3477
0
  return;
3478
0
    }
3479
3480
85.0k
    end = str + length;
3481
85.0k
    chunk = str;
3482
3483
54.4M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3484
54.4M
        c = *str;
3485
3486
54.4M
        if (c >= 0x80) {
3487
34.1M
            l = xmlUTF8MultibyteLen(ctxt, str,
3488
34.1M
                    "invalid character in entity value\n");
3489
34.1M
            if (l == 0) {
3490
10.4M
                if (chunk < str)
3491
263k
                    xmlSBufAddString(buf, chunk, str - chunk);
3492
10.4M
                xmlSBufAddReplChar(buf);
3493
10.4M
                str += 1;
3494
10.4M
                chunk = str;
3495
23.6M
            } else {
3496
23.6M
                str += l;
3497
23.6M
            }
3498
34.1M
        } else if (c == '&') {
3499
200k
            if (str[1] == '#') {
3500
100k
                if (chunk < str)
3501
54.5k
                    xmlSBufAddString(buf, chunk, str - chunk);
3502
3503
100k
                c = xmlParseStringCharRef(ctxt, &str);
3504
100k
                if (c == 0)
3505
16.3k
                    return;
3506
3507
83.7k
                xmlSBufAddChar(buf, c);
3508
3509
83.7k
                chunk = str;
3510
100k
            } else {
3511
100k
                xmlChar *name;
3512
3513
                /*
3514
                 * General entity references are checked for
3515
                 * syntactic validity.
3516
                 */
3517
100k
                str++;
3518
100k
                name = xmlParseStringName(ctxt, &str);
3519
3520
100k
                if ((name == NULL) || (*str++ != ';')) {
3521
7.84k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3522
7.84k
                            "EntityValue: '&' forbidden except for entities "
3523
7.84k
                            "references\n");
3524
7.84k
                    xmlFree(name);
3525
7.84k
                    return;
3526
7.84k
                }
3527
3528
92.5k
                xmlFree(name);
3529
92.5k
            }
3530
20.0M
        } else if (c == '%') {
3531
5.92k
            xmlEntityPtr ent;
3532
3533
5.92k
            if (chunk < str)
3534
5.26k
                xmlSBufAddString(buf, chunk, str - chunk);
3535
3536
5.92k
            ent = xmlParseStringPEReference(ctxt, &str);
3537
5.92k
            if (ent == NULL)
3538
4.95k
                return;
3539
3540
967
            if (!PARSER_EXTERNAL(ctxt)) {
3541
967
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3542
967
                return;
3543
967
            }
3544
3545
0
            if (ent->content == NULL) {
3546
                /*
3547
                 * Note: external parsed entities will not be loaded,
3548
                 * it is not required for a non-validating parser to
3549
                 * complete external PEReferences coming from the
3550
                 * internal subset
3551
                 */
3552
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3553
0
                    ((ctxt->replaceEntities) ||
3554
0
                     (ctxt->validate))) {
3555
0
                    xmlLoadEntityContent(ctxt, ent);
3556
0
                } else {
3557
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3558
0
                                  "not validating will not read content for "
3559
0
                                  "PE entity %s\n", ent->name, NULL);
3560
0
                }
3561
0
            }
3562
3563
            /*
3564
             * TODO: Skip if ent->content is still NULL.
3565
             */
3566
3567
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3568
0
                return;
3569
3570
0
            if (ent->flags & XML_ENT_EXPANDING) {
3571
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3572
0
                return;
3573
0
            }
3574
3575
0
            ent->flags |= XML_ENT_EXPANDING;
3576
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3577
0
                                      depth);
3578
0
            ent->flags &= ~XML_ENT_EXPANDING;
3579
3580
0
            chunk = str;
3581
20.0M
        } else {
3582
            /* Normal ASCII char */
3583
20.0M
            if (!IS_BYTE_CHAR(c)) {
3584
1.22M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3585
1.22M
                        "invalid character in entity value\n");
3586
1.22M
                if (chunk < str)
3587
12.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3588
1.22M
                xmlSBufAddReplChar(buf);
3589
1.22M
                str += 1;
3590
1.22M
                chunk = str;
3591
18.8M
            } else {
3592
18.8M
                str += 1;
3593
18.8M
            }
3594
20.0M
        }
3595
54.4M
    }
3596
3597
54.9k
    if (chunk < str)
3598
49.8k
        xmlSBufAddString(buf, chunk, str - chunk);
3599
54.9k
}
3600
3601
/**
3602
 * Parse a value for ENTITY declarations
3603
 *
3604
 * @deprecated Internal function, don't use.
3605
 *
3606
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3607
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3608
 *
3609
 * @param ctxt  an XML parser context
3610
 * @param orig  if non-NULL store a copy of the original entity value
3611
 * @returns the EntityValue parsed with reference substituted or NULL
3612
 */
3613
xmlChar *
3614
85.4k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3615
85.4k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3616
0
                         XML_MAX_HUGE_LENGTH :
3617
85.4k
                         XML_MAX_TEXT_LENGTH;
3618
85.4k
    xmlSBuf buf;
3619
85.4k
    const xmlChar *start;
3620
85.4k
    int quote, length;
3621
3622
85.4k
    xmlSBufInit(&buf, maxLength);
3623
3624
85.4k
    GROW;
3625
3626
85.4k
    quote = CUR;
3627
85.4k
    if ((quote != '"') && (quote != '\'')) {
3628
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3629
0
  return(NULL);
3630
0
    }
3631
85.4k
    CUR_PTR++;
3632
3633
85.4k
    length = 0;
3634
3635
    /*
3636
     * Copy raw content of the entity into a buffer
3637
     */
3638
90.4M
    while (1) {
3639
90.4M
        int c;
3640
3641
90.4M
        if (PARSER_STOPPED(ctxt))
3642
0
            goto error;
3643
3644
90.4M
        if (CUR_PTR >= ctxt->input->end) {
3645
366
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3646
366
            goto error;
3647
366
        }
3648
3649
90.4M
        c = CUR;
3650
3651
90.4M
        if (c == 0) {
3652
30
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3653
30
                    "invalid character in entity value\n");
3654
30
            goto error;
3655
30
        }
3656
90.4M
        if (c == quote)
3657
85.0k
            break;
3658
90.3M
        NEXTL(1);
3659
90.3M
        length += 1;
3660
3661
        /*
3662
         * TODO: Check growth threshold
3663
         */
3664
90.3M
        if (ctxt->input->end - CUR_PTR < 10)
3665
22.0k
            GROW;
3666
90.3M
    }
3667
3668
85.0k
    start = CUR_PTR - length;
3669
3670
85.0k
    if (orig != NULL) {
3671
85.0k
        *orig = xmlStrndup(start, length);
3672
85.0k
        if (*orig == NULL)
3673
0
            xmlErrMemory(ctxt);
3674
85.0k
    }
3675
3676
85.0k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3677
3678
85.0k
    NEXTL(1);
3679
3680
85.0k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3681
3682
396
error:
3683
396
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3684
396
    return(NULL);
3685
85.4k
}
3686
3687
/**
3688
 * Check an entity reference in an attribute value for validity
3689
 * without expanding it.
3690
 *
3691
 * @param ctxt  parser context
3692
 * @param pent  entity
3693
 * @param depth  nesting depth
3694
 */
3695
static void
3696
2.44k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3697
2.44k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3698
2.44k
    const xmlChar *str;
3699
2.44k
    unsigned long expandedSize = pent->length;
3700
2.44k
    int c, flags;
3701
3702
2.44k
    depth += 1;
3703
2.44k
    if (depth > maxDepth) {
3704
2
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3705
2
                       "Maximum entity nesting depth exceeded");
3706
2
  return;
3707
2
    }
3708
3709
2.44k
    if (pent->flags & XML_ENT_EXPANDING) {
3710
31
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3711
31
        return;
3712
31
    }
3713
3714
    /*
3715
     * If we're parsing a default attribute value in DTD content,
3716
     * the entity might reference other entities which weren't
3717
     * defined yet, so the check isn't reliable.
3718
     */
3719
2.41k
    if (ctxt->inSubset == 0)
3720
2.34k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3721
68
    else
3722
68
        flags = XML_ENT_VALIDATED;
3723
3724
2.41k
    str = pent->content;
3725
2.41k
    if (str == NULL)
3726
0
        goto done;
3727
3728
    /*
3729
     * Note that entity values are already validated. We only check
3730
     * for illegal less-than signs and compute the expanded size
3731
     * of the entity. No special handling for multi-byte characters
3732
     * is needed.
3733
     */
3734
23.4M
    while (!PARSER_STOPPED(ctxt)) {
3735
23.4M
        c = *str;
3736
3737
23.4M
  if (c != '&') {
3738
23.4M
            if (c == 0)
3739
2.22k
                break;
3740
3741
23.4M
            if (c == '<')
3742
1.63k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3743
1.63k
                        "'<' in entity '%s' is not allowed in attributes "
3744
1.63k
                        "values\n", pent->name);
3745
3746
23.4M
            str += 1;
3747
23.4M
        } else if (str[1] == '#') {
3748
4.21k
            int val;
3749
3750
4.21k
      val = xmlParseStringCharRef(ctxt, &str);
3751
4.21k
      if (val == 0) {
3752
9
                pent->content[0] = 0;
3753
9
                break;
3754
9
            }
3755
15.6k
  } else {
3756
15.6k
            xmlChar *name;
3757
15.6k
            xmlEntityPtr ent;
3758
3759
15.6k
      name = xmlParseStringEntityRef(ctxt, &str);
3760
15.6k
      if (name == NULL) {
3761
16
                pent->content[0] = 0;
3762
16
                break;
3763
16
            }
3764
3765
15.6k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3766
15.6k
            xmlFree(name);
3767
3768
15.6k
            if ((ent != NULL) &&
3769
15.6k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3770
5.00k
                if ((ent->flags & flags) != flags) {
3771
1.21k
                    pent->flags |= XML_ENT_EXPANDING;
3772
1.21k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3773
1.21k
                    pent->flags &= ~XML_ENT_EXPANDING;
3774
1.21k
                }
3775
3776
5.00k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3777
5.00k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3778
5.00k
            }
3779
15.6k
        }
3780
23.4M
    }
3781
3782
2.41k
done:
3783
2.41k
    if (ctxt->inSubset == 0)
3784
2.34k
        pent->expandedSize = expandedSize;
3785
3786
2.41k
    pent->flags |= flags;
3787
2.41k
}
3788
3789
/**
3790
 * Expand general entity references in an entity or attribute value.
3791
 * Perform attribute value normalization.
3792
 *
3793
 * @param ctxt  parser context
3794
 * @param buf  string buffer
3795
 * @param str  entity or attribute value
3796
 * @param pent  entity for entity value, NULL for attribute values
3797
 * @param normalize  whether to collapse whitespace
3798
 * @param inSpace  whitespace state
3799
 * @param depth  nesting depth
3800
 * @param check  whether to check for amplification
3801
 * @returns  whether there was a normalization change
3802
 */
3803
static int
3804
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3805
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3806
1.62M
                          int *inSpace, int depth, int check) {
3807
1.62M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3808
1.62M
    int c, chunkSize;
3809
1.62M
    int normChange = 0;
3810
3811
1.62M
    if (str == NULL)
3812
0
        return(0);
3813
3814
1.62M
    depth += 1;
3815
1.62M
    if (depth > maxDepth) {
3816
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3817
0
                       "Maximum entity nesting depth exceeded");
3818
0
  return(0);
3819
0
    }
3820
3821
1.62M
    if (pent != NULL) {
3822
1.62M
        if (pent->flags & XML_ENT_EXPANDING) {
3823
10
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3824
10
            return(0);
3825
10
        }
3826
3827
1.62M
        if (check) {
3828
1.62M
            if (xmlParserEntityCheck(ctxt, pent->length))
3829
136
                return(0);
3830
1.62M
        }
3831
1.62M
    }
3832
3833
1.62M
    chunkSize = 0;
3834
3835
    /*
3836
     * Note that entity values are already validated. No special
3837
     * handling for multi-byte characters is needed.
3838
     */
3839
1.42G
    while (!PARSER_STOPPED(ctxt)) {
3840
1.42G
        c = *str;
3841
3842
1.42G
  if (c != '&') {
3843
1.41G
            if (c == 0)
3844
1.60M
                break;
3845
3846
            /*
3847
             * If this function is called without an entity, it is used to
3848
             * expand entities in an attribute content where less-than was
3849
             * already unscaped and is allowed.
3850
             */
3851
1.41G
            if ((pent != NULL) && (c == '<')) {
3852
24.3k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3853
24.3k
                        "'<' in entity '%s' is not allowed in attributes "
3854
24.3k
                        "values\n", pent->name);
3855
24.3k
                break;
3856
24.3k
            }
3857
3858
1.41G
            if (c <= 0x20) {
3859
11.4M
                if ((normalize) && (*inSpace)) {
3860
                    /* Skip char */
3861
739k
                    if (chunkSize > 0) {
3862
40.0k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3863
40.0k
                        chunkSize = 0;
3864
40.0k
                    }
3865
739k
                    normChange = 1;
3866
10.7M
                } else if (c < 0x20) {
3867
10.3M
                    if (chunkSize > 0) {
3868
2.02M
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3869
2.02M
                        chunkSize = 0;
3870
2.02M
                    }
3871
3872
10.3M
                    xmlSBufAddCString(buf, " ", 1);
3873
10.3M
                } else {
3874
404k
                    chunkSize += 1;
3875
404k
                }
3876
3877
11.4M
                *inSpace = 1;
3878
1.40G
            } else {
3879
1.40G
                chunkSize += 1;
3880
1.40G
                *inSpace = 0;
3881
1.40G
            }
3882
3883
1.41G
            str += 1;
3884
1.41G
        } else if (str[1] == '#') {
3885
386k
            int val;
3886
3887
386k
            if (chunkSize > 0) {
3888
316k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3889
316k
                chunkSize = 0;
3890
316k
            }
3891
3892
386k
      val = xmlParseStringCharRef(ctxt, &str);
3893
386k
      if (val == 0) {
3894
10
                if (pent != NULL)
3895
10
                    pent->content[0] = 0;
3896
10
                break;
3897
10
            }
3898
3899
386k
            if (val == ' ') {
3900
29.3k
                if ((normalize) && (*inSpace))
3901
197
                    normChange = 1;
3902
29.1k
                else
3903
29.1k
                    xmlSBufAddCString(buf, " ", 1);
3904
29.3k
                *inSpace = 1;
3905
357k
            } else {
3906
357k
                xmlSBufAddChar(buf, val);
3907
357k
                *inSpace = 0;
3908
357k
            }
3909
8.25M
  } else {
3910
8.25M
            xmlChar *name;
3911
8.25M
            xmlEntityPtr ent;
3912
3913
8.25M
            if (chunkSize > 0) {
3914
8.00M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3915
8.00M
                chunkSize = 0;
3916
8.00M
            }
3917
3918
8.25M
      name = xmlParseStringEntityRef(ctxt, &str);
3919
8.25M
            if (name == NULL) {
3920
10
                if (pent != NULL)
3921
10
                    pent->content[0] = 0;
3922
10
                break;
3923
10
            }
3924
3925
8.25M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3926
8.25M
            xmlFree(name);
3927
3928
8.25M
      if ((ent != NULL) &&
3929
8.25M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3930
7.76M
    if (ent->content == NULL) {
3931
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3932
0
          "predefined entity has no content\n");
3933
0
                    break;
3934
0
                }
3935
3936
7.76M
                xmlSBufAddString(buf, ent->content, ent->length);
3937
3938
7.76M
                *inSpace = 0;
3939
7.76M
      } else if ((ent != NULL) && (ent->content != NULL)) {
3940
415k
                if (pent != NULL)
3941
415k
                    pent->flags |= XML_ENT_EXPANDING;
3942
415k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3943
415k
                        ent->content, ent, normalize, inSpace, depth, check);
3944
415k
                if (pent != NULL)
3945
415k
                    pent->flags &= ~XML_ENT_EXPANDING;
3946
415k
      }
3947
8.25M
        }
3948
1.42G
    }
3949
3950
1.62M
    if (chunkSize > 0)
3951
1.51M
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3952
3953
1.62M
    return(normChange);
3954
1.62M
}
3955
3956
/**
3957
 * Expand general entity references in an entity or attribute value.
3958
 * Perform attribute value normalization.
3959
 *
3960
 * @param ctxt  parser context
3961
 * @param str  entity or attribute value
3962
 * @param normalize  whether to collapse whitespace
3963
 * @returns the expanded attribtue value.
3964
 */
3965
xmlChar *
3966
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3967
0
                            int normalize) {
3968
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3969
0
                         XML_MAX_HUGE_LENGTH :
3970
0
                         XML_MAX_TEXT_LENGTH;
3971
0
    xmlSBuf buf;
3972
0
    int inSpace = 1;
3973
3974
0
    xmlSBufInit(&buf, maxLength);
3975
3976
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3977
0
                              ctxt->inputNr, /* check */ 0);
3978
3979
0
    if ((normalize) && (inSpace) && (buf.size > 0))
3980
0
        buf.size--;
3981
3982
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3983
0
}
3984
3985
/**
3986
 * Parse a value for an attribute.
3987
 *
3988
 * NOTE: if no normalization is needed, the routine will return pointers
3989
 * directly from the data buffer.
3990
 *
3991
 * 3.3.3 Attribute-Value Normalization:
3992
 *
3993
 * Before the value of an attribute is passed to the application or
3994
 * checked for validity, the XML processor must normalize it as follows:
3995
 *
3996
 * - a character reference is processed by appending the referenced
3997
 *   character to the attribute value
3998
 * - an entity reference is processed by recursively processing the
3999
 *   replacement text of the entity
4000
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4001
 *   appending \#x20 to the normalized value, except that only a single
4002
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4003
 *   parsed entity or the literal entity value of an internal parsed entity
4004
 * - other characters are processed by appending them to the normalized value
4005
 *
4006
 * If the declared value is not CDATA, then the XML processor must further
4007
 * process the normalized attribute value by discarding any leading and
4008
 * trailing space (\#x20) characters, and by replacing sequences of space
4009
 * (\#x20) characters by a single space (\#x20) character.
4010
 * All attributes for which no declaration has been read should be treated
4011
 * by a non-validating parser as if declared CDATA.
4012
 *
4013
 * @param ctxt  an XML parser context
4014
 * @param attlen  attribute len result
4015
 * @param outFlags  resulting XML_ATTVAL_* flags
4016
 * @param special  value from attsSpecial
4017
 * @param isNamespace  whether this is a namespace declaration
4018
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4019
 *     caller if it was copied, this can be detected by val[*len] == 0.
4020
 */
4021
static xmlChar *
4022
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4023
539k
                         int special, int isNamespace) {
4024
539k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4025
0
                         XML_MAX_HUGE_LENGTH :
4026
539k
                         XML_MAX_TEXT_LENGTH;
4027
539k
    xmlSBuf buf;
4028
539k
    xmlChar *ret;
4029
539k
    int c, l, quote, entFlags, chunkSize;
4030
539k
    int inSpace = 1;
4031
539k
    int replaceEntities;
4032
539k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4033
539k
    int attvalFlags = 0;
4034
4035
    /* Always expand namespace URIs */
4036
539k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4037
4038
539k
    xmlSBufInit(&buf, maxLength);
4039
4040
539k
    GROW;
4041
4042
539k
    quote = CUR;
4043
539k
    if ((quote != '"') && (quote != '\'')) {
4044
11.2k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4045
11.2k
  return(NULL);
4046
11.2k
    }
4047
527k
    NEXTL(1);
4048
4049
527k
    if (ctxt->inSubset == 0)
4050
494k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4051
33.3k
    else
4052
33.3k
        entFlags = XML_ENT_VALIDATED;
4053
4054
527k
    inSpace = 1;
4055
527k
    chunkSize = 0;
4056
4057
229M
    while (1) {
4058
229M
        if (PARSER_STOPPED(ctxt))
4059
174
            goto error;
4060
4061
229M
        if (CUR_PTR >= ctxt->input->end) {
4062
4.15k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4063
4.15k
                           "AttValue: ' expected\n");
4064
4.15k
            goto error;
4065
4.15k
        }
4066
4067
        /*
4068
         * TODO: Check growth threshold
4069
         */
4070
229M
        if (ctxt->input->end - CUR_PTR < 10)
4071
113k
            GROW;
4072
4073
229M
        c = CUR;
4074
4075
229M
        if (c >= 0x80) {
4076
137M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4077
137M
                    "invalid character in attribute value\n");
4078
137M
            if (l == 0) {
4079
40.4M
                if (chunkSize > 0) {
4080
974k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4081
974k
                    chunkSize = 0;
4082
974k
                }
4083
40.4M
                xmlSBufAddReplChar(&buf);
4084
40.4M
                NEXTL(1);
4085
96.6M
            } else {
4086
96.6M
                chunkSize += l;
4087
96.6M
                NEXTL(l);
4088
96.6M
            }
4089
4090
137M
            inSpace = 0;
4091
137M
        } else if (c != '&') {
4092
90.7M
            if (c > 0x20) {
4093
61.7M
                if (c == quote)
4094
520k
                    break;
4095
4096
61.2M
                if (c == '<')
4097
454k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4098
4099
61.2M
                chunkSize += 1;
4100
61.2M
                inSpace = 0;
4101
61.2M
            } else if (!IS_BYTE_CHAR(c)) {
4102
15.4M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4103
15.4M
                        "invalid character in attribute value\n");
4104
15.4M
                if (chunkSize > 0) {
4105
652k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4106
652k
                    chunkSize = 0;
4107
652k
                }
4108
15.4M
                xmlSBufAddReplChar(&buf);
4109
15.4M
                inSpace = 0;
4110
15.4M
            } else {
4111
                /* Whitespace */
4112
13.5M
                if ((normalize) && (inSpace)) {
4113
                    /* Skip char */
4114
150k
                    if (chunkSize > 0) {
4115
25.0k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4116
25.0k
                        chunkSize = 0;
4117
25.0k
                    }
4118
150k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4119
13.3M
                } else if (c < 0x20) {
4120
                    /* Convert to space */
4121
12.7M
                    if (chunkSize > 0) {
4122
436k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4123
436k
                        chunkSize = 0;
4124
436k
                    }
4125
4126
12.7M
                    xmlSBufAddCString(&buf, " ", 1);
4127
12.7M
                } else {
4128
647k
                    chunkSize += 1;
4129
647k
                }
4130
4131
13.5M
                inSpace = 1;
4132
4133
13.5M
                if ((c == 0xD) && (NXT(1) == 0xA))
4134
29.3k
                    CUR_PTR++;
4135
13.5M
            }
4136
4137
90.2M
            NEXTL(1);
4138
90.2M
        } else if (NXT(1) == '#') {
4139
89.2k
            int val;
4140
4141
89.2k
            if (chunkSize > 0) {
4142
70.2k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4143
70.2k
                chunkSize = 0;
4144
70.2k
            }
4145
4146
89.2k
            val = xmlParseCharRef(ctxt);
4147
89.2k
            if (val == 0)
4148
3.16k
                goto error;
4149
4150
86.1k
            if ((val == '&') && (!replaceEntities)) {
4151
                /*
4152
                 * The reparsing will be done in xmlNodeParseContent()
4153
                 * called from SAX2.c
4154
                 */
4155
24.8k
                xmlSBufAddCString(&buf, "&#38;", 5);
4156
24.8k
                inSpace = 0;
4157
61.2k
            } else if (val == ' ') {
4158
18.7k
                if ((normalize) && (inSpace))
4159
720
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4160
18.0k
                else
4161
18.0k
                    xmlSBufAddCString(&buf, " ", 1);
4162
18.7k
                inSpace = 1;
4163
42.4k
            } else {
4164
42.4k
                xmlSBufAddChar(&buf, val);
4165
42.4k
                inSpace = 0;
4166
42.4k
            }
4167
1.85M
        } else {
4168
1.85M
            const xmlChar *name;
4169
1.85M
            xmlEntityPtr ent;
4170
4171
1.85M
            if (chunkSize > 0) {
4172
1.38M
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4173
1.38M
                chunkSize = 0;
4174
1.38M
            }
4175
4176
1.85M
            name = xmlParseEntityRefInternal(ctxt);
4177
1.85M
            if (name == NULL) {
4178
                /*
4179
                 * Probably a literal '&' which wasn't escaped.
4180
                 * TODO: Handle gracefully in recovery mode.
4181
                 */
4182
96.1k
                continue;
4183
96.1k
            }
4184
4185
1.76M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4186
1.76M
            if (ent == NULL)
4187
73.4k
                continue;
4188
4189
1.68M
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4190
367k
                if ((ent->content[0] == '&') && (!replaceEntities))
4191
11.4k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4192
355k
                else
4193
355k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4194
367k
                inSpace = 0;
4195
1.31M
            } else if (replaceEntities) {
4196
1.21M
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4197
1.21M
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4198
1.21M
                        /* check */ 1) > 0)
4199
38.9k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4200
1.21M
            } else {
4201
109k
                if ((ent->flags & entFlags) != entFlags)
4202
1.22k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4203
4204
109k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4205
46
                    ent->content[0] = 0;
4206
46
                    goto error;
4207
46
                }
4208
4209
                /*
4210
                 * Just output the reference
4211
                 */
4212
108k
                xmlSBufAddCString(&buf, "&", 1);
4213
108k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4214
108k
                xmlSBufAddCString(&buf, ";", 1);
4215
4216
108k
                inSpace = 0;
4217
108k
            }
4218
1.68M
  }
4219
229M
    }
4220
4221
520k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4222
375k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4223
4224
375k
        if (attlen != NULL)
4225
375k
            *attlen = chunkSize;
4226
375k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4227
405
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4228
405
            *attlen -= 1;
4229
405
        }
4230
4231
        /* Report potential error */
4232
375k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4233
375k
    } else {
4234
144k
        if (chunkSize > 0)
4235
96.8k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4236
4237
144k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4238
1.71k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4239
1.71k
            buf.size--;
4240
1.71k
        }
4241
4242
144k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4243
144k
        attvalFlags |= XML_ATTVAL_ALLOC;
4244
4245
144k
        if (ret != NULL) {
4246
144k
            if (attlen != NULL)
4247
113k
                *attlen = buf.size;
4248
144k
        }
4249
144k
    }
4250
4251
520k
    if (outFlags != NULL)
4252
488k
        *outFlags = attvalFlags;
4253
4254
520k
    NEXTL(1);
4255
4256
520k
    return(ret);
4257
4258
7.53k
error:
4259
7.53k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4260
7.53k
    return(NULL);
4261
527k
}
4262
4263
/**
4264
 * Parse a value for an attribute
4265
 * Note: the parser won't do substitution of entities here, this
4266
 * will be handled later in #xmlStringGetNodeList
4267
 *
4268
 * @deprecated Internal function, don't use.
4269
 *
4270
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4271
 *                       "'" ([^<&'] | Reference)* "'"
4272
 *
4273
 * 3.3.3 Attribute-Value Normalization:
4274
 *
4275
 * Before the value of an attribute is passed to the application or
4276
 * checked for validity, the XML processor must normalize it as follows:
4277
 *
4278
 * - a character reference is processed by appending the referenced
4279
 *   character to the attribute value
4280
 * - an entity reference is processed by recursively processing the
4281
 *   replacement text of the entity
4282
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4283
 *   appending \#x20 to the normalized value, except that only a single
4284
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4285
 *   parsed entity or the literal entity value of an internal parsed entity
4286
 * - other characters are processed by appending them to the normalized value
4287
 *
4288
 * If the declared value is not CDATA, then the XML processor must further
4289
 * process the normalized attribute value by discarding any leading and
4290
 * trailing space (\#x20) characters, and by replacing sequences of space
4291
 * (\#x20) characters by a single space (\#x20) character.
4292
 * All attributes for which no declaration has been read should be treated
4293
 * by a non-validating parser as if declared CDATA.
4294
 *
4295
 * @param ctxt  an XML parser context
4296
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4297
 * caller.
4298
 */
4299
xmlChar *
4300
33.9k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4301
33.9k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4302
33.9k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4303
33.9k
}
4304
4305
/**
4306
 * Parse an XML Literal
4307
 *
4308
 * @deprecated Internal function, don't use.
4309
 *
4310
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4311
 *
4312
 * @param ctxt  an XML parser context
4313
 * @returns the SystemLiteral parsed or NULL
4314
 */
4315
4316
xmlChar *
4317
13.8k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4318
13.8k
    xmlChar *buf = NULL;
4319
13.8k
    int len = 0;
4320
13.8k
    int size = XML_PARSER_BUFFER_SIZE;
4321
13.8k
    int cur, l;
4322
13.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4323
0
                    XML_MAX_TEXT_LENGTH :
4324
13.8k
                    XML_MAX_NAME_LENGTH;
4325
13.8k
    xmlChar stop;
4326
4327
13.8k
    if (RAW == '"') {
4328
6.74k
        NEXT;
4329
6.74k
  stop = '"';
4330
7.14k
    } else if (RAW == '\'') {
4331
5.24k
        NEXT;
4332
5.24k
  stop = '\'';
4333
5.24k
    } else {
4334
1.89k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4335
1.89k
  return(NULL);
4336
1.89k
    }
4337
4338
11.9k
    buf = xmlMalloc(size);
4339
11.9k
    if (buf == NULL) {
4340
0
        xmlErrMemory(ctxt);
4341
0
  return(NULL);
4342
0
    }
4343
11.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
4344
3.25M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4345
3.24M
  if (len + 5 >= size) {
4346
7.86k
      xmlChar *tmp;
4347
7.86k
            int newSize;
4348
4349
7.86k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4350
7.86k
            if (newSize < 0) {
4351
5
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4352
5
                xmlFree(buf);
4353
5
                return(NULL);
4354
5
            }
4355
7.86k
      tmp = xmlRealloc(buf, newSize);
4356
7.86k
      if (tmp == NULL) {
4357
0
          xmlFree(buf);
4358
0
    xmlErrMemory(ctxt);
4359
0
    return(NULL);
4360
0
      }
4361
7.86k
      buf = tmp;
4362
7.86k
            size = newSize;
4363
7.86k
  }
4364
3.24M
  COPY_BUF(buf, len, cur);
4365
3.24M
  NEXTL(l);
4366
3.24M
  cur = xmlCurrentCharRecover(ctxt, &l);
4367
3.24M
    }
4368
11.9k
    buf[len] = 0;
4369
11.9k
    if (!IS_CHAR(cur)) {
4370
478
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371
11.5k
    } else {
4372
11.5k
  NEXT;
4373
11.5k
    }
4374
11.9k
    return(buf);
4375
11.9k
}
4376
4377
/**
4378
 * Parse an XML public literal
4379
 *
4380
 * @deprecated Internal function, don't use.
4381
 *
4382
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4383
 *
4384
 * @param ctxt  an XML parser context
4385
 * @returns the PubidLiteral parsed or NULL.
4386
 */
4387
4388
xmlChar *
4389
8.74k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4390
8.74k
    xmlChar *buf = NULL;
4391
8.74k
    int len = 0;
4392
8.74k
    int size = XML_PARSER_BUFFER_SIZE;
4393
8.74k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4394
0
                    XML_MAX_TEXT_LENGTH :
4395
8.74k
                    XML_MAX_NAME_LENGTH;
4396
8.74k
    xmlChar cur;
4397
8.74k
    xmlChar stop;
4398
4399
8.74k
    if (RAW == '"') {
4400
7.04k
        NEXT;
4401
7.04k
  stop = '"';
4402
7.04k
    } else if (RAW == '\'') {
4403
1.24k
        NEXT;
4404
1.24k
  stop = '\'';
4405
1.24k
    } else {
4406
458
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4407
458
  return(NULL);
4408
458
    }
4409
8.28k
    buf = xmlMalloc(size);
4410
8.28k
    if (buf == NULL) {
4411
0
  xmlErrMemory(ctxt);
4412
0
  return(NULL);
4413
0
    }
4414
8.28k
    cur = CUR;
4415
181k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4416
181k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4417
173k
  if (len + 1 >= size) {
4418
184
      xmlChar *tmp;
4419
184
            int newSize;
4420
4421
184
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4422
184
            if (newSize < 0) {
4423
2
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424
2
                xmlFree(buf);
4425
2
                return(NULL);
4426
2
            }
4427
182
      tmp = xmlRealloc(buf, newSize);
4428
182
      if (tmp == NULL) {
4429
0
    xmlErrMemory(ctxt);
4430
0
    xmlFree(buf);
4431
0
    return(NULL);
4432
0
      }
4433
182
      buf = tmp;
4434
182
            size = newSize;
4435
182
  }
4436
173k
  buf[len++] = cur;
4437
173k
  NEXT;
4438
173k
  cur = CUR;
4439
173k
    }
4440
8.28k
    buf[len] = 0;
4441
8.28k
    if (cur != stop) {
4442
964
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4443
7.32k
    } else {
4444
7.32k
  NEXTL(1);
4445
7.32k
    }
4446
8.28k
    return(buf);
4447
8.28k
}
4448
4449
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4450
4451
/*
4452
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value. An entry equals its own index for
 * 0x09 and for 0x20-0x7F with the exception of '&' (0x26), '<' (0x3C)
 * and ']' (0x5D). Every other entry is zero: the remaining control
 * characters (including CR and LF), the three characters above and all
 * bytes >= 0x80.
 * NOTE(review): presumably a zero entry tells the fast path to stop and
 * fall back to slower handling - confirm against the code using it.
4453
 */
4454
static const unsigned char test_char_data[256] = {
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4457
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4458
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4459
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4460
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4461
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4462
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4463
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4464
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4465
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4466
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4467
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4468
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4469
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4470
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4471
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4472
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4475
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4476
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4482
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4483
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4484
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4485
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4486
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4487
};
4488
4489
/**
 * Hand a chunk of character data to the SAX handler.
 *
 * Nothing happens when there is no SAX handler or SAX is disabled.
 * When blank checking is active and areBlanks() accepts the chunk, it is
 * passed to the ignorableWhitespace callback, but only if that callback
 * is set and keepBlanks is enabled; otherwise the chunk is dropped.
 * In all other cases the chunk goes to the characters callback.
 *
 * @param ctxt  an XML parser context
 * @param buf  start of the character data
 * @param size  number of bytes in buf
 * @param isBlank  forwarded unchanged to areBlanks()
 */
static void
4490
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4491
15.3M
              int isBlank) {
4492
15.3M
    int checkBlanks;
4493
4494
15.3M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4495
15.2M
        return;
4496
    /*
     * Blank detection is only needed when blanks are not kept, or when
     * the handler uses different callbacks for ignorable whitespace and
     * for regular characters.
     */
4497
133k
    checkBlanks = (!ctxt->keepBlanks) ||
4498
133k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4499
4500
    /*
4501
     * Calling areBlanks with only parts of a text node
4502
     * is fundamentally broken, making the NOBLANKS option
4503
     * essentially unusable.
4504
     */
4505
133k
    if ((checkBlanks) &&
4506
133k
        (areBlanks(ctxt, buf, size, isBlank))) {
4507
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4508
0
            (ctxt->keepBlanks))
4509
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4510
133k
    } else {
4511
133k
        if (ctxt->sax->characters != NULL)
4512
133k
            ctxt->sax->characters(ctxt->userData, buf, size);
4513
4514
        /*
4515
         * The old code used to update this value for "complex" data
4516
         * even if checkBlanks was false. This was probably a bug.
4517
         */
        /*
         * NOTE(review): -1 and -2 are sentinel values of *ctxt->space whose
         * meaning is defined outside this function - confirm before relying
         * on them.
         */
4518
133k
        if ((checkBlanks) && (*ctxt->space == -1))
4519
0
            *ctxt->space = -2;
4520
133k
    }
4521
133k
}
4522
4523
/**
4524
 * Parse character data. Always makes progress if the first char isn't
4525
 * '<' or '&'.
4526
 *
4527
 * The right angle bracket (>) may be represented using the string "&gt;",
4528
 * and must, for compatibility, be escaped using "&gt;" or a character
4529
 * reference when it appears in the string "]]>" in content, when that
4530
 * string is not marking the end of a CDATA section.
4531
 *
4532
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4533
 * @param ctxt  an XML parser context
4534
 * @param partial  buffer may contain partial UTF-8 sequences
4535
 */
4536
static void
4537
36.4M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4538
36.4M
    const xmlChar *in;
    /*
     * line/col hold the position at entry and are refreshed after each
     * chunk handed to xmlCharacters. They are written back to the input
     * before falling through to xmlParseCharDataComplex.
     */
4539
36.4M
    int line = ctxt->input->line;
4540
36.4M
    int col = ctxt->input->col;
4541
36.4M
    int ccol;
4542
36.4M
    int terminate = 0;
4543
4544
36.4M
    GROW;
4545
    /*
4546
     * Accelerated common case where input doesn't need to be
4547
     * modified before passing it to the handler.
4548
     */
4549
36.4M
    in = ctxt->input->cur;
4550
36.6M
    do {
        /* Phase 1: skip a leading run of spaces and line feeds. */
4551
36.8M
get_more_space:
4552
40.3M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4553
36.8M
        if (*in == 0xA) {
4554
1.46M
            do {
4555
1.46M
                ctxt->input->line++; ctxt->input->col = 1;
4556
1.46M
                in++;
4557
1.46M
            } while (*in == 0xA);
4558
153k
            goto get_more_space;
4559
153k
        }
        /*
         * The run consisted only of spaces and line feeds and is followed
         * by '<': deliver it with isBlank = 1, in chunks of at most
         * XML_MAX_ITEMS bytes, and stop.
         */
4560
36.6M
        if (*in == '<') {
4561
462k
            while (in > ctxt->input->cur) {
4562
231k
                const xmlChar *tmp = ctxt->input->cur;
4563
231k
                size_t nbchar = in - tmp;
4564
4565
231k
                if (nbchar > XML_MAX_ITEMS)
4566
0
                    nbchar = XML_MAX_ITEMS;
4567
231k
                ctxt->input->cur += nbchar;
4568
4569
231k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4570
231k
            }
4571
231k
            return;
4572
231k
        }
4573
        /*
         * Phase 2: scan bytes accepted by test_char_data, tracking the
         * column in a local and handling line feeds separately.
         */
4574
36.7M
get_more:
4575
36.7M
        ccol = ctxt->input->col;
4576
66.5M
        while (test_char_data[*in]) {
4577
29.8M
            in++;
4578
29.8M
            ccol++;
4579
29.8M
        }
4580
36.7M
        ctxt->input->col = ccol;
4581
36.7M
        if (*in == 0xA) {
4582
811k
            do {
4583
811k
                ctxt->input->line++; ctxt->input->col = 1;
4584
811k
                in++;
4585
811k
            } while (*in == 0xA);
4586
137k
            goto get_more;
4587
137k
        }
        /*
         * A ']' may start "]]>", which is reported as
         * XML_ERR_MISPLACED_CDATA_END. With partial input and too few bytes
         * left for the lookahead, flush what precedes the ']' and return
         * (terminate) so the check can be made once more data is available.
         * The ']' itself is otherwise kept as ordinary data.
         */
4588
36.5M
        if (*in == ']') {
4589
109k
            size_t avail = ctxt->input->end - in;
4590
4591
109k
            if (partial && avail < 2) {
4592
0
                terminate = 1;
4593
0
                goto invoke_callback;
4594
0
            }
4595
109k
            if (in[1] == ']') {
4596
14.2k
                if (partial && avail < 3) {
4597
0
                    terminate = 1;
4598
0
                    goto invoke_callback;
4599
0
                }
4600
14.2k
                if (in[2] == '>')
4601
858
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4602
14.2k
            }
4603
4604
109k
            in++;
4605
109k
            ctxt->input->col++;
4606
109k
            goto get_more;
4607
109k
        }
4608
        /*
         * Phase 3: deliver everything scanned so far (isBlank = 0), again
         * in chunks of at most XML_MAX_ITEMS bytes.
         */
4609
36.4M
invoke_callback:
4610
42.6M
        while (in > ctxt->input->cur) {
4611
6.18M
            const xmlChar *tmp = ctxt->input->cur;
4612
6.18M
            size_t nbchar = in - tmp;
4613
4614
6.18M
            if (nbchar > XML_MAX_ITEMS)
4615
0
                nbchar = XML_MAX_ITEMS;
4616
6.18M
            ctxt->input->cur += nbchar;
4617
4618
6.18M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4619
4620
6.18M
            line = ctxt->input->line;
4621
6.18M
            col = ctxt->input->col;
4622
6.18M
        }
4623
36.4M
        ctxt->input->cur = in;
        /*
         * CR LF: move cur onto the LF so that the CR is not part of the
         * next chunk, then keep scanning. A lone CR is stepped back onto;
         * it fails the loop condition below, so it ends up in
         * xmlParseCharDataComplex.
         */
4624
36.4M
        if (*in == 0xD) {
4625
380k
            in++;
4626
380k
            if (*in == 0xA) {
4627
217k
                ctxt->input->cur = in;
4628
217k
                in++;
4629
217k
                ctxt->input->line++; ctxt->input->col = 1;
4630
217k
                continue; /* while */
4631
217k
            }
4632
163k
            in--;
4633
163k
        }
4634
36.2M
        if (*in == '<') {
4635
406k
            return;
4636
406k
        }
4637
35.8M
        if (*in == '&') {
4638
162k
            return;
4639
162k
        }
4640
35.6M
        if (terminate) {
4641
0
            return;
4642
0
        }
        /* SHRINK/GROW may move the buffer, so reload the scan pointer. */
4643
35.6M
        SHRINK;
4644
35.6M
        GROW;
4645
35.6M
        in = ctxt->input->cur;
        /* Stay on the fast path only for printable ASCII, tab and LF. */
4646
35.8M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4647
35.8M
             (*in == 0x09) || (*in == 0x0a));
4648
35.6M
    ctxt->input->line = line;
4649
35.6M
    ctxt->input->col = col;
4650
35.6M
    xmlParseCharDataComplex(ctxt, partial);
4651
35.6M
}
4652
4653
/**
4654
 * Always makes progress if the first char isn't '<' or '&'.
4655
 *
4656
 * Parse a CharData section. This is the fallback function
4657
 * of #xmlParseCharData when the parsing requires handling
4658
 * of non-ASCII characters.
4659
 *
4660
 * @param ctxt  an XML parser context
4661
 * @param partial  whether the input can end with truncated UTF-8
4662
 */
4663
static void
4664
35.6M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
    /*
     * Staging buffer, flushed once nbchar reaches
     * XML_PARSER_BIG_BUFFER_SIZE. The 5 extra bytes are presumably room
     * for the last encoded character plus the terminating 0 - confirm
     * against COPY_BUF.
     */
4665
35.6M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4666
35.6M
    int nbchar = 0;
4667
35.6M
    int cur, l;
4668
4669
35.6M
    cur = xmlCurrentCharRecover(ctxt, &l);
    /* Consume characters until '<', '&' or a value rejected by IS_CHAR. */
4670
149M
    while ((cur != '<') && /* checked */
4671
149M
           (cur != '&') &&
4672
149M
     (IS_CHAR(cur))) {
        /*
         * "]]>" is reported as XML_ERR_MISPLACED_CDATA_END but still
         * copied. With partial input, stop before the ']' when too few
         * bytes remain to look ahead.
         */
4673
113M
        if (cur == ']') {
4674
300k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4675
4676
300k
            if (partial && avail < 2)
4677
0
                break;
4678
300k
            if (NXT(1) == ']') {
4679
76.3k
                if (partial && avail < 3)
4680
0
                    break;
4681
76.3k
                if (NXT(2) == '>')
4682
843
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4683
76.3k
            }
4684
300k
        }
4685
4686
113M
  COPY_BUF(buf, nbchar, cur);
4687
  /* move current position before possible calling of ctxt->sax->characters */
4688
113M
  NEXTL(l);
4689
113M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4690
324k
      buf[nbchar] = 0;
4691
4692
324k
            xmlCharacters(ctxt, buf, nbchar, 0);
4693
324k
      nbchar = 0;
4694
324k
            SHRINK;
4695
324k
  }
4696
113M
  cur = xmlCurrentCharRecover(ctxt, &l);
4697
113M
    }
    /* Flush whatever is left in the staging buffer. */
4698
35.6M
    if (nbchar != 0) {
4699
8.61M
        buf[nbchar] = 0;
4700
4701
8.61M
        xmlCharacters(ctxt, buf, nbchar, 0);
4702
8.61M
    }
4703
    /*
4704
     * cur == 0 can mean
4705
     *
4706
     * - End of buffer.
4707
     * - An actual 0 character.
4708
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4709
     */
4710
35.6M
    if (ctxt->input->cur < ctxt->input->end) {
        /*
         * cur == 0 with a non-zero raw byte: incomplete sequence. It is
         * only an error (and only skipped, by one byte) when partial is 0.
         */
4711
35.6M
        if ((cur == 0) && (CUR != 0)) {
4712
1.58k
            if (partial == 0) {
4713
1.58k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714
1.58k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4715
1.58k
                NEXTL(1);
4716
1.58k
            }
        /* ']' is excluded because the loop may have stopped on it above. */
4717
35.6M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4718
            /* Generate the error and skip the offending character */
4719
35.0M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4720
35.0M
                              "PCDATA invalid Char value %d\n", cur);
4721
35.0M
            NEXTL(l);
4722
35.0M
        }
4723
35.6M
    }
4724
35.6M
}
4725
4726
/**
4727
 * @deprecated Internal function, don't use.
4728
 * @param ctxt  an XML parser context
4729
 * @param cdata  unused
4730
 */
4731
void
4732
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
    /* Thin wrapper: parse character data with partial = 0; cdata is ignored. */
4733
0
    xmlParseCharDataInternal(ctxt, 0);
4734
0
}
4735
4736
/**
4737
 * Parse an External ID or a Public ID
4738
 *
4739
 * @deprecated Internal function, don't use.
4740
 *
4741
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4742
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4743
 *
4744
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4745
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4746
 *
4747
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4748
 *
4749
 * @param ctxt  an XML parser context
4750
 * @param publicId  a xmlChar** receiving PubidLiteral
4751
 * @param strict  indicate whether we should restrict parsing to only
4752
 *          production [75], see NOTE above
4753
 * @returns the function returns SystemLiteral and in the second
4754
 *                case publicID receives PubidLiteral, if strict is off
4755
 *                it is possible to return NULL and have publicID set.
4756
 */
4757
4758
xmlChar *
4759
28.0k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4760
28.0k
    xmlChar *URI = NULL;
4761
    /*
     * If neither 'SYSTEM' nor 'PUBLIC' is found, nothing is consumed and
     * both the return value and *publicId stay NULL.
     */
4762
28.0k
    *publicId = NULL;
4763
28.0k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4764
7.31k
        SKIP(6);
        /* Missing blanks are reported, but parsing continues. */
4765
7.31k
  if (SKIP_BLANKS == 0) {
4766
868
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4767
868
                     "Space required after 'SYSTEM'\n");
4768
868
  }
4769
7.31k
  URI = xmlParseSystemLiteral(ctxt);
4770
7.31k
  if (URI == NULL) {
4771
730
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4772
730
        }
4773
20.7k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4774
8.74k
        SKIP(6);
4775
8.74k
  if (SKIP_BLANKS == 0) {
4776
1.11k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4777
1.11k
        "Space required after 'PUBLIC'\n");
4778
1.11k
  }
4779
8.74k
  *publicId = xmlParsePubidLiteral(ctxt);
4780
8.74k
  if (*publicId == NULL) {
4781
460
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4782
460
  }
4783
8.74k
  if (strict) {
4784
      /*
4785
       * We don't handle [83] so "S SystemLiteral" is required.
4786
       */
4787
6.18k
      if (SKIP_BLANKS == 0) {
4788
944
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
944
      "Space required after the Public Identifier\n");
4790
944
      }
4791
6.18k
  } else {
4792
      /*
4793
       * We handle [83] so we return immediately, if
4794
       * "S SystemLiteral" is not detected. We skip blanks if no
4795
             * system literal was found, but this is harmless since we must
4796
             * be at the end of a NotationDecl.
4797
       */
      /* In both early returns *publicId keeps the parsed PubidLiteral. */
4798
2.56k
      if (SKIP_BLANKS == 0) return(NULL);
4799
767
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4800
767
  }
4801
6.57k
  URI = xmlParseSystemLiteral(ctxt);
4802
6.57k
  if (URI == NULL) {
4803
1.17k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4804
1.17k
        }
4805
6.57k
    }
4806
25.8k
    return(URI);
4807
28.0k
}
4808
4809
/**
4810
 * Skip an XML (SGML) comment <!-- .... -->
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 * This is the slow routine in case the accelerator for ascii didn't work
4814
 *
4815
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4816
 * @param ctxt  an XML parser context
4817
 * @param buf  the already parsed part of the buffer
4818
 * @param len  number of bytes in the buffer
4819
 * @param size  allocated size of the buffer
4820
 */
4821
static void
4822
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4823
33.7k
                       size_t len, size_t size) {
    /*
     * q, r and cur form a sliding window over three consecutive
     * characters (ql, rl, l are the lengths reported for them by
     * xmlCurrentCharRecover). The comment ends when the window is "-->".
     */
4824
33.7k
    int q, ql;
4825
33.7k
    int r, rl;
4826
33.7k
    int cur, l;
4827
33.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4828
0
                    XML_MAX_HUGE_LENGTH :
4829
33.7k
                    XML_MAX_TEXT_LENGTH;
4830
    /*
     * This function owns buf from here on: every return path after this
     * point frees it. A NULL buf means nothing was collected yet, so a
     * fresh buffer is allocated.
     */
4831
33.7k
    if (buf == NULL) {
4832
10.2k
        len = 0;
4833
10.2k
  size = XML_PARSER_BUFFER_SIZE;
4834
10.2k
  buf = xmlMalloc(size);
4835
10.2k
  if (buf == NULL) {
4836
0
      xmlErrMemory(ctxt);
4837
0
      return;
4838
0
  }
4839
10.2k
    }
    /* Prime the window: read q, then r, then cur. */
4840
33.7k
    q = xmlCurrentCharRecover(ctxt, &ql);
4841
33.7k
    if (q == 0)
4842
5.78k
        goto not_terminated;
4843
27.9k
    if (!IS_CHAR(q)) {
4844
4.64k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845
4.64k
                          "xmlParseComment: invalid xmlChar value %d\n",
4846
4.64k
                    q);
4847
4.64k
  xmlFree (buf);
4848
4.64k
  return;
4849
4.64k
    }
4850
23.2k
    NEXTL(ql);
4851
23.2k
    r = xmlCurrentCharRecover(ctxt, &rl);
4852
23.2k
    if (r == 0)
4853
2.02k
        goto not_terminated;
4854
21.2k
    if (!IS_CHAR(r)) {
4855
2.54k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4856
2.54k
                          "xmlParseComment: invalid xmlChar value %d\n",
4857
2.54k
                    r);
4858
2.54k
  xmlFree (buf);
4859
2.54k
  return;
4860
2.54k
    }
4861
18.7k
    NEXTL(rl);
4862
18.7k
    cur = xmlCurrentCharRecover(ctxt, &l);
4863
18.7k
    if (cur == 0)
4864
842
        goto not_terminated;
    /*
     * Each iteration appends q to buf and shifts the window by one
     * character, so the final "--" (held in q and r when the loop stops
     * on '>') is never copied into buf.
     */
4865
5.98M
    while (IS_CHAR(cur) && /* checked */
4866
5.98M
           ((cur != '>') ||
4867
5.96M
      (r != '-') || (q != '-'))) {
        /* "--" not followed by '>' is an error, but parsing goes on. */
4868
5.96M
  if ((r == '-') && (q == '-')) {
4869
11.6k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4870
11.6k
  }
        /* Grow the buffer, bounded by maxLength, before appending. */
4871
5.96M
  if (len + 5 >= size) {
4872
5.72k
      xmlChar *tmp;
4873
5.72k
            int newSize;
4874
4875
5.72k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4876
5.72k
            if (newSize < 0) {
4877
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878
0
                             "Comment too big found", NULL);
4879
0
                xmlFree (buf);
4880
0
                return;
4881
0
            }
4882
5.72k
      tmp = xmlRealloc(buf, newSize);
4883
5.72k
      if (tmp == NULL) {
4884
0
    xmlErrMemory(ctxt);
4885
0
    xmlFree(buf);
4886
0
    return;
4887
0
      }
4888
5.72k
      buf = tmp;
4889
5.72k
            size = newSize;
4890
5.72k
  }
4891
5.96M
  COPY_BUF(buf, len, q);
4892
4893
5.96M
  q = r;
4894
5.96M
  ql = rl;
4895
5.96M
  r = cur;
4896
5.96M
  rl = l;
4897
4898
5.96M
  NEXTL(l);
4899
5.96M
  cur = xmlCurrentCharRecover(ctxt, &l);
4900
4901
5.96M
    }
4902
17.8k
    buf[len] = 0;
    /*
     * The loop ended either on cur == 0, on an invalid character, or on
     * the closing '>' of "-->"; only the last case reaches the SAX
     * comment callback.
     */
4903
17.8k
    if (cur == 0) {
4904
5.00k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4905
5.00k
                       "Comment not terminated \n<!--%.50s\n", buf);
4906
12.8k
    } else if (!IS_CHAR(cur)) {
4907
7.81k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4908
7.81k
                          "xmlParseComment: invalid xmlChar value %d\n",
4909
7.81k
                    cur);
4910
7.81k
    } else {
4911
5.05k
        NEXT;
4912
5.05k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4913
5.05k
      (!ctxt->disableSAX))
4914
1.56k
      ctxt->sax->comment(ctxt->userData, buf);
4915
5.05k
    }
4916
17.8k
    xmlFree(buf);
4917
17.8k
    return;
4918
8.65k
not_terminated:
4919
8.65k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4920
8.65k
       "Comment not terminated\n", NULL);
4921
8.65k
    xmlFree(buf);
4922
8.65k
}
4923
4924
/**
4925
 * Parse an XML (SGML) comment. Always consumes '<!'.
4926
 *
4927
 * @deprecated Internal function, don't use.
4928
 *
4929
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4930
 *  must not occur within comments. "
4931
 *
4932
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4933
 * @param ctxt  an XML parser context
4934
 */
4935
void
4936
40.4k
xmlParseComment(xmlParserCtxt *ctxt) {
    /* buf is allocated lazily; it stays NULL until data has to be saved. */
4937
40.4k
    xmlChar *buf = NULL;
4938
40.4k
    size_t size = XML_PARSER_BUFFER_SIZE;
4939
40.4k
    size_t len = 0;
4940
40.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4941
0
                       XML_MAX_HUGE_LENGTH :
4942
40.4k
                       XML_MAX_TEXT_LENGTH;
4943
40.4k
    const xmlChar *in;
4944
40.4k
    size_t nbchar = 0;
4945
40.4k
    int ccol;
4946
4947
    /*
4948
     * Check that there is a comment right here.
4949
     */
    /* "<!" is consumed even if "--" does not follow. */
4950
40.4k
    if ((RAW != '<') || (NXT(1) != '!'))
4951
0
        return;
4952
40.4k
    SKIP(2);
4953
40.4k
    if ((RAW != '-') || (NXT(1) != '-'))
4954
16
        return;
4955
40.4k
    SKIP(2);
4956
40.4k
    GROW;
4957
4958
    /*
4959
     * Accelerated common case where input doesn't need to be
4960
     * modified before passing it to the handler.
4961
     */
4962
40.4k
    in = ctxt->input->cur;
4963
41.3k
    do {
4964
41.3k
  if (*in == 0xA) {
4965
34.1k
      do {
4966
34.1k
    ctxt->input->line++; ctxt->input->col = 1;
4967
34.1k
    in++;
4968
34.1k
      } while (*in == 0xA);
4969
2.38k
  }
4970
798k
get_more:
4971
798k
        ccol = ctxt->input->col;
        /* Scan tab and 0x20..0x7F, stopping at '-'. */
4972
7.04M
  while (((*in > '-') && (*in <= 0x7F)) ||
4973
7.04M
         ((*in >= 0x20) && (*in < '-')) ||
4974
7.04M
         (*in == 0x09)) {
4975
6.24M
        in++;
4976
6.24M
        ccol++;
4977
6.24M
  }
4978
798k
  ctxt->input->col = ccol;
4979
798k
  if (*in == 0xA) {
4980
124k
      do {
4981
124k
    ctxt->input->line++; ctxt->input->col = 1;
4982
124k
    in++;
4983
124k
      } while (*in == 0xA);
4984
17.3k
      goto get_more;
4985
17.3k
  }
4986
781k
  nbchar = in - ctxt->input->cur;
4987
  /*
4988
   * save current set of data
4989
   */
4990
781k
  if (nbchar > 0) {
4991
759k
            if (nbchar > maxLength - len) {
4992
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4993
0
                                  "Comment too big found", NULL);
4994
0
                xmlFree(buf);
4995
0
                return;
4996
0
            }
            /*
             * First allocation: if "--" follows immediately, size the
             * buffer exactly (nbchar + 1); otherwise leave
             * XML_PARSER_BUFFER_SIZE bytes of headroom.
             */
4997
759k
            if (buf == NULL) {
4998
27.2k
                if ((*in == '-') && (in[1] == '-'))
4999
5.58k
                    size = nbchar + 1;
5000
21.6k
                else
5001
21.6k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5002
27.2k
                buf = xmlMalloc(size);
5003
27.2k
                if (buf == NULL) {
5004
0
                    xmlErrMemory(ctxt);
5005
0
                    return;
5006
0
                }
5007
27.2k
                len = 0;
5008
732k
            } else if (len + nbchar + 1 >= size) {
5009
7.23k
                xmlChar *new_buf;
5010
7.23k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5011
7.23k
                new_buf = xmlRealloc(buf, size);
5012
7.23k
                if (new_buf == NULL) {
5013
0
                    xmlErrMemory(ctxt);
5014
0
                    xmlFree(buf);
5015
0
                    return;
5016
0
                }
5017
7.23k
                buf = new_buf;
5018
7.23k
            }
5019
759k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5020
759k
            len += nbchar;
5021
759k
            buf[len] = 0;
5022
759k
  }
5023
781k
  ctxt->input->cur = in;
  /*
   * NOTE(review): `in` has not moved since the `*in == 0xA` check above,
   * which jumps to get_more, so this branch looks unreachable (its hit
   * count is 0) - confirm before removing.
   */
5024
781k
  if (*in == 0xA) {
5025
0
      in++;
5026
0
      ctxt->input->line++; ctxt->input->col = 1;
5027
0
  }
  /*
   * CR LF: move cur onto the LF so the CR is not copied with the next
   * chunk. A lone CR is stepped back onto and left for the checks below.
   */
5028
781k
  if (*in == 0xD) {
5029
21.5k
      in++;
5030
21.5k
      if (*in == 0xA) {
5031
15.7k
    ctxt->input->cur = in;
5032
15.7k
    in++;
5033
15.7k
    ctxt->input->line++; ctxt->input->col = 1;
5034
15.7k
    goto get_more;
5035
15.7k
      }
5036
5.78k
      in--;
5037
5.78k
  }
5038
765k
  SHRINK;
5039
765k
  GROW;
5040
765k
  in = ctxt->input->cur;
  /*
   * '-' handling: "-->" ends the comment and fires the SAX callback
   * (with "" when nothing was buffered). "--" without '>' is reported
   * as XML_ERR_HYPHEN_IN_COMMENT and skipped. A single '-' is skipped
   * as data. In the two latter cases scanning resumes at get_more.
   */
5041
765k
  if (*in == '-') {
5042
730k
      if (in[1] == '-') {
5043
610k
          if (in[2] == '>') {
5044
6.71k
        SKIP(3);
5045
6.71k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5046
6.71k
            (!ctxt->disableSAX)) {
5047
3.26k
      if (buf != NULL)
5048
940
          ctxt->sax->comment(ctxt->userData, buf);
5049
2.32k
      else
5050
2.32k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5051
3.26k
        }
5052
6.71k
        if (buf != NULL)
5053
3.78k
            xmlFree(buf);
5054
6.71k
        return;
5055
6.71k
    }
5056
603k
    if (buf != NULL) {
5057
601k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5058
601k
                          "Double hyphen within comment: "
5059
601k
                                      "<!--%.50s\n",
5060
601k
              buf);
5061
601k
    } else
5062
2.24k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5063
2.24k
                          "Double hyphen within comment\n", NULL);
5064
603k
    in++;
5065
603k
    ctxt->input->col++;
5066
603k
      }
5067
724k
      in++;
5068
724k
      ctxt->input->col++;
5069
724k
      goto get_more;
5070
730k
  }
5071
765k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* Anything else: hand the buffer over to the slow path, which frees it. */
5072
33.7k
    xmlParseCommentComplex(ctxt, buf, len, size);
5073
33.7k
}
5074
5075
5076
/**
5077
 * Parse the name of a PI
5078
 *
5079
 * @deprecated Internal function, don't use.
5080
 *
5081
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5082
 *
5083
 * @param ctxt  an XML parser context
5084
 * @returns the PITarget name or NULL
5085
 */
5086
5087
const xmlChar *
5088
68.6k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5089
68.6k
    const xmlChar *name;
5090
5091
68.6k
    name = xmlParseName(ctxt);
    /* Names starting with "xml" in any letter case get special checks. */
5092
68.6k
    if ((name != NULL) &&
5093
68.6k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5094
68.6k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5095
68.6k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5096
21.6k
  int i;
        /*
         * Exactly "xml" (lower case) is a misplaced XML declaration; any
         * other three-letter case variant is a reserved name. Both are
         * fatal errors, but the name is still returned.
         */
5097
21.6k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5098
21.6k
      (name[2] == 'l') && (name[3] == 0)) {
5099
12.3k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100
12.3k
     "XML declaration allowed only at the start of the document\n");
5101
12.3k
      return(name);
5102
12.3k
  } else if (name[3] == 0) {
5103
2.90k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5104
2.90k
      return(name);
5105
2.90k
  }
        /*
         * Longer names are accepted silently when listed in xmlW3CPIs
         * (scanned up to its NULL entry); otherwise only a warning is
         * issued.
         */
5106
18.4k
  for (i = 0;;i++) {
5107
18.4k
      if (xmlW3CPIs[i] == NULL) break;
5108
12.6k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5109
597
          return(name);
5110
12.6k
  }
5111
5.80k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5112
5.80k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5113
5.80k
          NULL, NULL);
5114
5.80k
    }
    /* A colon in the name is reported as a namespace error. */
5115
52.8k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5116
4.24k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5117
4.24k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5118
4.24k
    }
5119
52.8k
    return(name);
5120
68.6k
}
5121
5122
#ifdef LIBXML_CATALOG_ENABLED
5123
/**
5124
 * Parse an XML Catalog Processing Instruction.
5125
 *
5126
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5127
 *
5128
 * Occurs only if allowed by the user and if happening in the Misc
5129
 * part of the document before any doctype information
5130
 * This will add the given catalog to the parsing context in order
5131
 * to be used if there is a resolution need further down in the document
5132
 *
5133
 * @param ctxt  an XML parser context
5134
 * @param catalog  the PI value string
5135
 */
5136
5137
static void
5138
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5139
0
    xmlChar *URL = NULL;
5140
0
    const xmlChar *tmp, *base;
5141
0
    xmlChar marker;
5142
    /*
     * Expected content: optional blanks, the keyword catalog, optional
     * blanks, '=', optional blanks, a value in single or double quotes,
     * optional blanks, end of string.
     */
5143
0
    tmp = catalog;
5144
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5145
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5146
0
  goto error;
5147
0
    tmp += 7;
5148
0
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * NOTE(review): unlike the other syntax problems, a missing '='
     * returns without emitting the XML_WAR_CATALOG_PI warning - confirm
     * this is intended.
     */
5149
0
    if (*tmp != '=') {
5150
0
  return;
5151
0
    }
5152
0
    tmp++;
5153
0
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* The opening quote character also has to close the value. */
5154
0
    marker = *tmp;
5155
0
    if ((marker != '\'') && (marker != '"'))
5156
0
  goto error;
5157
0
    tmp++;
5158
0
    base = tmp;
5159
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5160
0
    if (*tmp == 0)
5161
0
  goto error;
5162
0
    URL = xmlStrndup(base, tmp - base);
5163
0
    tmp++;
5164
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5165
0
    if (*tmp != 0)
5166
0
  goto error;
5167
5168
0
    if (URL != NULL) {
5169
        /*
5170
         * Unfortunately, the catalog API doesn't report OOM errors.
5171
         * xmlGetLastError isn't very helpful since we don't know
5172
         * where the last error came from. We'd have to reset it
5173
         * before this call and restore it afterwards.
5174
         */
5175
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5176
0
  xmlFree(URL);
5177
0
    }
5178
0
    return;
5179
5180
0
error:
    /* Syntax error: warn with the whole PI value and drop any copied URL. */
5181
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5182
0
            "Catalog PI syntax error: %s\n",
5183
0
      catalog, NULL);
5184
0
    if (URL != NULL)
5185
0
  xmlFree(URL);
5186
0
}
5187
#endif
5188
5189
/**
5190
 * Parse an XML Processing Instruction.
5191
 *
5192
 * @deprecated Internal function, don't use.
5193
 *
5194
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5195
 *
5196
 * The processing is transferred to SAX once parsed.
5197
 *
5198
 * @param ctxt  an XML parser context
5199
 */
5200
5201
void
5202
68.6k
xmlParsePI(xmlParserCtxt *ctxt) {
5203
68.6k
    xmlChar *buf = NULL;
5204
68.6k
    size_t len = 0;
5205
68.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5206
68.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5207
0
                       XML_MAX_HUGE_LENGTH :
5208
68.6k
                       XML_MAX_TEXT_LENGTH;
5209
68.6k
    int cur, l;
5210
68.6k
    const xmlChar *target;
5211
    /* Nothing is consumed unless the input starts with "<?". */
5212
68.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5213
  /*
5214
   * this is a Processing Instruction.
5215
   */
5216
68.6k
  SKIP(2);
5217
5218
  /*
5219
   * Parse the target name and check for special support like
5220
   * namespace.
5221
   */
5222
68.6k
        target = xmlParsePITarget(ctxt);
5223
68.6k
  if (target != NULL) {
            /* "<?target?>": no data, report the PI with a NULL value. */
5224
62.3k
      if ((RAW == '?') && (NXT(1) == '>')) {
5225
14.2k
    SKIP(2);
5226
5227
    /*
5228
     * SAX: PI detected.
5229
     */
5230
14.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5231
14.2k
        (ctxt->sax->processingInstruction != NULL))
5232
7.42k
        ctxt->sax->processingInstruction(ctxt->userData,
5233
7.42k
                                         target, NULL);
5234
14.2k
    return;
5235
14.2k
      }
5236
48.1k
      buf = xmlMalloc(size);
5237
48.1k
      if (buf == NULL) {
5238
0
    xmlErrMemory(ctxt);
5239
0
    return;
5240
0
      }
            /* Missing blanks after the target are reported, parsing goes on. */
5241
48.1k
      if (SKIP_BLANKS == 0) {
5242
26.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5243
26.0k
        "ParsePI: PI %s space expected\n", target);
5244
26.0k
      }
5245
48.1k
      cur = xmlCurrentCharRecover(ctxt, &l);
            /* Collect the PI data until "?>" or a value rejected by IS_CHAR. */
5246
16.8M
      while (IS_CHAR(cur) && /* checked */
5247
16.8M
       ((cur != '?') || (NXT(1) != '>'))) {
                /* Grow the buffer, bounded by maxLength, before appending. */
5248
16.8M
    if (len + 5 >= size) {
5249
20.4k
        xmlChar *tmp;
5250
20.4k
                    int newSize;
5251
5252
20.4k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5253
20.4k
                    if (newSize < 0) {
5254
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5255
0
                                          "PI %s too big found", target);
5256
0
                        xmlFree(buf);
5257
0
                        return;
5258
0
                    }
5259
20.4k
        tmp = xmlRealloc(buf, newSize);
5260
20.4k
        if (tmp == NULL) {
5261
0
      xmlErrMemory(ctxt);
5262
0
      xmlFree(buf);
5263
0
      return;
5264
0
        }
5265
20.4k
        buf = tmp;
5266
20.4k
                    size = newSize;
5267
20.4k
    }
5268
16.8M
    COPY_BUF(buf, len, cur);
5269
16.8M
    NEXTL(l);
5270
16.8M
    cur = xmlCurrentCharRecover(ctxt, &l);
5271
16.8M
      }
5272
48.1k
      buf[len] = 0;
            /*
             * The loop only stops on '?' when "?>" was found; any other
             * value means the PI was cut short or hit an invalid character.
             */
5273
48.1k
      if (cur != '?') {
5274
24.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5275
24.2k
          "ParsePI: PI %s never end ...\n", target);
5276
24.2k
      } else {
5277
23.8k
    SKIP(2);
5278
                /*
                 * A catalog PI is only acted upon outside of any DTD subset,
                 * when XML_PARSE_CATALOG_PI is set and the catalog defaults
                 * allow document or all catalogs.
                 */
5279
23.8k
#ifdef LIBXML_CATALOG_ENABLED
5280
23.8k
    if ((ctxt->inSubset == 0) &&
5281
23.8k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5282
791
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5283
5284
791
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5285
791
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5286
0
       (allow == XML_CATA_ALLOW_ALL)))
5287
0
      xmlParseCatalogPI(ctxt, buf);
5288
791
    }
5289
23.8k
#endif
5290
5291
    /*
5292
     * SAX: PI detected.
5293
     */
5294
23.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295
23.8k
        (ctxt->sax->processingInstruction != NULL))
5296
2.05k
        ctxt->sax->processingInstruction(ctxt->userData,
5297
2.05k
                                         target, buf);
5298
23.8k
      }
5299
48.1k
      xmlFree(buf);
5300
48.1k
  } else {
5301
6.28k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302
6.28k
  }
5303
68.6k
    }
5304
68.6k
}
5305
5306
/**
5307
 * Parse a notation declaration. Always consumes '<!'.
5308
 *
5309
 * @deprecated Internal function, don't use.
5310
 *
5311
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5312
 *                           S? '>'
5313
 *
5314
 * Hence there are actually 3 choices:
5315
 *
5316
 *     'PUBLIC' S PubidLiteral
5317
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5318
 *     'SYSTEM' S SystemLiteral
5319
 *
5320
 * See the NOTE on #xmlParseExternalID.
5321
 *
5322
 * @param ctxt  an XML parser context
5323
 */
5324
5325
void
5326
4.66k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5327
4.66k
    const xmlChar *name;
5328
4.66k
    xmlChar *Pubid;
5329
4.66k
    xmlChar *Systemid;
5330
    /* "<!" is consumed even if "NOTATION" does not follow. */
5331
4.66k
    if ((CUR != '<') || (NXT(1) != '!'))
5332
0
        return;
5333
4.66k
    SKIP(2);
5334
5335
4.66k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        /* Input depth at the start, compared below for the validity check. */
5336
4.63k
#ifdef LIBXML_VALID_ENABLED
5337
4.63k
  int oldInputNr = ctxt->inputNr;
5338
4.63k
#endif
5339
5340
4.63k
  SKIP(8);
        /*
         * The early returns below all happen before Pubid and Systemid are
         * assigned, so there is nothing to free on those paths.
         */
5341
4.63k
  if (SKIP_BLANKS_PE == 0) {
5342
354
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5343
354
         "Space required after '<!NOTATION'\n");
5344
354
      return;
5345
354
  }
5346
5347
4.27k
        name = xmlParseName(ctxt);
5348
4.27k
  if (name == NULL) {
5349
392
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5350
392
      return;
5351
392
  }
        /* A colon is reported as a namespace error, parsing continues. */
5352
3.88k
  if (xmlStrchr(name, ':') != NULL) {
5353
334
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5354
334
         "colons are forbidden from notation names '%s'\n",
5355
334
         name, NULL, NULL);
5356
334
  }
5357
3.88k
  if (SKIP_BLANKS_PE == 0) {
5358
281
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
281
         "Space required after the NOTATION name'\n");
5360
281
      return;
5361
281
  }
5362
5363
  /*
5364
   * Parse the IDs.
5365
   */
        /* strict = 0: a PUBLIC id without a system literal is accepted. */
5366
3.60k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5367
3.60k
  SKIP_BLANKS_PE;
5368
5369
3.60k
  if (RAW == '>') {
5370
2.48k
#ifdef LIBXML_VALID_ENABLED
5371
2.48k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5372
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5373
0
                           "Notation declaration doesn't start and stop"
5374
0
                                 " in the same entity\n",
5375
0
                                 NULL, NULL);
5376
0
      }
5377
2.48k
#endif
5378
2.48k
      NEXT;
5379
2.48k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5380
2.48k
    (ctxt->sax->notationDecl != NULL))
5381
1.53k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5382
2.48k
  } else {
5383
1.12k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5384
1.12k
  }
        /* Both strings are freed here, after the callback has run. */
5385
3.60k
  if (Systemid != NULL) xmlFree(Systemid);
5386
3.60k
  if (Pubid != NULL) xmlFree(Pubid);
5387
3.60k
    }
5388
4.66k
}
5389
5390
/**
5391
 * Parse an entity declaration. Always consumes '<!'.
5392
 *
5393
 * @deprecated Internal function, don't use.
5394
 *
5395
 *     [70] EntityDecl ::= GEDecl | PEDecl
5396
 *
5397
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5398
 *
5399
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5400
 *
5401
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5402
 *
5403
 *     [74] PEDef ::= EntityValue | ExternalID
5404
 *
5405
 *     [76] NDataDecl ::= S 'NDATA' S Name
5406
 *
5407
 * [ VC: Notation Declared ]
5408
 * The Name must match the declared name of a notation.
5409
 *
5410
 * @param ctxt  an XML parser context
5411
 */
5412
5413
void
5414
99.4k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5415
99.4k
    const xmlChar *name = NULL;
5416
99.4k
    xmlChar *value = NULL;
5417
99.4k
    xmlChar *URI = NULL, *literal = NULL;
5418
99.4k
    const xmlChar *ndata = NULL;
5419
99.4k
    int isParameter = 0;
5420
99.4k
    xmlChar *orig = NULL;
5421
5422
99.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5423
0
        return;
5424
99.4k
    SKIP(2);
5425
5426
    /* GROW; done in the caller */
5427
99.4k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5428
99.3k
#ifdef LIBXML_VALID_ENABLED
5429
99.3k
  int oldInputNr = ctxt->inputNr;
5430
99.3k
#endif
5431
5432
99.3k
  SKIP(6);
5433
99.3k
  if (SKIP_BLANKS_PE == 0) {
5434
19.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5435
19.0k
         "Space required after '<!ENTITY'\n");
5436
19.0k
  }
5437
5438
99.3k
  if (RAW == '%') {
5439
13.5k
      NEXT;
5440
13.5k
      if (SKIP_BLANKS_PE == 0) {
5441
3.58k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442
3.58k
             "Space required after '%%'\n");
5443
3.58k
      }
5444
13.5k
      isParameter = 1;
5445
13.5k
  }
5446
5447
99.3k
        name = xmlParseName(ctxt);
5448
99.3k
  if (name == NULL) {
5449
799
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5450
799
                     "xmlParseEntityDecl: no name\n");
5451
799
            return;
5452
799
  }
5453
98.5k
  if (xmlStrchr(name, ':') != NULL) {
5454
4.08k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5455
4.08k
         "colons are forbidden from entities names '%s'\n",
5456
4.08k
         name, NULL, NULL);
5457
4.08k
  }
5458
98.5k
  if (SKIP_BLANKS_PE == 0) {
5459
22.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
22.5k
         "Space required after the entity name\n");
5461
22.5k
  }
5462
5463
  /*
5464
   * handle the various case of definitions...
5465
   */
5466
98.5k
  if (isParameter) {
5467
13.4k
      if ((RAW == '"') || (RAW == '\'')) {
5468
9.06k
          value = xmlParseEntityValue(ctxt, &orig);
5469
9.06k
    if (value) {
5470
9.02k
        if ((ctxt->sax != NULL) &&
5471
9.02k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472
2.80k
      ctxt->sax->entityDecl(ctxt->userData, name,
5473
2.80k
                        XML_INTERNAL_PARAMETER_ENTITY,
5474
2.80k
            NULL, NULL, value);
5475
9.02k
    }
5476
9.06k
      } else {
5477
4.40k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5478
4.40k
    if ((URI == NULL) && (literal == NULL)) {
5479
363
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480
363
    }
5481
4.40k
    if (URI) {
5482
3.41k
                    if (xmlStrchr(URI, '#')) {
5483
1.44k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484
1.97k
                    } else {
5485
1.97k
                        if ((ctxt->sax != NULL) &&
5486
1.97k
                            (!ctxt->disableSAX) &&
5487
1.97k
                            (ctxt->sax->entityDecl != NULL))
5488
607
                            ctxt->sax->entityDecl(ctxt->userData, name,
5489
607
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5490
607
                                        literal, URI, NULL);
5491
1.97k
                    }
5492
3.41k
    }
5493
4.40k
      }
5494
85.0k
  } else {
5495
85.0k
      if ((RAW == '"') || (RAW == '\'')) {
5496
76.3k
          value = xmlParseEntityValue(ctxt, &orig);
5497
76.3k
    if ((ctxt->sax != NULL) &&
5498
76.3k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499
4.96k
        ctxt->sax->entityDecl(ctxt->userData, name,
5500
4.96k
        XML_INTERNAL_GENERAL_ENTITY,
5501
4.96k
        NULL, NULL, value);
5502
    /*
5503
     * For expat compatibility in SAX mode.
5504
     */
5505
76.3k
    if ((ctxt->myDoc == NULL) ||
5506
76.3k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5507
46.3k
        if (ctxt->myDoc == NULL) {
5508
1.35k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5509
1.35k
      if (ctxt->myDoc == NULL) {
5510
0
          xmlErrMemory(ctxt);
5511
0
          goto done;
5512
0
      }
5513
1.35k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5514
1.35k
        }
5515
46.3k
        if (ctxt->myDoc->intSubset == NULL) {
5516
1.35k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5517
1.35k
              BAD_CAST "fake", NULL, NULL);
5518
1.35k
                        if (ctxt->myDoc->intSubset == NULL) {
5519
0
                            xmlErrMemory(ctxt);
5520
0
                            goto done;
5521
0
                        }
5522
1.35k
                    }
5523
5524
46.3k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5525
46.3k
                    NULL, NULL, value);
5526
46.3k
    }
5527
76.3k
      } else {
5528
8.68k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5529
8.68k
    if ((URI == NULL) && (literal == NULL)) {
5530
1.25k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531
1.25k
    }
5532
8.68k
    if (URI) {
5533
7.00k
                    if (xmlStrchr(URI, '#')) {
5534
1.02k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5535
1.02k
                    }
5536
7.00k
    }
5537
8.68k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5538
1.50k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5539
1.50k
           "Space required before 'NDATA'\n");
5540
1.50k
    }
5541
8.68k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5542
2.87k
        SKIP(5);
5543
2.87k
        if (SKIP_BLANKS_PE == 0) {
5544
438
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5545
438
               "Space required after 'NDATA'\n");
5546
438
        }
5547
2.87k
        ndata = xmlParseName(ctxt);
5548
2.87k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5549
2.87k
            (ctxt->sax->unparsedEntityDecl != NULL))
5550
595
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5551
595
            literal, URI, ndata);
5552
5.81k
    } else {
5553
5.81k
        if ((ctxt->sax != NULL) &&
5554
5.81k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555
2.59k
      ctxt->sax->entityDecl(ctxt->userData, name,
5556
2.59k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5557
2.59k
            literal, URI, NULL);
5558
        /*
5559
         * For expat compatibility in SAX mode.
5560
         * assuming the entity replacement was asked for
5561
         */
5562
5.81k
        if ((ctxt->replaceEntities != 0) &&
5563
5.81k
      ((ctxt->myDoc == NULL) ||
5564
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5565
0
      if (ctxt->myDoc == NULL) {
5566
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5567
0
          if (ctxt->myDoc == NULL) {
5568
0
              xmlErrMemory(ctxt);
5569
0
        goto done;
5570
0
          }
5571
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5572
0
      }
5573
5574
0
      if (ctxt->myDoc->intSubset == NULL) {
5575
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5576
0
            BAD_CAST "fake", NULL, NULL);
5577
0
                            if (ctxt->myDoc->intSubset == NULL) {
5578
0
                                xmlErrMemory(ctxt);
5579
0
                                goto done;
5580
0
                            }
5581
0
                        }
5582
0
      xmlSAX2EntityDecl(ctxt, name,
5583
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584
0
                  literal, URI, NULL);
5585
0
        }
5586
5.81k
    }
5587
8.68k
      }
5588
85.0k
  }
5589
98.5k
  SKIP_BLANKS_PE;
5590
98.5k
  if (RAW != '>') {
5591
2.08k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5592
2.08k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5593
96.4k
  } else {
5594
96.4k
#ifdef LIBXML_VALID_ENABLED
5595
96.4k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5596
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5597
0
                           "Entity declaration doesn't start and stop in"
5598
0
                                 " the same entity\n",
5599
0
                                 NULL, NULL);
5600
0
      }
5601
96.4k
#endif
5602
96.4k
      NEXT;
5603
96.4k
  }
5604
98.5k
  if (orig != NULL) {
5605
      /*
5606
       * Ugly mechanism to save the raw entity value.
5607
       */
5608
85.0k
      xmlEntityPtr cur = NULL;
5609
5610
85.0k
      if (isParameter) {
5611
9.02k
          if ((ctxt->sax != NULL) &&
5612
9.02k
        (ctxt->sax->getParameterEntity != NULL))
5613
9.02k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5614
76.0k
      } else {
5615
76.0k
          if ((ctxt->sax != NULL) &&
5616
76.0k
        (ctxt->sax->getEntity != NULL))
5617
76.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5618
76.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5619
23.8k
        cur = xmlSAX2GetEntity(ctxt, name);
5620
23.8k
    }
5621
76.0k
      }
5622
85.0k
            if ((cur != NULL) && (cur->orig == NULL)) {
5623
8.04k
    cur->orig = orig;
5624
8.04k
                orig = NULL;
5625
8.04k
      }
5626
85.0k
  }
5627
5628
98.5k
done:
5629
98.5k
  if (value != NULL) xmlFree(value);
5630
98.5k
  if (URI != NULL) xmlFree(URI);
5631
98.5k
  if (literal != NULL) xmlFree(literal);
5632
98.5k
        if (orig != NULL) xmlFree(orig);
5633
98.5k
    }
5634
99.4k
}
5635
5636
/**
5637
 * Parse an attribute default declaration
5638
 *
5639
 * @deprecated Internal function, don't use.
5640
 *
5641
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5642
 *
5643
 * [ VC: Required Attribute ]
5644
 * if the default declaration is the keyword \#REQUIRED, then the
5645
 * attribute must be specified for all elements of the type in the
5646
 * attribute-list declaration.
5647
 *
5648
 * [ VC: Attribute Default Legal ]
5649
 * The declared default value must meet the lexical constraints of
5650
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5651
 *
5652
 * [ VC: Fixed Attribute Default ]
5653
 * if an attribute has a default value declared with the \#FIXED
5654
 * keyword, instances of that attribute must match the default value.
5655
 *
5656
 * [ WFC: No < in Attribute Values ]
5657
 * handled in #xmlParseAttValue
5658
 *
5659
 * @param ctxt  an XML parser context
5660
 * @param value  Receive a possible fixed default value for the attribute
5661
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5662
 *          or XML_ATTRIBUTE_FIXED.
5663
 */
5664
5665
int
5666
37.1k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5667
37.1k
    int val;
5668
37.1k
    xmlChar *ret;
5669
5670
37.1k
    *value = NULL;
5671
37.1k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5672
2.25k
  SKIP(9);
5673
2.25k
  return(XML_ATTRIBUTE_REQUIRED);
5674
2.25k
    }
5675
34.9k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5676
1.00k
  SKIP(8);
5677
1.00k
  return(XML_ATTRIBUTE_IMPLIED);
5678
1.00k
    }
5679
33.9k
    val = XML_ATTRIBUTE_NONE;
5680
33.9k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5681
1.10k
  SKIP(6);
5682
1.10k
  val = XML_ATTRIBUTE_FIXED;
5683
1.10k
  if (SKIP_BLANKS_PE == 0) {
5684
659
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5685
659
         "Space required after '#FIXED'\n");
5686
659
  }
5687
1.10k
    }
5688
33.9k
    ret = xmlParseAttValue(ctxt);
5689
33.9k
    if (ret == NULL) {
5690
2.00k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5691
2.00k
           "Attribute default value declaration error\n");
5692
2.00k
    } else
5693
31.9k
        *value = ret;
5694
33.9k
    return(val);
5695
34.9k
}
5696
5697
/**
5698
 * Parse an Notation attribute type.
5699
 *
5700
 * @deprecated Internal function, don't use.
5701
 *
5702
 * Note: the leading 'NOTATION' S part has already being parsed...
5703
 *
5704
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5705
 *
5706
 * [ VC: Notation Attributes ]
5707
 * Values of this type must match one of the notation names included
5708
 * in the declaration; all notation names in the declaration must be declared.
5709
 *
5710
 * @param ctxt  an XML parser context
5711
 * @returns the notation attribute tree built while parsing
5712
 */
5713
5714
xmlEnumeration *
5715
978
xmlParseNotationType(xmlParserCtxt *ctxt) {
5716
978
    const xmlChar *name;
5717
978
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5718
5719
978
    if (RAW != '(') {
5720
219
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5721
219
  return(NULL);
5722
219
    }
5723
1.21k
    do {
5724
1.21k
        NEXT;
5725
1.21k
  SKIP_BLANKS_PE;
5726
1.21k
        name = xmlParseName(ctxt);
5727
1.21k
  if (name == NULL) {
5728
109
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5729
109
         "Name expected in NOTATION declaration\n");
5730
109
            xmlFreeEnumeration(ret);
5731
109
      return(NULL);
5732
109
  }
5733
1.10k
        tmp = NULL;
5734
1.10k
#ifdef LIBXML_VALID_ENABLED
5735
1.10k
        if (ctxt->validate) {
5736
0
            tmp = ret;
5737
0
            while (tmp != NULL) {
5738
0
                if (xmlStrEqual(name, tmp->name)) {
5739
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5740
0
              "standalone: attribute notation value token %s duplicated\n",
5741
0
                                     name, NULL);
5742
0
                    if (!xmlDictOwns(ctxt->dict, name))
5743
0
                        xmlFree((xmlChar *) name);
5744
0
                    break;
5745
0
                }
5746
0
                tmp = tmp->next;
5747
0
            }
5748
0
        }
5749
1.10k
#endif /* LIBXML_VALID_ENABLED */
5750
1.10k
  if (tmp == NULL) {
5751
1.10k
      cur = xmlCreateEnumeration(name);
5752
1.10k
      if (cur == NULL) {
5753
0
                xmlErrMemory(ctxt);
5754
0
                xmlFreeEnumeration(ret);
5755
0
                return(NULL);
5756
0
            }
5757
1.10k
      if (last == NULL) ret = last = cur;
5758
449
      else {
5759
449
    last->next = cur;
5760
449
    last = cur;
5761
449
      }
5762
1.10k
  }
5763
1.10k
  SKIP_BLANKS_PE;
5764
1.10k
    } while (RAW == '|');
5765
650
    if (RAW != ')') {
5766
107
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5767
107
        xmlFreeEnumeration(ret);
5768
107
  return(NULL);
5769
107
    }
5770
543
    NEXT;
5771
543
    return(ret);
5772
650
}
5773
5774
/**
5775
 * Parse an Enumeration attribute type.
5776
 *
5777
 * @deprecated Internal function, don't use.
5778
 *
5779
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5780
 *
5781
 * [ VC: Enumeration ]
5782
 * Values of this type must match one of the Nmtoken tokens in
5783
 * the declaration
5784
 *
5785
 * @param ctxt  an XML parser context
5786
 * @returns the enumeration attribute tree built while parsing
5787
 */
5788
5789
xmlEnumeration *
5790
4.77k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5791
4.77k
    xmlChar *name;
5792
4.77k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5793
5794
4.77k
    if (RAW != '(') {
5795
731
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5796
731
  return(NULL);
5797
731
    }
5798
4.88k
    do {
5799
4.88k
        NEXT;
5800
4.88k
  SKIP_BLANKS_PE;
5801
4.88k
        name = xmlParseNmtoken(ctxt);
5802
4.88k
  if (name == NULL) {
5803
109
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5804
109
      return(ret);
5805
109
  }
5806
4.77k
        tmp = NULL;
5807
4.77k
#ifdef LIBXML_VALID_ENABLED
5808
4.77k
        if (ctxt->validate) {
5809
0
            tmp = ret;
5810
0
            while (tmp != NULL) {
5811
0
                if (xmlStrEqual(name, tmp->name)) {
5812
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813
0
              "standalone: attribute enumeration value token %s duplicated\n",
5814
0
                                     name, NULL);
5815
0
                    if (!xmlDictOwns(ctxt->dict, name))
5816
0
                        xmlFree(name);
5817
0
                    break;
5818
0
                }
5819
0
                tmp = tmp->next;
5820
0
            }
5821
0
        }
5822
4.77k
#endif /* LIBXML_VALID_ENABLED */
5823
4.77k
  if (tmp == NULL) {
5824
4.77k
      cur = xmlCreateEnumeration(name);
5825
4.77k
      if (!xmlDictOwns(ctxt->dict, name))
5826
4.77k
    xmlFree(name);
5827
4.77k
      if (cur == NULL) {
5828
0
                xmlErrMemory(ctxt);
5829
0
                xmlFreeEnumeration(ret);
5830
0
                return(NULL);
5831
0
            }
5832
4.77k
      if (last == NULL) ret = last = cur;
5833
832
      else {
5834
832
    last->next = cur;
5835
832
    last = cur;
5836
832
      }
5837
4.77k
  }
5838
4.77k
  SKIP_BLANKS_PE;
5839
4.77k
    } while (RAW == '|');
5840
3.93k
    if (RAW != ')') {
5841
220
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5842
220
  return(ret);
5843
220
    }
5844
3.71k
    NEXT;
5845
3.71k
    return(ret);
5846
3.93k
}
5847
5848
/**
5849
 * Parse an Enumerated attribute type.
5850
 *
5851
 * @deprecated Internal function, don't use.
5852
 *
5853
 *     [57] EnumeratedType ::= NotationType | Enumeration
5854
 *
5855
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5856
 *
5857
 * @param ctxt  an XML parser context
5858
 * @param tree  the enumeration tree built while parsing
5859
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5860
 */
5861
5862
int
5863
5.83k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5864
5.83k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5865
1.06k
  SKIP(8);
5866
1.06k
  if (SKIP_BLANKS_PE == 0) {
5867
82
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5868
82
         "Space required after 'NOTATION'\n");
5869
82
      return(0);
5870
82
  }
5871
978
  *tree = xmlParseNotationType(ctxt);
5872
978
  if (*tree == NULL) return(0);
5873
543
  return(XML_ATTRIBUTE_NOTATION);
5874
978
    }
5875
4.77k
    *tree = xmlParseEnumerationType(ctxt);
5876
4.77k
    if (*tree == NULL) return(0);
5877
3.94k
    return(XML_ATTRIBUTE_ENUMERATION);
5878
4.77k
}
5879
5880
/**
5881
 * Parse the Attribute list def for an element
5882
 *
5883
 * @deprecated Internal function, don't use.
5884
 *
5885
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886
 *
5887
 *     [55] StringType ::= 'CDATA'
5888
 *
5889
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891
 *
5892
 * Validity constraints for attribute values syntax are checked in
5893
 * #xmlValidateAttributeValue
5894
 *
5895
 * [ VC: ID ]
5896
 * Values of type ID must match the Name production. A name must not
5897
 * appear more than once in an XML document as a value of this type;
5898
 * i.e., ID values must uniquely identify the elements which bear them.
5899
 *
5900
 * [ VC: One ID per Element Type ]
5901
 * No element type may have more than one ID attribute specified.
5902
 *
5903
 * [ VC: ID Attribute Default ]
5904
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5905
 *
5906
 * [ VC: IDREF ]
5907
 * Values of type IDREF must match the Name production, and values
5908
 * of type IDREFS must match Names; each IDREF Name must match the value
5909
 * of an ID attribute on some element in the XML document; i.e. IDREF
5910
 * values must match the value of some ID attribute.
5911
 *
5912
 * [ VC: Entity Name ]
5913
 * Values of type ENTITY must match the Name production, values
5914
 * of type ENTITIES must match Names; each Entity Name must match the
5915
 * name of an unparsed entity declared in the DTD.
5916
 *
5917
 * [ VC: Name Token ]
5918
 * Values of type NMTOKEN must match the Nmtoken production; values
5919
 * of type NMTOKENS must match Nmtokens.
5920
 *
5921
 * @param ctxt  an XML parser context
5922
 * @param tree  the enumeration tree built while parsing
5923
 * @returns the attribute type
5924
 */
5925
int
5926
40.1k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5927
40.1k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5928
2.15k
  SKIP(5);
5929
2.15k
  return(XML_ATTRIBUTE_CDATA);
5930
37.9k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5931
4.98k
  SKIP(6);
5932
4.98k
  return(XML_ATTRIBUTE_IDREFS);
5933
33.0k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5934
781
  SKIP(5);
5935
781
  return(XML_ATTRIBUTE_IDREF);
5936
32.2k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5937
23.8k
        SKIP(2);
5938
23.8k
  return(XML_ATTRIBUTE_ID);
5939
23.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5940
659
  SKIP(6);
5941
659
  return(XML_ATTRIBUTE_ENTITY);
5942
7.70k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5943
1.20k
  SKIP(8);
5944
1.20k
  return(XML_ATTRIBUTE_ENTITIES);
5945
6.50k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5946
110
  SKIP(8);
5947
110
  return(XML_ATTRIBUTE_NMTOKENS);
5948
6.39k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5949
554
  SKIP(7);
5950
554
  return(XML_ATTRIBUTE_NMTOKEN);
5951
554
     }
5952
5.83k
     return(xmlParseEnumeratedType(ctxt, tree));
5953
40.1k
}
5954
5955
/**
5956
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5957
 *
5958
 * @deprecated Internal function, don't use.
5959
 *
5960
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961
 *
5962
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5963
 * @param ctxt  an XML parser context
5964
 */
5965
void
5966
16.8k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5967
16.8k
    const xmlChar *elemName;
5968
16.8k
    const xmlChar *attrName;
5969
16.8k
    xmlEnumerationPtr tree;
5970
5971
16.8k
    if ((CUR != '<') || (NXT(1) != '!'))
5972
0
        return;
5973
16.8k
    SKIP(2);
5974
5975
16.8k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976
16.8k
#ifdef LIBXML_VALID_ENABLED
5977
16.8k
  int oldInputNr = ctxt->inputNr;
5978
16.8k
#endif
5979
5980
16.8k
  SKIP(7);
5981
16.8k
  if (SKIP_BLANKS_PE == 0) {
5982
3.31k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5983
3.31k
                     "Space required after '<!ATTLIST'\n");
5984
3.31k
  }
5985
16.8k
        elemName = xmlParseName(ctxt);
5986
16.8k
  if (elemName == NULL) {
5987
1.84k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988
1.84k
         "ATTLIST: no name for Element\n");
5989
1.84k
      return;
5990
1.84k
  }
5991
14.9k
  SKIP_BLANKS_PE;
5992
14.9k
  GROW;
5993
49.6k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5994
41.6k
      int type;
5995
41.6k
      int def;
5996
41.6k
      xmlChar *defaultValue = NULL;
5997
5998
41.6k
      GROW;
5999
41.6k
            tree = NULL;
6000
41.6k
      attrName = xmlParseName(ctxt);
6001
41.6k
      if (attrName == NULL) {
6002
700
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
700
             "ATTLIST: no name for Attribute\n");
6004
700
    break;
6005
700
      }
6006
40.9k
      GROW;
6007
40.9k
      if (SKIP_BLANKS_PE == 0) {
6008
844
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
844
            "Space required after the attribute name\n");
6010
844
    break;
6011
844
      }
6012
6013
40.1k
      type = xmlParseAttributeType(ctxt, &tree);
6014
40.1k
      if (type <= 0) {
6015
1.35k
          break;
6016
1.35k
      }
6017
6018
38.8k
      GROW;
6019
38.8k
      if (SKIP_BLANKS_PE == 0) {
6020
1.61k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6021
1.61k
             "Space required after the attribute type\n");
6022
1.61k
          if (tree != NULL)
6023
237
        xmlFreeEnumeration(tree);
6024
1.61k
    break;
6025
1.61k
      }
6026
6027
37.1k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6028
37.1k
      if (def <= 0) {
6029
0
                if (defaultValue != NULL)
6030
0
        xmlFree(defaultValue);
6031
0
          if (tree != NULL)
6032
0
        xmlFreeEnumeration(tree);
6033
0
          break;
6034
0
      }
6035
37.1k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6036
29.9k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6037
6038
37.1k
      GROW;
6039
37.1k
            if (RAW != '>') {
6040
29.7k
    if (SKIP_BLANKS_PE == 0) {
6041
2.56k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6042
2.56k
      "Space required after the attribute default value\n");
6043
2.56k
        if (defaultValue != NULL)
6044
569
      xmlFree(defaultValue);
6045
2.56k
        if (tree != NULL)
6046
420
      xmlFreeEnumeration(tree);
6047
2.56k
        break;
6048
2.56k
    }
6049
29.7k
      }
6050
34.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6051
34.6k
    (ctxt->sax->attributeDecl != NULL))
6052
23.9k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6053
23.9k
                          type, def, defaultValue, tree);
6054
10.6k
      else if (tree != NULL)
6055
980
    xmlFreeEnumeration(tree);
6056
6057
34.6k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6058
34.6k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6059
34.6k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6060
31.3k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6061
31.3k
      }
6062
34.6k
      if (ctxt->sax2) {
6063
34.6k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6064
34.6k
      }
6065
34.6k
      if (defaultValue != NULL)
6066
31.3k
          xmlFree(defaultValue);
6067
34.6k
      GROW;
6068
34.6k
  }
6069
14.9k
  if (RAW == '>') {
6070
8.49k
#ifdef LIBXML_VALID_ENABLED
6071
8.49k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6072
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6073
0
                                 "Attribute list declaration doesn't start and"
6074
0
                                 " stop in the same entity\n",
6075
0
                                 NULL, NULL);
6076
0
      }
6077
8.49k
#endif
6078
8.49k
      NEXT;
6079
8.49k
  }
6080
14.9k
    }
6081
16.8k
}
6082
6083
/**
6084
 * Handle PEs and check that we don't pop the entity that started
6085
 * a balanced group.
6086
 *
6087
 * @param ctxt  parser context
6088
 * @param openInputNr  input nr of the entity with opening '('
6089
 */
6090
static void
6091
149k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6092
149k
    SKIP_BLANKS;
6093
149k
    GROW;
6094
6095
149k
    (void) openInputNr;
6096
6097
149k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6098
87.6k
        return;
6099
6100
67.6k
    while (!PARSER_STOPPED(ctxt)) {
6101
67.6k
        if (ctxt->input->cur >= ctxt->input->end) {
6102
2.64k
#ifdef LIBXML_VALID_ENABLED
6103
2.64k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6104
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105
0
                                 "Element content declaration doesn't start "
6106
0
                                 "and stop in the same entity\n",
6107
0
                                 NULL, NULL);
6108
0
            }
6109
2.64k
#endif
6110
2.64k
            if (PARSER_IN_PE(ctxt))
6111
2.60k
                xmlPopPE(ctxt);
6112
34
            else
6113
34
                break;
6114
65.0k
        } else if (RAW == '%') {
6115
2.90k
            xmlParsePERefInternal(ctxt, 0);
6116
62.0k
        } else {
6117
62.0k
            break;
6118
62.0k
        }
6119
6120
5.51k
        SKIP_BLANKS;
6121
5.51k
        GROW;
6122
5.51k
    }
6123
62.1k
}
6124
6125
/**
6126
 * Parse the declaration for a Mixed Element content
6127
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6128
 *
6129
 * @deprecated Internal function, don't use.
6130
 *
6131
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6132
 *                    '(' S? '#PCDATA' S? ')'
6133
 *
6134
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6135
 *
6136
 * [ VC: No Duplicate Types ]
6137
 * The same name must not appear more than once in a single
6138
 * mixed-content declaration.
6139
 *
6140
 * @param ctxt  an XML parser context
6141
 * @param openInputNr  the input used for the current entity, needed for
6142
 * boundary checks
6143
 * @returns the list of the xmlElementContent describing the element choices
6144
 */
6145
xmlElementContent *
6146
3.78k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6147
3.78k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6148
3.78k
    const xmlChar *elem = NULL;
6149
6150
3.78k
    GROW;
6151
3.78k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6152
3.78k
  SKIP(7);
6153
3.78k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6154
3.78k
  if (RAW == ')') {
6155
1.19k
#ifdef LIBXML_VALID_ENABLED
6156
1.19k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6157
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6158
0
                                 "Element content declaration doesn't start "
6159
0
                                 "and stop in the same entity\n",
6160
0
                                 NULL, NULL);
6161
0
      }
6162
1.19k
#endif
6163
1.19k
      NEXT;
6164
1.19k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6165
1.19k
      if (ret == NULL)
6166
0
                goto mem_error;
6167
1.19k
      if (RAW == '*') {
6168
529
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169
529
    NEXT;
6170
529
      }
6171
1.19k
      return(ret);
6172
1.19k
  }
6173
2.59k
  if ((RAW == '(') || (RAW == '|')) {
6174
2.03k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6175
2.03k
      if (ret == NULL)
6176
0
                goto mem_error;
6177
2.03k
  }
6178
4.89k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6179
2.61k
      NEXT;
6180
2.61k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6181
2.61k
            if (n == NULL)
6182
0
                goto mem_error;
6183
2.61k
      if (elem == NULL) {
6184
2.03k
    n->c1 = cur;
6185
2.03k
    if (cur != NULL)
6186
2.03k
        cur->parent = n;
6187
2.03k
    ret = cur = n;
6188
2.03k
      } else {
6189
578
          cur->c2 = n;
6190
578
    n->parent = cur;
6191
578
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6192
578
                if (n->c1 == NULL)
6193
0
                    goto mem_error;
6194
578
    n->c1->parent = n;
6195
578
    cur = n;
6196
578
      }
6197
2.61k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6198
2.61k
      elem = xmlParseName(ctxt);
6199
2.61k
      if (elem == NULL) {
6200
304
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6201
304
      "xmlParseElementMixedContentDecl : Name expected\n");
6202
304
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6203
304
    return(NULL);
6204
304
      }
6205
2.30k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6206
2.30k
  }
6207
2.28k
  if ((RAW == ')') && (NXT(1) == '*')) {
6208
1.70k
      if (elem != NULL) {
6209
1.70k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6210
1.70k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6211
1.70k
    if (cur->c2 == NULL)
6212
0
                    goto mem_error;
6213
1.70k
    cur->c2->parent = cur;
6214
1.70k
            }
6215
1.70k
            if (ret != NULL)
6216
1.70k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6217
1.70k
#ifdef LIBXML_VALID_ENABLED
6218
1.70k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6219
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6220
0
                                 "Element content declaration doesn't start "
6221
0
                                 "and stop in the same entity\n",
6222
0
                                 NULL, NULL);
6223
0
      }
6224
1.70k
#endif
6225
1.70k
      SKIP(2);
6226
1.70k
  } else {
6227
582
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6228
582
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6229
582
      return(NULL);
6230
582
  }
6231
6232
2.28k
    } else {
6233
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6234
0
    }
6235
1.70k
    return(ret);
6236
6237
0
mem_error:
6238
0
    xmlErrMemory(ctxt);
6239
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6240
0
    return(NULL);
6241
3.78k
}
6242
6243
/**
6244
 * Parse the declaration for element children content ([47] children).
6245
 * The leading '(' has been consumed by the caller (#xmlParseElementContentDecl or a recursive call)
6246
 *
6247
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6248
 *
6249
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6250
 *
6251
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6252
 *
6253
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6254
 *
6255
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6256
 * TODO Parameter-entity replacement text must be properly nested
6257
 *  with parenthesized groups. That is to say, if either of the
6258
 *  opening or closing parentheses in a choice, seq, or Mixed
6259
 *  construct is contained in the replacement text for a parameter
6260
 *  entity, both must be contained in the same replacement text. For
6261
 *  interoperability, if a parameter-entity reference appears in a
6262
 *  choice, seq, or Mixed construct, its replacement text should not
6263
 *  be empty, and neither the first nor last non-blank character of
6264
 *  the replacement text should be a connector (| or ,).
6265
 *
6266
 * @param ctxt  an XML parser context
6267
 * @param openInputNr  the input used for the current entity, needed for
6268
 * boundary checks
6269
 * @param depth  the level of recursion (checked against a limit of 256, or 2048 with XML_PARSE_HUGE)
6270
 * @returns the tree of xmlElementContent describing the element
6271
 *          hierarchy, or NULL on error (partially built nodes are freed).
6272
 */
6273
static xmlElementContentPtr
6274
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6275
30.6k
                                       int depth) {
6276
30.6k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6277
30.6k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; /* ret: root; cur: latest operator (or first child); last: pending right operand; op: new operator */
6278
30.6k
    const xmlChar *elem;
6279
30.6k
    xmlChar type = 0; /* first separator seen in this group (',' or '|'); 0 until one is seen */
6280
6281
30.6k
    if (depth > maxDepth) {
6282
3
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6283
3
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6284
3
                "use XML_PARSE_HUGE\n", depth);
6285
3
  return(NULL);
6286
3
    }
6287
30.6k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6288
30.6k
    if (RAW == '(') {
6289
10.1k
        int newInputNr = ctxt->inputNr; /* passed down as the nested group's openInputNr */
6290
6291
        /* Recurse on first child */
6292
10.1k
  NEXT;
6293
10.1k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6294
10.1k
                                                           depth + 1);
6295
10.1k
        if (cur == NULL)
6296
6.38k
            return(NULL); /* nothing has been allocated at this level yet */
6297
20.5k
    } else {
6298
20.5k
  elem = xmlParseName(ctxt);
6299
20.5k
  if (elem == NULL) {
6300
350
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6301
350
      return(NULL);
6302
350
  }
6303
20.1k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6304
20.1k
  if (cur == NULL) {
6305
0
      xmlErrMemory(ctxt);
6306
0
      return(NULL);
6307
0
  }
6308
20.1k
  GROW;
6309
20.1k
  if (RAW == '?') { /* optional occurrence suffix of the first Name */
6310
1.44k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6311
1.44k
      NEXT;
6312
18.7k
  } else if (RAW == '*') {
6313
609
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6314
609
      NEXT;
6315
18.1k
  } else if (RAW == '+') {
6316
689
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6317
689
      NEXT;
6318
17.4k
  } else {
6319
17.4k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6320
17.4k
  }
6321
20.1k
  GROW;
6322
20.1k
    }
6323
56.2k
    while (!PARSER_STOPPED(ctxt)) {
6324
56.2k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6325
56.2k
        if (RAW == ')')
6326
16.3k
            break;
6327
        /*
6328
   * Each loop we parse one separator and one element.
6329
   */
6330
39.9k
        if (RAW == ',') {
6331
2.69k
      if (type == 0) type = CUR;
6332
6333
      /*
6334
       * Detect "Name | Name , Name" error
6335
       */
6336
1.68k
      else if (type != CUR) {
6337
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339
3
                      type);
6340
3
    if ((last != NULL) && (last != ret))
6341
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6342
3
    if (ret != NULL)
6343
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6344
3
    return(NULL);
6345
3
      }
6346
2.69k
      NEXT;
6347
6348
2.69k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6349
2.69k
      if (op == NULL) {
6350
0
                xmlErrMemory(ctxt);
6351
0
    if ((last != NULL) && (last != ret))
6352
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6353
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6354
0
    return(NULL);
6355
0
      }
6356
2.69k
      if (last == NULL) { /* first separator: op becomes the new root above the first child */
6357
1.01k
    op->c1 = ret;
6358
1.01k
    if (ret != NULL)
6359
1.01k
        ret->parent = op;
6360
1.01k
    ret = cur = op;
6361
1.68k
      } else { /* later separator: op hangs off the previous operator's c2 and takes the pending operand as c1 */
6362
1.68k
          cur->c2 = op;
6363
1.68k
    if (op != NULL)
6364
1.68k
        op->parent = cur;
6365
1.68k
    op->c1 = last;
6366
1.68k
    if (last != NULL)
6367
1.68k
        last->parent = op;
6368
1.68k
    cur =op;
6369
1.68k
    last = NULL;
6370
1.68k
      }
6371
37.2k
  } else if (RAW == '|') {
6372
32.2k
      if (type == 0) type = CUR;
6373
6374
      /*
6375
       * Detect "Name , Name | Name" error
6376
       */
6377
19.2k
      else if (type != CUR) {
6378
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6379
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6380
3
          type);
6381
3
    if ((last != NULL) && (last != ret))
6382
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6383
3
    if (ret != NULL)
6384
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6385
3
    return(NULL);
6386
3
      }
6387
32.2k
      NEXT;
6388
6389
32.2k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6390
32.2k
      if (op == NULL) {
6391
0
                xmlErrMemory(ctxt);
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
32.2k
      if (last == NULL) { /* same tree-building steps as the ',' branch above */
6399
13.0k
    op->c1 = ret;
6400
13.0k
    if (ret != NULL)
6401
13.0k
        ret->parent = op;
6402
13.0k
    ret = cur = op;
6403
19.2k
      } else {
6404
19.2k
          cur->c2 = op;
6405
19.2k
    if (op != NULL)
6406
19.2k
        op->parent = cur;
6407
19.2k
    op->c1 = last;
6408
19.2k
    if (last != NULL)
6409
19.2k
        last->parent = op;
6410
19.2k
    cur =op;
6411
19.2k
    last = NULL;
6412
19.2k
      }
6413
32.2k
  } else { /* neither ')' nor a separator: the group is malformed */
6414
4.98k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
4.98k
      if ((last != NULL) && (last != ret))
6416
2.40k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
4.98k
      if (ret != NULL)
6418
4.98k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
4.98k
      return(NULL);
6420
4.98k
  }
6421
34.9k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6422
34.9k
        if (RAW == '(') {
6423
5.04k
            int newInputNr = ctxt->inputNr;
6424
6425
      /* Recurse on second child */
6426
5.04k
      NEXT;
6427
5.04k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6428
5.04k
                                                          depth + 1);
6429
5.04k
            if (last == NULL) {
6430
1.95k
    if (ret != NULL)
6431
1.95k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
1.95k
    return(NULL);
6433
1.95k
            }
6434
29.8k
  } else {
6435
29.8k
      elem = xmlParseName(ctxt);
6436
29.8k
      if (elem == NULL) {
6437
673
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6438
673
    if (ret != NULL)
6439
673
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6440
673
    return(NULL);
6441
673
      }
6442
29.2k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6443
29.2k
      if (last == NULL) {
6444
0
                xmlErrMemory(ctxt);
6445
0
    if (ret != NULL)
6446
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6447
0
    return(NULL);
6448
0
      }
6449
29.2k
      if (RAW == '?') { /* optional occurrence suffix of this Name */
6450
2.91k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6451
2.91k
    NEXT;
6452
26.3k
      } else if (RAW == '*') {
6453
859
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6454
859
    NEXT;
6455
25.4k
      } else if (RAW == '+') {
6456
680
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6457
680
    NEXT;
6458
24.7k
      } else {
6459
24.7k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6460
24.7k
      }
6461
29.2k
  }
6462
34.9k
    }
6463
16.3k
    if ((cur != NULL) && (last != NULL)) { /* attach the pending operand as c2 of the last operator */
6464
8.98k
        cur->c2 = last;
6465
8.98k
  if (last != NULL)
6466
8.98k
      last->parent = cur;
6467
8.98k
    }
6468
16.3k
#ifdef LIBXML_VALID_ENABLED
6469
16.3k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6470
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6471
0
                         "Element content declaration doesn't start "
6472
0
                         "and stop in the same entity\n",
6473
0
                         NULL, NULL);
6474
0
    }
6475
16.3k
#endif
6476
16.3k
    NEXT; /* skip the ')' that ended the loop (the loop can also end because the parser stopped) */
6477
16.3k
    if (RAW == '?') { /* occurrence suffix of the whole group */
6478
2.46k
  if (ret != NULL) {
6479
2.46k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6480
2.46k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481
1.14k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482
1.32k
      else
6483
1.32k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6484
2.46k
  }
6485
2.46k
  NEXT;
6486
13.8k
    } else if (RAW == '*') {
6487
6.60k
  if (ret != NULL) {
6488
6.60k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6489
6.60k
      cur = ret;
6490
      /*
6491
       * Some normalization:
6492
       * (a | b* | c?)* == (a | b | c)*
6493
       */
6494
18.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6495
12.1k
    if ((cur->c1 != NULL) &&
6496
12.1k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
12.1k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
2.15k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
12.1k
    if ((cur->c2 != NULL) &&
6500
12.1k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6501
12.1k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6502
1.57k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
12.1k
    cur = cur->c2;
6504
12.1k
      }
6505
6.60k
  }
6506
6.60k
  NEXT;
6507
7.23k
    } else if (RAW == '+') {
6508
4.07k
  if (ret != NULL) {
6509
4.07k
      int found = 0; /* set when an optional ('?' or '*') alternative was normalized below */
6510
6511
4.07k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6512
4.07k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6513
973
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6514
3.10k
      else
6515
3.10k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6516
      /*
6517
       * Some normalization:
6518
       * (a | b*)+ == (a | b)*
6519
       * (a | b?)+ == (a | b)*
6520
       */
6521
14.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { /* NOTE(review): unlike the '*' branch, cur is not reset to ret first, so this walk starts at the node cur was left on by the loop above - confirm intended */
6522
10.3k
    if ((cur->c1 != NULL) &&
6523
10.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
10.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6525
1.40k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
1.40k
        found = 1;
6527
1.40k
    }
6528
10.3k
    if ((cur->c2 != NULL) &&
6529
10.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6530
10.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6531
2.14k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6532
2.14k
        found = 1;
6533
2.14k
    }
6534
10.3k
    cur = cur->c2;
6535
10.3k
      }
6536
4.07k
      if (found)
6537
2.50k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6538
4.07k
  }
6539
4.07k
  NEXT;
6540
4.07k
    }
6541
16.3k
    return(ret);
6542
23.9k
}
6543
6544
/**
6545
 * Parse the declaration for element children content ([47] children).
6546
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6547
 *
6548
 * @deprecated Internal function, don't use.
6549
 *
6550
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6551
 *
6552
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6553
 *
6554
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6555
 *
6556
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6557
 *
6558
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6559
 * TODO Parameter-entity replacement text must be properly nested
6560
 *  with parenthesized groups. That is to say, if either of the
6561
 *  opening or closing parentheses in a choice, seq, or Mixed
6562
 *  construct is contained in the replacement text for a parameter
6563
 *  entity, both must be contained in the same replacement text. For
6564
 *  interoperability, if a parameter-entity reference appears in a
6565
 *  choice, seq, or Mixed construct, its replacement text should not
6566
 *  be empty, and neither the first nor last non-blank character of
6567
 *  the replacement text should be a connector (| or ,).
6568
 *
6569
 * @param ctxt  an XML parser context
6570
 * @param inputchk  the input used for the current entity, needed for boundary checks
6571
 * @returns the tree of xmlElementContent describing the element
6572
 *          hierarchy (the result of the internal parser, returned unchanged).
6573
 */
6574
xmlElementContent *
6575
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6576
    /* stub left for API/ABI compat; forwards to the internal parser at recursion depth 1 */
6577
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6578
0
}
6579
6580
/**
6581
 * Parse the declaration for an Element content either Mixed or Children,
6582
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6583
 *
6584
 * @deprecated Internal function, don't use.
6585
 *
6586
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6587
 *
6588
 * @param ctxt  an XML parser context
6589
 * @param name  the name of the element being defined (only used in the error message).
6590
 * @param result  the Element Content pointer will be stored here if any; set to NULL on error
6591
 * @returns XML_ELEMENT_TYPE_MIXED or XML_ELEMENT_TYPE_ELEMENT, or -1 on error
6592
 */
6593
6594
int
6595
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6596
19.2k
                           xmlElementContent **result) {
6597
6598
19.2k
    xmlElementContentPtr tree = NULL;
6599
19.2k
    int openInputNr = ctxt->inputNr; /* input depth at the opening '(' , handed to the sub-parsers */
6600
19.2k
    int res;
6601
6602
19.2k
    *result = NULL;
6603
6604
19.2k
    if (RAW != '(') {
6605
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6606
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6607
0
  return(-1);
6608
0
    }
6609
19.2k
    NEXT; /* consume the opening '(' */
6610
19.2k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6611
19.2k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { /* "#PCDATA" selects the Mixed parser, anything else the children parser */
6612
3.78k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6613
3.78k
  res = XML_ELEMENT_TYPE_MIXED;
6614
15.5k
    } else {
6615
15.5k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6616
15.5k
  res = XML_ELEMENT_TYPE_ELEMENT;
6617
15.5k
    }
6618
19.2k
    if (tree == NULL)
6619
6.90k
        return(-1); /* *result stays NULL */
6620
12.3k
    SKIP_BLANKS_PE;
6621
12.3k
    *result = tree;
6622
12.3k
    return(res);
6623
19.2k
}
6624
6625
/**
6626
 * Parse an element declaration. Always consumes '<!'.
6627
 *
6628
 * @deprecated Internal function, don't use.
6629
 *
6630
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6631
 *
6632
 * [ VC: Unique Element Type Declaration ]
6633
 * No element type may be declared more than once
6634
 *
6635
 * @param ctxt  an XML parser context
6636
 * @returns the type of the element, or -1 in case of error
6637
 */
6638
int
6639
23.2k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6640
23.2k
    const xmlChar *name;
6641
23.2k
    int ret = -1; /* stays -1 unless a content spec is recognized */
6642
23.2k
    xmlElementContentPtr content  = NULL;
6643
6644
23.2k
    if ((CUR != '<') || (NXT(1) != '!'))
6645
0
        return(ret);
6646
23.2k
    SKIP(2);
6647
6648
    /* GROW; done in the caller */
6649
23.2k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { /* otherwise nothing more is consumed and -1 is returned */
6650
23.2k
#ifdef LIBXML_VALID_ENABLED
6651
23.2k
  int oldInputNr = ctxt->inputNr;
6652
23.2k
#endif
6653
6654
23.2k
  SKIP(7);
6655
23.2k
  if (SKIP_BLANKS_PE == 0) {
6656
1.28k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6657
1.28k
               "Space required after 'ELEMENT'\n");
6658
1.28k
      return(-1);
6659
1.28k
  }
6660
21.9k
        name = xmlParseName(ctxt);
6661
21.9k
  if (name == NULL) {
6662
543
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6663
543
         "xmlParseElementDecl: no name for Element\n");
6664
543
      return(-1);
6665
543
  }
6666
21.4k
  if (SKIP_BLANKS_PE == 0) { /* reported, but parsing continues */
6667
3.18k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6668
3.18k
         "Space required after the element name\n");
6669
3.18k
  }
6670
21.4k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6671
678
      SKIP(5);
6672
      /*
6673
       * Element must always be empty.
6674
       */
6675
678
      ret = XML_ELEMENT_TYPE_EMPTY;
6676
20.7k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6677
20.7k
             (NXT(2) == 'Y')) {
6678
426
      SKIP(3);
6679
      /*
6680
       * Element is a generic container.
6681
       */
6682
426
      ret = XML_ELEMENT_TYPE_ANY;
6683
20.3k
  } else if (RAW == '(') {
6684
19.2k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6685
19.2k
            if (ret <= 0)
6686
6.90k
                return(-1);
6687
19.2k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
1.00k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6692
1.00k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6693
1.00k
      return(-1);
6694
1.00k
  }
6695
6696
13.4k
  SKIP_BLANKS_PE;
6697
6698
13.4k
  if (RAW != '>') { /* NOTE(review): the error is reported and content freed, but ret (the parsed type) is still returned below, not -1 - confirm intended */
6699
917
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6700
917
      if (content != NULL) {
6701
490
    xmlFreeDocElementContent(ctxt->myDoc, content);
6702
490
      }
6703
12.5k
  } else {
6704
12.5k
#ifdef LIBXML_VALID_ENABLED
6705
12.5k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6706
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6707
0
                                 "Element declaration doesn't start and stop in"
6708
0
                                 " the same entity\n",
6709
0
                                 NULL, NULL);
6710
0
      }
6711
12.5k
#endif
6712
6713
12.5k
      NEXT; /* consume '>' */
6714
12.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6715
12.5k
    (ctxt->sax->elementDecl != NULL)) {
6716
6.35k
    if (content != NULL)
6717
5.82k
        content->parent = NULL; /* cleared so the check after the callback can tell whether the tree was taken */
6718
6.35k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6719
6.35k
                           content);
6720
6.35k
    if ((content != NULL) && (content->parent == NULL)) {
6721
        /*
6722
         * this is a trick: if xmlAddElementDecl is called,
6723
         * instead of copying the full tree it is plugged directly
6724
         * if called from the parser. Avoid duplicating the
6725
         * interfaces or change the API/ABI
6726
         */
6727
5.33k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6728
5.33k
    }
6729
6.35k
      } else if (content != NULL) { /* no SAX callback to hand the tree to: free it here */
6730
6.08k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6731
6.08k
      }
6732
12.5k
  }
6733
13.4k
    }
6734
13.5k
    return(ret);
6735
23.2k
}
6736
6737
/**
6738
 * Parse a conditional section. Always consumes '<!['.
6739
 *
6740
 *     [61] conditionalSect ::= includeSect | ignoreSect
6741
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6742
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6743
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6744
 *                                 Ignore)*
6745
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6746
 * @param ctxt  an XML parser context
6747
 */
6748
6749
static void
6750
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6751
0
    size_t depth = 0; /* number of INCLUDE sections currently open */
6752
0
    int isFreshPE = 0; /* set when a PE reference just pushed a new input; cleared by the next section start or markup decl */
6753
0
    int oldInputNr = ctxt->inputNr; /* input depth on entry; inputs at or below it are never popped here */
6754
0
    int declInputNr = ctxt->inputNr; /* input depth used for the entity-boundary validity checks */
6755
6756
0
    while (!PARSER_STOPPED(ctxt)) {
6757
0
        if (ctxt->input->cur >= ctxt->input->end) { /* current input exhausted */
6758
0
            if (ctxt->inputNr <= oldInputNr) {
6759
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6760
0
                return;
6761
0
            }
6762
6763
0
            xmlPopPE(ctxt);
6764
0
            declInputNr = ctxt->inputNr;
6765
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766
0
            SKIP(3);
6767
0
            SKIP_BLANKS_PE;
6768
6769
0
            isFreshPE = 0;
6770
6771
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6772
0
                SKIP(7);
6773
0
                SKIP_BLANKS_PE;
6774
0
                if (RAW != '[') {
6775
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6776
0
                    return;
6777
0
                }
6778
0
#ifdef LIBXML_VALID_ENABLED
6779
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6780
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6781
0
                                     "All markup of the conditional section is"
6782
0
                                     " not in the same entity\n",
6783
0
                                     NULL, NULL);
6784
0
                }
6785
0
#endif
6786
0
                NEXT;
6787
6788
0
                depth++; /* section body is parsed by later iterations of the outer loop */
6789
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6790
0
                size_t ignoreDepth = 0; /* nesting level of '<![' inside the ignored text */
6791
6792
0
                SKIP(6);
6793
0
                SKIP_BLANKS_PE;
6794
0
                if (RAW != '[') {
6795
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6796
0
                    return;
6797
0
                }
6798
0
#ifdef LIBXML_VALID_ENABLED
6799
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6800
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
0
                                     "All markup of the conditional section is"
6802
0
                                     " not in the same entity\n",
6803
0
                                     NULL, NULL);
6804
0
                }
6805
0
#endif
6806
0
                NEXT;
6807
6808
0
                while (PARSER_STOPPED(ctxt) == 0) { /* skip ignored text up to the matching ']]>' */
6809
0
                    if (RAW == 0) {
6810
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6811
0
                        return;
6812
0
                    }
6813
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6814
0
                        SKIP(3);
6815
0
                        ignoreDepth++;
6816
                        /* Check for integer overflow */
6817
0
                        if (ignoreDepth == 0) {
6818
0
                            xmlErrMemory(ctxt);
6819
0
                            return;
6820
0
                        }
6821
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6822
0
                               (NXT(2) == '>')) {
6823
0
                        SKIP(3);
6824
0
                        if (ignoreDepth == 0)
6825
0
                            break;
6826
0
                        ignoreDepth--;
6827
0
                    } else {
6828
0
                        NEXT;
6829
0
                    }
6830
0
                }
6831
6832
0
#ifdef LIBXML_VALID_ENABLED
6833
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6834
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835
0
                                     "All markup of the conditional section is"
6836
0
                                     " not in the same entity\n",
6837
0
                                     NULL, NULL);
6838
0
                }
6839
0
#endif
6840
0
            } else {
6841
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6842
0
                return;
6843
0
            }
6844
0
        } else if ((depth > 0) &&
6845
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { /* end of an open INCLUDE section */
6846
0
            if (isFreshPE) {
6847
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6848
0
                               "Parameter entity must match "
6849
0
                               "extSubsetDecl\n");
6850
0
                return;
6851
0
            }
6852
6853
0
            depth--;
6854
0
#ifdef LIBXML_VALID_ENABLED
6855
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6856
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                 "All markup of the conditional section is not"
6858
0
                                 " in the same entity\n",
6859
0
                                 NULL, NULL);
6860
0
            }
6861
0
#endif
6862
0
            SKIP(3);
6863
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6864
0
            isFreshPE = 0;
6865
0
            xmlParseMarkupDecl(ctxt);
6866
0
        } else if (RAW == '%') {
6867
0
            xmlParsePERefInternal(ctxt, 1);
6868
0
            if (ctxt->inputNr > declInputNr) { /* the reference pushed a new input */
6869
0
                isFreshPE = 1;
6870
0
                declInputNr = ctxt->inputNr;
6871
0
            }
6872
0
        } else {
6873
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874
0
            return;
6875
0
        }
6876
6877
0
        if (depth == 0) /* no INCLUDE section left open: done */
6878
0
            break;
6879
6880
0
        SKIP_BLANKS;
6881
0
        SHRINK;
6882
0
        GROW;
6883
0
    }
6884
0
}
6885
6886
/**
6887
 * Parse markup declarations. Always consumes '<!' or '<?'.
6888
 *
6889
 * @deprecated Internal function, don't use.
6890
 *
6891
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6892
 *                         NotationDecl | PI | Comment
6893
 *
6894
 * [ VC: Proper Declaration/PE Nesting ]
6895
 * Parameter-entity replacement text must be properly nested with
6896
 * markup declarations. That is to say, if either the first character
6897
 * or the last character of a markup declaration (markupdecl above) is
6898
 * contained in the replacement text for a parameter-entity reference,
6899
 * both must be contained in the same replacement text.
6900
 *
6901
 * [ WFC: PEs in Internal Subset ]
6902
 * In the internal DTD subset, parameter-entity references can occur
6903
 * only where markup declarations can occur, not within markup declarations.
6904
 * (This does not apply to references that occur in external parameter
6905
 * entities or to the external subset.)
6906
 *
6907
 * @param ctxt  an XML parser context
6908
 */
6909
void
6910
173k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6911
173k
    GROW;
6912
173k
    if (CUR == '<') {
6913
173k
        if (NXT(1) == '!') {
6914
153k
      switch (NXT(2)) {
6915
122k
          case 'E':
6916
122k
        if (NXT(3) == 'L')
6917
23.2k
      xmlParseElementDecl(ctxt);
6918
99.5k
        else if (NXT(3) == 'N')
6919
99.4k
      xmlParseEntityDecl(ctxt);
6920
76
                    else
6921
76
                        SKIP(2);
6922
122k
        break;
6923
16.8k
          case 'A':
6924
16.8k
        xmlParseAttributeListDecl(ctxt);
6925
16.8k
        break;
6926
4.66k
          case 'N':
6927
4.66k
        xmlParseNotationDecl(ctxt);
6928
4.66k
        break;
6929
6.85k
          case '-':
6930
6.85k
        xmlParseComment(ctxt);
6931
6.85k
        break;
6932
2.50k
    default:
6933
2.50k
                    xmlFatalErr(ctxt,
6934
2.50k
                                ctxt->inSubset == 2 ?
6935
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6936
2.50k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6937
2.50k
                                NULL);
6938
2.50k
                    SKIP(2);
6939
2.50k
        break;
6940
153k
      }
6941
153k
  } else if (NXT(1) == '?') {
6942
19.8k
      xmlParsePI(ctxt);
6943
19.8k
  }
6944
173k
    }
6945
173k
}
6946
6947
/**
6948
 * Parse an XML declaration header for external entities
6949
 *
6950
 * @deprecated Internal function, don't use.
6951
 *
6952
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6953
 * @param ctxt  an XML parser context
6954
 */
6955
6956
void
6957
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6958
0
    xmlChar *version;
6959
6960
    /*
6961
     * We know that '<?xml' is here.
6962
     */
6963
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6964
0
  SKIP(5);
6965
0
    } else {
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6967
0
  return;
6968
0
    }
6969
6970
0
    if (SKIP_BLANKS == 0) {
6971
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972
0
           "Space needed after '<?xml'\n");
6973
0
    }
6974
6975
    /*
6976
     * We may have the VersionInfo here.
6977
     */
6978
0
    version = xmlParseVersionInfo(ctxt);
6979
0
    if (version == NULL) {
6980
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6981
0
        if (version == NULL) {
6982
0
            xmlErrMemory(ctxt);
6983
0
            return;
6984
0
        }
6985
0
    } else {
6986
0
  if (SKIP_BLANKS == 0) {
6987
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6988
0
               "Space needed here\n");
6989
0
  }
6990
0
    }
6991
0
    ctxt->input->version = version;
6992
6993
    /*
6994
     * We must have the encoding declaration
6995
     */
6996
0
    xmlParseEncodingDecl(ctxt);
6997
6998
0
    SKIP_BLANKS;
6999
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7000
0
        SKIP(2);
7001
0
    } else if (RAW == '>') {
7002
        /* Deprecated old WD ... */
7003
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7004
0
  NEXT;
7005
0
    } else {
7006
0
        int c;
7007
7008
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7009
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7010
0
            NEXT;
7011
0
            if (c == '>')
7012
0
                break;
7013
0
        }
7014
0
    }
7015
0
}
7016
7017
/**
7018
 * Parse Markup declarations from an external subset
7019
 *
7020
 * @deprecated Internal function, don't use.
7021
 *
7022
 *     [30] extSubset ::= textDecl? extSubsetDecl
7023
 *
7024
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7025
 *                             PEReference | S) *
7026
 * @param ctxt  an XML parser context
7027
 * @param publicId  the public identifier
7028
 * @param systemId  the system identifier (URL)
7029
 */
7030
void
7031
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7032
0
                       const xmlChar *systemId) {
7033
0
    int oldInputNr;
7034
7035
0
    xmlCtxtInitializeLate(ctxt);
7036
7037
0
    xmlDetectEncoding(ctxt);
7038
7039
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7040
0
  xmlParseTextDecl(ctxt);
7041
0
    }
7042
0
    if (ctxt->myDoc == NULL) {
7043
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7044
0
  if (ctxt->myDoc == NULL) {
7045
0
      xmlErrMemory(ctxt);
7046
0
      return;
7047
0
  }
7048
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7049
0
    }
7050
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7051
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7052
0
        xmlErrMemory(ctxt);
7053
0
    }
7054
7055
0
    ctxt->inSubset = 2;
7056
0
    oldInputNr = ctxt->inputNr;
7057
7058
0
    SKIP_BLANKS;
7059
0
    while (!PARSER_STOPPED(ctxt)) {
7060
0
        if (ctxt->input->cur >= ctxt->input->end) {
7061
0
            if (ctxt->inputNr <= oldInputNr) {
7062
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7063
0
                break;
7064
0
            }
7065
7066
0
            xmlPopPE(ctxt);
7067
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7068
0
            xmlParseConditionalSections(ctxt);
7069
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7070
0
            xmlParseMarkupDecl(ctxt);
7071
0
        } else if (RAW == '%') {
7072
0
            xmlParsePERefInternal(ctxt, 1);
7073
0
        } else {
7074
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7075
7076
0
            while (ctxt->inputNr > oldInputNr)
7077
0
                xmlPopPE(ctxt);
7078
0
            break;
7079
0
        }
7080
0
        SKIP_BLANKS;
7081
0
        SHRINK;
7082
0
        GROW;
7083
0
    }
7084
0
}
7085
7086
/**
7087
 * Parse and handle entity references in content, depending on the SAX
7088
 * interface, this may end-up in a call to character() if this is a
7089
 * CharRef, a predefined entity, if there is no reference() callback.
7090
 * or if the parser was asked to switch to that mode.
7091
 *
7092
 * @deprecated Internal function, don't use.
7093
 *
7094
 * Always consumes '&'.
7095
 *
7096
 *     [67] Reference ::= EntityRef | CharRef
7097
 * @param ctxt  an XML parser context
7098
 */
7099
void
7100
656k
xmlParseReference(xmlParserCtxt *ctxt) {
7101
656k
    xmlEntityPtr ent = NULL;
7102
656k
    const xmlChar *name;
7103
656k
    xmlChar *val;
7104
7105
656k
    if (RAW != '&')
7106
0
        return;
7107
7108
    /*
7109
     * Simple case of a CharRef
7110
     */
7111
656k
    if (NXT(1) == '#') {
7112
73.5k
  int i = 0;
7113
73.5k
  xmlChar out[16];
7114
73.5k
  int value = xmlParseCharRef(ctxt);
7115
7116
73.5k
  if (value == 0)
7117
40.2k
      return;
7118
7119
        /*
7120
         * Just encode the value in UTF-8
7121
         */
7122
33.2k
        COPY_BUF(out, i, value);
7123
33.2k
        out[i] = 0;
7124
33.2k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7125
33.2k
            (!ctxt->disableSAX))
7126
4.00k
            ctxt->sax->characters(ctxt->userData, out, i);
7127
33.2k
  return;
7128
73.5k
    }
7129
7130
    /*
7131
     * We are seeing an entity reference
7132
     */
7133
582k
    name = xmlParseEntityRefInternal(ctxt);
7134
582k
    if (name == NULL)
7135
439k
        return;
7136
142k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7137
142k
    if (ent == NULL) {
7138
        /*
7139
         * Create a reference for undeclared entities.
7140
         */
7141
50.0k
        if ((ctxt->replaceEntities == 0) &&
7142
50.0k
            (ctxt->sax != NULL) &&
7143
50.0k
            (ctxt->disableSAX == 0) &&
7144
50.0k
            (ctxt->sax->reference != NULL)) {
7145
3.76k
            ctxt->sax->reference(ctxt->userData, name);
7146
3.76k
        }
7147
50.0k
        return;
7148
50.0k
    }
7149
92.5k
    if (!ctxt->wellFormed)
7150
79.1k
  return;
7151
7152
    /* special case of predefined entities */
7153
13.4k
    if ((ent->name == NULL) ||
7154
13.4k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7155
8.63k
  val = ent->content;
7156
8.63k
  if (val == NULL) return;
7157
  /*
7158
   * inline the entity.
7159
   */
7160
8.63k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7161
8.63k
      (!ctxt->disableSAX))
7162
8.63k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7163
8.63k
  return;
7164
8.63k
    }
7165
7166
    /*
7167
     * Some users try to parse entities on their own and used to set
7168
     * the renamed "checked" member. Fix the flags to cover this
7169
     * case.
7170
     */
7171
4.78k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7172
0
        ent->flags |= XML_ENT_PARSED;
7173
7174
    /*
7175
     * The first reference to the entity trigger a parsing phase
7176
     * where the ent->children is filled with the result from
7177
     * the parsing.
7178
     * Note: external parsed entities will not be loaded, it is not
7179
     * required for a non-validating parser, unless the parsing option
7180
     * of validating, or substituting entities were given. Doing so is
7181
     * far more secure as the parser will only process data coming from
7182
     * the document entity by default.
7183
     *
7184
     * FIXME: This doesn't work correctly since entities can be
7185
     * expanded with different namespace declarations in scope.
7186
     * For example:
7187
     *
7188
     * <!DOCTYPE doc [
7189
     *   <!ENTITY ent "<ns:elem/>">
7190
     * ]>
7191
     * <doc>
7192
     *   <decl1 xmlns:ns="urn:ns1">
7193
     *     &ent;
7194
     *   </decl1>
7195
     *   <decl2 xmlns:ns="urn:ns2">
7196
     *     &ent;
7197
     *   </decl2>
7198
     * </doc>
7199
     *
7200
     * Proposed fix:
7201
     *
7202
     * - Ignore current namespace declarations when parsing the
7203
     *   entity. If a prefix can't be resolved, don't report an error
7204
     *   but mark it as unresolved.
7205
     * - Try to resolve these prefixes when expanding the entity.
7206
     *   This will require a specialized version of xmlStaticCopyNode
7207
     *   which can also make use of the namespace hash table to avoid
7208
     *   quadratic behavior.
7209
     *
7210
     * Alternatively, we could simply reparse the entity on each
7211
     * expansion like we already do with custom SAX callbacks.
7212
     * External entity content should be cached in this case.
7213
     */
7214
4.78k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7215
4.78k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7216
731
         ((ctxt->replaceEntities) ||
7217
4.05k
          (ctxt->validate)))) {
7218
4.05k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7219
842
            xmlCtxtParseEntity(ctxt, ent);
7220
3.21k
        } else if (ent->children == NULL) {
7221
            /*
7222
             * Probably running in SAX mode and the callbacks don't
7223
             * build the entity content. Parse the entity again.
7224
             *
7225
             * This will also be triggered in normal tree builder mode
7226
             * if an entity happens to be empty, causing unnecessary
7227
             * reloads. It's hard to come up with a reliable check in
7228
             * which mode we're running.
7229
             */
7230
663
            xmlCtxtParseEntity(ctxt, ent);
7231
663
        }
7232
4.05k
    }
7233
7234
    /*
7235
     * We also check for amplification if entities aren't substituted.
7236
     * They might be expanded later.
7237
     */
7238
4.78k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7239
27
        return;
7240
7241
4.75k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7242
458
        return;
7243
7244
4.30k
    if (ctxt->replaceEntities == 0) {
7245
  /*
7246
   * Create a reference
7247
   */
7248
4.30k
        if (ctxt->sax->reference != NULL)
7249
4.30k
      ctxt->sax->reference(ctxt->userData, ent->name);
7250
4.30k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7251
0
        xmlNodePtr copy, cur;
7252
7253
        /*
7254
         * Seems we are generating the DOM content, copy the tree
7255
   */
7256
0
        cur = ent->children;
7257
7258
        /*
7259
         * Handle first text node with SAX to coalesce text efficiently
7260
         */
7261
0
        if ((cur->type == XML_TEXT_NODE) ||
7262
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7263
0
            int len = xmlStrlen(cur->content);
7264
7265
0
            if ((cur->type == XML_TEXT_NODE) ||
7266
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7267
0
                if (ctxt->sax->characters != NULL)
7268
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7269
0
            } else {
7270
0
                if (ctxt->sax->cdataBlock != NULL)
7271
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7272
0
            }
7273
7274
0
            cur = cur->next;
7275
0
        }
7276
7277
0
        while (cur != NULL) {
7278
0
            xmlNodePtr last;
7279
7280
            /*
7281
             * Handle last text node with SAX to coalesce text efficiently
7282
             */
7283
0
            if ((cur->next == NULL) &&
7284
0
                ((cur->type == XML_TEXT_NODE) ||
7285
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7286
0
                int len = xmlStrlen(cur->content);
7287
7288
0
                if ((cur->type == XML_TEXT_NODE) ||
7289
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7290
0
                    if (ctxt->sax->characters != NULL)
7291
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7292
0
                } else {
7293
0
                    if (ctxt->sax->cdataBlock != NULL)
7294
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
0
                }
7296
7297
0
                break;
7298
0
            }
7299
7300
            /*
7301
             * Reset coalesce buffer stats only for non-text nodes.
7302
             */
7303
0
            ctxt->nodemem = 0;
7304
0
            ctxt->nodelen = 0;
7305
7306
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7307
7308
0
            if (copy == NULL) {
7309
0
                xmlErrMemory(ctxt);
7310
0
                break;
7311
0
            }
7312
7313
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7314
                /* Needed for reader */
7315
0
                copy->extra = cur->extra;
7316
                /* Maybe needed for reader */
7317
0
                copy->_private = cur->_private;
7318
0
            }
7319
7320
0
            copy->parent = ctxt->node;
7321
0
            last = ctxt->node->last;
7322
0
            if (last == NULL) {
7323
0
                ctxt->node->children = copy;
7324
0
            } else {
7325
0
                last->next = copy;
7326
0
                copy->prev = last;
7327
0
            }
7328
0
            ctxt->node->last = copy;
7329
7330
0
            cur = cur->next;
7331
0
        }
7332
0
    }
7333
4.30k
}
7334
7335
static void
7336
222k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7337
    /*
7338
     * [ WFC: Entity Declared ]
7339
     * In a document without any DTD, a document with only an
7340
     * internal DTD subset which contains no parameter entity
7341
     * references, or a document with "standalone='yes'", the
7342
     * Name given in the entity reference must match that in an
7343
     * entity declaration, except that well-formed documents
7344
     * need not declare any of the following entities: amp, lt,
7345
     * gt, apos, quot.
7346
     * The declaration of a parameter entity must precede any
7347
     * reference to it.
7348
     * Similarly, the declaration of a general entity must
7349
     * precede any reference to it which appears in a default
7350
     * value in an attribute-list declaration. Note that if
7351
     * entities are declared in the external subset or in
7352
     * external parameter entities, a non-validating processor
7353
     * is not obligated to read and process their declarations;
7354
     * for such documents, the rule that an entity must be
7355
     * declared is a well-formedness constraint only if
7356
     * standalone='yes'.
7357
     */
7358
222k
    if ((ctxt->standalone == 1) ||
7359
222k
        ((ctxt->hasExternalSubset == 0) &&
7360
222k
         (ctxt->hasPErefs == 0))) {
7361
151k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7362
151k
                          "Entity '%s' not defined\n", name);
7363
151k
#ifdef LIBXML_VALID_ENABLED
7364
151k
    } else if (ctxt->validate) {
7365
        /*
7366
         * [ VC: Entity Declared ]
7367
         * In a document with an external subset or external
7368
         * parameter entities with "standalone='no'", ...
7369
         * ... The declaration of a parameter entity must
7370
         * precede any reference to it...
7371
         */
7372
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7373
0
                         "Entity '%s' not defined\n", name, NULL);
7374
0
#endif
7375
71.0k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7376
71.0k
               ((ctxt->replaceEntities) &&
7377
71.0k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7378
        /*
7379
         * Also raise a non-fatal error
7380
         *
7381
         * - if the external subset is loaded and all entity declarations
7382
         *   should be available, or
7383
         * - entity substition was requested without restricting
7384
         *   external entity access.
7385
         */
7386
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387
0
                     "Entity '%s' not defined\n", name);
7388
71.0k
    } else {
7389
71.0k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7390
71.0k
                      "Entity '%s' not defined\n", name, NULL);
7391
71.0k
    }
7392
7393
222k
    ctxt->valid = 0;
7394
222k
}
7395
7396
static xmlEntityPtr
7397
10.1M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7398
10.1M
    xmlEntityPtr ent = NULL;
7399
7400
    /*
7401
     * Predefined entities override any extra definition
7402
     */
7403
10.1M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7404
10.1M
        ent = xmlGetPredefinedEntity(name);
7405
10.1M
        if (ent != NULL)
7406
8.19M
            return(ent);
7407
10.1M
    }
7408
7409
    /*
7410
     * Ask first SAX for entity resolution, otherwise try the
7411
     * entities which may have stored in the parser context.
7412
     */
7413
1.97M
    if (ctxt->sax != NULL) {
7414
1.97M
  if (ctxt->sax->getEntity != NULL)
7415
1.97M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7416
1.97M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7417
1.97M
      (ctxt->options & XML_PARSE_OLDSAX))
7418
0
      ent = xmlGetPredefinedEntity(name);
7419
1.97M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7420
1.97M
      (ctxt->userData==ctxt)) {
7421
13.5k
      ent = xmlSAX2GetEntity(ctxt, name);
7422
13.5k
  }
7423
1.97M
    }
7424
7425
1.97M
    if (ent == NULL) {
7426
203k
        xmlHandleUndeclaredEntity(ctxt, name);
7427
203k
    }
7428
7429
    /*
7430
     * [ WFC: Parsed Entity ]
7431
     * An entity reference must not contain the name of an
7432
     * unparsed entity
7433
     */
7434
1.77M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7435
393
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7436
393
     "Entity reference to unparsed entity %s\n", name);
7437
393
        ent = NULL;
7438
393
    }
7439
7440
    /*
7441
     * [ WFC: No External Entity References ]
7442
     * Attribute values cannot contain direct or indirect
7443
     * entity references to external entities.
7444
     */
7445
1.77M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7446
1.48k
        if (inAttr) {
7447
389
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7448
389
                 "Attribute references external entity '%s'\n", name);
7449
389
            ent = NULL;
7450
389
        }
7451
1.48k
    }
7452
7453
1.97M
    return(ent);
7454
10.1M
}
7455
7456
/**
7457
 * Parse an entity reference. Always consumes '&'.
7458
 *
7459
 *     [68] EntityRef ::= '&' Name ';'
7460
 *
7461
 * @param ctxt  an XML parser context
7462
 * @returns the name, or NULL in case of error.
7463
 */
7464
static const xmlChar *
7465
2.43M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7466
2.43M
    const xmlChar *name;
7467
7468
2.43M
    GROW;
7469
7470
2.43M
    if (RAW != '&')
7471
0
        return(NULL);
7472
2.43M
    NEXT;
7473
2.43M
    name = xmlParseName(ctxt);
7474
2.43M
    if (name == NULL) {
7475
397k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7476
397k
           "xmlParseEntityRef: no name\n");
7477
397k
        return(NULL);
7478
397k
    }
7479
2.04M
    if (RAW != ';') {
7480
138k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7481
138k
  return(NULL);
7482
138k
    }
7483
1.90M
    NEXT;
7484
7485
1.90M
    return(name);
7486
2.04M
}
7487
7488
/**
7489
 * @deprecated Internal function, don't use.
7490
 *
7491
 * @param ctxt  an XML parser context
7492
 * @returns the xmlEntity if found, or NULL otherwise.
7493
 */
7494
xmlEntity *
7495
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7496
0
    const xmlChar *name;
7497
7498
0
    if (ctxt == NULL)
7499
0
        return(NULL);
7500
7501
0
    name = xmlParseEntityRefInternal(ctxt);
7502
0
    if (name == NULL)
7503
0
        return(NULL);
7504
7505
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7506
0
}
7507
7508
/**
7509
 * Parse ENTITY references declarations, but this version parses it from
7510
 * a string value.
7511
 *
7512
 *     [68] EntityRef ::= '&' Name ';'
7513
 *
7514
 * [ WFC: Entity Declared ]
7515
 * In a document without any DTD, a document with only an internal DTD
7516
 * subset which contains no parameter entity references, or a document
7517
 * with "standalone='yes'", the Name given in the entity reference
7518
 * must match that in an entity declaration, except that well-formed
7519
 * documents need not declare any of the following entities: amp, lt,
7520
 * gt, apos, quot.  The declaration of a parameter entity must precede
7521
 * any reference to it.  Similarly, the declaration of a general entity
7522
 * must precede any reference to it which appears in a default value in an
7523
 * attribute-list declaration. Note that if entities are declared in the
7524
 * external subset or in external parameter entities, a non-validating
7525
 * processor is not obligated to read and process their declarations;
7526
 * for such documents, the rule that an entity must be declared is a
7527
 * well-formedness constraint only if standalone='yes'.
7528
 *
7529
 * [ WFC: Parsed Entity ]
7530
 * An entity reference must not contain the name of an unparsed entity
7531
 *
7532
 * @param ctxt  an XML parser context
7533
 * @param str  a pointer to an index in the string
7534
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7535
 * is updated to the current location in the string.
7536
 */
7537
static xmlChar *
7538
8.26M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7539
8.26M
    xmlChar *name;
7540
8.26M
    const xmlChar *ptr;
7541
8.26M
    xmlChar cur;
7542
7543
8.26M
    if ((str == NULL) || (*str == NULL))
7544
0
        return(NULL);
7545
8.26M
    ptr = *str;
7546
8.26M
    cur = *ptr;
7547
8.26M
    if (cur != '&')
7548
0
  return(NULL);
7549
7550
8.26M
    ptr++;
7551
8.26M
    name = xmlParseStringName(ctxt, &ptr);
7552
8.26M
    if (name == NULL) {
7553
12
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554
12
           "xmlParseStringEntityRef: no name\n");
7555
12
  *str = ptr;
7556
12
  return(NULL);
7557
12
    }
7558
8.26M
    if (*ptr != ';') {
7559
14
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7560
14
        xmlFree(name);
7561
14
  *str = ptr;
7562
14
  return(NULL);
7563
14
    }
7564
8.26M
    ptr++;
7565
7566
8.26M
    *str = ptr;
7567
8.26M
    return(name);
7568
8.26M
}
7569
7570
/**
7571
 * Parse a parameter entity reference. Always consumes '%'.
7572
 *
7573
 * The entity content is handled directly by pushing it's content as
7574
 * a new input stream.
7575
 *
7576
 *     [69] PEReference ::= '%' Name ';'
7577
 *
7578
 * [ WFC: No Recursion ]
7579
 * A parsed entity must not contain a recursive
7580
 * reference to itself, either directly or indirectly.
7581
 *
7582
 * [ WFC: Entity Declared ]
7583
 * In a document without any DTD, a document with only an internal DTD
7584
 * subset which contains no parameter entity references, or a document
7585
 * with "standalone='yes'", ...  ... The declaration of a parameter
7586
 * entity must precede any reference to it...
7587
 *
7588
 * [ VC: Entity Declared ]
7589
 * In a document with an external subset or external parameter entities
7590
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7591
 * must precede any reference to it...
7592
 *
7593
 * [ WFC: In DTD ]
7594
 * Parameter-entity references may only appear in the DTD.
7595
 * NOTE: misleading but this is handled.
7596
 *
7597
 * @param ctxt  an XML parser context
7598
 * @param markupDecl  whether the PERef starts a markup declaration
7599
 */
7600
static void
7601
57.2k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7602
57.2k
    const xmlChar *name;
7603
57.2k
    xmlEntityPtr entity = NULL;
7604
57.2k
    xmlParserInputPtr input;
7605
7606
57.2k
    if (RAW != '%')
7607
0
        return;
7608
57.2k
    NEXT;
7609
57.2k
    name = xmlParseName(ctxt);
7610
57.2k
    if (name == NULL) {
7611
7.24k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7612
7.24k
  return;
7613
7.24k
    }
7614
49.9k
    if (RAW != ';') {
7615
4.60k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7616
4.60k
        return;
7617
4.60k
    }
7618
7619
45.3k
    NEXT;
7620
7621
    /* Must be set before xmlHandleUndeclaredEntity */
7622
45.3k
    ctxt->hasPErefs = 1;
7623
7624
    /*
7625
     * Request the entity from SAX
7626
     */
7627
45.3k
    if ((ctxt->sax != NULL) &&
7628
45.3k
  (ctxt->sax->getParameterEntity != NULL))
7629
45.3k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7630
7631
45.3k
    if (entity == NULL) {
7632
16.4k
        xmlHandleUndeclaredEntity(ctxt, name);
7633
28.9k
    } else {
7634
  /*
7635
   * Internal checking in case the entity quest barfed
7636
   */
7637
28.9k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7638
28.9k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7639
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7640
0
      "Internal: %%%s; is not a parameter entity\n",
7641
0
        name, NULL);
7642
28.9k
  } else {
7643
28.9k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7644
28.9k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7645
388
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7646
388
      (ctxt->replaceEntities == 0) &&
7647
388
      (ctxt->validate == 0))))
7648
388
    return;
7649
7650
28.5k
            if (entity->flags & XML_ENT_EXPANDING) {
7651
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7652
0
                return;
7653
0
            }
7654
7655
28.5k
      input = xmlNewEntityInputStream(ctxt, entity);
7656
28.5k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7657
0
                xmlFreeInputStream(input);
7658
0
    return;
7659
0
            }
7660
7661
28.5k
            entity->flags |= XML_ENT_EXPANDING;
7662
7663
28.5k
            if (markupDecl)
7664
26.3k
                input->flags |= XML_INPUT_MARKUP_DECL;
7665
7666
28.5k
            GROW;
7667
7668
28.5k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7669
0
                xmlDetectEncoding(ctxt);
7670
7671
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7672
0
                    (IS_BLANK_CH(NXT(5)))) {
7673
0
                    xmlParseTextDecl(ctxt);
7674
0
                }
7675
0
            }
7676
28.5k
  }
7677
28.9k
    }
7678
45.3k
}
7679
7680
/**
7681
 * Parse a parameter entity reference.
7682
 *
7683
 * @deprecated Internal function, don't use.
7684
 *
7685
 * @param ctxt  an XML parser context
7686
 */
7687
void
7688
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7689
0
    xmlParsePERefInternal(ctxt, 0);
7690
0
}
7691
7692
/**
7693
 * Load the content of an entity.
7694
 *
7695
 * @param ctxt  an XML parser context
7696
 * @param entity  an unloaded system entity
7697
 * @returns 0 in case of success and -1 in case of failure
7698
 */
7699
static int
7700
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7701
0
    xmlParserInputPtr oldinput, input = NULL;
7702
0
    xmlParserInputPtr *oldinputTab;
7703
0
    xmlChar *oldencoding;
7704
0
    xmlChar *content = NULL;
7705
0
    xmlResourceType rtype;
7706
0
    size_t length, i;
7707
0
    int oldinputNr, oldinputMax;
7708
0
    int ret = -1;
7709
0
    int res;
7710
7711
0
    if ((ctxt == NULL) || (entity == NULL) ||
7712
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7713
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7714
0
  (entity->content != NULL)) {
7715
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7716
0
              "xmlLoadEntityContent parameter error");
7717
0
        return(-1);
7718
0
    }
7719
7720
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7721
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7722
0
    else
7723
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7724
7725
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7726
0
                            (char *) entity->ExternalID, rtype);
7727
0
    if (input == NULL)
7728
0
        return(-1);
7729
7730
0
    oldinput = ctxt->input;
7731
0
    oldinputNr = ctxt->inputNr;
7732
0
    oldinputMax = ctxt->inputMax;
7733
0
    oldinputTab = ctxt->inputTab;
7734
0
    oldencoding = ctxt->encoding;
7735
7736
0
    ctxt->input = NULL;
7737
0
    ctxt->inputNr = 0;
7738
0
    ctxt->inputMax = 1;
7739
0
    ctxt->encoding = NULL;
7740
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7741
0
    if (ctxt->inputTab == NULL) {
7742
0
        xmlErrMemory(ctxt);
7743
0
        xmlFreeInputStream(input);
7744
0
        goto error;
7745
0
    }
7746
7747
0
    xmlBufResetInput(input->buf->buffer, input);
7748
7749
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7750
0
        xmlFreeInputStream(input);
7751
0
        goto error;
7752
0
    }
7753
7754
0
    xmlDetectEncoding(ctxt);
7755
7756
    /*
7757
     * Parse a possible text declaration first
7758
     */
7759
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7760
0
  xmlParseTextDecl(ctxt);
7761
        /*
7762
         * An XML-1.0 document can't reference an entity not XML-1.0
7763
         */
7764
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7765
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7766
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7767
0
                           "Version mismatch between document and entity\n");
7768
0
        }
7769
0
    }
7770
7771
0
    length = input->cur - input->base;
7772
0
    xmlBufShrink(input->buf->buffer, length);
7773
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7774
7775
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7776
0
        ;
7777
7778
0
    xmlBufResetInput(input->buf->buffer, input);
7779
7780
0
    if (res < 0) {
7781
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7782
0
        goto error;
7783
0
    }
7784
7785
0
    length = xmlBufUse(input->buf->buffer);
7786
0
    if (length > INT_MAX) {
7787
0
        xmlErrMemory(ctxt);
7788
0
        goto error;
7789
0
    }
7790
7791
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7792
0
    if (content == NULL) {
7793
0
        xmlErrMemory(ctxt);
7794
0
        goto error;
7795
0
    }
7796
7797
0
    for (i = 0; i < length; ) {
7798
0
        int clen = length - i;
7799
0
        int c = xmlGetUTF8Char(content + i, &clen);
7800
7801
0
        if ((c < 0) || (!IS_CHAR(c))) {
7802
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7803
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7804
0
                              content[i]);
7805
0
            goto error;
7806
0
        }
7807
0
        i += clen;
7808
0
    }
7809
7810
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7811
0
    entity->content = content;
7812
0
    entity->length = length;
7813
0
    content = NULL;
7814
0
    ret = 0;
7815
7816
0
error:
7817
0
    while (ctxt->inputNr > 0)
7818
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7819
0
    xmlFree(ctxt->inputTab);
7820
0
    xmlFree(ctxt->encoding);
7821
7822
0
    ctxt->input = oldinput;
7823
0
    ctxt->inputNr = oldinputNr;
7824
0
    ctxt->inputMax = oldinputMax;
7825
0
    ctxt->inputTab = oldinputTab;
7826
0
    ctxt->encoding = oldencoding;
7827
7828
0
    xmlFree(content);
7829
7830
0
    return(ret);
7831
0
}
7832
7833
/**
7834
 * Parse PEReference declarations
7835
 *
7836
 *     [69] PEReference ::= '%' Name ';'
7837
 *
7838
 * [ WFC: No Recursion ]
7839
 * A parsed entity must not contain a recursive
7840
 * reference to itself, either directly or indirectly.
7841
 *
7842
 * [ WFC: Entity Declared ]
7843
 * In a document without any DTD, a document with only an internal DTD
7844
 * subset which contains no parameter entity references, or a document
7845
 * with "standalone='yes'", ...  ... The declaration of a parameter
7846
 * entity must precede any reference to it...
7847
 *
7848
 * [ VC: Entity Declared ]
7849
 * In a document with an external subset or external parameter entities
7850
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7851
 * must precede any reference to it...
7852
 *
7853
 * [ WFC: In DTD ]
7854
 * Parameter-entity references may only appear in the DTD.
7855
 * NOTE: misleading but this is handled.
7856
 *
7857
 * @param ctxt  an XML parser context
7858
 * @param str  a pointer to an index in the string
7859
 * @returns the string of the entity content.
7860
 *         str is updated to the current value of the index
7861
 */
7862
static xmlEntityPtr
7863
5.92k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7864
5.92k
    const xmlChar *ptr;
7865
5.92k
    xmlChar cur;
7866
5.92k
    xmlChar *name;
7867
5.92k
    xmlEntityPtr entity = NULL;
7868
7869
5.92k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7870
5.92k
    ptr = *str;
7871
5.92k
    cur = *ptr;
7872
5.92k
    if (cur != '%')
7873
0
        return(NULL);
7874
5.92k
    ptr++;
7875
5.92k
    name = xmlParseStringName(ctxt, &ptr);
7876
5.92k
    if (name == NULL) {
7877
971
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878
971
           "xmlParseStringPEReference: no name\n");
7879
971
  *str = ptr;
7880
971
  return(NULL);
7881
971
    }
7882
4.95k
    cur = *ptr;
7883
4.95k
    if (cur != ';') {
7884
1.34k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7885
1.34k
  xmlFree(name);
7886
1.34k
  *str = ptr;
7887
1.34k
  return(NULL);
7888
1.34k
    }
7889
3.60k
    ptr++;
7890
7891
    /* Must be set before xmlHandleUndeclaredEntity */
7892
3.60k
    ctxt->hasPErefs = 1;
7893
7894
    /*
7895
     * Request the entity from SAX
7896
     */
7897
3.60k
    if ((ctxt->sax != NULL) &&
7898
3.60k
  (ctxt->sax->getParameterEntity != NULL))
7899
3.60k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7900
7901
3.60k
    if (entity == NULL) {
7902
2.63k
        xmlHandleUndeclaredEntity(ctxt, name);
7903
2.63k
    } else {
7904
  /*
7905
   * Internal checking in case the entity quest barfed
7906
   */
7907
967
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
967
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
        "%%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
0
  }
7913
967
    }
7914
7915
3.60k
    xmlFree(name);
7916
3.60k
    *str = ptr;
7917
3.60k
    return(entity);
7918
4.95k
}
7919
7920
/**
7921
 * Parse a DOCTYPE declaration
7922
 *
7923
 * @deprecated Internal function, don't use.
7924
 *
7925
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7926
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7927
 *
7928
 * [ VC: Root Element Type ]
7929
 * The Name in the document type declaration must match the element
7930
 * type of the root element.
7931
 *
7932
 * @param ctxt  an XML parser context
7933
 */
7934
7935
void
7936
11.3k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7937
11.3k
    const xmlChar *name = NULL;
7938
11.3k
    xmlChar *publicId = NULL;
7939
11.3k
    xmlChar *URI = NULL;
7940
7941
    /*
7942
     * We know that '<!DOCTYPE' has been detected.
7943
     */
7944
11.3k
    SKIP(9);
7945
7946
11.3k
    if (SKIP_BLANKS == 0) {
7947
3.99k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7948
3.99k
                       "Space required after 'DOCTYPE'\n");
7949
3.99k
    }
7950
7951
    /*
7952
     * Parse the DOCTYPE name.
7953
     */
7954
11.3k
    name = xmlParseName(ctxt);
7955
11.3k
    if (name == NULL) {
7956
1.45k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7957
1.45k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7958
1.45k
    }
7959
11.3k
    ctxt->intSubName = name;
7960
7961
11.3k
    SKIP_BLANKS;
7962
7963
    /*
7964
     * Check for public and system identifier (URI)
7965
     */
7966
11.3k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7967
7968
11.3k
    if ((URI != NULL) || (publicId != NULL)) {
7969
595
        ctxt->hasExternalSubset = 1;
7970
595
    }
7971
11.3k
    ctxt->extSubURI = URI;
7972
11.3k
    ctxt->extSubSystem = publicId;
7973
7974
11.3k
    SKIP_BLANKS;
7975
7976
    /*
7977
     * Create and update the internal subset.
7978
     */
7979
11.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7980
11.3k
  (!ctxt->disableSAX))
7981
7.01k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7982
7983
11.3k
    if ((RAW != '[') && (RAW != '>')) {
7984
280
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7985
280
    }
7986
11.3k
}
7987
7988
/**
7989
 * Parse the internal subset declaration
7990
 *
7991
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7992
 * @param ctxt  an XML parser context
7993
 */
7994
7995
static void
7996
10.6k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7997
    /*
7998
     * Is there any DTD definition ?
7999
     */
8000
10.6k
    if (RAW == '[') {
8001
10.6k
        int oldInputNr = ctxt->inputNr;
8002
8003
10.6k
        NEXT;
8004
  /*
8005
   * Parse the succession of Markup declarations and
8006
   * PEReferences.
8007
   * Subsequence (markupdecl | PEReference | S)*
8008
   */
8009
10.6k
  SKIP_BLANKS;
8010
264k
        while (1) {
8011
264k
            if (PARSER_STOPPED(ctxt)) {
8012
16
                return;
8013
264k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8014
28.9k
                if (ctxt->inputNr <= oldInputNr) {
8015
3.08k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8016
3.08k
                    return;
8017
3.08k
                }
8018
25.8k
                xmlPopPE(ctxt);
8019
235k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8020
4.22k
                NEXT;
8021
4.22k
                SKIP_BLANKS;
8022
4.22k
                break;
8023
231k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8024
231k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8025
                /*
8026
                 * Conditional sections are allowed in external entities
8027
                 * included by PE References in the internal subset.
8028
                 */
8029
0
                xmlParseConditionalSections(ctxt);
8030
231k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8031
173k
                xmlParseMarkupDecl(ctxt);
8032
173k
            } else if (RAW == '%') {
8033
54.3k
                xmlParsePERefInternal(ctxt, 1);
8034
54.3k
            } else {
8035
3.37k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8036
8037
3.44k
                while (ctxt->inputNr > oldInputNr)
8038
75
                    xmlPopPE(ctxt);
8039
3.37k
                return;
8040
3.37k
            }
8041
253k
            SKIP_BLANKS;
8042
253k
            SHRINK;
8043
253k
            GROW;
8044
253k
        }
8045
10.6k
    }
8046
8047
    /*
8048
     * We should be at the end of the DOCTYPE declaration.
8049
     */
8050
4.22k
    if (RAW != '>') {
8051
149
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8052
149
        return;
8053
149
    }
8054
4.07k
    NEXT;
8055
4.07k
}
8056
8057
#ifdef LIBXML_SAX1_ENABLED
8058
/**
8059
 * Parse an attribute
8060
 *
8061
 * @deprecated Internal function, don't use.
8062
 *
8063
 *     [41] Attribute ::= Name Eq AttValue
8064
 *
8065
 * [ WFC: No External Entity References ]
8066
 * Attribute values cannot contain direct or indirect entity references
8067
 * to external entities.
8068
 *
8069
 * [ WFC: No < in Attribute Values ]
8070
 * The replacement text of any entity referred to directly or indirectly in
8071
 * an attribute value (other than "&lt;") must not contain a <.
8072
 *
8073
 * [ VC: Attribute Value Type ]
8074
 * The attribute must have been declared; the value must be of the type
8075
 * declared for it.
8076
 *
8077
 *     [25] Eq ::= S? '=' S?
8078
 *
8079
 * With namespace:
8080
 *
8081
 *     [NS 11] Attribute ::= QName Eq AttValue
8082
 *
8083
 * Also the case QName == xmlns:??? is handled independently as a namespace
8084
 * definition.
8085
 *
8086
 * @param ctxt  an XML parser context
8087
 * @param value  a xmlChar ** used to store the value of the attribute
8088
 * @returns the attribute name, and the value in *value.
8089
 */
8090
8091
const xmlChar *
8092
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8093
0
    const xmlChar *name;
8094
0
    xmlChar *val;
8095
8096
0
    *value = NULL;
8097
0
    GROW;
8098
0
    name = xmlParseName(ctxt);
8099
0
    if (name == NULL) {
8100
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8101
0
                 "error parsing attribute name\n");
8102
0
        return(NULL);
8103
0
    }
8104
8105
    /*
8106
     * read the value
8107
     */
8108
0
    SKIP_BLANKS;
8109
0
    if (RAW == '=') {
8110
0
        NEXT;
8111
0
  SKIP_BLANKS;
8112
0
  val = xmlParseAttValue(ctxt);
8113
0
    } else {
8114
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8115
0
         "Specification mandates value for attribute %s\n", name);
8116
0
  return(name);
8117
0
    }
8118
8119
    /*
8120
     * Check that xml:lang conforms to the specification
8121
     * No more registered as an error, just generate a warning now
8122
     * since this was deprecated in XML second edition
8123
     */
8124
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8125
0
  if (!xmlCheckLanguageID(val)) {
8126
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8127
0
              "Malformed value for xml:lang : %s\n",
8128
0
        val, NULL);
8129
0
  }
8130
0
    }
8131
8132
    /*
8133
     * Check that xml:space conforms to the specification
8134
     */
8135
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8136
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8137
0
      *(ctxt->space) = 0;
8138
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8139
0
      *(ctxt->space) = 1;
8140
0
  else {
8141
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8142
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8143
0
                                 val, NULL);
8144
0
  }
8145
0
    }
8146
8147
0
    *value = val;
8148
0
    return(name);
8149
0
}
8150
8151
/**
8152
 * Parse a start tag. Always consumes '<'.
8153
 *
8154
 * @deprecated Internal function, don't use.
8155
 *
8156
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8157
 *
8158
 * [ WFC: Unique Att Spec ]
8159
 * No attribute name may appear more than once in the same start-tag or
8160
 * empty-element tag.
8161
 *
8162
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8163
 *
8164
 * [ WFC: Unique Att Spec ]
8165
 * No attribute name may appear more than once in the same start-tag or
8166
 * empty-element tag.
8167
 *
8168
 * With namespace:
8169
 *
8170
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8171
 *
8172
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8173
 *
8174
 * @param ctxt  an XML parser context
8175
 * @returns the element name parsed
8176
 */
8177
8178
const xmlChar *
8179
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8180
0
    const xmlChar *name;
8181
0
    const xmlChar *attname;
8182
0
    xmlChar *attvalue;
8183
0
    const xmlChar **atts = ctxt->atts;
8184
0
    int nbatts = 0;
8185
0
    int maxatts = ctxt->maxatts;
8186
0
    int i;
8187
8188
0
    if (RAW != '<') return(NULL);
8189
0
    NEXT1;
8190
8191
0
    name = xmlParseName(ctxt);
8192
0
    if (name == NULL) {
8193
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194
0
       "xmlParseStartTag: invalid element name\n");
8195
0
        return(NULL);
8196
0
    }
8197
8198
    /*
8199
     * Now parse the attributes, it ends up with the ending
8200
     *
8201
     * (S Attribute)* S?
8202
     */
8203
0
    SKIP_BLANKS;
8204
0
    GROW;
8205
8206
0
    while (((RAW != '>') &&
8207
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8208
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8209
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8210
0
        if (attname == NULL)
8211
0
      break;
8212
0
        if (attvalue != NULL) {
8213
      /*
8214
       * [ WFC: Unique Att Spec ]
8215
       * No attribute name may appear more than once in the same
8216
       * start-tag or empty-element tag.
8217
       */
8218
0
      for (i = 0; i < nbatts;i += 2) {
8219
0
          if (xmlStrEqual(atts[i], attname)) {
8220
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8221
0
        goto failed;
8222
0
    }
8223
0
      }
8224
      /*
8225
       * Add the pair to atts
8226
       */
8227
0
      if (nbatts + 4 > maxatts) {
8228
0
          const xmlChar **n;
8229
0
                int newSize;
8230
8231
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8232
0
                                          11, XML_MAX_ATTRS);
8233
0
                if (newSize < 0) {
8234
0
        xmlErrMemory(ctxt);
8235
0
        goto failed;
8236
0
    }
8237
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8238
0
                if (newSize < 2)
8239
0
                    newSize = 2;
8240
0
#endif
8241
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8242
0
    if (n == NULL) {
8243
0
        xmlErrMemory(ctxt);
8244
0
        goto failed;
8245
0
    }
8246
0
    atts = n;
8247
0
                maxatts = newSize * 2;
8248
0
    ctxt->atts = atts;
8249
0
    ctxt->maxatts = maxatts;
8250
0
      }
8251
8252
0
      atts[nbatts++] = attname;
8253
0
      atts[nbatts++] = attvalue;
8254
0
      atts[nbatts] = NULL;
8255
0
      atts[nbatts + 1] = NULL;
8256
8257
0
            attvalue = NULL;
8258
0
  }
8259
8260
0
failed:
8261
8262
0
        if (attvalue != NULL)
8263
0
            xmlFree(attvalue);
8264
8265
0
  GROW
8266
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8267
0
      break;
8268
0
  if (SKIP_BLANKS == 0) {
8269
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8270
0
         "attributes construct error\n");
8271
0
  }
8272
0
  SHRINK;
8273
0
        GROW;
8274
0
    }
8275
8276
    /*
8277
     * SAX: Start of Element !
8278
     */
8279
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8280
0
  (!ctxt->disableSAX)) {
8281
0
  if (nbatts > 0)
8282
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8283
0
  else
8284
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8285
0
    }
8286
8287
0
    if (atts != NULL) {
8288
        /* Free only the content strings */
8289
0
        for (i = 1;i < nbatts;i+=2)
8290
0
      if (atts[i] != NULL)
8291
0
         xmlFree((xmlChar *) atts[i]);
8292
0
    }
8293
0
    return(name);
8294
0
}
8295
8296
/**
8297
 * Parse an end tag. Always consumes '</'.
8298
 *
8299
 *     [42] ETag ::= '</' Name S? '>'
8300
 *
8301
 * With namespace
8302
 *
8303
 *     [NS 9] ETag ::= '</' QName S? '>'
8304
 * @param ctxt  an XML parser context
8305
 * @param line  line of the start tag
8306
 */
8307
8308
static void
8309
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8310
0
    const xmlChar *name;
8311
8312
0
    GROW;
8313
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8314
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8315
0
           "xmlParseEndTag: '</' not found\n");
8316
0
  return;
8317
0
    }
8318
0
    SKIP(2);
8319
8320
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8321
8322
    /*
8323
     * We should definitely be at the ending "S? '>'" part
8324
     */
8325
0
    GROW;
8326
0
    SKIP_BLANKS;
8327
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8328
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8329
0
    } else
8330
0
  NEXT1;
8331
8332
    /*
8333
     * [ WFC: Element Type Match ]
8334
     * The Name in an element's end-tag must match the element type in the
8335
     * start-tag.
8336
     *
8337
     */
8338
0
    if (name != (xmlChar*)1) {
8339
0
        if (name == NULL) name = BAD_CAST "unparsable";
8340
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8341
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8342
0
                    ctxt->name, line, name);
8343
0
    }
8344
8345
    /*
8346
     * SAX: End of Tag
8347
     */
8348
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8349
0
  (!ctxt->disableSAX))
8350
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8351
8352
0
    namePop(ctxt);
8353
0
    spacePop(ctxt);
8354
0
}
8355
8356
/**
8357
 * Parse an end of tag
8358
 *
8359
 * @deprecated Internal function, don't use.
8360
 *
8361
 *     [42] ETag ::= '</' Name S? '>'
8362
 *
8363
 * With namespace
8364
 *
8365
 *     [NS 9] ETag ::= '</' QName S? '>'
8366
 * @param ctxt  an XML parser context
8367
 */
8368
8369
void
8370
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8371
0
    xmlParseEndTag1(ctxt, 0);
8372
0
}
8373
#endif /* LIBXML_SAX1_ENABLED */
8374
8375
/************************************************************************
8376
 *                  *
8377
 *          SAX 2 specific operations       *
8378
 *                  *
8379
 ************************************************************************/
8380
8381
/**
8382
 * Parse an XML Namespace QName
8383
 *
8384
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8385
 *     [7]  Prefix  ::= NCName
8386
 *     [8]  LocalPart  ::= NCName
8387
 *
8388
 * @param ctxt  an XML parser context
8389
 * @param prefix  pointer to store the prefix part
8390
 * @returns the Name parsed or NULL
8391
 */
8392
8393
static xmlHashedString
8394
2.75M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8395
2.75M
    xmlHashedString l, p;
8396
2.75M
    int start, isNCName = 0;
8397
8398
2.75M
    l.name = NULL;
8399
2.75M
    p.name = NULL;
8400
8401
2.75M
    GROW;
8402
2.75M
    start = CUR_PTR - BASE_PTR;
8403
8404
2.75M
    l = xmlParseNCName(ctxt);
8405
2.75M
    if (l.name != NULL) {
8406
1.45M
        isNCName = 1;
8407
1.45M
        if (CUR == ':') {
8408
452k
            NEXT;
8409
452k
            p = l;
8410
452k
            l = xmlParseNCName(ctxt);
8411
452k
        }
8412
1.45M
    }
8413
2.75M
    if ((l.name == NULL) || (CUR == ':')) {
8414
1.32M
        xmlChar *tmp;
8415
8416
1.32M
        l.name = NULL;
8417
1.32M
        p.name = NULL;
8418
1.32M
        if ((isNCName == 0) && (CUR != ':'))
8419
1.27M
            return(l);
8420
45.1k
        tmp = xmlParseNmtoken(ctxt);
8421
45.1k
        if (tmp != NULL)
8422
31.1k
            xmlFree(tmp);
8423
45.1k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8424
45.1k
                                CUR_PTR - (BASE_PTR + start));
8425
45.1k
        if (l.name == NULL) {
8426
0
            xmlErrMemory(ctxt);
8427
0
            return(l);
8428
0
        }
8429
45.1k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8430
45.1k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8431
45.1k
    }
8432
8433
1.47M
    *prefix = p;
8434
1.47M
    return(l);
8435
2.75M
}
8436
8437
/**
8438
 * Parse an XML Namespace QName
8439
 *
8440
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8441
 *     [7]  Prefix  ::= NCName
8442
 *     [8]  LocalPart  ::= NCName
8443
 *
8444
 * @param ctxt  an XML parser context
8445
 * @param prefix  pointer to store the prefix part
8446
 * @returns the Name parsed or NULL
8447
 */
8448
8449
static const xmlChar *
8450
22.4k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8451
22.4k
    xmlHashedString n, p;
8452
8453
22.4k
    n = xmlParseQNameHashed(ctxt, &p);
8454
22.4k
    if (n.name == NULL)
8455
2.15k
        return(NULL);
8456
20.3k
    *prefix = p.name;
8457
20.3k
    return(n.name);
8458
22.4k
}
8459
8460
/**
8461
 * Parse an XML name and compares for match
8462
 * (specialized for endtag parsing)
8463
 *
8464
 * @param ctxt  an XML parser context
8465
 * @param name  the localname
8466
 * @param prefix  the prefix, if any.
8467
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8468
 * and the name for mismatch
8469
 */
8470
8471
static const xmlChar *
8472
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8473
84.3k
                        xmlChar const *prefix) {
8474
84.3k
    const xmlChar *cmp;
8475
84.3k
    const xmlChar *in;
8476
84.3k
    const xmlChar *ret;
8477
84.3k
    const xmlChar *prefix2;
8478
8479
84.3k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8480
8481
84.3k
    GROW;
8482
84.3k
    in = ctxt->input->cur;
8483
8484
84.3k
    cmp = prefix;
8485
242k
    while (*in != 0 && *in == *cmp) {
8486
158k
  ++in;
8487
158k
  ++cmp;
8488
158k
    }
8489
84.3k
    if ((*cmp == 0) && (*in == ':')) {
8490
66.4k
        in++;
8491
66.4k
  cmp = name;
8492
659k
  while (*in != 0 && *in == *cmp) {
8493
593k
      ++in;
8494
593k
      ++cmp;
8495
593k
  }
8496
66.4k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8497
      /* success */
8498
61.8k
            ctxt->input->col += in - ctxt->input->cur;
8499
61.8k
      ctxt->input->cur = in;
8500
61.8k
      return((const xmlChar*) 1);
8501
61.8k
  }
8502
66.4k
    }
8503
    /*
8504
     * all strings coms from the dictionary, equality can be done directly
8505
     */
8506
22.4k
    ret = xmlParseQName (ctxt, &prefix2);
8507
22.4k
    if (ret == NULL)
8508
2.15k
        return(NULL);
8509
20.3k
    if ((ret == name) && (prefix == prefix2))
8510
1.13k
  return((const xmlChar*) 1);
8511
19.2k
    return ret;
8512
20.3k
}
8513
8514
/**
8515
 * Parse an attribute in the new SAX2 framework.
8516
 *
8517
 * @param ctxt  an XML parser context
8518
 * @param pref  the element prefix
8519
 * @param elem  the element name
8520
 * @param hprefix  resulting attribute prefix
8521
 * @param value  resulting value of the attribute
8522
 * @param len  resulting length of the attribute
8523
 * @param alloc  resulting indicator if the attribute was allocated
8524
 * @returns the attribute name, and the value in *value, .
8525
 */
8526
8527
static xmlHashedString
8528
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8529
                   const xmlChar * pref, const xmlChar * elem,
8530
                   xmlHashedString * hprefix, xmlChar ** value,
8531
                   int *len, int *alloc)
8532
742k
{
8533
742k
    xmlHashedString hname;
8534
742k
    const xmlChar *prefix, *name;
8535
742k
    xmlChar *val = NULL, *internal_val = NULL;
8536
742k
    int special = 0;
8537
742k
    int isNamespace;
8538
742k
    int flags;
8539
8540
742k
    *value = NULL;
8541
742k
    GROW;
8542
742k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8543
742k
    if (hname.name == NULL) {
8544
177k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8545
177k
                       "error parsing attribute name\n");
8546
177k
        return(hname);
8547
177k
    }
8548
564k
    name = hname.name;
8549
564k
    prefix = hprefix->name;
8550
8551
    /*
8552
     * get the type if needed
8553
     */
8554
564k
    if (ctxt->attsSpecial != NULL) {
8555
50.3k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8556
50.3k
                                              prefix, name));
8557
50.3k
    }
8558
8559
    /*
8560
     * read the value
8561
     */
8562
564k
    SKIP_BLANKS;
8563
564k
    if (RAW != '=') {
8564
59.2k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8565
59.2k
                          "Specification mandates value for attribute %s\n",
8566
59.2k
                          name);
8567
59.2k
        goto error;
8568
59.2k
    }
8569
8570
8571
505k
    NEXT;
8572
505k
    SKIP_BLANKS;
8573
505k
    flags = 0;
8574
505k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8575
505k
                   (prefix == ctxt->str_xmlns));
8576
505k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8577
505k
                                   isNamespace);
8578
505k
    if (val == NULL)
8579
16.7k
        goto error;
8580
8581
488k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8582
8583
488k
#ifdef LIBXML_VALID_ENABLED
8584
488k
    if ((ctxt->validate) &&
8585
488k
        (ctxt->standalone) &&
8586
488k
        (special & XML_SPECIAL_EXTERNAL) &&
8587
488k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8588
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8589
0
                         "standalone: normalization of attribute %s on %s "
8590
0
                         "by external subset declaration\n",
8591
0
                         name, elem);
8592
0
    }
8593
488k
#endif
8594
8595
488k
    if (prefix == ctxt->str_xml) {
8596
        /*
8597
         * Check that xml:lang conforms to the specification
8598
         * No more registered as an error, just generate a warning now
8599
         * since this was deprecated in XML second edition
8600
         */
8601
22.8k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8602
0
            internal_val = xmlStrndup(val, *len);
8603
0
            if (internal_val == NULL)
8604
0
                goto mem_error;
8605
0
            if (!xmlCheckLanguageID(internal_val)) {
8606
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8607
0
                              "Malformed value for xml:lang : %s\n",
8608
0
                              internal_val, NULL);
8609
0
            }
8610
0
        }
8611
8612
        /*
8613
         * Check that xml:space conforms to the specification
8614
         */
8615
22.8k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8616
2.25k
            internal_val = xmlStrndup(val, *len);
8617
2.25k
            if (internal_val == NULL)
8618
0
                goto mem_error;
8619
2.25k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8620
829
                *(ctxt->space) = 0;
8621
1.42k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8622
395
                *(ctxt->space) = 1;
8623
1.03k
            else {
8624
1.03k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8625
1.03k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8626
1.03k
                              internal_val, NULL);
8627
1.03k
            }
8628
2.25k
        }
8629
22.8k
        if (internal_val) {
8630
2.25k
            xmlFree(internal_val);
8631
2.25k
        }
8632
22.8k
    }
8633
8634
488k
    *value = val;
8635
488k
    return (hname);
8636
8637
0
mem_error:
8638
0
    xmlErrMemory(ctxt);
8639
76.0k
error:
8640
76.0k
    if ((val != NULL) && (*alloc != 0))
8641
0
        xmlFree(val);
8642
76.0k
    return(hname);
8643
0
}
8644
8645
/**
8646
 * Inserts a new attribute into the hash table.
8647
 *
8648
 * @param ctxt  parser context
8649
 * @param size  size of the hash table
8650
 * @param name  attribute name
8651
 * @param uri  namespace uri
8652
 * @param hashValue  combined hash value of name and uri
8653
 * @param aindex  attribute index (this is a multiple of 5)
8654
 * @returns INT_MAX if no existing attribute was found, the attribute
8655
 * index if an attribute was found, -1 if a memory allocation failed.
8656
 */
8657
static int
8658
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8659
228k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8660
228k
    xmlAttrHashBucket *table = ctxt->attrHash;
8661
228k
    xmlAttrHashBucket *bucket;
8662
228k
    unsigned hindex;
8663
8664
228k
    hindex = hashValue & (size - 1);
8665
228k
    bucket = &table[hindex];
8666
8667
265k
    while (bucket->index >= 0) {
8668
95.5k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8669
8670
95.5k
        if (name == atts[0]) {
8671
61.8k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8672
8673
61.8k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8674
61.8k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8675
12.2k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8676
58.0k
                return(bucket->index);
8677
61.8k
        }
8678
8679
37.4k
        hindex++;
8680
37.4k
        bucket++;
8681
37.4k
        if (hindex >= size) {
8682
4.85k
            hindex = 0;
8683
4.85k
            bucket = table;
8684
4.85k
        }
8685
37.4k
    }
8686
8687
169k
    bucket->index = aindex;
8688
8689
169k
    return(INT_MAX);
8690
228k
}
8691
8692
static int
8693
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8694
                       const xmlChar *name, const xmlChar *prefix,
8695
10.2k
                       unsigned hashValue, int aindex) {
8696
10.2k
    xmlAttrHashBucket *table = ctxt->attrHash;
8697
10.2k
    xmlAttrHashBucket *bucket;
8698
10.2k
    unsigned hindex;
8699
8700
10.2k
    hindex = hashValue & (size - 1);
8701
10.2k
    bucket = &table[hindex];
8702
8703
14.5k
    while (bucket->index >= 0) {
8704
7.79k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8705
8706
7.79k
        if ((name == atts[0]) && (prefix == atts[1]))
8707
3.54k
            return(bucket->index);
8708
8709
4.24k
        hindex++;
8710
4.24k
        bucket++;
8711
4.24k
        if (hindex >= size) {
8712
806
            hindex = 0;
8713
806
            bucket = table;
8714
806
        }
8715
4.24k
    }
8716
8717
6.73k
    bucket->index = aindex;
8718
8719
6.73k
    return(INT_MAX);
8720
10.2k
}
8721
/**
8722
 * Parse a start tag. Always consumes '<'.
8723
 *
8724
 * This routine is called when running SAX2 parsing
8725
 *
8726
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8727
 *
8728
 * [ WFC: Unique Att Spec ]
8729
 * No attribute name may appear more than once in the same start-tag or
8730
 * empty-element tag.
8731
 *
8732
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8733
 *
8734
 * [ WFC: Unique Att Spec ]
8735
 * No attribute name may appear more than once in the same start-tag or
8736
 * empty-element tag.
8737
 *
8738
 * With namespace:
8739
 *
8740
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8741
 *
8742
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8743
 *
8744
 * @param ctxt  an XML parser context
8745
 * @param pref  resulting namespace prefix
8746
 * @param URI  resulting namespace URI
8747
 * @param nbNsPtr  resulting number of namespace declarations
8748
 * @returns the element name parsed
8749
 */
8750
8751
static const xmlChar *
8752
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8753
1.99M
                  const xmlChar **URI, int *nbNsPtr) {
8754
1.99M
    xmlHashedString hlocalname;
8755
1.99M
    xmlHashedString hprefix;
8756
1.99M
    xmlHashedString hattname;
8757
1.99M
    xmlHashedString haprefix;
8758
1.99M
    const xmlChar *localname;
8759
1.99M
    const xmlChar *prefix;
8760
1.99M
    const xmlChar *attname;
8761
1.99M
    const xmlChar *aprefix;
8762
1.99M
    const xmlChar *uri;
8763
1.99M
    xmlChar *attvalue = NULL;
8764
1.99M
    const xmlChar **atts = ctxt->atts;
8765
1.99M
    unsigned attrHashSize = 0;
8766
1.99M
    int maxatts = ctxt->maxatts;
8767
1.99M
    int nratts, nbatts, nbdef;
8768
1.99M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8769
1.99M
    int alloc = 0;
8770
1.99M
    int numNsErr = 0;
8771
1.99M
    int numDupErr = 0;
8772
8773
1.99M
    if (RAW != '<') return(NULL);
8774
1.99M
    NEXT1;
8775
8776
1.99M
    nbatts = 0;
8777
1.99M
    nratts = 0;
8778
1.99M
    nbdef = 0;
8779
1.99M
    nbNs = 0;
8780
1.99M
    nbTotalDef = 0;
8781
1.99M
    attval = 0;
8782
8783
1.99M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8784
0
        xmlErrMemory(ctxt);
8785
0
        return(NULL);
8786
0
    }
8787
8788
1.99M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8789
1.99M
    if (hlocalname.name == NULL) {
8790
1.09M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8791
1.09M
           "StartTag: invalid element name\n");
8792
1.09M
        return(NULL);
8793
1.09M
    }
8794
891k
    localname = hlocalname.name;
8795
891k
    prefix = hprefix.name;
8796
8797
    /*
8798
     * Now parse the attributes, it ends up with the ending
8799
     *
8800
     * (S Attribute)* S?
8801
     */
8802
891k
    SKIP_BLANKS;
8803
891k
    GROW;
8804
8805
    /*
8806
     * The ctxt->atts array will be ultimately passed to the SAX callback
8807
     * containing five xmlChar pointers for each attribute:
8808
     *
8809
     * [0] attribute name
8810
     * [1] attribute prefix
8811
     * [2] namespace URI
8812
     * [3] attribute value
8813
     * [4] end of attribute value
8814
     *
8815
     * To save memory, we reuse this array temporarily and store integers
8816
     * in these pointer variables.
8817
     *
8818
     * [0] attribute name
8819
     * [1] attribute prefix
8820
     * [2] hash value of attribute prefix, and later namespace index
8821
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8822
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8823
     *
8824
     * The ctxt->attallocs array contains an additional unsigned int for
8825
     * each attribute, containing the hash value of the attribute name
8826
     * and the alloc flag in bit 31.
8827
     */
8828
8829
1.15M
    while (((RAW != '>') &&
8830
1.15M
     ((RAW != '/') || (NXT(1) != '>')) &&
8831
1.15M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8832
742k
  int len = -1;
8833
8834
742k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8835
742k
                                          &haprefix, &attvalue, &len,
8836
742k
                                          &alloc);
8837
742k
        if (hattname.name == NULL)
8838
177k
      break;
8839
564k
        if (attvalue == NULL)
8840
76.0k
            goto next_attr;
8841
488k
        attname = hattname.name;
8842
488k
        aprefix = haprefix.name;
8843
488k
  if (len < 0) len = xmlStrlen(attvalue);
8844
8845
488k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8846
108k
            xmlHashedString huri;
8847
108k
            xmlURIPtr parsedUri;
8848
8849
108k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8850
108k
            uri = huri.name;
8851
108k
            if (uri == NULL) {
8852
0
                xmlErrMemory(ctxt);
8853
0
                goto next_attr;
8854
0
            }
8855
108k
            if (*uri != 0) {
8856
105k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8857
0
                    xmlErrMemory(ctxt);
8858
0
                    goto next_attr;
8859
0
                }
8860
105k
                if (parsedUri == NULL) {
8861
73.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8862
73.3k
                             "xmlns: '%s' is not a valid URI\n",
8863
73.3k
                                       uri, NULL, NULL);
8864
73.3k
                } else {
8865
32.5k
                    if (parsedUri->scheme == NULL) {
8866
15.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8867
15.7k
                                  "xmlns: URI %s is not absolute\n",
8868
15.7k
                                  uri, NULL, NULL);
8869
15.7k
                    }
8870
32.5k
                    xmlFreeURI(parsedUri);
8871
32.5k
                }
8872
105k
                if (uri == ctxt->str_xml_ns) {
8873
443
                    if (attname != ctxt->str_xml) {
8874
443
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8875
443
                     "xml namespace URI cannot be the default namespace\n",
8876
443
                                 NULL, NULL, NULL);
8877
443
                    }
8878
443
                    goto next_attr;
8879
443
                }
8880
105k
                if ((len == 29) &&
8881
105k
                    (xmlStrEqual(uri,
8882
1.50k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8883
472
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8884
472
                         "reuse of the xmlns namespace name is forbidden\n",
8885
472
                             NULL, NULL, NULL);
8886
472
                    goto next_attr;
8887
472
                }
8888
105k
            }
8889
8890
107k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8891
100k
                nbNs++;
8892
380k
        } else if (aprefix == ctxt->str_xmlns) {
8893
145k
            xmlHashedString huri;
8894
145k
            xmlURIPtr parsedUri;
8895
8896
145k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8897
145k
            uri = huri.name;
8898
145k
            if (uri == NULL) {
8899
0
                xmlErrMemory(ctxt);
8900
0
                goto next_attr;
8901
0
            }
8902
8903
145k
            if (attname == ctxt->str_xml) {
8904
1.42k
                if (uri != ctxt->str_xml_ns) {
8905
1.03k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8906
1.03k
                             "xml namespace prefix mapped to wrong URI\n",
8907
1.03k
                             NULL, NULL, NULL);
8908
1.03k
                }
8909
                /*
8910
                 * Do not keep a namespace definition node
8911
                 */
8912
1.42k
                goto next_attr;
8913
1.42k
            }
8914
144k
            if (uri == ctxt->str_xml_ns) {
8915
281
                if (attname != ctxt->str_xml) {
8916
281
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8917
281
                             "xml namespace URI mapped to wrong prefix\n",
8918
281
                             NULL, NULL, NULL);
8919
281
                }
8920
281
                goto next_attr;
8921
281
            }
8922
144k
            if (attname == ctxt->str_xmlns) {
8923
700
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8924
700
                         "redefinition of the xmlns prefix is forbidden\n",
8925
700
                         NULL, NULL, NULL);
8926
700
                goto next_attr;
8927
700
            }
8928
143k
            if ((len == 29) &&
8929
143k
                (xmlStrEqual(uri,
8930
1.67k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8931
429
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8932
429
                         "reuse of the xmlns namespace name is forbidden\n",
8933
429
                         NULL, NULL, NULL);
8934
429
                goto next_attr;
8935
429
            }
8936
143k
            if ((uri == NULL) || (uri[0] == 0)) {
8937
938
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8938
938
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8939
938
                              attname, NULL, NULL);
8940
938
                goto next_attr;
8941
142k
            } else {
8942
142k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8943
0
                    xmlErrMemory(ctxt);
8944
0
                    goto next_attr;
8945
0
                }
8946
142k
                if (parsedUri == NULL) {
8947
55.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8948
55.1k
                         "xmlns:%s: '%s' is not a valid URI\n",
8949
55.1k
                                       attname, uri, NULL);
8950
86.9k
                } else {
8951
86.9k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8952
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8953
0
                                  "xmlns:%s: URI %s is not absolute\n",
8954
0
                                  attname, uri, NULL);
8955
0
                    }
8956
86.9k
                    xmlFreeURI(parsedUri);
8957
86.9k
                }
8958
142k
            }
8959
8960
142k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8961
133k
                nbNs++;
8962
234k
        } else {
8963
            /*
8964
             * Populate attributes array, see above for repurposing
8965
             * of xmlChar pointers.
8966
             */
8967
234k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8968
10.1k
                int res = xmlCtxtGrowAttrs(ctxt);
8969
8970
10.1k
                maxatts = ctxt->maxatts;
8971
10.1k
                atts = ctxt->atts;
8972
8973
10.1k
                if (res < 0)
8974
0
                    goto next_attr;
8975
10.1k
            }
8976
234k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8977
234k
                                        ((unsigned) alloc << 31);
8978
234k
            atts[nbatts++] = attname;
8979
234k
            atts[nbatts++] = aprefix;
8980
234k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8981
234k
            if (alloc) {
8982
36.1k
                atts[nbatts++] = attvalue;
8983
36.1k
                attvalue += len;
8984
36.1k
                atts[nbatts++] = attvalue;
8985
198k
            } else {
8986
                /*
8987
                 * attvalue points into the input buffer which can be
8988
                 * reallocated. Store differences to input->base instead.
8989
                 * The pointers will be reconstructed later.
8990
                 */
8991
198k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
198k
                attvalue += len;
8993
198k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8994
198k
            }
8995
            /*
8996
             * tag if some deallocation is needed
8997
             */
8998
234k
            if (alloc != 0) attval = 1;
8999
234k
            attvalue = NULL; /* moved into atts */
9000
234k
        }
9001
9002
564k
next_attr:
9003
564k
        if ((attvalue != NULL) && (alloc != 0)) {
9004
76.8k
            xmlFree(attvalue);
9005
76.8k
            attvalue = NULL;
9006
76.8k
        }
9007
9008
564k
  GROW
9009
564k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9010
154k
      break;
9011
410k
  if (SKIP_BLANKS == 0) {
9012
147k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9013
147k
         "attributes construct error\n");
9014
147k
      break;
9015
147k
  }
9016
263k
        GROW;
9017
263k
    }
9018
9019
    /*
9020
     * Namespaces from default attributes
9021
     */
9022
891k
    if (ctxt->attsDefault != NULL) {
9023
73.5k
        xmlDefAttrsPtr defaults;
9024
9025
73.5k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9026
73.5k
  if (defaults != NULL) {
9027
301k
      for (i = 0; i < defaults->nbAttrs; i++) {
9028
259k
                xmlDefAttr *attr = &defaults->attrs[i];
9029
9030
259k
          attname = attr->name.name;
9031
259k
    aprefix = attr->prefix.name;
9032
9033
259k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9034
12.2k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9035
9036
12.2k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9037
11.4k
                        nbNs++;
9038
246k
    } else if (aprefix == ctxt->str_xmlns) {
9039
128k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9040
9041
128k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9042
128k
                                      NULL, 1) > 0)
9043
127k
                        nbNs++;
9044
128k
    } else {
9045
118k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9046
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9047
0
                                    "Maximum number of attributes exceeded");
9048
0
                        break;
9049
0
                    }
9050
118k
                    nbTotalDef += 1;
9051
118k
                }
9052
259k
      }
9053
41.8k
  }
9054
73.5k
    }
9055
9056
    /*
9057
     * Resolve attribute namespaces
9058
     */
9059
1.12M
    for (i = 0; i < nbatts; i += 5) {
9060
234k
        attname = atts[i];
9061
234k
        aprefix = atts[i+1];
9062
9063
        /*
9064
  * The default namespace does not apply to attribute names.
9065
  */
9066
234k
  if (aprefix == NULL) {
9067
117k
            nsIndex = NS_INDEX_EMPTY;
9068
117k
        } else if (aprefix == ctxt->str_xml) {
9069
22.8k
            nsIndex = NS_INDEX_XML;
9070
94.4k
        } else {
9071
94.4k
            haprefix.name = aprefix;
9072
94.4k
            haprefix.hashValue = (size_t) atts[i+2];
9073
94.4k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9074
9075
94.4k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9076
43.1k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9077
43.1k
        "Namespace prefix %s for %s on %s is not defined\n",
9078
43.1k
        aprefix, attname, localname);
9079
43.1k
                nsIndex = NS_INDEX_EMPTY;
9080
43.1k
            }
9081
94.4k
        }
9082
9083
234k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9084
234k
    }
9085
9086
    /*
9087
     * Maximum number of attributes including default attributes.
9088
     */
9089
891k
    maxAtts = nratts + nbTotalDef;
9090
9091
    /*
9092
     * Verify that attribute names are unique.
9093
     */
9094
891k
    if (maxAtts > 1) {
9095
57.6k
        attrHashSize = 4;
9096
114k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9097
56.4k
            attrHashSize *= 2;
9098
9099
57.6k
        if (attrHashSize > ctxt->attrHashMax) {
9100
2.99k
            xmlAttrHashBucket *tmp;
9101
9102
2.99k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9103
2.99k
            if (tmp == NULL) {
9104
0
                xmlErrMemory(ctxt);
9105
0
                goto done;
9106
0
            }
9107
9108
2.99k
            ctxt->attrHash = tmp;
9109
2.99k
            ctxt->attrHashMax = attrHashSize;
9110
2.99k
        }
9111
9112
57.6k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9113
9114
204k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9115
147k
            const xmlChar *nsuri;
9116
147k
            unsigned hashValue, nameHashValue, uriHashValue;
9117
147k
            int res;
9118
9119
147k
            attname = atts[i];
9120
147k
            aprefix = atts[i+1];
9121
147k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9122
            /* Hash values always have bit 31 set, see dict.c */
9123
147k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9124
9125
147k
            if (nsIndex == NS_INDEX_EMPTY) {
9126
                /*
9127
                 * Prefix with empty namespace means an undeclared
9128
                 * prefix which was already reported above.
9129
                 */
9130
106k
                if (aprefix != NULL)
9131
31.1k
                    continue;
9132
74.9k
                nsuri = NULL;
9133
74.9k
                uriHashValue = URI_HASH_EMPTY;
9134
74.9k
            } else if (nsIndex == NS_INDEX_XML) {
9135
3.35k
                nsuri = ctxt->str_xml_ns;
9136
3.35k
                uriHashValue = URI_HASH_XML;
9137
37.5k
            } else {
9138
37.5k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9139
37.5k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9140
37.5k
            }
9141
9142
115k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9143
115k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9144
115k
                                    hashValue, i);
9145
115k
            if (res < 0)
9146
0
                continue;
9147
9148
            /*
9149
             * [ WFC: Unique Att Spec ]
9150
             * No attribute name may appear more than once in the same
9151
             * start-tag or empty-element tag.
9152
             * As extended by the Namespace in XML REC.
9153
             */
9154
115k
            if (res < INT_MAX) {
9155
42.9k
                if (aprefix == atts[res+1]) {
9156
36.4k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9157
36.4k
                    numDupErr += 1;
9158
36.4k
                } else {
9159
6.45k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9160
6.45k
                             "Namespaced Attribute %s in '%s' redefined\n",
9161
6.45k
                             attname, nsuri, NULL);
9162
6.45k
                    numNsErr += 1;
9163
6.45k
                }
9164
42.9k
            }
9165
115k
        }
9166
57.6k
    }
9167
9168
    /*
9169
     * Default attributes
9170
     */
9171
891k
    if (ctxt->attsDefault != NULL) {
9172
73.5k
        xmlDefAttrsPtr defaults;
9173
9174
73.5k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9175
73.5k
  if (defaults != NULL) {
9176
301k
      for (i = 0; i < defaults->nbAttrs; i++) {
9177
259k
                xmlDefAttr *attr = &defaults->attrs[i];
9178
259k
                const xmlChar *nsuri = NULL;
9179
259k
                unsigned hashValue, uriHashValue = 0;
9180
259k
                int res;
9181
9182
259k
          attname = attr->name.name;
9183
259k
    aprefix = attr->prefix.name;
9184
9185
259k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9186
12.2k
                    continue;
9187
246k
    if (aprefix == ctxt->str_xmlns)
9188
128k
                    continue;
9189
9190
118k
                if (aprefix == NULL) {
9191
37.2k
                    nsIndex = NS_INDEX_EMPTY;
9192
37.2k
                    nsuri = NULL;
9193
37.2k
                    uriHashValue = URI_HASH_EMPTY;
9194
81.1k
                } else if (aprefix == ctxt->str_xml) {
9195
5.93k
                    nsIndex = NS_INDEX_XML;
9196
5.93k
                    nsuri = ctxt->str_xml_ns;
9197
5.93k
                    uriHashValue = URI_HASH_XML;
9198
75.2k
                } else {
9199
75.2k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9200
75.2k
                    if ((nsIndex == INT_MAX) ||
9201
75.2k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9202
72.2k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9203
72.2k
                                 "Namespace prefix %s for %s on %s is not "
9204
72.2k
                                 "defined\n",
9205
72.2k
                                 aprefix, attname, localname);
9206
72.2k
                        nsIndex = NS_INDEX_EMPTY;
9207
72.2k
                        nsuri = NULL;
9208
72.2k
                        uriHashValue = URI_HASH_EMPTY;
9209
72.2k
                    } else {
9210
3.02k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9211
3.02k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9212
3.02k
                    }
9213
75.2k
                }
9214
9215
                /*
9216
                 * Check whether the attribute exists
9217
                 */
9218
118k
                if (maxAtts > 1) {
9219
112k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9220
112k
                                                   uriHashValue);
9221
112k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9222
112k
                                            hashValue, nbatts);
9223
112k
                    if (res < 0)
9224
0
                        continue;
9225
112k
                    if (res < INT_MAX) {
9226
15.1k
                        if (aprefix == atts[res+1])
9227
5.56k
                            continue;
9228
9.62k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9229
9.62k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9230
9.62k
                                 attname, nsuri, NULL);
9231
9.62k
                    }
9232
112k
                }
9233
9234
112k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9235
9236
112k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9237
2.54k
                    res = xmlCtxtGrowAttrs(ctxt);
9238
9239
2.54k
                    maxatts = ctxt->maxatts;
9240
2.54k
                    atts = ctxt->atts;
9241
9242
2.54k
                    if (res < 0) {
9243
0
                        localname = NULL;
9244
0
                        goto done;
9245
0
                    }
9246
2.54k
                }
9247
9248
112k
                atts[nbatts++] = attname;
9249
112k
                atts[nbatts++] = aprefix;
9250
112k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9251
112k
                atts[nbatts++] = attr->value.name;
9252
112k
                atts[nbatts++] = attr->valueEnd;
9253
9254
112k
#ifdef LIBXML_VALID_ENABLED
9255
                /*
9256
                 * This should be moved to valid.c, but we don't keep track
9257
                 * whether an attribute was defaulted.
9258
                 */
9259
112k
                if ((ctxt->validate) &&
9260
112k
                    (ctxt->standalone == 1) &&
9261
112k
                    (attr->external != 0)) {
9262
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9263
0
                            "standalone: attribute %s on %s defaulted "
9264
0
                            "from external subset\n",
9265
0
                            attname, localname);
9266
0
                }
9267
112k
#endif
9268
112k
                nbdef++;
9269
112k
      }
9270
41.8k
  }
9271
73.5k
    }
9272
9273
    /*
9274
     * Using a single hash table for nsUri/localName pairs cannot
9275
     * detect duplicate QNames reliably. The following example will
9276
     * only result in two namespace errors.
9277
     *
9278
     * <doc xmlns:a="a" xmlns:b="a">
9279
     *   <elem a:a="" b:a="" b:a=""/>
9280
     * </doc>
9281
     *
9282
     * If we saw more than one namespace error but no duplicate QNames
9283
     * were found, we have to scan for duplicate QNames.
9284
     */
9285
891k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9286
1.92k
        memset(ctxt->attrHash, -1,
9287
1.92k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9288
9289
13.1k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9290
11.1k
            unsigned hashValue, nameHashValue, prefixHashValue;
9291
11.1k
            int res;
9292
9293
11.1k
            aprefix = atts[i+1];
9294
11.1k
            if (aprefix == NULL)
9295
892
                continue;
9296
9297
10.2k
            attname = atts[i];
9298
            /* Hash values always have bit 31 set, see dict.c */
9299
10.2k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9300
10.2k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9301
9302
10.2k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9303
10.2k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9304
10.2k
                                         aprefix, hashValue, i);
9305
10.2k
            if (res < INT_MAX)
9306
3.54k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9307
10.2k
        }
9308
1.92k
    }
9309
9310
    /*
9311
     * Reconstruct attribute pointers
9312
     */
9313
1.23M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9314
        /* namespace URI */
9315
347k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9316
347k
        if (nsIndex == INT_MAX)
9317
264k
            atts[i+2] = NULL;
9318
82.4k
        else if (nsIndex == INT_MAX - 1)
9319
28.3k
            atts[i+2] = ctxt->str_xml_ns;
9320
54.1k
        else
9321
54.1k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9322
9323
347k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9324
198k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9325
198k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9326
198k
        }
9327
347k
    }
9328
9329
891k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9330
891k
    if ((prefix != NULL) && (uri == NULL)) {
9331
89.5k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9332
89.5k
           "Namespace prefix %s on %s is not defined\n",
9333
89.5k
     prefix, localname, NULL);
9334
89.5k
    }
9335
891k
    *pref = prefix;
9336
891k
    *URI = uri;
9337
9338
    /*
9339
     * SAX callback
9340
     */
9341
891k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9342
891k
  (!ctxt->disableSAX)) {
9343
82.3k
  if (nbNs > 0)
9344
11.9k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
11.9k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9346
11.9k
        nbatts / 5, nbdef, atts);
9347
70.3k
  else
9348
70.3k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9349
70.3k
                          0, NULL, nbatts / 5, nbdef, atts);
9350
82.3k
    }
9351
9352
891k
done:
9353
    /*
9354
     * Free allocated attribute values
9355
     */
9356
891k
    if (attval != 0) {
9357
107k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9358
73.8k
      if (ctxt->attallocs[j] & 0x80000000)
9359
36.1k
          xmlFree((xmlChar *) atts[i+3]);
9360
34.1k
    }
9361
9362
891k
    *nbNsPtr = nbNs;
9363
891k
    return(localname);
9364
891k
}
9365
9366
/**
9367
 * Parse an end tag. Always consumes '</'.
9368
 *
9369
 *     [42] ETag ::= '</' Name S? '>'
9370
 *
9371
 * With namespace
9372
 *
9373
 *     [NS 9] ETag ::= '</' QName S? '>'
9374
 * @param ctxt  an XML parser context
9375
 * @param tag  the corresponding start tag
9376
 */
9377
9378
static void
9379
239k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9380
239k
    const xmlChar *name;
9381
9382
239k
    GROW;
9383
239k
    if ((RAW != '<') || (NXT(1) != '/')) {
9384
277
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9385
277
  return;
9386
277
    }
9387
239k
    SKIP(2);
9388
9389
239k
    if (tag->prefix == NULL)
9390
155k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9391
84.3k
    else
9392
84.3k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9393
9394
    /*
9395
     * We should definitely be at the ending "S? '>'" part
9396
     */
9397
239k
    GROW;
9398
239k
    SKIP_BLANKS;
9399
239k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9400
28.9k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9401
28.9k
    } else
9402
210k
  NEXT1;
9403
9404
    /*
9405
     * [ WFC: Element Type Match ]
9406
     * The Name in an element's end-tag must match the element type in the
9407
     * start-tag.
9408
     *
9409
     */
9410
239k
    if (name != (xmlChar*)1) {
9411
58.1k
        if (name == NULL) name = BAD_CAST "unparsable";
9412
58.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9413
58.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9414
58.1k
                    ctxt->name, tag->line, name);
9415
58.1k
    }
9416
9417
    /*
9418
     * SAX: End of Tag
9419
     */
9420
239k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9421
239k
  (!ctxt->disableSAX))
9422
8.35k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9423
8.35k
                                tag->URI);
9424
9425
239k
    spacePop(ctxt);
9426
239k
    if (tag->nsNr != 0)
9427
15.8k
  xmlParserNsPop(ctxt, tag->nsNr);
9428
239k
}
9429
9430
/**
9431
 * Parse escaped pure raw content. Always consumes '<!['.
9432
 *
9433
 * @deprecated Internal function, don't use.
9434
 *
9435
 *     [18] CDSect ::= CDStart CData CDEnd
9436
 *
9437
 *     [19] CDStart ::= '<![CDATA['
9438
 *
9439
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9440
 *
9441
 *     [21] CDEnd ::= ']]>'
9442
 * @param ctxt  an XML parser context
9443
 */
9444
void
9445
20.0k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9446
20.0k
    xmlChar *buf = NULL;
9447
20.0k
    int len = 0;
9448
20.0k
    int size = XML_PARSER_BUFFER_SIZE;
9449
20.0k
    int r, rl;
9450
20.0k
    int s, sl;
9451
20.0k
    int cur, l;
9452
20.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9453
0
                    XML_MAX_HUGE_LENGTH :
9454
20.0k
                    XML_MAX_TEXT_LENGTH;
9455
9456
20.0k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9457
0
        return;
9458
20.0k
    SKIP(3);
9459
9460
20.0k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9461
0
        return;
9462
20.0k
    SKIP(6);
9463
9464
20.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
9465
20.0k
    if (!IS_CHAR(r)) {
9466
5.87k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9467
5.87k
        goto out;
9468
5.87k
    }
9469
14.2k
    NEXTL(rl);
9470
14.2k
    s = xmlCurrentCharRecover(ctxt, &sl);
9471
14.2k
    if (!IS_CHAR(s)) {
9472
2.07k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9473
2.07k
        goto out;
9474
2.07k
    }
9475
12.1k
    NEXTL(sl);
9476
12.1k
    cur = xmlCurrentCharRecover(ctxt, &l);
9477
12.1k
    buf = xmlMalloc(size);
9478
12.1k
    if (buf == NULL) {
9479
0
  xmlErrMemory(ctxt);
9480
0
        goto out;
9481
0
    }
9482
1.67M
    while (IS_CHAR(cur) &&
9483
1.67M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9484
1.65M
  if (len + 5 >= size) {
9485
2.48k
      xmlChar *tmp;
9486
2.48k
            int newSize;
9487
9488
2.48k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9489
2.48k
            if (newSize < 0) {
9490
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9491
0
                               "CData section too big found\n");
9492
0
                goto out;
9493
0
            }
9494
2.48k
      tmp = xmlRealloc(buf, newSize);
9495
2.48k
      if (tmp == NULL) {
9496
0
    xmlErrMemory(ctxt);
9497
0
                goto out;
9498
0
      }
9499
2.48k
      buf = tmp;
9500
2.48k
      size = newSize;
9501
2.48k
  }
9502
1.65M
  COPY_BUF(buf, len, r);
9503
1.65M
  r = s;
9504
1.65M
  rl = sl;
9505
1.65M
  s = cur;
9506
1.65M
  sl = l;
9507
1.65M
  NEXTL(l);
9508
1.65M
  cur = xmlCurrentCharRecover(ctxt, &l);
9509
1.65M
    }
9510
12.1k
    buf[len] = 0;
9511
12.1k
    if (cur != '>') {
9512
8.03k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9513
8.03k
                       "CData section not finished\n%.50s\n", buf);
9514
8.03k
        goto out;
9515
8.03k
    }
9516
4.11k
    NEXTL(l);
9517
9518
    /*
9519
     * OK the buffer is to be consumed as cdata.
9520
     */
9521
4.11k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9522
2.64k
        if ((ctxt->sax->cdataBlock != NULL) &&
9523
2.64k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9524
2.64k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9525
2.64k
        } else if (ctxt->sax->characters != NULL) {
9526
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9527
0
        }
9528
2.64k
    }
9529
9530
20.0k
out:
9531
20.0k
    xmlFree(buf);
9532
20.0k
}
9533
9534
/**
9535
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9536
 * unexpected EOF to the caller.
9537
 *
9538
 * @param ctxt  an XML parser context
9539
 */
9540
9541
static void
9542
17.1k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9543
17.1k
    int oldNameNr = ctxt->nameNr;
9544
17.1k
    int oldSpaceNr = ctxt->spaceNr;
9545
17.1k
    int oldNodeNr = ctxt->nodeNr;
9546
9547
17.1k
    GROW;
9548
39.4M
    while ((ctxt->input->cur < ctxt->input->end) &&
9549
39.4M
     (PARSER_STOPPED(ctxt) == 0)) {
9550
39.4M
  const xmlChar *cur = ctxt->input->cur;
9551
9552
  /*
9553
   * First case : a Processing Instruction.
9554
   */
9555
39.4M
  if ((*cur == '<') && (cur[1] == '?')) {
9556
43.2k
      xmlParsePI(ctxt);
9557
43.2k
  }
9558
9559
  /*
9560
   * Second case : a CDSection
9561
   */
9562
  /* 2.6.0 test was *cur not RAW */
9563
39.3M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9564
20.0k
      xmlParseCDSect(ctxt);
9565
20.0k
  }
9566
9567
  /*
9568
   * Third case :  a comment
9569
   */
9570
39.3M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9571
39.3M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9572
31.0k
      xmlParseComment(ctxt);
9573
31.0k
  }
9574
9575
  /*
9576
   * Fourth case :  a sub-element.
9577
   */
9578
39.3M
  else if (*cur == '<') {
9579
2.20M
            if (NXT(1) == '/') {
9580
239k
                if (ctxt->nameNr <= oldNameNr)
9581
468
                    break;
9582
239k
          xmlParseElementEnd(ctxt);
9583
1.96M
            } else {
9584
1.96M
          xmlParseElementStart(ctxt);
9585
1.96M
            }
9586
2.20M
  }
9587
9588
  /*
9589
   * Fifth case : a reference. If if has not been resolved,
9590
   *    parsing returns it's Name, create the node
9591
   */
9592
9593
37.1M
  else if (*cur == '&') {
9594
656k
      xmlParseReference(ctxt);
9595
656k
  }
9596
9597
  /*
9598
   * Last case, text. Note that References are handled directly.
9599
   */
9600
36.4M
  else {
9601
36.4M
      xmlParseCharDataInternal(ctxt, 0);
9602
36.4M
  }
9603
9604
39.4M
  SHRINK;
9605
39.4M
  GROW;
9606
39.4M
    }
9607
9608
17.1k
    if ((ctxt->nameNr > oldNameNr) &&
9609
17.1k
        (ctxt->input->cur >= ctxt->input->end) &&
9610
17.1k
        (ctxt->wellFormed)) {
9611
691
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9612
691
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9613
691
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9614
691
                "Premature end of data in tag %s line %d\n",
9615
691
                name, line, NULL);
9616
691
    }
9617
9618
    /*
9619
     * Clean up in error case
9620
     */
9621
9622
60.1k
    while (ctxt->nodeNr > oldNodeNr)
9623
43.0k
        nodePop(ctxt);
9624
9625
258k
    while (ctxt->nameNr > oldNameNr) {
9626
241k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9627
9628
241k
        if (tag->nsNr != 0)
9629
74.6k
            xmlParserNsPop(ctxt, tag->nsNr);
9630
9631
241k
        namePop(ctxt);
9632
241k
    }
9633
9634
258k
    while (ctxt->spaceNr > oldSpaceNr)
9635
241k
        spacePop(ctxt);
9636
17.1k
}
9637
9638
/**
9639
 * Parse XML element content. This is useful if you're only interested
9640
 * in custom SAX callbacks. If you want a node list, use
9641
 * #xmlCtxtParseContent.
9642
 *
9643
 * @param ctxt  an XML parser context
9644
 */
9645
void
9646
0
xmlParseContent(xmlParserCtxt *ctxt) {
9647
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9648
0
        return;
9649
9650
0
    xmlCtxtInitializeLate(ctxt);
9651
9652
0
    xmlParseContentInternal(ctxt);
9653
9654
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9655
0
}
9656
9657
/**
9658
 * Parse an XML element
9659
 *
9660
 * @deprecated Internal function, don't use.
9661
 *
9662
 *     [39] element ::= EmptyElemTag | STag content ETag
9663
 *
9664
 * [ WFC: Element Type Match ]
9665
 * The Name in an element's end-tag must match the element type in the
9666
 * start-tag.
9667
 *
9668
 * @param ctxt  an XML parser context
9669
 */
9670
9671
void
9672
22.1k
xmlParseElement(xmlParserCtxt *ctxt) {
9673
22.1k
    if (xmlParseElementStart(ctxt) != 0)
9674
6.48k
        return;
9675
9676
15.6k
    xmlParseContentInternal(ctxt);
9677
9678
15.6k
    if (ctxt->input->cur >= ctxt->input->end) {
9679
14.9k
        if (ctxt->wellFormed) {
9680
931
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9681
931
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9682
931
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9683
931
                    "Premature end of data in tag %s line %d\n",
9684
931
                    name, line, NULL);
9685
931
        }
9686
14.9k
        return;
9687
14.9k
    }
9688
9689
742
    xmlParseElementEnd(ctxt);
9690
742
}
9691
9692
/**
9693
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9694
 * opening tag was parsed, 1 if an empty element was parsed.
9695
 *
9696
 * Always consumes '<'.
9697
 *
9698
 * @param ctxt  an XML parser context
9699
 */
9700
static int
9701
1.99M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9702
1.99M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9703
1.99M
    const xmlChar *name;
9704
1.99M
    const xmlChar *prefix = NULL;
9705
1.99M
    const xmlChar *URI = NULL;
9706
1.99M
    xmlParserNodeInfo node_info;
9707
1.99M
    int line;
9708
1.99M
    xmlNodePtr cur;
9709
1.99M
    int nbNs = 0;
9710
9711
1.99M
    if (ctxt->nameNr > maxDepth) {
9712
55
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9713
55
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9714
55
                ctxt->nameNr);
9715
55
  return(-1);
9716
55
    }
9717
9718
    /* Capture start position */
9719
1.99M
    if (ctxt->record_info) {
9720
0
        node_info.begin_pos = ctxt->input->consumed +
9721
0
                          (CUR_PTR - ctxt->input->base);
9722
0
  node_info.begin_line = ctxt->input->line;
9723
0
    }
9724
9725
1.99M
    if (ctxt->spaceNr == 0)
9726
0
  spacePush(ctxt, -1);
9727
1.99M
    else if (*ctxt->space == -2)
9728
0
  spacePush(ctxt, -1);
9729
1.99M
    else
9730
1.99M
  spacePush(ctxt, *ctxt->space);
9731
9732
1.99M
    line = ctxt->input->line;
9733
1.99M
#ifdef LIBXML_SAX1_ENABLED
9734
1.99M
    if (ctxt->sax2)
9735
1.99M
#endif /* LIBXML_SAX1_ENABLED */
9736
1.99M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9737
0
#ifdef LIBXML_SAX1_ENABLED
9738
0
    else
9739
0
  name = xmlParseStartTag(ctxt);
9740
1.99M
#endif /* LIBXML_SAX1_ENABLED */
9741
1.99M
    if (name == NULL) {
9742
1.09M
  spacePop(ctxt);
9743
1.09M
        return(-1);
9744
1.09M
    }
9745
891k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9746
891k
    cur = ctxt->node;
9747
9748
891k
#ifdef LIBXML_VALID_ENABLED
9749
    /*
9750
     * [ VC: Root Element Type ]
9751
     * The Name in the document type declaration must match the element
9752
     * type of the root element.
9753
     */
9754
891k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9755
891k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9756
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9757
891k
#endif /* LIBXML_VALID_ENABLED */
9758
9759
    /*
9760
     * Check for an Empty Element.
9761
     */
9762
891k
    if ((RAW == '/') && (NXT(1) == '>')) {
9763
38.7k
        SKIP(2);
9764
38.7k
  if (ctxt->sax2) {
9765
38.7k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9766
38.7k
    (!ctxt->disableSAX))
9767
12.2k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9768
38.7k
#ifdef LIBXML_SAX1_ENABLED
9769
38.7k
  } else {
9770
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9771
0
    (!ctxt->disableSAX))
9772
0
    ctxt->sax->endElement(ctxt->userData, name);
9773
0
#endif /* LIBXML_SAX1_ENABLED */
9774
0
  }
9775
38.7k
  namePop(ctxt);
9776
38.7k
  spacePop(ctxt);
9777
38.7k
  if (nbNs > 0)
9778
6.89k
      xmlParserNsPop(ctxt, nbNs);
9779
38.7k
  if (cur != NULL && ctxt->record_info) {
9780
0
            node_info.node = cur;
9781
0
            node_info.end_pos = ctxt->input->consumed +
9782
0
                                (CUR_PTR - ctxt->input->base);
9783
0
            node_info.end_line = ctxt->input->line;
9784
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9785
0
  }
9786
38.7k
  return(1);
9787
38.7k
    }
9788
852k
    if (RAW == '>') {
9789
496k
        NEXT1;
9790
496k
        if (cur != NULL && ctxt->record_info) {
9791
0
            node_info.node = cur;
9792
0
            node_info.end_pos = 0;
9793
0
            node_info.end_line = 0;
9794
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9795
0
        }
9796
496k
    } else {
9797
356k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9798
356k
         "Couldn't find end of Start Tag %s line %d\n",
9799
356k
                    name, line, NULL);
9800
9801
  /*
9802
   * end of parsing of this node.
9803
   */
9804
356k
  nodePop(ctxt);
9805
356k
  namePop(ctxt);
9806
356k
  spacePop(ctxt);
9807
356k
  if (nbNs > 0)
9808
88.3k
      xmlParserNsPop(ctxt, nbNs);
9809
356k
  return(-1);
9810
356k
    }
9811
9812
496k
    return(0);
9813
852k
}
9814
9815
/**
9816
 * Parse the end of an XML element. Always consumes '</'.
9817
 *
9818
 * @param ctxt  an XML parser context
9819
 */
9820
static void
9821
239k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9822
239k
    xmlNodePtr cur = ctxt->node;
9823
9824
239k
    if (ctxt->nameNr <= 0) {
9825
0
        if ((RAW == '<') && (NXT(1) == '/'))
9826
0
            SKIP(2);
9827
0
        return;
9828
0
    }
9829
9830
    /*
9831
     * parse the end of tag: '</' should be here.
9832
     */
9833
239k
    if (ctxt->sax2) {
9834
239k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9835
239k
  namePop(ctxt);
9836
239k
    }
9837
0
#ifdef LIBXML_SAX1_ENABLED
9838
0
    else
9839
0
  xmlParseEndTag1(ctxt, 0);
9840
239k
#endif /* LIBXML_SAX1_ENABLED */
9841
9842
    /*
9843
     * Capture end position
9844
     */
9845
239k
    if (cur != NULL && ctxt->record_info) {
9846
0
        xmlParserNodeInfoPtr node_info;
9847
9848
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9849
0
        if (node_info != NULL) {
9850
0
            node_info->end_pos = ctxt->input->consumed +
9851
0
                                 (CUR_PTR - ctxt->input->base);
9852
0
            node_info->end_line = ctxt->input->line;
9853
0
        }
9854
0
    }
9855
239k
}
9856
9857
/**
9858
 * Parse the XML version value.
9859
 *
9860
 * @deprecated Internal function, don't use.
9861
 *
9862
 *     [26] VersionNum ::= '1.' [0-9]+
9863
 *
9864
 * In practice allow [0-9].[0-9]+ at that level
9865
 *
9866
 * @param ctxt  an XML parser context
9867
 * @returns the string giving the XML version number, or NULL
9868
 */
9869
xmlChar *
9870
2.35k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9871
2.35k
    xmlChar *buf = NULL;
9872
2.35k
    int len = 0;
9873
2.35k
    int size = 10;
9874
2.35k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9875
0
                    XML_MAX_TEXT_LENGTH :
9876
2.35k
                    XML_MAX_NAME_LENGTH;
9877
2.35k
    xmlChar cur;
9878
9879
2.35k
    buf = xmlMalloc(size);
9880
2.35k
    if (buf == NULL) {
9881
0
  xmlErrMemory(ctxt);
9882
0
  return(NULL);
9883
0
    }
9884
2.35k
    cur = CUR;
9885
2.35k
    if (!((cur >= '0') && (cur <= '9'))) {
9886
137
  xmlFree(buf);
9887
137
  return(NULL);
9888
137
    }
9889
2.21k
    buf[len++] = cur;
9890
2.21k
    NEXT;
9891
2.21k
    cur=CUR;
9892
2.21k
    if (cur != '.') {
9893
23
  xmlFree(buf);
9894
23
  return(NULL);
9895
23
    }
9896
2.19k
    buf[len++] = cur;
9897
2.19k
    NEXT;
9898
2.19k
    cur=CUR;
9899
5.45k
    while ((cur >= '0') && (cur <= '9')) {
9900
3.26k
  if (len + 1 >= size) {
9901
113
      xmlChar *tmp;
9902
113
            int newSize;
9903
9904
113
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9905
113
            if (newSize < 0) {
9906
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9907
0
                xmlFree(buf);
9908
0
                return(NULL);
9909
0
            }
9910
113
      tmp = xmlRealloc(buf, newSize);
9911
113
      if (tmp == NULL) {
9912
0
    xmlErrMemory(ctxt);
9913
0
          xmlFree(buf);
9914
0
    return(NULL);
9915
0
      }
9916
113
      buf = tmp;
9917
113
            size = newSize;
9918
113
  }
9919
3.26k
  buf[len++] = cur;
9920
3.26k
  NEXT;
9921
3.26k
  cur=CUR;
9922
3.26k
    }
9923
2.19k
    buf[len] = 0;
9924
2.19k
    return(buf);
9925
2.19k
}
9926
9927
/**
9928
 * Parse the XML version.
9929
 *
9930
 * @deprecated Internal function, don't use.
9931
 *
9932
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9933
 *
9934
 *     [25] Eq ::= S? '=' S?
9935
 *
9936
 * @param ctxt  an XML parser context
9937
 * @returns the version string, e.g. "1.0"
9938
 */
9939
9940
xmlChar *
9941
5.19k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9942
5.19k
    xmlChar *version = NULL;
9943
9944
5.19k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9945
2.40k
  SKIP(7);
9946
2.40k
  SKIP_BLANKS;
9947
2.40k
  if (RAW != '=') {
9948
24
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9949
24
      return(NULL);
9950
24
        }
9951
2.37k
  NEXT;
9952
2.37k
  SKIP_BLANKS;
9953
2.37k
  if (RAW == '"') {
9954
2.01k
      NEXT;
9955
2.01k
      version = xmlParseVersionNum(ctxt);
9956
2.01k
      if (RAW != '"') {
9957
159
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9958
159
      } else
9959
1.85k
          NEXT;
9960
2.01k
  } else if (RAW == '\''){
9961
342
      NEXT;
9962
342
      version = xmlParseVersionNum(ctxt);
9963
342
      if (RAW != '\'') {
9964
11
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9965
11
      } else
9966
331
          NEXT;
9967
342
  } else {
9968
23
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9969
23
  }
9970
2.37k
    }
9971
5.16k
    return(version);
9972
5.19k
}
9973
9974
/**
9975
 * Parse the XML encoding name
9976
 *
9977
 * @deprecated Internal function, don't use.
9978
 *
9979
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9980
 *
9981
 * @param ctxt  an XML parser context
9982
 * @returns the encoding name value or NULL
9983
 */
9984
xmlChar *
9985
3.25k
xmlParseEncName(xmlParserCtxt *ctxt) {
9986
3.25k
    xmlChar *buf = NULL;
9987
3.25k
    int len = 0;
9988
3.25k
    int size = 10;
9989
3.25k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9990
0
                    XML_MAX_TEXT_LENGTH :
9991
3.25k
                    XML_MAX_NAME_LENGTH;
9992
3.25k
    xmlChar cur;
9993
9994
3.25k
    cur = CUR;
9995
3.25k
    if (((cur >= 'a') && (cur <= 'z')) ||
9996
3.25k
        ((cur >= 'A') && (cur <= 'Z'))) {
9997
3.22k
  buf = xmlMalloc(size);
9998
3.22k
  if (buf == NULL) {
9999
0
      xmlErrMemory(ctxt);
10000
0
      return(NULL);
10001
0
  }
10002
10003
3.22k
  buf[len++] = cur;
10004
3.22k
  NEXT;
10005
3.22k
  cur = CUR;
10006
263k
  while (((cur >= 'a') && (cur <= 'z')) ||
10007
263k
         ((cur >= 'A') && (cur <= 'Z')) ||
10008
263k
         ((cur >= '0') && (cur <= '9')) ||
10009
263k
         (cur == '.') || (cur == '_') ||
10010
263k
         (cur == '-')) {
10011
260k
      if (len + 1 >= size) {
10012
1.39k
          xmlChar *tmp;
10013
1.39k
                int newSize;
10014
10015
1.39k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10016
1.39k
                if (newSize < 0) {
10017
2
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10018
2
                    xmlFree(buf);
10019
2
                    return(NULL);
10020
2
                }
10021
1.38k
    tmp = xmlRealloc(buf, newSize);
10022
1.38k
    if (tmp == NULL) {
10023
0
        xmlErrMemory(ctxt);
10024
0
        xmlFree(buf);
10025
0
        return(NULL);
10026
0
    }
10027
1.38k
    buf = tmp;
10028
1.38k
                size = newSize;
10029
1.38k
      }
10030
260k
      buf[len++] = cur;
10031
260k
      NEXT;
10032
260k
      cur = CUR;
10033
260k
        }
10034
3.22k
  buf[len] = 0;
10035
3.22k
    } else {
10036
23
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10037
23
    }
10038
3.25k
    return(buf);
10039
3.25k
}
10040
10041
/**
10042
 * Parse the XML encoding declaration
10043
 *
10044
 * @deprecated Internal function, don't use.
10045
 *
10046
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10047
 *                           "'" EncName "'")
10048
 *
10049
 * this setups the conversion filters.
10050
 *
10051
 * @param ctxt  an XML parser context
10052
 * @returns the encoding value or NULL
10053
 */
10054
10055
const xmlChar *
10056
5.17k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10057
5.17k
    xmlChar *encoding = NULL;
10058
10059
5.17k
    SKIP_BLANKS;
10060
5.17k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10061
1.81k
        return(NULL);
10062
10063
3.36k
    SKIP(8);
10064
3.36k
    SKIP_BLANKS;
10065
3.36k
    if (RAW != '=') {
10066
21
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10067
21
        return(NULL);
10068
21
    }
10069
3.34k
    NEXT;
10070
3.34k
    SKIP_BLANKS;
10071
3.34k
    if (RAW == '"') {
10072
2.88k
        NEXT;
10073
2.88k
        encoding = xmlParseEncName(ctxt);
10074
2.88k
        if (RAW != '"') {
10075
164
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10076
164
            xmlFree(encoding);
10077
164
            return(NULL);
10078
164
        } else
10079
2.71k
            NEXT;
10080
2.88k
    } else if (RAW == '\''){
10081
369
        NEXT;
10082
369
        encoding = xmlParseEncName(ctxt);
10083
369
        if (RAW != '\'') {
10084
12
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10085
12
            xmlFree(encoding);
10086
12
            return(NULL);
10087
12
        } else
10088
357
            NEXT;
10089
369
    } else {
10090
89
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10091
89
    }
10092
10093
3.16k
    if (encoding == NULL)
10094
92
        return(NULL);
10095
10096
3.07k
    xmlSetDeclaredEncoding(ctxt, encoding);
10097
10098
3.07k
    return(ctxt->encoding);
10099
3.16k
}
10100
10101
/**
10102
 * Parse the XML standalone declaration
10103
 *
10104
 * @deprecated Internal function, don't use.
10105
 *
10106
 *     [32] SDDecl ::= S 'standalone' Eq
10107
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10108
 *
10109
 * [ VC: Standalone Document Declaration ]
10110
 * TODO The standalone document declaration must have the value "no"
10111
 * if any external markup declarations contain declarations of:
10112
 *  - attributes with default values, if elements to which these
10113
 *    attributes apply appear in the document without specifications
10114
 *    of values for these attributes, or
10115
 *  - entities (other than amp, lt, gt, apos, quot), if references
10116
 *    to those entities appear in the document, or
10117
 *  - attributes with values subject to normalization, where the
10118
 *    attribute appears in the document with a value which will change
10119
 *    as a result of normalization, or
10120
 *  - element types with element content, if white space occurs directly
10121
 *    within any instance of those types.
10122
 *
10123
 * @param ctxt  an XML parser context
10124
 * @returns
10125
 *   1 if standalone="yes"
10126
 *   0 if standalone="no"
10127
 *  -2 if standalone attribute is missing or invalid
10128
 *    (A standalone value of -2 means that the XML declaration was found,
10129
 *     but no value was specified for the standalone attribute).
10130
 */
10131
10132
int
10133
4.05k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10134
4.05k
    int standalone = -2;
10135
10136
4.05k
    SKIP_BLANKS;
10137
4.05k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10138
791
  SKIP(10);
10139
791
        SKIP_BLANKS;
10140
791
  if (RAW != '=') {
10141
120
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
120
      return(standalone);
10143
120
        }
10144
671
  NEXT;
10145
671
  SKIP_BLANKS;
10146
671
        if (RAW == '\''){
10147
322
      NEXT;
10148
322
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10149
306
          standalone = 0;
10150
306
                SKIP(2);
10151
306
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10152
16
                 (NXT(2) == 's')) {
10153
6
          standalone = 1;
10154
6
    SKIP(3);
10155
10
            } else {
10156
10
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10157
10
      }
10158
322
      if (RAW != '\'') {
10159
18
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10160
18
      } else
10161
304
          NEXT;
10162
349
  } else if (RAW == '"'){
10163
344
      NEXT;
10164
344
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10165
281
          standalone = 0;
10166
281
    SKIP(2);
10167
281
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10168
63
                 (NXT(2) == 's')) {
10169
49
          standalone = 1;
10170
49
                SKIP(3);
10171
49
            } else {
10172
14
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10173
14
      }
10174
344
      if (RAW != '"') {
10175
22
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10176
22
      } else
10177
322
          NEXT;
10178
344
  } else {
10179
5
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10180
5
        }
10181
671
    }
10182
3.93k
    return(standalone);
10183
4.05k
}
10184
10185
/**
10186
 * Parse an XML declaration header
10187
 *
10188
 * @deprecated Internal function, don't use.
10189
 *
10190
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10191
 * @param ctxt  an XML parser context
10192
 */
10193
10194
void
10195
5.19k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10196
5.19k
    xmlChar *version;
10197
10198
    /*
10199
     * This value for standalone indicates that the document has an
10200
     * XML declaration but it does not have a standalone attribute.
10201
     * It will be overwritten later if a standalone attribute is found.
10202
     */
10203
10204
5.19k
    ctxt->standalone = -2;
10205
10206
    /*
10207
     * We know that '<?xml' is here.
10208
     */
10209
5.19k
    SKIP(5);
10210
10211
5.19k
    if (!IS_BLANK_CH(RAW)) {
10212
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10213
0
                 "Blank needed after '<?xml'\n");
10214
0
    }
10215
5.19k
    SKIP_BLANKS;
10216
10217
    /*
10218
     * We must have the VersionInfo here.
10219
     */
10220
5.19k
    version = xmlParseVersionInfo(ctxt);
10221
5.19k
    if (version == NULL) {
10222
2.99k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10223
2.99k
    } else {
10224
2.19k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10225
      /*
10226
       * Changed here for XML-1.0 5th edition
10227
       */
10228
1.08k
      if (ctxt->options & XML_PARSE_OLD10) {
10229
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10230
0
                "Unsupported version '%s'\n",
10231
0
                version);
10232
1.08k
      } else {
10233
1.08k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10234
807
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10235
807
                      "Unsupported version '%s'\n",
10236
807
          version, NULL);
10237
807
    } else {
10238
279
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10239
279
              "Unsupported version '%s'\n",
10240
279
              version);
10241
279
    }
10242
1.08k
      }
10243
1.08k
  }
10244
2.19k
  if (ctxt->version != NULL)
10245
0
      xmlFree(ctxt->version);
10246
2.19k
  ctxt->version = version;
10247
2.19k
    }
10248
10249
    /*
10250
     * We may have the encoding declaration
10251
     */
10252
5.19k
    if (!IS_BLANK_CH(RAW)) {
10253
2.97k
        if ((RAW == '?') && (NXT(1) == '>')) {
10254
18
      SKIP(2);
10255
18
      return;
10256
18
  }
10257
2.95k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10258
2.95k
    }
10259
5.17k
    xmlParseEncodingDecl(ctxt);
10260
10261
    /*
10262
     * We may have the standalone status.
10263
     */
10264
5.17k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10265
2.14k
        if ((RAW == '?') && (NXT(1) == '>')) {
10266
1.11k
      SKIP(2);
10267
1.11k
      return;
10268
1.11k
  }
10269
1.02k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10270
1.02k
    }
10271
10272
    /*
10273
     * We can grow the input buffer freely at that point
10274
     */
10275
4.05k
    GROW;
10276
10277
4.05k
    SKIP_BLANKS;
10278
4.05k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10279
10280
4.05k
    SKIP_BLANKS;
10281
4.05k
    if ((RAW == '?') && (NXT(1) == '>')) {
10282
713
        SKIP(2);
10283
3.34k
    } else if (RAW == '>') {
10284
        /* Deprecated old WD ... */
10285
379
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10286
379
  NEXT;
10287
2.96k
    } else {
10288
2.96k
        int c;
10289
10290
2.96k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10291
1.48M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10292
1.48M
               ((c = CUR) != 0)) {
10293
1.48M
            NEXT;
10294
1.48M
            if (c == '>')
10295
2.12k
                break;
10296
1.48M
        }
10297
2.96k
    }
10298
4.05k
}
10299
10300
/**
10301
 * @since 2.14.0
10302
 *
10303
 * @param ctxt  parser context
10304
 * @returns the version from the XML declaration.
10305
 */
10306
const xmlChar *
10307
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10308
0
    if (ctxt == NULL)
10309
0
        return(NULL);
10310
10311
0
    return(ctxt->version);
10312
0
}
10313
10314
/**
10315
 * @since 2.14.0
10316
 *
10317
 * @param ctxt  parser context
10318
 * @returns the value from the standalone document declaration.
10319
 */
10320
int
10321
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10322
0
    if (ctxt == NULL)
10323
0
        return(0);
10324
10325
0
    return(ctxt->standalone);
10326
0
}
10327
10328
/**
10329
 * Parse an XML Misc* optional field.
10330
 *
10331
 * @deprecated Internal function, don't use.
10332
 *
10333
 *     [27] Misc ::= Comment | PI |  S
10334
 * @param ctxt  an XML parser context
10335
 */
10336
10337
void
10338
66.0k
xmlParseMisc(xmlParserCtxt *ctxt) {
10339
74.1k
    while (PARSER_STOPPED(ctxt) == 0) {
10340
73.6k
        SKIP_BLANKS;
10341
73.6k
        GROW;
10342
73.6k
        if ((RAW == '<') && (NXT(1) == '?')) {
10343
5.49k
      xmlParsePI(ctxt);
10344
68.1k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10345
2.57k
      xmlParseComment(ctxt);
10346
65.6k
        } else {
10347
65.6k
            break;
10348
65.6k
        }
10349
73.6k
    }
10350
66.0k
}
10351
10352
/**
 * Finish document processing: invoke the SAX endDocument callback if
 * one is set, then free ctxt->myDoc and reset it to NULL when its
 * version string equals SAX_COMPAT_MODE.
 *
 * @param ctxt  an XML parser context
 */
static void
10353
32.5k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10354
32.5k
    xmlDocPtr doc;
10355
10356
    /*
10357
     * SAX: end of the document processing.
10358
     */
10359
32.5k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10360
32.5k
        ctxt->sax->endDocument(ctxt->userData);
10361
10362
    /*
10363
     * Remove locally kept entity definitions if the tree was not built
10364
     */
10365
32.5k
    doc = ctxt->myDoc;
10366
32.5k
    /* A document whose version is SAX_COMPAT_MODE is freed here. */
    if ((doc != NULL) &&
10367
32.5k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10368
1.35k
        xmlFreeDoc(doc);
10369
1.35k
        /* Clear the field so the freed document is not referenced again. */
        ctxt->myDoc = NULL;
10370
1.35k
    }
10371
32.5k
}
10372
10373
/**
10374
 * Parse an XML document and invoke the SAX handlers. This is useful
10375
 * if you're only interested in custom SAX callbacks. If you want a
10376
 * document tree, use #xmlCtxtParseDocument.
10377
 *
 * The steps are, in order: encoding detection, optional XML
 * declaration, startDocument callback, Misc, optional DOCTYPE with
 * internal/external subset followed by more Misc, the root element,
 * trailing Misc, an end-of-input check and document finalisation.
 *
10378
 * @param ctxt  an XML parser context
10379
 * @returns 0, -1 in case of error. -1 is returned when `ctxt` or its
 *          input is NULL, when the input is empty, when the default
 *          version string cannot be duplicated, or when
 *          ctxt->wellFormed is zero at the end of parsing (in which
 *          case ctxt->valid is also cleared).
10380
 */
10381
10382
int
10383
39.9k
xmlParseDocument(xmlParserCtxt *ctxt) {
10384
39.9k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10385
0
        return(-1);
10386
10387
39.9k
    GROW;
10388
10389
    /*
10390
     * SAX: detecting the level.
10391
     */
10392
39.9k
    xmlCtxtInitializeLate(ctxt);
10393
10394
39.9k
    /* Hand the default locator to the SAX handler, if it wants one. */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10395
39.9k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10396
39.9k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10397
39.9k
    }
10398
10399
39.9k
    xmlDetectEncoding(ctxt);
10400
10401
39.9k
    /* Nothing to parse: report an empty document and give up early. */
    if (CUR == 0) {
10402
7.42k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10403
7.42k
  return(-1);
10404
7.42k
    }
10405
10406
32.5k
    GROW;
10407
32.5k
    /* "<?xml" followed by a blank starts an XML declaration. */
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10408
10409
  /*
10410
   * Note that we will switch encoding on the fly.
10411
   */
10412
5.19k
  xmlParseXMLDecl(ctxt);
10413
5.19k
  SKIP_BLANKS;
10414
27.3k
    } else {
10415
27.3k
  /* No declaration: fall back to a copy of XML_DEFAULT_VERSION. */
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10416
27.3k
        if (ctxt->version == NULL) {
10417
0
            xmlErrMemory(ctxt);
10418
0
            return(-1);
10419
0
        }
10420
27.3k
    }
10421
32.5k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10422
28.8k
        ctxt->sax->startDocument(ctxt->userData);
10423
32.5k
    /*
     * Copy the input buffer's `compressed` value to the document when
     * a document exists and the value is non-negative.
     */
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10424
32.5k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10425
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10426
0
    }
10427
10428
    /*
10429
     * The Misc part of the Prolog
10430
     */
10431
32.5k
    xmlParseMisc(ctxt);
10432
10433
    /*
10434
     * Then possibly doc type declaration(s) and more Misc
10435
     * (doctypedecl Misc*)?
10436
     */
10437
32.5k
    GROW;
10438
32.5k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10439
10440
11.3k
  /* inSubset is 1 while the declaration and internal subset are read. */
  ctxt->inSubset = 1;
10441
11.3k
  xmlParseDocTypeDecl(ctxt);
10442
11.3k
  if (RAW == '[') {
10443
10.6k
      xmlParseInternalSubset(ctxt);
10444
10.6k
  } else if (RAW == '>') {
10445
380
            /* No internal subset: just consume the closing '>'. */
            NEXT;
10446
380
        }
10447
10448
  /*
10449
   * Create and update the external subset.
10450
   */
10451
11.3k
  ctxt->inSubset = 2;
10452
11.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10453
11.3k
      (!ctxt->disableSAX))
10454
2.54k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10455
2.54k
                                ctxt->extSubSystem, ctxt->extSubURI);
10456
11.3k
  /* Subset handling is over. */
  ctxt->inSubset = 0;
10457
10458
11.3k
        xmlCleanSpecialAttr(ctxt);
10459
10460
11.3k
  xmlParseMisc(ctxt);
10461
11.3k
    }
10462
10463
    /*
10464
     * Time to start parsing the tree itself
10465
     */
10466
32.5k
    GROW;
10467
32.5k
    if (RAW != '<') {
10468
10.4k
        /* Only raise this error while ctxt->wellFormed is still non-zero. */
        if (ctxt->wellFormed)
10469
1.16k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10470
1.16k
                           "Start tag expected, '<' not found\n");
10471
22.1k
    } else {
10472
22.1k
  xmlParseElement(ctxt);
10473
10474
  /*
10475
   * The Misc part at the end
10476
   */
10477
22.1k
  xmlParseMisc(ctxt);
10478
10479
22.1k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10480
22.1k
    }
10481
10482
32.5k
    /* Mark the parser as finished before running the end-of-document hook. */
    ctxt->instate = XML_PARSER_EOF;
10483
32.5k
    xmlFinishDocument(ctxt);
10484
10485
32.5k
    /* A document that is not well-formed is also flagged as not valid. */
    if (! ctxt->wellFormed) {
10486
32.4k
  ctxt->valid = 0;
10487
32.4k
  return(-1);
10488
32.4k
    }
10489
10490
79
    return(0);
10491
32.5k
}
10492
10493
/**
10494
 * Parse a general parsed entity
10495
 * An external general parsed entity is well-formed if it matches the
10496
 * production labeled extParsedEnt.
10497
 *
10498
 * @deprecated Internal function, don't use.
10499
 *
10500
 *     [78] extParsedEnt ::= TextDecl? content
10501
 *
 * DTD validation is switched off for the duration of the parse
 * (XML_PARSE_DTDVALID is cleared from ctxt->options and
 * ctxt->validate is set to 0).
 *
10502
 * @param ctxt  an XML parser context
10503
 * @returns 0, -1 in case of error. the parser context is augmented
10504
 *                as a result of the parsing. -1 is returned when
 *                `ctxt` or its input is NULL, or when
 *                ctxt->wellFormed is zero after parsing.
10505
 */
10506
10507
int
10508
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10509
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10510
0
        return(-1);
10511
10512
0
    xmlCtxtInitializeLate(ctxt);
10513
10514
0
    /* Hand the default locator to the SAX handler, if it wants one. */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10515
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10516
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10517
0
    }
10518
10519
0
    xmlDetectEncoding(ctxt);
10520
10521
0
    /*
     * NOTE(review): unlike xmlParseDocument, there is no early return
     * here; parsing continues after the error has been reported.
     */
    if (CUR == 0) {
10522
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10523
0
    }
10524
10525
    /*
10526
     * Check for the XMLDecl in the Prolog.
10527
     */
10528
0
    GROW;
10529
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10530
10531
  /*
10532
   * Note that we will switch encoding on the fly.
10533
   */
10534
0
  xmlParseXMLDecl(ctxt);
10535
0
  SKIP_BLANKS;
10536
0
    } else {
10537
0
  /*
   * NOTE(review): the result of xmlCharStrdup is not checked for
   * NULL here, whereas xmlParseDocument reports xmlErrMemory and
   * returns -1 in the same situation -- confirm this is intended.
   */
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10538
0
    }
10539
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10540
0
        ctxt->sax->startDocument(ctxt->userData);
10541
10542
    /*
10543
     * Doing validity checking on chunk doesn't make sense
10544
     */
10545
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10546
0
    ctxt->validate = 0;
10547
0
    ctxt->depth = 0;
10548
10549
0
    xmlParseContentInternal(ctxt);
10550
10551
0
    /* Input left over after the content is reported as not well balanced. */
    if (ctxt->input->cur < ctxt->input->end)
10552
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10553
10554
    /*
10555
     * SAX: end of the document processing.
10556
     */
10557
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10558
0
        ctxt->sax->endDocument(ctxt->userData);
10559
10560
0
    if (! ctxt->wellFormed) return(-1);
10561
0
    return(0);
10562
0
}
10563
10564
#ifdef LIBXML_PUSH_ENABLED
10565
/************************************************************************
10566
 *                  *
10567
 *    Progressive parsing interfaces        *
10568
 *                  *
10569
 ************************************************************************/
10570
10571
/**
10572
 * Check whether the input buffer contains a character.
10573
 *
 * The scan starts one byte past the current position, or at the
 * offset saved in ctxt->checkIndex by a previous unsuccessful call,
 * and runs up to ctxt->input->end.
 *
10574
 * @param ctxt  an XML parser context
10575
 * @param c  character
 * @returns 1 if the character was found (ctxt->checkIndex is reset
 *          to 0), 0 if not (ctxt->checkIndex is set to the offset of
 *          the end of the buffer). 1 is also returned, with
 *          checkIndex reset, when that offset would exceed LONG_MAX.
10576
 */
10577
static int
10578
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10579
0
    const xmlChar *cur;
10580
10581
0
    /* First attempt: skip the byte at the current position. */
    if (ctxt->checkIndex == 0) {
10582
0
        cur = ctxt->input->cur + 1;
10583
0
    } else {
10584
0
        /* Resume where the previous scan stopped. */
        cur = ctxt->input->cur + ctxt->checkIndex;
10585
0
    }
10586
10587
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10588
0
        /* Everything up to `end` has been scanned; remember that offset. */
        size_t index = ctxt->input->end - ctxt->input->cur;
10589
10590
0
        /* Offset too large to store: give up on resuming and report 1. */
        if (index > LONG_MAX) {
10591
0
            ctxt->checkIndex = 0;
10592
0
            return(1);
10593
0
        }
10594
0
        ctxt->checkIndex = index;
10595
0
        return(0);
10596
0
    } else {
10597
0
        ctxt->checkIndex = 0;
10598
0
        return(1);
10599
0
    }
10600
0
}
10601
10602
/**
10603
 * Check whether the input buffer contains a string.
10604
 *
 * The scan starts at `startDelta` bytes past the current position,
 * or at the offset saved in ctxt->checkIndex by a previous
 * unsuccessful call.
 *
10605
 * @param ctxt  an XML parser context
10606
 * @param startDelta  delta to apply at the start
10607
 * @param str  string
10608
 * @param strLen  length of string
 * @returns a pointer to the first occurrence of `str` (checkIndex is
 *          reset to 0), or NULL if it was not found (checkIndex is
 *          set so that the last strLen - 1 bytes are scanned again
 *          next time). If the offset to save would exceed LONG_MAX,
 *          checkIndex is reset and `input->end - strLen` is returned
 *          instead of NULL.
10609
 */
10610
static const xmlChar *
10611
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10612
0
                     const char *str, size_t strLen) {
10613
0
    const xmlChar *cur, *term;
10614
10615
0
    if (ctxt->checkIndex == 0) {
10616
0
        cur = ctxt->input->cur + startDelta;
10617
0
    } else {
10618
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10619
0
    }
10620
10621
0
    /*
     * NOTE(review): strstr stops at the first NUL byte, not at
     * input->end -- presumably the input buffer is NUL-terminated;
     * confirm against the buffer implementation.
     */
    term = BAD_CAST strstr((const char *) cur, str);
10622
0
    if (term == NULL) {
10623
0
        const xmlChar *end = ctxt->input->end;
10624
0
        size_t index;
10625
10626
        /* Rescan (strLen - 1) characters. */
10627
0
        /* Fewer than strLen bytes were scanned: restart from `cur`. */
        if ((size_t) (end - cur) < strLen)
10628
0
            end = cur;
10629
0
        else
10630
0
            end -= strLen - 1;
10631
0
        index = end - ctxt->input->cur;
10632
0
        /* Offset too large to store: stop resuming, return non-NULL. */
        if (index > LONG_MAX) {
10633
0
            ctxt->checkIndex = 0;
10634
0
            return(ctxt->input->end - strLen);
10635
0
        }
10636
0
        ctxt->checkIndex = index;
10637
0
    } else {
10638
0
        ctxt->checkIndex = 0;
10639
0
    }
10640
10641
0
    return(term);
10642
0
}
10643
10644
/**
10645
 * Check whether the input buffer contains terminated char data.
10646
 *
 * Char data is considered terminated when a '<' or '&' is found
 * between the resume offset (ctxt->checkIndex) and the end of the
 * buffer.
 *
10647
 * @param ctxt  an XML parser context
 * @returns 1 if a terminator was found (checkIndex is reset to 0),
 *          0 if not (checkIndex is set to the offset of the end of
 *          the scanned data). 1 is also returned, with checkIndex
 *          reset, when that offset would exceed LONG_MAX.
10648
 */
10649
static int
10650
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10651
0
    /* With checkIndex == 0 the scan begins at the current position. */
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10652
0
    const xmlChar *end = ctxt->input->end;
10653
0
    size_t index;
10654
10655
0
    while (cur < end) {
10656
0
        if ((*cur == '<') || (*cur == '&')) {
10657
0
            ctxt->checkIndex = 0;
10658
0
            return(1);
10659
0
        }
10660
0
        cur++;
10661
0
    }
10662
10663
0
    /* Nothing found: remember how far the scan got. */
    index = cur - ctxt->input->cur;
10664
0
    if (index > LONG_MAX) {
10665
0
        ctxt->checkIndex = 0;
10666
0
        return(1);
10667
0
    }
10668
0
    ctxt->checkIndex = index;
10669
0
    return(0);
10670
0
}
10671
10672
/**
10673
 * Check whether there's enough data in the input buffer to finish parsing
10674
 * a start tag. This has to take quotes into account.
10675
 *
 * A '>' only counts when it is outside a single- or double-quoted
 * run. The quote state (0, or the opening quote character) is kept
 * in ctxt->endCheckState between calls, and the resume offset in
 * ctxt->checkIndex.
 *
10676
 * @param ctxt  an XML parser context
 * @returns 1 if an unquoted '>' was found, 0 otherwise. 1 is also
 *          returned when the resume offset would exceed LONG_MAX.
 *          Whenever 1 is returned both saved fields are reset to 0.
10677
 */
10678
static int
10679
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10680
0
    const xmlChar *cur;
10681
0
    const xmlChar *end = ctxt->input->end;
10682
0
    int state = ctxt->endCheckState;
10683
0
    size_t index;
10684
10685
0
    /* First attempt: skip the byte at the current position. */
    if (ctxt->checkIndex == 0)
10686
0
        cur = ctxt->input->cur + 1;
10687
0
    else
10688
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10689
10690
0
    while (cur < end) {
10691
0
        if (state) {
10692
0
            /* Inside quotes: only the matching quote ends the run. */
            if (*cur == state)
10693
0
                state = 0;
10694
0
        } else if (*cur == '\'' || *cur == '"') {
10695
0
            /* Opening quote: remember which character closes it. */
            state = *cur;
10696
0
        } else if (*cur == '>') {
10697
0
            ctxt->checkIndex = 0;
10698
0
            ctxt->endCheckState = 0;
10699
0
            return(1);
10700
0
        }
10701
0
        cur++;
10702
0
    }
10703
10704
0
    /* Not found: save the position and quote state for the next call. */
    index = cur - ctxt->input->cur;
10705
0
    if (index > LONG_MAX) {
10706
0
        ctxt->checkIndex = 0;
10707
0
        ctxt->endCheckState = 0;
10708
0
        return(1);
10709
0
    }
10710
0
    ctxt->checkIndex = index;
10711
0
    ctxt->endCheckState = state;
10712
0
    return(0);
10713
0
}
10714
10715
/**
10716
 * Check whether there's enough data in the input buffer to finish parsing
10717
 * the internal subset.
10718
 *
 * The scan is a small state machine whose state is kept in
 * ctxt->endCheckState between calls:
 *   0        outside of any special construct
 *   '-'      inside a comment, waiting for "-->"
 *   ']'      just after one or more ']'
 *   ' '      in blanks following a ']'
 *   other    inside a quoted run, value is the quote character
 *
10719
 * @param ctxt  an XML parser context
 * @returns 1 if the end of the internal subset was found, 0
 *          otherwise. 1 is also returned when the resume offset
 *          would exceed LONG_MAX. Whenever 1 is returned,
 *          ctxt->checkIndex and ctxt->endCheckState are reset to 0.
10720
 */
10721
static int
10722
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10723
    /*
10724
     * Sorry, but progressive parsing of the internal subset is not
10725
     * supported. We first check that the full content of the internal
10726
     * subset is available and parsing is launched only at that point.
10727
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10728
     * not in a ']]>' sequence which are conditional sections.
10729
     */
10730
0
    const xmlChar *cur, *start;
10731
0
    const xmlChar *end = ctxt->input->end;
10732
0
    int state = ctxt->endCheckState;
10733
0
    size_t index;
10734
10735
0
    if (ctxt->checkIndex == 0) {
10736
0
        cur = ctxt->input->cur + 1;
10737
0
    } else {
10738
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10739
0
    }
10740
0
    /* `start` bounds how far the final three-byte back-off may rewind. */
    start = cur;
10741
10742
0
    while (cur < end) {
10743
0
        if (state == '-') {
10744
0
            /*
             * NOTE(review): cur[1] and cur[2] may lie at or past `end`;
             * presumably the buffer is NUL-terminated so the checks
             * short-circuit -- confirm.
             */
            if ((*cur == '-') &&
10745
0
                (cur[1] == '-') &&
10746
0
                (cur[2] == '>')) {
10747
0
                state = 0;
10748
0
                cur += 3;
10749
0
                start = cur;
10750
0
                continue;
10751
0
            }
10752
0
        }
10753
0
        else if (state == ']') {
10754
0
            if (*cur == '>') {
10755
0
                ctxt->checkIndex = 0;
10756
0
                ctxt->endCheckState = 0;
10757
0
                return(1);
10758
0
            }
10759
0
            if (IS_BLANK_CH(*cur)) {
10760
0
                state = ' ';
10761
0
            } else if (*cur != ']') {
10762
0
                /* Not the end: re-examine this byte in state 0. */
                state = 0;
10763
0
                start = cur;
10764
0
                continue;
10765
0
            }
10766
0
        }
10767
0
        else if (state == ' ') {
10768
0
            if (*cur == '>') {
10769
0
                ctxt->checkIndex = 0;
10770
0
                ctxt->endCheckState = 0;
10771
0
                return(1);
10772
0
            }
10773
0
            if (!IS_BLANK_CH(*cur)) {
10774
0
                /* Not the end: re-examine this byte in state 0. */
                state = 0;
10775
0
                start = cur;
10776
0
                continue;
10777
0
            }
10778
0
        }
10779
0
        else if (state != 0) {
10780
0
            /* Inside quotes: only the matching quote ends the run. */
            if (*cur == state) {
10781
0
                state = 0;
10782
0
                start = cur + 1;
10783
0
            }
10784
0
        }
10785
0
        else if (*cur == '<') {
10786
0
            if ((cur[1] == '!') &&
10787
0
                (cur[2] == '-') &&
10788
0
                (cur[3] == '-')) {
10789
0
                state = '-';
10790
0
                cur += 4;
10791
                /* Don't treat <!--> as comment */
10792
0
                start = cur;
10793
0
                continue;
10794
0
            }
10795
0
        }
10796
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10797
0
            state = *cur;
10798
0
        }
10799
10800
0
        cur++;
10801
0
    }
10802
10803
    /*
10804
     * Rescan the three last characters to detect "<!--" and "-->"
10805
     * split across chunks.
10806
     */
10807
0
    if ((state == 0) || (state == '-')) {
10808
0
        /* Never rewind past `start`. */
        if (cur - start < 3)
10809
0
            cur = start;
10810
0
        else
10811
0
            cur -= 3;
10812
0
    }
10813
0
    /* Save the resume position and state for the next call. */
    index = cur - ctxt->input->cur;
10814
0
    if (index > LONG_MAX) {
10815
0
        ctxt->checkIndex = 0;
10816
0
        ctxt->endCheckState = 0;
10817
0
        return(1);
10818
0
    }
10819
0
    ctxt->checkIndex = index;
10820
0
    ctxt->endCheckState = state;
10821
0
    return(0);
10822
0
}
10823
10824
/**
10825
 * Try to progress on parsing
10826
 *
10827
 * @param ctxt  an XML parser context
10828
 * @param terminate  last chunk indicator
10829
 * @returns zero if no parsing was possible
10830
 */
10831
static int
10832
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10833
0
    int ret = 0;
10834
0
    size_t avail;
10835
0
    xmlChar cur, next;
10836
10837
0
    if (ctxt->input == NULL)
10838
0
        return(0);
10839
10840
0
    if ((ctxt->input != NULL) &&
10841
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10842
0
        xmlParserShrink(ctxt);
10843
0
    }
10844
10845
0
    while (ctxt->disableSAX == 0) {
10846
0
        avail = ctxt->input->end - ctxt->input->cur;
10847
0
        if (avail < 1)
10848
0
      goto done;
10849
0
        switch (ctxt->instate) {
10850
0
            case XML_PARSER_EOF:
10851
          /*
10852
     * Document parsing is done !
10853
     */
10854
0
          goto done;
10855
0
            case XML_PARSER_START:
10856
                /*
10857
                 * Very first chars read from the document flow.
10858
                 */
10859
0
                if ((!terminate) && (avail < 4))
10860
0
                    goto done;
10861
10862
                /*
10863
                 * We need more bytes to detect EBCDIC code pages.
10864
                 * See xmlDetectEBCDIC.
10865
                 */
10866
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10867
0
                    (!terminate) && (avail < 200))
10868
0
                    goto done;
10869
10870
0
                xmlDetectEncoding(ctxt);
10871
0
                ctxt->instate = XML_PARSER_XML_DECL;
10872
0
    break;
10873
10874
0
            case XML_PARSER_XML_DECL:
10875
0
    if ((!terminate) && (avail < 2))
10876
0
        goto done;
10877
0
    cur = ctxt->input->cur[0];
10878
0
    next = ctxt->input->cur[1];
10879
0
          if ((cur == '<') && (next == '?')) {
10880
        /* PI or XML decl */
10881
0
        if ((!terminate) &&
10882
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10883
0
      goto done;
10884
0
        if ((ctxt->input->cur[2] == 'x') &&
10885
0
      (ctxt->input->cur[3] == 'm') &&
10886
0
      (ctxt->input->cur[4] == 'l') &&
10887
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10888
0
      ret += 5;
10889
0
      xmlParseXMLDecl(ctxt);
10890
0
        } else {
10891
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10892
0
                        if (ctxt->version == NULL) {
10893
0
                            xmlErrMemory(ctxt);
10894
0
                            break;
10895
0
                        }
10896
0
        }
10897
0
    } else {
10898
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10899
0
        if (ctxt->version == NULL) {
10900
0
            xmlErrMemory(ctxt);
10901
0
      break;
10902
0
        }
10903
0
    }
10904
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10905
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10906
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10907
0
                }
10908
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10909
0
                    (!ctxt->disableSAX))
10910
0
                    ctxt->sax->startDocument(ctxt->userData);
10911
0
                ctxt->instate = XML_PARSER_MISC;
10912
0
    break;
10913
0
            case XML_PARSER_START_TAG: {
10914
0
          const xmlChar *name;
10915
0
    const xmlChar *prefix = NULL;
10916
0
    const xmlChar *URI = NULL;
10917
0
                int line = ctxt->input->line;
10918
0
    int nbNs = 0;
10919
10920
0
    if ((!terminate) && (avail < 2))
10921
0
        goto done;
10922
0
    cur = ctxt->input->cur[0];
10923
0
          if (cur != '<') {
10924
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10925
0
                                   "Start tag expected, '<' not found");
10926
0
                    ctxt->instate = XML_PARSER_EOF;
10927
0
                    xmlFinishDocument(ctxt);
10928
0
        goto done;
10929
0
    }
10930
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10931
0
                    goto done;
10932
0
    if (ctxt->spaceNr == 0)
10933
0
        spacePush(ctxt, -1);
10934
0
    else if (*ctxt->space == -2)
10935
0
        spacePush(ctxt, -1);
10936
0
    else
10937
0
        spacePush(ctxt, *ctxt->space);
10938
0
#ifdef LIBXML_SAX1_ENABLED
10939
0
    if (ctxt->sax2)
10940
0
#endif /* LIBXML_SAX1_ENABLED */
10941
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10942
0
#ifdef LIBXML_SAX1_ENABLED
10943
0
    else
10944
0
        name = xmlParseStartTag(ctxt);
10945
0
#endif /* LIBXML_SAX1_ENABLED */
10946
0
    if (name == NULL) {
10947
0
        spacePop(ctxt);
10948
0
                    ctxt->instate = XML_PARSER_EOF;
10949
0
                    xmlFinishDocument(ctxt);
10950
0
        goto done;
10951
0
    }
10952
0
#ifdef LIBXML_VALID_ENABLED
10953
    /*
10954
     * [ VC: Root Element Type ]
10955
     * The Name in the document type declaration must match
10956
     * the element type of the root element.
10957
     */
10958
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10959
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10960
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10961
0
#endif /* LIBXML_VALID_ENABLED */
10962
10963
    /*
10964
     * Check for an Empty Element.
10965
     */
10966
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10967
0
        SKIP(2);
10968
10969
0
        if (ctxt->sax2) {
10970
0
      if ((ctxt->sax != NULL) &&
10971
0
          (ctxt->sax->endElementNs != NULL) &&
10972
0
          (!ctxt->disableSAX))
10973
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10974
0
                                  prefix, URI);
10975
0
      if (nbNs > 0)
10976
0
          xmlParserNsPop(ctxt, nbNs);
10977
0
#ifdef LIBXML_SAX1_ENABLED
10978
0
        } else {
10979
0
      if ((ctxt->sax != NULL) &&
10980
0
          (ctxt->sax->endElement != NULL) &&
10981
0
          (!ctxt->disableSAX))
10982
0
          ctxt->sax->endElement(ctxt->userData, name);
10983
0
#endif /* LIBXML_SAX1_ENABLED */
10984
0
        }
10985
0
        spacePop(ctxt);
10986
0
    } else if (RAW == '>') {
10987
0
        NEXT;
10988
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10989
0
    } else {
10990
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10991
0
           "Couldn't find end of Start Tag %s\n",
10992
0
           name);
10993
0
        nodePop(ctxt);
10994
0
        spacePop(ctxt);
10995
0
                    if (nbNs > 0)
10996
0
                        xmlParserNsPop(ctxt, nbNs);
10997
0
    }
10998
10999
0
                if (ctxt->nameNr == 0)
11000
0
                    ctxt->instate = XML_PARSER_EPILOG;
11001
0
                else
11002
0
                    ctxt->instate = XML_PARSER_CONTENT;
11003
0
                break;
11004
0
      }
11005
0
            case XML_PARSER_CONTENT: {
11006
0
    cur = ctxt->input->cur[0];
11007
11008
0
    if (cur == '<') {
11009
0
                    if ((!terminate) && (avail < 2))
11010
0
                        goto done;
11011
0
        next = ctxt->input->cur[1];
11012
11013
0
                    if (next == '/') {
11014
0
                        ctxt->instate = XML_PARSER_END_TAG;
11015
0
                        break;
11016
0
                    } else if (next == '?') {
11017
0
                        if ((!terminate) &&
11018
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11019
0
                            goto done;
11020
0
                        xmlParsePI(ctxt);
11021
0
                        ctxt->instate = XML_PARSER_CONTENT;
11022
0
                        break;
11023
0
                    } else if (next == '!') {
11024
0
                        if ((!terminate) && (avail < 3))
11025
0
                            goto done;
11026
0
                        next = ctxt->input->cur[2];
11027
11028
0
                        if (next == '-') {
11029
0
                            if ((!terminate) && (avail < 4))
11030
0
                                goto done;
11031
0
                            if (ctxt->input->cur[3] == '-') {
11032
0
                                if ((!terminate) &&
11033
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11034
0
                                    goto done;
11035
0
                                xmlParseComment(ctxt);
11036
0
                                ctxt->instate = XML_PARSER_CONTENT;
11037
0
                                break;
11038
0
                            }
11039
0
                        } else if (next == '[') {
11040
0
                            if ((!terminate) && (avail < 9))
11041
0
                                goto done;
11042
0
                            if ((ctxt->input->cur[2] == '[') &&
11043
0
                                (ctxt->input->cur[3] == 'C') &&
11044
0
                                (ctxt->input->cur[4] == 'D') &&
11045
0
                                (ctxt->input->cur[5] == 'A') &&
11046
0
                                (ctxt->input->cur[6] == 'T') &&
11047
0
                                (ctxt->input->cur[7] == 'A') &&
11048
0
                                (ctxt->input->cur[8] == '[')) {
11049
0
                                if ((!terminate) &&
11050
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11051
0
                                    goto done;
11052
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11053
0
                                xmlParseCDSect(ctxt);
11054
0
                                ctxt->instate = XML_PARSER_CONTENT;
11055
0
                                break;
11056
0
                            }
11057
0
                        }
11058
0
                    }
11059
0
    } else if (cur == '&') {
11060
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11061
0
      goto done;
11062
0
        xmlParseReference(ctxt);
11063
0
                    break;
11064
0
    } else {
11065
        /* TODO Avoid the extra copy, handle directly !!! */
11066
        /*
11067
         * Goal of the following test is:
11068
         *  - minimize calls to the SAX 'character' callback
11069
         *    when they are mergeable
11070
         *  - handle an problem for isBlank when we only parse
11071
         *    a sequence of blank chars and the next one is
11072
         *    not available to check against '<' presence.
11073
         *  - tries to homogenize the differences in SAX
11074
         *    callbacks between the push and pull versions
11075
         *    of the parser.
11076
         */
11077
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11078
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11079
0
          goto done;
11080
0
                    }
11081
0
                    ctxt->checkIndex = 0;
11082
0
        xmlParseCharDataInternal(ctxt, !terminate);
11083
0
                    break;
11084
0
    }
11085
11086
0
                ctxt->instate = XML_PARSER_START_TAG;
11087
0
    break;
11088
0
      }
11089
0
            case XML_PARSER_END_TAG:
11090
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11091
0
        goto done;
11092
0
    if (ctxt->sax2) {
11093
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11094
0
        nameNsPop(ctxt);
11095
0
    }
11096
0
#ifdef LIBXML_SAX1_ENABLED
11097
0
      else
11098
0
        xmlParseEndTag1(ctxt, 0);
11099
0
#endif /* LIBXML_SAX1_ENABLED */
11100
0
    if (ctxt->nameNr == 0) {
11101
0
        ctxt->instate = XML_PARSER_EPILOG;
11102
0
    } else {
11103
0
        ctxt->instate = XML_PARSER_CONTENT;
11104
0
    }
11105
0
    break;
11106
0
            case XML_PARSER_MISC:
11107
0
            case XML_PARSER_PROLOG:
11108
0
            case XML_PARSER_EPILOG:
11109
0
    SKIP_BLANKS;
11110
0
                avail = ctxt->input->end - ctxt->input->cur;
11111
0
    if (avail < 1)
11112
0
        goto done;
11113
0
    if (ctxt->input->cur[0] == '<') {
11114
0
                    if ((!terminate) && (avail < 2))
11115
0
                        goto done;
11116
0
                    next = ctxt->input->cur[1];
11117
0
                    if (next == '?') {
11118
0
                        if ((!terminate) &&
11119
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11120
0
                            goto done;
11121
0
                        xmlParsePI(ctxt);
11122
0
                        break;
11123
0
                    } else if (next == '!') {
11124
0
                        if ((!terminate) && (avail < 3))
11125
0
                            goto done;
11126
11127
0
                        if (ctxt->input->cur[2] == '-') {
11128
0
                            if ((!terminate) && (avail < 4))
11129
0
                                goto done;
11130
0
                            if (ctxt->input->cur[3] == '-') {
11131
0
                                if ((!terminate) &&
11132
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11133
0
                                    goto done;
11134
0
                                xmlParseComment(ctxt);
11135
0
                                break;
11136
0
                            }
11137
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11138
0
                            if ((!terminate) && (avail < 9))
11139
0
                                goto done;
11140
0
                            if ((ctxt->input->cur[2] == 'D') &&
11141
0
                                (ctxt->input->cur[3] == 'O') &&
11142
0
                                (ctxt->input->cur[4] == 'C') &&
11143
0
                                (ctxt->input->cur[5] == 'T') &&
11144
0
                                (ctxt->input->cur[6] == 'Y') &&
11145
0
                                (ctxt->input->cur[7] == 'P') &&
11146
0
                                (ctxt->input->cur[8] == 'E')) {
11147
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11148
0
                                    goto done;
11149
0
                                ctxt->inSubset = 1;
11150
0
                                xmlParseDocTypeDecl(ctxt);
11151
0
                                if (RAW == '[') {
11152
0
                                    ctxt->instate = XML_PARSER_DTD;
11153
0
                                } else {
11154
0
                                    if (RAW == '>')
11155
0
                                        NEXT;
11156
                                    /*
11157
                                     * Create and update the external subset.
11158
                                     */
11159
0
                                    ctxt->inSubset = 2;
11160
0
                                    if ((ctxt->sax != NULL) &&
11161
0
                                        (!ctxt->disableSAX) &&
11162
0
                                        (ctxt->sax->externalSubset != NULL))
11163
0
                                        ctxt->sax->externalSubset(
11164
0
                                                ctxt->userData,
11165
0
                                                ctxt->intSubName,
11166
0
                                                ctxt->extSubSystem,
11167
0
                                                ctxt->extSubURI);
11168
0
                                    ctxt->inSubset = 0;
11169
0
                                    xmlCleanSpecialAttr(ctxt);
11170
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11171
0
                                }
11172
0
                                break;
11173
0
                            }
11174
0
                        }
11175
0
                    }
11176
0
                }
11177
11178
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11179
0
                    if (ctxt->errNo == XML_ERR_OK)
11180
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11181
0
        ctxt->instate = XML_PARSER_EOF;
11182
0
                    xmlFinishDocument(ctxt);
11183
0
                } else {
11184
0
        ctxt->instate = XML_PARSER_START_TAG;
11185
0
    }
11186
0
    break;
11187
0
            case XML_PARSER_DTD: {
11188
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11189
0
                    goto done;
11190
0
    xmlParseInternalSubset(ctxt);
11191
0
    ctxt->inSubset = 2;
11192
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11193
0
        (ctxt->sax->externalSubset != NULL))
11194
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11195
0
          ctxt->extSubSystem, ctxt->extSubURI);
11196
0
    ctxt->inSubset = 0;
11197
0
    xmlCleanSpecialAttr(ctxt);
11198
0
    ctxt->instate = XML_PARSER_PROLOG;
11199
0
                break;
11200
0
      }
11201
0
            default:
11202
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11203
0
      "PP: internal error\n");
11204
0
    ctxt->instate = XML_PARSER_EOF;
11205
0
    break;
11206
0
  }
11207
0
    }
11208
0
done:
11209
0
    return(ret);
11210
0
}
11211
11212
/**
11213
 * Parse a chunk of memory in push parser mode.
11214
 *
11215
 * Assumes that the parser context was initialized with
11216
 * #xmlCreatePushParserCtxt.
11217
 *
11218
 * The last chunk, which will often be empty, must be marked with
11219
 * the `terminate` flag. With the default SAX callbacks, the resulting
11220
 * document will be available in ctxt->myDoc. This pointer will not
11221
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11222
 * caller. If the document isn't well-formed, it will still be returned
11223
 * in ctxt->myDoc.
11224
 *
11225
 * As an exception, #xmlCtxtResetPush will free the document in
11226
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11227
 * the document.
11228
 *
11229
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11230
 * result document.
11231
 *
11232
 * @param ctxt  an XML parser context
11233
 * @param chunk  chunk of memory
11234
 * @param size  size of chunk in bytes
11235
 * @param terminate  last chunk indicator
11236
 * @returns an xmlParserErrors code (0 on success).
11237
 */
11238
int
11239
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11240
0
              int terminate) {
11241
0
    size_t curBase;
11242
0
    size_t maxLength;
11243
0
    size_t pos;
11244
0
    int end_in_lf = 0;
11245
0
    int res;
11246
11247
0
    if ((ctxt == NULL) || (size < 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
0
    if ((chunk == NULL) && (size > 0))
11250
0
        return(XML_ERR_ARGUMENT);
11251
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11252
0
        return(XML_ERR_ARGUMENT);
11253
0
    if (ctxt->disableSAX != 0)
11254
0
        return(ctxt->errNo);
11255
11256
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11257
0
    if (ctxt->instate == XML_PARSER_START)
11258
0
        xmlCtxtInitializeLate(ctxt);
11259
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11260
0
        (chunk[size - 1] == '\r')) {
11261
0
  end_in_lf = 1;
11262
0
  size--;
11263
0
    }
11264
11265
    /*
11266
     * Also push an empty chunk to make sure that the raw buffer
11267
     * will be flushed if there is an encoder.
11268
     */
11269
0
    pos = ctxt->input->cur - ctxt->input->base;
11270
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11271
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11272
0
    if (res < 0) {
11273
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11274
0
        return(ctxt->errNo);
11275
0
    }
11276
11277
0
    xmlParseTryOrFinish(ctxt, terminate);
11278
11279
0
    curBase = ctxt->input->cur - ctxt->input->base;
11280
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11281
0
                XML_MAX_HUGE_LENGTH :
11282
0
                XML_MAX_LOOKUP_LIMIT;
11283
0
    if (curBase > maxLength) {
11284
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11285
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11286
0
    }
11287
11288
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11289
0
        return(ctxt->errNo);
11290
11291
0
    if (end_in_lf == 1) {
11292
0
  pos = ctxt->input->cur - ctxt->input->base;
11293
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11294
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11295
0
        if (res < 0) {
11296
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11297
0
            return(ctxt->errNo);
11298
0
        }
11299
0
    }
11300
0
    if (terminate) {
11301
  /*
11302
   * Check for termination
11303
   */
11304
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11305
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11306
0
            if (ctxt->nameNr > 0) {
11307
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11308
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11309
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11310
0
                        "Premature end of data in tag %s line %d\n",
11311
0
                        name, line, NULL);
11312
0
            } else if (ctxt->instate == XML_PARSER_START) {
11313
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11314
0
            } else {
11315
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11316
0
                               "Start tag expected, '<' not found\n");
11317
0
            }
11318
0
        } else {
11319
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11320
0
        }
11321
0
  if (ctxt->instate != XML_PARSER_EOF) {
11322
0
            ctxt->instate = XML_PARSER_EOF;
11323
0
            xmlFinishDocument(ctxt);
11324
0
  }
11325
0
    }
11326
0
    if (ctxt->wellFormed == 0)
11327
0
  return((xmlParserErrors) ctxt->errNo);
11328
0
    else
11329
0
        return(0);
11330
0
}
11331
11332
/************************************************************************
11333
 *                  *
11334
 *    I/O front end functions to the parser     *
11335
 *                  *
11336
 ************************************************************************/
11337
11338
/**
11339
 * Create a parser context for using the XML parser in push mode.
11340
 * See #xmlParseChunk.
11341
 *
11342
 * Passing an initial chunk is useless and deprecated.
11343
 *
11344
 * The push parser doesn't support recovery mode or the
11345
 * XML_PARSE_NOBLANKS option.
11346
 *
11347
 * `filename` is used as base URI to fetch external entities and for
11348
 * error reports.
11349
 *
11350
 * @param sax  a SAX handler (optional)
11351
 * @param user_data  user data for SAX callbacks (optional)
11352
 * @param chunk  initial chunk (optional, deprecated)
11353
 * @param size  size of initial chunk in bytes
11354
 * @param filename  file name or URI (optional)
11355
 * @returns the new parser context or NULL if a memory allocation
11356
 * failed.
11357
 */
11358
11359
xmlParserCtxt *
11360
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11361
0
                        const char *chunk, int size, const char *filename) {
11362
0
    xmlParserCtxtPtr ctxt;
11363
0
    xmlParserInputPtr input;
11364
11365
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11366
0
    if (ctxt == NULL)
11367
0
  return(NULL);
11368
11369
0
    ctxt->options &= ~XML_PARSE_NODICT;
11370
0
    ctxt->dictNames = 1;
11371
11372
0
    input = xmlNewPushInput(filename, chunk, size);
11373
0
    if (input == NULL) {
11374
0
  xmlFreeParserCtxt(ctxt);
11375
0
  return(NULL);
11376
0
    }
11377
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11378
0
        xmlFreeInputStream(input);
11379
0
        xmlFreeParserCtxt(ctxt);
11380
0
        return(NULL);
11381
0
    }
11382
11383
0
    return(ctxt);
11384
0
}
11385
#endif /* LIBXML_PUSH_ENABLED */
11386
11387
/**
11388
 * Blocks further parser processing
11389
 *
11390
 * @param ctxt  an XML parser context
11391
 */
11392
void
11393
0
xmlStopParser(xmlParserCtxt *ctxt) {
11394
0
    if (ctxt == NULL)
11395
0
        return;
11396
11397
    /* This stops the parser */
11398
0
    ctxt->disableSAX = 2;
11399
11400
    /*
11401
     * xmlStopParser is often called from error handlers,
11402
     * so we can't raise an error here to avoid infinite
11403
     * loops. Just make sure that an error condition is
11404
     * reported.
11405
     */
11406
0
    if (ctxt->errNo == XML_ERR_OK) {
11407
0
        ctxt->errNo = XML_ERR_USER_STOP;
11408
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11409
0
        ctxt->wellFormed = 0;
11410
0
    }
11411
0
}
11412
11413
/**
11414
 * Create a parser context for using the XML parser with an existing
11415
 * I/O stream
11416
 *
11417
 * @param sax  a SAX handler (optional)
11418
 * @param user_data  user data for SAX callbacks (optional)
11419
 * @param ioread  an I/O read function
11420
 * @param ioclose  an I/O close function (optional)
11421
 * @param ioctx  an I/O handler
11422
 * @param enc  the charset encoding if known (deprecated)
11423
 * @returns the new parser context or NULL
11424
 */
11425
xmlParserCtxt *
11426
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11427
                      xmlInputReadCallback ioread,
11428
                      xmlInputCloseCallback ioclose,
11429
0
                      void *ioctx, xmlCharEncoding enc) {
11430
0
    xmlParserCtxtPtr ctxt;
11431
0
    xmlParserInputPtr input;
11432
0
    const char *encoding;
11433
11434
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11435
0
    if (ctxt == NULL)
11436
0
  return(NULL);
11437
11438
0
    encoding = xmlGetCharEncodingName(enc);
11439
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11440
0
                                  encoding, 0);
11441
0
    if (input == NULL) {
11442
0
  xmlFreeParserCtxt(ctxt);
11443
0
        return (NULL);
11444
0
    }
11445
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11446
0
        xmlFreeInputStream(input);
11447
0
        xmlFreeParserCtxt(ctxt);
11448
0
        return(NULL);
11449
0
    }
11450
11451
0
    return(ctxt);
11452
0
}
11453
11454
#ifdef LIBXML_VALID_ENABLED
11455
/************************************************************************
11456
 *                  *
11457
 *    Front ends when parsing a DTD       *
11458
 *                  *
11459
 ************************************************************************/
11460
11461
/**
11462
 * Parse a DTD.
11463
 *
11464
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11465
 * to make external entities work.
11466
 *
11467
 * @since 2.14.0
11468
 *
11469
 * @param ctxt  a parser context
11470
 * @param input  a parser input
11471
 * @param publicId  public ID of the DTD (optional)
11472
 * @param systemId  system ID of the DTD (optional)
11473
 * @returns the resulting xmlDtd or NULL in case of error.
11474
 * `input` will be freed by the function in any case.
11475
 */
11476
xmlDtd *
11477
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11478
0
                const xmlChar *publicId, const xmlChar *systemId) {
11479
0
    xmlDtdPtr ret = NULL;
11480
11481
0
    if ((ctxt == NULL) || (input == NULL)) {
11482
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11483
0
        xmlFreeInputStream(input);
11484
0
        return(NULL);
11485
0
    }
11486
11487
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11488
0
        xmlFreeInputStream(input);
11489
0
        return(NULL);
11490
0
    }
11491
11492
0
    if (publicId == NULL)
11493
0
        publicId = BAD_CAST "none";
11494
0
    if (systemId == NULL)
11495
0
        systemId = BAD_CAST "none";
11496
11497
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11498
0
    if (ctxt->myDoc == NULL) {
11499
0
        xmlErrMemory(ctxt);
11500
0
        goto error;
11501
0
    }
11502
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11503
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11504
0
                                       publicId, systemId);
11505
0
    if (ctxt->myDoc->extSubset == NULL) {
11506
0
        xmlErrMemory(ctxt);
11507
0
        xmlFreeDoc(ctxt->myDoc);
11508
0
        goto error;
11509
0
    }
11510
11511
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11512
11513
0
    if (ctxt->wellFormed) {
11514
0
        ret = ctxt->myDoc->extSubset;
11515
0
        ctxt->myDoc->extSubset = NULL;
11516
0
        if (ret != NULL) {
11517
0
            xmlNodePtr tmp;
11518
11519
0
            ret->doc = NULL;
11520
0
            tmp = ret->children;
11521
0
            while (tmp != NULL) {
11522
0
                tmp->doc = NULL;
11523
0
                tmp = tmp->next;
11524
0
            }
11525
0
        }
11526
0
    } else {
11527
0
        ret = NULL;
11528
0
    }
11529
0
    xmlFreeDoc(ctxt->myDoc);
11530
0
    ctxt->myDoc = NULL;
11531
11532
0
error:
11533
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11534
11535
0
    return(ret);
11536
0
}
11537
11538
/**
11539
 * Load and parse a DTD
11540
 *
11541
 * @deprecated Use #xmlCtxtParseDtd.
11542
 *
11543
 * @param sax  the SAX handler block or NULL
11544
 * @param input  an Input Buffer
11545
 * @param enc  the charset encoding if known
11546
 * @returns the resulting xmlDtd or NULL in case of error.
11547
 * `input` will be freed by the function in any case.
11548
 */
11549
11550
xmlDtd *
11551
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11552
0
        xmlCharEncoding enc) {
11553
0
    xmlDtdPtr ret = NULL;
11554
0
    xmlParserCtxtPtr ctxt;
11555
0
    xmlParserInputPtr pinput = NULL;
11556
11557
0
    if (input == NULL)
11558
0
  return(NULL);
11559
11560
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11561
0
    if (ctxt == NULL) {
11562
0
        xmlFreeParserInputBuffer(input);
11563
0
  return(NULL);
11564
0
    }
11565
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11566
11567
    /*
11568
     * generate a parser input from the I/O handler
11569
     */
11570
11571
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11572
0
    if (pinput == NULL) {
11573
0
        xmlFreeParserInputBuffer(input);
11574
0
  xmlFreeParserCtxt(ctxt);
11575
0
  return(NULL);
11576
0
    }
11577
11578
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11579
0
        xmlSwitchEncoding(ctxt, enc);
11580
0
    }
11581
11582
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11583
11584
0
    xmlFreeParserCtxt(ctxt);
11585
0
    return(ret);
11586
0
}
11587
11588
/**
11589
 * Load and parse an external subset.
11590
 *
11591
 * @deprecated Use #xmlCtxtParseDtd.
11592
 *
11593
 * @param sax  the SAX handler block
11594
 * @param publicId  public identifier of the DTD (optional)
11595
 * @param systemId  system identifier (URL) of the DTD
11596
 * @returns the resulting xmlDtd or NULL in case of error.
11597
 */
11598
11599
xmlDtd *
11600
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11601
0
               const xmlChar *systemId) {
11602
0
    xmlDtdPtr ret = NULL;
11603
0
    xmlParserCtxtPtr ctxt;
11604
0
    xmlParserInputPtr input = NULL;
11605
0
    xmlChar* systemIdCanonic;
11606
11607
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11608
11609
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11610
0
    if (ctxt == NULL) {
11611
0
  return(NULL);
11612
0
    }
11613
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11614
11615
    /*
11616
     * Canonicalise the system ID
11617
     */
11618
0
    systemIdCanonic = xmlCanonicPath(systemId);
11619
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11620
0
  xmlFreeParserCtxt(ctxt);
11621
0
  return(NULL);
11622
0
    }
11623
11624
    /*
11625
     * Ask the Entity resolver to load the damn thing
11626
     */
11627
11628
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11629
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11630
0
                                   systemIdCanonic);
11631
0
    if (input == NULL) {
11632
0
  xmlFreeParserCtxt(ctxt);
11633
0
  if (systemIdCanonic != NULL)
11634
0
      xmlFree(systemIdCanonic);
11635
0
  return(NULL);
11636
0
    }
11637
11638
0
    if (input->filename == NULL)
11639
0
  input->filename = (char *) systemIdCanonic;
11640
0
    else
11641
0
  xmlFree(systemIdCanonic);
11642
11643
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11644
11645
0
    xmlFreeParserCtxt(ctxt);
11646
0
    return(ret);
11647
0
}
11648
11649
11650
/**
11651
 * Load and parse an external subset.
11652
 *
11653
 * @param publicId  public identifier of the DTD (optional)
11654
 * @param systemId  system identifier (URL) of the DTD
11655
 * @returns the resulting xmlDtd or NULL in case of error.
11656
 */
11657
11658
xmlDtd *
11659
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11660
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11661
0
}
11662
#endif /* LIBXML_VALID_ENABLED */
11663
11664
/************************************************************************
11665
 *                  *
11666
 *    Front ends when parsing an Entity     *
11667
 *                  *
11668
 ************************************************************************/
11669
11670
static xmlNodePtr
11671
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11672
1.49k
                            int hasTextDecl, int buildTree) {
11673
1.49k
    xmlNodePtr root = NULL;
11674
1.49k
    xmlNodePtr list = NULL;
11675
1.49k
    xmlChar *rootName = BAD_CAST "#root";
11676
1.49k
    int result;
11677
11678
1.49k
    if (buildTree) {
11679
1.49k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11680
1.49k
        if (root == NULL) {
11681
0
            xmlErrMemory(ctxt);
11682
0
            goto error;
11683
0
        }
11684
1.49k
    }
11685
11686
1.49k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11687
3
        goto error;
11688
11689
1.48k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11690
1.48k
    spacePush(ctxt, -1);
11691
11692
1.48k
    if (buildTree)
11693
1.48k
        nodePush(ctxt, root);
11694
11695
1.48k
    if (hasTextDecl) {
11696
0
        xmlDetectEncoding(ctxt);
11697
11698
        /*
11699
         * Parse a possible text declaration first
11700
         */
11701
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11702
0
            (IS_BLANK_CH(NXT(5)))) {
11703
0
            xmlParseTextDecl(ctxt);
11704
            /*
11705
             * An XML-1.0 document can't reference an entity not XML-1.0
11706
             */
11707
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11708
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11709
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11710
0
                               "Version mismatch between document and "
11711
0
                               "entity\n");
11712
0
            }
11713
0
        }
11714
0
    }
11715
11716
1.48k
    xmlParseContentInternal(ctxt);
11717
11718
1.48k
    if (ctxt->input->cur < ctxt->input->end)
11719
103
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11720
11721
1.48k
    if ((ctxt->wellFormed) ||
11722
1.48k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11723
1.03k
        if (root != NULL) {
11724
1.03k
            xmlNodePtr cur;
11725
11726
            /*
11727
             * Unlink newly created node list.
11728
             */
11729
1.03k
            list = root->children;
11730
1.03k
            root->children = NULL;
11731
1.03k
            root->last = NULL;
11732
3.48k
            for (cur = list; cur != NULL; cur = cur->next)
11733
2.45k
                cur->parent = NULL;
11734
1.03k
        }
11735
1.03k
    }
11736
11737
    /*
11738
     * Read the rest of the stream in case of errors. We want
11739
     * to account for the whole entity size.
11740
     */
11741
1.48k
    do {
11742
1.48k
        ctxt->input->cur = ctxt->input->end;
11743
1.48k
        xmlParserShrink(ctxt);
11744
1.48k
        result = xmlParserGrow(ctxt);
11745
1.48k
    } while (result > 0);
11746
11747
1.48k
    if (buildTree)
11748
1.48k
        nodePop(ctxt);
11749
11750
1.48k
    namePop(ctxt);
11751
1.48k
    spacePop(ctxt);
11752
11753
1.48k
    xmlCtxtPopInput(ctxt);
11754
11755
1.49k
error:
11756
1.49k
    xmlFreeNode(root);
11757
11758
1.49k
    return(list);
11759
1.48k
}
11760
11761
static void
11762
1.50k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11763
1.50k
    xmlParserInputPtr input;
11764
1.50k
    xmlNodePtr list;
11765
1.50k
    unsigned long consumed;
11766
1.50k
    int isExternal;
11767
1.50k
    int buildTree;
11768
1.50k
    int oldMinNsIndex;
11769
1.50k
    int oldNodelen, oldNodemem;
11770
11771
1.50k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11772
1.50k
    buildTree = (ctxt->node != NULL);
11773
11774
    /*
11775
     * Recursion check
11776
     */
11777
1.50k
    if (ent->flags & XML_ENT_EXPANDING) {
11778
13
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11779
13
        goto error;
11780
13
    }
11781
11782
    /*
11783
     * Load entity
11784
     */
11785
1.49k
    input = xmlNewEntityInputStream(ctxt, ent);
11786
1.49k
    if (input == NULL)
11787
0
        goto error;
11788
11789
    /*
11790
     * When building a tree, we need to limit the scope of namespace
11791
     * declarations, so that entities don't reference xmlNs structs
11792
     * from the parent of a reference.
11793
     */
11794
1.49k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11795
1.49k
    if (buildTree)
11796
1.49k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11797
11798
1.49k
    oldNodelen = ctxt->nodelen;
11799
1.49k
    oldNodemem = ctxt->nodemem;
11800
1.49k
    ctxt->nodelen = 0;
11801
1.49k
    ctxt->nodemem = 0;
11802
11803
    /*
11804
     * Parse content
11805
     *
11806
     * This initiates a recursive call chain:
11807
     *
11808
     * - xmlCtxtParseContentInternal
11809
     * - xmlParseContentInternal
11810
     * - xmlParseReference
11811
     * - xmlCtxtParseEntity
11812
     *
11813
     * The nesting depth is limited by the maximum number of inputs,
11814
     * see xmlCtxtPushInput.
11815
     *
11816
     * It's possible to make this non-recursive (minNsIndex must be
11817
     * stored in the input struct) at the expense of code readability.
11818
     */
11819
11820
1.49k
    ent->flags |= XML_ENT_EXPANDING;
11821
11822
1.49k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11823
11824
1.49k
    ent->flags &= ~XML_ENT_EXPANDING;
11825
11826
1.49k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11827
1.49k
    ctxt->nodelen = oldNodelen;
11828
1.49k
    ctxt->nodemem = oldNodemem;
11829
11830
    /*
11831
     * Entity size accounting
11832
     */
11833
1.49k
    consumed = input->consumed;
11834
1.49k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11835
11836
1.49k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11837
816
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11838
11839
1.49k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11840
816
        if (isExternal)
11841
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11842
11843
816
        ent->children = list;
11844
11845
3.27k
        while (list != NULL) {
11846
2.45k
            list->parent = (xmlNodePtr) ent;
11847
11848
            /*
11849
             * Downstream code like the nginx xslt module can set
11850
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11851
             * might have a different or a NULL document.
11852
             */
11853
2.45k
            if (list->doc != ent->doc)
11854
0
                xmlSetTreeDoc(list, ent->doc);
11855
11856
2.45k
            if (list->next == NULL)
11857
329
                ent->last = list;
11858
2.45k
            list = list->next;
11859
2.45k
        }
11860
816
    } else {
11861
676
        xmlFreeNodeList(list);
11862
676
    }
11863
11864
1.49k
    xmlFreeInputStream(input);
11865
11866
1.50k
error:
11867
1.50k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11868
1.50k
}
11869
11870
/**
11871
 * Parse an external general entity within an existing parsing context
11872
 * An external general parsed entity is well-formed if it matches the
11873
 * production labeled extParsedEnt.
11874
 *
11875
 *     [78] extParsedEnt ::= TextDecl? content
11876
 *
11877
 * @param ctxt  the existing parsing context
11878
 * @param URL  the URL for the entity to load
11879
 * @param ID  the System ID for the entity to load
11880
 * @param listOut  the return value for the set of parsed nodes
11881
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11882
 *    the parser error code otherwise
11883
 */
11884
11885
int
11886
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11887
0
                           const xmlChar *ID, xmlNode **listOut) {
11888
0
    xmlParserInputPtr input;
11889
0
    xmlNodePtr list;
11890
11891
0
    if (listOut != NULL)
11892
0
        *listOut = NULL;
11893
11894
0
    if (ctxt == NULL)
11895
0
        return(XML_ERR_ARGUMENT);
11896
11897
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11898
0
                            XML_RESOURCE_GENERAL_ENTITY);
11899
0
    if (input == NULL)
11900
0
        return(ctxt->errNo);
11901
11902
0
    xmlCtxtInitializeLate(ctxt);
11903
11904
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11905
0
    if (listOut != NULL)
11906
0
        *listOut = list;
11907
0
    else
11908
0
        xmlFreeNodeList(list);
11909
11910
0
    xmlFreeInputStream(input);
11911
0
    return(ctxt->errNo);
11912
0
}
11913
11914
#ifdef LIBXML_SAX1_ENABLED
11915
/**
11916
 * Parse an external general entity
11917
 * An external general parsed entity is well-formed if it matches the
11918
 * production labeled extParsedEnt.
11919
 *
11920
 * This function uses deprecated global variables to set parser options
11921
 * which default to XML_PARSE_NODICT.
11922
 *
11923
 * @deprecated Use #xmlParseCtxtExternalEntity.
11924
 *
11925
 *     [78] extParsedEnt ::= TextDecl? content
11926
 *
11927
 * @param doc  the document the chunk pertains to
11928
 * @param sax  the SAX handler block (possibly NULL)
11929
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11930
 * @param depth  Used for loop detection, use 0
11931
 * @param URL  the URL for the entity to load
11932
 * @param ID  the System ID for the entity to load
11933
 * @param list  the return value for the set of parsed nodes
11934
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if doc is NULL and
11935
 *    the parser error code otherwise
11936
 */
11937
11938
int
11939
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11940
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11941
0
    xmlParserCtxtPtr ctxt;
11942
0
    int ret;
11943
11944
0
    if (list != NULL)
11945
0
        *list = NULL;
11946
11947
0
    if (doc == NULL)
11948
0
        return(XML_ERR_ARGUMENT);
11949
11950
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11951
0
    if (ctxt == NULL)
11952
0
        return(XML_ERR_NO_MEMORY);
11953
11954
0
    ctxt->depth = depth;
11955
0
    ctxt->myDoc = doc;
11956
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11957
11958
0
    xmlFreeParserCtxt(ctxt);
11959
0
    return(ret);
11960
0
}
11961
11962
/**
11963
 * Parse a well-balanced chunk of an XML document
11964
 * called by the parser
11965
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11966
 * the content production in the XML grammar:
11967
 *
11968
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11969
 *                       Comment)*
11970
 *
11971
 * This function uses deprecated global variables to set parser options
11972
 * which default to XML_PARSE_NODICT.
11973
 *
11974
 * @param doc  the document the chunk pertains to (must not be NULL)
11975
 * @param sax  the SAX handler block (possibly NULL)
11976
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11977
 * @param depth  Used for loop detection, use 0
11978
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11979
 * @param lst  the return value for the set of parsed nodes
11980
 * @returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if string is NULL and
11981
 *    the parser error code otherwise
11982
 */
11983
11984
int
11985
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11986
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11987
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11988
0
                                                depth, string, lst, 0 );
11989
0
}
11990
#endif /* LIBXML_SAX1_ENABLED */
11991
11992
/**
11993
 * Parse a well-balanced chunk of XML matching the 'content' production.
11994
 *
11995
 * Namespaces in scope of `node` and entities of `node`'s document are
11996
 * recognized. When validating, the DTD of `node`'s document is used.
11997
 *
11998
 * Always consumes `input` even in error case.
11999
 *
12000
 * @since 2.14.0
12001
 *
12002
 * @param ctxt  parser context
12003
 * @param input  parser input
12004
 * @param node  target node or document
12005
 * @param hasTextDecl  whether to parse text declaration
12006
 * @returns a node list or NULL in case of error.
12007
 */
12008
xmlNode *
12009
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12010
0
                    xmlNode *node, int hasTextDecl) {
12011
0
    xmlDocPtr doc;
12012
0
    xmlNodePtr cur, list = NULL;
12013
0
    int nsnr = 0;
12014
0
    xmlDictPtr oldDict;
12015
0
    int oldOptions, oldDictNames, oldLoadSubset;
12016
12017
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12018
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12019
0
        goto exit;
12020
0
    }
12021
12022
0
    doc = node->doc;
12023
0
    if (doc == NULL) {
12024
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12025
0
        goto exit;
12026
0
    }
12027
12028
0
    switch (node->type) {
12029
0
        case XML_ELEMENT_NODE:
12030
0
        case XML_DOCUMENT_NODE:
12031
0
        case XML_HTML_DOCUMENT_NODE:
12032
0
            break;
12033
12034
0
        case XML_ATTRIBUTE_NODE:
12035
0
        case XML_TEXT_NODE:
12036
0
        case XML_CDATA_SECTION_NODE:
12037
0
        case XML_ENTITY_REF_NODE:
12038
0
        case XML_PI_NODE:
12039
0
        case XML_COMMENT_NODE:
12040
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12041
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12042
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12043
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12044
0
                    node = cur;
12045
0
                    break;
12046
0
                }
12047
0
            }
12048
0
            break;
12049
12050
0
        default:
12051
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12052
0
            goto exit;
12053
0
    }
12054
12055
0
    xmlCtxtReset(ctxt);
12056
12057
0
    oldDict = ctxt->dict;
12058
0
    oldOptions = ctxt->options;
12059
0
    oldDictNames = ctxt->dictNames;
12060
0
    oldLoadSubset = ctxt->loadsubset;
12061
12062
    /*
12063
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12064
     */
12065
0
    if (doc->dict != NULL) {
12066
0
        ctxt->dict = doc->dict;
12067
0
    } else {
12068
0
        ctxt->options |= XML_PARSE_NODICT;
12069
0
        ctxt->dictNames = 0;
12070
0
    }
12071
12072
    /*
12073
     * Disable IDs
12074
     */
12075
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12076
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12077
12078
0
    ctxt->myDoc = doc;
12079
12080
0
#ifdef LIBXML_HTML_ENABLED
12081
0
    if (ctxt->html) {
12082
        /*
12083
         * When parsing in context, it makes no sense to add implied
12084
         * elements like html/body/etc...
12085
         */
12086
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12087
12088
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12089
0
    } else
12090
0
#endif
12091
0
    {
12092
0
        xmlCtxtInitializeLate(ctxt);
12093
12094
        /*
12095
         * initialize the SAX2 namespaces stack
12096
         */
12097
0
        cur = node;
12098
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12099
0
            xmlNsPtr ns = cur->nsDef;
12100
0
            xmlHashedString hprefix, huri;
12101
12102
0
            while (ns != NULL) {
12103
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12104
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12105
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12106
0
                    nsnr++;
12107
0
                ns = ns->next;
12108
0
            }
12109
0
            cur = cur->parent;
12110
0
        }
12111
12112
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12113
12114
0
        if (nsnr > 0)
12115
0
            xmlParserNsPop(ctxt, nsnr);
12116
0
    }
12117
12118
0
    ctxt->dict = oldDict;
12119
0
    ctxt->options = oldOptions;
12120
0
    ctxt->dictNames = oldDictNames;
12121
0
    ctxt->loadsubset = oldLoadSubset;
12122
0
    ctxt->myDoc = NULL;
12123
0
    ctxt->node = NULL;
12124
12125
0
exit:
12126
0
    xmlFreeInputStream(input);
12127
0
    return(list);
12128
0
}
12129
12130
/**
12131
 * Parse a well-balanced chunk of an XML document
12132
 * within the context (DTD, namespaces, etc ...) of the given node.
12133
 *
12134
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12135
 * the content production in the XML grammar:
12136
 *
12137
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12138
 *                       Comment)*
12139
 *
12140
 * This function assumes the encoding of `node`'s document which is
12141
 * typically not what you want. A better alternative is
12142
 * #xmlCtxtParseContent.
12143
 *
12144
 * @param node  the context node
12145
 * @param data  the input string
12146
 * @param datalen  the input string length in bytes
12147
 * @param options  a combination of xmlParserOption
12148
 * @param listOut  the return value for the set of parsed nodes
12149
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12150
 * error code otherwise
12151
 */
12152
xmlParserErrors
12153
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12154
0
                      int options, xmlNode **listOut) {
12155
0
    xmlParserCtxtPtr ctxt;
12156
0
    xmlParserInputPtr input;
12157
0
    xmlDocPtr doc;
12158
0
    xmlNodePtr list;
12159
0
    xmlParserErrors ret;
12160
12161
0
    if (listOut == NULL)
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
0
    *listOut = NULL;
12164
12165
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
    doc = node->doc;
12169
0
    if (doc == NULL)
12170
0
        return(XML_ERR_INTERNAL_ERROR);
12171
12172
0
#ifdef LIBXML_HTML_ENABLED
12173
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12174
0
        ctxt = htmlNewParserCtxt();
12175
0
    }
12176
0
    else
12177
0
#endif
12178
0
        ctxt = xmlNewParserCtxt();
12179
12180
0
    if (ctxt == NULL)
12181
0
        return(XML_ERR_NO_MEMORY);
12182
12183
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12184
0
                                      (const char *) doc->encoding,
12185
0
                                      XML_INPUT_BUF_STATIC);
12186
0
    if (input == NULL) {
12187
0
        xmlFreeParserCtxt(ctxt);
12188
0
        return(XML_ERR_NO_MEMORY);
12189
0
    }
12190
12191
0
    xmlCtxtUseOptions(ctxt, options);
12192
12193
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12194
12195
0
    if (list == NULL) {
12196
0
        ret = ctxt->errNo;
12197
0
        if (ret == XML_ERR_ARGUMENT)
12198
0
            ret = XML_ERR_INTERNAL_ERROR;
12199
0
    } else {
12200
0
        ret = XML_ERR_OK;
12201
0
        *listOut = list;
12202
0
    }
12203
12204
0
    xmlFreeParserCtxt(ctxt);
12205
12206
0
    return(ret);
12207
0
}
12208
12209
#ifdef LIBXML_SAX1_ENABLED
12210
/**
12211
 * Parse a well-balanced chunk of an XML document
12212
 *
12213
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12214
 * the content production in the XML grammar:
12215
 *
12216
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12217
 *                       Comment)*
12218
 *
12219
 * In case recover is set to 1, the nodelist will not be empty even if
12220
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12221
 * some extent.
12222
 *
12223
 * This function uses deprecated global variables to set parser options
12224
 * which default to XML_PARSE_NODICT.
12225
 *
12226
 * @param doc  the document the chunk pertains to (must not be NULL)
12227
 * @param sax  the SAX handler block (possibly NULL)
12228
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12229
 * @param depth  Used for loop detection, use 0
12230
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12231
 * @param listOut  the return value for the set of parsed nodes
12232
 * @param recover  return nodes even if the data is broken (use 0)
12233
 * @returns 0 if the chunk is well balanced, or the parser error code
12234
 * otherwise.
12235
 */
12236
int
12237
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12238
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12239
0
     int recover) {
12240
0
    xmlParserCtxtPtr ctxt;
12241
0
    xmlParserInputPtr input;
12242
0
    xmlNodePtr list;
12243
0
    int ret;
12244
12245
0
    if (listOut != NULL)
12246
0
        *listOut = NULL;
12247
12248
0
    if (string == NULL)
12249
0
        return(XML_ERR_ARGUMENT);
12250
12251
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12252
0
    if (ctxt == NULL)
12253
0
        return(XML_ERR_NO_MEMORY);
12254
12255
0
    xmlCtxtInitializeLate(ctxt);
12256
12257
0
    ctxt->depth = depth;
12258
0
    ctxt->myDoc = doc;
12259
0
    if (recover) {
12260
0
        ctxt->options |= XML_PARSE_RECOVER;
12261
0
        ctxt->recovery = 1;
12262
0
    }
12263
12264
0
    input = xmlNewStringInputStream(ctxt, string);
12265
0
    if (input == NULL) {
12266
0
        ret = ctxt->errNo;
12267
0
        goto error;
12268
0
    }
12269
12270
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12271
0
    if (listOut != NULL)
12272
0
        *listOut = list;
12273
0
    else
12274
0
        xmlFreeNodeList(list);
12275
12276
0
    if (!ctxt->wellFormed)
12277
0
        ret = ctxt->errNo;
12278
0
    else
12279
0
        ret = XML_ERR_OK;
12280
12281
0
error:
12282
0
    xmlFreeInputStream(input);
12283
0
    xmlFreeParserCtxt(ctxt);
12284
0
    return(ret);
12285
0
}
12286
12287
/**
12288
 * Parse an XML external entity out of context and build a tree.
12289
 * It uses the given SAX function block to handle the parsing callbacks.
12290
 * If sax is NULL, fallback to the default DOM tree building routines.
12291
 *
12292
 * @deprecated Don't use.
12293
 *
12294
 *     [78] extParsedEnt ::= TextDecl? content
12295
 *
12296
 * This corresponds to a "Well Balanced" chunk
12297
 *
12298
 * This function uses deprecated global variables to set parser options
12299
 * which default to XML_PARSE_NODICT.
12300
 *
12301
 * @param sax  the SAX handler block
12302
 * @param filename  the filename
12303
 * @returns the resulting document tree
12304
 */
12305
12306
xmlDoc *
12307
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12308
0
    xmlDocPtr ret;
12309
0
    xmlParserCtxtPtr ctxt;
12310
12311
0
    ctxt = xmlCreateFileParserCtxt(filename);
12312
0
    if (ctxt == NULL) {
12313
0
  return(NULL);
12314
0
    }
12315
0
    if (sax != NULL) {
12316
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12317
0
            *ctxt->sax = *sax;
12318
0
        } else {
12319
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12320
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12321
0
        }
12322
0
        ctxt->userData = NULL;
12323
0
    }
12324
12325
0
    xmlParseExtParsedEnt(ctxt);
12326
12327
0
    if (ctxt->wellFormed) {
12328
0
  ret = ctxt->myDoc;
12329
0
    } else {
12330
0
        ret = NULL;
12331
0
        xmlFreeDoc(ctxt->myDoc);
12332
0
    }
12333
12334
0
    xmlFreeParserCtxt(ctxt);
12335
12336
0
    return(ret);
12337
0
}
12338
12339
/**
12340
 * Parse an XML external entity out of context and build a tree.
12341
 *
12342
 *     [78] extParsedEnt ::= TextDecl? content
12343
 *
12344
 * This corresponds to a "Well Balanced" chunk
12345
 *
12346
 * This function uses deprecated global variables to set parser options
12347
 * which default to XML_PARSE_NODICT.
12348
 *
12349
 * @deprecated Don't use.
12350
 *
12351
 * @param filename  the filename
12352
 * @returns the resulting document tree
12353
 */
12354
12355
xmlDoc *
12356
0
xmlParseEntity(const char *filename) {
12357
0
    return(xmlSAXParseEntity(NULL, filename));
12358
0
}
12359
#endif /* LIBXML_SAX1_ENABLED */
12360
12361
/**
12362
 * Create a parser context for an external entity
12363
 * Automatic support for ZLIB/Compress compressed document is provided
12364
 * by default if found at compile-time.
12365
 *
12366
 * @deprecated Don't use.
12367
 *
12368
 * @param URL  the entity URL
12369
 * @param ID  the entity PUBLIC ID
12370
 * @param base  a possible base for the target URI
12371
 * @returns the new parser context or NULL
12372
 */
12373
xmlParserCtxt *
12374
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12375
0
                    const xmlChar *base) {
12376
0
    xmlParserCtxtPtr ctxt;
12377
0
    xmlParserInputPtr input;
12378
0
    xmlChar *uri = NULL;
12379
12380
0
    ctxt = xmlNewParserCtxt();
12381
0
    if (ctxt == NULL)
12382
0
  return(NULL);
12383
12384
0
    if (base != NULL) {
12385
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12386
0
            goto error;
12387
0
        if (uri != NULL)
12388
0
            URL = uri;
12389
0
    }
12390
12391
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12392
0
                            XML_RESOURCE_UNKNOWN);
12393
0
    if (input == NULL)
12394
0
        goto error;
12395
12396
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12397
0
        xmlFreeInputStream(input);
12398
0
        goto error;
12399
0
    }
12400
12401
0
    xmlFree(uri);
12402
0
    return(ctxt);
12403
12404
0
error:
12405
0
    xmlFree(uri);
12406
0
    xmlFreeParserCtxt(ctxt);
12407
0
    return(NULL);
12408
0
}
12409
12410
/************************************************************************
12411
 *                  *
12412
 *    Front ends when parsing from a file     *
12413
 *                  *
12414
 ************************************************************************/
12415
12416
/**
12417
 * Create a parser context for a file or URL content.
12418
 * Automatic support for ZLIB/Compress compressed document is provided
12419
 * by default if found at compile-time and for file accesses
12420
 *
12421
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12422
 *
12423
 * @param filename  the filename or URL
12424
 * @param options  a combination of xmlParserOption
12425
 * @returns the new parser context or NULL
12426
 */
12427
xmlParserCtxt *
12428
xmlCreateURLParserCtxt(const char *filename, int options)
12429
0
{
12430
0
    xmlParserCtxtPtr ctxt;
12431
0
    xmlParserInputPtr input;
12432
12433
0
    ctxt = xmlNewParserCtxt();
12434
0
    if (ctxt == NULL)
12435
0
  return(NULL);
12436
12437
0
    options |= XML_PARSE_UNZIP;
12438
12439
0
    xmlCtxtUseOptions(ctxt, options);
12440
12441
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12442
0
    if (input == NULL) {
12443
0
  xmlFreeParserCtxt(ctxt);
12444
0
  return(NULL);
12445
0
    }
12446
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12447
0
        xmlFreeInputStream(input);
12448
0
        xmlFreeParserCtxt(ctxt);
12449
0
        return(NULL);
12450
0
    }
12451
12452
0
    return(ctxt);
12453
0
}
12454
12455
/**
12456
 * Create a parser context for a file content.
12457
 * Automatic support for ZLIB/Compress compressed document is provided
12458
 * by default if found at compile-time.
12459
 *
12460
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12461
 *
12462
 * @param filename  the filename
12463
 * @returns the new parser context or NULL
12464
 */
12465
xmlParserCtxt *
12466
xmlCreateFileParserCtxt(const char *filename)
12467
0
{
12468
0
    return(xmlCreateURLParserCtxt(filename, 0));
12469
0
}
12470
12471
#ifdef LIBXML_SAX1_ENABLED
12472
/**
12473
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12474
 * compressed document is provided by default if found at compile-time.
12475
 * It uses the given SAX function block to handle the parsing callbacks.
12476
 * If sax is NULL, fallback to the default DOM tree building routines.
12477
 *
12478
 * This function uses deprecated global variables to set parser options
12479
 * which default to XML_PARSE_NODICT.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It uses the given SAX function block to handle the parsing callbacks.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * This function uses deprecated global variables to set parser options
12532
 * which default to XML_PARSE_NODICT.
12533
 *
12534
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12535
 *
12536
 * @param sax  the SAX handler block
12537
 * @param filename  the filename
12538
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12539
 *             documents
12540
 * @returns the resulting document tree
12541
 */
12542
12543
xmlDoc *
12544
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12545
0
                          int recovery) {
12546
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12547
0
}
12548
12549
/**
12550
 * Parse an XML in-memory document and build a tree.
12551
 * In the case the document is not Well Formed, an attempt to build a
12552
 * tree is tried anyway
12553
 *
12554
 * This function uses deprecated global variables to set parser options
12555
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12556
 *
12557
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12558
 *
12559
 * @param cur  a pointer to an array of xmlChar
12560
 * @returns the resulting document tree or NULL in case of failure
12561
 */
12562
12563
xmlDoc *
12564
0
xmlRecoverDoc(const xmlChar *cur) {
12565
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12566
0
}
12567
12568
/**
12569
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12570
 * compressed document is provided by default if found at compile-time.
12571
 *
12572
 * This function uses deprecated global variables to set parser options
12573
 * which default to XML_PARSE_NODICT.
12574
 *
12575
 * @deprecated Use #xmlReadFile.
12576
 *
12577
 * @param filename  the filename
12578
 * @returns the resulting document tree if the file was wellformed,
12579
 * NULL otherwise.
12580
 */
12581
12582
xmlDoc *
12583
0
xmlParseFile(const char *filename) {
12584
0
    return(xmlSAXParseFile(NULL, filename, 0));
12585
0
}
12586
12587
/**
12588
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12589
 * compressed document is provided by default if found at compile-time.
12590
 * In the case the document is not Well Formed, it attempts to build
12591
 * a tree anyway
12592
 *
12593
 * This function uses deprecated global variables to set parser options
12594
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12595
 *
12596
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12597
 *
12598
 * @param filename  the filename
12599
 * @returns the resulting document tree or NULL in case of failure
12600
 */
12601
12602
xmlDoc *
12603
0
xmlRecoverFile(const char *filename) {
12604
0
    return(xmlSAXParseFile(NULL, filename, 1));
12605
0
}
12606
12607
12608
/**
12609
 * Setup the parser context to parse a new buffer; Clears any prior
12610
 * contents from the parser context. The buffer parameter must not be
12611
 * NULL, but the filename parameter can be
12612
 *
12613
 * @deprecated Don't use.
12614
 *
12615
 * @param ctxt  an XML parser context
12616
 * @param buffer  a xmlChar * buffer
12617
 * @param filename  a file name
12618
 */
12619
void
12620
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12621
                             const char* filename)
12622
0
{
12623
0
    xmlParserInputPtr input;
12624
12625
0
    if ((ctxt == NULL) || (buffer == NULL))
12626
0
        return;
12627
12628
0
    xmlCtxtReset(ctxt);
12629
12630
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12631
0
                                      NULL, 0);
12632
0
    if (input == NULL)
12633
0
        return;
12634
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12635
0
        xmlFreeInputStream(input);
12636
0
}
12637
12638
/**
12639
 * Parse an XML file and call the given SAX handler routines.
12640
 * Automatic support for ZLIB/Compress compressed document is provided
12641
 *
12642
 * This function uses deprecated global variables to set parser options
12643
 * which default to XML_PARSE_NODICT.
12644
 *
12645
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12646
 *
12647
 * @param sax  a SAX handler
12648
 * @param user_data  The user data returned on SAX callbacks
12649
 * @param filename  a file name
12650
 * @returns 0 in case of success or an error number otherwise
12651
 */
12652
int
12653
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12654
0
                    const char *filename) {
12655
0
    int ret = 0;
12656
0
    xmlParserCtxtPtr ctxt;
12657
12658
0
    ctxt = xmlCreateFileParserCtxt(filename);
12659
0
    if (ctxt == NULL) return -1;
12660
0
    if (sax != NULL) {
12661
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12662
0
            *ctxt->sax = *sax;
12663
0
        } else {
12664
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12665
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12666
0
        }
12667
0
  ctxt->userData = user_data;
12668
0
    }
12669
12670
0
    xmlParseDocument(ctxt);
12671
12672
0
    if (ctxt->wellFormed)
12673
0
  ret = 0;
12674
0
    else {
12675
0
        if (ctxt->errNo != 0)
12676
0
      ret = ctxt->errNo;
12677
0
  else
12678
0
      ret = -1;
12679
0
    }
12680
0
    if (ctxt->myDoc != NULL) {
12681
0
        xmlFreeDoc(ctxt->myDoc);
12682
0
  ctxt->myDoc = NULL;
12683
0
    }
12684
0
    xmlFreeParserCtxt(ctxt);
12685
12686
0
    return ret;
12687
0
}
12688
#endif /* LIBXML_SAX1_ENABLED */
12689
12690
/************************************************************************
12691
 *                  *
12692
 *    Front ends when parsing from memory     *
12693
 *                  *
12694
 ************************************************************************/
12695
12696
/**
12697
 * Create a parser context for an XML in-memory document. The input buffer
12698
 * must not contain a terminating null byte.
12699
 *
12700
 * @param buffer  a pointer to a char array
12701
 * @param size  the size of the array
12702
 * @returns the new parser context or NULL
12703
 */
12704
xmlParserCtxt *
12705
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12706
0
    xmlParserCtxtPtr ctxt;
12707
0
    xmlParserInputPtr input;
12708
12709
0
    if (size < 0)
12710
0
  return(NULL);
12711
12712
0
    ctxt = xmlNewParserCtxt();
12713
0
    if (ctxt == NULL)
12714
0
  return(NULL);
12715
12716
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12717
0
    if (input == NULL) {
12718
0
  xmlFreeParserCtxt(ctxt);
12719
0
  return(NULL);
12720
0
    }
12721
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12722
0
        xmlFreeInputStream(input);
12723
0
        xmlFreeParserCtxt(ctxt);
12724
0
        return(NULL);
12725
0
    }
12726
12727
0
    return(ctxt);
12728
0
}
12729
12730
#ifdef LIBXML_SAX1_ENABLED
12731
/**
12732
 * Parse an XML in-memory block and use the given SAX function block
12733
 * to handle the parsing callback. If sax is NULL, fallback to the default
12734
 * DOM tree building routines.
12735
 *
12736
 * This function uses deprecated global variables to set parser options
12737
 * which default to XML_PARSE_NODICT.
12738
 *
12739
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12740
 *
12741
 * User data (void *) is stored within the parser context in the
12742
 * context's _private member, so it is available nearly everywhere in libxml
12743
 *
12744
 * @param sax  the SAX handler block
12745
 * @param buffer  a pointer to a char array
12746
 * @param size  the size of the array
12747
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12748
 *             documents
12749
 * @param data  the userdata
12750
 * @returns the resulting document tree
12751
 */
12752
12753
xmlDoc *
12754
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12755
0
                          int size, int recovery, void *data) {
12756
0
    xmlDocPtr ret = NULL;
12757
0
    xmlParserCtxtPtr ctxt;
12758
0
    xmlParserInputPtr input;
12759
12760
0
    if (size < 0)
12761
0
        return(NULL);
12762
12763
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12764
0
    if (ctxt == NULL)
12765
0
        return(NULL);
12766
12767
0
    if (data != NULL)
12768
0
  ctxt->_private=data;
12769
12770
0
    if (recovery) {
12771
0
        ctxt->options |= XML_PARSE_RECOVER;
12772
0
        ctxt->recovery = 1;
12773
0
    }
12774
12775
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12776
0
                                      XML_INPUT_BUF_STATIC);
12777
12778
0
    if (input != NULL)
12779
0
        ret = xmlCtxtParseDocument(ctxt, input);
12780
12781
0
    xmlFreeParserCtxt(ctxt);
12782
0
    return(ret);
12783
0
}
12784
12785
/**
12786
 * Parse an XML in-memory block and use the given SAX function block
12787
 * to handle the parsing callback. If sax is NULL, fallback to the default
12788
 * DOM tree building routines.
12789
 *
12790
 * This function uses deprecated global variables to set parser options
12791
 * which default to XML_PARSE_NODICT.
12792
 *
12793
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12794
 *
12795
 * @param sax  the SAX handler block
12796
 * @param buffer  a pointer to a char array
12797
 * @param size  the size of the array
12798
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12799
 *             documents
12800
 * @returns the resulting document tree
12801
 */
12802
xmlDoc *
12803
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12804
0
            int size, int recovery) {
12805
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12806
0
}
12807
12808
/**
12809
 * Parse an XML in-memory block and build a tree.
12810
 *
12811
 * This function uses deprecated global variables to set parser options
12812
 * which default to XML_PARSE_NODICT.
12813
 *
12814
 * @deprecated Use #xmlReadMemory.
12815
 *
12816
 * @param buffer  a pointer to a char array
12817
 * @param size  the size of the array
12818
 * @returns the resulting document tree
12819
 */
12820
12821
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12822
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12823
0
}
12824
12825
/**
12826
 * Parse an XML in-memory block and build a tree.
12827
 * In the case the document is not Well Formed, an attempt to
12828
 * build a tree is tried anyway
12829
 *
12830
 * This function uses deprecated global variables to set parser options
12831
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12832
 *
12833
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12834
 *
12835
 * @param buffer  a pointer to a char array
12836
 * @param size  the size of the array
12837
 * @returns the resulting document tree or NULL in case of error
12838
 */
12839
12840
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12841
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12842
0
}
12843
12844
/**
12845
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12846
 *
12847
 * This function uses deprecated global variables to set parser options
12848
 * which default to XML_PARSE_NODICT.
12849
 *
12850
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12851
 *
12852
 * @param sax  a SAX handler
12853
 * @param user_data  The user data returned on SAX callbacks
12854
 * @param buffer  an in-memory XML document input
12855
 * @param size  the length of the XML document in bytes
12856
 * @returns 0 in case of success or an error number otherwise
12857
 */
12858
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12859
0
        const char *buffer, int size) {
12860
0
    int ret = 0;
12861
0
    xmlParserCtxtPtr ctxt;
12862
12863
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12864
0
    if (ctxt == NULL) return -1;
12865
0
    if (sax != NULL) {
12866
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12867
0
            *ctxt->sax = *sax;
12868
0
        } else {
12869
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12870
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12871
0
        }
12872
0
  ctxt->userData = user_data;
12873
0
    }
12874
12875
0
    xmlParseDocument(ctxt);
12876
12877
0
    if (ctxt->wellFormed)
12878
0
  ret = 0;
12879
0
    else {
12880
0
        if (ctxt->errNo != 0)
12881
0
      ret = ctxt->errNo;
12882
0
  else
12883
0
      ret = -1;
12884
0
    }
12885
0
    if (ctxt->myDoc != NULL) {
12886
0
        xmlFreeDoc(ctxt->myDoc);
12887
0
  ctxt->myDoc = NULL;
12888
0
    }
12889
0
    xmlFreeParserCtxt(ctxt);
12890
12891
0
    return ret;
12892
0
}
12893
#endif /* LIBXML_SAX1_ENABLED */
12894
12895
/**
12896
 * Creates a parser context for an XML in-memory document.
12897
 *
12898
 * @param str  a pointer to an array of xmlChar
12899
 * @returns the new parser context or NULL
12900
 */
12901
xmlParserCtxt *
12902
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12903
0
    xmlParserCtxtPtr ctxt;
12904
0
    xmlParserInputPtr input;
12905
12906
0
    ctxt = xmlNewParserCtxt();
12907
0
    if (ctxt == NULL)
12908
0
  return(NULL);
12909
12910
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12911
0
    if (input == NULL) {
12912
0
  xmlFreeParserCtxt(ctxt);
12913
0
  return(NULL);
12914
0
    }
12915
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12916
0
        xmlFreeInputStream(input);
12917
0
        xmlFreeParserCtxt(ctxt);
12918
0
        return(NULL);
12919
0
    }
12920
12921
0
    return(ctxt);
12922
0
}
12923
12924
#ifdef LIBXML_SAX1_ENABLED
12925
/**
12926
 * Parse an XML in-memory document and build a tree.
12927
 * It use the given SAX function block to handle the parsing callback.
12928
 * If sax is NULL, fallback to the default DOM tree building routines.
12929
 *
12930
 * This function uses deprecated global variables to set parser options
12931
 * which default to XML_PARSE_NODICT.
12932
 *
12933
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12934
 *
12935
 * @param sax  the SAX handler block
12936
 * @param cur  a pointer to an array of xmlChar
12937
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12938
 *             documents
12939
 * @returns the resulting document tree
12940
 */
12941
12942
xmlDoc *
12943
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12944
0
    xmlDocPtr ret;
12945
0
    xmlParserCtxtPtr ctxt;
12946
0
    xmlSAXHandlerPtr oldsax = NULL;
12947
12948
0
    if (cur == NULL) return(NULL);
12949
12950
12951
0
    ctxt = xmlCreateDocParserCtxt(cur);
12952
0
    if (ctxt == NULL) return(NULL);
12953
0
    if (sax != NULL) {
12954
0
        oldsax = ctxt->sax;
12955
0
        ctxt->sax = sax;
12956
0
        ctxt->userData = NULL;
12957
0
    }
12958
12959
0
    xmlParseDocument(ctxt);
12960
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12961
0
    else {
12962
0
       ret = NULL;
12963
0
       xmlFreeDoc(ctxt->myDoc);
12964
0
       ctxt->myDoc = NULL;
12965
0
    }
12966
0
    if (sax != NULL)
12967
0
  ctxt->sax = oldsax;
12968
0
    xmlFreeParserCtxt(ctxt);
12969
12970
0
    return(ret);
12971
0
}
12972
12973
/**
12974
 * Parse an XML in-memory document and build a tree.
12975
 *
12976
 * This function uses deprecated global variables to set parser options
12977
 * which default to XML_PARSE_NODICT.
12978
 *
12979
 * @deprecated Use #xmlReadDoc.
12980
 *
12981
 * @param cur  a pointer to an array of xmlChar
12982
 * @returns the resulting document tree
12983
 */
12984
12985
xmlDoc *
12986
0
xmlParseDoc(const xmlChar *cur) {
12987
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12988
0
}
12989
#endif /* LIBXML_SAX1_ENABLED */
12990
12991
/************************************************************************
12992
 *                  *
12993
 *  New set (2.6.0) of simpler and more flexible APIs   *
12994
 *                  *
12995
 ************************************************************************/
12996
12997
/**
12998
 * Reset a parser context
12999
 *
13000
 * @param ctxt  an XML parser context
13001
 */
13002
void
13003
xmlCtxtReset(xmlParserCtxt *ctxt)
13004
0
{
13005
0
    xmlParserInputPtr input;
13006
13007
0
    if (ctxt == NULL)
13008
0
        return;
13009
13010
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13011
0
        xmlFreeInputStream(input);
13012
0
    }
13013
0
    ctxt->inputNr = 0;
13014
0
    ctxt->input = NULL;
13015
13016
0
    ctxt->spaceNr = 0;
13017
0
    if (ctxt->spaceTab != NULL) {
13018
0
  ctxt->spaceTab[0] = -1;
13019
0
  ctxt->space = &ctxt->spaceTab[0];
13020
0
    } else {
13021
0
        ctxt->space = NULL;
13022
0
    }
13023
13024
13025
0
    ctxt->nodeNr = 0;
13026
0
    ctxt->node = NULL;
13027
13028
0
    ctxt->nameNr = 0;
13029
0
    ctxt->name = NULL;
13030
13031
0
    ctxt->nsNr = 0;
13032
0
    xmlParserNsReset(ctxt->nsdb);
13033
13034
0
    if (ctxt->version != NULL) {
13035
0
        xmlFree(ctxt->version);
13036
0
        ctxt->version = NULL;
13037
0
    }
13038
0
    if (ctxt->encoding != NULL) {
13039
0
        xmlFree(ctxt->encoding);
13040
0
        ctxt->encoding = NULL;
13041
0
    }
13042
0
    if (ctxt->extSubURI != NULL) {
13043
0
        xmlFree(ctxt->extSubURI);
13044
0
        ctxt->extSubURI = NULL;
13045
0
    }
13046
0
    if (ctxt->extSubSystem != NULL) {
13047
0
        xmlFree(ctxt->extSubSystem);
13048
0
        ctxt->extSubSystem = NULL;
13049
0
    }
13050
0
    if (ctxt->directory != NULL) {
13051
0
        xmlFree(ctxt->directory);
13052
0
        ctxt->directory = NULL;
13053
0
    }
13054
13055
0
    if (ctxt->myDoc != NULL)
13056
0
        xmlFreeDoc(ctxt->myDoc);
13057
0
    ctxt->myDoc = NULL;
13058
13059
0
    ctxt->standalone = -1;
13060
0
    ctxt->hasExternalSubset = 0;
13061
0
    ctxt->hasPErefs = 0;
13062
0
    ctxt->html = ctxt->html ? 1 : 0;
13063
0
    ctxt->instate = XML_PARSER_START;
13064
13065
0
    ctxt->wellFormed = 1;
13066
0
    ctxt->nsWellFormed = 1;
13067
0
    ctxt->disableSAX = 0;
13068
0
    ctxt->valid = 1;
13069
0
    ctxt->record_info = 0;
13070
0
    ctxt->checkIndex = 0;
13071
0
    ctxt->endCheckState = 0;
13072
0
    ctxt->inSubset = 0;
13073
0
    ctxt->errNo = XML_ERR_OK;
13074
0
    ctxt->depth = 0;
13075
0
    ctxt->catalogs = NULL;
13076
0
    ctxt->sizeentities = 0;
13077
0
    ctxt->sizeentcopy = 0;
13078
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13079
13080
0
    if (ctxt->attsDefault != NULL) {
13081
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13082
0
        ctxt->attsDefault = NULL;
13083
0
    }
13084
0
    if (ctxt->attsSpecial != NULL) {
13085
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13086
0
        ctxt->attsSpecial = NULL;
13087
0
    }
13088
13089
0
#ifdef LIBXML_CATALOG_ENABLED
13090
0
    if (ctxt->catalogs != NULL)
13091
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13092
0
#endif
13093
0
    ctxt->nbErrors = 0;
13094
0
    ctxt->nbWarnings = 0;
13095
0
    if (ctxt->lastError.code != XML_ERR_OK)
13096
0
        xmlResetError(&ctxt->lastError);
13097
0
}
13098
13099
/**
13100
 * Reset a push parser context
13101
 *
13102
 * @param ctxt  an XML parser context
13103
 * @param chunk  a pointer to an array of chars
13104
 * @param size  number of chars in the array
13105
 * @param filename  an optional file name or URI
13106
 * @param encoding  the document encoding, or NULL
13107
 * @returns 0 in case of success and 1 in case of error
13108
 */
13109
int
13110
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13111
                 int size, const char *filename, const char *encoding)
13112
0
{
13113
0
    xmlParserInputPtr input;
13114
13115
0
    if (ctxt == NULL)
13116
0
        return(1);
13117
13118
0
    xmlCtxtReset(ctxt);
13119
13120
0
    input = xmlNewPushInput(filename, chunk, size);
13121
0
    if (input == NULL)
13122
0
        return(1);
13123
13124
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13125
0
        xmlFreeInputStream(input);
13126
0
        return(1);
13127
0
    }
13128
13129
0
    if (encoding != NULL)
13130
0
        xmlSwitchEncodingName(ctxt, encoding);
13131
13132
0
    return(0);
13133
0
}
13134
13135
static int
13136
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13137
39.9k
{
13138
39.9k
    int allMask;
13139
13140
39.9k
    if (ctxt == NULL)
13141
0
        return(-1);
13142
13143
    /*
13144
     * XInclude options aren't handled by the parser.
13145
     *
13146
     * XML_PARSE_XINCLUDE
13147
     * XML_PARSE_NOXINCNODE
13148
     * XML_PARSE_NOBASEFIX
13149
     */
13150
39.9k
    allMask = XML_PARSE_RECOVER |
13151
39.9k
              XML_PARSE_NOENT |
13152
39.9k
              XML_PARSE_DTDLOAD |
13153
39.9k
              XML_PARSE_DTDATTR |
13154
39.9k
              XML_PARSE_DTDVALID |
13155
39.9k
              XML_PARSE_NOERROR |
13156
39.9k
              XML_PARSE_NOWARNING |
13157
39.9k
              XML_PARSE_PEDANTIC |
13158
39.9k
              XML_PARSE_NOBLANKS |
13159
39.9k
#ifdef LIBXML_SAX1_ENABLED
13160
39.9k
              XML_PARSE_SAX1 |
13161
39.9k
#endif
13162
39.9k
              XML_PARSE_NONET |
13163
39.9k
              XML_PARSE_NODICT |
13164
39.9k
              XML_PARSE_NSCLEAN |
13165
39.9k
              XML_PARSE_NOCDATA |
13166
39.9k
              XML_PARSE_COMPACT |
13167
39.9k
              XML_PARSE_OLD10 |
13168
39.9k
              XML_PARSE_HUGE |
13169
39.9k
              XML_PARSE_OLDSAX |
13170
39.9k
              XML_PARSE_IGNORE_ENC |
13171
39.9k
              XML_PARSE_BIG_LINES |
13172
39.9k
              XML_PARSE_NO_XXE |
13173
39.9k
              XML_PARSE_UNZIP |
13174
39.9k
              XML_PARSE_NO_SYS_CATALOG |
13175
39.9k
              XML_PARSE_CATALOG_PI;
13176
13177
39.9k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13178
13179
    /*
13180
     * For some options, struct members are historically the source
13181
     * of truth. The values are initalized from global variables and
13182
     * old code could also modify them directly. Several older API
13183
     * functions that don't take an options argument rely on these
13184
     * deprecated mechanisms.
13185
     *
13186
     * Once public access to struct members and the globals are
13187
     * disabled, we can use the options bitmask as source of
13188
     * truth, making all these struct members obsolete.
13189
     *
13190
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13191
     * loading of the external subset.
13192
     */
13193
39.9k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13194
39.9k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13195
39.9k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13196
39.9k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13197
39.9k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13198
39.9k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13199
39.9k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13200
39.9k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13201
39.9k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13202
13203
39.9k
    return(options & ~allMask);
13204
39.9k
}
13205
13206
/**
13207
 * Applies the options to the parser context. Unset options are
13208
 * cleared.
13209
 *
13210
 * @since 2.13.0
13211
 *
13212
 * With older versions, you can use #xmlCtxtUseOptions.
13213
 *
13214
 * @param ctxt  an XML parser context
13215
 * @param options  a bitmask of xmlParserOption values
13216
 * @returns 0 in case of success, the set of unknown or unimplemented options
13217
 *         in case of error.
13218
 */
13219
int
13220
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13221
0
{
13222
0
#ifdef LIBXML_HTML_ENABLED
13223
0
    if ((ctxt != NULL) && (ctxt->html))
13224
0
        return(htmlCtxtSetOptions(ctxt, options));
13225
0
#endif
13226
13227
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13228
0
}
13229
13230
/**
13231
 * Get the current options of the parser context.
13232
 *
13233
 * @since 2.14.0
13234
 *
13235
 * @param ctxt  an XML parser context
13236
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13237
 */
13238
int
13239
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13240
0
{
13241
0
    if (ctxt == NULL)
13242
0
        return(-1);
13243
13244
0
    return(ctxt->options);
13245
0
}
13246
13247
/**
13248
 * Applies the options to the parser context. The following options
13249
 * are never cleared and can only be enabled:
13250
 *
13251
 * - XML_PARSE_NOERROR
13252
 * - XML_PARSE_NOWARNING
13253
 * - XML_PARSE_NONET
13254
 * - XML_PARSE_NSCLEAN
13255
 * - XML_PARSE_NOCDATA
13256
 * - XML_PARSE_COMPACT
13257
 * - XML_PARSE_OLD10
13258
 * - XML_PARSE_HUGE
13259
 * - XML_PARSE_OLDSAX
13260
 * - XML_PARSE_IGNORE_ENC
13261
 * - XML_PARSE_BIG_LINES
13262
 *
13263
 * @deprecated Use #xmlCtxtSetOptions.
13264
 *
13265
 * @param ctxt  an XML parser context
13266
 * @param options  a combination of xmlParserOption
13267
 * @returns 0 in case of success, the set of unknown or unimplemented options
13268
 *         in case of error.
13269
 */
13270
int
13271
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13272
39.9k
{
13273
39.9k
    int keepMask;
13274
13275
39.9k
#ifdef LIBXML_HTML_ENABLED
13276
39.9k
    if ((ctxt != NULL) && (ctxt->html))
13277
0
        return(htmlCtxtUseOptions(ctxt, options));
13278
39.9k
#endif
13279
13280
    /*
13281
     * For historic reasons, some options can only be enabled.
13282
     */
13283
39.9k
    keepMask = XML_PARSE_NOERROR |
13284
39.9k
               XML_PARSE_NOWARNING |
13285
39.9k
               XML_PARSE_NONET |
13286
39.9k
               XML_PARSE_NSCLEAN |
13287
39.9k
               XML_PARSE_NOCDATA |
13288
39.9k
               XML_PARSE_COMPACT |
13289
39.9k
               XML_PARSE_OLD10 |
13290
39.9k
               XML_PARSE_HUGE |
13291
39.9k
               XML_PARSE_OLDSAX |
13292
39.9k
               XML_PARSE_IGNORE_ENC |
13293
39.9k
               XML_PARSE_BIG_LINES;
13294
13295
39.9k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13296
39.9k
}
13297
13298
/**
13299
 * To protect against exponential entity expansion ("billion laughs"), the
13300
 * size of serialized output is (roughly) limited to the input size
13301
 * multiplied by this factor. The default value is 5.
13302
 *
13303
 * When working with documents making heavy use of entity expansion, it can
13304
 * be necessary to increase the value. For security reasons, this should only
13305
 * be considered when processing trusted input.
13306
 *
13307
 * @param ctxt  an XML parser context
13308
 * @param maxAmpl  maximum amplification factor
13309
 */
13310
void
13311
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13312
0
{
13313
0
    if (ctxt == NULL)
13314
0
        return;
13315
0
    ctxt->maxAmpl = maxAmpl;
13316
0
}
13317
13318
/**
13319
 * Parse an XML document and return the resulting document tree.
13320
 * Takes ownership of the input object.
13321
 *
13322
 * @since 2.13.0
13323
 *
13324
 * @param ctxt  an XML parser context
13325
 * @param input  parser input
13326
 * @returns the resulting document tree or NULL
13327
 */
13328
xmlDoc *
13329
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13330
39.9k
{
13331
39.9k
    xmlDocPtr ret = NULL;
13332
13333
39.9k
    if ((ctxt == NULL) || (input == NULL)) {
13334
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13335
0
        xmlFreeInputStream(input);
13336
0
        return(NULL);
13337
0
    }
13338
13339
    /* assert(ctxt->inputNr == 0); */
13340
39.9k
    while (ctxt->inputNr > 0)
13341
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13342
13343
39.9k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13344
0
        xmlFreeInputStream(input);
13345
0
        return(NULL);
13346
0
    }
13347
13348
39.9k
    xmlParseDocument(ctxt);
13349
13350
39.9k
    ret = xmlCtxtGetDocument(ctxt);
13351
13352
    /* assert(ctxt->inputNr == 1); */
13353
79.9k
    while (ctxt->inputNr > 0)
13354
39.9k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13355
13356
39.9k
    return(ret);
13357
39.9k
}
13358
13359
/**
13360
 * Convenience function to parse an XML document from a
13361
 * zero-terminated string.
13362
 *
13363
 * See #xmlCtxtReadDoc for details.
13364
 *
13365
 * @param cur  a pointer to a zero terminated string
13366
 * @param URL  base URL (optional)
13367
 * @param encoding  the document encoding (optional)
13368
 * @param options  a combination of xmlParserOption
13369
 * @returns the resulting document tree
13370
 */
13371
xmlDoc *
13372
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13373
           int options)
13374
0
{
13375
0
    xmlParserCtxtPtr ctxt;
13376
0
    xmlParserInputPtr input;
13377
0
    xmlDocPtr doc = NULL;
13378
13379
0
    ctxt = xmlNewParserCtxt();
13380
0
    if (ctxt == NULL)
13381
0
        return(NULL);
13382
13383
0
    xmlCtxtUseOptions(ctxt, options);
13384
13385
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13386
0
                                      XML_INPUT_BUF_STATIC);
13387
13388
0
    if (input != NULL)
13389
0
        doc = xmlCtxtParseDocument(ctxt, input);
13390
13391
0
    xmlFreeParserCtxt(ctxt);
13392
0
    return(doc);
13393
0
}
13394
13395
/**
13396
 * Convenience function to parse an XML file from the filesystem
13397
 * or a global, user-defined resource loader.
13398
 *
13399
 * This function always enables the XML_PARSE_UNZIP option for
13400
 * backward compatibility. If a "-" filename is passed, it will
13401
 * read from stdin. Both of these features are potentially
13402
 * insecure and might be removed from later versions.
13403
 *
13404
 * See #xmlCtxtReadFile for details.
13405
 *
13406
 * @param filename  a file or URL
13407
 * @param encoding  the document encoding (optional)
13408
 * @param options  a combination of xmlParserOption
13409
 * @returns the resulting document tree
13410
 */
13411
xmlDoc *
13412
xmlReadFile(const char *filename, const char *encoding, int options)
13413
0
{
13414
0
    xmlParserCtxtPtr ctxt;
13415
0
    xmlParserInputPtr input;
13416
0
    xmlDocPtr doc = NULL;
13417
13418
0
    ctxt = xmlNewParserCtxt();
13419
0
    if (ctxt == NULL)
13420
0
        return(NULL);
13421
13422
0
    options |= XML_PARSE_UNZIP;
13423
13424
0
    xmlCtxtUseOptions(ctxt, options);
13425
13426
    /*
13427
     * Backward compatibility for users of command line utilities like
13428
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13429
     * should be removed at some point.
13430
     */
13431
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13432
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13433
0
                                      encoding, 0);
13434
0
    else
13435
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13436
13437
0
    if (input != NULL)
13438
0
        doc = xmlCtxtParseDocument(ctxt, input);
13439
13440
0
    xmlFreeParserCtxt(ctxt);
13441
0
    return(doc);
13442
0
}
13443
13444
/**
13445
 * Parse an XML in-memory document and build a tree. The input buffer must
13446
 * not contain a terminating null byte.
13447
 *
13448
 * See #xmlCtxtReadMemory for details.
13449
 *
13450
 * @param buffer  a pointer to a char array
13451
 * @param size  the size of the array
13452
 * @param url  base URL (optional)
13453
 * @param encoding  the document encoding (optional)
13454
 * @param options  a combination of xmlParserOption
13455
 * @returns the resulting document tree
13456
 */
13457
xmlDoc *
13458
xmlReadMemory(const char *buffer, int size, const char *url,
13459
              const char *encoding, int options)
13460
39.9k
{
13461
39.9k
    xmlParserCtxtPtr ctxt;
13462
39.9k
    xmlParserInputPtr input;
13463
39.9k
    xmlDocPtr doc = NULL;
13464
13465
39.9k
    if (size < 0)
13466
0
  return(NULL);
13467
13468
39.9k
    ctxt = xmlNewParserCtxt();
13469
39.9k
    if (ctxt == NULL)
13470
0
        return(NULL);
13471
13472
39.9k
    xmlCtxtUseOptions(ctxt, options);
13473
13474
39.9k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13475
39.9k
                                      XML_INPUT_BUF_STATIC);
13476
13477
39.9k
    if (input != NULL)
13478
39.9k
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
39.9k
    xmlFreeParserCtxt(ctxt);
13481
39.9k
    return(doc);
13482
39.9k
}
13483
13484
/**
13485
 * Parse an XML from a file descriptor and build a tree.
13486
 *
13487
 * See #xmlCtxtReadFd for details.
13488
 *
13489
 * NOTE that the file descriptor will not be closed when the
13490
 * context is freed or reset.
13491
 *
13492
 * @param fd  an open file descriptor
13493
 * @param URL  base URL (optional)
13494
 * @param encoding  the document encoding (optional)
13495
 * @param options  a combination of xmlParserOption
13496
 * @returns the resulting document tree
13497
 */
13498
xmlDoc *
13499
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13512
13513
0
    if (input != NULL)
13514
0
        doc = xmlCtxtParseDocument(ctxt, input);
13515
13516
0
    xmlFreeParserCtxt(ctxt);
13517
0
    return(doc);
13518
0
}
13519
13520
/**
13521
 * Parse an XML document from I/O functions and context and build a tree.
13522
 *
13523
 * See #xmlCtxtReadIO for details.
13524
 *
13525
 * @param ioread  an I/O read function
13526
 * @param ioclose  an I/O close function (optional)
13527
 * @param ioctx  an I/O handler
13528
 * @param URL  base URL (optional)
13529
 * @param encoding  the document encoding (optional)
13530
 * @param options  a combination of xmlParserOption
13531
 * @returns the resulting document tree
13532
 */
13533
xmlDoc *
13534
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13535
          void *ioctx, const char *URL, const char *encoding, int options)
13536
0
{
13537
0
    xmlParserCtxtPtr ctxt;
13538
0
    xmlParserInputPtr input;
13539
0
    xmlDocPtr doc = NULL;
13540
13541
0
    ctxt = xmlNewParserCtxt();
13542
0
    if (ctxt == NULL)
13543
0
        return(NULL);
13544
13545
0
    xmlCtxtUseOptions(ctxt, options);
13546
13547
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13548
0
                                  encoding, 0);
13549
13550
0
    if (input != NULL)
13551
0
        doc = xmlCtxtParseDocument(ctxt, input);
13552
13553
0
    xmlFreeParserCtxt(ctxt);
13554
0
    return(doc);
13555
0
}
13556
13557
/**
13558
 * Parse an XML in-memory document and build a tree.
13559
 *
13560
 * `URL` is used as base to resolve external entities and for error
13561
 * reporting.
13562
 *
13563
 * @param ctxt  an XML parser context
13564
 * @param str  a pointer to a zero terminated string
13565
 * @param URL  base URL (optional)
13566
 * @param encoding  the document encoding (optional)
13567
 * @param options  a combination of xmlParserOption
13568
 * @returns the resulting document tree
13569
 */
13570
xmlDoc *
13571
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13572
               const char *URL, const char *encoding, int options)
13573
0
{
13574
0
    xmlParserInputPtr input;
13575
13576
0
    if (ctxt == NULL)
13577
0
        return(NULL);
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13583
0
                                      XML_INPUT_BUF_STATIC);
13584
0
    if (input == NULL)
13585
0
        return(NULL);
13586
13587
0
    return(xmlCtxtParseDocument(ctxt, input));
13588
0
}
13589
13590
/**
13591
 * Parse an XML file from the filesystem or a global, user-defined
13592
 * resource loader.
13593
 *
13594
 * This function always enables the XML_PARSE_UNZIP option for
13595
 * backward compatibility. This feature is potentially insecure
13596
 * and might be removed from later versions.
13597
 *
13598
 * @param ctxt  an XML parser context
13599
 * @param filename  a file or URL
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13606
                const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if (ctxt == NULL)
13611
0
        return(NULL);
13612
13613
0
    options |= XML_PARSE_UNZIP;
13614
13615
0
    xmlCtxtReset(ctxt);
13616
0
    xmlCtxtUseOptions(ctxt, options);
13617
13618
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13619
0
    if (input == NULL)
13620
0
        return(NULL);
13621
13622
0
    return(xmlCtxtParseDocument(ctxt, input));
13623
0
}
13624
13625
/**
13626
 * Parse an XML in-memory document and build a tree. The input buffer must
13627
 * not contain a terminating null byte.
13628
 *
13629
 * `URL` is used as base to resolve external entities and for error
13630
 * reporting.
13631
 *
13632
 * @param ctxt  an XML parser context
13633
 * @param buffer  a pointer to a char array
13634
 * @param size  the size of the array
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13642
                  const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if ((ctxt == NULL) || (size < 0))
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13653
0
                                      XML_INPUT_BUF_STATIC);
13654
0
    if (input == NULL)
13655
0
        return(NULL);
13656
13657
0
    return(xmlCtxtParseDocument(ctxt, input));
13658
0
}
13659
13660
/**
13661
 * Parse an XML document from a file descriptor and build a tree.
13662
 *
13663
 * NOTE that the file descriptor will not be closed when the
13664
 * context is freed or reset.
13665
 *
13666
 * `URL` is used as base to resolve external entities and for error
13667
 * reporting.
13668
 *
13669
 * @param ctxt  an XML parser context
13670
 * @param fd  an open file descriptor
13671
 * @param URL  base URL (optional)
13672
 * @param encoding  the document encoding (optional)
13673
 * @param options  a combination of xmlParserOption
13674
 * @returns the resulting document tree
13675
 */
13676
xmlDoc *
13677
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13678
              const char *URL, const char *encoding, int options)
13679
0
{
13680
0
    xmlParserInputPtr input;
13681
13682
0
    if (ctxt == NULL)
13683
0
        return(NULL);
13684
13685
0
    xmlCtxtReset(ctxt);
13686
0
    xmlCtxtUseOptions(ctxt, options);
13687
13688
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13689
0
    if (input == NULL)
13690
0
        return(NULL);
13691
13692
0
    return(xmlCtxtParseDocument(ctxt, input));
13693
0
}
13694
13695
/**
13696
 * Parse an XML document from I/O functions and source and build a tree.
13697
 * This reuses the existing `ctxt` parser context
13698
 *
13699
 * `URL` is used as base to resolve external entities and for error
13700
 * reporting.
13701
 *
13702
 * @param ctxt  an XML parser context
13703
 * @param ioread  an I/O read function
13704
 * @param ioclose  an I/O close function
13705
 * @param ioctx  an I/O handler
13706
 * @param URL  the base URL to use for the document
13707
 * @param encoding  the document encoding, or NULL
13708
 * @param options  a combination of xmlParserOption
13709
 * @returns the resulting document tree
13710
 */
13711
xmlDoc *
13712
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13713
              xmlInputCloseCallback ioclose, void *ioctx,
13714
        const char *URL,
13715
              const char *encoding, int options)
13716
0
{
13717
0
    xmlParserInputPtr input;
13718
13719
0
    if (ctxt == NULL)
13720
0
        return(NULL);
13721
13722
0
    xmlCtxtReset(ctxt);
13723
0
    xmlCtxtUseOptions(ctxt, options);
13724
13725
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13726
0
                                  encoding, 0);
13727
0
    if (input == NULL)
13728
0
        return(NULL);
13729
13730
0
    return(xmlCtxtParseDocument(ctxt, input));
13731
0
}
13732