Coverage Report

Created: 2026-05-11 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
5.47M
#define NS_INDEX_EMPTY  INT_MAX
81
533k
#define NS_INDEX_XML    (INT_MAX - 1)
82
1.56M
#define URI_HASH_EMPTY  0xD943A04E
83
196k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
1.22M
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
4.07M
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
3.90M
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
4.11M
#define XML_ATTVAL_ALLOC        (1 << 0)
99
2.31M
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Extra per-namespace data, stored in the "extra" array of
 * struct _xmlParserNsData. Field meanings are inferred from their
 * names; their users are outside this part of the file.
 */
typedef struct {
    void *saxData;              /* opaque pointer */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably the declaring element's id */
    int oldIndex;               /* presumably a previous binding's index */
} xmlParserNsExtra;
115
116
/*
 * One slot of the hash table referenced by struct _xmlParserNsData.
 */
typedef struct {
    unsigned hashValue;     /* hash value stored for this slot */
    int index;              /* presumably an index into the namespace table */
} xmlParserNsBucket;
120
121
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
7.66M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
7.70M
#define XML_ENT_FIXED_COST 20
170
171
310M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
4.14M
#define XML_PARSER_BUFFER_SIZE 100
173
617k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
7.81k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
7.81k
    xmlCtxtErrMemory(ctxt);
226
7.81k
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
234k
{
239
234k
    if (prefix == NULL)
240
151k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
151k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
151k
                   "Attribute %s redefined\n", localname);
243
83.3k
    else
244
83.3k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
83.3k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
83.3k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
234k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
223M
{
260
223M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
223M
               NULL, NULL, NULL, 0, "%s", msg);
262
223M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
227k
{
277
227k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
227k
               str1, str2, NULL, 0, msg, str1, str2);
279
227k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
243k
{
295
243k
    ctxt->valid = 0;
296
297
243k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
243k
               str1, str2, NULL, 0, msg, str1, str2);
299
243k
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
104M
{
314
104M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
104M
               NULL, NULL, NULL, val, msg, val);
316
104M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
1.69M
{
333
1.69M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
1.69M
               str1, str2, NULL, val, msg, str1, val, str2);
335
1.69M
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
4.61M
{
349
4.61M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
4.61M
               val, NULL, NULL, 0, msg, val);
351
4.61M
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
287k
{
365
287k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
287k
               val, NULL, NULL, 0, msg, val);
367
287k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
1.89M
{
385
1.89M
    ctxt->nsWellFormed = 0;
386
387
1.89M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
1.89M
               info1, info2, info3, 0, msg, info1, info2, info3);
389
1.89M
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
103k
{
407
103k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
103k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
103k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
8.31M
{
436
8.31M
    unsigned long consumed;
437
8.31M
    unsigned long *expandedSize;
438
8.31M
    xmlParserInputPtr input = ctxt->input;
439
8.31M
    xmlEntityPtr entity = input->entity;
440
441
8.31M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
659k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
7.66M
    consumed = input->consumed;
449
7.66M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
7.66M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
7.66M
    if (entity)
453
300k
        expandedSize = &entity->expandedSize;
454
7.36M
    else
455
7.36M
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
7.66M
    xmlSaturatedAdd(expandedSize, extra);
461
7.66M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
7.66M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
1.06M
        ((*expandedSize >= ULONG_MAX) ||
470
1.06M
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
4.08k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
4.08k
                       "Maximum entity amplification factor exceeded, see "
473
4.08k
                       "xmlCtxtSetMaxAmplification.\n");
474
4.08k
        return(1);
475
4.08k
    }
476
477
7.65M
    return(0);
478
7.66M
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
59.8k
{
496
59.8k
    switch (feature) {
497
2.49k
  case XML_WITH_THREAD:
498
2.49k
#ifdef LIBXML_THREAD_ENABLED
499
2.49k
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
2.49k
        case XML_WITH_OUTPUT:
506
2.49k
#ifdef LIBXML_OUTPUT_ENABLED
507
2.49k
            return(1);
508
#else
509
            return(0);
510
#endif
511
2.49k
        case XML_WITH_PUSH:
512
2.49k
#ifdef LIBXML_PUSH_ENABLED
513
2.49k
            return(1);
514
#else
515
            return(0);
516
#endif
517
2.49k
        case XML_WITH_READER:
518
2.49k
#ifdef LIBXML_READER_ENABLED
519
2.49k
            return(1);
520
#else
521
            return(0);
522
#endif
523
2.49k
        case XML_WITH_PATTERN:
524
2.49k
#ifdef LIBXML_PATTERN_ENABLED
525
2.49k
            return(1);
526
#else
527
            return(0);
528
#endif
529
2.49k
        case XML_WITH_WRITER:
530
2.49k
#ifdef LIBXML_WRITER_ENABLED
531
2.49k
            return(1);
532
#else
533
            return(0);
534
#endif
535
2.49k
        case XML_WITH_SAX1:
536
2.49k
#ifdef LIBXML_SAX1_ENABLED
537
2.49k
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
2.49k
        case XML_WITH_VALID:
544
2.49k
#ifdef LIBXML_VALID_ENABLED
545
2.49k
            return(1);
546
#else
547
            return(0);
548
#endif
549
2.49k
        case XML_WITH_HTML:
550
2.49k
#ifdef LIBXML_HTML_ENABLED
551
2.49k
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
2.49k
        case XML_WITH_C14N:
558
2.49k
#ifdef LIBXML_C14N_ENABLED
559
2.49k
            return(1);
560
#else
561
            return(0);
562
#endif
563
2.49k
        case XML_WITH_CATALOG:
564
2.49k
#ifdef LIBXML_CATALOG_ENABLED
565
2.49k
            return(1);
566
#else
567
            return(0);
568
#endif
569
2.49k
        case XML_WITH_XPATH:
570
2.49k
#ifdef LIBXML_XPATH_ENABLED
571
2.49k
            return(1);
572
#else
573
            return(0);
574
#endif
575
2.49k
        case XML_WITH_XPTR:
576
2.49k
#ifdef LIBXML_XPTR_ENABLED
577
2.49k
            return(1);
578
#else
579
            return(0);
580
#endif
581
2.49k
        case XML_WITH_XINCLUDE:
582
2.49k
#ifdef LIBXML_XINCLUDE_ENABLED
583
2.49k
            return(1);
584
#else
585
            return(0);
586
#endif
587
2.49k
        case XML_WITH_ICONV:
588
2.49k
#ifdef LIBXML_ICONV_ENABLED
589
2.49k
            return(1);
590
#else
591
            return(0);
592
#endif
593
2.49k
        case XML_WITH_ISO8859X:
594
2.49k
#ifdef LIBXML_ISO8859X_ENABLED
595
2.49k
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
2.49k
        case XML_WITH_REGEXP:
602
2.49k
#ifdef LIBXML_REGEXP_ENABLED
603
2.49k
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
2.49k
        case XML_WITH_RELAXNG:
616
2.49k
#ifdef LIBXML_RELAXNG_ENABLED
617
2.49k
            return(1);
618
#else
619
            return(0);
620
#endif
621
2.49k
        case XML_WITH_SCHEMAS:
622
2.49k
#ifdef LIBXML_SCHEMAS_ENABLED
623
2.49k
            return(1);
624
#else
625
            return(0);
626
#endif
627
2.49k
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
2.49k
            return(0);
632
0
#endif
633
2.49k
        case XML_WITH_MODULES:
634
2.49k
#ifdef LIBXML_MODULES_ENABLED
635
2.49k
            return(1);
636
#else
637
            return(0);
638
#endif
639
2.49k
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
2.49k
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
2.49k
        case XML_WITH_ZLIB:
648
2.49k
#ifdef LIBXML_ZLIB_ENABLED
649
2.49k
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_LZMA:
654
0
            return(0);
655
2.49k
        case XML_WITH_ICU:
656
#ifdef LIBXML_ICU_ENABLED
657
            return(1);
658
#else
659
2.49k
            return(0);
660
0
#endif
661
0
        default:
662
0
      break;
663
59.8k
     }
664
0
     return(0);
665
59.8k
}
666
667
/************************************************************************
668
 *                  *
669
 *      Simple string buffer        *
670
 *                  *
671
 ************************************************************************/
672
673
typedef struct {
674
    xmlChar *mem;
675
    unsigned size;
676
    unsigned cap; /* size < cap */
677
    unsigned max; /* size <= max */
678
    xmlParserErrors code;
679
} xmlSBuf;
680
681
static void
682
4.31M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
683
4.31M
    buf->mem = NULL;
684
4.31M
    buf->size = 0;
685
4.31M
    buf->cap = 0;
686
4.31M
    buf->max = max;
687
4.31M
    buf->code = XML_ERR_OK;
688
4.31M
}
689
690
static int
691
1.52M
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
692
1.52M
    xmlChar *mem;
693
1.52M
    unsigned cap;
694
695
1.52M
    if (len >= UINT_MAX / 2 - buf->size) {
696
0
        if (buf->code == XML_ERR_OK)
697
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
698
0
        return(-1);
699
0
    }
700
701
1.52M
    cap = (buf->size + len) * 2;
702
1.52M
    if (cap < 240)
703
1.18M
        cap = 240;
704
705
1.52M
    mem = xmlRealloc(buf->mem, cap);
706
1.52M
    if (mem == NULL) {
707
1.25k
        buf->code = XML_ERR_NO_MEMORY;
708
1.25k
        return(-1);
709
1.25k
    }
710
711
1.51M
    buf->mem = mem;
712
1.51M
    buf->cap = cap;
713
714
1.51M
    return(0);
715
1.52M
}
716
717
static void
718
821M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
719
821M
    if (buf->max - buf->size < len) {
720
5.27M
        if (buf->code == XML_ERR_OK)
721
966
            buf->code = XML_ERR_RESOURCE_LIMIT;
722
5.27M
        return;
723
5.27M
    }
724
725
816M
    if (buf->cap - buf->size <= len) {
726
1.47M
        if (xmlSBufGrow(buf, len) < 0)
727
1.15k
            return;
728
1.47M
    }
729
730
816M
    if (len > 0)
731
816M
        memcpy(buf->mem + buf->size, str, len);
732
816M
    buf->size += len;
733
816M
}
734
735
static void
736
803M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
737
803M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
738
803M
}
739
740
static void
741
2.24M
xmlSBufAddChar(xmlSBuf *buf, int c) {
742
2.24M
    xmlChar *end;
743
744
2.24M
    if (buf->max - buf->size < 4) {
745
11.0k
        if (buf->code == XML_ERR_OK)
746
21
            buf->code = XML_ERR_RESOURCE_LIMIT;
747
11.0k
        return;
748
11.0k
    }
749
750
2.23M
    if (buf->cap - buf->size <= 4) {
751
43.7k
        if (xmlSBufGrow(buf, 4) < 0)
752
99
            return;
753
43.7k
    }
754
755
2.23M
    end = buf->mem + buf->size;
756
757
2.23M
    if (c < 0x80) {
758
2.10M
        *end = (xmlChar) c;
759
2.10M
        buf->size += 1;
760
2.10M
    } else {
761
136k
        buf->size += xmlCopyCharMultiByte(end, c);
762
136k
    }
763
2.23M
}
764
765
static void
766
675M
xmlSBufAddReplChar(xmlSBuf *buf) {
767
675M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
768
675M
}
769
770
static void
771
2.35k
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
772
2.35k
    if (buf->code == XML_ERR_NO_MEMORY)
773
1.36k
        xmlCtxtErrMemory(ctxt);
774
987
    else
775
987
        xmlFatalErr(ctxt, buf->code, errMsg);
776
2.35k
}
777
778
static xmlChar *
779
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
780
1.25M
              const char *errMsg) {
781
1.25M
    if (buf->mem == NULL) {
782
117k
        buf->mem = xmlMalloc(1);
783
117k
        if (buf->mem == NULL) {
784
108
            buf->code = XML_ERR_NO_MEMORY;
785
117k
        } else {
786
117k
            buf->mem[0] = 0;
787
117k
        }
788
1.13M
    } else {
789
1.13M
        buf->mem[buf->size] = 0;
790
1.13M
    }
791
792
1.25M
    if (buf->code == XML_ERR_OK) {
793
1.25M
        if (sizeOut != NULL)
794
350k
            *sizeOut = buf->size;
795
1.25M
        return(buf->mem);
796
1.25M
    }
797
798
1.34k
    xmlSBufReportError(buf, ctxt, errMsg);
799
800
1.34k
    xmlFree(buf->mem);
801
802
1.34k
    if (sizeOut != NULL)
803
285
        *sizeOut = 0;
804
1.34k
    return(NULL);
805
1.25M
}
806
807
static void
808
3.01M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809
3.01M
    if (buf->code != XML_ERR_OK)
810
1.01k
        xmlSBufReportError(buf, ctxt, errMsg);
811
812
3.01M
    xmlFree(buf->mem);
813
3.01M
}
814
815
static int
816
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
817
3.43G
                    const char *errMsg) {
818
3.43G
    int c = str[0];
819
3.43G
    int c1 = str[1];
820
821
3.43G
    if ((c1 & 0xC0) != 0x80)
822
334M
        goto encoding_error;
823
824
3.10G
    if (c < 0xE0) {
825
        /* 2-byte sequence */
826
181M
        if (c < 0xC2)
827
121M
            goto encoding_error;
828
829
59.7M
        return(2);
830
2.92G
    } else {
831
2.92G
        int c2 = str[2];
832
833
2.92G
        if ((c2 & 0xC0) != 0x80)
834
77.6k
            goto encoding_error;
835
836
2.92G
        if (c < 0xF0) {
837
            /* 3-byte sequence */
838
2.92G
            if (c == 0xE0) {
839
                /* overlong */
840
8.75M
                if (c1 < 0xA0)
841
6.57k
                    goto encoding_error;
842
2.91G
            } else if (c == 0xED) {
843
                /* surrogate */
844
354k
                if (c1 >= 0xA0)
845
11.6k
                    goto encoding_error;
846
2.91G
            } else if (c == 0xEF) {
847
                /* U+FFFE and U+FFFF are invalid Chars */
848
1.92G
                if ((c1 == 0xBF) && (c2 >= 0xBE))
849
3.19k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
850
1.92G
            }
851
852
2.92G
            return(3);
853
2.92G
        } else {
854
            /* 4-byte sequence */
855
909k
            if ((str[3] & 0xC0) != 0x80)
856
30.2k
                goto encoding_error;
857
878k
            if (c == 0xF0) {
858
                /* overlong */
859
72.9k
                if (c1 < 0x90)
860
2.61k
                    goto encoding_error;
861
805k
            } else if (c >= 0xF4) {
862
                /* greater than 0x10FFFF */
863
80.5k
                if ((c > 0xF4) || (c1 >= 0x90))
864
78.7k
                    goto encoding_error;
865
80.5k
            }
866
867
797k
            return(4);
868
878k
        }
869
2.92G
    }
870
871
456M
encoding_error:
872
    /* Only report the first error */
873
456M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
874
99.4k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
875
99.4k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
876
99.4k
    }
877
878
456M
    return(0);
879
3.10G
}
880
881
/************************************************************************
882
 *                  *
883
 *    SAX2 defaulted attributes handling      *
884
 *                  *
885
 ************************************************************************/
886
887
/**
888
 * Final initialization of the parser context before starting to parse.
889
 *
890
 * This accounts for users modifying struct members of parser context
891
 * directly.
892
 *
893
 * @param ctxt  an XML parser context
894
 */
895
static void
896
461k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
897
461k
    xmlSAXHandlerPtr sax;
898
899
    /* Avoid unused variable warning if features are disabled. */
900
461k
    (void) sax;
901
902
    /*
903
     * Changing the SAX struct directly is still widespread practice
904
     * in internal and external code.
905
     */
906
461k
    if (ctxt == NULL) return;
907
461k
    sax = ctxt->sax;
908
461k
#ifdef LIBXML_SAX1_ENABLED
909
    /*
910
     * Only enable SAX2 if there SAX2 element handlers, except when there
911
     * are no element handlers at all.
912
     */
913
461k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
914
366k
        (sax) &&
915
366k
        (sax->initialized == XML_SAX2_MAGIC) &&
916
366k
        ((sax->startElementNs != NULL) ||
917
0
         (sax->endElementNs != NULL) ||
918
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
919
366k
        ctxt->sax2 = 1;
920
#else
921
    ctxt->sax2 = 1;
922
#endif /* LIBXML_SAX1_ENABLED */
923
924
    /*
925
     * Some users replace the dictionary directly in the context struct.
926
     * We really need an API function to do that cleanly.
927
     */
928
461k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
929
461k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
930
461k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
931
461k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
932
460k
    (ctxt->str_xml_ns == NULL)) {
933
1.38k
        xmlErrMemory(ctxt);
934
1.38k
    }
935
936
461k
    xmlDictSetLimit(ctxt->dict,
937
461k
                    (ctxt->options & XML_PARSE_HUGE) ?
938
132k
                        0 :
939
461k
                        XML_MAX_DICTIONARY_LIMIT);
940
941
461k
#ifdef LIBXML_VALID_ENABLED
942
461k
    if (ctxt->validate)
943
124k
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
944
337k
    else
945
337k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
946
461k
#endif /* LIBXML_VALID_ENABLED */
947
461k
}
948
949
typedef struct {
950
    xmlHashedString prefix;
951
    xmlHashedString name;
952
    xmlHashedString value;
953
    const xmlChar *valueEnd;
954
    int external;
955
    int expandedSize;
956
} xmlDefAttr;
957
958
typedef struct _xmlDefAttrs xmlDefAttrs;
959
typedef xmlDefAttrs *xmlDefAttrsPtr;
960
struct _xmlDefAttrs {
961
    int nbAttrs;  /* number of defaulted attributes on that element */
962
    int maxAttrs;       /* the size of the array */
963
#if __STDC_VERSION__ >= 199901L
964
    /* Using a C99 flexible array member avoids UBSan errors. */
965
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
966
#else
967
    xmlDefAttr attrs[1];
968
#endif
969
};
970
971
/**
972
 * Normalize the space in non CDATA attribute values:
973
 * If the attribute type is not CDATA, then the XML processor MUST further
974
 * process the normalized attribute value by discarding any leading and
975
 * trailing space (\#x20) characters, and by replacing sequences of space
976
 * (\#x20) characters by a single space (\#x20) character.
977
 * Note that the size of dst need to be at least src, and if one doesn't need
978
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
979
 * passing src as dst is just fine.
980
 *
981
 * @param src  the source string
982
 * @param dst  the target string
983
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
984
 *         is needed.
985
 */
986
static xmlChar *
987
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
988
215k
{
989
215k
    if ((src == NULL) || (dst == NULL))
990
0
        return(NULL);
991
992
231k
    while (*src == 0x20) src++;
993
161M
    while (*src != 0) {
994
161M
  if (*src == 0x20) {
995
12.8M
      while (*src == 0x20) src++;
996
84.5k
      if (*src != 0)
997
79.2k
    *dst++ = 0x20;
998
161M
  } else {
999
161M
      *dst++ = *src++;
1000
161M
  }
1001
161M
    }
1002
215k
    *dst = 0;
1003
215k
    if (dst == src)
1004
202k
       return(NULL);
1005
12.8k
    return(dst);
1006
215k
}
1007
1008
/**
1009
 * Add a defaulted attribute for an element
1010
 *
1011
 * @param ctxt  an XML parser context
1012
 * @param fullname  the element fullname
1013
 * @param fullattr  the attribute fullname
1014
 * @param value  the attribute value
1015
 */
1016
static void
1017
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018
               const xmlChar *fullname,
1019
               const xmlChar *fullattr,
1020
206k
               const xmlChar *value) {
1021
206k
    xmlDefAttrsPtr defaults;
1022
206k
    xmlDefAttr *attr;
1023
206k
    int len, expandedSize;
1024
206k
    xmlHashedString name;
1025
206k
    xmlHashedString prefix;
1026
206k
    xmlHashedString hvalue;
1027
206k
    const xmlChar *localname;
1028
1029
    /*
1030
     * Allows to detect attribute redefinitions
1031
     */
1032
206k
    if (ctxt->attsSpecial != NULL) {
1033
177k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034
109k
      return;
1035
177k
    }
1036
1037
97.1k
    if (ctxt->attsDefault == NULL) {
1038
29.3k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039
29.3k
  if (ctxt->attsDefault == NULL)
1040
63
      goto mem_error;
1041
29.3k
    }
1042
1043
    /*
1044
     * split the element name into prefix:localname , the string found
1045
     * are within the DTD and then not associated to namespace names.
1046
     */
1047
97.0k
    localname = xmlSplitQName3(fullname, &len);
1048
97.0k
    if (localname == NULL) {
1049
89.5k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050
89.5k
  prefix.name = NULL;
1051
89.5k
    } else {
1052
7.46k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053
7.46k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054
7.46k
        if (prefix.name == NULL)
1055
10
            goto mem_error;
1056
7.46k
    }
1057
97.0k
    if (name.name == NULL)
1058
14
        goto mem_error;
1059
1060
    /*
1061
     * make sure there is some storage
1062
     */
1063
97.0k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064
97.0k
    if ((defaults == NULL) ||
1065
63.8k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1066
38.4k
        xmlDefAttrsPtr temp;
1067
38.4k
        int newSize;
1068
1069
38.4k
        if (defaults == NULL) {
1070
33.2k
            newSize = 4;
1071
33.2k
        } else {
1072
5.20k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1073
5.20k
                ((size_t) defaults->maxAttrs >
1074
5.20k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1075
0
                goto mem_error;
1076
1077
5.20k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1078
0
                newSize = XML_MAX_ATTRS;
1079
5.20k
            else
1080
5.20k
                newSize = defaults->maxAttrs * 2;
1081
5.20k
        }
1082
38.4k
        temp = xmlRealloc(defaults,
1083
38.4k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1084
38.4k
  if (temp == NULL)
1085
44
      goto mem_error;
1086
38.3k
        if (defaults == NULL)
1087
33.1k
            temp->nbAttrs = 0;
1088
38.3k
  temp->maxAttrs = newSize;
1089
38.3k
        defaults = temp;
1090
38.3k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1091
38.3k
                          defaults, NULL) < 0) {
1092
2
      xmlFree(defaults);
1093
2
      goto mem_error;
1094
2
  }
1095
38.3k
    }
1096
1097
    /*
1098
     * Split the attribute name into prefix:localname , the string found
1099
     * are within the DTD and hen not associated to namespace names.
1100
     */
1101
96.9k
    localname = xmlSplitQName3(fullattr, &len);
1102
96.9k
    if (localname == NULL) {
1103
67.7k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1104
67.7k
  prefix.name = NULL;
1105
67.7k
    } else {
1106
29.2k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1107
29.2k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1108
29.2k
        if (prefix.name == NULL)
1109
13
            goto mem_error;
1110
29.2k
    }
1111
96.9k
    if (name.name == NULL)
1112
13
        goto mem_error;
1113
1114
    /* intern the string and precompute the end */
1115
96.9k
    len = strlen((const char *) value);
1116
96.9k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1117
96.9k
    if (hvalue.name == NULL)
1118
22
        goto mem_error;
1119
1120
96.9k
    expandedSize = strlen((const char *) name.name);
1121
96.9k
    if (prefix.name != NULL)
1122
29.1k
        expandedSize += strlen((const char *) prefix.name);
1123
96.9k
    expandedSize += len;
1124
1125
96.9k
    attr = &defaults->attrs[defaults->nbAttrs++];
1126
96.9k
    attr->name = name;
1127
96.9k
    attr->prefix = prefix;
1128
96.9k
    attr->value = hvalue;
1129
96.9k
    attr->valueEnd = hvalue.name + len;
1130
96.9k
    attr->external = PARSER_EXTERNAL(ctxt);
1131
96.9k
    attr->expandedSize = expandedSize;
1132
1133
96.9k
    return;
1134
1135
181
mem_error:
1136
181
    xmlErrMemory(ctxt);
1137
181
}
1138
1139
/**
1140
 * Register this attribute type
1141
 *
1142
 * @param ctxt  an XML parser context
1143
 * @param fullname  the element fullname
1144
 * @param fullattr  the attribute fullname
1145
 * @param type  the attribute type
1146
 */
1147
static void
1148
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1149
      const xmlChar *fullname,
1150
      const xmlChar *fullattr,
1151
      int type)
1152
314k
{
1153
314k
    if (ctxt->attsSpecial == NULL) {
1154
37.6k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1155
37.6k
  if (ctxt->attsSpecial == NULL)
1156
69
      goto mem_error;
1157
37.6k
    }
1158
1159
314k
    if (PARSER_EXTERNAL(ctxt))
1160
164k
        type |= XML_SPECIAL_EXTERNAL;
1161
1162
314k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1163
314k
                    XML_INT_TO_PTR(type)) < 0)
1164
13
        goto mem_error;
1165
314k
    return;
1166
1167
314k
mem_error:
1168
82
    xmlErrMemory(ctxt);
1169
82
}
1170
1171
/**
1172
 * Removes CDATA attributes from the special attribute table
1173
 */
1174
static void
1175
xmlCleanSpecialAttrCallback(void *payload, void *data,
1176
                            const xmlChar *fullname, const xmlChar *fullattr,
1177
171k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1178
171k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1179
1180
171k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1181
21.7k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1182
21.7k
    }
1183
171k
}
1184
1185
/**
1186
 * Trim the list of attributes defined to remove all those of type
1187
 * CDATA as they are not special. This call should be done when finishing
1188
 * to parse the DTD and before starting to parse the document root.
1189
 *
1190
 * @param ctxt  an XML parser context
1191
 */
1192
static void
1193
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1194
226k
{
1195
226k
    if (ctxt->attsSpecial == NULL)
1196
188k
        return;
1197
1198
37.5k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1199
1200
37.5k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1201
6.98k
        xmlHashFree(ctxt->attsSpecial, NULL);
1202
6.98k
        ctxt->attsSpecial = NULL;
1203
6.98k
    }
1204
37.5k
}
1205
1206
/**
1207
 * Checks that the value conforms to the LanguageID production:
1208
 *
1209
 * @deprecated Internal function, do not use.
1210
 *
1211
 * NOTE: this is somewhat deprecated, those productions were removed from
1212
 * the XML Second edition.
1213
 *
1214
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1215
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1216
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1217
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1218
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1219
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1220
 *
1221
 * The current REC reference the successors of RFC 1766, currently 5646
1222
 *
1223
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1224
 *
1225
 *     langtag       = language
1226
 *                     ["-" script]
1227
 *                     ["-" region]
1228
 *                     *("-" variant)
1229
 *                     *("-" extension)
1230
 *                     ["-" privateuse]
1231
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1232
 *                     ["-" extlang]       ; sometimes followed by
1233
 *                                         ; extended language subtags
1234
 *                   / 4ALPHA              ; or reserved for future use
1235
 *                   / 5*8ALPHA            ; or registered language subtag
1236
 *
1237
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1238
 *                     *2("-" 3ALPHA)      ; permanently reserved
1239
 *
1240
 *     script        = 4ALPHA              ; ISO 15924 code
1241
 *
1242
 *     region        = 2ALPHA              ; ISO 3166-1 code
1243
 *                   / 3DIGIT              ; UN M.49 code
1244
 *
1245
 *     variant       = 5*8alphanum         ; registered variants
1246
 *                   / (DIGIT 3alphanum)
1247
 *
1248
 *     extension     = singleton 1*("-" (2*8alphanum))
1249
 *
1250
 *                                         ; Single alphanumerics
1251
 *                                         ; "x" reserved for private use
1252
 *     singleton     = DIGIT               ; 0 - 9
1253
 *                   / %x41-57             ; A - W
1254
 *                   / %x59-5A             ; Y - Z
1255
 *                   / %x61-77             ; a - w
1256
 *                   / %x79-7A             ; y - z
1257
 *
1258
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1259
 * The parser below doesn't try to cope with extension or privateuse
1260
 * that could be added but that's not interoperable anyway
1261
 *
1262
 * @param lang  pointer to the string value
1263
 * @returns 1 if correct 0 otherwise
1264
 **/
1265
int
1266
xmlCheckLanguageID(const xmlChar * lang)
1267
111k
{
1268
111k
    const xmlChar *cur = lang, *nxt;
1269
1270
111k
    if (cur == NULL)
1271
1.24k
        return (0);
1272
110k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1273
108k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1274
106k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1275
105k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1276
        /*
1277
         * Still allow IANA code and user code which were coming
1278
         * from the previous version of the XML-1.0 specification
1279
         * it's deprecated but we should not fail
1280
         */
1281
5.44k
        cur += 2;
1282
13.1k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1283
8.41k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1284
7.66k
            cur++;
1285
5.44k
        return(cur[0] == 0);
1286
5.44k
    }
1287
104k
    nxt = cur;
1288
444k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1289
169k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1290
340k
           nxt++;
1291
104k
    if (nxt - cur >= 4) {
1292
        /*
1293
         * Reserved
1294
         */
1295
7.31k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1296
4.62k
            return(0);
1297
2.69k
        return(1);
1298
7.31k
    }
1299
97.3k
    if (nxt - cur < 2)
1300
8.13k
        return(0);
1301
    /* we got an ISO 639 code */
1302
89.1k
    if (nxt[0] == 0)
1303
3.57k
        return(1);
1304
85.6k
    if (nxt[0] != '-')
1305
5.07k
        return(0);
1306
1307
80.5k
    nxt++;
1308
80.5k
    cur = nxt;
1309
    /* now we can have extlang or script or region or variant */
1310
80.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1311
16.5k
        goto region_m49;
1312
1313
267k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1314
122k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1315
203k
           nxt++;
1316
64.0k
    if (nxt - cur == 4)
1317
10.6k
        goto script;
1318
53.3k
    if (nxt - cur == 2)
1319
5.07k
        goto region;
1320
48.2k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1321
2.64k
        goto variant;
1322
45.6k
    if (nxt - cur != 3)
1323
6.97k
        return(0);
1324
    /* we parsed an extlang */
1325
38.6k
    if (nxt[0] == 0)
1326
1.03k
        return(1);
1327
37.6k
    if (nxt[0] != '-')
1328
10.6k
        return(0);
1329
1330
26.9k
    nxt++;
1331
26.9k
    cur = nxt;
1332
    /* now we can have script or region or variant */
1333
26.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1334
1.99k
        goto region_m49;
1335
1336
102k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1337
47.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1338
77.0k
           nxt++;
1339
24.9k
    if (nxt - cur == 2)
1340
13.8k
        goto region;
1341
11.1k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
2.57k
        goto variant;
1343
8.58k
    if (nxt - cur != 4)
1344
3.98k
        return(0);
1345
    /* we parsed a script */
1346
15.2k
script:
1347
15.2k
    if (nxt[0] == 0)
1348
3.39k
        return(1);
1349
11.8k
    if (nxt[0] != '-')
1350
1.46k
        return(0);
1351
1352
10.4k
    nxt++;
1353
10.4k
    cur = nxt;
1354
    /* now we can have region or variant */
1355
10.4k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1356
1.35k
        goto region_m49;
1357
1358
54.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1359
21.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1360
45.0k
           nxt++;
1361
1362
9.08k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
1.17k
        goto variant;
1364
7.90k
    if (nxt - cur != 2)
1365
3.27k
        return(0);
1366
    /* we parsed a region */
1367
24.5k
region:
1368
24.5k
    if (nxt[0] == 0)
1369
5.83k
        return(1);
1370
18.7k
    if (nxt[0] != '-')
1371
14.1k
        return(0);
1372
1373
4.59k
    nxt++;
1374
4.59k
    cur = nxt;
1375
    /* now we can just have a variant */
1376
26.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1377
17.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1378
21.4k
           nxt++;
1379
1380
4.59k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1381
3.41k
        return(0);
1382
1383
    /* we parsed a variant */
1384
7.57k
variant:
1385
7.57k
    if (nxt[0] == 0)
1386
2.25k
        return(1);
1387
5.32k
    if (nxt[0] != '-')
1388
3.53k
        return(0);
1389
    /* extensions and private use subtags not checked */
1390
1.79k
    return (1);
1391
1392
19.8k
region_m49:
1393
19.8k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1394
12.6k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1395
1.05k
        nxt += 3;
1396
1.05k
        goto region;
1397
1.05k
    }
1398
18.7k
    return(0);
1399
19.8k
}
1400
1401
/************************************************************************
1402
 *                  *
1403
 *    Parser stacks related functions and macros    *
1404
 *                  *
1405
 ************************************************************************/
1406
1407
static xmlChar *
1408
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1409
1410
/**
1411
 * Create a new namespace database.
1412
 *
1413
 * @returns the new obejct.
1414
 */
1415
xmlParserNsData *
1416
470k
xmlParserNsCreate(void) {
1417
470k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1418
1419
470k
    if (nsdb == NULL)
1420
55
        return(NULL);
1421
469k
    memset(nsdb, 0, sizeof(*nsdb));
1422
469k
    nsdb->defaultNsIndex = INT_MAX;
1423
1424
469k
    return(nsdb);
1425
470k
}
1426
1427
/**
1428
 * Free a namespace database.
1429
 *
1430
 * @param nsdb  namespace database
1431
 */
1432
void
1433
469k
xmlParserNsFree(xmlParserNsData *nsdb) {
1434
469k
    if (nsdb == NULL)
1435
0
        return;
1436
1437
469k
    xmlFree(nsdb->extra);
1438
469k
    xmlFree(nsdb->hash);
1439
469k
    xmlFree(nsdb);
1440
469k
}
1441
1442
/**
1443
 * Reset a namespace database.
1444
 *
1445
 * @param nsdb  namespace database
1446
 */
1447
static void
1448
189k
xmlParserNsReset(xmlParserNsData *nsdb) {
1449
189k
    if (nsdb == NULL)
1450
12.2k
        return;
1451
1452
177k
    nsdb->hashElems = 0;
1453
177k
    nsdb->elementId = 0;
1454
177k
    nsdb->defaultNsIndex = INT_MAX;
1455
1456
177k
    if (nsdb->hash)
1457
863
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1458
177k
}
1459
1460
/**
1461
 * Signal that a new element has started.
1462
 *
1463
 * @param nsdb  namespace database
1464
 * @returns 0 on success, -1 if the element counter overflowed.
1465
 */
1466
static int
1467
4.96M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1468
4.96M
    if (nsdb->elementId == UINT_MAX)
1469
0
        return(-1);
1470
4.96M
    nsdb->elementId++;
1471
1472
4.96M
    return(0);
1473
4.96M
}
1474
1475
/**
1476
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1477
 * be set to the matching bucket, or the first empty bucket if no match
1478
 * was found.
1479
 *
1480
 * @param ctxt  parser context
1481
 * @param prefix  namespace prefix
1482
 * @param bucketPtr  optional bucket (return value)
1483
 * @returns the namespace index on success, INT_MAX if no namespace was
1484
 * found.
1485
 */
1486
static int
1487
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1488
11.3M
                  xmlParserNsBucket **bucketPtr) {
1489
11.3M
    xmlParserNsBucket *bucket, *tombstone;
1490
11.3M
    unsigned index, hashValue;
1491
1492
11.3M
    if (prefix->name == NULL)
1493
4.45M
        return(ctxt->nsdb->defaultNsIndex);
1494
1495
6.90M
    if (ctxt->nsdb->hashSize == 0)
1496
1.17M
        return(INT_MAX);
1497
1498
5.73M
    hashValue = prefix->hashValue;
1499
5.73M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1500
5.73M
    bucket = &ctxt->nsdb->hash[index];
1501
5.73M
    tombstone = NULL;
1502
1503
7.14M
    while (bucket->hashValue) {
1504
6.50M
        if (bucket->index == INT_MAX) {
1505
429k
            if (tombstone == NULL)
1506
360k
                tombstone = bucket;
1507
6.07M
        } else if (bucket->hashValue == hashValue) {
1508
5.08M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1509
5.08M
                if (bucketPtr != NULL)
1510
2.57M
                    *bucketPtr = bucket;
1511
5.08M
                return(bucket->index);
1512
5.08M
            }
1513
5.08M
        }
1514
1515
1.41M
        index++;
1516
1.41M
        bucket++;
1517
1.41M
        if (index == ctxt->nsdb->hashSize) {
1518
272k
            index = 0;
1519
272k
            bucket = ctxt->nsdb->hash;
1520
272k
        }
1521
1.41M
    }
1522
1523
643k
    if (bucketPtr != NULL)
1524
339k
        *bucketPtr = tombstone ? tombstone : bucket;
1525
643k
    return(INT_MAX);
1526
5.73M
}
1527
1528
/**
1529
 * Lookup namespace URI with given prefix.
1530
 *
1531
 * @param ctxt  parser context
1532
 * @param prefix  namespace prefix
1533
 * @returns the namespace URI on success, NULL if no namespace was found.
1534
 */
1535
static const xmlChar *
1536
4.52M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1537
4.52M
    const xmlChar *ret;
1538
4.52M
    int nsIndex;
1539
1540
4.52M
    if (prefix->name == ctxt->str_xml)
1541
6.36k
        return(ctxt->str_xml_ns);
1542
1543
    /*
1544
     * minNsIndex is used when building an entity tree. We must
1545
     * ignore namespaces declared outside the entity.
1546
     */
1547
4.52M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1548
4.52M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1549
2.77M
        return(NULL);
1550
1551
1.74M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1552
1.74M
    if (ret[0] == 0)
1553
63.1k
        ret = NULL;
1554
1.74M
    return(ret);
1555
4.52M
}
1556
1557
/**
1558
 * Lookup extra data for the given prefix. This returns data stored
1559
 * with xmlParserNsUdpateSax.
1560
 *
1561
 * @param ctxt  parser context
1562
 * @param prefix  namespace prefix
1563
 * @returns the data on success, NULL if no namespace was found.
1564
 */
1565
void *
1566
1.51M
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1567
1.51M
    xmlHashedString hprefix;
1568
1.51M
    int nsIndex;
1569
1570
1.51M
    if (prefix == ctxt->str_xml)
1571
228k
        return(NULL);
1572
1573
1.28M
    hprefix.name = prefix;
1574
1.28M
    if (prefix != NULL)
1575
644k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1576
637k
    else
1577
637k
        hprefix.hashValue = 0;
1578
1.28M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1579
1.28M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1580
0
        return(NULL);
1581
1582
1.28M
    return(ctxt->nsdb->extra[nsIndex].saxData);
1583
1.28M
}
1584
1585
/**
1586
 * Sets or updates extra data for the given prefix. This value will be
1587
 * returned by xmlParserNsLookupSax as long as the namespace with the
1588
 * given prefix is in scope.
1589
 *
1590
 * @param ctxt  parser context
1591
 * @param prefix  namespace prefix
1592
 * @param saxData  extra data for SAX handler
1593
 * @returns the data on success, NULL if no namespace was found.
1594
 */
1595
int
1596
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1597
1.17M
                     void *saxData) {
1598
1.17M
    xmlHashedString hprefix;
1599
1.17M
    int nsIndex;
1600
1601
1.17M
    if (prefix == ctxt->str_xml)
1602
0
        return(-1);
1603
1604
1.17M
    hprefix.name = prefix;
1605
1.17M
    if (prefix != NULL)
1606
1.01M
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1607
158k
    else
1608
158k
        hprefix.hashValue = 0;
1609
1.17M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1610
1.17M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1611
0
        return(-1);
1612
1613
1.17M
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1614
1.17M
    return(0);
1615
1.17M
}
1616
1617
/**
1618
 * Grows the namespace tables.
1619
 *
1620
 * @param ctxt  parser context
1621
 * @returns 0 on success, -1 if a memory allocation failed.
1622
 */
1623
static int
1624
168k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1625
168k
    const xmlChar **table;
1626
168k
    xmlParserNsExtra *extra;
1627
168k
    int newSize;
1628
1629
168k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1630
168k
                              sizeof(table[0]) + sizeof(extra[0]),
1631
168k
                              16, XML_MAX_ITEMS);
1632
168k
    if (newSize < 0)
1633
0
        goto error;
1634
1635
168k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1636
168k
    if (table == NULL)
1637
95
        goto error;
1638
167k
    ctxt->nsTab = table;
1639
1640
167k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1641
167k
    if (extra == NULL)
1642
121
        goto error;
1643
167k
    ctxt->nsdb->extra = extra;
1644
1645
167k
    ctxt->nsMax = newSize;
1646
167k
    return(0);
1647
1648
216
error:
1649
216
    xmlErrMemory(ctxt);
1650
216
    return(-1);
1651
167k
}
1652
1653
/**
1654
 * Push a new namespace on the table.
1655
 *
1656
 * @param ctxt  parser context
1657
 * @param prefix  prefix with hash value
1658
 * @param uri  uri with hash value
1659
 * @param saxData  extra data for SAX handler
1660
 * @param defAttr  whether the namespace comes from a default attribute
1661
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1662
 * -1 if a memory allocation failed.
1663
 */
1664
static int
1665
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1666
1.88M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1667
1.88M
    xmlParserNsBucket *bucket = NULL;
1668
1.88M
    xmlParserNsExtra *extra;
1669
1.88M
    const xmlChar **ns;
1670
1.88M
    unsigned hashValue, nsIndex, oldIndex;
1671
1672
1.88M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1673
2.65k
        return(0);
1674
1675
1.87M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1676
216
        xmlErrMemory(ctxt);
1677
216
        return(-1);
1678
216
    }
1679
1680
    /*
1681
     * Default namespace and 'xml' namespace
1682
     */
1683
1.87M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1684
319k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1685
1686
319k
        if (oldIndex != INT_MAX) {
1687
273k
            extra = &ctxt->nsdb->extra[oldIndex];
1688
1689
273k
            if (extra->elementId == ctxt->nsdb->elementId) {
1690
23.2k
                if (defAttr == 0)
1691
21.4k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1692
23.2k
                return(0);
1693
23.2k
            }
1694
1695
250k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1696
69.5k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1697
36.7k
                return(0);
1698
250k
        }
1699
1700
259k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1701
259k
        goto populate_entry;
1702
319k
    }
1703
1704
    /*
1705
     * Hash table lookup
1706
     */
1707
1.55M
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1708
1.55M
    if (oldIndex != INT_MAX) {
1709
1.15M
        extra = &ctxt->nsdb->extra[oldIndex];
1710
1711
        /*
1712
         * Check for duplicate definitions on the same element.
1713
         */
1714
1.15M
        if (extra->elementId == ctxt->nsdb->elementId) {
1715
7.06k
            if (defAttr == 0)
1716
5.24k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1717
7.06k
            return(0);
1718
7.06k
        }
1719
1720
1.15M
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1721
32.7k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1722
21.4k
            return(0);
1723
1724
1.12M
        bucket->index = ctxt->nsNr;
1725
1.12M
        goto populate_entry;
1726
1.15M
    }
1727
1728
    /*
1729
     * Insert new bucket
1730
     */
1731
1732
401k
    hashValue = prefix->hashValue;
1733
1734
    /*
1735
     * Grow hash table, 50% fill factor
1736
     */
1737
401k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1738
71.1k
        xmlParserNsBucket *newHash;
1739
71.1k
        unsigned newSize, i, index;
1740
1741
71.1k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1742
0
            xmlErrMemory(ctxt);
1743
0
            return(-1);
1744
0
        }
1745
71.1k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1746
71.1k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1747
71.1k
        if (newHash == NULL) {
1748
40
            xmlErrMemory(ctxt);
1749
40
            return(-1);
1750
40
        }
1751
71.0k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1752
1753
1.08M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1754
1.01M
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1755
1.01M
            unsigned newIndex;
1756
1757
1.01M
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1758
987k
                continue;
1759
24.2k
            newIndex = hv & (newSize - 1);
1760
1761
35.0k
            while (newHash[newIndex].hashValue != 0) {
1762
10.7k
                newIndex++;
1763
10.7k
                if (newIndex == newSize)
1764
3.36k
                    newIndex = 0;
1765
10.7k
            }
1766
1767
24.2k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1768
24.2k
        }
1769
1770
71.0k
        xmlFree(ctxt->nsdb->hash);
1771
71.0k
        ctxt->nsdb->hash = newHash;
1772
71.0k
        ctxt->nsdb->hashSize = newSize;
1773
1774
        /*
1775
         * Relookup
1776
         */
1777
71.0k
        index = hashValue & (newSize - 1);
1778
1779
77.1k
        while (newHash[index].hashValue != 0) {
1780
6.09k
            index++;
1781
6.09k
            if (index == newSize)
1782
1.34k
                index = 0;
1783
6.09k
        }
1784
1785
71.0k
        bucket = &newHash[index];
1786
71.0k
    }
1787
1788
401k
    bucket->hashValue = hashValue;
1789
401k
    bucket->index = ctxt->nsNr;
1790
401k
    ctxt->nsdb->hashElems++;
1791
401k
    oldIndex = INT_MAX;
1792
1793
1.79M
populate_entry:
1794
1.79M
    nsIndex = ctxt->nsNr;
1795
1796
1.79M
    ns = &ctxt->nsTab[nsIndex * 2];
1797
1.79M
    ns[0] = prefix ? prefix->name : NULL;
1798
1.79M
    ns[1] = uri->name;
1799
1800
1.79M
    extra = &ctxt->nsdb->extra[nsIndex];
1801
1.79M
    extra->saxData = saxData;
1802
1.79M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1803
1.79M
    extra->uriHashValue = uri->hashValue;
1804
1.79M
    extra->elementId = ctxt->nsdb->elementId;
1805
1.79M
    extra->oldIndex = oldIndex;
1806
1807
1.79M
    ctxt->nsNr++;
1808
1809
1.79M
    return(1);
1810
401k
}
1811
1812
/**
1813
 * Pops the top `nr` namespaces and restores the hash table.
1814
 *
1815
 * @param ctxt  an XML parser context
1816
 * @param nr  the number to pop
1817
 * @returns the number of namespaces popped.
1818
 */
1819
static int
1820
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1821
716k
{
1822
716k
    int i;
1823
1824
    /* assert(nr <= ctxt->nsNr); */
1825
1826
2.36M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1827
1.65M
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1828
1.65M
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1829
1830
1.65M
        if (prefix == NULL) {
1831
230k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1832
1.42M
        } else {
1833
1.42M
            xmlHashedString hprefix;
1834
1.42M
            xmlParserNsBucket *bucket = NULL;
1835
1836
1.42M
            hprefix.name = prefix;
1837
1.42M
            hprefix.hashValue = extra->prefixHashValue;
1838
1.42M
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1839
            /* assert(bucket && bucket->hashValue); */
1840
1.42M
            bucket->index = extra->oldIndex;
1841
1.42M
        }
1842
1.65M
    }
1843
1844
716k
    ctxt->nsNr -= nr;
1845
716k
    return(nr);
1846
716k
}
1847
1848
static int
1849
154k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1850
154k
    const xmlChar **atts;
1851
154k
    unsigned *attallocs;
1852
154k
    int newSize;
1853
1854
154k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1855
154k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1856
154k
                              10, XML_MAX_ATTRS);
1857
154k
    if (newSize < 0) {
1858
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1859
0
                    "Maximum number of attributes exceeded");
1860
0
        return(-1);
1861
0
    }
1862
1863
154k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1864
154k
    if (atts == NULL)
1865
97
        goto mem_error;
1866
154k
    ctxt->atts = atts;
1867
1868
154k
    attallocs = xmlRealloc(ctxt->attallocs,
1869
154k
                           newSize * sizeof(attallocs[0]));
1870
154k
    if (attallocs == NULL)
1871
91
        goto mem_error;
1872
153k
    ctxt->attallocs = attallocs;
1873
1874
153k
    ctxt->maxatts = newSize * 5;
1875
1876
153k
    return(0);
1877
1878
188
mem_error:
1879
188
    xmlErrMemory(ctxt);
1880
188
    return(-1);
1881
154k
}
1882
1883
/**
1884
 * Pushes a new parser input on top of the input stack
1885
 *
1886
 * @param ctxt  an XML parser context
1887
 * @param value  the parser input
1888
 * @returns -1 in case of error, the index in the stack otherwise
1889
 */
1890
int
1891
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1892
1.28M
{
1893
1.28M
    char *directory = NULL;
1894
1.28M
    int maxDepth;
1895
1896
1.28M
    if ((ctxt == NULL) || (value == NULL))
1897
14.2k
        return(-1);
1898
1899
1.26M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1900
1901
1.26M
    if (ctxt->inputNr >= ctxt->inputMax) {
1902
56.8k
        xmlParserInputPtr *tmp;
1903
56.8k
        int newSize;
1904
1905
56.8k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1906
56.8k
                                  5, maxDepth);
1907
56.8k
        if (newSize < 0) {
1908
10
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1909
10
                           "Maximum entity nesting depth exceeded");
1910
10
            return(-1);
1911
10
        }
1912
56.7k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1913
56.7k
        if (tmp == NULL) {
1914
130
            xmlErrMemory(ctxt);
1915
130
            return(-1);
1916
130
        }
1917
56.6k
        ctxt->inputTab = tmp;
1918
56.6k
        ctxt->inputMax = newSize;
1919
56.6k
    }
1920
1921
1.26M
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1922
583k
        directory = xmlParserGetDirectory(value->filename);
1923
583k
        if (directory == NULL) {
1924
190
            xmlErrMemory(ctxt);
1925
190
            return(-1);
1926
190
        }
1927
583k
    }
1928
1929
1.26M
    if (ctxt->input_id >= INT_MAX) {
1930
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1931
0
        return(-1);
1932
0
    }
1933
1934
1.26M
    ctxt->inputTab[ctxt->inputNr] = value;
1935
1.26M
    ctxt->input = value;
1936
1937
1.26M
    if (ctxt->inputNr == 0) {
1938
698k
        xmlFree(ctxt->directory);
1939
698k
        ctxt->directory = directory;
1940
698k
    }
1941
1942
    /*
1943
     * The input ID is unused internally, but there are entity
1944
     * loaders in downstream code that detect the main document
1945
     * by checking for "input_id == 1".
1946
     */
1947
1.26M
    value->id = ctxt->input_id++;
1948
1949
1.26M
    return(ctxt->inputNr++);
1950
1.26M
}
1951
1952
/**
1953
 * Pops the top parser input from the input stack
1954
 *
1955
 * @param ctxt  an XML parser context
1956
 * @returns the input just removed
1957
 */
1958
xmlParserInput *
1959
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1960
2.41M
{
1961
2.41M
    xmlParserInputPtr ret;
1962
1963
2.41M
    if (ctxt == NULL)
1964
0
        return(NULL);
1965
2.41M
    if (ctxt->inputNr <= 0)
1966
1.15M
        return (NULL);
1967
1.25M
    ctxt->inputNr--;
1968
1.25M
    if (ctxt->inputNr > 0)
1969
570k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1970
688k
    else
1971
688k
        ctxt->input = NULL;
1972
1.25M
    ret = ctxt->inputTab[ctxt->inputNr];
1973
1.25M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1974
1.25M
    return (ret);
1975
2.41M
}
1976
1977
/**
1978
 * Pushes a new element node on top of the node stack
1979
 *
1980
 * @deprecated Internal function, do not use.
1981
 *
1982
 * @param ctxt  an XML parser context
1983
 * @param value  the element node
1984
 * @returns -1 in case of error, the index in the stack otherwise
1985
 */
1986
int
1987
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1988
4.77M
{
1989
4.77M
    if (ctxt == NULL)
1990
0
        return(0);
1991
1992
4.77M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1993
390k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1994
390k
        xmlNodePtr *tmp;
1995
390k
        int newSize;
1996
1997
390k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
1998
390k
                                  10, maxDepth);
1999
390k
        if (newSize < 0) {
2000
758
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2001
758
                    "Excessive depth in document: %d,"
2002
758
                    " use XML_PARSE_HUGE option\n",
2003
758
                    ctxt->nodeNr);
2004
758
            return(-1);
2005
758
        }
2006
2007
389k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2008
389k
        if (tmp == NULL) {
2009
307
            xmlErrMemory(ctxt);
2010
307
            return (-1);
2011
307
        }
2012
389k
        ctxt->nodeTab = tmp;
2013
389k
  ctxt->nodeMax = newSize;
2014
389k
    }
2015
2016
4.76M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2017
4.76M
    ctxt->node = value;
2018
4.76M
    return (ctxt->nodeNr++);
2019
4.77M
}
2020
2021
/**
2022
 * Pops the top element node from the node stack
2023
 *
2024
 * @deprecated Internal function, do not use.
2025
 *
2026
 * @param ctxt  an XML parser context
2027
 * @returns the node just removed
2028
 */
2029
xmlNode *
2030
nodePop(xmlParserCtxt *ctxt)
2031
4.80M
{
2032
4.80M
    xmlNodePtr ret;
2033
2034
4.80M
    if (ctxt == NULL) return(NULL);
2035
4.80M
    if (ctxt->nodeNr <= 0)
2036
420k
        return (NULL);
2037
4.37M
    ctxt->nodeNr--;
2038
4.37M
    if (ctxt->nodeNr > 0)
2039
4.24M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2040
131k
    else
2041
131k
        ctxt->node = NULL;
2042
4.37M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2043
4.37M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2044
4.37M
    return (ret);
2045
4.80M
}
2046
2047
/**
2048
 * Pushes a new element name/prefix/URL on top of the name stack
2049
 *
2050
 * @param ctxt  an XML parser context
2051
 * @param value  the element name
2052
 * @param prefix  the element prefix
2053
 * @param URI  the element namespace name
2054
 * @param line  the current line number for error messages
2055
 * @param nsNr  the number of namespaces pushed on the namespace table
2056
 * @returns -1 in case of error, the index in the stack otherwise
2057
 */
2058
static int
2059
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2060
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2061
5.19M
{
2062
5.19M
    xmlStartTag *tag;
2063
2064
5.19M
    if (ctxt->nameNr >= ctxt->nameMax) {
2065
399k
        const xmlChar **tmp;
2066
399k
        xmlStartTag *tmp2;
2067
399k
        int newSize;
2068
2069
399k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2070
399k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2071
399k
                                  10, XML_MAX_ITEMS);
2072
399k
        if (newSize < 0)
2073
0
            goto mem_error;
2074
2075
399k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2076
399k
        if (tmp == NULL)
2077
146
      goto mem_error;
2078
399k
  ctxt->nameTab = tmp;
2079
2080
399k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2081
399k
        if (tmp2 == NULL)
2082
162
      goto mem_error;
2083
398k
  ctxt->pushTab = tmp2;
2084
2085
398k
        ctxt->nameMax = newSize;
2086
4.79M
    } else if (ctxt->pushTab == NULL) {
2087
228k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2088
228k
        if (ctxt->pushTab == NULL)
2089
467
            goto mem_error;
2090
228k
    }
2091
5.19M
    ctxt->nameTab[ctxt->nameNr] = value;
2092
5.19M
    ctxt->name = value;
2093
5.19M
    tag = &ctxt->pushTab[ctxt->nameNr];
2094
5.19M
    tag->prefix = prefix;
2095
5.19M
    tag->URI = URI;
2096
5.19M
    tag->line = line;
2097
5.19M
    tag->nsNr = nsNr;
2098
5.19M
    return (ctxt->nameNr++);
2099
775
mem_error:
2100
775
    xmlErrMemory(ctxt);
2101
775
    return (-1);
2102
5.19M
}
2103
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost element name from the name stack and makes the
 * name below it (if any) the current name. The matching `pushTab`
 * entry is left as is.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = ctxt->nameNr - 1;

    /* Detach the top entry and clear its slot. */
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->nameNr = top;

    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2127
2128
/**
2129
 * Pops the top element name from the name stack
2130
 *
2131
 * @deprecated Internal function, do not use.
2132
 *
2133
 * @param ctxt  an XML parser context
2134
 * @returns the name just removed
2135
 */
2136
static const xmlChar *
2137
namePop(xmlParserCtxtPtr ctxt)
2138
4.80M
{
2139
4.80M
    const xmlChar *ret;
2140
2141
4.80M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2142
224
        return (NULL);
2143
4.80M
    ctxt->nameNr--;
2144
4.80M
    if (ctxt->nameNr > 0)
2145
4.69M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2146
118k
    else
2147
118k
        ctxt->name = NULL;
2148
4.80M
    ret = ctxt->nameTab[ctxt->nameNr];
2149
4.80M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2150
4.80M
    return (ret);
2151
4.80M
}
2152
2153
6.32M
/**
 * Pushes a value on top of the space stack and points `ctxt->space`
 * at the new top entry.
 *
 * @param ctxt  an XML parser context
 * @param val  the value to push
 * @returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));

        /* Hitting the size limit is reported like an allocation failure. */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;

    return(ctxt->spaceNr++);
}
2178
2179
5.98M
/**
 * Pops the top value from the space stack. The vacated slot is
 * overwritten with -1 and `ctxt->space` is pointed at the entry below,
 * or at slot 0 when the stack becomes empty.
 *
 * @param ctxt  an XML parser context
 * @returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    ctxt->spaceNr = top;

    /* Never leave ctxt->space dangling: fall back to the first slot. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    return(popped);
}
2191
2192
/*
 * Macros for accessing the content. These should be used only by the
 * parser, and must not be exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them:
 *
 *   CUR_PTR  returns the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location!
 *   CUR      returns the current xmlChar value, i.e. an 8 bit value.
 *            This should be used internally by the parser only to
 *            compare against ASCII values, otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      reads the current xmlChar directly from the input buffer;
 *            it is currently defined identically to CUR.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR, it should be
 *            used only to compare against ASCII based substrings.
 *   SKIP(n)  skips n xmlChars, and must be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   NEXT1    skips 1 xmlChar, and must be used only to skip one
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT     skips to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) skips the current Unicode character, which is l xmlChars
 *            long.
 *   COPY_BUF copies the current Unicode char to the target buffer and
 *            increments the index.
 *   GROW, SHRINK  handling of input buffers
 */
2223
2224
106M
/*
 * Raw access to the current input of `ctxt`. All of these macros expect
 * a variable named `ctxt` to be in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn: compare the first n bytes at `s` with the given characters.
 * Evaluation stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by `val` bytes and columns; refill when a 0 byte is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Like SKIP, but advances byte by byte and tracks newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare `if` statement that already ends in
 * a semicolon; use them only as standalone statements.
 */
#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Expands to a brace block, not a do/while(0) statement. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/* Advance by `l` bytes, counting them as one column (or a new line). */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/*
 * Append character `v` to buffer `b` at index `i` and advance `i`.
 * Expands to an if/else without a trailing semicolon. Arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2298
2299
static int
2300
553M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2301
553M
    int c = xmlCurrentChar(ctxt, len);
2302
2303
553M
    if (c == XML_INVALID_CHAR)
2304
87.6M
        c = 0xFFFD; /* replacement character */
2305
2306
553M
    return(c);
2307
553M
}
2308
2309
/**
2310
 * Skip whitespace in the input stream.
2311
 *
2312
 * @deprecated Internal function, do not use.
2313
 *
2314
 * @param ctxt  the XML parser context
2315
 * @returns the number of space chars skipped
2316
 */
2317
int
2318
31.7M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2319
31.7M
    const xmlChar *cur;
2320
31.7M
    int res = 0;
2321
2322
31.7M
    cur = ctxt->input->cur;
2323
31.7M
    while (IS_BLANK_CH(*cur)) {
2324
14.6M
        if (*cur == '\n') {
2325
6.07M
            ctxt->input->line++; ctxt->input->col = 1;
2326
8.61M
        } else {
2327
8.61M
            ctxt->input->col++;
2328
8.61M
        }
2329
14.6M
        cur++;
2330
14.6M
        if (res < INT_MAX)
2331
14.6M
            res++;
2332
14.6M
        if (*cur == 0) {
2333
149k
            ctxt->input->cur = cur;
2334
149k
            xmlParserGrow(ctxt);
2335
149k
            cur = ctxt->input->cur;
2336
149k
        }
2337
14.6M
    }
2338
31.7M
    ctxt->input->cur = cur;
2339
2340
31.7M
    if (res > 4)
2341
215k
        GROW;
2342
2343
31.7M
    return(res);
2344
31.7M
}
2345
2346
static void
2347
522k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2348
522k
    unsigned long consumed;
2349
522k
    xmlEntityPtr ent;
2350
2351
522k
    ent = ctxt->input->entity;
2352
2353
522k
    ent->flags &= ~XML_ENT_EXPANDING;
2354
2355
522k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2356
36.3k
        int result;
2357
2358
        /*
2359
         * Read the rest of the stream in case of errors. We want
2360
         * to account for the whole entity size.
2361
         */
2362
208k
        do {
2363
208k
            ctxt->input->cur = ctxt->input->end;
2364
208k
            xmlParserShrink(ctxt);
2365
208k
            result = xmlParserGrow(ctxt);
2366
208k
        } while (result > 0);
2367
2368
36.3k
        consumed = ctxt->input->consumed;
2369
36.3k
        xmlSaturatedAddSizeT(&consumed,
2370
36.3k
                             ctxt->input->end - ctxt->input->base);
2371
2372
36.3k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2373
2374
        /*
2375
         * Add to sizeentities when parsing an external entity
2376
         * for the first time.
2377
         */
2378
36.3k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2379
20.5k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2380
20.5k
        }
2381
2382
36.3k
        ent->flags |= XML_ENT_CHECKED;
2383
36.3k
    }
2384
2385
522k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2386
2387
522k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2388
2389
522k
    GROW;
2390
522k
}
2391
2392
/**
2393
 * Skip whitespace in the input stream, also handling parameter
2394
 * entities.
2395
 *
2396
 * @param ctxt  the XML parser context
2397
 * @returns the number of space chars skipped
2398
 */
2399
static int
2400
4.86M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2401
4.86M
    int res = 0;
2402
4.86M
    int inParam;
2403
4.86M
    int expandParam;
2404
2405
4.86M
    inParam = PARSER_IN_PE(ctxt);
2406
4.86M
    expandParam = PARSER_EXTERNAL(ctxt);
2407
2408
4.86M
    if (!inParam && !expandParam)
2409
1.41M
        return(xmlSkipBlankChars(ctxt));
2410
2411
    /*
2412
     * It's Okay to use CUR/NEXT here since all the blanks are on
2413
     * the ASCII range.
2414
     */
2415
8.23M
    while (PARSER_STOPPED(ctxt) == 0) {
2416
8.22M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2417
4.67M
            NEXT;
2418
4.67M
        } else if (CUR == '%') {
2419
210k
            if ((expandParam == 0) ||
2420
208k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2421
143k
                break;
2422
2423
            /*
2424
             * Expand parameter entity. We continue to consume
2425
             * whitespace at the start of the entity and possible
2426
             * even consume the whole entity and pop it. We might
2427
             * even pop multiple PEs in this loop.
2428
             */
2429
67.6k
            xmlParsePERefInternal(ctxt, 0);
2430
2431
67.6k
            inParam = PARSER_IN_PE(ctxt);
2432
67.6k
            expandParam = PARSER_EXTERNAL(ctxt);
2433
3.34M
        } else if (CUR == 0) {
2434
243k
            if (inParam == 0)
2435
655
                break;
2436
2437
            /*
2438
             * Don't pop parameter entities that start a markup
2439
             * declaration to detect Well-formedness constraint:
2440
             * PE Between Declarations.
2441
             */
2442
242k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2443
210k
                break;
2444
2445
32.5k
            xmlPopPE(ctxt);
2446
2447
32.5k
            inParam = PARSER_IN_PE(ctxt);
2448
32.5k
            expandParam = PARSER_EXTERNAL(ctxt);
2449
3.09M
        } else {
2450
3.09M
            break;
2451
3.09M
        }
2452
2453
        /*
2454
         * Also increase the counter when entering or exiting a PERef.
2455
         * The spec says: "When a parameter-entity reference is recognized
2456
         * in the DTD and included, its replacement text MUST be enlarged
2457
         * by the attachment of one leading and one following space (#x20)
2458
         * character."
2459
         */
2460
4.77M
        if (res < INT_MAX)
2461
4.77M
            res++;
2462
4.77M
    }
2463
2464
3.45M
    return(res);
2465
4.86M
}
2466
2467
/************************************************************************
2468
 *                  *
2469
 *    Commodity functions to handle entities      *
2470
 *                  *
2471
 ************************************************************************/
2472
2473
/**
2474
 * @deprecated Internal function, don't use.
2475
 *
2476
 * @param ctxt  an XML parser context
2477
 * @returns the current xmlChar in the parser context
2478
 */
2479
xmlChar
2480
0
xmlPopInput(xmlParserCtxt *ctxt) {
2481
0
    xmlParserInputPtr input;
2482
2483
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2484
0
    input = xmlCtxtPopInput(ctxt);
2485
0
    xmlFreeInputStream(input);
2486
0
    if (*ctxt->input->cur == 0)
2487
0
        xmlParserGrow(ctxt);
2488
0
    return(CUR);
2489
0
}
2490
2491
/**
2492
 * Push an input stream onto the stack.
2493
 *
2494
 * @deprecated Internal function, don't use.
2495
 *
2496
 * @param ctxt  an XML parser context
2497
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2498
 * @returns -1 in case of error or the index in the input stack
2499
 */
2500
int
2501
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2502
0
    int ret;
2503
2504
0
    if ((ctxt == NULL) || (input == NULL))
2505
0
        return(-1);
2506
2507
0
    ret = xmlCtxtPushInput(ctxt, input);
2508
0
    if (ret >= 0)
2509
0
        GROW;
2510
0
    return(ret);
2511
0
}
2512
2513
/**
2514
 * Parse a numeric character reference. Always consumes '&'.
2515
 *
2516
 * @deprecated Internal function, don't use.
2517
 *
2518
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2519
 *                      '&#x' [0-9a-fA-F]+ ';'
2520
 *
2521
 * [ WFC: Legal Character ]
2522
 * Characters referred to using character references must match the
2523
 * production for Char.
2524
 *
2525
 * @param ctxt  an XML parser context
2526
 * @returns the value parsed (as an int), 0 in case of error
2527
 */
2528
int
2529
791k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2530
791k
    int val = 0;
2531
791k
    int count = 0;
2532
2533
    /*
2534
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2535
     */
2536
791k
    if ((RAW == '&') && (NXT(1) == '#') &&
2537
791k
        (NXT(2) == 'x')) {
2538
454k
  SKIP(3);
2539
454k
  GROW;
2540
1.41M
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2541
1.04M
      if (count++ > 20) {
2542
26.2k
    count = 0;
2543
26.2k
    GROW;
2544
26.2k
      }
2545
1.04M
      if ((RAW >= '0') && (RAW <= '9'))
2546
485k
          val = val * 16 + (CUR - '0');
2547
563k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2548
332k
          val = val * 16 + (CUR - 'a') + 10;
2549
230k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2550
140k
          val = val * 16 + (CUR - 'A') + 10;
2551
90.1k
      else {
2552
90.1k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2553
90.1k
    val = 0;
2554
90.1k
    break;
2555
90.1k
      }
2556
958k
      if (val > 0x110000)
2557
292k
          val = 0x110000;
2558
2559
958k
      NEXT;
2560
958k
      count++;
2561
958k
  }
2562
454k
  if (RAW == ';') {
2563
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2564
364k
      ctxt->input->col++;
2565
364k
      ctxt->input->cur++;
2566
364k
  }
2567
454k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2568
337k
  SKIP(2);
2569
337k
  GROW;
2570
1.03M
  while (RAW != ';') { /* loop blocked by count */
2571
750k
      if (count++ > 20) {
2572
13.0k
    count = 0;
2573
13.0k
    GROW;
2574
13.0k
      }
2575
750k
      if ((RAW >= '0') && (RAW <= '9'))
2576
702k
          val = val * 10 + (CUR - '0');
2577
47.9k
      else {
2578
47.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2579
47.9k
    val = 0;
2580
47.9k
    break;
2581
47.9k
      }
2582
702k
      if (val > 0x110000)
2583
125k
          val = 0x110000;
2584
2585
702k
      NEXT;
2586
702k
      count++;
2587
702k
  }
2588
337k
  if (RAW == ';') {
2589
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2590
289k
      ctxt->input->col++;
2591
289k
      ctxt->input->cur++;
2592
289k
  }
2593
337k
    } else {
2594
0
        if (RAW == '&')
2595
0
            SKIP(1);
2596
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2597
0
    }
2598
2599
    /*
2600
     * [ WFC: Legal Character ]
2601
     * Characters referred to using character references must match the
2602
     * production for Char.
2603
     */
2604
791k
    if (val >= 0x110000) {
2605
5.03k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2606
5.03k
                "xmlParseCharRef: character reference out of bounds\n",
2607
5.03k
          val);
2608
5.03k
        val = 0xFFFD;
2609
786k
    } else if (!IS_CHAR(val)) {
2610
187k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2611
187k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2612
187k
                    val);
2613
187k
    }
2614
791k
    return(val);
2615
791k
}
2616
2617
/**
2618
 * Parse Reference declarations, variant parsing from a string rather
2619
 * than an an input flow.
2620
 *
2621
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2622
 *                      '&#x' [0-9a-fA-F]+ ';'
2623
 *
2624
 * [ WFC: Legal Character ]
2625
 * Characters referred to using character references must match the
2626
 * production for Char.
2627
 *
2628
 * @param ctxt  an XML parser context
2629
 * @param str  a pointer to an index in the string
2630
 * @returns the value parsed (as an int), 0 in case of error, str will be
2631
 *         updated to the current value of the index
2632
 */
2633
static int
2634
2.14M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2635
2.14M
    const xmlChar *ptr;
2636
2.14M
    xmlChar cur;
2637
2.14M
    int val = 0;
2638
2639
2.14M
    if ((str == NULL) || (*str == NULL)) return(0);
2640
2.14M
    ptr = *str;
2641
2.14M
    cur = *ptr;
2642
2.14M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2643
61.0k
  ptr += 3;
2644
61.0k
  cur = *ptr;
2645
236k
  while (cur != ';') { /* Non input consuming loop */
2646
180k
      if ((cur >= '0') && (cur <= '9'))
2647
96.1k
          val = val * 16 + (cur - '0');
2648
84.8k
      else if ((cur >= 'a') && (cur <= 'f'))
2649
24.3k
          val = val * 16 + (cur - 'a') + 10;
2650
60.5k
      else if ((cur >= 'A') && (cur <= 'F'))
2651
54.5k
          val = val * 16 + (cur - 'A') + 10;
2652
5.95k
      else {
2653
5.95k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2654
5.95k
    val = 0;
2655
5.95k
    break;
2656
5.95k
      }
2657
175k
      if (val > 0x110000)
2658
20.6k
          val = 0x110000;
2659
2660
175k
      ptr++;
2661
175k
      cur = *ptr;
2662
175k
  }
2663
61.0k
  if (cur == ';')
2664
55.0k
      ptr++;
2665
2.08M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2666
2.08M
  ptr += 2;
2667
2.08M
  cur = *ptr;
2668
6.44M
  while (cur != ';') { /* Non input consuming loops */
2669
4.38M
      if ((cur >= '0') && (cur <= '9'))
2670
4.36M
          val = val * 10 + (cur - '0');
2671
24.1k
      else {
2672
24.1k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2673
24.1k
    val = 0;
2674
24.1k
    break;
2675
24.1k
      }
2676
4.36M
      if (val > 0x110000)
2677
14.5k
          val = 0x110000;
2678
2679
4.36M
      ptr++;
2680
4.36M
      cur = *ptr;
2681
4.36M
  }
2682
2.08M
  if (cur == ';')
2683
2.05M
      ptr++;
2684
2.08M
    } else {
2685
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2686
0
  return(0);
2687
0
    }
2688
2.14M
    *str = ptr;
2689
2690
    /*
2691
     * [ WFC: Legal Character ]
2692
     * Characters referred to using character references must match the
2693
     * production for Char.
2694
     */
2695
2.14M
    if (val >= 0x110000) {
2696
1.88k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2697
1.88k
                "xmlParseStringCharRef: character reference out of bounds\n",
2698
1.88k
                val);
2699
2.13M
    } else if (IS_CHAR(val)) {
2700
2.10M
        return(val);
2701
2.10M
    } else {
2702
35.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2703
35.7k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2704
35.7k
        val);
2705
35.7k
    }
2706
37.6k
    return(0);
2707
2.14M
}
2708
2709
/**
2710
 *     [69] PEReference ::= '%' Name ';'
2711
 *
2712
 * @deprecated Internal function, do not use.
2713
 *
2714
 * [ WFC: No Recursion ]
2715
 * A parsed entity must not contain a recursive
2716
 * reference to itself, either directly or indirectly.
2717
 *
2718
 * [ WFC: Entity Declared ]
2719
 * In a document without any DTD, a document with only an internal DTD
2720
 * subset which contains no parameter entity references, or a document
2721
 * with "standalone='yes'", ...  ... The declaration of a parameter
2722
 * entity must precede any reference to it...
2723
 *
2724
 * [ VC: Entity Declared ]
2725
 * In a document with an external subset or external parameter entities
2726
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2727
 * must precede any reference to it...
2728
 *
2729
 * [ WFC: In DTD ]
2730
 * Parameter-entity references may only appear in the DTD.
2731
 * NOTE: misleading but this is handled.
2732
 *
2733
 * A PEReference may have been detected in the current input stream
2734
 * the handling is done accordingly to
2735
 *      http://www.w3.org/TR/REC-xml#entproc
2736
 * i.e.
2737
 *   - Included in literal in entity values
2738
 *   - Included as Parameter Entity reference within DTDs
2739
 * @param ctxt  the parser context
2740
 */
2741
void
2742
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2743
0
    xmlParsePERefInternal(ctxt, 0);
2744
0
}
2745
2746
/**
2747
 * @deprecated Internal function, don't use.
2748
 *
2749
 * @param ctxt  the parser context
2750
 * @param str  the input string
2751
 * @param len  the string length
2752
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2753
 * @param end  an end marker xmlChar, 0 if none
2754
 * @param end2  an end marker xmlChar, 0 if none
2755
 * @param end3  an end marker xmlChar, 0 if none
2756
 * @returns A newly allocated string with the substitution done. The caller
2757
 *      must deallocate it !
2758
 */
2759
xmlChar *
2760
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2761
                           int what ATTRIBUTE_UNUSED,
2762
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2763
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2764
0
        return(NULL);
2765
2766
0
    if ((str[len] != 0) ||
2767
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2768
0
        return(NULL);
2769
2770
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2771
0
}
2772
2773
/**
2774
 * @deprecated Internal function, don't use.
2775
 *
2776
 * @param ctxt  the parser context
2777
 * @param str  the input string
2778
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779
 * @param end  an end marker xmlChar, 0 if none
2780
 * @param end2  an end marker xmlChar, 0 if none
2781
 * @param end3  an end marker xmlChar, 0 if none
2782
 * @returns A newly allocated string with the substitution done. The caller
2783
 *      must deallocate it !
2784
 */
2785
xmlChar *
2786
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2787
                        int what ATTRIBUTE_UNUSED,
2788
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2789
0
    if ((ctxt == NULL) || (str == NULL))
2790
0
        return(NULL);
2791
2792
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2793
0
        return(NULL);
2794
2795
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2796
0
}
2797
2798
/************************************************************************
2799
 *                  *
2800
 *    Commodity functions, cleanup needed ?     *
2801
 *                  *
2802
 ************************************************************************/
2803
2804
/**
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * @param ctxt  an XML parser context
2808
 * @param str  a xmlChar *
2809
 * @param len  the size of `str`
2810
 * @param blank_chars  we know the chars are blanks
2811
 * @returns 1 if ignorable 0 otherwise.
2812
 */
2813
2814
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2815
3.08M
                     int blank_chars) {
2816
3.08M
    int i;
2817
3.08M
    xmlNodePtr lastChild;
2818
2819
    /*
2820
     * Check for xml:space value.
2821
     */
2822
3.08M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2823
3.08M
        (*(ctxt->space) == -2))
2824
2.62M
  return(0);
2825
2826
    /*
2827
     * Check that the string is made of blanks
2828
     */
2829
460k
    if (blank_chars == 0) {
2830
765k
  for (i = 0;i < len;i++)
2831
740k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2832
390k
    }
2833
2834
    /*
2835
     * Look if the element is mixed content in the DTD if available
2836
     */
2837
94.6k
    if (ctxt->node == NULL) return(0);
2838
94.6k
    if (ctxt->myDoc != NULL) {
2839
94.6k
        xmlElementPtr elemDecl = NULL;
2840
94.6k
        xmlDocPtr doc = ctxt->myDoc;
2841
94.6k
        const xmlChar *prefix = NULL;
2842
2843
94.6k
        if (ctxt->node->ns)
2844
10.6k
            prefix = ctxt->node->ns->prefix;
2845
94.6k
        if (doc->intSubset != NULL)
2846
61.6k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2847
61.6k
                                      prefix);
2848
94.6k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2849
4.15k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2850
4.15k
                                      prefix);
2851
94.6k
        if (elemDecl != NULL) {
2852
14.6k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2853
6.96k
                return(1);
2854
7.64k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2855
6.54k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2856
2.54k
                return(0);
2857
7.64k
        }
2858
94.6k
    }
2859
2860
    /*
2861
     * Otherwise, heuristic :-\
2862
     *
2863
     * When push parsing, we could be at the end of a chunk.
2864
     * This makes the look-ahead and consequently the NOBLANKS
2865
     * option unreliable.
2866
     */
2867
85.1k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2868
73.3k
    if ((ctxt->node->children == NULL) &&
2869
38.3k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2870
2871
71.2k
    lastChild = xmlGetLastChild(ctxt->node);
2872
71.2k
    if (lastChild == NULL) {
2873
36.2k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2874
0
            (ctxt->node->content != NULL)) return(0);
2875
36.2k
    } else if (xmlNodeIsText(lastChild))
2876
1.44k
        return(0);
2877
33.5k
    else if ((ctxt->node->children != NULL) &&
2878
33.5k
             (xmlNodeIsText(ctxt->node->children)))
2879
1.16k
        return(0);
2880
68.6k
    return(1);
2881
71.2k
}
2882
2883
/************************************************************************
2884
 *                  *
2885
 *    Extra stuff for namespace support     *
2886
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2887
 *                  *
2888
 ************************************************************************/
2889
2890
/**
2891
 * Parse an UTF8 encoded XML qualified name string
2892
 *
2893
 * @deprecated Don't use.
2894
 *
2895
 * @param ctxt  an XML parser context
2896
 * @param name  an XML parser context
2897
 * @param prefixOut  a xmlChar **
2898
 * @returns the local part, and prefix is updated
2899
 *   to get the Prefix if any.
2900
 */
2901
2902
xmlChar *
2903
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2904
0
    xmlChar *ret;
2905
0
    const xmlChar *localname;
2906
2907
0
    localname = xmlSplitQName4(name, prefixOut);
2908
0
    if (localname == NULL) {
2909
0
        xmlCtxtErrMemory(ctxt);
2910
0
        return(NULL);
2911
0
    }
2912
2913
0
    ret = xmlStrdup(localname);
2914
0
    if (ret == NULL) {
2915
0
        xmlCtxtErrMemory(ctxt);
2916
0
        xmlFree(*prefixOut);
2917
0
    }
2918
2919
0
    return(ret);
2920
0
}
2921
2922
/************************************************************************
2923
 *                  *
2924
 *      The parser itself       *
2925
 *  Relates to http://www.w3.org/TR/REC-xml       *
2926
 *                  *
2927
 ************************************************************************/
2928
2929
/************************************************************************
2930
 *                  *
2931
 *  Routines to parse Name, NCName and NmToken      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/*
2936
 * The two following functions are related to the change of accepted
2937
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2938
 * They correspond to the modified production [4] and the new production [4a]
2939
 * changes in that revision. Also note that the macros used for the
2940
 * productions Letter, Digit, CombiningChar and Extender are not needed
2941
 * anymore.
2942
 * We still keep compatibility to pre-revision5 parsing semantic if the
2943
 * new XML_PARSE_OLD10 option is given to the parser.
2944
 */
2945
2946
/*
 * Check whether a code point is a NameStartChar according to
 * productions [4] and [5] of the fifth edition of XML 1.0.
 *
 * The code point space is walked in ascending order so that each
 * test only has to look at the ranges of one segment.
 *
 * Returns 1 if `c` may start a name, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    /* ASCII (and negative values): letters, '_' and ':' */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] */
    if (c <= 0x2FF)
        return((c >= 0xC0) && (c != 0xD7) && (c != 0xF7));

    /* [#x370-#x37D] | [#x37F-#x1FFF] */
    if (c <= 0x1FFF)
        return((c >= 0x370) && (c != 0x37E));

    if (c <= 0xD7FF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               (c >= 0x3001));

    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    return((c >= 0x10000) && (c <= 0xEFFFF));
}
2971
2972
/*
 * Check whether a code point is a NameChar according to productions
 * [4] and [4a] of the fifth edition of XML 1.0.
 *
 * Compared to the NameStartChar test this additionally accepts digits,
 * '-', '.', #xB7, [#x300-#x36F] and [#x203F-#x2040].
 *
 * Returns 1 if `c` may appear inside a name, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    /* ASCII (and negative values) */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) ||
            ((c >= 'A') && (c <= 'Z')) ||
            ((c >= '0') && (c <= '9')))
            return(1);
        return((c == '_') || (c == ':') || (c == '-') || (c == '.'));
    }

    /* #xB7 | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] */
    if (c <= 0x2FF)
        return((c == 0xB7) ||
               ((c >= 0xC0) && (c != 0xD7) && (c != 0xF7)));

    /* [#x300-#x36F] | [#x370-#x37D] | [#x37F-#x1FFF] */
    if (c <= 0x1FFF)
        return(c != 0x37E);

    if (c <= 0xD7FF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x203F) && (c <= 0x2040)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               (c >= 0x3001));

    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    return((c >= 0x10000) && (c <= 0xEFFFF));
}
3001
3002
/*
 * Pre-fifth-edition test for the first character of a name:
 * a Letter, '_' or ':'.
 *
 * Returns 1 if `c` may start a name, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: the most common characters ending a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);

    return((c == '_') || (c == ':'));
}
3009
3010
/*
 * Pre-fifth-edition test for a character inside a name: a Letter,
 * a Digit, '.', '-', '_', ':', a CombiningChar or an Extender.
 *
 * Returns 1 if `c` may appear inside a name, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: the most common characters ending a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);

    switch (c) {
        case '.':
        case '-':
        case '_':
        case ':':
            return(1);
        default:
            break;
    }

    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3021
3022
/*
 * Dispatch the NameStartChar test to the rule set selected by `old10`
 * (non-zero selects the pre-fifth-edition rules).
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3029
3030
/*
 * Dispatch the NameChar test to the rule set selected by `old10`
 * (non-zero selects the pre-fifth-edition rules).
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3037
3038
/*
3039
 * Scan an XML Name, NCName or Nmtoken.
3040
 *
3041
 * Returns a pointer to the end of the name on success. If the
3042
 * name is invalid, returns `ptr`. If the name is longer than
3043
 * `maxSize` bytes, returns NULL.
3044
 *
3045
 * @param ptr  pointer to the start of the name
3046
 * @param maxSize  maximum size in bytes
3047
 * @param flags  XML_SCAN_* flags
3048
 * @returns a pointer to the end of the name or NULL
3049
 */
3050
const xmlChar *
3051
37.8M
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3052
37.8M
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3053
37.8M
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3054
3055
100M
    while (1) {
3056
100M
        int c, len;
3057
3058
100M
        c = *ptr;
3059
100M
        if (c < 0x80) {
3060
79.1M
            if (c == stop)
3061
385k
                break;
3062
78.7M
            len = 1;
3063
78.7M
        } else {
3064
20.8M
            len = 4;
3065
20.8M
            c = xmlGetUTF8Char(ptr, &len);
3066
20.8M
            if (c < 0)
3067
21.1k
                break;
3068
20.8M
        }
3069
3070
99.6M
        if (flags & XML_SCAN_NMTOKEN ?
3071
61.9M
                !xmlIsNameChar(c, old10) :
3072
99.6M
                !xmlIsNameStartChar(c, old10))
3073
37.4M
            break;
3074
3075
62.1M
        if ((size_t) len > maxSize)
3076
579
            return(NULL);
3077
62.1M
        ptr += len;
3078
62.1M
        maxSize -= len;
3079
62.1M
        flags |= XML_SCAN_NMTOKEN;
3080
62.1M
    }
3081
3082
37.8M
    return(ptr);
3083
37.8M
}
3084
3085
static const xmlChar *
3086
1.67M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3087
1.67M
    const xmlChar *ret;
3088
1.67M
    int len = 0, l;
3089
1.67M
    int c;
3090
1.67M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3091
963k
                    XML_MAX_TEXT_LENGTH :
3092
1.67M
                    XML_MAX_NAME_LENGTH;
3093
1.67M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3094
3095
    /*
3096
     * Handler for more complex cases
3097
     */
3098
1.67M
    c = xmlCurrentChar(ctxt, &l);
3099
1.67M
    if (!xmlIsNameStartChar(c, old10))
3100
1.32M
        return(NULL);
3101
351k
    len += l;
3102
351k
    NEXTL(l);
3103
351k
    c = xmlCurrentChar(ctxt, &l);
3104
69.4M
    while (xmlIsNameChar(c, old10)) {
3105
69.0M
        if (len <= INT_MAX - l)
3106
69.0M
            len += l;
3107
69.0M
        NEXTL(l);
3108
69.0M
        c = xmlCurrentChar(ctxt, &l);
3109
69.0M
    }
3110
351k
    if (len > maxLength) {
3111
1.07k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3112
1.07k
        return(NULL);
3113
1.07k
    }
3114
350k
    if (ctxt->input->cur - ctxt->input->base < len) {
3115
        /*
3116
         * There were a couple of bugs where PERefs lead to to a change
3117
         * of the buffer. Check the buffer size to avoid passing an invalid
3118
         * pointer to xmlDictLookup.
3119
         */
3120
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3121
0
                    "unexpected change of input buffer");
3122
0
        return (NULL);
3123
0
    }
3124
350k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125
2.14k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3126
347k
    else
3127
347k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3128
350k
    if (ret == NULL)
3129
26
        xmlErrMemory(ctxt);
3130
350k
    return(ret);
3131
350k
}
3132
3133
/**
3134
 * Parse an XML name.
3135
 *
3136
 * @deprecated Internal function, don't use.
3137
 *
3138
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3139
 *                      CombiningChar | Extender
3140
 *
3141
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3142
 *
3143
 *     [6] Names ::= Name (#x20 Name)*
3144
 *
3145
 * @param ctxt  an XML parser context
3146
 * @returns the Name parsed or NULL
3147
 */
3148
3149
const xmlChar *
3150
10.4M
xmlParseName(xmlParserCtxt *ctxt) {
3151
10.4M
    const xmlChar *in;
3152
10.4M
    const xmlChar *ret;
3153
10.4M
    size_t count = 0;
3154
10.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3155
4.80M
                       XML_MAX_TEXT_LENGTH :
3156
10.4M
                       XML_MAX_NAME_LENGTH;
3157
3158
10.4M
    GROW;
3159
3160
    /*
3161
     * Accelerator for simple ASCII names
3162
     */
3163
10.4M
    in = ctxt->input->cur;
3164
10.4M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3165
2.17M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3166
9.03M
  (*in == '_') || (*in == ':')) {
3167
9.03M
  in++;
3168
72.2M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
29.9M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
14.9M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3171
10.8M
         (*in == '_') || (*in == '-') ||
3172
10.4M
         (*in == ':') || (*in == '.'))
3173
63.2M
      in++;
3174
9.03M
  if ((*in > 0) && (*in < 0x80)) {
3175
8.78M
      count = in - ctxt->input->cur;
3176
8.78M
            if (count > maxLength) {
3177
438
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3178
438
                return(NULL);
3179
438
            }
3180
8.78M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3181
8.78M
      ctxt->input->cur = in;
3182
8.78M
      ctxt->input->col += count;
3183
8.78M
      if (ret == NULL)
3184
39
          xmlErrMemory(ctxt);
3185
8.78M
      return(ret);
3186
8.78M
  }
3187
9.03M
    }
3188
    /* accelerator for special cases */
3189
1.67M
    return(xmlParseNameComplex(ctxt));
3190
10.4M
}
3191
3192
static xmlHashedString
3193
1.89M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3194
1.89M
    xmlHashedString ret;
3195
1.89M
    int len = 0, l;
3196
1.89M
    int c;
3197
1.89M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3198
775k
                    XML_MAX_TEXT_LENGTH :
3199
1.89M
                    XML_MAX_NAME_LENGTH;
3200
1.89M
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3201
1.89M
    size_t startPosition = 0;
3202
3203
1.89M
    ret.name = NULL;
3204
1.89M
    ret.hashValue = 0;
3205
3206
    /*
3207
     * Handler for more complex cases
3208
     */
3209
1.89M
    startPosition = CUR_PTR - BASE_PTR;
3210
1.89M
    c = xmlCurrentChar(ctxt, &l);
3211
1.89M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3212
1.82M
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3213
1.65M
  return(ret);
3214
1.65M
    }
3215
3216
142M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3217
142M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3218
142M
        if (len <= INT_MAX - l)
3219
142M
      len += l;
3220
142M
  NEXTL(l);
3221
142M
  c = xmlCurrentChar(ctxt, &l);
3222
142M
    }
3223
240k
    if (len > maxLength) {
3224
454
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3225
454
        return(ret);
3226
454
    }
3227
239k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3228
239k
    if (ret.name == NULL)
3229
23
        xmlErrMemory(ctxt);
3230
239k
    return(ret);
3231
240k
}
3232
3233
/**
3234
 * Parse an XML name.
3235
 *
3236
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3237
 *                          CombiningChar | Extender
3238
 *
3239
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3240
 *
3241
 * @param ctxt  an XML parser context
3242
 * @returns the Name parsed or NULL
3243
 */
3244
3245
static xmlHashedString
3246
12.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3247
12.1M
    const xmlChar *in, *e;
3248
12.1M
    xmlHashedString ret;
3249
12.1M
    size_t count = 0;
3250
12.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3251
3.81M
                       XML_MAX_TEXT_LENGTH :
3252
12.1M
                       XML_MAX_NAME_LENGTH;
3253
3254
12.1M
    ret.name = NULL;
3255
3256
    /*
3257
     * Accelerator for simple ASCII names
3258
     */
3259
12.1M
    in = ctxt->input->cur;
3260
12.1M
    e = ctxt->input->end;
3261
12.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3262
2.09M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3263
10.4M
   (*in == '_')) && (in < e)) {
3264
10.4M
  in++;
3265
64.7M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
25.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
12.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3268
12.0M
          (*in == '_') || (*in == '-') ||
3269
54.2M
          (*in == '.')) && (in < e))
3270
54.2M
      in++;
3271
10.4M
  if (in >= e)
3272
15.2k
      goto complex;
3273
10.4M
  if ((*in > 0) && (*in < 0x80)) {
3274
10.2M
      count = in - ctxt->input->cur;
3275
10.2M
            if (count > maxLength) {
3276
344
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3277
344
                return(ret);
3278
344
            }
3279
10.2M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3280
10.2M
      ctxt->input->cur = in;
3281
10.2M
      ctxt->input->col += count;
3282
10.2M
      if (ret.name == NULL) {
3283
27
          xmlErrMemory(ctxt);
3284
27
      }
3285
10.2M
      return(ret);
3286
10.2M
  }
3287
10.4M
    }
3288
1.89M
complex:
3289
1.89M
    return(xmlParseNCNameComplex(ctxt));
3290
12.1M
}
3291
3292
/**
3293
 * Parse an XML name and compares for match
3294
 * (specialized for endtag parsing)
3295
 *
3296
 * @param ctxt  an XML parser context
3297
 * @param other  the name to compare with
3298
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3299
 * and the name for mismatch
3300
 */
3301
3302
static const xmlChar *
3303
905k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3304
905k
    register const xmlChar *cmp = other;
3305
905k
    register const xmlChar *in;
3306
905k
    const xmlChar *ret;
3307
3308
905k
    GROW;
3309
3310
905k
    in = ctxt->input->cur;
3311
3.41M
    while (*in != 0 && *in == *cmp) {
3312
2.51M
  ++in;
3313
2.51M
  ++cmp;
3314
2.51M
    }
3315
905k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3316
  /* success */
3317
800k
  ctxt->input->col += in - ctxt->input->cur;
3318
800k
  ctxt->input->cur = in;
3319
800k
  return (const xmlChar*) 1;
3320
800k
    }
3321
    /* failure (or end of input buffer), check with full function */
3322
104k
    ret = xmlParseName (ctxt);
3323
    /* strings coming from the dictionary direct compare possible */
3324
104k
    if (ret == other) {
3325
11.9k
  return (const xmlChar*) 1;
3326
11.9k
    }
3327
93.0k
    return ret;
3328
104k
}
3329
3330
/**
3331
 * Parse an XML name.
3332
 *
3333
 * @param ctxt  an XML parser context
3334
 * @param str  a pointer to the string pointer (IN/OUT)
3335
 * @returns the Name parsed or NULL. The `str` pointer
3336
 * is updated to the current location in the string.
3337
 */
3338
3339
static xmlChar *
3340
7.04M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3341
7.04M
    xmlChar *ret;
3342
7.04M
    const xmlChar *cur = *str;
3343
7.04M
    int flags = 0;
3344
7.04M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3345
3.30M
                    XML_MAX_TEXT_LENGTH :
3346
7.04M
                    XML_MAX_NAME_LENGTH;
3347
3348
7.04M
    if (ctxt->options & XML_PARSE_OLD10)
3349
2.84M
        flags |= XML_SCAN_OLD10;
3350
3351
7.04M
    cur = xmlScanName(*str, maxLength, flags);
3352
7.04M
    if (cur == NULL) {
3353
517
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354
517
        return(NULL);
3355
517
    }
3356
7.04M
    if (cur == *str)
3357
38.5k
        return(NULL);
3358
3359
7.01M
    ret = xmlStrndup(*str, cur - *str);
3360
7.01M
    if (ret == NULL)
3361
276
        xmlErrMemory(ctxt);
3362
7.01M
    *str = cur;
3363
7.01M
    return(ret);
3364
7.04M
}
3365
3366
/**
3367
 * Parse an XML Nmtoken.
3368
 *
3369
 * @deprecated Internal function, don't use.
3370
 *
3371
 *     [7] Nmtoken ::= (NameChar)+
3372
 *
3373
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3374
 *
3375
 * @param ctxt  an XML parser context
3376
 * @returns the Nmtoken parsed or NULL
3377
 */
3378
3379
xmlChar *
3380
301k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3381
301k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3382
301k
    xmlChar *ret;
3383
301k
    int len = 0, l;
3384
301k
    int c;
3385
301k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3386
74.9k
                    XML_MAX_TEXT_LENGTH :
3387
301k
                    XML_MAX_NAME_LENGTH;
3388
301k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3389
3390
301k
    c = xmlCurrentChar(ctxt, &l);
3391
3392
1.23M
    while (xmlIsNameChar(c, old10)) {
3393
940k
  COPY_BUF(buf, len, c);
3394
940k
  NEXTL(l);
3395
940k
  c = xmlCurrentChar(ctxt, &l);
3396
940k
  if (len >= XML_MAX_NAMELEN) {
3397
      /*
3398
       * Okay someone managed to make a huge token, so he's ready to pay
3399
       * for the processing speed.
3400
       */
3401
6.65k
      xmlChar *buffer;
3402
6.65k
      int max = len * 2;
3403
3404
6.65k
      buffer = xmlMalloc(max);
3405
6.65k
      if (buffer == NULL) {
3406
28
          xmlErrMemory(ctxt);
3407
28
    return(NULL);
3408
28
      }
3409
6.62k
      memcpy(buffer, buf, len);
3410
32.5M
      while (xmlIsNameChar(c, old10)) {
3411
32.5M
    if (len + 10 > max) {
3412
24.7k
        xmlChar *tmp;
3413
24.7k
                    int newSize;
3414
3415
24.7k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3416
24.7k
                    if (newSize < 0) {
3417
707
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3418
707
                        xmlFree(buffer);
3419
707
                        return(NULL);
3420
707
                    }
3421
24.0k
        tmp = xmlRealloc(buffer, newSize);
3422
24.0k
        if (tmp == NULL) {
3423
20
      xmlErrMemory(ctxt);
3424
20
      xmlFree(buffer);
3425
20
      return(NULL);
3426
20
        }
3427
24.0k
        buffer = tmp;
3428
24.0k
                    max = newSize;
3429
24.0k
    }
3430
32.5M
    COPY_BUF(buffer, len, c);
3431
32.5M
    NEXTL(l);
3432
32.5M
    c = xmlCurrentChar(ctxt, &l);
3433
32.5M
      }
3434
5.90k
      buffer[len] = 0;
3435
5.90k
      return(buffer);
3436
6.62k
  }
3437
940k
    }
3438
294k
    if (len == 0)
3439
53.9k
        return(NULL);
3440
240k
    if (len > maxLength) {
3441
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3442
0
        return(NULL);
3443
0
    }
3444
240k
    ret = xmlStrndup(buf, len);
3445
240k
    if (ret == NULL)
3446
59
        xmlErrMemory(ctxt);
3447
240k
    return(ret);
3448
240k
}
3449
3450
/**
3451
 * Validate an entity value and expand parameter entities.
3452
 *
3453
 * @param ctxt  parser context
3454
 * @param buf  string buffer
3455
 * @param str  entity value
3456
 * @param length  size of entity value
3457
 * @param depth  nesting depth
3458
 */
3459
static void
3460
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3461
678k
                          const xmlChar *str, int length, int depth) {
3462
678k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3463
678k
    const xmlChar *end, *chunk;
3464
678k
    int c, l;
3465
3466
678k
    if (str == NULL)
3467
233k
        return;
3468
3469
445k
    depth += 1;
3470
445k
    if (depth > maxDepth) {
3471
7
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3472
7
                       "Maximum entity nesting depth exceeded");
3473
7
  return;
3474
7
    }
3475
3476
445k
    end = str + length;
3477
445k
    chunk = str;
3478
3479
2.99G
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3480
2.99G
        c = *str;
3481
3482
2.99G
        if (c >= 0x80) {
3483
2.74G
            l = xmlUTF8MultibyteLen(ctxt, str,
3484
2.74G
                    "invalid character in entity value\n");
3485
2.74G
            if (l == 0) {
3486
219M
                if (chunk < str)
3487
510k
                    xmlSBufAddString(buf, chunk, str - chunk);
3488
219M
                xmlSBufAddReplChar(buf);
3489
219M
                str += 1;
3490
219M
                chunk = str;
3491
2.52G
            } else {
3492
2.52G
                str += l;
3493
2.52G
            }
3494
2.74G
        } else if (c == '&') {
3495
521k
            if (str[1] == '#') {
3496
199k
                if (chunk < str)
3497
152k
                    xmlSBufAddString(buf, chunk, str - chunk);
3498
3499
199k
                c = xmlParseStringCharRef(ctxt, &str);
3500
199k
                if (c == 0)
3501
37.5k
                    return;
3502
3503
161k
                xmlSBufAddChar(buf, c);
3504
3505
161k
                chunk = str;
3506
322k
            } else {
3507
322k
                xmlChar *name;
3508
3509
                /*
3510
                 * General entity references are checked for
3511
                 * syntactic validity.
3512
                 */
3513
322k
                str++;
3514
322k
                name = xmlParseStringName(ctxt, &str);
3515
3516
322k
                if ((name == NULL) || (*str++ != ';')) {
3517
31.3k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3518
31.3k
                            "EntityValue: '&' forbidden except for entities "
3519
31.3k
                            "references\n");
3520
31.3k
                    xmlFree(name);
3521
31.3k
                    return;
3522
31.3k
                }
3523
3524
291k
                xmlFree(name);
3525
291k
            }
3526
251M
        } else if (c == '%') {
3527
434k
            xmlEntityPtr ent;
3528
3529
434k
            if (chunk < str)
3530
162k
                xmlSBufAddString(buf, chunk, str - chunk);
3531
3532
434k
            ent = xmlParseStringPEReference(ctxt, &str);
3533
434k
            if (ent == NULL)
3534
89.1k
                return;
3535
3536
345k
            if (!PARSER_EXTERNAL(ctxt)) {
3537
1.98k
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3538
1.98k
                return;
3539
1.98k
            }
3540
3541
343k
            if (ent->content == NULL) {
3542
                /*
3543
                 * Note: external parsed entities will not be loaded,
3544
                 * it is not required for a non-validating parser to
3545
                 * complete external PEReferences coming from the
3546
                 * internal subset
3547
                 */
3548
234k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3549
234k
                    ((ctxt->replaceEntities) ||
3550
231k
                     (ctxt->validate))) {
3551
231k
                    xmlLoadEntityContent(ctxt, ent);
3552
231k
                } else {
3553
3.29k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3554
3.29k
                                  "not validating will not read content for "
3555
3.29k
                                  "PE entity %s\n", ent->name, NULL);
3556
3.29k
                }
3557
234k
            }
3558
3559
            /*
3560
             * TODO: Skip if ent->content is still NULL.
3561
             */
3562
3563
343k
            if (xmlParserEntityCheck(ctxt, ent->length))
3564
63
                return;
3565
3566
343k
            if (ent->flags & XML_ENT_EXPANDING) {
3567
521
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3568
521
                return;
3569
521
            }
3570
3571
342k
            ent->flags |= XML_ENT_EXPANDING;
3572
342k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3573
342k
                                      depth);
3574
342k
            ent->flags &= ~XML_ENT_EXPANDING;
3575
3576
342k
            chunk = str;
3577
250M
        } else {
3578
            /* Normal ASCII char */
3579
250M
            if (!IS_BYTE_CHAR(c)) {
3580
18.9M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3581
18.9M
                        "invalid character in entity value\n");
3582
18.9M
                if (chunk < str)
3583
71.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3584
18.9M
                xmlSBufAddReplChar(buf);
3585
18.9M
                str += 1;
3586
18.9M
                chunk = str;
3587
231M
            } else {
3588
231M
                str += 1;
3589
231M
            }
3590
250M
        }
3591
2.99G
    }
3592
3593
284k
    if (chunk < str)
3594
247k
        xmlSBufAddString(buf, chunk, str - chunk);
3595
284k
}
3596
3597
/**
3598
 * Parse a value for ENTITY declarations
3599
 *
3600
 * @deprecated Internal function, don't use.
3601
 *
3602
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3603
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3604
 *
3605
 * @param ctxt  an XML parser context
3606
 * @param orig  if non-NULL store a copy of the original entity value
3607
 * @returns the EntityValue parsed with reference substituted or NULL
3608
 */
3609
xmlChar *
3610
350k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3611
350k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3612
101k
                         XML_MAX_HUGE_LENGTH :
3613
350k
                         XML_MAX_TEXT_LENGTH;
3614
350k
    xmlSBuf buf;
3615
350k
    const xmlChar *start;
3616
350k
    int quote, length;
3617
3618
350k
    xmlSBufInit(&buf, maxLength);
3619
3620
350k
    GROW;
3621
3622
350k
    quote = CUR;
3623
350k
    if ((quote != '"') && (quote != '\'')) {
3624
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3625
0
  return(NULL);
3626
0
    }
3627
350k
    CUR_PTR++;
3628
3629
350k
    length = 0;
3630
3631
    /*
3632
     * Copy raw content of the entity into a buffer
3633
     */
3634
2.20G
    while (1) {
3635
2.20G
        int c;
3636
3637
2.20G
        if (PARSER_STOPPED(ctxt))
3638
47
            goto error;
3639
3640
2.20G
        if (CUR_PTR >= ctxt->input->end) {
3641
14.1k
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3642
14.1k
            goto error;
3643
14.1k
        }
3644
3645
2.20G
        c = CUR;
3646
3647
2.20G
        if (c == 0) {
3648
847
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3649
847
                    "invalid character in entity value\n");
3650
847
            goto error;
3651
847
        }
3652
2.20G
        if (c == quote)
3653
335k
            break;
3654
2.20G
        NEXTL(1);
3655
2.20G
        length += 1;
3656
3657
        /*
3658
         * TODO: Check growth threshold
3659
         */
3660
2.20G
        if (ctxt->input->end - CUR_PTR < 10)
3661
453k
            GROW;
3662
2.20G
    }
3663
3664
335k
    start = CUR_PTR - length;
3665
3666
335k
    if (orig != NULL) {
3667
335k
        *orig = xmlStrndup(start, length);
3668
335k
        if (*orig == NULL)
3669
133
            xmlErrMemory(ctxt);
3670
335k
    }
3671
3672
335k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3673
3674
335k
    NEXTL(1);
3675
3676
335k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3677
3678
15.0k
error:
3679
15.0k
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3680
15.0k
    return(NULL);
3681
350k
}
3682
3683
/**
3684
 * Check an entity reference in an attribute value for validity
3685
 * without expanding it.
3686
 *
3687
 * @param ctxt  parser context
3688
 * @param pent  entity
3689
 * @param depth  nesting depth
3690
 */
3691
static void
3692
29.7k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3693
29.7k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3694
29.7k
    const xmlChar *str;
3695
29.7k
    unsigned long expandedSize = pent->length;
3696
29.7k
    int c, flags;
3697
3698
29.7k
    depth += 1;
3699
29.7k
    if (depth > maxDepth) {
3700
13
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3701
13
                       "Maximum entity nesting depth exceeded");
3702
13
  return;
3703
13
    }
3704
3705
29.7k
    if (pent->flags & XML_ENT_EXPANDING) {
3706
603
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3707
603
        return;
3708
603
    }
3709
3710
    /*
3711
     * If we're parsing a default attribute value in DTD content,
3712
     * the entity might reference other entities which weren't
3713
     * defined yet, so the check isn't reliable.
3714
     */
3715
29.1k
    if (ctxt->inSubset == 0)
3716
28.2k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3717
898
    else
3718
898
        flags = XML_ENT_VALIDATED;
3719
3720
29.1k
    str = pent->content;
3721
29.1k
    if (str == NULL)
3722
97
        goto done;
3723
3724
    /*
3725
     * Note that entity values are already validated. We only check
3726
     * for illegal less-than signs and compute the expanded size
3727
     * of the entity. No special handling for multi-byte characters
3728
     * is needed.
3729
     */
3730
257M
    while (!PARSER_STOPPED(ctxt)) {
3731
257M
        c = *str;
3732
3733
257M
  if (c != '&') {
3734
257M
            if (c == 0)
3735
27.6k
                break;
3736
3737
257M
            if (c == '<')
3738
12.5k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3739
12.5k
                        "'<' in entity '%s' is not allowed in attributes "
3740
12.5k
                        "values\n", pent->name);
3741
3742
257M
            str += 1;
3743
257M
        } else if (str[1] == '#') {
3744
8.80k
            int val;
3745
3746
8.80k
      val = xmlParseStringCharRef(ctxt, &str);
3747
8.80k
      if (val == 0) {
3748
70
                pent->content[0] = 0;
3749
70
                break;
3750
70
            }
3751
81.3k
  } else {
3752
81.3k
            xmlChar *name;
3753
81.3k
            xmlEntityPtr ent;
3754
3755
81.3k
      name = xmlParseStringEntityRef(ctxt, &str);
3756
81.3k
      if (name == NULL) {
3757
95
                pent->content[0] = 0;
3758
95
                break;
3759
95
            }
3760
3761
81.2k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3762
81.2k
            xmlFree(name);
3763
3764
81.2k
            if ((ent != NULL) &&
3765
71.4k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3766
48.0k
                if ((ent->flags & flags) != flags) {
3767
22.0k
                    pent->flags |= XML_ENT_EXPANDING;
3768
22.0k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3769
22.0k
                    pent->flags &= ~XML_ENT_EXPANDING;
3770
22.0k
                }
3771
3772
48.0k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3773
48.0k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3774
48.0k
            }
3775
81.2k
        }
3776
257M
    }
3777
3778
29.1k
done:
3779
29.1k
    if (ctxt->inSubset == 0)
3780
28.2k
        pent->expandedSize = expandedSize;
3781
3782
29.1k
    pent->flags |= flags;
3783
29.1k
}
3784
3785
/**
3786
 * Expand general entity references in an entity or attribute value.
3787
 * Perform attribute value normalization.
3788
 *
3789
 * @param ctxt  parser context
3790
 * @param buf  string buffer
3791
 * @param str  entity or attribute value
3792
 * @param pent  entity for entity value, NULL for attribute values
3793
 * @param normalize  whether to collapse whitespace
3794
 * @param inSpace  whitespace state
3795
 * @param depth  nesting depth
3796
 * @param check  whether to check for amplification
3797
 * @returns  whether there was a normalization change
3798
 */
3799
static int
3800
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3801
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3802
4.37M
                          int *inSpace, int depth, int check) {
3803
4.37M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3804
4.37M
    int c, chunkSize;
3805
4.37M
    int normChange = 0;
3806
3807
4.37M
    if (str == NULL)
3808
1.08k
        return(0);
3809
3810
4.37M
    depth += 1;
3811
4.37M
    if (depth > maxDepth) {
3812
13
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3813
13
                       "Maximum entity nesting depth exceeded");
3814
13
  return(0);
3815
13
    }
3816
3817
4.37M
    if (pent != NULL) {
3818
4.30M
        if (pent->flags & XML_ENT_EXPANDING) {
3819
231
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3820
231
            return(0);
3821
231
        }
3822
3823
4.30M
        if (check) {
3824
4.29M
            if (xmlParserEntityCheck(ctxt, pent->length))
3825
1.30k
                return(0);
3826
4.29M
        }
3827
4.30M
    }
3828
3829
4.36M
    chunkSize = 0;
3830
3831
    /*
3832
     * Note that entity values are already validated. No special
3833
     * handling for multi-byte characters is needed.
3834
     */
3835
13.2G
    while (!PARSER_STOPPED(ctxt)) {
3836
13.2G
        c = *str;
3837
3838
13.2G
  if (c != '&') {
3839
13.2G
            if (c == 0)
3840
4.06M
                break;
3841
3842
            /*
3843
             * If this function is called without an entity, it is used to
3844
             * expand entities in an attribute content where less-than was
3845
             * already unscaped and is allowed.
3846
             */
3847
13.2G
            if ((pent != NULL) && (c == '<')) {
3848
300k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3849
300k
                        "'<' in entity '%s' is not allowed in attributes "
3850
300k
                        "values\n", pent->name);
3851
300k
                break;
3852
300k
            }
3853
3854
13.2G
            if (c <= 0x20) {
3855
109M
                if ((normalize) && (*inSpace)) {
3856
                    /* Skip char */
3857
1.45M
                    if (chunkSize > 0) {
3858
291k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3859
291k
                        chunkSize = 0;
3860
291k
                    }
3861
1.45M
                    normChange = 1;
3862
107M
                } else if (c < 0x20) {
3863
91.1M
                    if (chunkSize > 0) {
3864
1.94M
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3865
1.94M
                        chunkSize = 0;
3866
1.94M
                    }
3867
3868
91.1M
                    xmlSBufAddCString(buf, " ", 1);
3869
91.1M
                } else {
3870
16.4M
                    chunkSize += 1;
3871
16.4M
                }
3872
3873
109M
                *inSpace = 1;
3874
13.1G
            } else {
3875
13.1G
                chunkSize += 1;
3876
13.1G
                *inSpace = 0;
3877
13.1G
            }
3878
3879
13.2G
            str += 1;
3880
13.2G
        } else if (str[1] == '#') {
3881
1.93M
            int val;
3882
3883
1.93M
            if (chunkSize > 0) {
3884
1.89M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
1.89M
                chunkSize = 0;
3886
1.89M
            }
3887
3888
1.93M
      val = xmlParseStringCharRef(ctxt, &str);
3889
1.93M
      if (val == 0) {
3890
68
                if (pent != NULL)
3891
68
                    pent->content[0] = 0;
3892
68
                break;
3893
68
            }
3894
3895
1.93M
            if (val == ' ') {
3896
50.5k
                if ((normalize) && (*inSpace))
3897
2.40k
                    normChange = 1;
3898
48.1k
                else
3899
48.1k
                    xmlSBufAddCString(buf, " ", 1);
3900
50.5k
                *inSpace = 1;
3901
1.88M
            } else {
3902
1.88M
                xmlSBufAddChar(buf, val);
3903
1.88M
                *inSpace = 0;
3904
1.88M
            }
3905
6.21M
  } else {
3906
6.21M
            xmlChar *name;
3907
6.21M
            xmlEntityPtr ent;
3908
3909
6.21M
            if (chunkSize > 0) {
3910
3.08M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
3.08M
                chunkSize = 0;
3912
3.08M
            }
3913
3914
6.21M
      name = xmlParseStringEntityRef(ctxt, &str);
3915
6.21M
            if (name == NULL) {
3916
249
                if (pent != NULL)
3917
238
                    pent->content[0] = 0;
3918
249
                break;
3919
249
            }
3920
3921
6.21M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3922
6.21M
            xmlFree(name);
3923
3924
6.21M
      if ((ent != NULL) &&
3925
5.78M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3926
2.52M
    if (ent->content == NULL) {
3927
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3928
0
          "predefined entity has no content\n");
3929
0
                    break;
3930
0
                }
3931
3932
2.52M
                xmlSBufAddString(buf, ent->content, ent->length);
3933
3934
2.52M
                *inSpace = 0;
3935
3.68M
      } else if ((ent != NULL) && (ent->content != NULL)) {
3936
3.25M
                if (pent != NULL)
3937
3.24M
                    pent->flags |= XML_ENT_EXPANDING;
3938
3.25M
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3939
3.25M
                        ent->content, ent, normalize, inSpace, depth, check);
3940
3.25M
                if (pent != NULL)
3941
3.24M
                    pent->flags &= ~XML_ENT_EXPANDING;
3942
3.25M
      }
3943
6.21M
        }
3944
13.2G
    }
3945
3946
4.36M
    if (chunkSize > 0)
3947
2.66M
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3948
3949
4.36M
    return(normChange);
3950
4.37M
}
3951
3952
/**
3953
 * Expand general entity references in an entity or attribute value.
3954
 * Perform attribute value normalization.
3955
 *
3956
 * @param ctxt  parser context
3957
 * @param str  entity or attribute value
3958
 * @param normalize  whether to collapse whitespace
3959
 * @returns the expanded attribtue value.
3960
 */
3961
xmlChar *
3962
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3963
61.5k
                            int normalize) {
3964
61.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3965
32.4k
                         XML_MAX_HUGE_LENGTH :
3966
61.5k
                         XML_MAX_TEXT_LENGTH;
3967
61.5k
    xmlSBuf buf;
3968
61.5k
    int inSpace = 1;
3969
3970
61.5k
    xmlSBufInit(&buf, maxLength);
3971
3972
61.5k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3973
61.5k
                              ctxt->inputNr, /* check */ 0);
3974
3975
61.5k
    if ((normalize) && (inSpace) && (buf.size > 0))
3976
0
        buf.size--;
3977
3978
61.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3979
61.5k
}
3980
3981
/**
3982
 * Parse a value for an attribute.
3983
 *
3984
 * NOTE: if no normalization is needed, the routine will return pointers
3985
 * directly from the data buffer.
3986
 *
3987
 * 3.3.3 Attribute-Value Normalization:
3988
 *
3989
 * Before the value of an attribute is passed to the application or
3990
 * checked for validity, the XML processor must normalize it as follows:
3991
 *
3992
 * - a character reference is processed by appending the referenced
3993
 *   character to the attribute value
3994
 * - an entity reference is processed by recursively processing the
3995
 *   replacement text of the entity
3996
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
3997
 *   appending \#x20 to the normalized value, except that only a single
3998
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
3999
 *   parsed entity or the literal entity value of an internal parsed entity
4000
 * - other characters are processed by appending them to the normalized value
4001
 *
4002
 * If the declared value is not CDATA, then the XML processor must further
4003
 * process the normalized attribute value by discarding any leading and
4004
 * trailing space (\#x20) characters, and by replacing sequences of space
4005
 * (\#x20) characters by a single space (\#x20) character.
4006
 * All attributes for which no declaration has been read should be treated
4007
 * by a non-validating parser as if declared CDATA.
4008
 *
4009
 * @param ctxt  an XML parser context
4010
 * @param attlen  attribute len result
4011
 * @param outFlags  resulting XML_ATTVAL_* flags
4012
 * @param special  value from attsSpecial
4013
 * @param isNamespace  whether this is a namespace declaration
4014
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4015
 *     caller if it was copied, this can be detected by val[*len] == 0.
4016
 */
4017
static xmlChar *
4018
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4019
3.90M
                         int special, int isNamespace) {
4020
3.90M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4021
994k
                         XML_MAX_HUGE_LENGTH :
4022
3.90M
                         XML_MAX_TEXT_LENGTH;
4023
3.90M
    xmlSBuf buf;
4024
3.90M
    xmlChar *ret;
4025
3.90M
    int c, l, quote, entFlags, chunkSize;
4026
3.90M
    int inSpace = 1;
4027
3.90M
    int replaceEntities;
4028
3.90M
    int normalize = (special & XML_SPECIAL_TYPE_MASK) > XML_ATTRIBUTE_CDATA;
4029
3.90M
    int attvalFlags = 0;
4030
4031
    /* Always expand namespace URIs */
4032
3.90M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4033
4034
3.90M
    xmlSBufInit(&buf, maxLength);
4035
4036
3.90M
    GROW;
4037
4038
3.90M
    quote = CUR;
4039
3.90M
    if ((quote != '"') && (quote != '\'')) {
4040
49.4k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4041
49.4k
  return(NULL);
4042
49.4k
    }
4043
3.85M
    NEXTL(1);
4044
4045
3.85M
    if (ctxt->inSubset == 0)
4046
3.54M
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4047
308k
    else
4048
308k
        entFlags = XML_ENT_VALIDATED;
4049
4050
3.85M
    inSpace = 1;
4051
3.85M
    chunkSize = 0;
4052
4053
1.07G
    while (1) {
4054
1.07G
        if (PARSER_STOPPED(ctxt))
4055
2.89k
            goto error;
4056
4057
1.07G
        if (CUR_PTR >= ctxt->input->end) {
4058
78.7k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4059
78.7k
                           "AttValue: ' expected\n");
4060
78.7k
            goto error;
4061
78.7k
        }
4062
4063
        /*
4064
         * TODO: Check growth threshold
4065
         */
4066
1.07G
        if (ctxt->input->end - CUR_PTR < 10)
4067
1.05M
            GROW;
4068
4069
1.07G
        c = CUR;
4070
4071
1.07G
        if (c >= 0x80) {
4072
696M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4073
696M
                    "invalid character in attribute value\n");
4074
696M
            if (l == 0) {
4075
236M
                if (chunkSize > 0) {
4076
785k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4077
785k
                    chunkSize = 0;
4078
785k
                }
4079
236M
                xmlSBufAddReplChar(&buf);
4080
236M
                NEXTL(1);
4081
460M
            } else {
4082
460M
                chunkSize += l;
4083
460M
                NEXTL(l);
4084
460M
            }
4085
4086
696M
            inSpace = 0;
4087
696M
        } else if (c != '&') {
4088
372M
            if (c > 0x20) {
4089
130M
                if (c == quote)
4090
3.76M
                    break;
4091
4092
126M
                if (c == '<')
4093
1.42M
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4094
4095
126M
                chunkSize += 1;
4096
126M
                inSpace = 0;
4097
242M
            } else if (!IS_BYTE_CHAR(c)) {
4098
200M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4099
200M
                        "invalid character in attribute value\n");
4100
200M
                if (chunkSize > 0) {
4101
358k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4102
358k
                    chunkSize = 0;
4103
358k
                }
4104
200M
                xmlSBufAddReplChar(&buf);
4105
200M
                inSpace = 0;
4106
200M
            } else {
4107
                /* Whitespace */
4108
41.8M
                if ((normalize) && (inSpace)) {
4109
                    /* Skip char */
4110
2.17M
                    if (chunkSize > 0) {
4111
55.8k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4112
55.8k
                        chunkSize = 0;
4113
55.8k
                    }
4114
2.17M
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4115
39.6M
                } else if (c < 0x20) {
4116
                    /* Convert to space */
4117
35.0M
                    if (chunkSize > 0) {
4118
1.06M
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4119
1.06M
                        chunkSize = 0;
4120
1.06M
                    }
4121
4122
35.0M
                    xmlSBufAddCString(&buf, " ", 1);
4123
35.0M
                } else {
4124
4.59M
                    chunkSize += 1;
4125
4.59M
                }
4126
4127
41.8M
                inSpace = 1;
4128
4129
41.8M
                if ((c == 0xD) && (NXT(1) == 0xA))
4130
74.4k
                    CUR_PTR++;
4131
41.8M
            }
4132
4133
368M
            NEXTL(1);
4134
368M
        } else if (NXT(1) == '#') {
4135
266k
            int val;
4136
4137
266k
            if (chunkSize > 0) {
4138
168k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4139
168k
                chunkSize = 0;
4140
168k
            }
4141
4142
266k
            val = xmlParseCharRef(ctxt);
4143
266k
            if (val == 0)
4144
8.05k
                goto error;
4145
4146
258k
            if ((val == '&') && (!replaceEntities)) {
4147
                /*
4148
                 * The reparsing will be done in xmlNodeParseContent()
4149
                 * called from SAX2.c
4150
                 */
4151
20.1k
                xmlSBufAddCString(&buf, "&#38;", 5);
4152
20.1k
                inSpace = 0;
4153
238k
            } else if (val == ' ') {
4154
33.9k
                if ((normalize) && (inSpace))
4155
2.66k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4156
31.2k
                else
4157
31.2k
                    xmlSBufAddCString(&buf, " ", 1);
4158
33.9k
                inSpace = 1;
4159
204k
            } else {
4160
204k
                xmlSBufAddChar(&buf, val);
4161
204k
                inSpace = 0;
4162
204k
            }
4163
2.80M
        } else {
4164
2.80M
            const xmlChar *name;
4165
2.80M
            xmlEntityPtr ent;
4166
4167
2.80M
            if (chunkSize > 0) {
4168
1.06M
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4169
1.06M
                chunkSize = 0;
4170
1.06M
            }
4171
4172
2.80M
            name = xmlParseEntityRefInternal(ctxt);
4173
2.80M
            if (name == NULL) {
4174
                /*
4175
                 * Probably a literal '&' which wasn't escaped.
4176
                 * TODO: Handle gracefully in recovery mode.
4177
                 */
4178
351k
                continue;
4179
351k
            }
4180
4181
2.44M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4182
2.44M
            if (ent == NULL)
4183
316k
                continue;
4184
4185
2.13M
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4186
329k
                if ((ent->content[0] == '&') && (!replaceEntities))
4187
50.0k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4188
279k
                else
4189
279k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4190
329k
                inSpace = 0;
4191
1.80M
            } else if (replaceEntities) {
4192
1.05M
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4193
1.05M
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4194
1.05M
                        /* check */ 1) > 0)
4195
127k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4196
1.05M
            } else {
4197
751k
                if ((ent->flags & entFlags) != entFlags)
4198
7.68k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4199
4200
751k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4201
755
                    ent->content[0] = 0;
4202
755
                    goto error;
4203
755
                }
4204
4205
                /*
4206
                 * Just output the reference
4207
                 */
4208
750k
                xmlSBufAddCString(&buf, "&", 1);
4209
750k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4210
750k
                xmlSBufAddCString(&buf, ";", 1);
4211
4212
750k
                inSpace = 0;
4213
750k
            }
4214
2.13M
  }
4215
1.07G
    }
4216
4217
3.76M
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4218
2.90M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4219
4220
2.90M
        if (attlen != NULL)
4221
2.90M
            *attlen = chunkSize;
4222
2.90M
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4223
1.90k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4224
1.90k
            *attlen -= 1;
4225
1.90k
        }
4226
4227
        /* Report potential error */
4228
2.90M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4229
2.90M
    } else {
4230
856k
        if (chunkSize > 0)
4231
646k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4232
4233
856k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4234
7.63k
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4235
7.63k
            buf.size--;
4236
7.63k
        }
4237
4238
856k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4239
856k
        attvalFlags |= XML_ATTVAL_ALLOC;
4240
4241
856k
        if (ret != NULL) {
4242
855k
            if (attlen != NULL)
4243
350k
                *attlen = buf.size;
4244
855k
        }
4245
856k
    }
4246
4247
3.76M
    if (outFlags != NULL)
4248
3.25M
        *outFlags = attvalFlags;
4249
4250
3.76M
    NEXTL(1);
4251
4252
3.76M
    return(ret);
4253
4254
90.4k
error:
4255
90.4k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4256
90.4k
    return(NULL);
4257
3.85M
}
4258
4259
/**
4260
 * Parse a value for an attribute
4261
 * Note: the parser won't do substitution of entities here, this
4262
 * will be handled later in #xmlStringGetNodeList
4263
 *
4264
 * @deprecated Internal function, don't use.
4265
 *
4266
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4267
 *                       "'" ([^<&'] | Reference)* "'"
4268
 *
4269
 * 3.3.3 Attribute-Value Normalization:
4270
 *
4271
 * Before the value of an attribute is passed to the application or
4272
 * checked for validity, the XML processor must normalize it as follows:
4273
 *
4274
 * - a character reference is processed by appending the referenced
4275
 *   character to the attribute value
4276
 * - an entity reference is processed by recursively processing the
4277
 *   replacement text of the entity
4278
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4279
 *   appending \#x20 to the normalized value, except that only a single
4280
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4281
 *   parsed entity or the literal entity value of an internal parsed entity
4282
 * - other characters are processed by appending them to the normalized value
4283
 *
4284
 * If the declared value is not CDATA, then the XML processor must further
4285
 * process the normalized attribute value by discarding any leading and
4286
 * trailing space (\#x20) characters, and by replacing sequences of space
4287
 * (\#x20) characters by a single space (\#x20) character.
4288
 * All attributes for which no declaration has been read should be treated
4289
 * by a non-validating parser as if declared CDATA.
4290
 *
4291
 * @param ctxt  an XML parser context
4292
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4293
 * caller.
4294
 */
4295
xmlChar *
4296
602k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4297
602k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4298
602k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4299
602k
}
4300
4301
/**
4302
 * Parse an XML Literal
4303
 *
4304
 * @deprecated Internal function, don't use.
4305
 *
4306
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4307
 *
4308
 * @param ctxt  an XML parser context
4309
 * @returns the SystemLiteral parsed or NULL
4310
 */
4311
4312
xmlChar *
4313
235k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4314
235k
    xmlChar *buf = NULL;
4315
235k
    int len = 0;
4316
235k
    int size = XML_PARSER_BUFFER_SIZE;
4317
235k
    int cur, l;
4318
235k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4319
60.5k
                    XML_MAX_TEXT_LENGTH :
4320
235k
                    XML_MAX_NAME_LENGTH;
4321
235k
    xmlChar stop;
4322
4323
235k
    if (RAW == '"') {
4324
170k
        NEXT;
4325
170k
  stop = '"';
4326
170k
    } else if (RAW == '\'') {
4327
51.3k
        NEXT;
4328
51.3k
  stop = '\'';
4329
51.3k
    } else {
4330
13.3k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4331
13.3k
  return(NULL);
4332
13.3k
    }
4333
4334
221k
    buf = xmlMalloc(size);
4335
221k
    if (buf == NULL) {
4336
76
        xmlErrMemory(ctxt);
4337
76
  return(NULL);
4338
76
    }
4339
221k
    cur = xmlCurrentCharRecover(ctxt, &l);
4340
44.5M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4341
44.3M
  if (len + 5 >= size) {
4342
31.5k
      xmlChar *tmp;
4343
31.5k
            int newSize;
4344
4345
31.5k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4346
31.5k
            if (newSize < 0) {
4347
125
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4348
125
                xmlFree(buf);
4349
125
                return(NULL);
4350
125
            }
4351
31.3k
      tmp = xmlRealloc(buf, newSize);
4352
31.3k
      if (tmp == NULL) {
4353
23
          xmlFree(buf);
4354
23
    xmlErrMemory(ctxt);
4355
23
    return(NULL);
4356
23
      }
4357
31.3k
      buf = tmp;
4358
31.3k
            size = newSize;
4359
31.3k
  }
4360
44.3M
  COPY_BUF(buf, len, cur);
4361
44.3M
  NEXTL(l);
4362
44.3M
  cur = xmlCurrentCharRecover(ctxt, &l);
4363
44.3M
    }
4364
221k
    buf[len] = 0;
4365
221k
    if (!IS_CHAR(cur)) {
4366
14.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367
206k
    } else {
4368
206k
  NEXT;
4369
206k
    }
4370
221k
    return(buf);
4371
221k
}
4372
4373
/**
4374
 * Parse an XML public literal
4375
 *
4376
 * @deprecated Internal function, don't use.
4377
 *
4378
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4379
 *
4380
 * @param ctxt  an XML parser context
4381
 * @returns the PubidLiteral parsed or NULL.
4382
 */
4383
4384
xmlChar *
4385
99.5k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4386
99.5k
    xmlChar *buf = NULL;
4387
99.5k
    int len = 0;
4388
99.5k
    int size = XML_PARSER_BUFFER_SIZE;
4389
99.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4390
22.8k
                    XML_MAX_TEXT_LENGTH :
4391
99.5k
                    XML_MAX_NAME_LENGTH;
4392
99.5k
    xmlChar cur;
4393
99.5k
    xmlChar stop;
4394
4395
99.5k
    if (RAW == '"') {
4396
62.9k
        NEXT;
4397
62.9k
  stop = '"';
4398
62.9k
    } else if (RAW == '\'') {
4399
31.3k
        NEXT;
4400
31.3k
  stop = '\'';
4401
31.3k
    } else {
4402
5.19k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4403
5.19k
  return(NULL);
4404
5.19k
    }
4405
94.3k
    buf = xmlMalloc(size);
4406
94.3k
    if (buf == NULL) {
4407
44
  xmlErrMemory(ctxt);
4408
44
  return(NULL);
4409
44
    }
4410
94.3k
    cur = CUR;
4411
2.91M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4412
2.82M
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4413
2.82M
  if (len + 1 >= size) {
4414
3.32k
      xmlChar *tmp;
4415
3.32k
            int newSize;
4416
4417
3.32k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4418
3.32k
            if (newSize < 0) {
4419
13
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4420
13
                xmlFree(buf);
4421
13
                return(NULL);
4422
13
            }
4423
3.31k
      tmp = xmlRealloc(buf, newSize);
4424
3.31k
      if (tmp == NULL) {
4425
16
    xmlErrMemory(ctxt);
4426
16
    xmlFree(buf);
4427
16
    return(NULL);
4428
16
      }
4429
3.29k
      buf = tmp;
4430
3.29k
            size = newSize;
4431
3.29k
  }
4432
2.82M
  buf[len++] = cur;
4433
2.82M
  NEXT;
4434
2.82M
  cur = CUR;
4435
2.82M
    }
4436
94.3k
    buf[len] = 0;
4437
94.3k
    if (cur != stop) {
4438
14.0k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4439
80.2k
    } else {
4440
80.2k
  NEXTL(1);
4441
80.2k
    }
4442
94.3k
    return(buf);
4443
94.3k
}
4444
4445
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4446
4447
/*
4448
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value. An entry is zero for bytes
 * below 0x20 other than 0x9, for '&' (0x26), '<' (0x3C), ']' (0x5D)
 * and for every byte from 0x80 upwards. All remaining entries hold
 * the byte value itself.
4449
 */
4450
static const unsigned char test_char_data[256] = {
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4456
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4457
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4458
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4459
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4460
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4461
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4462
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4463
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4464
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4465
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4466
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4467
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4468
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4469
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4470
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4471
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4472
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4475
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4476
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4482
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4483
};
4484
4485
static void
4486
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4487
14.3M
              int isBlank) {
4488
14.3M
    int checkBlanks;
4489
4490
14.3M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4491
3.62M
        return;
4492
4493
10.7M
    checkBlanks = (!ctxt->keepBlanks) ||
4494
7.68M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4495
4496
    /*
4497
     * Calling areBlanks with only parts of a text node
4498
     * is fundamentally broken, making the NOBLANKS option
4499
     * essentially unusable.
4500
     */
4501
10.7M
    if ((checkBlanks) &&
4502
3.08M
        (areBlanks(ctxt, buf, size, isBlank))) {
4503
75.6k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4504
75.6k
            (ctxt->keepBlanks))
4505
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4506
10.6M
    } else {
4507
10.6M
        if (ctxt->sax->characters != NULL)
4508
10.6M
            ctxt->sax->characters(ctxt->userData, buf, size);
4509
4510
        /*
4511
         * The old code used to update this value for "complex" data
4512
         * even if checkBlanks was false. This was probably a bug.
4513
         */
4514
10.6M
        if ((checkBlanks) && (*ctxt->space == -1))
4515
378k
            *ctxt->space = -2;
4516
10.6M
    }
4517
10.7M
}
4518
4519
/**
4520
 * Parse character data. Always makes progress if the first char isn't
4521
 * '<' or '&'.
4522
 *
4523
 * The right angle bracket (>) may be represented using the string "&gt;",
4524
 * and must, for compatibility, be escaped using "&gt;" or a character
4525
 * reference when it appears in the string "]]>" in content, when that
4526
 * string is not marking the end of a CDATA section.
4527
 *
4528
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4529
 * @param ctxt  an XML parser context
4530
 * @param partial  buffer may contain partial UTF-8 sequences
4531
 */
4532
static void
4533
108M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4534
108M
    const xmlChar *in;
4535
108M
    int line = ctxt->input->line;
4536
108M
    int col = ctxt->input->col;
4537
108M
    int ccol;
4538
108M
    int terminate = 0;
4539
4540
108M
    GROW;
4541
    /*
4542
     * Accelerated common case where input doesn't need to be
4543
     * modified before passing it to the handler.
4544
     */
4545
108M
    in = ctxt->input->cur;
4546
108M
    do {
4547
110M
get_more_space:
4548
113M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4549
110M
        if (*in == 0xA) {
4550
27.2M
            do {
4551
27.2M
                ctxt->input->line++; ctxt->input->col = 1;
4552
27.2M
                in++;
4553
27.2M
            } while (*in == 0xA);
4554
1.16M
            goto get_more_space;
4555
1.16M
        }
4556
108M
        if (*in == '<') {
4557
1.65M
            while (in > ctxt->input->cur) {
4558
825k
                const xmlChar *tmp = ctxt->input->cur;
4559
825k
                size_t nbchar = in - tmp;
4560
4561
825k
                if (nbchar > XML_MAX_ITEMS)
4562
0
                    nbchar = XML_MAX_ITEMS;
4563
825k
                ctxt->input->cur += nbchar;
4564
4565
825k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4566
825k
            }
4567
825k
            return;
4568
825k
        }
4569
4570
110M
get_more:
4571
110M
        ccol = ctxt->input->col;
4572
187M
        while (test_char_data[*in]) {
4573
77.2M
            in++;
4574
77.2M
            ccol++;
4575
77.2M
        }
4576
110M
        ctxt->input->col = ccol;
4577
110M
        if (*in == 0xA) {
4578
16.2M
            do {
4579
16.2M
                ctxt->input->line++; ctxt->input->col = 1;
4580
16.2M
                in++;
4581
16.2M
            } while (*in == 0xA);
4582
496k
            goto get_more;
4583
496k
        }
4584
109M
        if (*in == ']') {
4585
1.50M
            size_t avail = ctxt->input->end - in;
4586
4587
1.50M
            if (partial && avail < 2) {
4588
307
                terminate = 1;
4589
307
                goto invoke_callback;
4590
307
            }
4591
1.50M
            if (in[1] == ']') {
4592
1.17M
                if (partial && avail < 3) {
4593
1.56k
                    terminate = 1;
4594
1.56k
                    goto invoke_callback;
4595
1.56k
                }
4596
1.17M
                if (in[2] == '>')
4597
27.4k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4598
1.17M
            }
4599
4600
1.50M
            in++;
4601
1.50M
            ctxt->input->col++;
4602
1.50M
            goto get_more;
4603
1.50M
        }
4604
4605
108M
invoke_callback:
4606
114M
        while (in > ctxt->input->cur) {
4607
6.39M
            const xmlChar *tmp = ctxt->input->cur;
4608
6.39M
            size_t nbchar = in - tmp;
4609
4610
6.39M
            if (nbchar > XML_MAX_ITEMS)
4611
0
                nbchar = XML_MAX_ITEMS;
4612
6.39M
            ctxt->input->cur += nbchar;
4613
4614
6.39M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4615
4616
6.39M
            line = ctxt->input->line;
4617
6.39M
            col = ctxt->input->col;
4618
6.39M
        }
4619
108M
        ctxt->input->cur = in;
4620
108M
        if (*in == 0xD) {
4621
304k
            in++;
4622
304k
            if (*in == 0xA) {
4623
159k
                ctxt->input->cur = in;
4624
159k
                in++;
4625
159k
                ctxt->input->line++; ctxt->input->col = 1;
4626
159k
                continue; /* while */
4627
159k
            }
4628
145k
            in--;
4629
145k
        }
4630
107M
        if (*in == '<') {
4631
1.99M
            return;
4632
1.99M
        }
4633
105M
        if (*in == '&') {
4634
647k
            return;
4635
647k
        }
4636
105M
        if (terminate) {
4637
1.87k
            return;
4638
1.87k
        }
4639
105M
        SHRINK;
4640
105M
        GROW;
4641
105M
        in = ctxt->input->cur;
4642
105M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4643
105M
             (*in == 0x09) || (*in == 0x0a));
4644
105M
    ctxt->input->line = line;
4645
105M
    ctxt->input->col = col;
4646
105M
    xmlParseCharDataComplex(ctxt, partial);
4647
105M
}
4648
4649
/**
4650
 * Always makes progress if the first char isn't '<' or '&'.
4651
 *
4652
 * Parse a CharData section. This is the fallback function
4653
 * of #xmlParseCharData when the parsing requires handling
4654
 * of non-ASCII characters.
4655
 *
4656
 * @param ctxt  an XML parser context
4657
 * @param partial  whether the input can end with truncated UTF-8
4658
 */
4659
static void
4660
105M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4661
105M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4662
105M
    int nbchar = 0;
4663
105M
    int cur, l;
4664
4665
105M
    cur = xmlCurrentCharRecover(ctxt, &l);
4666
404M
    while ((cur != '<') && /* checked */
4667
404M
           (cur != '&') &&
4668
403M
     (IS_CHAR(cur))) {
4669
299M
        if (cur == ']') {
4670
2.45M
            size_t avail = ctxt->input->end - ctxt->input->cur;
4671
4672
2.45M
            if (partial && avail < 2)
4673
562
                break;
4674
2.45M
            if (NXT(1) == ']') {
4675
1.98M
                if (partial && avail < 3)
4676
2.56k
                    break;
4677
1.98M
                if (NXT(2) == '>')
4678
154k
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4679
1.98M
            }
4680
2.45M
        }
4681
4682
299M
  COPY_BUF(buf, nbchar, cur);
4683
  /* advance the current position before possibly calling ctxt->sax->characters */
4684
299M
  NEXTL(l);
4685
299M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4686
2.43M
      buf[nbchar] = 0;
4687
4688
2.43M
            xmlCharacters(ctxt, buf, nbchar, 0);
4689
2.43M
      nbchar = 0;
4690
2.43M
            SHRINK;
4691
2.43M
  }
4692
299M
  cur = xmlCurrentCharRecover(ctxt, &l);
4693
299M
    }
4694
105M
    if (nbchar != 0) {
4695
4.74M
        buf[nbchar] = 0;
4696
4697
4.74M
        xmlCharacters(ctxt, buf, nbchar, 0);
4698
4.74M
    }
4699
    /*
4700
     * cur == 0 can mean
4701
     *
4702
     * - End of buffer.
4703
     * - An actual 0 character.
4704
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4705
     */
4706
105M
    if (ctxt->input->cur < ctxt->input->end) {
4707
105M
        if ((cur == 0) && (CUR != 0)) {
4708
7.90k
            if (partial == 0) {
4709
6.42k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4710
6.42k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4711
6.42k
                NEXTL(1);
4712
6.42k
            }
4713
105M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4714
            /* Generate the error and skip the offending character */
4715
104M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4716
104M
                              "PCDATA invalid Char value %d\n", cur);
4717
104M
            NEXTL(l);
4718
104M
        }
4719
105M
    }
4720
105M
}
4721
4722
/**
4723
 * @deprecated Internal function, don't use.
4724
 * @param ctxt  an XML parser context
4725
 * @param cdata  unused
4726
 */
4727
void
4728
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4729
0
    xmlParseCharDataInternal(ctxt, 0);
4730
0
}
4731
4732
/**
4733
 * Parse an External ID or a Public ID
4734
 *
4735
 * @deprecated Internal function, don't use.
4736
 *
4737
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4739
 *
4740
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4742
 *
4743
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744
 *
4745
 * @param ctxt  an XML parser context
4746
 * @param publicId  a xmlChar** receiving PubidLiteral
4747
 * @param strict  indicate whether we should restrict parsing to only
4748
 *          production [75], see NOTE above
4749
 * @returns the function returns SystemLiteral and in the second
4750
 *                case publicID receives PubidLiteral, if strict is off
4751
 *                it is possible to return NULL and have publicID set.
4752
 */
4753
4754
xmlChar *
4755
382k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4756
382k
    xmlChar *URI = NULL;
4757
4758
382k
    *publicId = NULL;
4759
382k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4760
149k
        SKIP(6);
4761
149k
  if (SKIP_BLANKS == 0) {
4762
8.21k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763
8.21k
                     "Space required after 'SYSTEM'\n");
4764
8.21k
  }
4765
149k
  URI = xmlParseSystemLiteral(ctxt);
4766
149k
  if (URI == NULL) {
4767
3.62k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4768
3.62k
        }
4769
233k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4770
99.5k
        SKIP(6);
4771
99.5k
  if (SKIP_BLANKS == 0) {
4772
12.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4773
12.8k
        "Space required after 'PUBLIC'\n");
4774
12.8k
  }
4775
99.5k
  *publicId = xmlParsePubidLiteral(ctxt);
4776
99.5k
  if (*publicId == NULL) {
4777
5.26k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778
5.26k
  }
4779
99.5k
  if (strict) {
4780
      /*
4781
       * We don't handle [83] so "S SystemLiteral" is required.
4782
       */
4783
81.5k
      if (SKIP_BLANKS == 0) {
4784
15.1k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785
15.1k
      "Space required after the Public Identifier\n");
4786
15.1k
      }
4787
81.5k
  } else {
4788
      /*
4789
       * We handle [83] so we return immediately, if
4790
       * "S SystemLiteral" is not detected. We skip blanks if no
4791
             * system literal was found, but this is harmless since we must
4792
             * be at the end of a NotationDecl.
4793
       */
4794
18.0k
      if (SKIP_BLANKS == 0) return(NULL);
4795
8.07k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4796
8.07k
  }
4797
85.4k
  URI = xmlParseSystemLiteral(ctxt);
4798
85.4k
  if (URI == NULL) {
4799
9.92k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800
9.92k
        }
4801
85.4k
    }
4802
368k
    return(URI);
4803
382k
}
4804
4805
/**
4806
 * Skip an XML (SGML) comment <!-- .... -->
4807
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4808
 *  must not occur within comments. "
4809
 * This is the slow routine in case the accelerator for ascii didn't work
4810
 *
4811
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4812
 * @param ctxt  an XML parser context
4813
 * @param buf  the already parsed part of the buffer
4814
 * @param len  number of bytes in the buffer
4815
 * @param size  allocated size of the buffer
4816
 */
4817
static void
4818
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4819
330k
                       size_t len, size_t size) {
4820
330k
    int q, ql;
4821
330k
    int r, rl;
4822
330k
    int cur, l;
4823
330k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4824
81.0k
                    XML_MAX_HUGE_LENGTH :
4825
330k
                    XML_MAX_TEXT_LENGTH;
4826
4827
330k
    if (buf == NULL) {
4828
62.6k
        len = 0;
4829
62.6k
  size = XML_PARSER_BUFFER_SIZE;
4830
62.6k
  buf = xmlMalloc(size);
4831
62.6k
  if (buf == NULL) {
4832
138
      xmlErrMemory(ctxt);
4833
138
      return;
4834
138
  }
4835
62.6k
    }
4836
330k
    q = xmlCurrentCharRecover(ctxt, &ql);
4837
330k
    if (q == 0)
4838
19.9k
        goto not_terminated;
4839
310k
    if (!IS_CHAR(q)) {
4840
6.25k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841
6.25k
                          "xmlParseComment: invalid xmlChar value %d\n",
4842
6.25k
                    q);
4843
6.25k
  xmlFree (buf);
4844
6.25k
  return;
4845
6.25k
    }
4846
304k
    NEXTL(ql);
4847
304k
    r = xmlCurrentCharRecover(ctxt, &rl);
4848
304k
    if (r == 0)
4849
3.94k
        goto not_terminated;
4850
300k
    if (!IS_CHAR(r)) {
4851
4.93k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4852
4.93k
                          "xmlParseComment: invalid xmlChar value %d\n",
4853
4.93k
                    r);
4854
4.93k
  xmlFree (buf);
4855
4.93k
  return;
4856
4.93k
    }
4857
295k
    NEXTL(rl);
4858
295k
    cur = xmlCurrentCharRecover(ctxt, &l);
4859
295k
    if (cur == 0)
4860
3.96k
        goto not_terminated;
4861
72.8M
    while (IS_CHAR(cur) && /* checked */
4862
72.7M
           ((cur != '>') ||
4863
72.5M
      (r != '-') || (q != '-'))) {
4864
72.5M
  if ((r == '-') && (q == '-')) {
4865
3.11M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4866
3.11M
  }
4867
72.5M
  if (len + 5 >= size) {
4868
126k
      xmlChar *tmp;
4869
126k
            int newSize;
4870
4871
126k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4872
126k
            if (newSize < 0) {
4873
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4874
0
                             "Comment too big found", NULL);
4875
0
                xmlFree (buf);
4876
0
                return;
4877
0
            }
4878
126k
      tmp = xmlRealloc(buf, newSize);
4879
126k
      if (tmp == NULL) {
4880
46
    xmlErrMemory(ctxt);
4881
46
    xmlFree(buf);
4882
46
    return;
4883
46
      }
4884
126k
      buf = tmp;
4885
126k
            size = newSize;
4886
126k
  }
4887
72.5M
  COPY_BUF(buf, len, q);
4888
4889
72.5M
  q = r;
4890
72.5M
  ql = rl;
4891
72.5M
  r = cur;
4892
72.5M
  rl = l;
4893
4894
72.5M
  NEXTL(l);
4895
72.5M
  cur = xmlCurrentCharRecover(ctxt, &l);
4896
4897
72.5M
    }
4898
291k
    buf[len] = 0;
4899
291k
    if (cur == 0) {
4900
36.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4901
36.1k
                       "Comment not terminated \n<!--%.50s\n", buf);
4902
255k
    } else if (!IS_CHAR(cur)) {
4903
8.21k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4904
8.21k
                          "xmlParseComment: invalid xmlChar value %d\n",
4905
8.21k
                    cur);
4906
247k
    } else {
4907
247k
        NEXT;
4908
247k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4909
247k
      (!ctxt->disableSAX))
4910
214k
      ctxt->sax->comment(ctxt->userData, buf);
4911
247k
    }
4912
291k
    xmlFree(buf);
4913
291k
    return;
4914
27.8k
not_terminated:
4915
27.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4916
27.8k
       "Comment not terminated\n", NULL);
4917
27.8k
    xmlFree(buf);
4918
27.8k
}
4919
4920
/**
4921
 * Parse an XML (SGML) comment. Always consumes '<!'.
4922
 *
4923
 * @deprecated Internal function, don't use.
4924
 *
4925
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4926
 *  must not occur within comments. "
4927
 *
4928
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4929
 * @param ctxt  an XML parser context
4930
 */
4931
void
4932
2.78M
xmlParseComment(xmlParserCtxt *ctxt) {
4933
2.78M
    xmlChar *buf = NULL;
4934
2.78M
    size_t size = XML_PARSER_BUFFER_SIZE;
4935
2.78M
    size_t len = 0;
4936
2.78M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4937
243k
                       XML_MAX_HUGE_LENGTH :
4938
2.78M
                       XML_MAX_TEXT_LENGTH;
4939
2.78M
    const xmlChar *in;
4940
2.78M
    size_t nbchar = 0;
4941
2.78M
    int ccol;
4942
4943
    /*
4944
     * Check that there is a comment right here.
4945
     */
4946
2.78M
    if ((RAW != '<') || (NXT(1) != '!'))
4947
0
        return;
4948
2.78M
    SKIP(2);
4949
2.78M
    if ((RAW != '-') || (NXT(1) != '-'))
4950
319
        return;
4951
2.78M
    SKIP(2);
4952
2.78M
    GROW;
4953
4954
    /*
4955
     * Accelerated common case where input doesn't need to be
4956
     * modified before passing it to the handler.
4957
     */
4958
2.78M
    in = ctxt->input->cur;
4959
2.79M
    do {
4960
2.79M
  if (*in == 0xA) {
4961
1.75M
      do {
4962
1.75M
    ctxt->input->line++; ctxt->input->col = 1;
4963
1.75M
    in++;
4964
1.75M
      } while (*in == 0xA);
4965
11.2k
  }
4966
7.66M
get_more:
4967
7.66M
        ccol = ctxt->input->col;
4968
36.9M
  while (((*in > '-') && (*in <= 0x7F)) ||
4969
14.2M
         ((*in >= 0x20) && (*in < '-')) ||
4970
29.2M
         (*in == 0x09)) {
4971
29.2M
        in++;
4972
29.2M
        ccol++;
4973
29.2M
  }
4974
7.66M
  ctxt->input->col = ccol;
4975
7.66M
  if (*in == 0xA) {
4976
3.03M
      do {
4977
3.03M
    ctxt->input->line++; ctxt->input->col = 1;
4978
3.03M
    in++;
4979
3.03M
      } while (*in == 0xA);
4980
135k
      goto get_more;
4981
135k
  }
4982
7.52M
  nbchar = in - ctxt->input->cur;
4983
  /*
4984
   * save current set of data
4985
   */
4986
7.52M
  if (nbchar > 0) {
4987
5.12M
            if (nbchar > maxLength - len) {
4988
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4989
0
                                  "Comment too big found", NULL);
4990
0
                xmlFree(buf);
4991
0
                return;
4992
0
            }
4993
5.12M
            if (buf == NULL) {
4994
511k
                if ((*in == '-') && (in[1] == '-'))
4995
239k
                    size = nbchar + 1;
4996
272k
                else
4997
272k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
4998
511k
                buf = xmlMalloc(size);
4999
511k
                if (buf == NULL) {
5000
85
                    xmlErrMemory(ctxt);
5001
85
                    return;
5002
85
                }
5003
511k
                len = 0;
5004
4.60M
            } else if (len + nbchar + 1 >= size) {
5005
117k
                xmlChar *new_buf;
5006
117k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5007
117k
                new_buf = xmlRealloc(buf, size);
5008
117k
                if (new_buf == NULL) {
5009
17
                    xmlErrMemory(ctxt);
5010
17
                    xmlFree(buf);
5011
17
                    return;
5012
17
                }
5013
117k
                buf = new_buf;
5014
117k
            }
5015
5.12M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5016
5.12M
            len += nbchar;
5017
5.12M
            buf[len] = 0;
5018
5.12M
  }
5019
7.52M
  ctxt->input->cur = in;
5020
7.52M
  if (*in == 0xA) {
5021
0
      in++;
5022
0
      ctxt->input->line++; ctxt->input->col = 1;
5023
0
  }
5024
7.52M
  if (*in == 0xD) {
5025
146k
      in++;
5026
146k
      if (*in == 0xA) {
5027
37.1k
    ctxt->input->cur = in;
5028
37.1k
    in++;
5029
37.1k
    ctxt->input->line++; ctxt->input->col = 1;
5030
37.1k
    goto get_more;
5031
37.1k
      }
5032
109k
      in--;
5033
109k
  }
5034
7.48M
  SHRINK;
5035
7.48M
  GROW;
5036
7.48M
  in = ctxt->input->cur;
5037
7.48M
  if (*in == '-') {
5038
7.15M
      if (in[1] == '-') {
5039
5.28M
          if (in[2] == '>') {
5040
2.45M
        SKIP(3);
5041
2.45M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5042
2.45M
            (!ctxt->disableSAX)) {
5043
1.93M
      if (buf != NULL)
5044
192k
          ctxt->sax->comment(ctxt->userData, buf);
5045
1.74M
      else
5046
1.74M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5047
1.93M
        }
5048
2.45M
        if (buf != NULL)
5049
243k
            xmlFree(buf);
5050
2.45M
        return;
5051
2.45M
    }
5052
2.82M
    if (buf != NULL) {
5053
2.76M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5054
2.76M
                          "Double hyphen within comment: "
5055
2.76M
                                      "<!--%.50s\n",
5056
2.76M
              buf);
5057
2.76M
    } else
5058
64.1k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5059
64.1k
                          "Double hyphen within comment\n", NULL);
5060
2.82M
    in++;
5061
2.82M
    ctxt->input->col++;
5062
2.82M
      }
5063
4.69M
      in++;
5064
4.69M
      ctxt->input->col++;
5065
4.69M
      goto get_more;
5066
7.15M
  }
5067
7.48M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5068
330k
    xmlParseCommentComplex(ctxt, buf, len, size);
5069
330k
}
5070
5071
5072
/**
5073
 * Parse the name of a PI
5074
 *
5075
 * @deprecated Internal function, don't use.
5076
 *
5077
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5078
 *
5079
 * @param ctxt  an XML parser context
5080
 * @returns the PITarget name or NULL
5081
 */
5082
5083
const xmlChar *
5084
403k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5085
403k
    const xmlChar *name;
5086
5087
403k
    name = xmlParseName(ctxt);
5088
403k
    if ((name != NULL) &&
5089
357k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5090
156k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5091
132k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5092
84.6k
  int i;
5093
84.6k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5094
80.3k
      (name[2] == 'l') && (name[3] == 0)) {
5095
71.6k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096
71.6k
     "XML declaration allowed only at the start of the document\n");
5097
71.6k
      return(name);
5098
71.6k
  } else if (name[3] == 0) {
5099
4.63k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5100
4.63k
      return(name);
5101
4.63k
  }
5102
23.1k
  for (i = 0;;i++) {
5103
23.1k
      if (xmlW3CPIs[i] == NULL) break;
5104
16.3k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5105
1.55k
          return(name);
5106
16.3k
  }
5107
6.81k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5108
6.81k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5109
6.81k
          NULL, NULL);
5110
6.81k
    }
5111
325k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5112
6.02k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5113
6.02k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5114
6.02k
    }
5115
325k
    return(name);
5116
403k
}
5117
5118
#ifdef LIBXML_CATALOG_ENABLED
5119
/**
5120
 * Parse an XML Catalog Processing Instruction.
5121
 *
5122
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5123
 *
5124
 * Occurs only if allowed by the user and if happening in the Misc
5125
 * part of the document before any doctype information.
5126
 * This will add the given catalog to the parsing context in order
5127
 * to be used if a resolution is needed further down in the document.
5128
 *
5129
 * @param ctxt  an XML parser context
5130
 * @param catalog  the PI value string
5131
 */
5132
5133
static void
5134
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5135
0
    xmlChar *URL = NULL;
5136
0
    const xmlChar *tmp, *base;
5137
0
    xmlChar marker;
5138
5139
0
    tmp = catalog;
5140
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5141
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5142
0
  goto error;
5143
0
    tmp += 7;
5144
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5145
0
    if (*tmp != '=') {
5146
0
  return;
5147
0
    }
5148
0
    tmp++;
5149
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5150
0
    marker = *tmp;
5151
0
    if ((marker != '\'') && (marker != '"'))
5152
0
  goto error;
5153
0
    tmp++;
5154
0
    base = tmp;
5155
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5156
0
    if (*tmp == 0)
5157
0
  goto error;
5158
0
    URL = xmlStrndup(base, tmp - base);
5159
0
    tmp++;
5160
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5161
0
    if (*tmp != 0)
5162
0
  goto error;
5163
5164
0
    if (URL != NULL) {
5165
        /*
5166
         * Unfortunately, the catalog API doesn't report OOM errors.
5167
         * xmlGetLastError isn't very helpful since we don't know
5168
         * where the last error came from. We'd have to reset it
5169
         * before this call and restore it afterwards.
5170
         */
5171
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5172
0
  xmlFree(URL);
5173
0
    }
5174
0
    return;
5175
5176
0
error:
5177
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5178
0
            "Catalog PI syntax error: %s\n",
5179
0
      catalog, NULL);
5180
0
    if (URL != NULL)
5181
0
  xmlFree(URL);
5182
0
}
5183
#endif
5184
5185
/**
5186
 * Parse an XML Processing Instruction.
5187
 *
5188
 * @deprecated Internal function, don't use.
5189
 *
5190
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5191
 *
5192
 * The processing is transferred to SAX once parsed.
5193
 *
5194
 * @param ctxt  an XML parser context
5195
 */
5196
5197
void
5198
403k
xmlParsePI(xmlParserCtxt *ctxt) {
5199
403k
    xmlChar *buf = NULL;
5200
403k
    size_t len = 0;
5201
403k
    size_t size = XML_PARSER_BUFFER_SIZE;
5202
403k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5203
106k
                       XML_MAX_HUGE_LENGTH :
5204
403k
                       XML_MAX_TEXT_LENGTH;
5205
403k
    int cur, l;
5206
403k
    const xmlChar *target;
5207
5208
403k
    if ((RAW == '<') && (NXT(1) == '?')) {
5209
  /*
5210
   * this is a Processing Instruction.
5211
   */
5212
403k
  SKIP(2);
5213
5214
  /*
5215
   * Parse the target name and check for special support like
5216
   * namespace.
5217
   */
5218
403k
        target = xmlParsePITarget(ctxt);
5219
403k
  if (target != NULL) {
5220
357k
      if ((RAW == '?') && (NXT(1) == '>')) {
5221
162k
    SKIP(2);
5222
5223
    /*
5224
     * SAX: PI detected.
5225
     */
5226
162k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5227
158k
        (ctxt->sax->processingInstruction != NULL))
5228
158k
        ctxt->sax->processingInstruction(ctxt->userData,
5229
158k
                                         target, NULL);
5230
162k
    return;
5231
162k
      }
5232
195k
      buf = xmlMalloc(size);
5233
195k
      if (buf == NULL) {
5234
171
    xmlErrMemory(ctxt);
5235
171
    return;
5236
171
      }
5237
194k
      if (SKIP_BLANKS == 0) {
5238
80.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239
80.0k
        "ParsePI: PI %s space expected\n", target);
5240
80.0k
      }
5241
194k
      cur = xmlCurrentCharRecover(ctxt, &l);
5242
25.1M
      while (IS_CHAR(cur) && /* checked */
5243
25.0M
       ((cur != '?') || (NXT(1) != '>'))) {
5244
24.9M
    if (len + 5 >= size) {
5245
63.1k
        xmlChar *tmp;
5246
63.1k
                    int newSize;
5247
5248
63.1k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5249
63.1k
                    if (newSize < 0) {
5250
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5251
0
                                          "PI %s too big found", target);
5252
0
                        xmlFree(buf);
5253
0
                        return;
5254
0
                    }
5255
63.1k
        tmp = xmlRealloc(buf, newSize);
5256
63.1k
        if (tmp == NULL) {
5257
53
      xmlErrMemory(ctxt);
5258
53
      xmlFree(buf);
5259
53
      return;
5260
53
        }
5261
63.1k
        buf = tmp;
5262
63.1k
                    size = newSize;
5263
63.1k
    }
5264
24.9M
    COPY_BUF(buf, len, cur);
5265
24.9M
    NEXTL(l);
5266
24.9M
    cur = xmlCurrentCharRecover(ctxt, &l);
5267
24.9M
      }
5268
194k
      buf[len] = 0;
5269
194k
      if (cur != '?') {
5270
58.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5271
58.6k
          "ParsePI: PI %s never end ...\n", target);
5272
136k
      } else {
5273
136k
    SKIP(2);
5274
5275
136k
#ifdef LIBXML_CATALOG_ENABLED
5276
136k
    if ((ctxt->inSubset == 0) &&
5277
93.1k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5278
12.8k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5279
5280
12.8k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5281
11.0k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5282
11.0k
       (allow == XML_CATA_ALLOW_ALL)))
5283
0
      xmlParseCatalogPI(ctxt, buf);
5284
12.8k
    }
5285
136k
#endif
5286
5287
    /*
5288
     * SAX: PI detected.
5289
     */
5290
136k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291
110k
        (ctxt->sax->processingInstruction != NULL))
5292
110k
        ctxt->sax->processingInstruction(ctxt->userData,
5293
110k
                                         target, buf);
5294
136k
      }
5295
194k
      xmlFree(buf);
5296
194k
  } else {
5297
45.7k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5298
45.7k
  }
5299
403k
    }
5300
403k
}
5301
5302
/**
5303
 * Parse a notation declaration. Always consumes '<!'.
5304
 *
5305
 * @deprecated Internal function, don't use.
5306
 *
5307
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5308
 *                           S? '>'
5309
 *
5310
 * Hence there are actually 3 choices:
5311
 *
5312
 *     'PUBLIC' S PubidLiteral
5313
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5314
 *     'SYSTEM' S SystemLiteral
5315
 *
5316
 * See the NOTE on #xmlParseExternalID.
5317
 *
5318
 * @param ctxt  an XML parser context
5319
 */
5320
5321
void
5322
45.1k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5323
45.1k
    const xmlChar *name;
5324
45.1k
    xmlChar *Pubid;
5325
45.1k
    xmlChar *Systemid;
5326
5327
45.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5328
0
        return;
5329
45.1k
    SKIP(2);
5330
5331
45.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5332
43.3k
#ifdef LIBXML_VALID_ENABLED
5333
43.3k
  int oldInputNr = ctxt->inputNr;
5334
43.3k
#endif
5335
5336
43.3k
  SKIP(8);
5337
43.3k
  if (SKIP_BLANKS_PE == 0) {
5338
2.38k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5339
2.38k
         "Space required after '<!NOTATION'\n");
5340
2.38k
      return;
5341
2.38k
  }
5342
5343
41.0k
        name = xmlParseName(ctxt);
5344
41.0k
  if (name == NULL) {
5345
3.56k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5346
3.56k
      return;
5347
3.56k
  }
5348
37.4k
  if (xmlStrchr(name, ':') != NULL) {
5349
3.57k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5350
3.57k
         "colons are forbidden from notation names '%s'\n",
5351
3.57k
         name, NULL, NULL);
5352
3.57k
  }
5353
37.4k
  if (SKIP_BLANKS_PE == 0) {
5354
2.14k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5355
2.14k
         "Space required after the NOTATION name'\n");
5356
2.14k
      return;
5357
2.14k
  }
5358
5359
  /*
5360
   * Parse the IDs.
5361
   */
5362
35.3k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363
35.3k
  SKIP_BLANKS_PE;
5364
5365
35.3k
  if (RAW == '>') {
5366
24.1k
#ifdef LIBXML_VALID_ENABLED
5367
24.1k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5368
10
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5369
10
                           "Notation declaration doesn't start and stop"
5370
10
                                 " in the same entity\n",
5371
10
                                 NULL, NULL);
5372
10
      }
5373
24.1k
#endif
5374
24.1k
      NEXT;
5375
24.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376
21.3k
    (ctxt->sax->notationDecl != NULL))
5377
21.3k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378
24.1k
  } else {
5379
11.1k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380
11.1k
  }
5381
35.3k
  if (Systemid != NULL) xmlFree(Systemid);
5382
35.3k
  if (Pubid != NULL) xmlFree(Pubid);
5383
35.3k
    }
5384
45.1k
}
5385
5386
/**
5387
 * Parse an entity declaration. Always consumes '<!'.
5388
 *
5389
 * @deprecated Internal function, don't use.
5390
 *
5391
 *     [70] EntityDecl ::= GEDecl | PEDecl
5392
 *
5393
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5394
 *
5395
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5396
 *
5397
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5398
 *
5399
 *     [74] PEDef ::= EntityValue | ExternalID
5400
 *
5401
 *     [76] NDataDecl ::= S 'NDATA' S Name
5402
 *
5403
 * [ VC: Notation Declared ]
5404
 * The Name must match the declared name of a notation.
5405
 *
5406
 * @param ctxt  an XML parser context
5407
 */
5408
5409
void
5410
513k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5411
513k
    const xmlChar *name = NULL;
5412
513k
    xmlChar *value = NULL;
5413
513k
    xmlChar *URI = NULL, *literal = NULL;
5414
513k
    const xmlChar *ndata = NULL;
5415
513k
    int isParameter = 0;
5416
513k
    xmlChar *orig = NULL;
5417
5418
513k
    if ((CUR != '<') || (NXT(1) != '!'))
5419
0
        return;
5420
513k
    SKIP(2);
5421
5422
    /* GROW; done in the caller */
5423
513k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5424
511k
#ifdef LIBXML_VALID_ENABLED
5425
511k
  int oldInputNr = ctxt->inputNr;
5426
511k
#endif
5427
5428
511k
  SKIP(6);
5429
511k
  if (SKIP_BLANKS_PE == 0) {
5430
40.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431
40.9k
         "Space required after '<!ENTITY'\n");
5432
40.9k
  }
5433
5434
511k
  if (RAW == '%') {
5435
193k
      NEXT;
5436
193k
      if (SKIP_BLANKS_PE == 0) {
5437
9.71k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438
9.71k
             "Space required after '%%'\n");
5439
9.71k
      }
5440
193k
      isParameter = 1;
5441
193k
  }
5442
5443
511k
        name = xmlParseName(ctxt);
5444
511k
  if (name == NULL) {
5445
41.5k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5446
41.5k
                     "xmlParseEntityDecl: no name\n");
5447
41.5k
            return;
5448
41.5k
  }
5449
470k
  if (xmlStrchr(name, ':') != NULL) {
5450
10.4k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5451
10.4k
         "colons are forbidden from entities names '%s'\n",
5452
10.4k
         name, NULL, NULL);
5453
10.4k
  }
5454
470k
  if (SKIP_BLANKS_PE == 0) {
5455
38.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456
38.5k
         "Space required after the entity name\n");
5457
38.5k
  }
5458
5459
  /*
5460
   * handle the various case of definitions...
5461
   */
5462
470k
  if (isParameter) {
5463
188k
      if ((RAW == '"') || (RAW == '\'')) {
5464
135k
          value = xmlParseEntityValue(ctxt, &orig);
5465
135k
    if (value) {
5466
130k
        if ((ctxt->sax != NULL) &&
5467
130k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468
107k
      ctxt->sax->entityDecl(ctxt->userData, name,
5469
107k
                        XML_INTERNAL_PARAMETER_ENTITY,
5470
107k
            NULL, NULL, value);
5471
130k
    }
5472
135k
      } else {
5473
52.5k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5474
52.5k
    if ((URI == NULL) && (literal == NULL)) {
5475
5.20k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476
5.20k
    }
5477
52.5k
    if (URI) {
5478
46.2k
                    if (xmlStrchr(URI, '#')) {
5479
2.39k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480
43.8k
                    } else {
5481
43.8k
                        if ((ctxt->sax != NULL) &&
5482
43.8k
                            (!ctxt->disableSAX) &&
5483
41.0k
                            (ctxt->sax->entityDecl != NULL))
5484
41.0k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5485
41.0k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5486
41.0k
                                        literal, URI, NULL);
5487
43.8k
                    }
5488
46.2k
    }
5489
52.5k
      }
5490
282k
  } else {
5491
282k
      if ((RAW == '"') || (RAW == '\'')) {
5492
215k
          value = xmlParseEntityValue(ctxt, &orig);
5493
215k
    if ((ctxt->sax != NULL) &&
5494
215k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5495
181k
        ctxt->sax->entityDecl(ctxt->userData, name,
5496
181k
        XML_INTERNAL_GENERAL_ENTITY,
5497
181k
        NULL, NULL, value);
5498
    /*
5499
     * For expat compatibility in SAX mode.
5500
     */
5501
215k
    if ((ctxt->myDoc == NULL) ||
5502
212k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5503
18.3k
        if (ctxt->myDoc == NULL) {
5504
3.16k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5505
3.16k
      if (ctxt->myDoc == NULL) {
5506
8
          xmlErrMemory(ctxt);
5507
8
          goto done;
5508
8
      }
5509
3.16k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5510
3.16k
        }
5511
18.3k
        if (ctxt->myDoc->intSubset == NULL) {
5512
3.16k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5513
3.16k
              BAD_CAST "fake", NULL, NULL);
5514
3.16k
                        if (ctxt->myDoc->intSubset == NULL) {
5515
10
                            xmlErrMemory(ctxt);
5516
10
                            goto done;
5517
10
                        }
5518
3.16k
                    }
5519
5520
18.3k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521
18.3k
                    NULL, NULL, value);
5522
18.3k
    }
5523
215k
      } else {
5524
66.9k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5525
66.9k
    if ((URI == NULL) && (literal == NULL)) {
5526
24.9k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5527
24.9k
    }
5528
66.9k
    if (URI) {
5529
38.9k
                    if (xmlStrchr(URI, '#')) {
5530
3.14k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5531
3.14k
                    }
5532
38.9k
    }
5533
66.9k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5534
23.6k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535
23.6k
           "Space required before 'NDATA'\n");
5536
23.6k
    }
5537
66.9k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5538
6.71k
        SKIP(5);
5539
6.71k
        if (SKIP_BLANKS_PE == 0) {
5540
2.46k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5541
2.46k
               "Space required after 'NDATA'\n");
5542
2.46k
        }
5543
6.71k
        ndata = xmlParseName(ctxt);
5544
6.71k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5545
4.48k
            (ctxt->sax->unparsedEntityDecl != NULL))
5546
4.48k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5547
4.48k
            literal, URI, ndata);
5548
60.2k
    } else {
5549
60.2k
        if ((ctxt->sax != NULL) &&
5550
60.2k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5551
50.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5552
50.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5553
50.0k
            literal, URI, NULL);
5554
        /*
5555
         * For expat compatibility in SAX mode.
5556
         * assuming the entity replacement was asked for
5557
         */
5558
60.2k
        if ((ctxt->replaceEntities != 0) &&
5559
34.2k
      ((ctxt->myDoc == NULL) ||
5560
32.6k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5561
4.27k
      if (ctxt->myDoc == NULL) {
5562
1.60k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5563
1.60k
          if (ctxt->myDoc == NULL) {
5564
7
              xmlErrMemory(ctxt);
5565
7
        goto done;
5566
7
          }
5567
1.59k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5568
1.59k
      }
5569
5570
4.26k
      if (ctxt->myDoc->intSubset == NULL) {
5571
1.59k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5572
1.59k
            BAD_CAST "fake", NULL, NULL);
5573
1.59k
                            if (ctxt->myDoc->intSubset == NULL) {
5574
8
                                xmlErrMemory(ctxt);
5575
8
                                goto done;
5576
8
                            }
5577
1.59k
                        }
5578
4.25k
      xmlSAX2EntityDecl(ctxt, name,
5579
4.25k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580
4.25k
                  literal, URI, NULL);
5581
4.25k
        }
5582
60.2k
    }
5583
66.9k
      }
5584
282k
  }
5585
470k
  SKIP_BLANKS_PE;
5586
470k
  if (RAW != '>') {
5587
76.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5588
76.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5589
393k
  } else {
5590
393k
#ifdef LIBXML_VALID_ENABLED
5591
393k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5592
221
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5593
221
                           "Entity declaration doesn't start and stop in"
5594
221
                                 " the same entity\n",
5595
221
                                 NULL, NULL);
5596
221
      }
5597
393k
#endif
5598
393k
      NEXT;
5599
393k
  }
5600
470k
  if (orig != NULL) {
5601
      /*
5602
       * Ugly mechanism to save the raw entity value.
5603
       */
5604
335k
      xmlEntityPtr cur = NULL;
5605
5606
335k
      if (isParameter) {
5607
130k
          if ((ctxt->sax != NULL) &&
5608
130k
        (ctxt->sax->getParameterEntity != NULL))
5609
130k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5610
204k
      } else {
5611
204k
          if ((ctxt->sax != NULL) &&
5612
204k
        (ctxt->sax->getEntity != NULL))
5613
204k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5614
204k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5615
11.2k
        cur = xmlSAX2GetEntity(ctxt, name);
5616
11.2k
    }
5617
204k
      }
5618
335k
            if ((cur != NULL) && (cur->orig == NULL)) {
5619
111k
    cur->orig = orig;
5620
111k
                orig = NULL;
5621
111k
      }
5622
335k
  }
5623
5624
470k
done:
5625
470k
  if (value != NULL) xmlFree(value);
5626
470k
  if (URI != NULL) xmlFree(URI);
5627
470k
  if (literal != NULL) xmlFree(literal);
5628
470k
        if (orig != NULL) xmlFree(orig);
5629
470k
    }
5630
513k
}
5631
5632
/**
5633
 * Parse an attribute default declaration
5634
 *
5635
 * @deprecated Internal function, don't use.
5636
 *
5637
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5638
 *
5639
 * [ VC: Required Attribute ]
5640
 * if the default declaration is the keyword \#REQUIRED, then the
5641
 * attribute must be specified for all elements of the type in the
5642
 * attribute-list declaration.
5643
 *
5644
 * [ VC: Attribute Default Legal ]
5645
 * The declared default value must meet the lexical constraints of
5646
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5647
 *
5648
 * [ VC: Fixed Attribute Default ]
5649
 * if an attribute has a default value declared with the \#FIXED
5650
 * keyword, instances of that attribute must match the default value.
5651
 *
5652
 * [ WFC: No < in Attribute Values ]
5653
 * handled in #xmlParseAttValue
5654
 *
5655
 * @param ctxt  an XML parser context
5656
 * @param value  Receive a possible fixed default value for the attribute
5657
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5658
 *          or XML_ATTRIBUTE_FIXED.
5659
 */
5660
5661
int
5662
454k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5663
454k
    int val;
5664
454k
    xmlChar *ret;
5665
5666
454k
    *value = NULL;
5667
454k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5668
27.7k
  SKIP(9);
5669
27.7k
  return(XML_ATTRIBUTE_REQUIRED);
5670
27.7k
    }
5671
426k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5672
101k
  SKIP(8);
5673
101k
  return(XML_ATTRIBUTE_IMPLIED);
5674
101k
    }
5675
324k
    val = XML_ATTRIBUTE_NONE;
5676
324k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5677
34.0k
  SKIP(6);
5678
34.0k
  val = XML_ATTRIBUTE_FIXED;
5679
34.0k
  if (SKIP_BLANKS_PE == 0) {
5680
11.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5681
11.1k
         "Space required after '#FIXED'\n");
5682
11.1k
  }
5683
34.0k
    }
5684
324k
    ret = xmlParseAttValue(ctxt);
5685
324k
    if (ret == NULL) {
5686
85.1k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5687
85.1k
           "Attribute default value declaration error\n");
5688
85.1k
    } else
5689
239k
        *value = ret;
5690
324k
    return(val);
5691
426k
}
5692
5693
/**
5694
 * Parse an Notation attribute type.
5695
 *
5696
 * @deprecated Internal function, don't use.
5697
 *
5698
 * Note: the leading 'NOTATION' S part has already being parsed...
5699
 *
5700
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701
 *
5702
 * [ VC: Notation Attributes ]
5703
 * Values of this type must match one of the notation names included
5704
 * in the declaration; all notation names in the declaration must be declared.
5705
 *
5706
 * @param ctxt  an XML parser context
5707
 * @returns the notation attribute tree built while parsing
5708
 */
5709
5710
xmlEnumeration *
5711
10.2k
xmlParseNotationType(xmlParserCtxt *ctxt) {
5712
10.2k
    const xmlChar *name;
5713
10.2k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5714
5715
10.2k
    if (RAW != '(') {
5716
1.70k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5717
1.70k
  return(NULL);
5718
1.70k
    }
5719
12.1k
    do {
5720
12.1k
        NEXT;
5721
12.1k
  SKIP_BLANKS_PE;
5722
12.1k
        name = xmlParseName(ctxt);
5723
12.1k
  if (name == NULL) {
5724
1.42k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5725
1.42k
         "Name expected in NOTATION declaration\n");
5726
1.42k
            xmlFreeEnumeration(ret);
5727
1.42k
      return(NULL);
5728
1.42k
  }
5729
10.7k
        tmp = NULL;
5730
10.7k
#ifdef LIBXML_VALID_ENABLED
5731
10.7k
        if (ctxt->validate) {
5732
4.04k
            tmp = ret;
5733
10.4k
            while (tmp != NULL) {
5734
7.40k
                if (xmlStrEqual(name, tmp->name)) {
5735
995
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5736
995
              "standalone: attribute notation value token %s duplicated\n",
5737
995
                                     name, NULL);
5738
995
                    if (!xmlDictOwns(ctxt->dict, name))
5739
0
                        xmlFree((xmlChar *) name);
5740
995
                    break;
5741
995
                }
5742
6.40k
                tmp = tmp->next;
5743
6.40k
            }
5744
4.04k
        }
5745
10.7k
#endif /* LIBXML_VALID_ENABLED */
5746
10.7k
  if (tmp == NULL) {
5747
9.72k
      cur = xmlCreateEnumeration(name);
5748
9.72k
      if (cur == NULL) {
5749
33
                xmlErrMemory(ctxt);
5750
33
                xmlFreeEnumeration(ret);
5751
33
                return(NULL);
5752
33
            }
5753
9.69k
      if (last == NULL) ret = last = cur;
5754
2.61k
      else {
5755
2.61k
    last->next = cur;
5756
2.61k
    last = cur;
5757
2.61k
      }
5758
9.69k
  }
5759
10.6k
  SKIP_BLANKS_PE;
5760
10.6k
    } while (RAW == '|');
5761
7.05k
    if (RAW != ')') {
5762
2.32k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5763
2.32k
        xmlFreeEnumeration(ret);
5764
2.32k
  return(NULL);
5765
2.32k
    }
5766
4.72k
    NEXT;
5767
4.72k
    return(ret);
5768
7.05k
}
5769
5770
/**
5771
 * Parse an Enumeration attribute type.
5772
 *
5773
 * @deprecated Internal function, don't use.
5774
 *
5775
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5776
 *
5777
 * [ VC: Enumeration ]
5778
 * Values of this type must match one of the Nmtoken tokens in
5779
 * the declaration
5780
 *
5781
 * @param ctxt  an XML parser context
5782
 * @returns the enumeration attribute tree built while parsing
5783
 */
5784
5785
xmlEnumeration *
5786
108k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5787
108k
    xmlChar *name;
5788
108k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5789
5790
108k
    if (RAW != '(') {
5791
14.8k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5792
14.8k
  return(NULL);
5793
14.8k
    }
5794
133k
    do {
5795
133k
        NEXT;
5796
133k
  SKIP_BLANKS_PE;
5797
133k
        name = xmlParseNmtoken(ctxt);
5798
133k
  if (name == NULL) {
5799
1.57k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5800
1.57k
      return(ret);
5801
1.57k
  }
5802
131k
        tmp = NULL;
5803
131k
#ifdef LIBXML_VALID_ENABLED
5804
131k
        if (ctxt->validate) {
5805
47.1k
            tmp = ret;
5806
99.9k
            while (tmp != NULL) {
5807
54.1k
                if (xmlStrEqual(name, tmp->name)) {
5808
1.36k
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5809
1.36k
              "standalone: attribute enumeration value token %s duplicated\n",
5810
1.36k
                                     name, NULL);
5811
1.36k
                    if (!xmlDictOwns(ctxt->dict, name))
5812
1.36k
                        xmlFree(name);
5813
1.36k
                    break;
5814
1.36k
                }
5815
52.8k
                tmp = tmp->next;
5816
52.8k
            }
5817
47.1k
        }
5818
131k
#endif /* LIBXML_VALID_ENABLED */
5819
131k
  if (tmp == NULL) {
5820
130k
      cur = xmlCreateEnumeration(name);
5821
130k
      if (!xmlDictOwns(ctxt->dict, name))
5822
130k
    xmlFree(name);
5823
130k
      if (cur == NULL) {
5824
73
                xmlErrMemory(ctxt);
5825
73
                xmlFreeEnumeration(ret);
5826
73
                return(NULL);
5827
73
            }
5828
130k
      if (last == NULL) ret = last = cur;
5829
38.0k
      else {
5830
38.0k
    last->next = cur;
5831
38.0k
    last = cur;
5832
38.0k
      }
5833
130k
  }
5834
131k
  SKIP_BLANKS_PE;
5835
131k
    } while (RAW == '|');
5836
92.0k
    if (RAW != ')') {
5837
5.08k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5838
5.08k
  return(ret);
5839
5.08k
    }
5840
86.9k
    NEXT;
5841
86.9k
    return(ret);
5842
92.0k
}
5843
5844
/**
5845
 * Parse an Enumerated attribute type.
5846
 *
5847
 * @deprecated Internal function, don't use.
5848
 *
5849
 *     [57] EnumeratedType ::= NotationType | Enumeration
5850
 *
5851
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5852
 *
5853
 * @param ctxt  an XML parser context
5854
 * @param tree  the enumeration tree built while parsing
5855
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5856
 */
5857
5858
int
5859
121k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5860
121k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5861
13.3k
  SKIP(8);
5862
13.3k
  if (SKIP_BLANKS_PE == 0) {
5863
3.14k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5864
3.14k
         "Space required after 'NOTATION'\n");
5865
3.14k
      return(0);
5866
3.14k
  }
5867
10.2k
  *tree = xmlParseNotationType(ctxt);
5868
10.2k
  if (*tree == NULL) return(0);
5869
4.72k
  return(XML_ATTRIBUTE_NOTATION);
5870
10.2k
    }
5871
108k
    *tree = xmlParseEnumerationType(ctxt);
5872
108k
    if (*tree == NULL) return(0);
5873
92.1k
    return(XML_ATTRIBUTE_ENUMERATION);
5874
108k
}
5875
5876
/**
5877
 * Parse the Attribute list def for an element
5878
 *
5879
 * @deprecated Internal function, don't use.
5880
 *
5881
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5882
 *
5883
 *     [55] StringType ::= 'CDATA'
5884
 *
5885
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5886
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5887
 *
5888
 * Validity constraints for attribute values syntax are checked in
5889
 * #xmlValidateAttributeValue
5890
 *
5891
 * [ VC: ID ]
5892
 * Values of type ID must match the Name production. A name must not
5893
 * appear more than once in an XML document as a value of this type;
5894
 * i.e., ID values must uniquely identify the elements which bear them.
5895
 *
5896
 * [ VC: One ID per Element Type ]
5897
 * No element type may have more than one ID attribute specified.
5898
 *
5899
 * [ VC: ID Attribute Default ]
5900
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5901
 *
5902
 * [ VC: IDREF ]
5903
 * Values of type IDREF must match the Name production, and values
5904
 * of type IDREFS must match Names; each IDREF Name must match the value
5905
 * of an ID attribute on some element in the XML document; i.e. IDREF
5906
 * values must match the value of some ID attribute.
5907
 *
5908
 * [ VC: Entity Name ]
5909
 * Values of type ENTITY must match the Name production, values
5910
 * of type ENTITIES must match Names; each Entity Name must match the
5911
 * name of an unparsed entity declared in the DTD.
5912
 *
5913
 * [ VC: Name Token ]
5914
 * Values of type NMTOKEN must match the Nmtoken production; values
5915
 * of type NMTOKENS must match Nmtokens.
5916
 *
5917
 * @param ctxt  an XML parser context
5918
 * @param tree  the enumeration tree built while parsing
5919
 * @returns the attribute type
5920
 */
5921
int
5922
514k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5923
514k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5924
80.2k
  SKIP(5);
5925
80.2k
  return(XML_ATTRIBUTE_CDATA);
5926
433k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5927
36.6k
  SKIP(6);
5928
36.6k
  return(XML_ATTRIBUTE_IDREFS);
5929
397k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5930
15.6k
  SKIP(5);
5931
15.6k
  return(XML_ATTRIBUTE_IDREF);
5932
381k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5933
82.1k
        SKIP(2);
5934
82.1k
  return(XML_ATTRIBUTE_ID);
5935
299k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5936
70.6k
  SKIP(6);
5937
70.6k
  return(XML_ATTRIBUTE_ENTITY);
5938
228k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5939
24.1k
  SKIP(8);
5940
24.1k
  return(XML_ATTRIBUTE_ENTITIES);
5941
204k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5942
45.1k
  SKIP(8);
5943
45.1k
  return(XML_ATTRIBUTE_NMTOKENS);
5944
159k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5945
37.5k
  SKIP(7);
5946
37.5k
  return(XML_ATTRIBUTE_NMTOKEN);
5947
37.5k
     }
5948
121k
     return(xmlParseEnumeratedType(ctxt, tree));
5949
514k
}
5950
5951
/**
5952
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5953
 *
5954
 * @deprecated Internal function, don't use.
5955
 *
5956
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5957
 *
5958
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5959
 * @param ctxt  an XML parser context
5960
 */
5961
void
5962
437k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5963
437k
    const xmlChar *elemName;
5964
437k
    const xmlChar *attrName;
5965
437k
    xmlEnumerationPtr tree;
5966
5967
437k
    if ((CUR != '<') || (NXT(1) != '!'))
5968
0
        return;
5969
437k
    SKIP(2);
5970
5971
437k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972
435k
#ifdef LIBXML_VALID_ENABLED
5973
435k
  int oldInputNr = ctxt->inputNr;
5974
435k
#endif
5975
5976
435k
  SKIP(7);
5977
435k
  if (SKIP_BLANKS_PE == 0) {
5978
55.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979
55.0k
                     "Space required after '<!ATTLIST'\n");
5980
55.0k
  }
5981
435k
        elemName = xmlParseName(ctxt);
5982
435k
  if (elemName == NULL) {
5983
18.7k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984
18.7k
         "ATTLIST: no name for Element\n");
5985
18.7k
      return;
5986
18.7k
  }
5987
417k
  SKIP_BLANKS_PE;
5988
417k
  GROW;
5989
780k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5990
583k
      int type;
5991
583k
      int def;
5992
583k
      xmlChar *defaultValue = NULL;
5993
5994
583k
      GROW;
5995
583k
            tree = NULL;
5996
583k
      attrName = xmlParseName(ctxt);
5997
583k
      if (attrName == NULL) {
5998
50.0k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999
50.0k
             "ATTLIST: no name for Attribute\n");
6000
50.0k
    break;
6001
50.0k
      }
6002
533k
      GROW;
6003
533k
      if (SKIP_BLANKS_PE == 0) {
6004
19.3k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005
19.3k
            "Space required after the attribute name\n");
6006
19.3k
    break;
6007
19.3k
      }
6008
6009
514k
      type = xmlParseAttributeType(ctxt, &tree);
6010
514k
      if (type <= 0) {
6011
25.0k
          break;
6012
25.0k
      }
6013
6014
489k
      GROW;
6015
489k
      if (SKIP_BLANKS_PE == 0) {
6016
34.5k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017
34.5k
             "Space required after the attribute type\n");
6018
34.5k
          if (tree != NULL)
6019
5.88k
        xmlFreeEnumeration(tree);
6020
34.5k
    break;
6021
34.5k
      }
6022
6023
454k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6024
454k
      if (def <= 0) {
6025
0
                if (defaultValue != NULL)
6026
0
        xmlFree(defaultValue);
6027
0
          if (tree != NULL)
6028
0
        xmlFreeEnumeration(tree);
6029
0
          break;
6030
0
      }
6031
454k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6032
215k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6033
6034
454k
      GROW;
6035
454k
            if (RAW != '>') {
6036
291k
    if (SKIP_BLANKS_PE == 0) {
6037
90.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038
90.9k
      "Space required after the attribute default value\n");
6039
90.9k
        if (defaultValue != NULL)
6040
8.19k
      xmlFree(defaultValue);
6041
90.9k
        if (tree != NULL)
6042
8.86k
      xmlFreeEnumeration(tree);
6043
90.9k
        break;
6044
90.9k
    }
6045
291k
      }
6046
363k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6047
335k
    (ctxt->sax->attributeDecl != NULL))
6048
335k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6049
335k
                          type, def, defaultValue, tree);
6050
28.3k
      else if (tree != NULL)
6051
10.7k
    xmlFreeEnumeration(tree);
6052
6053
363k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6054
206k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6055
206k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6056
206k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6057
206k
      }
6058
363k
      if (ctxt->sax2) {
6059
314k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6060
314k
      }
6061
363k
      if (defaultValue != NULL)
6062
231k
          xmlFree(defaultValue);
6063
363k
      GROW;
6064
363k
  }
6065
417k
  if (RAW == '>') {
6066
205k
#ifdef LIBXML_VALID_ENABLED
6067
205k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6068
379
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6069
379
                                 "Attribute list declaration doesn't start and"
6070
379
                                 " stop in the same entity\n",
6071
379
                                 NULL, NULL);
6072
379
      }
6073
205k
#endif
6074
205k
      NEXT;
6075
205k
  }
6076
417k
    }
6077
437k
}
6078
6079
/**
6080
 * Handle PEs and check that we don't pop the entity that started
6081
 * a balanced group.
6082
 *
6083
 * @param ctxt  parser context
6084
 * @param openInputNr  input nr of the entity with opening '('
6085
 */
6086
static void
6087
3.38M
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6088
3.38M
    SKIP_BLANKS;
6089
3.38M
    GROW;
6090
6091
3.38M
    (void) openInputNr;
6092
6093
3.38M
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6094
2.80M
        return;
6095
6096
641k
    while (!PARSER_STOPPED(ctxt)) {
6097
638k
        if (ctxt->input->cur >= ctxt->input->end) {
6098
26.8k
#ifdef LIBXML_VALID_ENABLED
6099
26.8k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6100
3.66k
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
3.66k
                                 "Element content declaration doesn't start "
6102
3.66k
                                 "and stop in the same entity\n",
6103
3.66k
                                 NULL, NULL);
6104
3.66k
            }
6105
26.8k
#endif
6106
26.8k
            if (PARSER_IN_PE(ctxt))
6107
26.0k
                xmlPopPE(ctxt);
6108
801
            else
6109
801
                break;
6110
611k
        } else if (RAW == '%') {
6111
37.9k
            xmlParsePERefInternal(ctxt, 0);
6112
574k
        } else {
6113
574k
            break;
6114
574k
        }
6115
6116
64.0k
        SKIP_BLANKS;
6117
64.0k
        GROW;
6118
64.0k
    }
6119
577k
}
6120
6121
/**
6122
 * Parse the declaration for a Mixed Element content
6123
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6124
 *
6125
 * @deprecated Internal function, don't use.
6126
 *
6127
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128
 *                    '(' S? '#PCDATA' S? ')'
6129
 *
6130
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131
 *
6132
 * [ VC: No Duplicate Types ]
6133
 * The same name must not appear more than once in a single
6134
 * mixed-content declaration.
6135
 *
 * Shape of the result: for '(#PCDATA)' or '(#PCDATA)*' a single
 * XML_ELEMENT_CONTENT_PCDATA node is returned. When names are present the
 * result is a chain of XML_ELEMENT_CONTENT_OR nodes linked through c2:
 * the first OR node has the PCDATA node as c1, each following OR node has
 * the previously parsed name as c1, and the last parsed name is stored as
 * c2 of the final OR node. The root gets XML_ELEMENT_CONTENT_MULT.
 *
6136
 * @param ctxt  an XML parser context
6137
 * @param openInputNr  the input used for the current entity, needed for
6138
 * boundary checks
6139
 * @returns the list of the xmlElementContent describing the element choices,
 *          or NULL on error (missing '#PCDATA', missing name, missing ')*'
 *          terminator or allocation failure)
6140
 */
6141
xmlElementContent *
6142
41.0k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6143
41.0k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6144
41.0k
    const xmlChar *elem = NULL;
6145
6146
41.0k
    GROW;
6147
41.0k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6148
41.0k
  SKIP(7);
6149
41.0k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
        /* Simple form: '(#PCDATA)' optionally followed by '*' */
6150
41.0k
  if (RAW == ')') {
6151
23.2k
#ifdef LIBXML_VALID_ENABLED
6152
23.2k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6153
13
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6154
13
                                 "Element content declaration doesn't start "
6155
13
                                 "and stop in the same entity\n",
6156
13
                                 NULL, NULL);
6157
13
      }
6158
23.2k
#endif
6159
23.2k
      NEXT;
6160
23.2k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6161
23.2k
      if (ret == NULL)
6162
23
                goto mem_error;
6163
23.1k
      if (RAW == '*') {
6164
1.88k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6165
1.88k
    NEXT;
6166
1.88k
      }
6167
23.1k
      return(ret);
6168
23.2k
  }
        /* Otherwise start the tree with the PCDATA leaf */
6169
17.8k
  if ((RAW == '(') || (RAW == '|')) {
6170
14.8k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6171
14.8k
      if (ret == NULL)
6172
17
                goto mem_error;
6173
14.8k
  }
        /*
         * One iteration per '|': create an OR node, hook the name parsed
         * by the previous iteration (if any) under it, then parse the
         * next name. The last parsed name is attached after the loop.
         */
6174
103k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6175
86.7k
      NEXT;
6176
86.7k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6177
86.7k
            if (n == NULL)
6178
26
                goto mem_error;
            /* First '|': the new OR node becomes the root above PCDATA */
6179
86.6k
      if (elem == NULL) {
6180
14.5k
    n->c1 = cur;
6181
14.5k
    if (cur != NULL)
6182
14.5k
        cur->parent = n;
6183
14.5k
    ret = cur = n;
6184
72.1k
      } else {
                /* n is linked into the tree first, so mem_error frees it */
6185
72.1k
          cur->c2 = n;
6186
72.1k
    n->parent = cur;
6187
72.1k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6188
72.1k
                if (n->c1 == NULL)
6189
21
                    goto mem_error;
6190
72.1k
    n->c1->parent = n;
6191
72.1k
    cur = n;
6192
72.1k
      }
6193
86.6k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6194
86.6k
      elem = xmlParseName(ctxt);
6195
86.6k
      if (elem == NULL) {
6196
1.12k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6197
1.12k
      "xmlParseElementMixedContentDecl : Name expected\n");
6198
1.12k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6199
1.12k
    return(NULL);
6200
1.12k
      }
6201
85.5k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6202
85.5k
  }
        /* The list must be closed by ')*'; anything else is an error */
6203
16.6k
  if ((RAW == ')') && (NXT(1) == '*')) {
6204
11.2k
      if (elem != NULL) {
6205
11.2k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6206
11.2k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6207
11.2k
    if (cur->c2 == NULL)
6208
23
                    goto mem_error;
6209
11.1k
    cur->c2->parent = cur;
6210
11.1k
            }
6211
11.1k
            if (ret != NULL)
6212
11.1k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6213
11.1k
#ifdef LIBXML_VALID_ENABLED
6214
11.1k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6215
6
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6216
6
                                 "Element content declaration doesn't start "
6217
6
                                 "and stop in the same entity\n",
6218
6
                                 NULL, NULL);
6219
6
      }
6220
11.1k
#endif
6221
11.1k
      SKIP(2);
6222
11.1k
  } else {
6223
5.42k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6224
5.42k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6225
5.42k
      return(NULL);
6226
5.42k
  }
6227
6228
16.6k
    } else {
        /* ret is still NULL here, so the return below yields NULL */
6229
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6230
0
    }
6231
11.1k
    return(ret);
6232
6233
110
/* Allocation failure: report it and release the partially built tree */
mem_error:
6234
110
    xmlErrMemory(ctxt);
6235
110
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6236
110
    return(NULL);
6237
41.0k
}
6238
6239
/**
6240
 * Parse the declaration for an element children content model
6241
 * The leading '(' has already been consumed by the caller
 * (#xmlParseElementContentDecl or a recursive call of this function);
 * leading blanks are skipped here.
6242
 *
6243
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6244
 *
6245
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6246
 *
6247
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6248
 *
6249
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6250
 *
6251
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6252
 * TODO Parameter-entity replacement text must be properly nested
6253
 *  with parenthesized groups. That is to say, if either of the
6254
 *  opening or closing parentheses in a choice, seq, or Mixed
6255
 *  construct is contained in the replacement text for a parameter
6256
 *  entity, both must be contained in the same replacement text. For
6257
 *  interoperability, if a parameter-entity reference appears in a
6258
 *  choice, seq, or Mixed construct, its replacement text should not
6259
 *  be empty, and neither the first nor last non-blank character of
6260
 *  the replacement text should be a connector (| or ,).
6261
 *
 * Nested groups are handled by recursion. The recursion is refused with
 * XML_ERR_RESOURCE_LIMIT once depth exceeds 256, or 2048 when
 * XML_PARSE_HUGE is set.
 *
6262
 * @param ctxt  an XML parser context
6263
 * @param openInputNr  the input used for the current entity, needed for
6264
 * boundary checks
6265
 * @param depth  the level of recursion
6266
 * @returns the tree of xmlElementContent describing the element
6267
 *          hierarchy, or NULL on error (partial trees are freed).
6268
 */
6269
static xmlElementContentPtr
6270
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6271
382k
                                       int depth) {
6272
382k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
    /*
     * ret:  root of the tree built so far
     * cur:  most recently created operator node (or the first child)
     * last: child parsed after the latest separator, not yet linked
     * op:   operator node (SEQ or OR) being created
     * type: separator used by this group (',' or '|'), 0 until seen
     */
6273
382k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6274
382k
    const xmlChar *elem;
6275
382k
    xmlChar type = 0;
6276
6277
382k
    if (depth > maxDepth) {
6278
26
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6279
26
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6280
26
                "use XML_PARSE_HUGE\n", depth);
6281
26
  return(NULL);
6282
26
    }
6283
382k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6284
382k
    if (RAW == '(') {
        /* The nested group is checked against the input it was opened in */
6285
235k
        int newInputNr = ctxt->inputNr;
6286
6287
        /* Recurse on first child */
6288
235k
  NEXT;
6289
235k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6290
235k
                                                           depth + 1);
6291
235k
        if (cur == NULL)
6292
205k
            return(NULL);
6293
235k
    } else {
6294
147k
  elem = xmlParseName(ctxt);
6295
147k
  if (elem == NULL) {
6296
5.79k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6297
5.79k
      return(NULL);
6298
5.79k
  }
6299
141k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6300
141k
  if (cur == NULL) {
6301
133
      xmlErrMemory(ctxt);
6302
133
      return(NULL);
6303
133
  }
6304
141k
  GROW;
        /* Optional occurrence indicator directly after the name */
6305
141k
  if (RAW == '?') {
6306
12.0k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6307
12.0k
      NEXT;
6308
129k
  } else if (RAW == '*') {
6309
13.8k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6310
13.8k
      NEXT;
6311
115k
  } else if (RAW == '+') {
6312
7.54k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6313
7.54k
      NEXT;
6314
107k
  } else {
6315
107k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6316
107k
  }
6317
141k
  GROW;
6318
141k
    }
6319
1.41M
    while (!PARSER_STOPPED(ctxt)) {
6320
1.40M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6321
1.40M
        if (RAW == ')')
6322
130k
            break;
6323
        /*
6324
   * Each loop we parse one separator and one element.
6325
   */
6326
1.27M
        if (RAW == ',') {
6327
906k
      if (type == 0) type = CUR;
6328
6329
      /*
6330
       * Detect "Name | Name , Name" error
6331
       */
6332
874k
      else if (type != CUR) {
6333
209
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6334
209
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6335
209
                      type);
                /* last is not linked into ret yet, free it separately */
6336
209
    if ((last != NULL) && (last != ret))
6337
209
        xmlFreeDocElementContent(ctxt->myDoc, last);
6338
209
    if (ret != NULL)
6339
209
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6340
209
    return(NULL);
6341
209
      }
6342
906k
      NEXT;
6343
6344
906k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6345
906k
      if (op == NULL) {
6346
56
                xmlErrMemory(ctxt);
6347
56
    if ((last != NULL) && (last != ret))
6348
27
        xmlFreeDocElementContent(ctxt->myDoc, last);
6349
56
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6350
56
    return(NULL);
6351
56
      }
            /*
             * First separator: op becomes the new root above the first
             * child. Later separators: op is chained as c2 of the previous
             * operator and adopts the pending child as its c1.
             */
6352
906k
      if (last == NULL) {
6353
32.2k
    op->c1 = ret;
6354
32.2k
    if (ret != NULL)
6355
32.2k
        ret->parent = op;
6356
32.2k
    ret = cur = op;
6357
874k
      } else {
6358
874k
          cur->c2 = op;
6359
874k
    if (op != NULL)
6360
874k
        op->parent = cur;
6361
874k
    op->c1 = last;
6362
874k
    if (last != NULL)
6363
874k
        last->parent = op;
6364
874k
    cur =op;
6365
874k
    last = NULL;
6366
874k
      }
6367
906k
  } else if (RAW == '|') {
            /* Same handling as ',' above, but creating an OR node */
6368
356k
      if (type == 0) type = CUR;
6369
6370
      /*
6371
       * Detect "Name , Name | Name" error
6372
       */
6373
287k
      else if (type != CUR) {
6374
211
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6375
211
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6376
211
          type);
6377
211
    if ((last != NULL) && (last != ret))
6378
211
        xmlFreeDocElementContent(ctxt->myDoc, last);
6379
211
    if (ret != NULL)
6380
211
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6381
211
    return(NULL);
6382
211
      }
6383
355k
      NEXT;
6384
6385
355k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6386
355k
      if (op == NULL) {
6387
46
                xmlErrMemory(ctxt);
6388
46
    if ((last != NULL) && (last != ret))
6389
18
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
46
    if (ret != NULL)
6391
46
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
46
    return(NULL);
6393
46
      }
6394
355k
      if (last == NULL) {
6395
68.9k
    op->c1 = ret;
6396
68.9k
    if (ret != NULL)
6397
68.9k
        ret->parent = op;
6398
68.9k
    ret = cur = op;
6399
286k
      } else {
6400
286k
          cur->c2 = op;
6401
286k
    if (op != NULL)
6402
286k
        op->parent = cur;
6403
286k
    op->c1 = last;
6404
286k
    if (last != NULL)
6405
286k
        last->parent = op;
6406
286k
    cur =op;
6407
286k
    last = NULL;
6408
286k
      }
6409
355k
  } else {
            /* Neither ')' nor a separator: the group is malformed */
6410
16.2k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6411
16.2k
      if ((last != NULL) && (last != ret))
6412
9.73k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6413
16.2k
      if (ret != NULL)
6414
16.2k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6415
16.2k
      return(NULL);
6416
16.2k
  }
        /* Parse the content particle following the separator into last */
6417
1.26M
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6418
1.26M
        if (RAW == '(') {
6419
69.3k
            int newInputNr = ctxt->inputNr;
6420
6421
      /* Recurse on second child */
6422
69.3k
      NEXT;
6423
69.3k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6424
69.3k
                                                          depth + 1);
6425
69.3k
            if (last == NULL) {
6426
11.6k
    if (ret != NULL)
6427
11.6k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6428
11.6k
    return(NULL);
6429
11.6k
            }
6430
1.19M
  } else {
6431
1.19M
      elem = xmlParseName(ctxt);
6432
1.19M
      if (elem == NULL) {
6433
3.70k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6434
3.70k
    if (ret != NULL)
6435
3.70k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6436
3.70k
    return(NULL);
6437
3.70k
      }
6438
1.18M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6439
1.18M
      if (last == NULL) {
6440
65
                xmlErrMemory(ctxt);
6441
65
    if (ret != NULL)
6442
65
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
65
    return(NULL);
6444
65
      }
6445
1.18M
      if (RAW == '?') {
6446
110k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6447
110k
    NEXT;
6448
1.07M
      } else if (RAW == '*') {
6449
23.2k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6450
23.2k
    NEXT;
6451
1.05M
      } else if (RAW == '+') {
6452
18.1k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6453
18.1k
    NEXT;
6454
1.03M
      } else {
6455
1.03M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6456
1.03M
      }
6457
1.18M
  }
6458
1.26M
    }
    /* Attach the still pending child as c2 of the last operator node */
6459
138k
    if ((cur != NULL) && (last != NULL)) {
6460
75.5k
        cur->c2 = last;
6461
75.5k
  if (last != NULL)
6462
75.5k
      last->parent = cur;
6463
75.5k
    }
6464
138k
#ifdef LIBXML_VALID_ENABLED
6465
138k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6466
91
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6467
91
                         "Element content declaration doesn't start "
6468
91
                         "and stop in the same entity\n",
6469
91
                         NULL, NULL);
6470
91
    }
6471
138k
#endif
    /*
     * Consume the closing ')'. The loop above can also end because the
     * parser was stopped, in which case the current char is skipped too.
     */
6472
138k
    NEXT;
    /* Occurrence indicator applied to the whole group */
6473
138k
    if (RAW == '?') {
6474
22.3k
  if (ret != NULL) {
            /* '+' or '*' combined with '?' collapses to '*' */
6475
22.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6476
20.4k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477
3.29k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478
19.0k
      else
6479
19.0k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6480
22.3k
  }
6481
22.3k
  NEXT;
6482
116k
    } else if (RAW == '*') {
6483
29.7k
  if (ret != NULL) {
6484
29.7k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6485
29.7k
      cur = ret;
6486
      /*
6487
       * Some normalization:
6488
       * (a | b* | c?)* == (a | b | c)*
6489
       */
6490
98.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6491
68.6k
    if ((cur->c1 != NULL) &&
6492
68.6k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6493
65.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6494
7.13k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6495
68.6k
    if ((cur->c2 != NULL) &&
6496
68.6k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
66.1k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
4.70k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
68.6k
    cur = cur->c2;
6500
68.6k
      }
6501
29.7k
  }
6502
29.7k
  NEXT;
6503
86.6k
    } else if (RAW == '+') {
6504
23.3k
  if (ret != NULL) {
6505
23.3k
      int found = 0;
6506
6507
23.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6508
20.9k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6509
4.49k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6510
18.8k
      else
6511
18.8k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6512
      /*
6513
       * Some normalization:
6514
       * (a | b*)+ == (a | b)*
6515
       * (a | b?)+ == (a | b)*
6516
       *
       * NOTE(review): unlike the '*' branch above, cur is not reset
       * to ret before this loop, so the walk starts from whatever
       * node cur was left on by the parsing loop - confirm that this
       * is intended.
       */
6517
43.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
20.3k
    if ((cur->c1 != NULL) &&
6519
20.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
17.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6521
5.95k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
5.95k
        found = 1;
6523
5.95k
    }
6524
20.3k
    if ((cur->c2 != NULL) &&
6525
20.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6526
18.9k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6527
4.23k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6528
4.23k
        found = 1;
6529
4.23k
    }
6530
20.3k
    cur = cur->c2;
6531
20.3k
      }
            /* An optional alternative makes the whole group optional */
6532
23.3k
      if (found)
6533
6.24k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6534
23.3k
  }
6535
23.3k
  NEXT;
6536
23.3k
    }
6537
138k
    return(ret);
6538
170k
}
6539
6540
/**
6541
 * Parse the declaration for an element children content model
6542
 * The leading '(' is expected to have been consumed by the caller.
 * Public wrapper which calls the private recursive parser with an
 * initial depth of 1.
6543
 *
6544
 * @deprecated Internal function, don't use.
6545
 *
6546
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6547
 *
6548
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6549
 *
6550
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6551
 *
6552
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6553
 *
6554
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6555
 * TODO Parameter-entity replacement text must be properly nested
6556
 *  with parenthesized groups. That is to say, if either of the
6557
 *  opening or closing parentheses in a choice, seq, or Mixed
6558
 *  construct is contained in the replacement text for a parameter
6559
 *  entity, both must be contained in the same replacement text. For
6560
 *  interoperability, if a parameter-entity reference appears in a
6561
 *  choice, seq, or Mixed construct, its replacement text should not
6562
 *  be empty, and neither the first nor last non-blank character of
6563
 *  the replacement text should be a connector (| or ,).
6564
 *
6565
 * @param ctxt  an XML parser context
6566
 * @param inputchk  the input used for the current entity, needed for boundary checks
6567
 * @returns the tree of xmlElementContent describing the element
6568
 *          hierarchy, or NULL on error.
6569
 */
6570
xmlElementContent *
6571
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6572
    /* stub left for API/ABI compat */
6573
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6574
0
}
6575
6576
/**
6577
 * Parse the declaration for an Element content either Mixed or Children,
6578
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6579
 *
6580
 * @deprecated Internal function, don't use.
6581
 *
6582
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6583
 *
 * The current character must be '('. It is consumed here, and the
 * presence of '#PCDATA' after it selects the Mixed parser; anything else
 * goes to the children parser.
 *
6584
 * @param ctxt  an XML parser context
6585
 * @param name  the name of the element being defined.
6586
 * @param result  the Element Content pointer will be stored here if any;
 *                it is set to NULL on entry and stays NULL on error
6587
 * @returns an xmlElementTypeVal value (XML_ELEMENT_TYPE_MIXED or
 *          XML_ELEMENT_TYPE_ELEMENT) or -1 on error
6588
 */
6589
6590
int
6591
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6592
118k
                           xmlElementContent **result) {
6593
6594
118k
    xmlElementContentPtr tree = NULL;
    /* Input depth at the opening '(', used for entity boundary checks */
6595
118k
    int openInputNr = ctxt->inputNr;
6596
118k
    int res;
6597
6598
118k
    *result = NULL;
6599
6600
118k
    if (RAW != '(') {
6601
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6602
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6603
0
  return(-1);
6604
0
    }
6605
118k
    NEXT;
6606
118k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6607
118k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6608
41.0k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6609
41.0k
  res = XML_ELEMENT_TYPE_MIXED;
6610
77.7k
    } else {
6611
77.7k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6612
77.7k
  res = XML_ELEMENT_TYPE_ELEMENT;
6613
77.7k
    }
    /* Both parsers return NULL on failure */
6614
118k
    if (tree == NULL)
6615
33.1k
        return(-1);
6616
85.6k
    SKIP_BLANKS_PE;
6617
85.6k
    *result = tree;
6618
85.6k
    return(res);
6619
118k
}
6620
6621
/**
6622
 * Parse an element declaration. Always consumes '<!'.
6623
 *
6624
 * @deprecated Internal function, don't use.
6625
 *
6626
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6627
 *
6628
 * [ VC: Unique Element Type Declaration ]
6629
 * No element type may be declared more than once
6630
 *
 * On success the declaration is passed to the SAX elementDecl callback
 * when one is set and SAX is not disabled. The parsed content tree is
 * freed here unless the callback took it over, which is detected by the
 * callback having set content->parent.
 *
6631
 * @param ctxt  an XML parser context
6632
 * @returns the type of the element, or -1 in case of error
6633
 */
6634
int
6635
159k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6636
159k
    const xmlChar *name;
6637
159k
    int ret = -1;
6638
159k
    xmlElementContentPtr content  = NULL;
6639
6640
159k
    if ((CUR != '<') || (NXT(1) != '!'))
6641
0
        return(ret);
6642
159k
    SKIP(2);
6643
6644
    /* GROW; done in the caller */
    /* If the keyword is not 'ELEMENT', -1 is returned without an error */
6645
159k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6646
157k
#ifdef LIBXML_VALID_ENABLED
6647
157k
  int oldInputNr = ctxt->inputNr;
6648
157k
#endif
6649
6650
157k
  SKIP(7);
6651
157k
  if (SKIP_BLANKS_PE == 0) {
6652
1.74k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6653
1.74k
               "Space required after 'ELEMENT'\n");
6654
1.74k
      return(-1);
6655
1.74k
  }
6656
156k
        name = xmlParseName(ctxt);
6657
156k
  if (name == NULL) {
6658
4.00k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6659
4.00k
         "xmlParseElementDecl: no name for Element\n");
6660
4.00k
      return(-1);
6661
4.00k
  }
        /* Missing space is reported but parsing continues */
6662
152k
  if (SKIP_BLANKS_PE == 0) {
6663
34.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6664
34.4k
         "Space required after the element name\n");
6665
34.4k
  }
6666
152k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6667
19.4k
      SKIP(5);
6668
      /*
6669
       * Element must always be empty.
6670
       */
6671
19.4k
      ret = XML_ELEMENT_TYPE_EMPTY;
6672
132k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6673
7.79k
             (NXT(2) == 'Y')) {
6674
7.51k
      SKIP(3);
6675
      /*
6676
       * Element is a generic container.
6677
       */
6678
7.51k
      ret = XML_ELEMENT_TYPE_ANY;
6679
125k
  } else if (RAW == '(') {
6680
118k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6681
118k
            if (ret <= 0)
6682
33.1k
                return(-1);
6683
118k
  } else {
6684
      /*
6685
       * [ WFC: PEs in Internal Subset ] error handling.
6686
       */
6687
6.33k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6688
6.33k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6689
6.33k
      return(-1);
6690
6.33k
  }
6691
6692
112k
  SKIP_BLANKS_PE;
6693
6694
        /*
         * NOTE(review): when '>' is missing the content tree is freed
         * but ret (the element type, not -1) is still returned below -
         * confirm that callers rely on the reported error instead.
         */
112k
  if (RAW != '>') {
6695
13.3k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6696
13.3k
      if (content != NULL) {
6697
10.8k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6698
10.8k
      }
6699
99.1k
  } else {
6700
99.1k
#ifdef LIBXML_VALID_ENABLED
6701
99.1k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6702
75
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6703
75
                                 "Element declaration doesn't start and stop in"
6704
75
                                 " the same entity\n",
6705
75
                                 NULL, NULL);
6706
75
      }
6707
99.1k
#endif
6708
6709
99.1k
      NEXT;
6710
99.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6711
82.9k
    (ctxt->sax->elementDecl != NULL)) {
                /* Cleared so that adoption by the callback can be detected */
6712
82.9k
    if (content != NULL)
6713
60.4k
        content->parent = NULL;
6714
82.9k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6715
82.9k
                           content);
6716
82.9k
    if ((content != NULL) && (content->parent == NULL)) {
6717
        /*
6718
         * this is a trick: if xmlAddElementDecl is called,
6719
         * instead of copying the full tree it is plugged directly
6720
         * if called from the parser. Avoid duplicating the
6721
         * interfaces or change the API/ABI
6722
         */
6723
15.3k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6724
15.3k
    }
6725
82.9k
      } else if (content != NULL) {
6726
14.2k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6727
14.2k
      }
6728
99.1k
  }
6729
112k
    }
6730
113k
    return(ret);
6731
159k
}
6732
6733
/**
6734
 * Parse a conditional section. Always consumes '<!['.
6735
 *
6736
 *     [61] conditionalSect ::= includeSect | ignoreSect
6737
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6738
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6739
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6740
 *                                 Ignore)*
6741
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * Nested INCLUDE sections are handled iteratively with a depth counter
 * instead of recursion; the function returns once the counter is back to
 * zero, or right after reporting a fatal error.
 *
6742
 * @param ctxt  an XML parser context
6743
 */
6744
6745
static void
6746
30.4k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /*
     * depth:       number of INCLUDE sections currently open
     * isFreshPE:   set after a PE reference pushed a new input and no
     *              markup has been parsed from it yet
     * oldInputNr:  input depth on entry, never popped below this level
     * declInputNr: input depth used for the entity boundary checks
     */
6747
30.4k
    size_t depth = 0;
6748
30.4k
    int isFreshPE = 0;
6749
30.4k
    int oldInputNr = ctxt->inputNr;
6750
30.4k
    int declInputNr = ctxt->inputNr;
6751
6752
78.9k
    while (!PARSER_STOPPED(ctxt)) {
6753
78.8k
        if (ctxt->input->cur >= ctxt->input->end) {
            /* End of the current input: pop a PE or report truncation */
6754
3.84k
            if (ctxt->inputNr <= oldInputNr) {
6755
1.93k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756
1.93k
                return;
6757
1.93k
            }
6758
6759
1.91k
            xmlPopPE(ctxt);
6760
1.91k
            declInputNr = ctxt->inputNr;
6761
75.0k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6762
33.4k
            SKIP(3);
6763
33.4k
            SKIP_BLANKS_PE;
6764
6765
33.4k
            isFreshPE = 0;
6766
6767
33.4k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768
12.2k
                SKIP(7);
6769
12.2k
                SKIP_BLANKS_PE;
6770
12.2k
                if (RAW != '[') {
6771
1.43k
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772
1.43k
                    return;
6773
1.43k
                }
6774
10.8k
#ifdef LIBXML_VALID_ENABLED
6775
10.8k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6776
71
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777
71
                                     "All markup of the conditional section is"
6778
71
                                     " not in the same entity\n",
6779
71
                                     NULL, NULL);
6780
71
                }
6781
10.8k
#endif
6782
10.8k
                NEXT;
6783
6784
                /* Contents are parsed by the following loop iterations */
10.8k
                depth++;
6785
21.2k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
                /* Nesting level of '<![' seen inside the ignored text */
6786
11.1k
                size_t ignoreDepth = 0;
6787
6788
11.1k
                SKIP(6);
6789
11.1k
                SKIP_BLANKS_PE;
6790
11.1k
                if (RAW != '[') {
6791
1.75k
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6792
1.75k
                    return;
6793
1.75k
                }
6794
9.43k
#ifdef LIBXML_VALID_ENABLED
6795
9.43k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6796
58
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
58
                                     "All markup of the conditional section is"
6798
58
                                     " not in the same entity\n",
6799
58
                                     NULL, NULL);
6800
58
                }
6801
9.43k
#endif
6802
9.43k
                NEXT;
6803
6804
                /*
                 * Skip the ignored text, only tracking nested '<![' and
                 * ']]>' until the matching terminator is found.
                 */
2.33M
                while (PARSER_STOPPED(ctxt) == 0) {
6805
2.33M
                    if (RAW == 0) {
6806
3.92k
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807
3.92k
                        return;
6808
3.92k
                    }
6809
2.33M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810
8.88k
                        SKIP(3);
6811
8.88k
                        ignoreDepth++;
6812
                        /* Check for integer overflow */
6813
8.88k
                        if (ignoreDepth == 0) {
6814
0
                            xmlErrMemory(ctxt);
6815
0
                            return;
6816
0
                        }
6817
2.32M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6818
12.3k
                               (NXT(2) == '>')) {
6819
8.20k
                        SKIP(3);
6820
8.20k
                        if (ignoreDepth == 0)
6821
5.46k
                            break;
6822
2.73k
                        ignoreDepth--;
6823
2.31M
                    } else {
6824
2.31M
                        NEXT;
6825
2.31M
                    }
6826
2.33M
                }
6827
6828
5.50k
#ifdef LIBXML_VALID_ENABLED
6829
5.50k
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6830
48
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831
48
                                     "All markup of the conditional section is"
6832
48
                                     " not in the same entity\n",
6833
48
                                     NULL, NULL);
6834
48
                }
6835
5.50k
#endif
6836
10.0k
            } else {
6837
10.0k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838
10.0k
                return;
6839
10.0k
            }
6840
41.5k
        } else if ((depth > 0) &&
6841
41.5k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
            /* ']]>' directly at the start of a just-entered PE is an error */
6842
5.79k
            if (isFreshPE) {
6843
14
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6844
14
                               "Parameter entity must match "
6845
14
                               "extSubsetDecl\n");
6846
14
                return;
6847
14
            }
6848
6849
5.77k
            depth--;
6850
5.77k
#ifdef LIBXML_VALID_ENABLED
6851
5.77k
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6852
29
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6853
29
                                 "All markup of the conditional section is not"
6854
29
                                 " in the same entity\n",
6855
29
                                 NULL, NULL);
6856
29
            }
6857
5.77k
#endif
6858
5.77k
            SKIP(3);
6859
35.7k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6860
27.9k
            isFreshPE = 0;
6861
27.9k
            xmlParseMarkupDecl(ctxt);
6862
27.9k
        } else if (RAW == '%') {
6863
7.46k
            xmlParsePERefInternal(ctxt, 1);
            /* A new input was pushed: remember that nothing was parsed yet */
6864
7.46k
            if (ctxt->inputNr > declInputNr) {
6865
1.95k
                isFreshPE = 1;
6866
1.95k
                declInputNr = ctxt->inputNr;
6867
1.95k
            }
6868
7.46k
        } else {
6869
281
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6870
281
            return;
6871
281
        }
6872
6873
        /* All INCLUDE sections closed (or a lone IGNORE section skipped) */
59.4k
        if (depth == 0)
6874
11.0k
            break;
6875
6876
48.4k
        SKIP_BLANKS;
6877
48.4k
        SHRINK;
6878
48.4k
        GROW;
6879
48.4k
    }
6880
30.4k
}
6881
6882
/**
6883
 * Parse markup declarations. Always consumes '<!' or '<?'.
6884
 *
6885
 * @deprecated Internal function, don't use.
6886
 *
6887
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6888
 *                         NotationDecl | PI | Comment
6889
 *
6890
 * [ VC: Proper Declaration/PE Nesting ]
6891
 * Parameter-entity replacement text must be properly nested with
6892
 * markup declarations. That is to say, if either the first character
6893
 * or the last character of a markup declaration (markupdecl above) is
6894
 * contained in the replacement text for a parameter-entity reference,
6895
 * both must be contained in the same replacement text.
6896
 *
6897
 * [ WFC: PEs in Internal Subset ]
6898
 * In the internal DTD subset, parameter-entity references can occur
6899
 * only where markup declarations can occur, not within markup declarations.
6900
 * (This does not apply to references that occur in external parameter
6901
 * entities or to the external subset.)
6902
 *
6903
 * @param ctxt  an XML parser context
6904
 */
6905
void
6906
3.46M
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6907
3.46M
    GROW;
6908
3.46M
    if (CUR == '<') {
6909
3.46M
        if (NXT(1) == '!') {
6910
3.35M
      switch (NXT(2)) {
6911
672k
          case 'E':
6912
672k
        if (NXT(3) == 'L')
6913
159k
      xmlParseElementDecl(ctxt);
6914
513k
        else if (NXT(3) == 'N')
6915
513k
      xmlParseEntityDecl(ctxt);
6916
389
                    else
6917
389
                        SKIP(2);
6918
672k
        break;
6919
437k
          case 'A':
6920
437k
        xmlParseAttributeListDecl(ctxt);
6921
437k
        break;
6922
45.1k
          case 'N':
6923
45.1k
        xmlParseNotationDecl(ctxt);
6924
45.1k
        break;
6925
2.15M
          case '-':
6926
2.15M
        xmlParseComment(ctxt);
6927
2.15M
        break;
6928
51.5k
    default:
6929
51.5k
                    xmlFatalErr(ctxt,
6930
51.5k
                                ctxt->inSubset == 2 ?
6931
4.34k
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6932
51.5k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6933
51.5k
                                NULL);
6934
51.5k
                    SKIP(2);
6935
51.5k
        break;
6936
3.35M
      }
6937
3.35M
  } else if (NXT(1) == '?') {
6938
110k
      xmlParsePI(ctxt);
6939
110k
  }
6940
3.46M
    }
6941
3.46M
}
6942
6943
/**
6944
 * Parse an XML declaration header for external entities
6945
 *
6946
 * @deprecated Internal function, don't use.
6947
 *
6948
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6949
 * @param ctxt  an XML parser context
6950
 */
6951
6952
void
6953
302k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6954
302k
    xmlChar *version;
6955
6956
    /*
6957
     * We know that '<?xml' is here.
6958
     */
6959
302k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6960
302k
  SKIP(5);
6961
302k
    } else {
6962
43
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6963
43
  return;
6964
43
    }
6965
6966
302k
    if (SKIP_BLANKS == 0) {
6967
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6968
0
           "Space needed after '<?xml'\n");
6969
0
    }
6970
6971
    /*
6972
     * We may have the VersionInfo here.
6973
     */
6974
302k
    version = xmlParseVersionInfo(ctxt);
6975
302k
    if (version == NULL) {
6976
243k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6977
243k
        if (version == NULL) {
6978
79
            xmlErrMemory(ctxt);
6979
79
            return;
6980
79
        }
6981
243k
    } else {
6982
59.3k
  if (SKIP_BLANKS == 0) {
6983
3.09k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6984
3.09k
               "Space needed here\n");
6985
3.09k
  }
6986
59.3k
    }
6987
302k
    ctxt->input->version = version;
6988
6989
    /*
6990
     * We must have the encoding declaration
6991
     */
6992
302k
    xmlParseEncodingDecl(ctxt);
6993
6994
302k
    SKIP_BLANKS;
6995
302k
    if ((RAW == '?') && (NXT(1) == '>')) {
6996
9.17k
        SKIP(2);
6997
293k
    } else if (RAW == '>') {
6998
        /* Deprecated old WD ... */
6999
2.06k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7000
2.06k
  NEXT;
7001
291k
    } else {
7002
291k
        int c;
7003
7004
291k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7005
2.08G
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7006
2.08G
            NEXT;
7007
2.08G
            if (c == '>')
7008
221k
                break;
7009
2.08G
        }
7010
291k
    }
7011
302k
}
7012
7013
/**
7014
 * Parse Markup declarations from an external subset
7015
 *
7016
 * @deprecated Internal function, don't use.
7017
 *
7018
 *     [30] extSubset ::= textDecl? extSubsetDecl
7019
 *
7020
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7021
 *                             PEReference | S) *
7022
 * @param ctxt  an XML parser context
7023
 * @param publicId  the public identifier
7024
 * @param systemId  the system identifier (URL)
7025
 */
7026
void
7027
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7028
12.2k
                       const xmlChar *systemId) {
7029
12.2k
    int oldInputNr;
7030
7031
12.2k
    xmlCtxtInitializeLate(ctxt);
7032
7033
12.2k
    xmlDetectEncoding(ctxt);
7034
7035
12.2k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7036
631
  xmlParseTextDecl(ctxt);
7037
631
    }
7038
12.2k
    if (ctxt->myDoc == NULL) {
7039
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7040
0
  if (ctxt->myDoc == NULL) {
7041
0
      xmlErrMemory(ctxt);
7042
0
      return;
7043
0
  }
7044
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7045
0
    }
7046
12.2k
    if ((ctxt->myDoc->intSubset == NULL) &&
7047
2.17k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7048
11
        xmlErrMemory(ctxt);
7049
11
    }
7050
7051
12.2k
    ctxt->inSubset = 2;
7052
12.2k
    oldInputNr = ctxt->inputNr;
7053
7054
12.2k
    SKIP_BLANKS;
7055
1.65M
    while (!PARSER_STOPPED(ctxt)) {
7056
1.65M
        if (ctxt->input->cur >= ctxt->input->end) {
7057
5.53k
            if (ctxt->inputNr <= oldInputNr) {
7058
2.96k
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7059
2.96k
                break;
7060
2.96k
            }
7061
7062
2.56k
            xmlPopPE(ctxt);
7063
1.64M
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7064
15.9k
            xmlParseConditionalSections(ctxt);
7065
1.63M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7066
1.61M
            xmlParseMarkupDecl(ctxt);
7067
1.61M
        } else if (RAW == '%') {
7068
6.94k
            xmlParsePERefInternal(ctxt, 1);
7069
6.94k
        } else {
7070
5.89k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7071
7072
6.74k
            while (ctxt->inputNr > oldInputNr)
7073
857
                xmlPopPE(ctxt);
7074
5.89k
            break;
7075
5.89k
        }
7076
1.64M
        SKIP_BLANKS;
7077
1.64M
        SHRINK;
7078
1.64M
        GROW;
7079
1.64M
    }
7080
12.2k
}
7081
7082
/**
7083
 * Parse and handle entity references in content, depending on the SAX
7084
 * interface, this may end-up in a call to character() if this is a
7085
 * CharRef, a predefined entity, if there is no reference() callback.
7086
 * or if the parser was asked to switch to that mode.
7087
 *
7088
 * @deprecated Internal function, don't use.
7089
 *
7090
 * Always consumes '&'.
7091
 *
7092
 *     [67] Reference ::= EntityRef | CharRef
7093
 * @param ctxt  an XML parser context
7094
 */
7095
void
7096
1.56M
xmlParseReference(xmlParserCtxt *ctxt) {
7097
1.56M
    xmlEntityPtr ent = NULL;
7098
1.56M
    const xmlChar *name;
7099
1.56M
    xmlChar *val;
7100
7101
1.56M
    if (RAW != '&')
7102
0
        return;
7103
7104
    /*
7105
     * Simple case of a CharRef
7106
     */
7107
1.56M
    if (NXT(1) == '#') {
7108
524k
  int i = 0;
7109
524k
  xmlChar out[16];
7110
524k
  int value = xmlParseCharRef(ctxt);
7111
7112
524k
  if (value == 0)
7113
167k
      return;
7114
7115
        /*
7116
         * Just encode the value in UTF-8
7117
         */
7118
357k
        COPY_BUF(out, i, value);
7119
357k
        out[i] = 0;
7120
357k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7121
357k
            (!ctxt->disableSAX))
7122
308k
            ctxt->sax->characters(ctxt->userData, out, i);
7123
357k
  return;
7124
524k
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
1.04M
    name = xmlParseEntityRefInternal(ctxt);
7130
1.04M
    if (name == NULL)
7131
388k
        return;
7132
655k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7133
655k
    if (ent == NULL) {
7134
        /*
7135
         * Create a reference for undeclared entities.
7136
         */
7137
376k
        if ((ctxt->replaceEntities == 0) &&
7138
305k
            (ctxt->sax != NULL) &&
7139
305k
            (ctxt->disableSAX == 0) &&
7140
299k
            (ctxt->sax->reference != NULL)) {
7141
299k
            ctxt->sax->reference(ctxt->userData, name);
7142
299k
        }
7143
376k
        return;
7144
376k
    }
7145
279k
    if (!ctxt->wellFormed)
7146
140k
  return;
7147
7148
    /* special case of predefined entities */
7149
138k
    if ((ent->name == NULL) ||
7150
138k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7151
37.3k
  val = ent->content;
7152
37.3k
  if (val == NULL) return;
7153
  /*
7154
   * inline the entity.
7155
   */
7156
37.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7157
37.3k
      (!ctxt->disableSAX))
7158
37.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7159
37.3k
  return;
7160
37.3k
    }
7161
7162
    /*
7163
     * Some users try to parse entities on their own and used to set
7164
     * the renamed "checked" member. Fix the flags to cover this
7165
     * case.
7166
     */
7167
101k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7168
0
        ent->flags |= XML_ENT_PARSED;
7169
7170
    /*
7171
     * The first reference to the entity trigger a parsing phase
7172
     * where the ent->children is filled with the result from
7173
     * the parsing.
7174
     * Note: external parsed entities will not be loaded, it is not
7175
     * required for a non-validating parser, unless the parsing option
7176
     * of validating, or substituting entities were given. Doing so is
7177
     * far more secure as the parser will only process data coming from
7178
     * the document entity by default.
7179
     *
7180
     * FIXME: This doesn't work correctly since entities can be
7181
     * expanded with different namespace declarations in scope.
7182
     * For example:
7183
     *
7184
     * <!DOCTYPE doc [
7185
     *   <!ENTITY ent "<ns:elem/>">
7186
     * ]>
7187
     * <doc>
7188
     *   <decl1 xmlns:ns="urn:ns1">
7189
     *     &ent;
7190
     *   </decl1>
7191
     *   <decl2 xmlns:ns="urn:ns2">
7192
     *     &ent;
7193
     *   </decl2>
7194
     * </doc>
7195
     *
7196
     * Proposed fix:
7197
     *
7198
     * - Ignore current namespace declarations when parsing the
7199
     *   entity. If a prefix can't be resolved, don't report an error
7200
     *   but mark it as unresolved.
7201
     * - Try to resolve these prefixes when expanding the entity.
7202
     *   This will require a specialized version of xmlStaticCopyNode
7203
     *   which can also make use of the namespace hash table to avoid
7204
     *   quadratic behavior.
7205
     *
7206
     * Alternatively, we could simply reparse the entity on each
7207
     * expansion like we already do with custom SAX callbacks.
7208
     * External entity content should be cached in this case.
7209
     */
7210
101k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7211
39.0k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7212
37.8k
         ((ctxt->replaceEntities) ||
7213
98.6k
          (ctxt->validate)))) {
7214
98.6k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7215
26.5k
            xmlCtxtParseEntity(ctxt, ent);
7216
72.1k
        } else if (ent->children == NULL) {
7217
            /*
7218
             * Probably running in SAX mode and the callbacks don't
7219
             * build the entity content. Parse the entity again.
7220
             *
7221
             * This will also be triggered in normal tree builder mode
7222
             * if an entity happens to be empty, causing unnecessary
7223
             * reloads. It's hard to come up with a reliable check in
7224
             * which mode we're running.
7225
             */
7226
24.5k
            xmlCtxtParseEntity(ctxt, ent);
7227
24.5k
        }
7228
98.6k
    }
7229
7230
    /*
7231
     * We also check for amplification if entities aren't substituted.
7232
     * They might be expanded later.
7233
     */
7234
101k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7235
581
        return;
7236
7237
101k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7238
8.51k
        return;
7239
7240
92.5k
    if (ctxt->replaceEntities == 0) {
7241
  /*
7242
   * Create a reference
7243
   */
7244
22.5k
        if (ctxt->sax->reference != NULL)
7245
22.5k
      ctxt->sax->reference(ctxt->userData, ent->name);
7246
70.0k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7247
46.2k
        xmlNodePtr copy, cur;
7248
7249
        /*
7250
         * Seems we are generating the DOM content, copy the tree
7251
   */
7252
46.2k
        cur = ent->children;
7253
7254
        /*
7255
         * Handle first text node with SAX to coalesce text efficiently
7256
         */
7257
46.2k
        if ((cur->type == XML_TEXT_NODE) ||
7258
35.9k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7259
35.9k
            int len = xmlStrlen(cur->content);
7260
7261
35.9k
            if ((cur->type == XML_TEXT_NODE) ||
7262
34.7k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7263
34.7k
                if (ctxt->sax->characters != NULL)
7264
34.7k
                    ctxt->sax->characters(ctxt->userData, cur->content, len);
7265
34.7k
            } else {
7266
1.24k
                if (ctxt->sax->cdataBlock != NULL)
7267
1.24k
                    ctxt->sax->cdataBlock(ctxt->userData, cur->content, len);
7268
1.24k
            }
7269
7270
35.9k
            cur = cur->next;
7271
35.9k
        }
7272
7273
361k
        while (cur != NULL) {
7274
326k
            xmlNodePtr last;
7275
7276
            /*
7277
             * Handle last text node with SAX to coalesce text efficiently
7278
             */
7279
326k
            if ((cur->next == NULL) &&
7280
27.0k
                ((cur->type == XML_TEXT_NODE) ||
7281
18.4k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7282
10.3k
                int len = xmlStrlen(cur->content);
7283
7284
10.3k
                if ((cur->type == XML_TEXT_NODE) ||
7285
8.59k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7286
8.59k
                    if (ctxt->sax->characters != NULL)
7287
8.59k
                        ctxt->sax->characters(ctxt->userData, cur->content,
7288
8.59k
                                              len);
7289
8.59k
                } else {
7290
1.80k
                    if (ctxt->sax->cdataBlock != NULL)
7291
1.80k
                        ctxt->sax->cdataBlock(ctxt->userData, cur->content,
7292
1.80k
                                              len);
7293
1.80k
                }
7294
7295
10.3k
                break;
7296
10.3k
            }
7297
7298
            /*
7299
             * Reset coalesce buffer stats only for non-text nodes.
7300
             */
7301
316k
            ctxt->nodemem = 0;
7302
316k
            ctxt->nodelen = 0;
7303
7304
316k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7305
7306
316k
            if (copy == NULL) {
7307
671
                xmlErrMemory(ctxt);
7308
671
                break;
7309
671
            }
7310
7311
315k
            if (ctxt->parseMode == XML_PARSE_READER) {
7312
                /* Needed for reader */
7313
48.1k
                copy->extra = cur->extra;
7314
                /* Maybe needed for reader */
7315
48.1k
                copy->_private = cur->_private;
7316
48.1k
            }
7317
7318
315k
            copy->parent = ctxt->node;
7319
315k
            last = ctxt->node->last;
7320
315k
            if (last == NULL) {
7321
2.05k
                ctxt->node->children = copy;
7322
313k
            } else {
7323
313k
                last->next = copy;
7324
313k
                copy->prev = last;
7325
313k
            }
7326
315k
            ctxt->node->last = copy;
7327
7328
315k
            cur = cur->next;
7329
315k
        }
7330
46.2k
    }
7331
92.5k
}
7332
7333
static void
7334
1.39M
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7335
    /*
7336
     * [ WFC: Entity Declared ]
7337
     * In a document without any DTD, a document with only an
7338
     * internal DTD subset which contains no parameter entity
7339
     * references, or a document with "standalone='yes'", the
7340
     * Name given in the entity reference must match that in an
7341
     * entity declaration, except that well-formed documents
7342
     * need not declare any of the following entities: amp, lt,
7343
     * gt, apos, quot.
7344
     * The declaration of a parameter entity must precede any
7345
     * reference to it.
7346
     * Similarly, the declaration of a general entity must
7347
     * precede any reference to it which appears in a default
7348
     * value in an attribute-list declaration. Note that if
7349
     * entities are declared in the external subset or in
7350
     * external parameter entities, a non-validating processor
7351
     * is not obligated to read and process their declarations;
7352
     * for such documents, the rule that an entity must be
7353
     * declared is a well-formedness constraint only if
7354
     * standalone='yes'.
7355
     */
7356
1.39M
    if ((ctxt->standalone == 1) ||
7357
1.39M
        ((ctxt->hasExternalSubset == 0) &&
7358
1.37M
         (ctxt->hasPErefs == 0))) {
7359
849k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7360
849k
                          "Entity '%s' not defined\n", name);
7361
849k
#ifdef LIBXML_VALID_ENABLED
7362
849k
    } else if (ctxt->validate) {
7363
        /*
7364
         * [ VC: Entity Declared ]
7365
         * In a document with an external subset or external
7366
         * parameter entities with "standalone='no'", ...
7367
         * ... The declaration of a parameter entity must
7368
         * precede any reference to it...
7369
         */
7370
234k
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7371
234k
                         "Entity '%s' not defined\n", name, NULL);
7372
234k
#endif
7373
314k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7374
163k
               ((ctxt->replaceEntities) &&
7375
287k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7376
        /*
7377
         * Also raise a non-fatal error
7378
         *
7379
         * - if the external subset is loaded and all entity declarations
7380
         *   should be available, or
7381
         * - entity substition was requested without restricting
7382
         *   external entity access.
7383
         */
7384
287k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7385
287k
                     "Entity '%s' not defined\n", name);
7386
287k
    } else {
7387
27.2k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7388
27.2k
                      "Entity '%s' not defined\n", name, NULL);
7389
27.2k
    }
7390
7391
1.39M
    ctxt->valid = 0;
7392
1.39M
}
7393
7394
static xmlEntityPtr
7395
9.39M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7396
9.39M
    xmlEntityPtr ent = NULL;
7397
7398
    /*
7399
     * Predefined entities override any extra definition
7400
     */
7401
9.39M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7402
5.89M
        ent = xmlGetPredefinedEntity(name);
7403
5.89M
        if (ent != NULL)
7404
1.87M
            return(ent);
7405
5.89M
    }
7406
7407
    /*
7408
     * Ask first SAX for entity resolution, otherwise try the
7409
     * entities which may have stored in the parser context.
7410
     */
7411
7.52M
    if (ctxt->sax != NULL) {
7412
7.52M
  if (ctxt->sax->getEntity != NULL)
7413
7.52M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7414
7.52M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7415
40.9k
      (ctxt->options & XML_PARSE_OLDSAX))
7416
3.88k
      ent = xmlGetPredefinedEntity(name);
7417
7.52M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7418
40.9k
      (ctxt->userData==ctxt)) {
7419
40.9k
      ent = xmlSAX2GetEntity(ctxt, name);
7420
40.9k
  }
7421
7.52M
    }
7422
7423
7.52M
    if (ent == NULL) {
7424
1.11M
        xmlHandleUndeclaredEntity(ctxt, name);
7425
1.11M
    }
7426
7427
    /*
7428
     * [ WFC: Parsed Entity ]
7429
     * An entity reference must not contain the name of an
7430
     * unparsed entity
7431
     */
7432
6.41M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7433
1.56k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7434
1.56k
     "Entity reference to unparsed entity %s\n", name);
7435
1.56k
        ent = NULL;
7436
1.56k
    }
7437
7438
    /*
7439
     * [ WFC: No External Entity References ]
7440
     * Attribute values cannot contain direct or indirect
7441
     * entity references to external entities.
7442
     */
7443
6.40M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7444
100k
        if (inAttr) {
7445
8.66k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7446
8.66k
                 "Attribute references external entity '%s'\n", name);
7447
8.66k
            ent = NULL;
7448
8.66k
        }
7449
100k
    }
7450
7451
7.52M
    return(ent);
7452
9.39M
}
7453
7454
/**
7455
 * Parse an entity reference. Always consumes '&'.
7456
 *
7457
 *     [68] EntityRef ::= '&' Name ';'
7458
 *
7459
 * @param ctxt  an XML parser context
7460
 * @returns the name, or NULL in case of error.
7461
 */
7462
static const xmlChar *
7463
3.84M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7464
3.84M
    const xmlChar *name;
7465
7466
3.84M
    GROW;
7467
7468
3.84M
    if (RAW != '&')
7469
0
        return(NULL);
7470
3.84M
    NEXT;
7471
3.84M
    name = xmlParseName(ctxt);
7472
3.84M
    if (name == NULL) {
7473
454k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7474
454k
           "xmlParseEntityRef: no name\n");
7475
454k
        return(NULL);
7476
454k
    }
7477
3.39M
    if (RAW != ';') {
7478
286k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7479
286k
  return(NULL);
7480
286k
    }
7481
3.10M
    NEXT;
7482
7483
3.10M
    return(name);
7484
3.39M
}
7485
7486
/**
7487
 * @deprecated Internal function, don't use.
7488
 *
7489
 * @param ctxt  an XML parser context
7490
 * @returns the xmlEntity if found, or NULL otherwise.
7491
 */
7492
xmlEntity *
7493
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7494
0
    const xmlChar *name;
7495
7496
0
    if (ctxt == NULL)
7497
0
        return(NULL);
7498
7499
0
    name = xmlParseEntityRefInternal(ctxt);
7500
0
    if (name == NULL)
7501
0
        return(NULL);
7502
7503
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7504
0
}
7505
7506
/**
7507
 * Parse ENTITY references declarations, but this version parses it from
7508
 * a string value.
7509
 *
7510
 *     [68] EntityRef ::= '&' Name ';'
7511
 *
7512
 * [ WFC: Entity Declared ]
7513
 * In a document without any DTD, a document with only an internal DTD
7514
 * subset which contains no parameter entity references, or a document
7515
 * with "standalone='yes'", the Name given in the entity reference
7516
 * must match that in an entity declaration, except that well-formed
7517
 * documents need not declare any of the following entities: amp, lt,
7518
 * gt, apos, quot.  The declaration of a parameter entity must precede
7519
 * any reference to it.  Similarly, the declaration of a general entity
7520
 * must precede any reference to it which appears in a default value in an
7521
 * attribute-list declaration. Note that if entities are declared in the
7522
 * external subset or in external parameter entities, a non-validating
7523
 * processor is not obligated to read and process their declarations;
7524
 * for such documents, the rule that an entity must be declared is a
7525
 * well-formedness constraint only if standalone='yes'.
7526
 *
7527
 * [ WFC: Parsed Entity ]
7528
 * An entity reference must not contain the name of an unparsed entity
7529
 *
7530
 * @param ctxt  an XML parser context
7531
 * @param str  a pointer to an index in the string
7532
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7533
 * is updated to the current location in the string.
7534
 */
7535
static xmlChar *
7536
6.29M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7537
6.29M
    xmlChar *name;
7538
6.29M
    const xmlChar *ptr;
7539
6.29M
    xmlChar cur;
7540
7541
6.29M
    if ((str == NULL) || (*str == NULL))
7542
0
        return(NULL);
7543
6.29M
    ptr = *str;
7544
6.29M
    cur = *ptr;
7545
6.29M
    if (cur != '&')
7546
0
  return(NULL);
7547
7548
6.29M
    ptr++;
7549
6.29M
    name = xmlParseStringName(ctxt, &ptr);
7550
6.29M
    if (name == NULL) {
7551
253
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7552
253
           "xmlParseStringEntityRef: no name\n");
7553
253
  *str = ptr;
7554
253
  return(NULL);
7555
253
    }
7556
6.29M
    if (*ptr != ';') {
7557
91
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7558
91
        xmlFree(name);
7559
91
  *str = ptr;
7560
91
  return(NULL);
7561
91
    }
7562
6.29M
    ptr++;
7563
7564
6.29M
    *str = ptr;
7565
6.29M
    return(name);
7566
6.29M
}
7567
7568
/**
7569
 * Parse a parameter entity reference. Always consumes '%'.
7570
 *
7571
 * The entity content is handled directly by pushing it's content as
7572
 * a new input stream.
7573
 *
7574
 *     [69] PEReference ::= '%' Name ';'
7575
 *
7576
 * [ WFC: No Recursion ]
7577
 * A parsed entity must not contain a recursive
7578
 * reference to itself, either directly or indirectly.
7579
 *
7580
 * [ WFC: Entity Declared ]
7581
 * In a document without any DTD, a document with only an internal DTD
7582
 * subset which contains no parameter entity references, or a document
7583
 * with "standalone='yes'", ...  ... The declaration of a parameter
7584
 * entity must precede any reference to it...
7585
 *
7586
 * [ VC: Entity Declared ]
7587
 * In a document with an external subset or external parameter entities
7588
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7589
 * must precede any reference to it...
7590
 *
7591
 * [ WFC: In DTD ]
7592
 * Parameter-entity references may only appear in the DTD.
7593
 * NOTE: misleading but this is handled.
7594
 *
7595
 * @param ctxt  an XML parser context
7596
 * @param markupDecl  whether the PERef starts a markup declaration
7597
 */
7598
static void
7599
941k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7600
941k
    const xmlChar *name;
7601
941k
    xmlEntityPtr entity = NULL;
7602
941k
    xmlParserInputPtr input;
7603
7604
941k
    if (RAW != '%')
7605
0
        return;
7606
941k
    NEXT;
7607
941k
    name = xmlParseName(ctxt);
7608
941k
    if (name == NULL) {
7609
73.1k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7610
73.1k
  return;
7611
73.1k
    }
7612
867k
    if (RAW != ';') {
7613
86.3k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7614
86.3k
        return;
7615
86.3k
    }
7616
7617
781k
    NEXT;
7618
7619
    /* Must be set before xmlHandleUndeclaredEntity */
7620
781k
    ctxt->hasPErefs = 1;
7621
7622
    /*
7623
     * Request the entity from SAX
7624
     */
7625
781k
    if ((ctxt->sax != NULL) &&
7626
781k
  (ctxt->sax->getParameterEntity != NULL))
7627
781k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7628
7629
781k
    if (entity == NULL) {
7630
234k
        xmlHandleUndeclaredEntity(ctxt, name);
7631
546k
    } else {
7632
  /*
7633
   * Internal checking in case the entity quest barfed
7634
   */
7635
546k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7636
381k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7637
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7638
0
      "Internal: %%%s; is not a parameter entity\n",
7639
0
        name, NULL);
7640
546k
  } else {
7641
546k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7642
381k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7643
380k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7644
144k
      (ctxt->replaceEntities == 0) &&
7645
14.6k
      (ctxt->validate == 0))))
7646
2.95k
    return;
7647
7648
543k
            if (entity->flags & XML_ENT_EXPANDING) {
7649
651
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7650
651
                return;
7651
651
            }
7652
7653
543k
      input = xmlNewEntityInputStream(ctxt, entity);
7654
543k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7655
14.3k
                xmlFreeInputStream(input);
7656
14.3k
    return;
7657
14.3k
            }
7658
7659
528k
            entity->flags |= XML_ENT_EXPANDING;
7660
7661
528k
            if (markupDecl)
7662
472k
                input->flags |= XML_INPUT_MARKUP_DECL;
7663
7664
528k
            GROW;
7665
7666
528k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7667
364k
                xmlDetectEncoding(ctxt);
7668
7669
364k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7670
116k
                    (IS_BLANK_CH(NXT(5)))) {
7671
112k
                    xmlParseTextDecl(ctxt);
7672
112k
                }
7673
364k
            }
7674
528k
  }
7675
546k
    }
7676
781k
}
7677
7678
/**
7679
 * Parse a parameter entity reference.
7680
 *
7681
 * @deprecated Internal function, don't use.
7682
 *
7683
 * @param ctxt  an XML parser context
7684
 */
7685
void
7686
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7687
0
    xmlParsePERefInternal(ctxt, 0);
7688
0
}
7689
7690
/**
7691
 * Load the content of an entity.
7692
 *
7693
 * @param ctxt  an XML parser context
7694
 * @param entity  an unloaded system entity
7695
 * @returns 0 in case of success and -1 in case of failure
7696
 */
7697
static int
7698
231k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7699
231k
    xmlParserInputPtr oldinput, input = NULL;
7700
231k
    xmlParserInputPtr *oldinputTab;
7701
231k
    xmlChar *oldencoding;
7702
231k
    xmlChar *content = NULL;
7703
231k
    xmlResourceType rtype;
7704
231k
    size_t length, i;
7705
231k
    int oldinputNr, oldinputMax;
7706
231k
    int ret = -1;
7707
231k
    int res;
7708
7709
231k
    if ((ctxt == NULL) || (entity == NULL) ||
7710
231k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7711
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7712
231k
  (entity->content != NULL)) {
7713
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7714
0
              "xmlLoadEntityContent parameter error");
7715
0
        return(-1);
7716
0
    }
7717
7718
231k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7719
231k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7720
0
    else
7721
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7722
7723
231k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7724
231k
                            (char *) entity->ExternalID, rtype);
7725
231k
    if (input == NULL)
7726
4.40k
        return(-1);
7727
7728
226k
    oldinput = ctxt->input;
7729
226k
    oldinputNr = ctxt->inputNr;
7730
226k
    oldinputMax = ctxt->inputMax;
7731
226k
    oldinputTab = ctxt->inputTab;
7732
226k
    oldencoding = ctxt->encoding;
7733
7734
226k
    ctxt->input = NULL;
7735
226k
    ctxt->inputNr = 0;
7736
226k
    ctxt->inputMax = 1;
7737
226k
    ctxt->encoding = NULL;
7738
226k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7739
226k
    if (ctxt->inputTab == NULL) {
7740
30
        xmlErrMemory(ctxt);
7741
30
        xmlFreeInputStream(input);
7742
30
        goto error;
7743
30
    }
7744
7745
226k
    xmlBufResetInput(input->buf->buffer, input);
7746
7747
226k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7748
61
        xmlFreeInputStream(input);
7749
61
        goto error;
7750
61
    }
7751
7752
226k
    xmlDetectEncoding(ctxt);
7753
7754
    /*
7755
     * Parse a possible text declaration first
7756
     */
7757
226k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7758
188k
  xmlParseTextDecl(ctxt);
7759
        /*
7760
         * An XML-1.0 document can't reference an entity not XML-1.0
7761
         */
7762
188k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7763
58.9k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7764
16.2k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7765
16.2k
                           "Version mismatch between document and entity\n");
7766
16.2k
        }
7767
188k
    }
7768
7769
226k
    length = input->cur - input->base;
7770
226k
    xmlBufShrink(input->buf->buffer, length);
7771
226k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7772
7773
387k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7774
160k
        ;
7775
7776
226k
    xmlBufResetInput(input->buf->buffer, input);
7777
7778
226k
    if (res < 0) {
7779
13.4k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7780
13.4k
        goto error;
7781
13.4k
    }
7782
7783
213k
    length = xmlBufUse(input->buf->buffer);
7784
213k
    if (length > INT_MAX) {
7785
0
        xmlErrMemory(ctxt);
7786
0
        goto error;
7787
0
    }
7788
7789
213k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7790
213k
    if (content == NULL) {
7791
74
        xmlErrMemory(ctxt);
7792
74
        goto error;
7793
74
    }
7794
7795
51.5M
    for (i = 0; i < length; ) {
7796
51.5M
        int clen = length - i;
7797
51.5M
        int c = xmlGetUTF8Char(content + i, &clen);
7798
7799
51.5M
        if ((c < 0) || (!IS_CHAR(c))) {
7800
212k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7801
212k
                              "xmlLoadEntityContent: invalid char value %d\n",
7802
212k
                              content[i]);
7803
212k
            goto error;
7804
212k
        }
7805
51.3M
        i += clen;
7806
51.3M
    }
7807
7808
720
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7809
720
    entity->content = content;
7810
720
    entity->length = length;
7811
720
    content = NULL;
7812
720
    ret = 0;
7813
7814
226k
error:
7815
453k
    while (ctxt->inputNr > 0)
7816
226k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7817
226k
    xmlFree(ctxt->inputTab);
7818
226k
    xmlFree(ctxt->encoding);
7819
7820
226k
    ctxt->input = oldinput;
7821
226k
    ctxt->inputNr = oldinputNr;
7822
226k
    ctxt->inputMax = oldinputMax;
7823
226k
    ctxt->inputTab = oldinputTab;
7824
226k
    ctxt->encoding = oldencoding;
7825
7826
226k
    xmlFree(content);
7827
7828
226k
    return(ret);
7829
720
}
7830
7831
/**
7832
 * Parse PEReference declarations
7833
 *
7834
 *     [69] PEReference ::= '%' Name ';'
7835
 *
7836
 * [ WFC: No Recursion ]
7837
 * A parsed entity must not contain a recursive
7838
 * reference to itself, either directly or indirectly.
7839
 *
7840
 * [ WFC: Entity Declared ]
7841
 * In a document without any DTD, a document with only an internal DTD
7842
 * subset which contains no parameter entity references, or a document
7843
 * with "standalone='yes'", ...  ... The declaration of a parameter
7844
 * entity must precede any reference to it...
7845
 *
7846
 * [ VC: Entity Declared ]
7847
 * In a document with an external subset or external parameter entities
7848
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7849
 * must precede any reference to it...
7850
 *
7851
 * [ WFC: In DTD ]
7852
 * Parameter-entity references may only appear in the DTD.
7853
 * NOTE: misleading but this is handled.
7854
 *
7855
 * @param ctxt  an XML parser context
7856
 * @param str  a pointer to an index in the string
7857
 * @returns the string of the entity content.
7858
 *         str is updated to the current value of the index
7859
 */
7860
static xmlEntityPtr
7861
434k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7862
434k
    const xmlChar *ptr;
7863
434k
    xmlChar cur;
7864
434k
    xmlChar *name;
7865
434k
    xmlEntityPtr entity = NULL;
7866
7867
434k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7868
434k
    ptr = *str;
7869
434k
    cur = *ptr;
7870
434k
    if (cur != '%')
7871
0
        return(NULL);
7872
434k
    ptr++;
7873
434k
    name = xmlParseStringName(ctxt, &ptr);
7874
434k
    if (name == NULL) {
7875
21.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7876
21.8k
           "xmlParseStringPEReference: no name\n");
7877
21.8k
  *str = ptr;
7878
21.8k
  return(NULL);
7879
21.8k
    }
7880
412k
    cur = *ptr;
7881
412k
    if (cur != ';') {
7882
18.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7883
18.2k
  xmlFree(name);
7884
18.2k
  *str = ptr;
7885
18.2k
  return(NULL);
7886
18.2k
    }
7887
394k
    ptr++;
7888
7889
    /* Must be set before xmlHandleUndeclaredEntity */
7890
394k
    ctxt->hasPErefs = 1;
7891
7892
    /*
7893
     * Request the entity from SAX
7894
     */
7895
394k
    if ((ctxt->sax != NULL) &&
7896
394k
  (ctxt->sax->getParameterEntity != NULL))
7897
394k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7898
7899
394k
    if (entity == NULL) {
7900
49.0k
        xmlHandleUndeclaredEntity(ctxt, name);
7901
345k
    } else {
7902
  /*
7903
   * Internal checking in case the entity quest barfed
7904
   */
7905
345k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7906
239k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7907
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7908
0
        "%%%s; is not a parameter entity\n",
7909
0
        name, NULL);
7910
0
  }
7911
345k
    }
7912
7913
394k
    xmlFree(name);
7914
394k
    *str = ptr;
7915
394k
    return(entity);
7916
412k
}
7917
7918
/**
7919
 * Parse a DOCTYPE declaration
7920
 *
7921
 * @deprecated Internal function, don't use.
7922
 *
7923
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7924
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7925
 *
7926
 * [ VC: Root Element Type ]
7927
 * The Name in the document type declaration must match the element
7928
 * type of the root element.
7929
 *
7930
 * @param ctxt  an XML parser context
7931
 */
7932
7933
void
7934
228k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7935
228k
    const xmlChar *name = NULL;
7936
228k
    xmlChar *publicId = NULL;
7937
228k
    xmlChar *URI = NULL;
7938
7939
    /*
7940
     * We know that '<!DOCTYPE' has been detected.
7941
     */
7942
228k
    SKIP(9);
7943
7944
228k
    if (SKIP_BLANKS == 0) {
7945
40.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7946
40.4k
                       "Space required after 'DOCTYPE'\n");
7947
40.4k
    }
7948
7949
    /*
7950
     * Parse the DOCTYPE name.
7951
     */
7952
228k
    name = xmlParseName(ctxt);
7953
228k
    if (name == NULL) {
7954
35.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7955
35.2k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7956
35.2k
    }
7957
228k
    ctxt->intSubName = name;
7958
7959
228k
    SKIP_BLANKS;
7960
7961
    /*
7962
     * Check for public and system identifier (URI)
7963
     */
7964
228k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7965
7966
228k
    if ((URI != NULL) || (publicId != NULL)) {
7967
120k
        ctxt->hasExternalSubset = 1;
7968
120k
    }
7969
228k
    ctxt->extSubURI = URI;
7970
228k
    ctxt->extSubSystem = publicId;
7971
7972
228k
    SKIP_BLANKS;
7973
7974
    /*
7975
     * Create and update the internal subset.
7976
     */
7977
228k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7978
228k
  (!ctxt->disableSAX))
7979
200k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7980
7981
228k
    if ((RAW != '[') && (RAW != '>')) {
7982
10.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7983
10.3k
    }
7984
228k
}
7985
7986
/**
7987
 * Parse the internal subset declaration
7988
 *
7989
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7990
 * @param ctxt  an XML parser context
7991
 */
7992
7993
static void
7994
182k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7995
    /*
7996
     * Is there any DTD definition ?
7997
     */
7998
182k
    if (RAW == '[') {
7999
182k
        int oldInputNr = ctxt->inputNr;
8000
8001
182k
        NEXT;
8002
  /*
8003
   * Parse the succession of Markup declarations and
8004
   * PEReferences.
8005
   * Subsequence (markupdecl | PEReference | S)*
8006
   */
8007
182k
  SKIP_BLANKS;
8008
3.29M
        while (1) {
8009
3.29M
            if (PARSER_STOPPED(ctxt)) {
8010
10.2k
                return;
8011
3.28M
            } else if (ctxt->input->cur >= ctxt->input->end) {
8012
475k
                if (ctxt->inputNr <= oldInputNr) {
8013
21.6k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8014
21.6k
                    return;
8015
21.6k
                }
8016
453k
                xmlPopPE(ctxt);
8017
2.80M
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8018
76.9k
                NEXT;
8019
76.9k
                SKIP_BLANKS;
8020
76.9k
                break;
8021
2.73M
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8022
1.87M
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8023
                /*
8024
                 * Conditional sections are allowed in external entities
8025
                 * included by PE References in the internal subset.
8026
                 */
8027
14.5k
                xmlParseConditionalSections(ctxt);
8028
2.71M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8029
1.82M
                xmlParseMarkupDecl(ctxt);
8030
1.82M
            } else if (RAW == '%') {
8031
820k
                xmlParsePERefInternal(ctxt, 1);
8032
820k
            } else {
8033
73.2k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8034
8035
78.2k
                while (ctxt->inputNr > oldInputNr)
8036
5.00k
                    xmlPopPE(ctxt);
8037
73.2k
                return;
8038
73.2k
            }
8039
3.11M
            SKIP_BLANKS;
8040
3.11M
            SHRINK;
8041
3.11M
            GROW;
8042
3.11M
        }
8043
182k
    }
8044
8045
    /*
8046
     * We should be at the end of the DOCTYPE declaration.
8047
     */
8048
76.9k
    if (RAW != '>') {
8049
2.14k
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8050
2.14k
        return;
8051
2.14k
    }
8052
74.7k
    NEXT;
8053
74.7k
}
8054
8055
#ifdef LIBXML_SAX1_ENABLED
8056
/**
8057
 * Parse an attribute
8058
 *
8059
 * @deprecated Internal function, don't use.
8060
 *
8061
 *     [41] Attribute ::= Name Eq AttValue
8062
 *
8063
 * [ WFC: No External Entity References ]
8064
 * Attribute values cannot contain direct or indirect entity references
8065
 * to external entities.
8066
 *
8067
 * [ WFC: No < in Attribute Values ]
8068
 * The replacement text of any entity referred to directly or indirectly in
8069
 * an attribute value (other than "&lt;") must not contain a <.
8070
 *
8071
 * [ VC: Attribute Value Type ]
8072
 * The attribute must have been declared; the value must be of the type
8073
 * declared for it.
8074
 *
8075
 *     [25] Eq ::= S? '=' S?
8076
 *
8077
 * With namespace:
8078
 *
8079
 *     [NS 11] Attribute ::= QName Eq AttValue
8080
 *
8081
 * Also the case QName == xmlns:??? is handled independently as a namespace
8082
 * definition.
8083
 *
8084
 * @param ctxt  an XML parser context
8085
 * @param value  a xmlChar ** used to store the value of the attribute
8086
 * @returns the attribute name, and the value in *value.
8087
 */
8088
8089
const xmlChar *
8090
448k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8091
448k
    const xmlChar *name;
8092
448k
    xmlChar *val;
8093
8094
448k
    *value = NULL;
8095
448k
    GROW;
8096
448k
    name = xmlParseName(ctxt);
8097
448k
    if (name == NULL) {
8098
125k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8099
125k
                 "error parsing attribute name\n");
8100
125k
        return(NULL);
8101
125k
    }
8102
8103
    /*
8104
     * read the value
8105
     */
8106
323k
    SKIP_BLANKS;
8107
323k
    if (RAW == '=') {
8108
277k
        NEXT;
8109
277k
  SKIP_BLANKS;
8110
277k
  val = xmlParseAttValue(ctxt);
8111
277k
    } else {
8112
45.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8113
45.4k
         "Specification mandates value for attribute %s\n", name);
8114
45.4k
  return(name);
8115
45.4k
    }
8116
8117
    /*
8118
     * Check that xml:lang conforms to the specification
8119
     * No more registered as an error, just generate a warning now
8120
     * since this was deprecated in XML second edition
8121
     */
8122
277k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8123
30.4k
  if (!xmlCheckLanguageID(val)) {
8124
20.4k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8125
20.4k
              "Malformed value for xml:lang : %s\n",
8126
20.4k
        val, NULL);
8127
20.4k
  }
8128
30.4k
    }
8129
8130
    /*
8131
     * Check that xml:space conforms to the specification
8132
     */
8133
277k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8134
4.21k
  if (xmlStrEqual(val, BAD_CAST "default"))
8135
1.79k
      *(ctxt->space) = 0;
8136
2.42k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8137
1.51k
      *(ctxt->space) = 1;
8138
911
  else {
8139
911
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8140
911
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8141
911
                                 val, NULL);
8142
911
  }
8143
4.21k
    }
8144
8145
277k
    *value = val;
8146
277k
    return(name);
8147
323k
}
8148
8149
/**
8150
 * Parse a start tag. Always consumes '<'.
8151
 *
8152
 * @deprecated Internal function, don't use.
8153
 *
8154
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8155
 *
8156
 * [ WFC: Unique Att Spec ]
8157
 * No attribute name may appear more than once in the same start-tag or
8158
 * empty-element tag.
8159
 *
8160
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8161
 *
8162
 * [ WFC: Unique Att Spec ]
8163
 * No attribute name may appear more than once in the same start-tag or
8164
 * empty-element tag.
8165
 *
8166
 * With namespace:
8167
 *
8168
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8169
 *
8170
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8171
 *
8172
 * @param ctxt  an XML parser context
8173
 * @returns the element name parsed
8174
 */
8175
8176
const xmlChar *
8177
1.31M
xmlParseStartTag(xmlParserCtxt *ctxt) {
8178
1.31M
    const xmlChar *name;
8179
1.31M
    const xmlChar *attname;
8180
1.31M
    xmlChar *attvalue;
8181
1.31M
    const xmlChar **atts = ctxt->atts;
8182
1.31M
    int nbatts = 0;
8183
1.31M
    int maxatts = ctxt->maxatts;
8184
1.31M
    int i;
8185
8186
1.31M
    if (RAW != '<') return(NULL);
8187
1.31M
    NEXT1;
8188
8189
1.31M
    name = xmlParseName(ctxt);
8190
1.31M
    if (name == NULL) {
8191
428k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8192
428k
       "xmlParseStartTag: invalid element name\n");
8193
428k
        return(NULL);
8194
428k
    }
8195
8196
    /*
8197
     * Now parse the attributes, it ends up with the ending
8198
     *
8199
     * (S Attribute)* S?
8200
     */
8201
883k
    SKIP_BLANKS;
8202
883k
    GROW;
8203
8204
1.04M
    while (((RAW != '>') &&
8205
574k
     ((RAW != '/') || (NXT(1) != '>')) &&
8206
472k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8207
448k
  attname = xmlParseAttribute(ctxt, &attvalue);
8208
448k
        if (attname == NULL)
8209
125k
      break;
8210
323k
        if (attvalue != NULL) {
8211
      /*
8212
       * [ WFC: Unique Att Spec ]
8213
       * No attribute name may appear more than once in the same
8214
       * start-tag or empty-element tag.
8215
       */
8216
721k
      for (i = 0; i < nbatts;i += 2) {
8217
478k
          if (xmlStrEqual(atts[i], attname)) {
8218
21.8k
        xmlErrAttributeDup(ctxt, NULL, attname);
8219
21.8k
        goto failed;
8220
21.8k
    }
8221
478k
      }
8222
      /*
8223
       * Add the pair to atts
8224
       */
8225
243k
      if (nbatts + 4 > maxatts) {
8226
41.4k
          const xmlChar **n;
8227
41.4k
                int newSize;
8228
8229
41.4k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8230
41.4k
                                          11, XML_MAX_ATTRS);
8231
41.4k
                if (newSize < 0) {
8232
0
        xmlErrMemory(ctxt);
8233
0
        goto failed;
8234
0
    }
8235
41.4k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8236
41.4k
                if (newSize < 2)
8237
30.5k
                    newSize = 2;
8238
41.4k
#endif
8239
41.4k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8240
41.4k
    if (n == NULL) {
8241
28
        xmlErrMemory(ctxt);
8242
28
        goto failed;
8243
28
    }
8244
41.3k
    atts = n;
8245
41.3k
                maxatts = newSize * 2;
8246
41.3k
    ctxt->atts = atts;
8247
41.3k
    ctxt->maxatts = maxatts;
8248
41.3k
      }
8249
8250
243k
      atts[nbatts++] = attname;
8251
243k
      atts[nbatts++] = attvalue;
8252
243k
      atts[nbatts] = NULL;
8253
243k
      atts[nbatts + 1] = NULL;
8254
8255
243k
            attvalue = NULL;
8256
243k
  }
8257
8258
323k
failed:
8259
8260
323k
        if (attvalue != NULL)
8261
21.8k
            xmlFree(attvalue);
8262
8263
323k
  GROW
8264
323k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8265
158k
      break;
8266
165k
  if (SKIP_BLANKS == 0) {
8267
83.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8268
83.8k
         "attributes construct error\n");
8269
83.8k
  }
8270
165k
  SHRINK;
8271
165k
        GROW;
8272
165k
    }
8273
8274
    /*
8275
     * SAX: Start of Element !
8276
     */
8277
883k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8278
883k
  (!ctxt->disableSAX)) {
8279
783k
  if (nbatts > 0)
8280
184k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8281
599k
  else
8282
599k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8283
783k
    }
8284
8285
883k
    if (atts != NULL) {
8286
        /* Free only the content strings */
8287
679k
        for (i = 1;i < nbatts;i+=2)
8288
243k
      if (atts[i] != NULL)
8289
243k
         xmlFree((xmlChar *) atts[i]);
8290
436k
    }
8291
883k
    return(name);
8292
883k
}
8293
8294
/**
8295
 * Parse an end tag. Always consumes '</'.
8296
 *
8297
 *     [42] ETag ::= '</' Name S? '>'
8298
 *
8299
 * With namespace
8300
 *
8301
 *     [NS 9] ETag ::= '</' QName S? '>'
8302
 * @param ctxt  an XML parser context
8303
 * @param line  line of the start tag
8304
 */
8305
8306
static void
8307
146k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8308
146k
    const xmlChar *name;
8309
8310
146k
    GROW;
8311
146k
    if ((RAW != '<') || (NXT(1) != '/')) {
8312
2.54k
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8313
2.54k
           "xmlParseEndTag: '</' not found\n");
8314
2.54k
  return;
8315
2.54k
    }
8316
143k
    SKIP(2);
8317
8318
143k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8319
8320
    /*
8321
     * We should definitely be at the ending "S? '>'" part
8322
     */
8323
143k
    GROW;
8324
143k
    SKIP_BLANKS;
8325
143k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8326
16.6k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8327
16.6k
    } else
8328
127k
  NEXT1;
8329
8330
    /*
8331
     * [ WFC: Element Type Match ]
8332
     * The Name in an element's end-tag must match the element type in the
8333
     * start-tag.
8334
     *
8335
     */
8336
143k
    if (name != (xmlChar*)1) {
8337
15.5k
        if (name == NULL) name = BAD_CAST "unparsable";
8338
15.5k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8339
15.5k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8340
15.5k
                    ctxt->name, line, name);
8341
15.5k
    }
8342
8343
    /*
8344
     * SAX: End of Tag
8345
     */
8346
143k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8347
143k
  (!ctxt->disableSAX))
8348
136k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8349
8350
143k
    namePop(ctxt);
8351
143k
    spacePop(ctxt);
8352
143k
}
8353
8354
/**
8355
 * Parse an end of tag
8356
 *
8357
 * @deprecated Internal function, don't use.
8358
 *
8359
 *     [42] ETag ::= '</' Name S? '>'
8360
 *
8361
 * With namespace
8362
 *
8363
 *     [NS 9] ETag ::= '</' QName S? '>'
8364
 * @param ctxt  an XML parser context
8365
 */
8366
8367
void
8368
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8369
0
    xmlParseEndTag1(ctxt, 0);
8370
0
}
8371
#endif /* LIBXML_SAX1_ENABLED */
8372
8373
/************************************************************************
8374
 *                  *
8375
 *          SAX 2 specific operations       *
8376
 *                  *
8377
 ************************************************************************/
8378
8379
/**
8380
 * Parse an XML Namespace QName
8381
 *
8382
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8383
 *     [7]  Prefix  ::= NCName
8384
 *     [8]  LocalPart  ::= NCName
8385
 *
8386
 * @param ctxt  an XML parser context
8387
 * @param prefix  pointer to store the prefix part
8388
 * @returns the Name parsed or NULL
8389
 */
8390
8391
static xmlHashedString
8392
9.50M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8393
9.50M
    xmlHashedString l, p;
8394
9.50M
    int start, isNCName = 0;
8395
8396
9.50M
    l.name = NULL;
8397
9.50M
    p.name = NULL;
8398
8399
9.50M
    GROW;
8400
9.50M
    start = CUR_PTR - BASE_PTR;
8401
8402
9.50M
    l = xmlParseNCName(ctxt);
8403
9.50M
    if (l.name != NULL) {
8404
7.90M
        isNCName = 1;
8405
7.90M
        if (CUR == ':') {
8406
2.68M
            NEXT;
8407
2.68M
            p = l;
8408
2.68M
            l = xmlParseNCName(ctxt);
8409
2.68M
        }
8410
7.90M
    }
8411
9.50M
    if ((l.name == NULL) || (CUR == ':')) {
8412
1.68M
        xmlChar *tmp;
8413
8414
1.68M
        l.name = NULL;
8415
1.68M
        p.name = NULL;
8416
1.68M
        if ((isNCName == 0) && (CUR != ':'))
8417
1.51M
            return(l);
8418
168k
        tmp = xmlParseNmtoken(ctxt);
8419
168k
        if (tmp != NULL)
8420
114k
            xmlFree(tmp);
8421
168k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8422
168k
                                CUR_PTR - (BASE_PTR + start));
8423
168k
        if (l.name == NULL) {
8424
13
            xmlErrMemory(ctxt);
8425
13
            return(l);
8426
13
        }
8427
168k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8428
168k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8429
168k
    }
8430
8431
7.99M
    *prefix = p;
8432
7.99M
    return(l);
8433
9.50M
}
8434
8435
/**
8436
 * Parse an XML Namespace QName
8437
 *
8438
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8439
 *     [7]  Prefix  ::= NCName
8440
 *     [8]  LocalPart  ::= NCName
8441
 *
8442
 * @param ctxt  an XML parser context
8443
 * @param prefix  pointer to store the prefix part
8444
 * @returns the Name parsed or NULL
8445
 */
8446
8447
static const xmlChar *
8448
35.9k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8449
35.9k
    xmlHashedString n, p;
8450
8451
35.9k
    n = xmlParseQNameHashed(ctxt, &p);
8452
35.9k
    if (n.name == NULL)
8453
8.17k
        return(NULL);
8454
27.8k
    *prefix = p.name;
8455
27.8k
    return(n.name);
8456
35.9k
}
8457
8458
/**
8459
 * Parse an XML name and compares for match
8460
 * (specialized for endtag parsing)
8461
 *
8462
 * @param ctxt  an XML parser context
8463
 * @param name  the localname
8464
 * @param prefix  the prefix, if any.
8465
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8466
 * and the name for mismatch
8467
 */
8468
8469
static const xmlChar *
8470
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8471
248k
                        xmlChar const *prefix) {
8472
248k
    const xmlChar *cmp;
8473
248k
    const xmlChar *in;
8474
248k
    const xmlChar *ret;
8475
248k
    const xmlChar *prefix2;
8476
8477
248k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8478
8479
248k
    GROW;
8480
248k
    in = ctxt->input->cur;
8481
8482
248k
    cmp = prefix;
8483
803k
    while (*in != 0 && *in == *cmp) {
8484
555k
  ++in;
8485
555k
  ++cmp;
8486
555k
    }
8487
248k
    if ((*cmp == 0) && (*in == ':')) {
8488
223k
        in++;
8489
223k
  cmp = name;
8490
1.84M
  while (*in != 0 && *in == *cmp) {
8491
1.62M
      ++in;
8492
1.62M
      ++cmp;
8493
1.62M
  }
8494
223k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8495
      /* success */
8496
212k
            ctxt->input->col += in - ctxt->input->cur;
8497
212k
      ctxt->input->cur = in;
8498
212k
      return((const xmlChar*) 1);
8499
212k
  }
8500
223k
    }
8501
    /*
8502
     * all strings coms from the dictionary, equality can be done directly
8503
     */
8504
35.9k
    ret = xmlParseQName (ctxt, &prefix2);
8505
35.9k
    if (ret == NULL)
8506
8.17k
        return(NULL);
8507
27.8k
    if ((ret == name) && (prefix == prefix2))
8508
4.35k
  return((const xmlChar*) 1);
8509
23.4k
    return ret;
8510
27.8k
}
8511
8512
/**
8513
 * Parse an attribute in the new SAX2 framework.
8514
 *
8515
 * @param ctxt  an XML parser context
8516
 * @param pref  the element prefix
8517
 * @param elem  the element name
8518
 * @param hprefix  resulting attribute prefix
8519
 * @param value  resulting value of the attribute
8520
 * @param len  resulting length of the attribute
8521
 * @param alloc  resulting indicator if the attribute was allocated
8522
 * @returns the attribute name, and the value in *value, .
8523
 */
8524
8525
static xmlHashedString
8526
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8527
                   const xmlChar * pref, const xmlChar * elem,
8528
                   xmlHashedString * hprefix, xmlChar ** value,
8529
                   int *len, int *alloc)
8530
4.50M
{
8531
4.50M
    xmlHashedString hname;
8532
4.50M
    const xmlChar *prefix, *name;
8533
4.50M
    xmlChar *val = NULL, *internal_val = NULL;
8534
4.50M
    int special = 0;
8535
4.50M
    int isNamespace;
8536
4.50M
    int flags;
8537
8538
4.50M
    *value = NULL;
8539
4.50M
    GROW;
8540
4.50M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8541
4.50M
    if (hname.name == NULL) {
8542
1.06M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8543
1.06M
                       "error parsing attribute name\n");
8544
1.06M
        return(hname);
8545
1.06M
    }
8546
3.43M
    name = hname.name;
8547
3.43M
    prefix = hprefix->name;
8548
8549
    /*
8550
     * get the type if needed
8551
     */
8552
3.43M
    if (ctxt->attsSpecial != NULL) {
8553
337k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8554
337k
                                              prefix, name));
8555
337k
    }
8556
8557
    /*
8558
     * read the value
8559
     */
8560
3.43M
    SKIP_BLANKS;
8561
3.43M
    if (RAW != '=') {
8562
138k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8563
138k
                          "Specification mandates value for attribute %s\n",
8564
138k
                          name);
8565
138k
        goto error;
8566
138k
    }
8567
8568
8569
3.30M
    NEXT;
8570
3.30M
    SKIP_BLANKS;
8571
3.30M
    flags = 0;
8572
3.30M
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8573
3.04M
                   (prefix == ctxt->str_xmlns));
8574
3.30M
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8575
3.30M
                                   isNamespace);
8576
3.30M
    if (val == NULL)
8577
43.0k
        goto error;
8578
8579
3.25M
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8580
8581
3.25M
#ifdef LIBXML_VALID_ENABLED
8582
3.25M
    if ((ctxt->validate) &&
8583
194k
        (ctxt->standalone == 1) &&
8584
8.57k
        (special & XML_SPECIAL_EXTERNAL) &&
8585
542
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8586
229
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8587
229
                         "standalone: normalization of attribute %s on %s "
8588
229
                         "by external subset declaration\n",
8589
229
                         name, elem);
8590
229
    }
8591
3.25M
#endif
8592
8593
3.25M
    if (prefix == ctxt->str_xml) {
8594
        /*
8595
         * Check that xml:lang conforms to the specification
8596
         * No more registered as an error, just generate a warning now
8597
         * since this was deprecated in XML second edition
8598
         */
8599
167k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8600
80.8k
            internal_val = xmlStrndup(val, *len);
8601
80.8k
            if (internal_val == NULL)
8602
26
                goto mem_error;
8603
80.8k
            if (!xmlCheckLanguageID(internal_val)) {
8604
67.0k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8605
67.0k
                              "Malformed value for xml:lang : %s\n",
8606
67.0k
                              internal_val, NULL);
8607
67.0k
            }
8608
80.8k
        }
8609
8610
        /*
8611
         * Check that xml:space conforms to the specification
8612
         */
8613
166k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8614
10.5k
            internal_val = xmlStrndup(val, *len);
8615
10.5k
            if (internal_val == NULL)
8616
20
                goto mem_error;
8617
10.5k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8618
4.56k
                *(ctxt->space) = 0;
8619
5.93k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8620
2.47k
                *(ctxt->space) = 1;
8621
3.45k
            else {
8622
3.45k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8623
3.45k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8624
3.45k
                              internal_val, NULL);
8625
3.45k
            }
8626
10.5k
        }
8627
166k
        if (internal_val) {
8628
91.3k
            xmlFree(internal_val);
8629
91.3k
        }
8630
166k
    }
8631
8632
3.25M
    *value = val;
8633
3.25M
    return (hname);
8634
8635
46
mem_error:
8636
46
    xmlErrMemory(ctxt);
8637
181k
error:
8638
181k
    if ((val != NULL) && (*alloc != 0))
8639
18
        xmlFree(val);
8640
181k
    return(hname);
8641
46
}
8642
8643
/**
8644
 * Inserts a new attribute into the hash table.
8645
 *
8646
 * @param ctxt  parser context
8647
 * @param size  size of the hash table
8648
 * @param name  attribute name
8649
 * @param uri  namespace uri
8650
 * @param hashValue  combined hash value of name and uri
8651
 * @param aindex  attribute index (this is a multiple of 5)
8652
 * @returns INT_MAX if no existing attribute was found, the attribute
8653
 * index if an attribute was found, -1 if a memory allocation failed.
8654
 */
8655
static int
8656
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8657
1.66M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8658
1.66M
    xmlAttrHashBucket *table = ctxt->attrHash;
8659
1.66M
    xmlAttrHashBucket *bucket;
8660
1.66M
    unsigned hindex;
8661
8662
1.66M
    hindex = hashValue & (size - 1);
8663
1.66M
    bucket = &table[hindex];
8664
8665
1.94M
    while (bucket->index >= 0) {
8666
512k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8667
8668
512k
        if (name == atts[0]) {
8669
243k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8670
8671
243k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8672
243k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8673
97.4k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8674
229k
                return(bucket->index);
8675
243k
        }
8676
8677
282k
        hindex++;
8678
282k
        bucket++;
8679
282k
        if (hindex >= size) {
8680
35.2k
            hindex = 0;
8681
35.2k
            bucket = table;
8682
35.2k
        }
8683
282k
    }
8684
8685
1.43M
    bucket->index = aindex;
8686
8687
1.43M
    return(INT_MAX);
8688
1.66M
}
8689
8690
static int
8691
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8692
                       const xmlChar *name, const xmlChar *prefix,
8693
25.5k
                       unsigned hashValue, int aindex) {
8694
25.5k
    xmlAttrHashBucket *table = ctxt->attrHash;
8695
25.5k
    xmlAttrHashBucket *bucket;
8696
25.5k
    unsigned hindex;
8697
8698
25.5k
    hindex = hashValue & (size - 1);
8699
25.5k
    bucket = &table[hindex];
8700
8701
39.4k
    while (bucket->index >= 0) {
8702
23.2k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8703
8704
23.2k
        if ((name == atts[0]) && (prefix == atts[1]))
8705
9.34k
            return(bucket->index);
8706
8707
13.8k
        hindex++;
8708
13.8k
        bucket++;
8709
13.8k
        if (hindex >= size) {
8710
2.32k
            hindex = 0;
8711
2.32k
            bucket = table;
8712
2.32k
        }
8713
13.8k
    }
8714
8715
16.2k
    bucket->index = aindex;
8716
8717
16.2k
    return(INT_MAX);
8718
25.5k
}
8719
/**
8720
 * Parse a start tag. Always consumes '<'.
8721
 *
8722
 * This routine is called when running SAX2 parsing
8723
 *
8724
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8725
 *
8726
 * [ WFC: Unique Att Spec ]
8727
 * No attribute name may appear more than once in the same start-tag or
8728
 * empty-element tag.
8729
 *
8730
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8731
 *
8732
 * [ WFC: Unique Att Spec ]
8733
 * No attribute name may appear more than once in the same start-tag or
8734
 * empty-element tag.
8735
 *
8736
 * With namespace:
8737
 *
8738
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8739
 *
8740
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8741
 *
8742
 * @param ctxt  an XML parser context
8743
 * @param pref  resulting namespace prefix
8744
 * @param URI  resulting namespace URI
8745
 * @param nbNsPtr  resulting number of namespace declarations
8746
 * @returns the element name parsed
8747
 */
8748
8749
static const xmlChar *
8750
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8751
4.96M
                  const xmlChar **URI, int *nbNsPtr) {
8752
4.96M
    xmlHashedString hlocalname;
8753
4.96M
    xmlHashedString hprefix;
8754
4.96M
    xmlHashedString hattname;
8755
4.96M
    xmlHashedString haprefix;
8756
4.96M
    const xmlChar *localname;
8757
4.96M
    const xmlChar *prefix;
8758
4.96M
    const xmlChar *attname;
8759
4.96M
    const xmlChar *aprefix;
8760
4.96M
    const xmlChar *uri;
8761
4.96M
    xmlChar *attvalue = NULL;
8762
4.96M
    const xmlChar **atts = ctxt->atts;
8763
4.96M
    unsigned attrHashSize = 0;
8764
4.96M
    int maxatts = ctxt->maxatts;
8765
4.96M
    int nratts, nbatts, nbdef;
8766
4.96M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8767
4.96M
    int alloc = 0;
8768
4.96M
    int numNsErr = 0;
8769
4.96M
    int numDupErr = 0;
8770
8771
4.96M
    if (RAW != '<') return(NULL);
8772
4.96M
    NEXT1;
8773
8774
4.96M
    nbatts = 0;
8775
4.96M
    nratts = 0;
8776
4.96M
    nbdef = 0;
8777
4.96M
    nbNs = 0;
8778
4.96M
    nbTotalDef = 0;
8779
4.96M
    attval = 0;
8780
8781
4.96M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8782
0
        xmlErrMemory(ctxt);
8783
0
        return(NULL);
8784
0
    }
8785
8786
4.96M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8787
4.96M
    if (hlocalname.name == NULL) {
8788
440k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
440k
           "StartTag: invalid element name\n");
8790
440k
        return(NULL);
8791
440k
    }
8792
4.52M
    localname = hlocalname.name;
8793
4.52M
    prefix = hprefix.name;
8794
8795
    /*
8796
     * Now parse the attributes, it ends up with the ending
8797
     *
8798
     * (S Attribute)* S?
8799
     */
8800
4.52M
    SKIP_BLANKS;
8801
4.52M
    GROW;
8802
8803
    /*
8804
     * The ctxt->atts array will be ultimately passed to the SAX callback
8805
     * containing five xmlChar pointers for each attribute:
8806
     *
8807
     * [0] attribute name
8808
     * [1] attribute prefix
8809
     * [2] namespace URI
8810
     * [3] attribute value
8811
     * [4] end of attribute value
8812
     *
8813
     * To save memory, we reuse this array temporarily and store integers
8814
     * in these pointer variables.
8815
     *
8816
     * [0] attribute name
8817
     * [1] attribute prefix
8818
     * [2] hash value of attribute prefix, and later namespace index
8819
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8820
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8821
     *
8822
     * The ctxt->attallocs array contains an additional unsigned int for
8823
     * each attribute, containing the hash value of the attribute name
8824
     * and the alloc flag in bit 31.
8825
     */
8826
8827
6.64M
    while (((RAW != '>') &&
8828
5.03M
     ((RAW != '/') || (NXT(1) != '>')) &&
8829
4.58M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8830
4.50M
  int len = -1;
8831
8832
4.50M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8833
4.50M
                                          &haprefix, &attvalue, &len,
8834
4.50M
                                          &alloc);
8835
4.50M
        if (hattname.name == NULL)
8836
1.06M
      break;
8837
3.43M
        if (attvalue == NULL)
8838
181k
            goto next_attr;
8839
3.25M
        attname = hattname.name;
8840
3.25M
        aprefix = haprefix.name;
8841
3.25M
  if (len < 0) len = xmlStrlen(attvalue);
8842
8843
3.25M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8844
250k
            xmlHashedString huri;
8845
250k
            xmlURIPtr parsedUri;
8846
8847
250k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8848
250k
            uri = huri.name;
8849
250k
            if (uri == NULL) {
8850
18
                xmlErrMemory(ctxt);
8851
18
                goto next_attr;
8852
18
            }
8853
250k
            if (*uri != 0) {
8854
230k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8855
227
                    xmlErrMemory(ctxt);
8856
227
                    goto next_attr;
8857
227
                }
8858
230k
                if (parsedUri == NULL) {
8859
129k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8860
129k
                             "xmlns: '%s' is not a valid URI\n",
8861
129k
                                       uri, NULL, NULL);
8862
129k
                } else {
8863
100k
                    if (parsedUri->scheme == NULL) {
8864
57.8k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8865
57.8k
                                  "xmlns: URI %s is not absolute\n",
8866
57.8k
                                  uri, NULL, NULL);
8867
57.8k
                    }
8868
100k
                    xmlFreeURI(parsedUri);
8869
100k
                }
8870
230k
                if (uri == ctxt->str_xml_ns) {
8871
1.25k
                    if (attname != ctxt->str_xml) {
8872
1.25k
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8873
1.25k
                     "xml namespace URI cannot be the default namespace\n",
8874
1.25k
                                 NULL, NULL, NULL);
8875
1.25k
                    }
8876
1.25k
                    goto next_attr;
8877
1.25k
                }
8878
229k
                if ((len == 29) &&
8879
5.56k
                    (xmlStrEqual(uri,
8880
5.56k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8881
1.48k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8882
1.48k
                         "reuse of the xmlns namespace name is forbidden\n",
8883
1.48k
                             NULL, NULL, NULL);
8884
1.48k
                    goto next_attr;
8885
1.48k
                }
8886
229k
            }
8887
8888
247k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8889
191k
                nbNs++;
8890
3.00M
        } else if (aprefix == ctxt->str_xmlns) {
8891
341k
            xmlHashedString huri;
8892
341k
            xmlURIPtr parsedUri;
8893
8894
341k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8895
341k
            uri = huri.name;
8896
341k
            if (uri == NULL) {
8897
13
                xmlErrMemory(ctxt);
8898
13
                goto next_attr;
8899
13
            }
8900
8901
341k
            if (attname == ctxt->str_xml) {
8902
3.40k
                if (uri != ctxt->str_xml_ns) {
8903
2.27k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8904
2.27k
                             "xml namespace prefix mapped to wrong URI\n",
8905
2.27k
                             NULL, NULL, NULL);
8906
2.27k
                }
8907
                /*
8908
                 * Do not keep a namespace definition node
8909
                 */
8910
3.40k
                goto next_attr;
8911
3.40k
            }
8912
338k
            if (uri == ctxt->str_xml_ns) {
8913
1.27k
                if (attname != ctxt->str_xml) {
8914
1.27k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8915
1.27k
                             "xml namespace URI mapped to wrong prefix\n",
8916
1.27k
                             NULL, NULL, NULL);
8917
1.27k
                }
8918
1.27k
                goto next_attr;
8919
1.27k
            }
8920
337k
            if (attname == ctxt->str_xmlns) {
8921
1.62k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8922
1.62k
                         "redefinition of the xmlns prefix is forbidden\n",
8923
1.62k
                         NULL, NULL, NULL);
8924
1.62k
                goto next_attr;
8925
1.62k
            }
8926
335k
            if ((len == 29) &&
8927
3.38k
                (xmlStrEqual(uri,
8928
3.38k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8929
1.39k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8930
1.39k
                         "reuse of the xmlns namespace name is forbidden\n",
8931
1.39k
                         NULL, NULL, NULL);
8932
1.39k
                goto next_attr;
8933
1.39k
            }
8934
333k
            if ((uri == NULL) || (uri[0] == 0)) {
8935
4.64k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8936
4.64k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8937
4.64k
                              attname, NULL, NULL);
8938
4.64k
                goto next_attr;
8939
329k
            } else {
8940
329k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8941
103
                    xmlErrMemory(ctxt);
8942
103
                    goto next_attr;
8943
103
                }
8944
329k
                if (parsedUri == NULL) {
8945
112k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8946
112k
                         "xmlns:%s: '%s' is not a valid URI\n",
8947
112k
                                       attname, uri, NULL);
8948
216k
                } else {
8949
216k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8950
46.1k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8951
46.1k
                                  "xmlns:%s: URI %s is not absolute\n",
8952
46.1k
                                  attname, uri, NULL);
8953
46.1k
                    }
8954
216k
                    xmlFreeURI(parsedUri);
8955
216k
                }
8956
329k
            }
8957
8958
329k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8959
308k
                nbNs++;
8960
2.66M
        } else {
8961
            /*
8962
             * Populate attributes array, see above for repurposing
8963
             * of xmlChar pointers.
8964
             */
8965
2.66M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8966
134k
                int res = xmlCtxtGrowAttrs(ctxt);
8967
8968
134k
                maxatts = ctxt->maxatts;
8969
134k
                atts = ctxt->atts;
8970
8971
134k
                if (res < 0)
8972
142
                    goto next_attr;
8973
134k
            }
8974
2.66M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8975
2.66M
                                        ((unsigned) alloc << 31);
8976
2.66M
            atts[nbatts++] = attname;
8977
2.66M
            atts[nbatts++] = aprefix;
8978
2.66M
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8979
2.66M
            if (alloc) {
8980
225k
                atts[nbatts++] = attvalue;
8981
225k
                attvalue += len;
8982
225k
                atts[nbatts++] = attvalue;
8983
2.43M
            } else {
8984
                /*
8985
                 * attvalue points into the input buffer which can be
8986
                 * reallocated. Store differences to input->base instead.
8987
                 * The pointers will be reconstructed later.
8988
                 */
8989
2.43M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8990
2.43M
                attvalue += len;
8991
2.43M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
2.43M
            }
8993
            /*
8994
             * tag if some deallocation is needed
8995
             */
8996
2.66M
            if (alloc != 0) attval = 1;
8997
2.66M
            attvalue = NULL; /* moved into atts */
8998
2.66M
        }
8999
9000
3.43M
next_attr:
9001
3.43M
        if ((attvalue != NULL) && (alloc != 0)) {
9002
125k
            xmlFree(attvalue);
9003
125k
            attvalue = NULL;
9004
125k
        }
9005
9006
3.43M
  GROW
9007
3.43M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9008
960k
      break;
9009
2.47M
  if (SKIP_BLANKS == 0) {
9010
365k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9011
365k
         "attributes construct error\n");
9012
365k
      break;
9013
365k
  }
9014
2.11M
        GROW;
9015
2.11M
    }
9016
9017
    /*
9018
     * Namespaces from default attributes
9019
     */
9020
4.52M
    if (ctxt->attsDefault != NULL) {
9021
849k
        xmlDefAttrsPtr defaults;
9022
9023
849k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9024
849k
  if (defaults != NULL) {
9025
2.98M
      for (i = 0; i < defaults->nbAttrs; i++) {
9026
2.32M
                xmlDefAttr *attr = &defaults->attrs[i];
9027
9028
2.32M
          attname = attr->name.name;
9029
2.32M
    aprefix = attr->prefix.name;
9030
9031
2.32M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9032
72.4k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9033
9034
72.4k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9035
67.8k
                        nbNs++;
9036
2.24M
    } else if (aprefix == ctxt->str_xmlns) {
9037
1.23M
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9038
9039
1.23M
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9040
1.23M
                                      NULL, 1) > 0)
9041
1.22M
                        nbNs++;
9042
1.23M
    } else {
9043
1.01M
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9044
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9045
0
                                    "Maximum number of attributes exceeded");
9046
0
                        break;
9047
0
                    }
9048
1.01M
                    nbTotalDef += 1;
9049
1.01M
                }
9050
2.32M
      }
9051
658k
  }
9052
849k
    }
9053
9054
    /*
9055
     * Resolve attribute namespaces
9056
     */
9057
7.19M
    for (i = 0; i < nbatts; i += 5) {
9058
2.66M
        attname = atts[i];
9059
2.66M
        aprefix = atts[i+1];
9060
9061
        /*
9062
  * The default namespace does not apply to attribute names.
9063
  */
9064
2.66M
  if (aprefix == NULL) {
9065
1.31M
            nsIndex = NS_INDEX_EMPTY;
9066
1.34M
        } else if (aprefix == ctxt->str_xml) {
9067
166k
            nsIndex = NS_INDEX_XML;
9068
1.17M
        } else {
9069
1.17M
            haprefix.name = aprefix;
9070
1.17M
            haprefix.hashValue = (size_t) atts[i+2];
9071
1.17M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9072
9073
1.17M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9074
1.09M
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9075
1.09M
        "Namespace prefix %s for %s on %s is not defined\n",
9076
1.09M
        aprefix, attname, localname);
9077
1.09M
                nsIndex = NS_INDEX_EMPTY;
9078
1.09M
            }
9079
1.17M
        }
9080
9081
2.66M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9082
2.66M
    }
9083
9084
    /*
9085
     * Maximum number of attributes including default attributes.
9086
     */
9087
4.52M
    maxAtts = nratts + nbTotalDef;
9088
9089
    /*
9090
     * Verify that attribute names are unique.
9091
     */
9092
4.52M
    if (maxAtts > 1) {
9093
398k
        attrHashSize = 4;
9094
603k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9095
205k
            attrHashSize *= 2;
9096
9097
398k
        if (attrHashSize > ctxt->attrHashMax) {
9098
41.7k
            xmlAttrHashBucket *tmp;
9099
9100
41.7k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9101
41.7k
            if (tmp == NULL) {
9102
30
                xmlErrMemory(ctxt);
9103
30
                goto done;
9104
30
            }
9105
9106
41.7k
            ctxt->attrHash = tmp;
9107
41.7k
            ctxt->attrHashMax = attrHashSize;
9108
41.7k
        }
9109
9110
398k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9111
9112
2.32M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9113
1.93M
            const xmlChar *nsuri;
9114
1.93M
            unsigned hashValue, nameHashValue, uriHashValue;
9115
1.93M
            int res;
9116
9117
1.93M
            attname = atts[i];
9118
1.93M
            aprefix = atts[i+1];
9119
1.93M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9120
            /* Hash values always have bit 31 set, see dict.c */
9121
1.93M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9122
9123
1.93M
            if (nsIndex == NS_INDEX_EMPTY) {
9124
                /*
9125
                 * Prefix with empty namespace means an undeclared
9126
                 * prefix which was already reported above.
9127
                 */
9128
1.75M
                if (aprefix != NULL)
9129
1.07M
                    continue;
9130
678k
                nsuri = NULL;
9131
678k
                uriHashValue = URI_HASH_EMPTY;
9132
678k
            } else if (nsIndex == NS_INDEX_XML) {
9133
100k
                nsuri = ctxt->str_xml_ns;
9134
100k
                uriHashValue = URI_HASH_XML;
9135
100k
            } else {
9136
73.3k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9137
73.3k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9138
73.3k
            }
9139
9140
852k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9141
852k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9142
852k
                                    hashValue, i);
9143
852k
            if (res < 0)
9144
0
                continue;
9145
9146
            /*
9147
             * [ WFC: Unique Att Spec ]
9148
             * No attribute name may appear more than once in the same
9149
             * start-tag or empty-element tag.
9150
             * As extended by the Namespace in XML REC.
9151
             */
9152
852k
            if (res < INT_MAX) {
9153
190k
                if (aprefix == atts[res+1]) {
9154
176k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9155
176k
                    numDupErr += 1;
9156
176k
                } else {
9157
13.7k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9158
13.7k
                             "Namespaced Attribute %s in '%s' redefined\n",
9159
13.7k
                             attname, nsuri, NULL);
9160
13.7k
                    numNsErr += 1;
9161
13.7k
                }
9162
190k
            }
9163
852k
        }
9164
398k
    }
9165
9166
    /*
9167
     * Default attributes
9168
     */
9169
4.52M
    if (ctxt->attsDefault != NULL) {
9170
849k
        xmlDefAttrsPtr defaults;
9171
9172
849k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9173
849k
  if (defaults != NULL) {
9174
2.98M
      for (i = 0; i < defaults->nbAttrs; i++) {
9175
2.32M
                xmlDefAttr *attr = &defaults->attrs[i];
9176
2.32M
                const xmlChar *nsuri = NULL;
9177
2.32M
                unsigned hashValue, uriHashValue = 0;
9178
2.32M
                int res;
9179
9180
2.32M
          attname = attr->name.name;
9181
2.32M
    aprefix = attr->prefix.name;
9182
9183
2.32M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9184
72.4k
                    continue;
9185
2.24M
    if (aprefix == ctxt->str_xmlns)
9186
1.23M
                    continue;
9187
9188
1.01M
                if (aprefix == NULL) {
9189
694k
                    nsIndex = NS_INDEX_EMPTY;
9190
694k
                    nsuri = NULL;
9191
694k
                    uriHashValue = URI_HASH_EMPTY;
9192
694k
                } else if (aprefix == ctxt->str_xml) {
9193
95.6k
                    nsIndex = NS_INDEX_XML;
9194
95.6k
                    nsuri = ctxt->str_xml_ns;
9195
95.6k
                    uriHashValue = URI_HASH_XML;
9196
227k
                } else {
9197
227k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9198
227k
                    if ((nsIndex == INT_MAX) ||
9199
188k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9200
188k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9201
188k
                                 "Namespace prefix %s for %s on %s is not "
9202
188k
                                 "defined\n",
9203
188k
                                 aprefix, attname, localname);
9204
188k
                        nsIndex = NS_INDEX_EMPTY;
9205
188k
                        nsuri = NULL;
9206
188k
                        uriHashValue = URI_HASH_EMPTY;
9207
188k
                    } else {
9208
39.0k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9209
39.0k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9210
39.0k
                    }
9211
227k
                }
9212
9213
                /*
9214
                 * Check whether the attribute exists
9215
                 */
9216
1.01M
                if (maxAtts > 1) {
9217
808k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9218
808k
                                                   uriHashValue);
9219
808k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9220
808k
                                            hashValue, nbatts);
9221
808k
                    if (res < 0)
9222
0
                        continue;
9223
808k
                    if (res < INT_MAX) {
9224
39.3k
                        if (aprefix == atts[res+1])
9225
18.1k
                            continue;
9226
21.2k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9227
21.2k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9228
21.2k
                                 attname, nsuri, NULL);
9229
21.2k
                    }
9230
808k
                }
9231
9232
998k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9233
9234
998k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9235
19.2k
                    res = xmlCtxtGrowAttrs(ctxt);
9236
9237
19.2k
                    maxatts = ctxt->maxatts;
9238
19.2k
                    atts = ctxt->atts;
9239
9240
19.2k
                    if (res < 0) {
9241
46
                        localname = NULL;
9242
46
                        goto done;
9243
46
                    }
9244
19.2k
                }
9245
9246
998k
                atts[nbatts++] = attname;
9247
998k
                atts[nbatts++] = aprefix;
9248
998k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9249
998k
                atts[nbatts++] = attr->value.name;
9250
998k
                atts[nbatts++] = attr->valueEnd;
9251
9252
998k
#ifdef LIBXML_VALID_ENABLED
9253
                /*
9254
                 * This should be moved to valid.c, but we don't keep track
9255
                 * whether an attribute was defaulted.
9256
                 */
9257
998k
                if ((ctxt->validate) &&
9258
241k
                    (ctxt->standalone == 1) &&
9259
9.68k
                    (attr->external != 0)) {
9260
1.24k
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9261
1.24k
                            "standalone: attribute %s on %s defaulted "
9262
1.24k
                            "from external subset\n",
9263
1.24k
                            attname, localname);
9264
1.24k
                }
9265
998k
#endif
9266
998k
                nbdef++;
9267
998k
      }
9268
658k
  }
9269
849k
    }
9270
9271
    /*
9272
     * Using a single hash table for nsUri/localName pairs cannot
9273
     * detect duplicate QNames reliably. The following example will
9274
     * only result in two namespace errors.
9275
     *
9276
     * <doc xmlns:a="a" xmlns:b="a">
9277
     *   <elem a:a="" b:a="" b:a=""/>
9278
     * </doc>
9279
     *
9280
     * If we saw more than one namespace error but no duplicate QNames
9281
     * were found, we have to scan for duplicate QNames.
9282
     */
9283
4.52M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9284
4.54k
        memset(ctxt->attrHash, -1,
9285
4.54k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9286
9287
32.4k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9288
27.9k
            unsigned hashValue, nameHashValue, prefixHashValue;
9289
27.9k
            int res;
9290
9291
27.9k
            aprefix = atts[i+1];
9292
27.9k
            if (aprefix == NULL)
9293
2.40k
                continue;
9294
9295
25.5k
            attname = atts[i];
9296
            /* Hash values always have bit 31 set, see dict.c */
9297
25.5k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9298
25.5k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9299
9300
25.5k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9301
25.5k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9302
25.5k
                                         aprefix, hashValue, i);
9303
25.5k
            if (res < INT_MAX)
9304
9.34k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9305
25.5k
        }
9306
4.54k
    }
9307
9308
    /*
9309
     * Reconstruct attribute pointers
9310
     */
9311
8.19M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9312
        /* namespace URI */
9313
3.66M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9314
3.66M
        if (nsIndex == INT_MAX)
9315
3.28M
            atts[i+2] = NULL;
9316
380k
        else if (nsIndex == INT_MAX - 1)
9317
260k
            atts[i+2] = ctxt->str_xml_ns;
9318
119k
        else
9319
119k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9320
9321
3.66M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9322
2.43M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9323
2.43M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9324
2.43M
        }
9325
3.66M
    }
9326
9327
4.52M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9328
4.52M
    if ((prefix != NULL) && (uri == NULL)) {
9329
133k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9330
133k
           "Namespace prefix %s on %s is not defined\n",
9331
133k
     prefix, localname, NULL);
9332
133k
    }
9333
4.52M
    *pref = prefix;
9334
4.52M
    *URI = uri;
9335
9336
    /*
9337
     * SAX callback
9338
     */
9339
4.52M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9340
4.52M
  (!ctxt->disableSAX)) {
9341
3.64M
  if (nbNs > 0)
9342
477k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9343
477k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9344
477k
        nbatts / 5, nbdef, atts);
9345
3.17M
  else
9346
3.17M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9347
3.17M
                          0, NULL, nbatts / 5, nbdef, atts);
9348
3.64M
    }
9349
9350
4.52M
done:
9351
    /*
9352
     * Free allocated attribute values
9353
     */
9354
4.52M
    if (attval != 0) {
9355
690k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9356
491k
      if (ctxt->attallocs[j] & 0x80000000)
9357
225k
          xmlFree((xmlChar *) atts[i+3]);
9358
198k
    }
9359
9360
4.52M
    *nbNsPtr = nbNs;
9361
4.52M
    return(localname);
9362
4.52M
}
9363
9364
/**
9365
 * Parse an end tag. Always consumes '</'.
9366
 *
9367
 *     [42] ETag ::= '</' Name S? '>'
9368
 *
9369
 * With namespace
9370
 *
9371
 *     [NS 9] ETag ::= '</' QName S? '>'
9372
 * @param ctxt  an XML parser context
9373
 * @param tag  the corresponding start tag
9374
 */
9375
9376
static void
9377
1.01M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9378
1.01M
    const xmlChar *name;
9379
9380
1.01M
    GROW;
9381
1.01M
    if ((RAW != '<') || (NXT(1) != '/')) {
9382
5.62k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9383
5.62k
  return;
9384
5.62k
    }
9385
1.00M
    SKIP(2);
9386
9387
1.00M
    if (tag->prefix == NULL)
9388
761k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9389
248k
    else
9390
248k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9391
9392
    /*
9393
     * We should definitely be at the ending "S? '>'" part
9394
     */
9395
1.00M
    GROW;
9396
1.00M
    SKIP_BLANKS;
9397
1.00M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9398
85.8k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9399
85.8k
    } else
9400
924k
  NEXT1;
9401
9402
    /*
9403
     * [ WFC: Element Type Match ]
9404
     * The Name in an element's end-tag must match the element type in the
9405
     * start-tag.
9406
     *
9407
     */
9408
1.00M
    if (name != (xmlChar*)1) {
9409
109k
        if (name == NULL) name = BAD_CAST "unparsable";
9410
109k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9411
109k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9412
109k
                    ctxt->name, tag->line, name);
9413
109k
    }
9414
9415
    /*
9416
     * SAX: End of Tag
9417
     */
9418
1.00M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9419
1.00M
  (!ctxt->disableSAX))
9420
876k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9421
876k
                                tag->URI);
9422
9423
1.00M
    spacePop(ctxt);
9424
1.00M
    if (tag->nsNr != 0)
9425
115k
  xmlParserNsPop(ctxt, tag->nsNr);
9426
1.00M
}
9427
9428
/**
9429
 * Parse escaped pure raw content. Always consumes '<!['.
9430
 *
9431
 * @deprecated Internal function, don't use.
9432
 *
9433
 *     [18] CDSect ::= CDStart CData CDEnd
9434
 *
9435
 *     [19] CDStart ::= '<![CDATA['
9436
 *
9437
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9438
 *
9439
 *     [21] CDEnd ::= ']]>'
9440
 * @param ctxt  an XML parser context
9441
 */
9442
void
9443
163k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9444
163k
    xmlChar *buf = NULL;
9445
163k
    int len = 0;
9446
163k
    int size = XML_PARSER_BUFFER_SIZE;
9447
163k
    int r, rl;
9448
163k
    int s, sl;
9449
163k
    int cur, l;
9450
163k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9451
51.6k
                    XML_MAX_HUGE_LENGTH :
9452
163k
                    XML_MAX_TEXT_LENGTH;
9453
9454
163k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9455
0
        return;
9456
163k
    SKIP(3);
9457
9458
163k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9459
0
        return;
9460
163k
    SKIP(6);
9461
9462
163k
    r = xmlCurrentCharRecover(ctxt, &rl);
9463
163k
    if (!IS_CHAR(r)) {
9464
3.48k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9465
3.48k
        goto out;
9466
3.48k
    }
9467
159k
    NEXTL(rl);
9468
159k
    s = xmlCurrentCharRecover(ctxt, &sl);
9469
159k
    if (!IS_CHAR(s)) {
9470
8.21k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9471
8.21k
        goto out;
9472
8.21k
    }
9473
151k
    NEXTL(sl);
9474
151k
    cur = xmlCurrentCharRecover(ctxt, &l);
9475
151k
    buf = xmlMalloc(size);
9476
151k
    if (buf == NULL) {
9477
35
  xmlErrMemory(ctxt);
9478
35
        goto out;
9479
35
    }
9480
5.58M
    while (IS_CHAR(cur) &&
9481
5.54M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9482
5.43M
  if (len + 5 >= size) {
9483
13.9k
      xmlChar *tmp;
9484
13.9k
            int newSize;
9485
9486
13.9k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9487
13.9k
            if (newSize < 0) {
9488
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9489
0
                               "CData section too big found\n");
9490
0
                goto out;
9491
0
            }
9492
13.9k
      tmp = xmlRealloc(buf, newSize);
9493
13.9k
      if (tmp == NULL) {
9494
22
    xmlErrMemory(ctxt);
9495
22
                goto out;
9496
22
      }
9497
13.9k
      buf = tmp;
9498
13.9k
      size = newSize;
9499
13.9k
  }
9500
5.43M
  COPY_BUF(buf, len, r);
9501
5.43M
  r = s;
9502
5.43M
  rl = sl;
9503
5.43M
  s = cur;
9504
5.43M
  sl = l;
9505
5.43M
  NEXTL(l);
9506
5.43M
  cur = xmlCurrentCharRecover(ctxt, &l);
9507
5.43M
    }
9508
151k
    buf[len] = 0;
9509
151k
    if (cur != '>') {
9510
41.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9511
41.3k
                       "CData section not finished\n%.50s\n", buf);
9512
41.3k
        goto out;
9513
41.3k
    }
9514
109k
    NEXTL(l);
9515
9516
    /*
9517
     * OK the buffer is to be consumed as cdata.
9518
     */
9519
109k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9520
105k
        if ((ctxt->sax->cdataBlock != NULL) &&
9521
105k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9522
83.9k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9523
83.9k
        } else if (ctxt->sax->characters != NULL) {
9524
21.6k
            ctxt->sax->characters(ctxt->userData, buf, len);
9525
21.6k
        }
9526
105k
    }
9527
9528
163k
out:
9529
163k
    xmlFree(buf);
9530
163k
}
9531
9532
/**
9533
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9534
 * unexpected EOF to the caller.
9535
 *
9536
 * @param ctxt  an XML parser context
9537
 */
9538
9539
static void
9540
184k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9541
184k
    int oldNameNr = ctxt->nameNr;
9542
184k
    int oldSpaceNr = ctxt->spaceNr;
9543
184k
    int oldNodeNr = ctxt->nodeNr;
9544
9545
184k
    GROW;
9546
106M
    while ((ctxt->input->cur < ctxt->input->end) &&
9547
106M
     (PARSER_STOPPED(ctxt) == 0)) {
9548
106M
  const xmlChar *cur = ctxt->input->cur;
9549
9550
  /*
9551
   * First case : a Processing Instruction.
9552
   */
9553
106M
  if ((*cur == '<') && (cur[1] == '?')) {
9554
146k
      xmlParsePI(ctxt);
9555
146k
  }
9556
9557
  /*
9558
   * Second case : a CDSection
9559
   */
9560
  /* 2.6.0 test was *cur not RAW */
9561
106M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9562
147k
      xmlParseCDSect(ctxt);
9563
147k
  }
9564
9565
  /*
9566
   * Third case :  a comment
9567
   */
9568
105M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9569
448k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9570
263k
      xmlParseComment(ctxt);
9571
263k
  }
9572
9573
  /*
9574
   * Fourth case :  a sub-element.
9575
   */
9576
105M
  else if (*cur == '<') {
9577
6.56M
            if (NXT(1) == '/') {
9578
1.08M
                if (ctxt->nameNr <= oldNameNr)
9579
64.4k
                    break;
9580
1.01M
          xmlParseElementEnd(ctxt);
9581
5.48M
            } else {
9582
5.48M
          xmlParseElementStart(ctxt);
9583
5.48M
            }
9584
6.56M
  }
9585
9586
  /*
9587
   * Fifth case : a reference. If if has not been resolved,
9588
   *    parsing returns it's Name, create the node
9589
   */
9590
9591
99.1M
  else if (*cur == '&') {
9592
1.51M
      xmlParseReference(ctxt);
9593
1.51M
  }
9594
9595
  /*
9596
   * Last case, text. Note that References are handled directly.
9597
   */
9598
97.5M
  else {
9599
97.5M
      xmlParseCharDataInternal(ctxt, 0);
9600
97.5M
  }
9601
9602
106M
  SHRINK;
9603
106M
  GROW;
9604
106M
    }
9605
9606
184k
    if ((ctxt->nameNr > oldNameNr) &&
9607
48.7k
        (ctxt->input->cur >= ctxt->input->end) &&
9608
43.6k
        (ctxt->wellFormed)) {
9609
1.60k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9610
1.60k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9611
1.60k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9612
1.60k
                "Premature end of data in tag %s line %d\n",
9613
1.60k
                name, line, NULL);
9614
1.60k
    }
9615
9616
    /*
9617
     * Clean up in error case
9618
     */
9619
9620
1.16M
    while (ctxt->nodeNr > oldNodeNr)
9621
985k
        nodePop(ctxt);
9622
9623
1.44M
    while (ctxt->nameNr > oldNameNr) {
9624
1.25M
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9625
9626
1.25M
        if (tag->nsNr != 0)
9627
267k
            xmlParserNsPop(ctxt, tag->nsNr);
9628
9629
1.25M
        namePop(ctxt);
9630
1.25M
    }
9631
9632
1.44M
    while (ctxt->spaceNr > oldSpaceNr)
9633
1.25M
        spacePop(ctxt);
9634
184k
}
9635
9636
/**
9637
 * Parse XML element content. This is useful if you're only interested
9638
 * in custom SAX callbacks. If you want a node list, use
9639
 * #xmlCtxtParseContent.
9640
 *
9641
 * @param ctxt  an XML parser context
9642
 */
9643
void
9644
0
xmlParseContent(xmlParserCtxt *ctxt) {
9645
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9646
0
        return;
9647
9648
0
    xmlCtxtInitializeLate(ctxt);
9649
9650
0
    xmlParseContentInternal(ctxt);
9651
9652
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9653
0
}
9654
9655
/**
9656
 * Parse an XML element
9657
 *
9658
 * @deprecated Internal function, don't use.
9659
 *
9660
 *     [39] element ::= EmptyElemTag | STag content ETag
9661
 *
9662
 * [ WFC: Element Type Match ]
9663
 * The Name in an element's end-tag must match the element type in the
9664
 * start-tag.
9665
 *
9666
 * @param ctxt  an XML parser context
9667
 */
9668
9669
void
9670
198k
xmlParseElement(xmlParserCtxt *ctxt) {
9671
198k
    if (xmlParseElementStart(ctxt) != 0)
9672
56.5k
        return;
9673
9674
142k
    xmlParseContentInternal(ctxt);
9675
9676
142k
    if (ctxt->input->cur >= ctxt->input->end) {
9677
68.9k
        if (ctxt->wellFormed) {
9678
3.14k
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9679
3.14k
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9680
3.14k
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9681
3.14k
                    "Premature end of data in tag %s line %d\n",
9682
3.14k
                    name, line, NULL);
9683
3.14k
        }
9684
68.9k
        return;
9685
68.9k
    }
9686
9687
73.1k
    xmlParseElementEnd(ctxt);
9688
73.1k
}
9689
9690
/**
9691
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9692
 * opening tag was parsed, 1 if an empty element was parsed.
9693
 *
9694
 * Always consumes '<'.
9695
 *
9696
 * @param ctxt  an XML parser context
9697
 */
9698
static int
9699
5.68M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9700
5.68M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9701
5.68M
    const xmlChar *name;
9702
5.68M
    const xmlChar *prefix = NULL;
9703
5.68M
    const xmlChar *URI = NULL;
9704
5.68M
    xmlParserNodeInfo node_info;
9705
5.68M
    int line;
9706
5.68M
    xmlNodePtr cur;
9707
5.68M
    int nbNs = 0;
9708
9709
5.68M
    if (ctxt->nameNr > maxDepth) {
9710
334
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9711
334
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9712
334
                ctxt->nameNr);
9713
334
  return(-1);
9714
334
    }
9715
9716
    /* Capture start position */
9717
5.68M
    if (ctxt->record_info) {
9718
0
        node_info.begin_pos = ctxt->input->consumed +
9719
0
                          (CUR_PTR - ctxt->input->base);
9720
0
  node_info.begin_line = ctxt->input->line;
9721
0
    }
9722
9723
5.68M
    if (ctxt->spaceNr == 0)
9724
107k
  spacePush(ctxt, -1);
9725
5.57M
    else if (*ctxt->space == -2)
9726
739k
  spacePush(ctxt, -1);
9727
4.83M
    else
9728
4.83M
  spacePush(ctxt, *ctxt->space);
9729
9730
5.68M
    line = ctxt->input->line;
9731
5.68M
#ifdef LIBXML_SAX1_ENABLED
9732
5.68M
    if (ctxt->sax2)
9733
4.54M
#endif /* LIBXML_SAX1_ENABLED */
9734
4.54M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9735
1.13M
#ifdef LIBXML_SAX1_ENABLED
9736
1.13M
    else
9737
1.13M
  name = xmlParseStartTag(ctxt);
9738
5.68M
#endif /* LIBXML_SAX1_ENABLED */
9739
5.68M
    if (name == NULL) {
9740
860k
  spacePop(ctxt);
9741
860k
        return(-1);
9742
860k
    }
9743
4.82M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9744
4.82M
    cur = ctxt->node;
9745
9746
4.82M
#ifdef LIBXML_VALID_ENABLED
9747
    /*
9748
     * [ VC: Root Element Type ]
9749
     * The Name in the document type declaration must match the element
9750
     * type of the root element.
9751
     */
9752
4.82M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9753
61.6k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9754
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9755
4.82M
#endif /* LIBXML_VALID_ENABLED */
9756
9757
    /*
9758
     * Check for an Empty Element.
9759
     */
9760
4.82M
    if ((RAW == '/') && (NXT(1) == '>')) {
9761
846k
        SKIP(2);
9762
846k
  if (ctxt->sax2) {
9763
717k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9764
717k
    (!ctxt->disableSAX))
9765
593k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9766
717k
#ifdef LIBXML_SAX1_ENABLED
9767
717k
  } else {
9768
129k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9769
129k
    (!ctxt->disableSAX))
9770
115k
    ctxt->sax->endElement(ctxt->userData, name);
9771
129k
#endif /* LIBXML_SAX1_ENABLED */
9772
129k
  }
9773
846k
  namePop(ctxt);
9774
846k
  spacePop(ctxt);
9775
846k
  if (nbNs > 0)
9776
62.5k
      xmlParserNsPop(ctxt, nbNs);
9777
846k
  if (cur != NULL && ctxt->record_info) {
9778
0
            node_info.node = cur;
9779
0
            node_info.end_pos = ctxt->input->consumed +
9780
0
                                (CUR_PTR - ctxt->input->base);
9781
0
            node_info.end_line = ctxt->input->line;
9782
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9783
0
  }
9784
846k
  return(1);
9785
846k
    }
9786
3.97M
    if (RAW == '>') {
9787
2.41M
        NEXT1;
9788
2.41M
        if (cur != NULL && ctxt->record_info) {
9789
0
            node_info.node = cur;
9790
0
            node_info.end_pos = 0;
9791
0
            node_info.end_line = 0;
9792
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9793
0
        }
9794
2.41M
    } else {
9795
1.55M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9796
1.55M
         "Couldn't find end of Start Tag %s line %d\n",
9797
1.55M
                    name, line, NULL);
9798
9799
  /*
9800
   * end of parsing of this node.
9801
   */
9802
1.55M
  nodePop(ctxt);
9803
1.55M
  namePop(ctxt);
9804
1.55M
  spacePop(ctxt);
9805
1.55M
  if (nbNs > 0)
9806
240k
      xmlParserNsPop(ctxt, nbNs);
9807
1.55M
  return(-1);
9808
1.55M
    }
9809
9810
2.41M
    return(0);
9811
3.97M
}
9812
9813
/**
9814
 * Parse the end of an XML element. Always consumes '</'.
9815
 *
9816
 * @param ctxt  an XML parser context
9817
 */
9818
static void
9819
1.09M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9820
1.09M
    xmlNodePtr cur = ctxt->node;
9821
9822
1.09M
    if (ctxt->nameNr <= 0) {
9823
192
        if ((RAW == '<') && (NXT(1) == '/'))
9824
55
            SKIP(2);
9825
192
        return;
9826
192
    }
9827
9828
    /*
9829
     * parse the end of tag: '</' should be here.
9830
     */
9831
1.09M
    if (ctxt->sax2) {
9832
964k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9833
964k
  namePop(ctxt);
9834
964k
    }
9835
128k
#ifdef LIBXML_SAX1_ENABLED
9836
128k
    else
9837
128k
  xmlParseEndTag1(ctxt, 0);
9838
1.09M
#endif /* LIBXML_SAX1_ENABLED */
9839
9840
    /*
9841
     * Capture end position
9842
     */
9843
1.09M
    if (cur != NULL && ctxt->record_info) {
9844
0
        xmlParserNodeInfoPtr node_info;
9845
9846
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9847
0
        if (node_info != NULL) {
9848
0
            node_info->end_pos = ctxt->input->consumed +
9849
0
                                 (CUR_PTR - ctxt->input->base);
9850
0
            node_info->end_line = ctxt->input->line;
9851
0
        }
9852
0
    }
9853
1.09M
}
9854
9855
/**
9856
 * Parse the XML version value.
9857
 *
9858
 * @deprecated Internal function, don't use.
9859
 *
9860
 *     [26] VersionNum ::= '1.' [0-9]+
9861
 *
9862
 * In practice allow [0-9].[0-9]+ at that level
9863
 *
9864
 * @param ctxt  an XML parser context
9865
 * @returns the string giving the XML version number, or NULL
9866
 */
9867
xmlChar *
9868
189k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9869
189k
    xmlChar *buf = NULL;
9870
189k
    int len = 0;
9871
189k
    int size = 10;
9872
189k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9873
37.2k
                    XML_MAX_TEXT_LENGTH :
9874
189k
                    XML_MAX_NAME_LENGTH;
9875
189k
    xmlChar cur;
9876
9877
189k
    buf = xmlMalloc(size);
9878
189k
    if (buf == NULL) {
9879
131
  xmlErrMemory(ctxt);
9880
131
  return(NULL);
9881
131
    }
9882
189k
    cur = CUR;
9883
189k
    if (!((cur >= '0') && (cur <= '9'))) {
9884
8.87k
  xmlFree(buf);
9885
8.87k
  return(NULL);
9886
8.87k
    }
9887
180k
    buf[len++] = cur;
9888
180k
    NEXT;
9889
180k
    cur=CUR;
9890
180k
    if (cur != '.') {
9891
5.17k
  xmlFree(buf);
9892
5.17k
  return(NULL);
9893
5.17k
    }
9894
175k
    buf[len++] = cur;
9895
175k
    NEXT;
9896
175k
    cur=CUR;
9897
9.78M
    while ((cur >= '0') && (cur <= '9')) {
9898
9.61M
  if (len + 1 >= size) {
9899
5.27k
      xmlChar *tmp;
9900
5.27k
            int newSize;
9901
9902
5.27k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9903
5.27k
            if (newSize < 0) {
9904
25
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9905
25
                xmlFree(buf);
9906
25
                return(NULL);
9907
25
            }
9908
5.25k
      tmp = xmlRealloc(buf, newSize);
9909
5.25k
      if (tmp == NULL) {
9910
14
    xmlErrMemory(ctxt);
9911
14
          xmlFree(buf);
9912
14
    return(NULL);
9913
14
      }
9914
5.23k
      buf = tmp;
9915
5.23k
            size = newSize;
9916
5.23k
  }
9917
9.61M
  buf[len++] = cur;
9918
9.61M
  NEXT;
9919
9.61M
  cur=CUR;
9920
9.61M
    }
9921
175k
    buf[len] = 0;
9922
175k
    return(buf);
9923
175k
}
9924
9925
/**
9926
 * Parse the XML version.
9927
 *
9928
 * @deprecated Internal function, don't use.
9929
 *
9930
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9931
 *
9932
 *     [25] Eq ::= S? '=' S?
9933
 *
9934
 * @param ctxt  an XML parser context
9935
 * @returns the version string, e.g. "1.0"
9936
 */
9937
9938
xmlChar *
9939
446k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9940
446k
    xmlChar *version = NULL;
9941
9942
446k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9943
197k
  SKIP(7);
9944
197k
  SKIP_BLANKS;
9945
197k
  if (RAW != '=') {
9946
5.65k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9947
5.65k
      return(NULL);
9948
5.65k
        }
9949
192k
  NEXT;
9950
192k
  SKIP_BLANKS;
9951
192k
  if (RAW == '"') {
9952
165k
      NEXT;
9953
165k
      version = xmlParseVersionNum(ctxt);
9954
165k
      if (RAW != '"') {
9955
8.61k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9956
8.61k
      } else
9957
157k
          NEXT;
9958
165k
  } else if (RAW == '\''){
9959
23.7k
      NEXT;
9960
23.7k
      version = xmlParseVersionNum(ctxt);
9961
23.7k
      if (RAW != '\'') {
9962
7.07k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9963
7.07k
      } else
9964
16.6k
          NEXT;
9965
23.7k
  } else {
9966
2.70k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9967
2.70k
  }
9968
192k
    }
9969
440k
    return(version);
9970
446k
}
9971
9972
/**
9973
 * Parse the XML encoding name
9974
 *
9975
 * @deprecated Internal function, don't use.
9976
 *
9977
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9978
 *
9979
 * @param ctxt  an XML parser context
9980
 * @returns the encoding name value or NULL
9981
 */
9982
xmlChar *
9983
96.2k
xmlParseEncName(xmlParserCtxt *ctxt) {
9984
96.2k
    xmlChar *buf = NULL;
9985
96.2k
    int len = 0;
9986
96.2k
    int size = 10;
9987
96.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9988
16.2k
                    XML_MAX_TEXT_LENGTH :
9989
96.2k
                    XML_MAX_NAME_LENGTH;
9990
96.2k
    xmlChar cur;
9991
9992
96.2k
    cur = CUR;
9993
96.2k
    if (((cur >= 'a') && (cur <= 'z')) ||
9994
86.9k
        ((cur >= 'A') && (cur <= 'Z'))) {
9995
86.9k
  buf = xmlMalloc(size);
9996
86.9k
  if (buf == NULL) {
9997
99
      xmlErrMemory(ctxt);
9998
99
      return(NULL);
9999
99
  }
10000
10001
86.8k
  buf[len++] = cur;
10002
86.8k
  NEXT;
10003
86.8k
  cur = CUR;
10004
80.0M
  while (((cur >= 'a') && (cur <= 'z')) ||
10005
24.5M
         ((cur >= 'A') && (cur <= 'Z')) ||
10006
1.34M
         ((cur >= '0') && (cur <= '9')) ||
10007
1.06M
         (cur == '.') || (cur == '_') ||
10008
79.9M
         (cur == '-')) {
10009
79.9M
      if (len + 1 >= size) {
10010
93.3k
          xmlChar *tmp;
10011
93.3k
                int newSize;
10012
10013
93.3k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10014
93.3k
                if (newSize < 0) {
10015
926
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10016
926
                    xmlFree(buf);
10017
926
                    return(NULL);
10018
926
                }
10019
92.4k
    tmp = xmlRealloc(buf, newSize);
10020
92.4k
    if (tmp == NULL) {
10021
49
        xmlErrMemory(ctxt);
10022
49
        xmlFree(buf);
10023
49
        return(NULL);
10024
49
    }
10025
92.3k
    buf = tmp;
10026
92.3k
                size = newSize;
10027
92.3k
      }
10028
79.9M
      buf[len++] = cur;
10029
79.9M
      NEXT;
10030
79.9M
      cur = CUR;
10031
79.9M
        }
10032
85.8k
  buf[len] = 0;
10033
85.8k
    } else {
10034
9.38k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10035
9.38k
    }
10036
95.2k
    return(buf);
10037
96.2k
}
10038
10039
/**
10040
 * Parse the XML encoding declaration
10041
 *
10042
 * @deprecated Internal function, don't use.
10043
 *
10044
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10045
 *                           "'" EncName "'")
10046
 *
10047
 * this setups the conversion filters.
10048
 *
10049
 * @param ctxt  an XML parser context
10050
 * @returns the encoding value or NULL
10051
 */
10052
10053
const xmlChar *
10054
430k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10055
430k
    xmlChar *encoding = NULL;
10056
10057
430k
    SKIP_BLANKS;
10058
430k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10059
329k
        return(NULL);
10060
10061
101k
    SKIP(8);
10062
101k
    SKIP_BLANKS;
10063
101k
    if (RAW != '=') {
10064
2.60k
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10065
2.60k
        return(NULL);
10066
2.60k
    }
10067
99.1k
    NEXT;
10068
99.1k
    SKIP_BLANKS;
10069
99.1k
    if (RAW == '"') {
10070
77.5k
        NEXT;
10071
77.5k
        encoding = xmlParseEncName(ctxt);
10072
77.5k
        if (RAW != '"') {
10073
9.44k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10074
9.44k
            xmlFree(encoding);
10075
9.44k
            return(NULL);
10076
9.44k
        } else
10077
68.0k
            NEXT;
10078
77.5k
    } else if (RAW == '\''){
10079
18.7k
        NEXT;
10080
18.7k
        encoding = xmlParseEncName(ctxt);
10081
18.7k
        if (RAW != '\'') {
10082
2.77k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10083
2.77k
            xmlFree(encoding);
10084
2.77k
            return(NULL);
10085
2.77k
        } else
10086
16.0k
            NEXT;
10087
18.7k
    } else {
10088
2.88k
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10089
2.88k
    }
10090
10091
86.9k
    if (encoding == NULL)
10092
6.13k
        return(NULL);
10093
10094
80.8k
    xmlSetDeclaredEncoding(ctxt, encoding);
10095
10096
80.8k
    return(ctxt->encoding);
10097
86.9k
}
10098
10099
/**
10100
 * Parse the XML standalone declaration
10101
 *
10102
 * @deprecated Internal function, don't use.
10103
 *
10104
 *     [32] SDDecl ::= S 'standalone' Eq
10105
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10106
 *
10107
 * [ VC: Standalone Document Declaration ]
10108
 * TODO The standalone document declaration must have the value "no"
10109
 * if any external markup declarations contain declarations of:
10110
 *  - attributes with default values, if elements to which these
10111
 *    attributes apply appear in the document without specifications
10112
 *    of values for these attributes, or
10113
 *  - entities (other than amp, lt, gt, apos, quot), if references
10114
 *    to those entities appear in the document, or
10115
 *  - attributes with values subject to normalization, where the
10116
 *    attribute appears in the document with a value which will change
10117
 *    as a result of normalization, or
10118
 *  - element types with element content, if white space occurs directly
10119
 *    within any instance of those types.
10120
 *
10121
 * @param ctxt  an XML parser context
10122
 * @returns
10123
 *   1 if standalone="yes"
10124
 *   0 if standalone="no"
10125
 *  -2 if standalone attribute is missing or invalid
10126
 *    (A standalone value of -2 means that the XML declaration was found,
10127
 *     but no value was specified for the standalone attribute).
10128
 */
10129
10130
int
10131
114k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10132
114k
    int standalone = -2;
10133
10134
114k
    SKIP_BLANKS;
10135
114k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10136
83.1k
  SKIP(10);
10137
83.1k
        SKIP_BLANKS;
10138
83.1k
  if (RAW != '=') {
10139
380
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10140
380
      return(standalone);
10141
380
        }
10142
82.7k
  NEXT;
10143
82.7k
  SKIP_BLANKS;
10144
82.7k
        if (RAW == '\''){
10145
2.28k
      NEXT;
10146
2.28k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10147
553
          standalone = 0;
10148
553
                SKIP(2);
10149
1.73k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10150
919
                 (NXT(2) == 's')) {
10151
712
          standalone = 1;
10152
712
    SKIP(3);
10153
1.02k
            } else {
10154
1.02k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10155
1.02k
      }
10156
2.28k
      if (RAW != '\'') {
10157
1.66k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10158
1.66k
      } else
10159
625
          NEXT;
10160
80.4k
  } else if (RAW == '"'){
10161
80.2k
      NEXT;
10162
80.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10163
705
          standalone = 0;
10164
705
    SKIP(2);
10165
79.5k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10166
79.0k
                 (NXT(2) == 's')) {
10167
78.8k
          standalone = 1;
10168
78.8k
                SKIP(3);
10169
78.8k
            } else {
10170
707
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10171
707
      }
10172
80.2k
      if (RAW != '"') {
10173
1.20k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10174
1.20k
      } else
10175
79.0k
          NEXT;
10176
80.2k
  } else {
10177
211
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10178
211
        }
10179
82.7k
    }
10180
114k
    return(standalone);
10181
114k
}
10182
10183
/**
10184
 * Parse an XML declaration header
10185
 *
10186
 * @deprecated Internal function, don't use.
10187
 *
10188
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10189
 * @param ctxt  an XML parser context
10190
 */
10191
10192
void
10193
143k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10194
143k
    xmlChar *version;
10195
10196
    /*
10197
     * This value for standalone indicates that the document has an
10198
     * XML declaration but it does not have a standalone attribute.
10199
     * It will be overwritten later if a standalone attribute is found.
10200
     */
10201
10202
143k
    ctxt->standalone = -2;
10203
10204
    /*
10205
     * We know that '<?xml' is here.
10206
     */
10207
143k
    SKIP(5);
10208
10209
143k
    if (!IS_BLANK_CH(RAW)) {
10210
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10211
0
                 "Blank needed after '<?xml'\n");
10212
0
    }
10213
143k
    SKIP_BLANKS;
10214
10215
    /*
10216
     * We must have the VersionInfo here.
10217
     */
10218
143k
    version = xmlParseVersionInfo(ctxt);
10219
143k
    if (version == NULL) {
10220
27.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10221
115k
    } else {
10222
115k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10223
      /*
10224
       * Changed here for XML-1.0 5th edition
10225
       */
10226
8.73k
      if (ctxt->options & XML_PARSE_OLD10) {
10227
952
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10228
952
                "Unsupported version '%s'\n",
10229
952
                version);
10230
7.78k
      } else {
10231
7.78k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10232
5.52k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10233
5.52k
                      "Unsupported version '%s'\n",
10234
5.52k
          version, NULL);
10235
5.52k
    } else {
10236
2.26k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10237
2.26k
              "Unsupported version '%s'\n",
10238
2.26k
              version);
10239
2.26k
    }
10240
7.78k
      }
10241
8.73k
  }
10242
115k
  if (ctxt->version != NULL)
10243
0
      xmlFree(ctxt->version);
10244
115k
  ctxt->version = version;
10245
115k
    }
10246
10247
    /*
10248
     * We may have the encoding declaration
10249
     */
10250
143k
    if (!IS_BLANK_CH(RAW)) {
10251
43.0k
        if ((RAW == '?') && (NXT(1) == '>')) {
10252
15.1k
      SKIP(2);
10253
15.1k
      return;
10254
15.1k
  }
10255
27.8k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10256
27.8k
    }
10257
128k
    xmlParseEncodingDecl(ctxt);
10258
10259
    /*
10260
     * We may have the standalone status.
10261
     */
10262
128k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10263
16.8k
        if ((RAW == '?') && (NXT(1) == '>')) {
10264
13.7k
      SKIP(2);
10265
13.7k
      return;
10266
13.7k
  }
10267
3.14k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10268
3.14k
    }
10269
10270
    /*
10271
     * We can grow the input buffer freely at that point
10272
     */
10273
114k
    GROW;
10274
10275
114k
    SKIP_BLANKS;
10276
114k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10277
10278
114k
    SKIP_BLANKS;
10279
114k
    if ((RAW == '?') && (NXT(1) == '>')) {
10280
81.8k
        SKIP(2);
10281
81.8k
    } else if (RAW == '>') {
10282
        /* Deprecated old WD ... */
10283
2.75k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10284
2.75k
  NEXT;
10285
30.0k
    } else {
10286
30.0k
        int c;
10287
10288
30.0k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10289
6.05M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10290
6.05M
               ((c = CUR) != 0)) {
10291
6.03M
            NEXT;
10292
6.03M
            if (c == '>')
10293
14.5k
                break;
10294
6.03M
        }
10295
30.0k
    }
10296
114k
}
10297
10298
/**
10299
 * @since 2.14.0
10300
 *
10301
 * @param ctxt  parser context
10302
 * @returns the version from the XML declaration.
10303
 */
10304
const xmlChar *
10305
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10306
0
    if (ctxt == NULL)
10307
0
        return(NULL);
10308
10309
0
    return(ctxt->version);
10310
0
}
10311
10312
/**
10313
 * @since 2.14.0
10314
 *
10315
 * @param ctxt  parser context
10316
 * @returns the value from the standalone document declaration.
10317
 */
10318
int
10319
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10320
0
    if (ctxt == NULL)
10321
0
        return(0);
10322
10323
0
    return(ctxt->standalone);
10324
0
}
10325
10326
/**
10327
 * Parse an XML Misc* optional field.
10328
 *
10329
 * @deprecated Internal function, don't use.
10330
 *
10331
 *     [27] Misc ::= Comment | PI |  S
10332
 * @param ctxt  an XML parser context
10333
 */
10334
10335
void
10336
725k
xmlParseMisc(xmlParserCtxt *ctxt) {
10337
934k
    while (PARSER_STOPPED(ctxt) == 0) {
10338
900k
        SKIP_BLANKS;
10339
900k
        GROW;
10340
900k
        if ((RAW == '<') && (NXT(1) == '?')) {
10341
77.8k
      xmlParsePI(ctxt);
10342
823k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10343
131k
      xmlParseComment(ctxt);
10344
692k
        } else {
10345
692k
            break;
10346
692k
        }
10347
900k
    }
10348
725k
}
10349
10350
static void
10351
392k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10352
392k
    xmlDocPtr doc;
10353
10354
    /*
10355
     * SAX: end of the document processing.
10356
     */
10357
392k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10358
392k
        ctxt->sax->endDocument(ctxt->userData);
10359
10360
    /*
10361
     * Remove locally kept entity definitions if the tree was not built
10362
     */
10363
392k
    doc = ctxt->myDoc;
10364
392k
    if ((doc != NULL) &&
10365
368k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10366
4.75k
        xmlFreeDoc(doc);
10367
4.75k
        ctxt->myDoc = NULL;
10368
4.75k
    }
10369
392k
}
10370
10371
/**
10372
 * Parse an XML document and invoke the SAX handlers. This is useful
10373
 * if you're only interested in custom SAX callbacks. If you want a
10374
 * document tree, use #xmlCtxtParseDocument.
10375
 *
10376
 * @param ctxt  an XML parser context
10377
 * @returns 0, -1 in case of error.
10378
 */
10379
10380
int
10381
348k
xmlParseDocument(xmlParserCtxt *ctxt) {
10382
348k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10383
0
        return(-1);
10384
10385
348k
    GROW;
10386
10387
    /*
10388
     * SAX: detecting the level.
10389
     */
10390
348k
    xmlCtxtInitializeLate(ctxt);
10391
10392
348k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10393
348k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10394
348k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10395
348k
    }
10396
10397
348k
    xmlDetectEncoding(ctxt);
10398
10399
348k
    if (CUR == 0) {
10400
3.20k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10401
3.20k
  return(-1);
10402
3.20k
    }
10403
10404
345k
    GROW;
10405
345k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10406
10407
  /*
10408
   * Note that we will switch encoding on the fly.
10409
   */
10410
134k
  xmlParseXMLDecl(ctxt);
10411
134k
  SKIP_BLANKS;
10412
211k
    } else {
10413
211k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10414
211k
        if (ctxt->version == NULL) {
10415
81
            xmlErrMemory(ctxt);
10416
81
            return(-1);
10417
81
        }
10418
211k
    }
10419
345k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10420
317k
        ctxt->sax->startDocument(ctxt->userData);
10421
345k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10422
317k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10423
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10424
0
    }
10425
10426
    /*
10427
     * The Misc part of the Prolog
10428
     */
10429
345k
    xmlParseMisc(ctxt);
10430
10431
    /*
10432
     * Then possibly doc type declaration(s) and more Misc
10433
     * (doctypedecl Misc*)?
10434
     */
10435
345k
    GROW;
10436
345k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10437
10438
181k
  ctxt->inSubset = 1;
10439
181k
  xmlParseDocTypeDecl(ctxt);
10440
181k
  if (RAW == '[') {
10441
146k
      xmlParseInternalSubset(ctxt);
10442
146k
  } else if (RAW == '>') {
10443
26.3k
            NEXT;
10444
26.3k
        }
10445
10446
  /*
10447
   * Create and update the external subset.
10448
   */
10449
181k
  ctxt->inSubset = 2;
10450
181k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10451
181k
      (!ctxt->disableSAX))
10452
134k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10453
134k
                                ctxt->extSubSystem, ctxt->extSubURI);
10454
181k
  ctxt->inSubset = 0;
10455
10456
181k
        xmlCleanSpecialAttr(ctxt);
10457
10458
181k
  xmlParseMisc(ctxt);
10459
181k
    }
10460
10461
    /*
10462
     * Time to start parsing the tree itself
10463
     */
10464
345k
    GROW;
10465
345k
    if (RAW != '<') {
10466
146k
        if (ctxt->wellFormed)
10467
24.1k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10468
24.1k
                           "Start tag expected, '<' not found\n");
10469
198k
    } else {
10470
198k
  xmlParseElement(ctxt);
10471
10472
  /*
10473
   * The Misc part at the end
10474
   */
10475
198k
  xmlParseMisc(ctxt);
10476
10477
198k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10478
198k
    }
10479
10480
345k
    ctxt->instate = XML_PARSER_EOF;
10481
345k
    xmlFinishDocument(ctxt);
10482
10483
345k
    if (! ctxt->wellFormed) {
10484
288k
  ctxt->valid = 0;
10485
288k
  return(-1);
10486
288k
    }
10487
10488
56.7k
    return(0);
10489
345k
}
10490
10491
/**
10492
 * Parse a general parsed entity
10493
 * An external general parsed entity is well-formed if it matches the
10494
 * production labeled extParsedEnt.
10495
 *
10496
 * @deprecated Internal function, don't use.
10497
 *
10498
 *     [78] extParsedEnt ::= TextDecl? content
10499
 *
10500
 * @param ctxt  an XML parser context
10501
 * @returns 0, -1 in case of error. the parser context is augmented
10502
 *                as a result of the parsing.
10503
 */
10504
10505
int
10506
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10507
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10508
0
        return(-1);
10509
10510
0
    xmlCtxtInitializeLate(ctxt);
10511
10512
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10513
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10514
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10515
0
    }
10516
10517
0
    xmlDetectEncoding(ctxt);
10518
10519
0
    if (CUR == 0) {
10520
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10521
0
    }
10522
10523
    /*
10524
     * Check for the XMLDecl in the Prolog.
10525
     */
10526
0
    GROW;
10527
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10528
10529
  /*
10530
   * Note that we will switch encoding on the fly.
10531
   */
10532
0
  xmlParseXMLDecl(ctxt);
10533
0
  SKIP_BLANKS;
10534
0
    } else {
10535
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10536
0
    }
10537
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10538
0
        ctxt->sax->startDocument(ctxt->userData);
10539
10540
    /*
10541
     * Doing validity checking on chunk doesn't make sense
10542
     */
10543
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10544
0
    ctxt->validate = 0;
10545
0
    ctxt->depth = 0;
10546
10547
0
    xmlParseContentInternal(ctxt);
10548
10549
0
    if (ctxt->input->cur < ctxt->input->end)
10550
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10551
10552
    /*
10553
     * SAX: end of the document processing.
10554
     */
10555
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10556
0
        ctxt->sax->endDocument(ctxt->userData);
10557
10558
0
    if (! ctxt->wellFormed) return(-1);
10559
0
    return(0);
10560
0
}
10561
10562
#ifdef LIBXML_PUSH_ENABLED
10563
/************************************************************************
10564
 *                  *
10565
 *    Progressive parsing interfaces        *
10566
 *                  *
10567
 ************************************************************************/
10568
10569
/**
10570
 * Check whether the input buffer contains a character.
10571
 *
10572
 * @param ctxt  an XML parser context
10573
 * @param c  character
10574
 */
10575
static int
10576
187k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10577
187k
    const xmlChar *cur;
10578
10579
187k
    if (ctxt->checkIndex == 0) {
10580
93.7k
        cur = ctxt->input->cur + 1;
10581
93.7k
    } else {
10582
93.4k
        cur = ctxt->input->cur + ctxt->checkIndex;
10583
93.4k
    }
10584
10585
187k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10586
98.1k
        size_t index = ctxt->input->end - ctxt->input->cur;
10587
10588
98.1k
        if (index > LONG_MAX) {
10589
0
            ctxt->checkIndex = 0;
10590
0
            return(1);
10591
0
        }
10592
98.1k
        ctxt->checkIndex = index;
10593
98.1k
        return(0);
10594
98.1k
    } else {
10595
89.0k
        ctxt->checkIndex = 0;
10596
89.0k
        return(1);
10597
89.0k
    }
10598
187k
}
10599
10600
/**
10601
 * Check whether the input buffer contains a string.
10602
 *
10603
 * @param ctxt  an XML parser context
10604
 * @param startDelta  delta to apply at the start
10605
 * @param str  string
10606
 * @param strLen  length of string
10607
 */
10608
static const xmlChar *
10609
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10610
671k
                     const char *str, size_t strLen) {
10611
671k
    const xmlChar *cur, *term;
10612
10613
671k
    if (ctxt->checkIndex == 0) {
10614
318k
        cur = ctxt->input->cur + startDelta;
10615
352k
    } else {
10616
352k
        cur = ctxt->input->cur + ctxt->checkIndex;
10617
352k
    }
10618
10619
671k
    term = BAD_CAST strstr((const char *) cur, str);
10620
671k
    if (term == NULL) {
10621
359k
        const xmlChar *end = ctxt->input->end;
10622
359k
        size_t index;
10623
10624
        /* Rescan (strLen - 1) characters. */
10625
359k
        if ((size_t) (end - cur) < strLen)
10626
7.71k
            end = cur;
10627
351k
        else
10628
351k
            end -= strLen - 1;
10629
359k
        index = end - ctxt->input->cur;
10630
359k
        if (index > LONG_MAX) {
10631
0
            ctxt->checkIndex = 0;
10632
0
            return(ctxt->input->end - strLen);
10633
0
        }
10634
359k
        ctxt->checkIndex = index;
10635
359k
    } else {
10636
312k
        ctxt->checkIndex = 0;
10637
312k
    }
10638
10639
671k
    return(term);
10640
671k
}
10641
10642
/**
10643
 * Check whether the input buffer contains terminated char data.
10644
 *
10645
 * @param ctxt  an XML parser context
10646
 */
10647
static int
10648
208k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10649
208k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10650
208k
    const xmlChar *end = ctxt->input->end;
10651
208k
    size_t index;
10652
10653
9.24M
    while (cur < end) {
10654
9.18M
        if ((*cur == '<') || (*cur == '&')) {
10655
156k
            ctxt->checkIndex = 0;
10656
156k
            return(1);
10657
156k
        }
10658
9.03M
        cur++;
10659
9.03M
    }
10660
10661
52.8k
    index = cur - ctxt->input->cur;
10662
52.8k
    if (index > LONG_MAX) {
10663
0
        ctxt->checkIndex = 0;
10664
0
        return(1);
10665
0
    }
10666
52.8k
    ctxt->checkIndex = index;
10667
52.8k
    return(0);
10668
52.8k
}
10669
10670
/**
10671
 * Check whether there's enough data in the input buffer to finish parsing
10672
 * a start tag. This has to take quotes into account.
10673
 *
10674
 * @param ctxt  an XML parser context
10675
 */
10676
static int
10677
1.95M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10678
1.95M
    const xmlChar *cur;
10679
1.95M
    const xmlChar *end = ctxt->input->end;
10680
1.95M
    int state = ctxt->endCheckState;
10681
1.95M
    size_t index;
10682
10683
1.95M
    if (ctxt->checkIndex == 0)
10684
493k
        cur = ctxt->input->cur + 1;
10685
1.45M
    else
10686
1.45M
        cur = ctxt->input->cur + ctxt->checkIndex;
10687
10688
608M
    while (cur < end) {
10689
606M
        if (state) {
10690
534M
            if (*cur == state)
10691
764k
                state = 0;
10692
534M
        } else if (*cur == '\'' || *cur == '"') {
10693
773k
            state = *cur;
10694
71.1M
        } else if (*cur == '>') {
10695
466k
            ctxt->checkIndex = 0;
10696
466k
            ctxt->endCheckState = 0;
10697
466k
            return(1);
10698
466k
        }
10699
606M
        cur++;
10700
606M
    }
10701
10702
1.48M
    index = cur - ctxt->input->cur;
10703
1.48M
    if (index > LONG_MAX) {
10704
0
        ctxt->checkIndex = 0;
10705
0
        ctxt->endCheckState = 0;
10706
0
        return(1);
10707
0
    }
10708
1.48M
    ctxt->checkIndex = index;
10709
1.48M
    ctxt->endCheckState = state;
10710
1.48M
    return(0);
10711
1.48M
}
10712
10713
/**
10714
 * Check whether there's enough data in the input buffer to finish parsing
10715
 * the internal subset.
10716
 *
10717
 * @param ctxt  an XML parser context
10718
 */
10719
static int
10720
928k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10721
    /*
10722
     * Sorry, but progressive parsing of the internal subset is not
10723
     * supported. We first check that the full content of the internal
10724
     * subset is available and parsing is launched only at that point.
10725
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10726
     * not in a ']]>' sequence which are conditional sections.
10727
     */
10728
928k
    const xmlChar *cur, *start;
10729
928k
    const xmlChar *end = ctxt->input->end;
10730
928k
    int state = ctxt->endCheckState;
10731
928k
    size_t index;
10732
10733
928k
    if (ctxt->checkIndex == 0) {
10734
26.1k
        cur = ctxt->input->cur + 1;
10735
902k
    } else {
10736
902k
        cur = ctxt->input->cur + ctxt->checkIndex;
10737
902k
    }
10738
928k
    start = cur;
10739
10740
537M
    while (cur < end) {
10741
536M
        if (state == '-') {
10742
1.12M
            if ((*cur == '-') &&
10743
42.0k
                (cur[1] == '-') &&
10744
23.3k
                (cur[2] == '>')) {
10745
18.1k
                state = 0;
10746
18.1k
                cur += 3;
10747
18.1k
                start = cur;
10748
18.1k
                continue;
10749
18.1k
            }
10750
1.12M
        }
10751
535M
        else if (state == ']') {
10752
21.5k
            if (*cur == '>') {
10753
15.2k
                ctxt->checkIndex = 0;
10754
15.2k
                ctxt->endCheckState = 0;
10755
15.2k
                return(1);
10756
15.2k
            }
10757
6.21k
            if (IS_BLANK_CH(*cur)) {
10758
3.44k
                state = ' ';
10759
3.44k
            } else if (*cur != ']') {
10760
1.52k
                state = 0;
10761
1.52k
                start = cur;
10762
1.52k
                continue;
10763
1.52k
            }
10764
6.21k
        }
10765
535M
        else if (state == ' ') {
10766
6.86k
            if (*cur == '>') {
10767
379
                ctxt->checkIndex = 0;
10768
379
                ctxt->endCheckState = 0;
10769
379
                return(1);
10770
379
            }
10771
6.48k
            if (!IS_BLANK_CH(*cur)) {
10772
3.02k
                state = 0;
10773
3.02k
                start = cur;
10774
3.02k
                continue;
10775
3.02k
            }
10776
6.48k
        }
10777
535M
        else if (state != 0) {
10778
522M
            if (*cur == state) {
10779
82.2k
                state = 0;
10780
82.2k
                start = cur + 1;
10781
82.2k
            }
10782
522M
        }
10783
12.6M
        else if (*cur == '<') {
10784
131k
            if ((cur[1] == '!') &&
10785
82.0k
                (cur[2] == '-') &&
10786
19.0k
                (cur[3] == '-')) {
10787
18.2k
                state = '-';
10788
18.2k
                cur += 4;
10789
                /* Don't treat <!--> as comment */
10790
18.2k
                start = cur;
10791
18.2k
                continue;
10792
18.2k
            }
10793
131k
        }
10794
12.4M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10795
104k
            state = *cur;
10796
104k
        }
10797
10798
536M
        cur++;
10799
536M
    }
10800
10801
    /*
10802
     * Rescan the three last characters to detect "<!--" and "-->"
10803
     * split across chunks.
10804
     */
10805
912k
    if ((state == 0) || (state == '-')) {
10806
37.3k
        if (cur - start < 3)
10807
3.43k
            cur = start;
10808
33.9k
        else
10809
33.9k
            cur -= 3;
10810
37.3k
    }
10811
912k
    index = cur - ctxt->input->cur;
10812
912k
    if (index > LONG_MAX) {
10813
0
        ctxt->checkIndex = 0;
10814
0
        ctxt->endCheckState = 0;
10815
0
        return(1);
10816
0
    }
10817
912k
    ctxt->checkIndex = index;
10818
912k
    ctxt->endCheckState = state;
10819
912k
    return(0);
10820
912k
}
10821
10822
/**
10823
 * Try to progress on parsing
10824
 *
10825
 * @param ctxt  an XML parser context
10826
 * @param terminate  last chunk indicator
10827
 * @returns zero if no parsing was possible
10828
 */
10829
static int
10830
3.54M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10831
3.54M
    int ret = 0;
10832
3.54M
    size_t avail;
10833
3.54M
    xmlChar cur, next;
10834
10835
3.54M
    if (ctxt->input == NULL)
10836
0
        return(0);
10837
10838
3.54M
    if ((ctxt->input != NULL) &&
10839
3.54M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10840
6.52k
        xmlParserShrink(ctxt);
10841
6.52k
    }
10842
10843
16.7M
    while (ctxt->disableSAX == 0) {
10844
16.6M
        avail = ctxt->input->end - ctxt->input->cur;
10845
16.6M
        if (avail < 1)
10846
58.9k
      goto done;
10847
16.6M
        switch (ctxt->instate) {
10848
502k
            case XML_PARSER_EOF:
10849
          /*
10850
     * Document parsing is done !
10851
     */
10852
502k
          goto done;
10853
99.9k
            case XML_PARSER_START:
10854
                /*
10855
                 * Very first chars read from the document flow.
10856
                 */
10857
99.9k
                if ((!terminate) && (avail < 4))
10858
9.81k
                    goto done;
10859
10860
                /*
10861
                 * We need more bytes to detect EBCDIC code pages.
10862
                 * See xmlDetectEBCDIC.
10863
                 */
10864
90.1k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10865
3.05k
                    (!terminate) && (avail < 200))
10866
2.01k
                    goto done;
10867
10868
88.1k
                xmlDetectEncoding(ctxt);
10869
88.1k
                ctxt->instate = XML_PARSER_XML_DECL;
10870
88.1k
    break;
10871
10872
220k
            case XML_PARSER_XML_DECL:
10873
220k
    if ((!terminate) && (avail < 2))
10874
219
        goto done;
10875
219k
    cur = ctxt->input->cur[0];
10876
219k
    next = ctxt->input->cur[1];
10877
219k
          if ((cur == '<') && (next == '?')) {
10878
        /* PI or XML decl */
10879
150k
        if ((!terminate) &&
10880
144k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10881
131k
      goto done;
10882
18.3k
        if ((ctxt->input->cur[2] == 'x') &&
10883
14.8k
      (ctxt->input->cur[3] == 'm') &&
10884
13.5k
      (ctxt->input->cur[4] == 'l') &&
10885
9.62k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10886
9.27k
      ret += 5;
10887
9.27k
      xmlParseXMLDecl(ctxt);
10888
9.27k
        } else {
10889
9.04k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10890
9.04k
                        if (ctxt->version == NULL) {
10891
14
                            xmlErrMemory(ctxt);
10892
14
                            break;
10893
14
                        }
10894
9.04k
        }
10895
69.5k
    } else {
10896
69.5k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10897
69.5k
        if (ctxt->version == NULL) {
10898
79
            xmlErrMemory(ctxt);
10899
79
      break;
10900
79
        }
10901
69.5k
    }
10902
87.8k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10903
87.8k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10904
87.8k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10905
87.8k
                }
10906
87.8k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10907
87.8k
                    (!ctxt->disableSAX))
10908
86.0k
                    ctxt->sax->startDocument(ctxt->userData);
10909
87.8k
                ctxt->instate = XML_PARSER_MISC;
10910
87.8k
    break;
10911
1.77M
            case XML_PARSER_START_TAG: {
10912
1.77M
          const xmlChar *name;
10913
1.77M
    const xmlChar *prefix = NULL;
10914
1.77M
    const xmlChar *URI = NULL;
10915
1.77M
                int line = ctxt->input->line;
10916
1.77M
    int nbNs = 0;
10917
10918
1.77M
    if ((!terminate) && (avail < 2))
10919
239
        goto done;
10920
1.77M
    cur = ctxt->input->cur[0];
10921
1.77M
          if (cur != '<') {
10922
7.11k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10923
7.11k
                                   "Start tag expected, '<' not found");
10924
7.11k
                    ctxt->instate = XML_PARSER_EOF;
10925
7.11k
                    xmlFinishDocument(ctxt);
10926
7.11k
        goto done;
10927
7.11k
    }
10928
1.76M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10929
1.16M
                    goto done;
10930
598k
    if (ctxt->spaceNr == 0)
10931
0
        spacePush(ctxt, -1);
10932
598k
    else if (*ctxt->space == -2)
10933
126k
        spacePush(ctxt, -1);
10934
471k
    else
10935
471k
        spacePush(ctxt, *ctxt->space);
10936
598k
#ifdef LIBXML_SAX1_ENABLED
10937
598k
    if (ctxt->sax2)
10938
422k
#endif /* LIBXML_SAX1_ENABLED */
10939
422k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10940
176k
#ifdef LIBXML_SAX1_ENABLED
10941
176k
    else
10942
176k
        name = xmlParseStartTag(ctxt);
10943
598k
#endif /* LIBXML_SAX1_ENABLED */
10944
598k
    if (name == NULL) {
10945
7.95k
        spacePop(ctxt);
10946
7.95k
                    ctxt->instate = XML_PARSER_EOF;
10947
7.95k
                    xmlFinishDocument(ctxt);
10948
7.95k
        goto done;
10949
7.95k
    }
10950
590k
#ifdef LIBXML_VALID_ENABLED
10951
    /*
10952
     * [ VC: Root Element Type ]
10953
     * The Name in the document type declaration must match
10954
     * the element type of the root element.
10955
     */
10956
590k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10957
172k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10958
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10959
590k
#endif /* LIBXML_VALID_ENABLED */
10960
10961
    /*
10962
     * Check for an Empty Element.
10963
     */
10964
590k
    if ((RAW == '/') && (NXT(1) == '>')) {
10965
152k
        SKIP(2);
10966
10967
152k
        if (ctxt->sax2) {
10968
115k
      if ((ctxt->sax != NULL) &&
10969
115k
          (ctxt->sax->endElementNs != NULL) &&
10970
115k
          (!ctxt->disableSAX))
10971
115k
          ctxt->sax->endElementNs(ctxt->userData, name,
10972
115k
                                  prefix, URI);
10973
115k
      if (nbNs > 0)
10974
15.8k
          xmlParserNsPop(ctxt, nbNs);
10975
115k
#ifdef LIBXML_SAX1_ENABLED
10976
115k
        } else {
10977
37.4k
      if ((ctxt->sax != NULL) &&
10978
37.4k
          (ctxt->sax->endElement != NULL) &&
10979
37.4k
          (!ctxt->disableSAX))
10980
37.3k
          ctxt->sax->endElement(ctxt->userData, name);
10981
37.4k
#endif /* LIBXML_SAX1_ENABLED */
10982
37.4k
        }
10983
152k
        spacePop(ctxt);
10984
437k
    } else if (RAW == '>') {
10985
333k
        NEXT;
10986
333k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10987
333k
    } else {
10988
104k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10989
104k
           "Couldn't find end of Start Tag %s\n",
10990
104k
           name);
10991
104k
        nodePop(ctxt);
10992
104k
        spacePop(ctxt);
10993
104k
                    if (nbNs > 0)
10994
15.5k
                        xmlParserNsPop(ctxt, nbNs);
10995
104k
    }
10996
10997
590k
                if (ctxt->nameNr == 0)
10998
17.8k
                    ctxt->instate = XML_PARSER_EPILOG;
10999
572k
                else
11000
572k
                    ctxt->instate = XML_PARSER_CONTENT;
11001
590k
                break;
11002
598k
      }
11003
12.2M
            case XML_PARSER_CONTENT: {
11004
12.2M
    cur = ctxt->input->cur[0];
11005
11006
12.2M
    if (cur == '<') {
11007
936k
                    if ((!terminate) && (avail < 2))
11008
4.32k
                        goto done;
11009
931k
        next = ctxt->input->cur[1];
11010
11011
931k
                    if (next == '/') {
11012
70.5k
                        ctxt->instate = XML_PARSER_END_TAG;
11013
70.5k
                        break;
11014
861k
                    } else if (next == '?') {
11015
54.5k
                        if ((!terminate) &&
11016
46.7k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11017
27.9k
                            goto done;
11018
26.6k
                        xmlParsePI(ctxt);
11019
26.6k
                        ctxt->instate = XML_PARSER_CONTENT;
11020
26.6k
                        break;
11021
806k
                    } else if (next == '!') {
11022
266k
                        if ((!terminate) && (avail < 3))
11023
1.19k
                            goto done;
11024
265k
                        next = ctxt->input->cur[2];
11025
11026
265k
                        if (next == '-') {
11027
142k
                            if ((!terminate) && (avail < 4))
11028
1.00k
                                goto done;
11029
141k
                            if (ctxt->input->cur[3] == '-') {
11030
141k
                                if ((!terminate) &&
11031
137k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11032
26.8k
                                    goto done;
11033
115k
                                xmlParseComment(ctxt);
11034
115k
                                ctxt->instate = XML_PARSER_CONTENT;
11035
115k
                                break;
11036
141k
                            }
11037
141k
                        } else if (next == '[') {
11038
121k
                            if ((!terminate) && (avail < 9))
11039
1.07k
                                goto done;
11040
120k
                            if ((ctxt->input->cur[2] == '[') &&
11041
120k
                                (ctxt->input->cur[3] == 'C') &&
11042
120k
                                (ctxt->input->cur[4] == 'D') &&
11043
120k
                                (ctxt->input->cur[5] == 'A') &&
11044
120k
                                (ctxt->input->cur[6] == 'T') &&
11045
120k
                                (ctxt->input->cur[7] == 'A') &&
11046
120k
                                (ctxt->input->cur[8] == '[')) {
11047
119k
                                if ((!terminate) &&
11048
113k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11049
104k
                                    goto done;
11050
15.2k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11051
15.2k
                                xmlParseCDSect(ctxt);
11052
15.2k
                                ctxt->instate = XML_PARSER_CONTENT;
11053
15.2k
                                break;
11054
119k
                            }
11055
120k
                        }
11056
265k
                    }
11057
11.3M
    } else if (cur == '&') {
11058
97.7k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11059
41.7k
      goto done;
11060
56.0k
        xmlParseReference(ctxt);
11061
56.0k
                    break;
11062
11.2M
    } else {
11063
        /* TODO Avoid the extra copy, handle directly !!! */
11064
        /*
11065
         * Goal of the following test is:
11066
         *  - minimize calls to the SAX 'character' callback
11067
         *    when they are mergeable
11068
         *  - handle an problem for isBlank when we only parse
11069
         *    a sequence of blank chars and the next one is
11070
         *    not available to check against '<' presence.
11071
         *  - tries to homogenize the differences in SAX
11072
         *    callbacks between the push and pull versions
11073
         *    of the parser.
11074
         */
11075
11.2M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11076
262k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11077
52.8k
          goto done;
11078
262k
                    }
11079
11.1M
                    ctxt->checkIndex = 0;
11080
11.1M
        xmlParseCharDataInternal(ctxt, !terminate);
11081
11.1M
                    break;
11082
11.2M
    }
11083
11084
541k
                ctxt->instate = XML_PARSER_START_TAG;
11085
541k
    break;
11086
12.2M
      }
11087
125k
            case XML_PARSER_END_TAG:
11088
125k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11089
56.4k
        goto done;
11090
69.5k
    if (ctxt->sax2) {
11091
51.5k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11092
51.5k
        nameNsPop(ctxt);
11093
51.5k
    }
11094
18.0k
#ifdef LIBXML_SAX1_ENABLED
11095
18.0k
      else
11096
18.0k
        xmlParseEndTag1(ctxt, 0);
11097
69.5k
#endif /* LIBXML_SAX1_ENABLED */
11098
69.5k
    if (ctxt->nameNr == 0) {
11099
3.29k
        ctxt->instate = XML_PARSER_EPILOG;
11100
66.2k
    } else {
11101
66.2k
        ctxt->instate = XML_PARSER_CONTENT;
11102
66.2k
    }
11103
69.5k
    break;
11104
562k
            case XML_PARSER_MISC:
11105
673k
            case XML_PARSER_PROLOG:
11106
683k
            case XML_PARSER_EPILOG:
11107
683k
    SKIP_BLANKS;
11108
683k
                avail = ctxt->input->end - ctxt->input->cur;
11109
683k
    if (avail < 1)
11110
4.57k
        goto done;
11111
678k
    if (ctxt->input->cur[0] == '<') {
11112
668k
                    if ((!terminate) && (avail < 2))
11113
1.49k
                        goto done;
11114
666k
                    next = ctxt->input->cur[1];
11115
666k
                    if (next == '?') {
11116
78.2k
                        if ((!terminate) &&
11117
72.0k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11118
35.6k
                            goto done;
11119
42.5k
                        xmlParsePI(ctxt);
11120
42.5k
                        break;
11121
588k
                    } else if (next == '!') {
11122
529k
                        if ((!terminate) && (avail < 3))
11123
933
                            goto done;
11124
11125
528k
                        if (ctxt->input->cur[2] == '-') {
11126
160k
                            if ((!terminate) && (avail < 4))
11127
815
                                goto done;
11128
159k
                            if (ctxt->input->cur[3] == '-') {
11129
159k
                                if ((!terminate) &&
11130
157k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11131
31.9k
                                    goto done;
11132
127k
                                xmlParseComment(ctxt);
11133
127k
                                break;
11134
159k
                            }
11135
368k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11136
368k
                            if ((!terminate) && (avail < 9))
11137
6.60k
                                goto done;
11138
361k
                            if ((ctxt->input->cur[2] == 'D') &&
11139
361k
                                (ctxt->input->cur[3] == 'O') &&
11140
361k
                                (ctxt->input->cur[4] == 'C') &&
11141
361k
                                (ctxt->input->cur[5] == 'T') &&
11142
361k
                                (ctxt->input->cur[6] == 'Y') &&
11143
361k
                                (ctxt->input->cur[7] == 'P') &&
11144
361k
                                (ctxt->input->cur[8] == 'E')) {
11145
361k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11146
314k
                                    goto done;
11147
46.3k
                                ctxt->inSubset = 1;
11148
46.3k
                                xmlParseDocTypeDecl(ctxt);
11149
46.3k
                                if (RAW == '[') {
11150
36.6k
                                    ctxt->instate = XML_PARSER_DTD;
11151
36.6k
                                } else {
11152
9.60k
                                    if (RAW == '>')
11153
7.64k
                                        NEXT;
11154
                                    /*
11155
                                     * Create and update the external subset.
11156
                                     */
11157
9.60k
                                    ctxt->inSubset = 2;
11158
9.60k
                                    if ((ctxt->sax != NULL) &&
11159
9.60k
                                        (!ctxt->disableSAX) &&
11160
9.19k
                                        (ctxt->sax->externalSubset != NULL))
11161
9.19k
                                        ctxt->sax->externalSubset(
11162
9.19k
                                                ctxt->userData,
11163
9.19k
                                                ctxt->intSubName,
11164
9.19k
                                                ctxt->extSubSystem,
11165
9.19k
                                                ctxt->extSubURI);
11166
9.60k
                                    ctxt->inSubset = 0;
11167
9.60k
                                    xmlCleanSpecialAttr(ctxt);
11168
9.60k
                                    ctxt->instate = XML_PARSER_PROLOG;
11169
9.60k
                                }
11170
46.3k
                                break;
11171
361k
                            }
11172
361k
                        }
11173
528k
                    }
11174
666k
                }
11175
11176
69.5k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11177
3.74k
                    if (ctxt->errNo == XML_ERR_OK)
11178
62
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11179
3.74k
        ctxt->instate = XML_PARSER_EOF;
11180
3.74k
                    xmlFinishDocument(ctxt);
11181
65.7k
                } else {
11182
65.7k
        ctxt->instate = XML_PARSER_START_TAG;
11183
65.7k
    }
11184
69.5k
    break;
11185
947k
            case XML_PARSER_DTD: {
11186
947k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11187
912k
                    goto done;
11188
35.0k
    xmlParseInternalSubset(ctxt);
11189
35.0k
    ctxt->inSubset = 2;
11190
35.0k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11191
28.8k
        (ctxt->sax->externalSubset != NULL))
11192
28.8k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11193
28.8k
          ctxt->extSubSystem, ctxt->extSubURI);
11194
35.0k
    ctxt->inSubset = 0;
11195
35.0k
    xmlCleanSpecialAttr(ctxt);
11196
35.0k
    ctxt->instate = XML_PARSER_PROLOG;
11197
35.0k
                break;
11198
947k
      }
11199
0
            default:
11200
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11201
0
      "PP: internal error\n");
11202
0
    ctxt->instate = XML_PARSER_EOF;
11203
0
    break;
11204
16.6M
  }
11205
16.6M
    }
11206
3.54M
done:
11207
3.54M
    return(ret);
11208
3.54M
}
11209
11210
/**
11211
 * Parse a chunk of memory in push parser mode.
11212
 *
11213
 * Assumes that the parser context was initialized with
11214
 * #xmlCreatePushParserCtxt.
11215
 *
11216
 * The last chunk, which will often be empty, must be marked with
11217
 * the `terminate` flag. With the default SAX callbacks, the resulting
11218
 * document will be available in ctxt->myDoc. This pointer will not
11219
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11220
 * caller. If the document isn't well-formed, it will still be returned
11221
 * in ctxt->myDoc.
11222
 *
11223
 * As an exception, #xmlCtxtResetPush will free the document in
11224
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11225
 * the document.
11226
 *
11227
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11228
 * result document.
11229
 *
11230
 * @param ctxt  an XML parser context
11231
 * @param chunk  chunk of memory
11232
 * @param size  size of chunk in bytes
11233
 * @param terminate  last chunk indicator
11234
 * @returns an xmlParserErrors code (0 on success).
11235
 */
11236
int
11237
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11238
4.33M
              int terminate) {
11239
4.33M
    size_t curBase;
11240
4.33M
    size_t maxLength;
11241
4.33M
    size_t pos;
11242
4.33M
    int end_in_lf = 0;
11243
4.33M
    int res;
11244
11245
4.33M
    if ((ctxt == NULL) || (size < 0))
11246
0
        return(XML_ERR_ARGUMENT);
11247
4.33M
    if ((chunk == NULL) && (size > 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
4.33M
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11250
0
        return(XML_ERR_ARGUMENT);
11251
4.33M
    if (ctxt->disableSAX != 0)
11252
789k
        return(ctxt->errNo);
11253
11254
3.55M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11255
3.55M
    if (ctxt->instate == XML_PARSER_START)
11256
100k
        xmlCtxtInitializeLate(ctxt);
11257
3.55M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11258
3.47M
        (chunk[size - 1] == '\r')) {
11259
4.82k
  end_in_lf = 1;
11260
4.82k
  size--;
11261
4.82k
    }
11262
11263
    /*
11264
     * Also push an empty chunk to make sure that the raw buffer
11265
     * will be flushed if there is an encoder.
11266
     */
11267
3.55M
    pos = ctxt->input->cur - ctxt->input->base;
11268
3.55M
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11269
3.55M
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11270
3.55M
    if (res < 0) {
11271
837
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11272
837
        return(ctxt->errNo);
11273
837
    }
11274
11275
3.54M
    xmlParseTryOrFinish(ctxt, terminate);
11276
11277
3.54M
    curBase = ctxt->input->cur - ctxt->input->base;
11278
3.54M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11279
1.31M
                XML_MAX_HUGE_LENGTH :
11280
3.54M
                XML_MAX_LOOKUP_LIMIT;
11281
3.54M
    if (curBase > maxLength) {
11282
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11283
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11284
0
    }
11285
11286
3.54M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11287
34.7k
        return(ctxt->errNo);
11288
11289
3.51M
    if (end_in_lf == 1) {
11290
4.77k
  pos = ctxt->input->cur - ctxt->input->base;
11291
4.77k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11292
4.77k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11293
4.77k
        if (res < 0) {
11294
13
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11295
13
            return(ctxt->errNo);
11296
13
        }
11297
4.77k
    }
11298
3.51M
    if (terminate) {
11299
  /*
11300
   * Check for termination
11301
   */
11302
48.2k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11303
28.5k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11304
19.4k
            if (ctxt->nameNr > 0) {
11305
11.9k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11306
11.9k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11307
11.9k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11308
11.9k
                        "Premature end of data in tag %s line %d\n",
11309
11.9k
                        name, line, NULL);
11310
11.9k
            } else if (ctxt->instate == XML_PARSER_START) {
11311
345
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11312
7.20k
            } else {
11313
7.20k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11314
7.20k
                               "Start tag expected, '<' not found\n");
11315
7.20k
            }
11316
28.7k
        } else {
11317
28.7k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11318
28.7k
        }
11319
48.2k
  if (ctxt->instate != XML_PARSER_EOF) {
11320
27.8k
            ctxt->instate = XML_PARSER_EOF;
11321
27.8k
            xmlFinishDocument(ctxt);
11322
27.8k
  }
11323
48.2k
    }
11324
3.51M
    if (ctxt->wellFormed == 0)
11325
1.76M
  return((xmlParserErrors) ctxt->errNo);
11326
1.74M
    else
11327
1.74M
        return(0);
11328
3.51M
}
11329
11330
/************************************************************************
11331
 *                  *
11332
 *    I/O front end functions to the parser     *
11333
 *                  *
11334
 ************************************************************************/
11335
11336
/**
11337
 * Create a parser context for using the XML parser in push mode.
11338
 * See #xmlParseChunk.
11339
 *
11340
 * Passing an initial chunk is useless and deprecated.
11341
 *
11342
 * The push parser doesn't support recovery mode or the
11343
 * XML_PARSE_NOBLANKS option.
11344
 *
11345
 * `filename` is used as base URI to fetch external entities and for
11346
 * error reports.
11347
 *
11348
 * @param sax  a SAX handler (optional)
11349
 * @param user_data  user data for SAX callbacks (optional)
11350
 * @param chunk  initial chunk (optional, deprecated)
11351
 * @param size  size of initial chunk in bytes
11352
 * @param filename  file name or URI (optional)
11353
 * @returns the new parser context or NULL if a memory allocation
11354
 * failed.
11355
 */
11356
11357
xmlParserCtxt *
11358
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11359
89.6k
                        const char *chunk, int size, const char *filename) {
11360
89.6k
    xmlParserCtxtPtr ctxt;
11361
89.6k
    xmlParserInputPtr input;
11362
11363
89.6k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11364
89.6k
    if (ctxt == NULL)
11365
84
  return(NULL);
11366
11367
89.5k
    ctxt->options &= ~XML_PARSE_NODICT;
11368
89.5k
    ctxt->dictNames = 1;
11369
11370
89.5k
    input = xmlNewPushInput(filename, chunk, size);
11371
89.5k
    if (input == NULL) {
11372
77
  xmlFreeParserCtxt(ctxt);
11373
77
  return(NULL);
11374
77
    }
11375
89.4k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11376
14
        xmlFreeInputStream(input);
11377
14
        xmlFreeParserCtxt(ctxt);
11378
14
        return(NULL);
11379
14
    }
11380
11381
89.4k
    return(ctxt);
11382
89.4k
}
11383
#endif /* LIBXML_PUSH_ENABLED */
11384
11385
/**
11386
 * Blocks further parser processing
11387
 *
11388
 * @param ctxt  an XML parser context
11389
 */
11390
void
11391
38.7k
xmlStopParser(xmlParserCtxt *ctxt) {
11392
38.7k
    if (ctxt == NULL)
11393
0
        return;
11394
11395
    /* This stops the parser */
11396
38.7k
    ctxt->disableSAX = 2;
11397
11398
    /*
11399
     * xmlStopParser is often called from error handlers,
11400
     * so we can't raise an error here to avoid infinite
11401
     * loops. Just make sure that an error condition is
11402
     * reported.
11403
     */
11404
38.7k
    if (ctxt->errNo == XML_ERR_OK) {
11405
3.32k
        ctxt->errNo = XML_ERR_USER_STOP;
11406
3.32k
        ctxt->lastError.code = XML_ERR_USER_STOP;
11407
3.32k
        ctxt->wellFormed = 0;
11408
3.32k
    }
11409
38.7k
}
11410
11411
/**
11412
 * Create a parser context for using the XML parser with an existing
11413
 * I/O stream
11414
 *
11415
 * @param sax  a SAX handler (optional)
11416
 * @param user_data  user data for SAX callbacks (optional)
11417
 * @param ioread  an I/O read function
11418
 * @param ioclose  an I/O close function (optional)
11419
 * @param ioctx  an I/O handler
11420
 * @param enc  the charset encoding if known (deprecated)
11421
 * @returns the new parser context or NULL
11422
 */
11423
xmlParserCtxt *
11424
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11425
                      xmlInputReadCallback ioread,
11426
                      xmlInputCloseCallback ioclose,
11427
0
                      void *ioctx, xmlCharEncoding enc) {
11428
0
    xmlParserCtxtPtr ctxt;
11429
0
    xmlParserInputPtr input;
11430
0
    const char *encoding;
11431
11432
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11433
0
    if (ctxt == NULL)
11434
0
  return(NULL);
11435
11436
0
    encoding = xmlGetCharEncodingName(enc);
11437
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11438
0
                                  encoding, 0);
11439
0
    if (input == NULL) {
11440
0
  xmlFreeParserCtxt(ctxt);
11441
0
        return (NULL);
11442
0
    }
11443
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11444
0
        xmlFreeInputStream(input);
11445
0
        xmlFreeParserCtxt(ctxt);
11446
0
        return(NULL);
11447
0
    }
11448
11449
0
    return(ctxt);
11450
0
}
11451
11452
#ifdef LIBXML_VALID_ENABLED
11453
/************************************************************************
11454
 *                  *
11455
 *    Front ends when parsing a DTD       *
11456
 *                  *
11457
 ************************************************************************/
11458
11459
/**
11460
 * Parse a DTD.
11461
 *
11462
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11463
 * to make external entities work.
11464
 *
11465
 * @since 2.14.0
11466
 *
11467
 * @param ctxt  a parser context
11468
 * @param input  a parser input
11469
 * @param publicId  public ID of the DTD (optional)
11470
 * @param systemId  system ID of the DTD (optional)
11471
 * @returns the resulting xmlDtd or NULL in case of error.
11472
 * `input` will be freed by the function in any case.
11473
 */
11474
xmlDtd *
11475
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11476
2.19k
                const xmlChar *publicId, const xmlChar *systemId) {
11477
2.19k
    xmlDtdPtr ret = NULL;
11478
11479
2.19k
    if ((ctxt == NULL) || (input == NULL)) {
11480
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11481
0
        xmlFreeInputStream(input);
11482
0
        return(NULL);
11483
0
    }
11484
11485
2.19k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11486
6
        xmlFreeInputStream(input);
11487
6
        return(NULL);
11488
6
    }
11489
11490
2.19k
    if (publicId == NULL)
11491
1.82k
        publicId = BAD_CAST "none";
11492
2.19k
    if (systemId == NULL)
11493
0
        systemId = BAD_CAST "none";
11494
11495
2.19k
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11496
2.19k
    if (ctxt->myDoc == NULL) {
11497
5
        xmlErrMemory(ctxt);
11498
5
        goto error;
11499
5
    }
11500
2.18k
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11501
2.18k
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11502
2.18k
                                       publicId, systemId);
11503
2.18k
    if (ctxt->myDoc->extSubset == NULL) {
11504
15
        xmlErrMemory(ctxt);
11505
15
        xmlFreeDoc(ctxt->myDoc);
11506
15
        goto error;
11507
15
    }
11508
11509
2.17k
    xmlParseExternalSubset(ctxt, publicId, systemId);
11510
11511
2.17k
    if (ctxt->wellFormed) {
11512
97
        ret = ctxt->myDoc->extSubset;
11513
97
        ctxt->myDoc->extSubset = NULL;
11514
97
        if (ret != NULL) {
11515
97
            xmlNodePtr tmp;
11516
11517
97
            ret->doc = NULL;
11518
97
            tmp = ret->children;
11519
1.38k
            while (tmp != NULL) {
11520
1.29k
                tmp->doc = NULL;
11521
1.29k
                tmp = tmp->next;
11522
1.29k
            }
11523
97
        }
11524
2.07k
    } else {
11525
2.07k
        ret = NULL;
11526
2.07k
    }
11527
2.17k
    xmlFreeDoc(ctxt->myDoc);
11528
2.17k
    ctxt->myDoc = NULL;
11529
11530
2.19k
error:
11531
2.19k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11532
11533
2.19k
    return(ret);
11534
2.17k
}
11535
11536
/**
11537
 * Load and parse a DTD
11538
 *
11539
 * @deprecated Use #xmlCtxtParseDtd.
11540
 *
11541
 * @param sax  the SAX handler block or NULL
11542
 * @param input  an Input Buffer
11543
 * @param enc  the charset encoding if known
11544
 * @returns the resulting xmlDtd or NULL in case of error.
11545
 * `input` will be freed by the function in any case.
11546
 */
11547
11548
xmlDtd *
11549
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11550
0
        xmlCharEncoding enc) {
11551
0
    xmlDtdPtr ret = NULL;
11552
0
    xmlParserCtxtPtr ctxt;
11553
0
    xmlParserInputPtr pinput = NULL;
11554
11555
0
    if (input == NULL)
11556
0
  return(NULL);
11557
11558
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11559
0
    if (ctxt == NULL) {
11560
0
        xmlFreeParserInputBuffer(input);
11561
0
  return(NULL);
11562
0
    }
11563
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11564
11565
    /*
11566
     * generate a parser input from the I/O handler
11567
     */
11568
11569
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11570
0
    if (pinput == NULL) {
11571
0
        xmlFreeParserInputBuffer(input);
11572
0
  xmlFreeParserCtxt(ctxt);
11573
0
  return(NULL);
11574
0
    }
11575
11576
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11577
0
        xmlSwitchEncoding(ctxt, enc);
11578
0
    }
11579
11580
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11581
11582
0
    xmlFreeParserCtxt(ctxt);
11583
0
    return(ret);
11584
0
}
11585
11586
/**
11587
 * Load and parse an external subset.
11588
 *
11589
 * @deprecated Use #xmlCtxtParseDtd.
11590
 *
11591
 * @param sax  the SAX handler block
11592
 * @param publicId  public identifier of the DTD (optional)
11593
 * @param systemId  system identifier (URL) of the DTD
11594
 * @returns the resulting xmlDtd or NULL in case of error.
11595
 */
11596
11597
xmlDtd *
11598
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11599
11.4k
               const xmlChar *systemId) {
11600
11.4k
    xmlDtdPtr ret = NULL;
11601
11.4k
    xmlParserCtxtPtr ctxt;
11602
11.4k
    xmlParserInputPtr input = NULL;
11603
11.4k
    xmlChar* systemIdCanonic;
11604
11605
11.4k
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11606
11607
11.4k
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11608
11.4k
    if (ctxt == NULL) {
11609
49
  return(NULL);
11610
49
    }
11611
11.3k
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11612
11613
    /*
11614
     * Canonicalise the system ID
11615
     */
11616
11.3k
    systemIdCanonic = xmlCanonicPath(systemId);
11617
11.3k
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11618
8
  xmlFreeParserCtxt(ctxt);
11619
8
  return(NULL);
11620
8
    }
11621
11622
    /*
11623
     * Ask the Entity resolver to load the damn thing
11624
     */
11625
11626
11.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11627
11.3k
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11628
11.3k
                                   systemIdCanonic);
11629
11.3k
    if (input == NULL) {
11630
9.86k
  xmlFreeParserCtxt(ctxt);
11631
9.86k
  if (systemIdCanonic != NULL)
11632
9.64k
      xmlFree(systemIdCanonic);
11633
9.86k
  return(NULL);
11634
9.86k
    }
11635
11636
1.50k
    if (input->filename == NULL)
11637
0
  input->filename = (char *) systemIdCanonic;
11638
1.50k
    else
11639
1.50k
  xmlFree(systemIdCanonic);
11640
11641
1.50k
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11642
11643
1.50k
    xmlFreeParserCtxt(ctxt);
11644
1.50k
    return(ret);
11645
11.3k
}
11646
11647
11648
/**
11649
 * Load and parse an external subset.
11650
 *
11651
 * @param publicId  public identifier of the DTD (optional)
11652
 * @param systemId  system identifier (URL) of the DTD
11653
 * @returns the resulting xmlDtd or NULL in case of error.
11654
 */
11655
11656
xmlDtd *
11657
11.4k
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11658
11.4k
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11659
11.4k
}
11660
#endif /* LIBXML_VALID_ENABLED */
11661
11662
/************************************************************************
11663
 *                  *
11664
 *    Front ends when parsing an Entity     *
11665
 *                  *
11666
 ************************************************************************/
11667
11668
static xmlNodePtr
11669
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11670
42.1k
                            int hasTextDecl, int buildTree) {
11671
42.1k
    xmlNodePtr root = NULL;
11672
42.1k
    xmlNodePtr list = NULL;
11673
42.1k
    xmlChar *rootName = BAD_CAST "#root";
11674
42.1k
    int result;
11675
11676
42.1k
    if (buildTree) {
11677
42.1k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11678
42.1k
        if (root == NULL) {
11679
66
            xmlErrMemory(ctxt);
11680
66
            goto error;
11681
66
        }
11682
42.1k
    }
11683
11684
42.1k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11685
45
        goto error;
11686
11687
42.0k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11688
42.0k
    spacePush(ctxt, -1);
11689
11690
42.0k
    if (buildTree)
11691
42.0k
        nodePush(ctxt, root);
11692
11693
42.0k
    if (hasTextDecl) {
11694
13.7k
        xmlDetectEncoding(ctxt);
11695
11696
        /*
11697
         * Parse a possible text declaration first
11698
         */
11699
13.7k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11700
1.68k
            (IS_BLANK_CH(NXT(5)))) {
11701
1.64k
            xmlParseTextDecl(ctxt);
11702
            /*
11703
             * An XML-1.0 document can't reference an entity not XML-1.0
11704
             */
11705
1.64k
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11706
1.34k
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11707
71
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11708
71
                               "Version mismatch between document and "
11709
71
                               "entity\n");
11710
71
            }
11711
1.64k
        }
11712
13.7k
    }
11713
11714
42.0k
    xmlParseContentInternal(ctxt);
11715
11716
42.0k
    if (ctxt->input->cur < ctxt->input->end)
11717
3.72k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11718
11719
42.0k
    if ((ctxt->wellFormed) ||
11720
36.0k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11721
36.0k
        if (root != NULL) {
11722
36.0k
            xmlNodePtr cur;
11723
11724
            /*
11725
             * Unlink newly created node list.
11726
             */
11727
36.0k
            list = root->children;
11728
36.0k
            root->children = NULL;
11729
36.0k
            root->last = NULL;
11730
163k
            for (cur = list; cur != NULL; cur = cur->next)
11731
127k
                cur->parent = NULL;
11732
36.0k
        }
11733
36.0k
    }
11734
11735
    /*
11736
     * Read the rest of the stream in case of errors. We want
11737
     * to account for the whole entity size.
11738
     */
11739
122k
    do {
11740
122k
        ctxt->input->cur = ctxt->input->end;
11741
122k
        xmlParserShrink(ctxt);
11742
122k
        result = xmlParserGrow(ctxt);
11743
122k
    } while (result > 0);
11744
11745
42.0k
    if (buildTree)
11746
42.0k
        nodePop(ctxt);
11747
11748
42.0k
    namePop(ctxt);
11749
42.0k
    spacePop(ctxt);
11750
11751
42.0k
    xmlCtxtPopInput(ctxt);
11752
11753
42.1k
error:
11754
42.1k
    xmlFreeNode(root);
11755
11756
42.1k
    return(list);
11757
42.0k
}
11758
11759
static void
11760
51.1k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11761
51.1k
    xmlParserInputPtr input;
11762
51.1k
    xmlNodePtr list;
11763
51.1k
    unsigned long consumed;
11764
51.1k
    int isExternal;
11765
51.1k
    int buildTree;
11766
51.1k
    int oldMinNsIndex;
11767
51.1k
    int oldNodelen, oldNodemem;
11768
11769
51.1k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11770
51.1k
    buildTree = (ctxt->node != NULL);
11771
11772
    /*
11773
     * Recursion check
11774
     */
11775
51.1k
    if (ent->flags & XML_ENT_EXPANDING) {
11776
1.07k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11777
1.07k
        goto error;
11778
1.07k
    }
11779
11780
    /*
11781
     * Load entity
11782
     */
11783
50.0k
    input = xmlNewEntityInputStream(ctxt, ent);
11784
50.0k
    if (input == NULL)
11785
7.84k
        goto error;
11786
11787
    /*
11788
     * When building a tree, we need to limit the scope of namespace
11789
     * declarations, so that entities don't reference xmlNs structs
11790
     * from the parent of a reference.
11791
     */
11792
42.1k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11793
42.1k
    if (buildTree)
11794
42.1k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11795
11796
42.1k
    oldNodelen = ctxt->nodelen;
11797
42.1k
    oldNodemem = ctxt->nodemem;
11798
42.1k
    ctxt->nodelen = 0;
11799
42.1k
    ctxt->nodemem = 0;
11800
11801
    /*
11802
     * Parse content
11803
     *
11804
     * This initiates a recursive call chain:
11805
     *
11806
     * - xmlCtxtParseContentInternal
11807
     * - xmlParseContentInternal
11808
     * - xmlParseReference
11809
     * - xmlCtxtParseEntity
11810
     *
11811
     * The nesting depth is limited by the maximum number of inputs,
11812
     * see xmlCtxtPushInput.
11813
     *
11814
     * It's possible to make this non-recursive (minNsIndex must be
11815
     * stored in the input struct) at the expense of code readability.
11816
     */
11817
11818
42.1k
    ent->flags |= XML_ENT_EXPANDING;
11819
11820
42.1k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11821
11822
42.1k
    ent->flags &= ~XML_ENT_EXPANDING;
11823
11824
42.1k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11825
42.1k
    ctxt->nodelen = oldNodelen;
11826
42.1k
    ctxt->nodemem = oldNodemem;
11827
11828
    /*
11829
     * Entity size accounting
11830
     */
11831
42.1k
    consumed = input->consumed;
11832
42.1k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11833
11834
42.1k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11835
23.4k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11836
11837
42.1k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11838
23.4k
        if (isExternal)
11839
12.7k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11840
11841
23.4k
        ent->children = list;
11842
11843
150k
        while (list != NULL) {
11844
127k
            list->parent = (xmlNodePtr) ent;
11845
11846
            /*
11847
             * Downstream code like the nginx xslt module can set
11848
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11849
             * might have a different or a NULL document.
11850
             */
11851
127k
            if (list->doc != ent->doc)
11852
0
                xmlSetTreeDoc(list, ent->doc);
11853
11854
127k
            if (list->next == NULL)
11855
16.6k
                ent->last = list;
11856
127k
            list = list->next;
11857
127k
        }
11858
23.4k
    } else {
11859
18.7k
        xmlFreeNodeList(list);
11860
18.7k
    }
11861
11862
42.1k
    xmlFreeInputStream(input);
11863
11864
51.1k
error:
11865
51.1k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11866
51.1k
}
11867
11868
/**
11869
 * Parse an external general entity within an existing parsing context
11870
 * An external general parsed entity is well-formed if it matches the
11871
 * production labeled extParsedEnt.
11872
 *
11873
 *     [78] extParsedEnt ::= TextDecl? content
11874
 *
11875
 * @param ctxt  the existing parsing context
11876
 * @param URL  the URL for the entity to load
11877
 * @param ID  the System ID for the entity to load
11878
 * @param listOut  the return value for the set of parsed nodes
11879
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11880
 *    the parser error code otherwise
11881
 */
11882
11883
int
11884
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11885
0
                           const xmlChar *ID, xmlNode **listOut) {
11886
0
    xmlParserInputPtr input;
11887
0
    xmlNodePtr list;
11888
11889
0
    if (listOut != NULL)
11890
0
        *listOut = NULL;
11891
11892
0
    if (ctxt == NULL)
11893
0
        return(XML_ERR_ARGUMENT);
11894
11895
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11896
0
                            XML_RESOURCE_GENERAL_ENTITY);
11897
0
    if (input == NULL)
11898
0
        return(ctxt->errNo);
11899
11900
0
    xmlCtxtInitializeLate(ctxt);
11901
11902
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11903
0
    if (listOut != NULL)
11904
0
        *listOut = list;
11905
0
    else
11906
0
        xmlFreeNodeList(list);
11907
11908
0
    xmlFreeInputStream(input);
11909
0
    return(ctxt->errNo);
11910
0
}
11911
11912
#ifdef LIBXML_SAX1_ENABLED
11913
/**
11914
 * Parse an external general entity
11915
 * An external general parsed entity is well-formed if it matches the
11916
 * production labeled extParsedEnt.
11917
 *
11918
 * This function uses deprecated global variables to set parser options
11919
 * which default to XML_PARSE_NODICT.
11920
 *
11921
 * @deprecated Use #xmlParseCtxtExternalEntity.
11922
 *
11923
 *     [78] extParsedEnt ::= TextDecl? content
11924
 *
11925
 * @param doc  the document the chunk pertains to
11926
 * @param sax  the SAX handler block (possibly NULL)
11927
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11928
 * @param depth  Used for loop detection, use 0
11929
 * @param URL  the URL for the entity to load
11930
 * @param ID  the System ID for the entity to load
11931
 * @param list  the return value for the set of parsed nodes
11932
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11933
 *    the parser error code otherwise
11934
 */
11935
11936
int
11937
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11938
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11939
0
    xmlParserCtxtPtr ctxt;
11940
0
    int ret;
11941
11942
0
    if (list != NULL)
11943
0
        *list = NULL;
11944
11945
0
    if (doc == NULL)
11946
0
        return(XML_ERR_ARGUMENT);
11947
11948
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11949
0
    if (ctxt == NULL)
11950
0
        return(XML_ERR_NO_MEMORY);
11951
11952
0
    ctxt->depth = depth;
11953
0
    ctxt->myDoc = doc;
11954
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11955
11956
0
    xmlFreeParserCtxt(ctxt);
11957
0
    return(ret);
11958
0
}
11959
11960
/**
11961
 * Parse a well-balanced chunk of an XML document
11962
 * called by the parser
11963
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11964
 * the content production in the XML grammar:
11965
 *
11966
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11967
 *                       Comment)*
11968
 *
11969
 * This function uses deprecated global variables to set parser options
11970
 * which default to XML_PARSE_NODICT.
11971
 *
11972
 * @param doc  the document the chunk pertains to (must not be NULL)
11973
 * @param sax  the SAX handler block (possibly NULL)
11974
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11975
 * @param depth  Used for loop detection, use 0
11976
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11977
 * @param lst  the return value for the set of parsed nodes
11978
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11979
 *    the parser error code otherwise
11980
 */
11981
11982
int
11983
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11984
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11985
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11986
0
                                                depth, string, lst, 0 );
11987
0
}
11988
#endif /* LIBXML_SAX1_ENABLED */
11989
11990
/**
11991
 * Parse a well-balanced chunk of XML matching the 'content' production.
11992
 *
11993
 * Namespaces in scope of `node` and entities of `node`'s document are
11994
 * recognized. When validating, the DTD of `node`'s document is used.
11995
 *
11996
 * Always consumes `input` even in error case.
11997
 *
11998
 * @since 2.14.0
11999
 *
12000
 * @param ctxt  parser context
12001
 * @param input  parser input
12002
 * @param node  target node or document
12003
 * @param hasTextDecl  whether to parse text declaration
12004
 * @returns a node list or NULL in case of error.
12005
 */
12006
xmlNode *
12007
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12008
0
                    xmlNode *node, int hasTextDecl) {
12009
0
    xmlDocPtr doc;
12010
0
    xmlNodePtr cur, list = NULL;
12011
0
    int nsnr = 0;
12012
0
    xmlDictPtr oldDict;
12013
0
    int oldOptions, oldDictNames, oldLoadSubset;
12014
12015
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12016
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12017
0
        goto exit;
12018
0
    }
12019
12020
0
    doc = node->doc;
12021
0
    if (doc == NULL) {
12022
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12023
0
        goto exit;
12024
0
    }
12025
12026
0
    switch (node->type) {
12027
0
        case XML_ELEMENT_NODE:
12028
0
        case XML_DOCUMENT_NODE:
12029
0
        case XML_HTML_DOCUMENT_NODE:
12030
0
            break;
12031
12032
0
        case XML_ATTRIBUTE_NODE:
12033
0
        case XML_TEXT_NODE:
12034
0
        case XML_CDATA_SECTION_NODE:
12035
0
        case XML_ENTITY_REF_NODE:
12036
0
        case XML_PI_NODE:
12037
0
        case XML_COMMENT_NODE:
12038
0
            for (cur = node->parent; cur != NULL; cur = cur->parent) {
12039
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12040
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12041
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12042
0
                    node = cur;
12043
0
                    break;
12044
0
                }
12045
0
            }
12046
0
            break;
12047
12048
0
        default:
12049
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12050
0
            goto exit;
12051
0
    }
12052
12053
0
    xmlCtxtReset(ctxt);
12054
12055
0
    oldDict = ctxt->dict;
12056
0
    oldOptions = ctxt->options;
12057
0
    oldDictNames = ctxt->dictNames;
12058
0
    oldLoadSubset = ctxt->loadsubset;
12059
12060
    /*
12061
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12062
     */
12063
0
    if (doc->dict != NULL) {
12064
0
        ctxt->dict = doc->dict;
12065
0
    } else {
12066
0
        ctxt->options |= XML_PARSE_NODICT;
12067
0
        ctxt->dictNames = 0;
12068
0
    }
12069
12070
    /*
12071
     * Disable IDs
12072
     */
12073
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12074
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12075
12076
0
    ctxt->myDoc = doc;
12077
12078
0
#ifdef LIBXML_HTML_ENABLED
12079
0
    if (ctxt->html) {
12080
        /*
12081
         * When parsing in context, it makes no sense to add implied
12082
         * elements like html/body/etc...
12083
         */
12084
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12085
12086
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12087
0
    } else
12088
0
#endif
12089
0
    {
12090
0
        xmlCtxtInitializeLate(ctxt);
12091
12092
        /*
12093
         * initialize the SAX2 namespaces stack
12094
         */
12095
0
        cur = node;
12096
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12097
0
            xmlNsPtr ns = cur->nsDef;
12098
0
            xmlHashedString hprefix, huri;
12099
12100
0
            while (ns != NULL) {
12101
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12102
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12103
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12104
0
                    nsnr++;
12105
0
                ns = ns->next;
12106
0
            }
12107
0
            cur = cur->parent;
12108
0
        }
12109
12110
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12111
12112
0
        if (nsnr > 0)
12113
0
            xmlParserNsPop(ctxt, nsnr);
12114
0
    }
12115
12116
0
    ctxt->dict = oldDict;
12117
0
    ctxt->options = oldOptions;
12118
0
    ctxt->dictNames = oldDictNames;
12119
0
    ctxt->loadsubset = oldLoadSubset;
12120
0
    ctxt->myDoc = NULL;
12121
0
    ctxt->node = NULL;
12122
12123
0
exit:
12124
0
    xmlFreeInputStream(input);
12125
0
    return(list);
12126
0
}
12127
12128
/**
12129
 * Parse a well-balanced chunk of an XML document
12130
 * within the context (DTD, namespaces, etc ...) of the given node.
12131
 *
12132
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12133
 * the content production in the XML grammar:
12134
 *
12135
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12136
 *                       Comment)*
12137
 *
12138
 * This function assumes the encoding of `node`'s document which is
12139
 * typically not what you want. A better alternative is
12140
 * #xmlCtxtParseContent.
12141
 *
12142
 * @param node  the context node
12143
 * @param data  the input string
12144
 * @param datalen  the input string length in bytes
12145
 * @param options  a combination of xmlParserOption
12146
 * @param listOut  the return value for the set of parsed nodes
12147
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12148
 * error code otherwise
12149
 */
12150
xmlParserErrors
12151
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12152
0
                      int options, xmlNode **listOut) {
12153
0
    xmlParserCtxtPtr ctxt;
12154
0
    xmlParserInputPtr input;
12155
0
    xmlDocPtr doc;
12156
0
    xmlNodePtr list;
12157
0
    xmlParserErrors ret;
12158
12159
0
    if (listOut == NULL)
12160
0
        return(XML_ERR_INTERNAL_ERROR);
12161
0
    *listOut = NULL;
12162
12163
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12164
0
        return(XML_ERR_INTERNAL_ERROR);
12165
12166
0
    doc = node->doc;
12167
0
    if (doc == NULL)
12168
0
        return(XML_ERR_INTERNAL_ERROR);
12169
12170
0
#ifdef LIBXML_HTML_ENABLED
12171
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12172
0
        ctxt = htmlNewParserCtxt();
12173
0
    }
12174
0
    else
12175
0
#endif
12176
0
        ctxt = xmlNewParserCtxt();
12177
12178
0
    if (ctxt == NULL)
12179
0
        return(XML_ERR_NO_MEMORY);
12180
12181
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12182
0
                                      (const char *) doc->encoding,
12183
0
                                      XML_INPUT_BUF_STATIC);
12184
0
    if (input == NULL) {
12185
0
        xmlFreeParserCtxt(ctxt);
12186
0
        return(XML_ERR_NO_MEMORY);
12187
0
    }
12188
12189
0
    xmlCtxtUseOptions(ctxt, options);
12190
12191
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12192
12193
0
    if (list == NULL) {
12194
0
        ret = ctxt->errNo;
12195
0
        if (ret == XML_ERR_ARGUMENT)
12196
0
            ret = XML_ERR_INTERNAL_ERROR;
12197
0
    } else {
12198
0
        ret = XML_ERR_OK;
12199
0
        *listOut = list;
12200
0
    }
12201
12202
0
    xmlFreeParserCtxt(ctxt);
12203
12204
0
    return(ret);
12205
0
}
12206
12207
#ifdef LIBXML_SAX1_ENABLED
12208
/**
12209
 * Parse a well-balanced chunk of an XML document
12210
 *
12211
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12212
 * the content production in the XML grammar:
12213
 *
12214
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12215
 *                       Comment)*
12216
 *
12217
 * In case recover is set to 1, the nodelist will not be empty even if
12218
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12219
 * some extent.
12220
 *
12221
 * This function uses deprecated global variables to set parser options
12222
 * which default to XML_PARSE_NODICT.
12223
 *
12224
 * @param doc  the document the chunk pertains to (must not be NULL)
12225
 * @param sax  the SAX handler block (possibly NULL)
12226
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12227
 * @param depth  Used for loop detection, use 0
12228
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12229
 * @param listOut  the return value for the set of parsed nodes
12230
 * @param recover  return nodes even if the data is broken (use 0)
12231
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12232
 * otherwise.
12233
 */
12234
int
12235
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12236
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12237
0
     int recover) {
12238
0
    xmlParserCtxtPtr ctxt;
12239
0
    xmlParserInputPtr input;
12240
0
    xmlNodePtr list;
12241
0
    int ret;
12242
12243
0
    if (listOut != NULL)
12244
0
        *listOut = NULL;
12245
12246
0
    if (string == NULL)
12247
0
        return(XML_ERR_ARGUMENT);
12248
12249
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12250
0
    if (ctxt == NULL)
12251
0
        return(XML_ERR_NO_MEMORY);
12252
12253
0
    xmlCtxtInitializeLate(ctxt);
12254
12255
0
    ctxt->depth = depth;
12256
0
    ctxt->myDoc = doc;
12257
0
    if (recover) {
12258
0
        ctxt->options |= XML_PARSE_RECOVER;
12259
0
        ctxt->recovery = 1;
12260
0
    }
12261
12262
0
    input = xmlNewStringInputStream(ctxt, string);
12263
0
    if (input == NULL) {
12264
0
        ret = ctxt->errNo;
12265
0
        goto error;
12266
0
    }
12267
12268
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12269
0
    if (listOut != NULL)
12270
0
        *listOut = list;
12271
0
    else
12272
0
        xmlFreeNodeList(list);
12273
12274
0
    if (!ctxt->wellFormed)
12275
0
        ret = ctxt->errNo;
12276
0
    else
12277
0
        ret = XML_ERR_OK;
12278
12279
0
error:
12280
0
    xmlFreeInputStream(input);
12281
0
    xmlFreeParserCtxt(ctxt);
12282
0
    return(ret);
12283
0
}
12284
12285
/**
12286
 * Parse an XML external entity out of context and build a tree.
12287
 * It use the given SAX function block to handle the parsing callback.
12288
 * If sax is NULL, fallback to the default DOM tree building routines.
12289
 *
12290
 * @deprecated Don't use.
12291
 *
12292
 *     [78] extParsedEnt ::= TextDecl? content
12293
 *
12294
 * This correspond to a "Well Balanced" chunk
12295
 *
12296
 * This function uses deprecated global variables to set parser options
12297
 * which default to XML_PARSE_NODICT.
12298
 *
12299
 * @param sax  the SAX handler block
12300
 * @param filename  the filename
12301
 * @returns the resulting document tree
12302
 */
12303
12304
xmlDoc *
12305
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12306
0
    xmlDocPtr ret;
12307
0
    xmlParserCtxtPtr ctxt;
12308
12309
0
    ctxt = xmlCreateFileParserCtxt(filename);
12310
0
    if (ctxt == NULL) {
12311
0
  return(NULL);
12312
0
    }
12313
0
    if (sax != NULL) {
12314
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12315
0
            *ctxt->sax = *sax;
12316
0
        } else {
12317
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12318
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12319
0
        }
12320
0
        ctxt->userData = NULL;
12321
0
    }
12322
12323
0
    xmlParseExtParsedEnt(ctxt);
12324
12325
0
    if (ctxt->wellFormed) {
12326
0
  ret = ctxt->myDoc;
12327
0
    } else {
12328
0
        ret = NULL;
12329
0
        xmlFreeDoc(ctxt->myDoc);
12330
0
    }
12331
12332
0
    xmlFreeParserCtxt(ctxt);
12333
12334
0
    return(ret);
12335
0
}
12336
12337
/**
12338
 * Parse an XML external entity out of context and build a tree.
12339
 *
12340
 *     [78] extParsedEnt ::= TextDecl? content
12341
 *
12342
 * This correspond to a "Well Balanced" chunk
12343
 *
12344
 * This function uses deprecated global variables to set parser options
12345
 * which default to XML_PARSE_NODICT.
12346
 *
12347
 * @deprecated Don't use.
12348
 *
12349
 * @param filename  the filename
12350
 * @returns the resulting document tree
12351
 */
12352
12353
xmlDoc *
12354
0
xmlParseEntity(const char *filename) {
12355
0
    return(xmlSAXParseEntity(NULL, filename));
12356
0
}
12357
#endif /* LIBXML_SAX1_ENABLED */
12358
12359
/**
12360
 * Create a parser context for an external entity
12361
 * Automatic support for ZLIB/Compress compressed document is provided
12362
 * by default if found at compile-time.
12363
 *
12364
 * @deprecated Don't use.
12365
 *
12366
 * @param URL  the entity URL
12367
 * @param ID  the entity PUBLIC ID
12368
 * @param base  a possible base for the target URI
12369
 * @returns the new parser context or NULL
12370
 */
12371
xmlParserCtxt *
12372
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12373
0
                    const xmlChar *base) {
12374
0
    xmlParserCtxtPtr ctxt;
12375
0
    xmlParserInputPtr input;
12376
0
    xmlChar *uri = NULL;
12377
12378
0
    ctxt = xmlNewParserCtxt();
12379
0
    if (ctxt == NULL)
12380
0
  return(NULL);
12381
12382
0
    if (base != NULL) {
12383
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12384
0
            goto error;
12385
0
        if (uri != NULL)
12386
0
            URL = uri;
12387
0
    }
12388
12389
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12390
0
                            XML_RESOURCE_UNKNOWN);
12391
0
    if (input == NULL)
12392
0
        goto error;
12393
12394
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12395
0
        xmlFreeInputStream(input);
12396
0
        goto error;
12397
0
    }
12398
12399
0
    xmlFree(uri);
12400
0
    return(ctxt);
12401
12402
0
error:
12403
0
    xmlFree(uri);
12404
0
    xmlFreeParserCtxt(ctxt);
12405
0
    return(NULL);
12406
0
}
12407
12408
/************************************************************************
12409
 *                  *
12410
 *    Front ends when parsing from a file     *
12411
 *                  *
12412
 ************************************************************************/
12413
12414
/**
12415
 * Create a parser context for a file or URL content.
12416
 * Automatic support for ZLIB/Compress compressed document is provided
12417
 * by default if found at compile-time and for file accesses
12418
 *
12419
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12420
 *
12421
 * @param filename  the filename or URL
12422
 * @param options  a combination of xmlParserOption
12423
 * @returns the new parser context or NULL
12424
 */
12425
xmlParserCtxt *
12426
xmlCreateURLParserCtxt(const char *filename, int options)
12427
0
{
12428
0
    xmlParserCtxtPtr ctxt;
12429
0
    xmlParserInputPtr input;
12430
12431
0
    ctxt = xmlNewParserCtxt();
12432
0
    if (ctxt == NULL)
12433
0
  return(NULL);
12434
12435
0
    xmlCtxtUseOptions(ctxt, options);
12436
12437
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12438
0
    if (input == NULL) {
12439
0
  xmlFreeParserCtxt(ctxt);
12440
0
  return(NULL);
12441
0
    }
12442
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12443
0
        xmlFreeInputStream(input);
12444
0
        xmlFreeParserCtxt(ctxt);
12445
0
        return(NULL);
12446
0
    }
12447
12448
0
    return(ctxt);
12449
0
}
12450
12451
/**
12452
 * Create a parser context for a file content.
12453
 * Automatic support for ZLIB/Compress compressed document is provided
12454
 * by default if found at compile-time.
12455
 *
12456
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12457
 *
12458
 * @param filename  the filename
12459
 * @returns the new parser context or NULL
12460
 */
12461
xmlParserCtxt *
12462
xmlCreateFileParserCtxt(const char *filename)
12463
0
{
12464
0
    return(xmlCreateURLParserCtxt(filename, 0));
12465
0
}
12466
12467
#ifdef LIBXML_SAX1_ENABLED
12468
/**
12469
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12470
 * compressed document is provided by default if found at compile-time.
12471
 * It use the given SAX function block to handle the parsing callback.
12472
 * If sax is NULL, fallback to the default DOM tree building routines.
12473
 *
12474
 * This function uses deprecated global variables to set parser options
12475
 * which default to XML_PARSE_NODICT.
12476
 *
12477
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12478
 *
12479
 * User data (void *) is stored within the parser context in the
12480
 * context's _private member, so it is available nearly everywhere in libxml
12481
 *
12482
 * @param sax  the SAX handler block
12483
 * @param filename  the filename
12484
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12485
 *             documents
12486
 * @param data  the userdata
12487
 * @returns the resulting document tree
12488
 */
12489
12490
xmlDoc *
12491
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12492
0
                        int recovery, void *data) {
12493
0
    xmlDocPtr ret = NULL;
12494
0
    xmlParserCtxtPtr ctxt;
12495
0
    xmlParserInputPtr input;
12496
12497
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12498
0
    if (ctxt == NULL)
12499
0
  return(NULL);
12500
12501
0
    if (data != NULL)
12502
0
  ctxt->_private = data;
12503
12504
0
    if (recovery) {
12505
0
        ctxt->options |= XML_PARSE_RECOVER;
12506
0
        ctxt->recovery = 1;
12507
0
    }
12508
12509
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12510
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12511
0
    else
12512
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12513
12514
0
    if (input != NULL)
12515
0
        ret = xmlCtxtParseDocument(ctxt, input);
12516
12517
0
    xmlFreeParserCtxt(ctxt);
12518
0
    return(ret);
12519
0
}
12520
12521
/**
12522
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12523
 * compressed document is provided by default if found at compile-time.
12524
 * It use the given SAX function block to handle the parsing callback.
12525
 * If sax is NULL, fallback to the default DOM tree building routines.
12526
 *
12527
 * This function uses deprecated global variables to set parser options
12528
 * which default to XML_PARSE_NODICT.
12529
 *
12530
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12531
 *
12532
 * @param sax  the SAX handler block
12533
 * @param filename  the filename
12534
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12535
 *             documents
12536
 * @returns the resulting document tree
12537
 */
12538
12539
xmlDoc *
12540
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12541
0
                          int recovery) {
12542
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12543
0
}
12544
12545
/**
12546
 * Parse an XML in-memory document and build a tree.
12547
 * In the case the document is not Well Formed, a attempt to build a
12548
 * tree is tried anyway
12549
 *
12550
 * This function uses deprecated global variables to set parser options
12551
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12552
 *
12553
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12554
 *
12555
 * @param cur  a pointer to an array of xmlChar
12556
 * @returns the resulting document tree or NULL in case of failure
12557
 */
12558
12559
xmlDoc *
12560
0
xmlRecoverDoc(const xmlChar *cur) {
12561
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12562
0
}
12563
12564
/**
12565
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12566
 * compressed document is provided by default if found at compile-time.
12567
 *
12568
 * This function uses deprecated global variables to set parser options
12569
 * which default to XML_PARSE_NODICT.
12570
 *
12571
 * @deprecated Use #xmlReadFile.
12572
 *
12573
 * @param filename  the filename
12574
 * @returns the resulting document tree if the file was wellformed,
12575
 * NULL otherwise.
12576
 */
12577
12578
xmlDoc *
12579
0
xmlParseFile(const char *filename) {
12580
0
    return(xmlSAXParseFile(NULL, filename, 0));
12581
0
}
12582
12583
/**
12584
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12585
 * compressed document is provided by default if found at compile-time.
12586
 * In the case the document is not Well Formed, it attempts to build
12587
 * a tree anyway
12588
 *
12589
 * This function uses deprecated global variables to set parser options
12590
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12591
 *
12592
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12593
 *
12594
 * @param filename  the filename
12595
 * @returns the resulting document tree or NULL in case of failure
12596
 */
12597
12598
xmlDoc *
12599
0
xmlRecoverFile(const char *filename) {
12600
0
    return(xmlSAXParseFile(NULL, filename, 1));
12601
0
}
12602
12603
12604
/**
12605
 * Setup the parser context to parse a new buffer; Clears any prior
12606
 * contents from the parser context. The buffer parameter must not be
12607
 * NULL, but the filename parameter can be
12608
 *
12609
 * @deprecated Don't use.
12610
 *
12611
 * @param ctxt  an XML parser context
12612
 * @param buffer  a xmlChar * buffer
12613
 * @param filename  a file name
12614
 */
12615
void
12616
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12617
                             const char* filename)
12618
0
{
12619
0
    xmlParserInputPtr input;
12620
12621
0
    if ((ctxt == NULL) || (buffer == NULL))
12622
0
        return;
12623
12624
0
    xmlCtxtReset(ctxt);
12625
12626
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12627
0
                                      NULL, 0);
12628
0
    if (input == NULL)
12629
0
        return;
12630
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12631
0
        xmlFreeInputStream(input);
12632
0
}
12633
12634
/**
12635
 * Parse an XML file and call the given SAX handler routines.
12636
 * Automatic support for ZLIB/Compress compressed document is provided
12637
 *
12638
 * This function uses deprecated global variables to set parser options
12639
 * which default to XML_PARSE_NODICT.
12640
 *
12641
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12642
 *
12643
 * @param sax  a SAX handler
12644
 * @param user_data  The user data returned on SAX callbacks
12645
 * @param filename  a file name
12646
 * @returns 0 in case of success or a error number otherwise
12647
 */
12648
int
12649
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12650
0
                    const char *filename) {
12651
0
    int ret = 0;
12652
0
    xmlParserCtxtPtr ctxt;
12653
12654
0
    ctxt = xmlCreateFileParserCtxt(filename);
12655
0
    if (ctxt == NULL) return -1;
12656
0
    if (sax != NULL) {
12657
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12658
0
            *ctxt->sax = *sax;
12659
0
        } else {
12660
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12661
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12662
0
        }
12663
0
  ctxt->userData = user_data;
12664
0
    }
12665
12666
0
    xmlParseDocument(ctxt);
12667
12668
0
    if (ctxt->wellFormed)
12669
0
  ret = 0;
12670
0
    else {
12671
0
        if (ctxt->errNo != 0)
12672
0
      ret = ctxt->errNo;
12673
0
  else
12674
0
      ret = -1;
12675
0
    }
12676
0
    if (ctxt->myDoc != NULL) {
12677
0
        xmlFreeDoc(ctxt->myDoc);
12678
0
  ctxt->myDoc = NULL;
12679
0
    }
12680
0
    xmlFreeParserCtxt(ctxt);
12681
12682
0
    return ret;
12683
0
}
12684
#endif /* LIBXML_SAX1_ENABLED */
12685
12686
/************************************************************************
12687
 *                  *
12688
 *    Front ends when parsing from memory     *
12689
 *                  *
12690
 ************************************************************************/
12691
12692
/**
12693
 * Create a parser context for an XML in-memory document. The input buffer
12694
 * must not contain a terminating null byte.
12695
 *
12696
 * @param buffer  a pointer to a char array
12697
 * @param size  the size of the array
12698
 * @returns the new parser context or NULL
12699
 */
12700
xmlParserCtxt *
12701
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12702
0
    xmlParserCtxtPtr ctxt;
12703
0
    xmlParserInputPtr input;
12704
12705
0
    if (size < 0)
12706
0
  return(NULL);
12707
12708
0
    ctxt = xmlNewParserCtxt();
12709
0
    if (ctxt == NULL)
12710
0
  return(NULL);
12711
12712
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12713
0
    if (input == NULL) {
12714
0
  xmlFreeParserCtxt(ctxt);
12715
0
  return(NULL);
12716
0
    }
12717
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12718
0
        xmlFreeInputStream(input);
12719
0
        xmlFreeParserCtxt(ctxt);
12720
0
        return(NULL);
12721
0
    }
12722
12723
0
    return(ctxt);
12724
0
}
12725
12726
#ifdef LIBXML_SAX1_ENABLED
12727
/**
12728
 * Parse an XML in-memory block and use the given SAX function block
12729
 * to handle the parsing callback. If sax is NULL, fallback to the default
12730
 * DOM tree building routines.
12731
 *
12732
 * This function uses deprecated global variables to set parser options
12733
 * which default to XML_PARSE_NODICT.
12734
 *
12735
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12736
 *
12737
 * User data (void *) is stored within the parser context in the
12738
 * context's _private member, so it is available nearly everywhere in libxml
12739
 *
12740
 * @param sax  the SAX handler block
12741
 * @param buffer  an pointer to a char array
12742
 * @param size  the size of the array
12743
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12744
 *             documents
12745
 * @param data  the userdata
12746
 * @returns the resulting document tree
12747
 */
12748
12749
xmlDoc *
12750
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12751
0
                          int size, int recovery, void *data) {
12752
0
    xmlDocPtr ret = NULL;
12753
0
    xmlParserCtxtPtr ctxt;
12754
0
    xmlParserInputPtr input;
12755
12756
0
    if (size < 0)
12757
0
        return(NULL);
12758
12759
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12760
0
    if (ctxt == NULL)
12761
0
        return(NULL);
12762
12763
0
    if (data != NULL)
12764
0
  ctxt->_private=data;
12765
12766
0
    if (recovery) {
12767
0
        ctxt->options |= XML_PARSE_RECOVER;
12768
0
        ctxt->recovery = 1;
12769
0
    }
12770
12771
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12772
0
                                      XML_INPUT_BUF_STATIC);
12773
12774
0
    if (input != NULL)
12775
0
        ret = xmlCtxtParseDocument(ctxt, input);
12776
12777
0
    xmlFreeParserCtxt(ctxt);
12778
0
    return(ret);
12779
0
}
12780
12781
/**
12782
 * Parse an XML in-memory block and use the given SAX function block
12783
 * to handle the parsing callback. If sax is NULL, fallback to the default
12784
 * DOM tree building routines.
12785
 *
12786
 * This function uses deprecated global variables to set parser options
12787
 * which default to XML_PARSE_NODICT.
12788
 *
12789
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12790
 *
12791
 * @param sax  the SAX handler block
12792
 * @param buffer  an pointer to a char array
12793
 * @param size  the size of the array
12794
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12795
 *             documents
12796
 * @returns the resulting document tree
12797
 */
12798
xmlDoc *
12799
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12800
0
            int size, int recovery) {
12801
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12802
0
}
12803
12804
/**
12805
 * Parse an XML in-memory block and build a tree.
12806
 *
12807
 * This function uses deprecated global variables to set parser options
12808
 * which default to XML_PARSE_NODICT.
12809
 *
12810
 * @deprecated Use #xmlReadMemory.
12811
 *
12812
 * @param buffer  an pointer to a char array
12813
 * @param size  the size of the array
12814
 * @returns the resulting document tree
12815
 */
12816
12817
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12818
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12819
0
}
12820
12821
/**
12822
 * Parse an XML in-memory block and build a tree.
12823
 * In the case the document is not Well Formed, an attempt to
12824
 * build a tree is tried anyway
12825
 *
12826
 * This function uses deprecated global variables to set parser options
12827
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12828
 *
12829
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12830
 *
12831
 * @param buffer  an pointer to a char array
12832
 * @param size  the size of the array
12833
 * @returns the resulting document tree or NULL in case of error
12834
 */
12835
12836
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12837
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12838
0
}
12839
12840
/**
12841
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12842
 *
12843
 * This function uses deprecated global variables to set parser options
12844
 * which default to XML_PARSE_NODICT.
12845
 *
12846
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12847
 *
12848
 * @param sax  a SAX handler
12849
 * @param user_data  The user data returned on SAX callbacks
12850
 * @param buffer  an in-memory XML document input
12851
 * @param size  the length of the XML document in bytes
12852
 * @returns 0 in case of success or a error number otherwise
12853
 */
12854
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12855
0
        const char *buffer, int size) {
12856
0
    int ret = 0;
12857
0
    xmlParserCtxtPtr ctxt;
12858
12859
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12860
0
    if (ctxt == NULL) return -1;
12861
0
    if (sax != NULL) {
12862
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12863
0
            *ctxt->sax = *sax;
12864
0
        } else {
12865
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12866
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12867
0
        }
12868
0
  ctxt->userData = user_data;
12869
0
    }
12870
12871
0
    xmlParseDocument(ctxt);
12872
12873
0
    if (ctxt->wellFormed)
12874
0
  ret = 0;
12875
0
    else {
12876
0
        if (ctxt->errNo != 0)
12877
0
      ret = ctxt->errNo;
12878
0
  else
12879
0
      ret = -1;
12880
0
    }
12881
0
    if (ctxt->myDoc != NULL) {
12882
0
        xmlFreeDoc(ctxt->myDoc);
12883
0
  ctxt->myDoc = NULL;
12884
0
    }
12885
0
    xmlFreeParserCtxt(ctxt);
12886
12887
0
    return ret;
12888
0
}
12889
#endif /* LIBXML_SAX1_ENABLED */
12890
12891
/**
12892
 * Creates a parser context for an XML in-memory document.
12893
 *
12894
 * @param str  a pointer to an array of xmlChar
12895
 * @returns the new parser context or NULL
12896
 */
12897
xmlParserCtxt *
12898
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12899
0
    xmlParserCtxtPtr ctxt;
12900
0
    xmlParserInputPtr input;
12901
12902
0
    ctxt = xmlNewParserCtxt();
12903
0
    if (ctxt == NULL)
12904
0
  return(NULL);
12905
12906
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12907
0
    if (input == NULL) {
12908
0
  xmlFreeParserCtxt(ctxt);
12909
0
  return(NULL);
12910
0
    }
12911
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12912
0
        xmlFreeInputStream(input);
12913
0
        xmlFreeParserCtxt(ctxt);
12914
0
        return(NULL);
12915
0
    }
12916
12917
0
    return(ctxt);
12918
0
}
12919
12920
#ifdef LIBXML_SAX1_ENABLED
12921
/**
12922
 * Parse an XML in-memory document and build a tree.
12923
 * It use the given SAX function block to handle the parsing callback.
12924
 * If sax is NULL, fallback to the default DOM tree building routines.
12925
 *
12926
 * This function uses deprecated global variables to set parser options
12927
 * which default to XML_PARSE_NODICT.
12928
 *
12929
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12930
 *
12931
 * @param sax  the SAX handler block
12932
 * @param cur  a pointer to an array of xmlChar
12933
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12934
 *             documents
12935
 * @returns the resulting document tree
12936
 */
12937
12938
xmlDoc *
12939
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12940
0
    xmlDocPtr ret;
12941
0
    xmlParserCtxtPtr ctxt;
12942
0
    xmlSAXHandlerPtr oldsax = NULL;
12943
12944
0
    if (cur == NULL) return(NULL);
12945
12946
12947
0
    ctxt = xmlCreateDocParserCtxt(cur);
12948
0
    if (ctxt == NULL) return(NULL);
12949
0
    if (sax != NULL) {
12950
0
        oldsax = ctxt->sax;
12951
0
        ctxt->sax = sax;
12952
0
        ctxt->userData = NULL;
12953
0
    }
12954
12955
0
    xmlParseDocument(ctxt);
12956
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12957
0
    else {
12958
0
       ret = NULL;
12959
0
       xmlFreeDoc(ctxt->myDoc);
12960
0
       ctxt->myDoc = NULL;
12961
0
    }
12962
0
    if (sax != NULL)
12963
0
  ctxt->sax = oldsax;
12964
0
    xmlFreeParserCtxt(ctxt);
12965
12966
0
    return(ret);
12967
0
}
12968
12969
/**
12970
 * Parse an XML in-memory document and build a tree.
12971
 *
12972
 * This function uses deprecated global variables to set parser options
12973
 * which default to XML_PARSE_NODICT.
12974
 *
12975
 * @deprecated Use #xmlReadDoc.
12976
 *
12977
 * @param cur  a pointer to an array of xmlChar
12978
 * @returns the resulting document tree
12979
 */
12980
12981
xmlDoc *
12982
0
xmlParseDoc(const xmlChar *cur) {
12983
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12984
0
}
12985
#endif /* LIBXML_SAX1_ENABLED */
12986
12987
/************************************************************************
12988
 *                  *
12989
 *  New set (2.6.0) of simpler and more flexible APIs   *
12990
 *                  *
12991
 ************************************************************************/
12992
12993
/**
12994
 * Reset a parser context
12995
 *
12996
 * @param ctxt  an XML parser context
12997
 */
12998
void
12999
xmlCtxtReset(xmlParserCtxt *ctxt)
13000
189k
{
13001
189k
    xmlParserInputPtr input;
13002
13003
189k
    if (ctxt == NULL)
13004
0
        return;
13005
13006
189k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13007
1
        xmlFreeInputStream(input);
13008
1
    }
13009
189k
    ctxt->inputNr = 0;
13010
189k
    ctxt->input = NULL;
13011
13012
189k
    ctxt->spaceNr = 0;
13013
189k
    if (ctxt->spaceTab != NULL) {
13014
177k
  ctxt->spaceTab[0] = -1;
13015
177k
  ctxt->space = &ctxt->spaceTab[0];
13016
177k
    } else {
13017
12.2k
        ctxt->space = NULL;
13018
12.2k
    }
13019
13020
13021
189k
    ctxt->nodeNr = 0;
13022
189k
    ctxt->node = NULL;
13023
13024
189k
    ctxt->nameNr = 0;
13025
189k
    ctxt->name = NULL;
13026
13027
189k
    ctxt->nsNr = 0;
13028
189k
    xmlParserNsReset(ctxt->nsdb);
13029
13030
189k
    if (ctxt->version != NULL) {
13031
14.9k
        xmlFree(ctxt->version);
13032
14.9k
        ctxt->version = NULL;
13033
14.9k
    }
13034
189k
    if (ctxt->encoding != NULL) {
13035
835
        xmlFree(ctxt->encoding);
13036
835
        ctxt->encoding = NULL;
13037
835
    }
13038
189k
    if (ctxt->extSubURI != NULL) {
13039
2.26k
        xmlFree(ctxt->extSubURI);
13040
2.26k
        ctxt->extSubURI = NULL;
13041
2.26k
    }
13042
189k
    if (ctxt->extSubSystem != NULL) {
13043
252
        xmlFree(ctxt->extSubSystem);
13044
252
        ctxt->extSubSystem = NULL;
13045
252
    }
13046
189k
    if (ctxt->directory != NULL) {
13047
15.4k
        xmlFree(ctxt->directory);
13048
15.4k
        ctxt->directory = NULL;
13049
15.4k
    }
13050
13051
189k
    if (ctxt->myDoc != NULL)
13052
0
        xmlFreeDoc(ctxt->myDoc);
13053
189k
    ctxt->myDoc = NULL;
13054
13055
189k
    ctxt->standalone = -1;
13056
189k
    ctxt->hasExternalSubset = 0;
13057
189k
    ctxt->hasPErefs = 0;
13058
189k
    ctxt->html = ctxt->html ? 1 : 0;
13059
189k
    ctxt->instate = XML_PARSER_START;
13060
13061
189k
    ctxt->wellFormed = 1;
13062
189k
    ctxt->nsWellFormed = 1;
13063
189k
    ctxt->disableSAX = 0;
13064
189k
    ctxt->valid = 1;
13065
189k
    ctxt->record_info = 0;
13066
189k
    ctxt->checkIndex = 0;
13067
189k
    ctxt->endCheckState = 0;
13068
189k
    ctxt->inSubset = 0;
13069
189k
    ctxt->errNo = XML_ERR_OK;
13070
189k
    ctxt->depth = 0;
13071
189k
    ctxt->catalogs = NULL;
13072
189k
    ctxt->sizeentities = 0;
13073
189k
    ctxt->sizeentcopy = 0;
13074
189k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13075
13076
189k
    if (ctxt->attsDefault != NULL) {
13077
1.38k
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13078
1.38k
        ctxt->attsDefault = NULL;
13079
1.38k
    }
13080
189k
    if (ctxt->attsSpecial != NULL) {
13081
1.87k
        xmlHashFree(ctxt->attsSpecial, NULL);
13082
1.87k
        ctxt->attsSpecial = NULL;
13083
1.87k
    }
13084
13085
189k
#ifdef LIBXML_CATALOG_ENABLED
13086
189k
    if (ctxt->catalogs != NULL)
13087
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13088
189k
#endif
13089
189k
    ctxt->nbErrors = 0;
13090
189k
    ctxt->nbWarnings = 0;
13091
189k
    if (ctxt->lastError.code != XML_ERR_OK)
13092
15.4k
        xmlResetError(&ctxt->lastError);
13093
189k
}
13094
13095
/**
13096
 * Reset a push parser context
13097
 *
13098
 * @param ctxt  an XML parser context
13099
 * @param chunk  a pointer to an array of chars
13100
 * @param size  number of chars in the array
13101
 * @param filename  an optional file name or URI
13102
 * @param encoding  the document encoding, or NULL
13103
 * @returns 0 in case of success and 1 in case of error
13104
 */
13105
int
13106
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13107
                 int size, const char *filename, const char *encoding)
13108
1
{
13109
1
    xmlParserInputPtr input;
13110
13111
1
    if (ctxt == NULL)
13112
0
        return(1);
13113
13114
1
    xmlCtxtReset(ctxt);
13115
13116
1
    input = xmlNewPushInput(filename, chunk, size);
13117
1
    if (input == NULL)
13118
0
        return(1);
13119
13120
1
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13121
0
        xmlFreeInputStream(input);
13122
0
        return(1);
13123
0
    }
13124
13125
1
    if (encoding != NULL)
13126
0
        xmlSwitchEncodingName(ctxt, encoding);
13127
13128
1
    return(0);
13129
1
}
13130
13131
static int
13132
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13133
469k
{
13134
469k
    int allMask;
13135
13136
469k
    if (ctxt == NULL)
13137
184
        return(-1);
13138
13139
    /*
13140
     * XInclude options aren't handled by the parser.
13141
     *
13142
     * XML_PARSE_XINCLUDE
13143
     * XML_PARSE_NOXINCNODE
13144
     * XML_PARSE_NOBASEFIX
13145
     */
13146
469k
    allMask = XML_PARSE_RECOVER |
13147
469k
              XML_PARSE_NOENT |
13148
469k
              XML_PARSE_DTDLOAD |
13149
469k
              XML_PARSE_DTDATTR |
13150
469k
              XML_PARSE_DTDVALID |
13151
469k
              XML_PARSE_NOERROR |
13152
469k
              XML_PARSE_NOWARNING |
13153
469k
              XML_PARSE_PEDANTIC |
13154
469k
              XML_PARSE_NOBLANKS |
13155
469k
#ifdef LIBXML_SAX1_ENABLED
13156
469k
              XML_PARSE_SAX1 |
13157
469k
#endif
13158
469k
              XML_PARSE_NONET |
13159
469k
              XML_PARSE_NODICT |
13160
469k
              XML_PARSE_NSCLEAN |
13161
469k
              XML_PARSE_NOCDATA |
13162
469k
              XML_PARSE_COMPACT |
13163
469k
              XML_PARSE_OLD10 |
13164
469k
              XML_PARSE_HUGE |
13165
469k
              XML_PARSE_OLDSAX |
13166
469k
              XML_PARSE_IGNORE_ENC |
13167
469k
              XML_PARSE_BIG_LINES |
13168
469k
              XML_PARSE_NO_XXE |
13169
469k
              XML_PARSE_UNZIP |
13170
469k
              XML_PARSE_NO_SYS_CATALOG |
13171
469k
              XML_PARSE_CATALOG_PI;
13172
13173
469k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13174
13175
    /*
13176
     * For some options, struct members are historically the source
13177
     * of truth. The values are initalized from global variables and
13178
     * old code could also modify them directly. Several older API
13179
     * functions that don't take an options argument rely on these
13180
     * deprecated mechanisms.
13181
     *
13182
     * Once public access to struct members and the globals are
13183
     * disabled, we can use the options bitmask as source of
13184
     * truth, making all these struct members obsolete.
13185
     *
13186
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13187
     * loading of the external subset.
13188
     */
13189
469k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13190
469k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13191
469k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13192
469k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13193
469k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13194
469k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13195
469k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13196
469k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13197
469k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13198
13199
469k
    return(options & ~allMask);
13200
469k
}
13201
13202
/**
13203
 * Applies the options to the parser context. Unset options are
13204
 * cleared.
13205
 *
13206
 * @since 2.13.0
13207
 *
13208
 * With older versions, you can use #xmlCtxtUseOptions.
13209
 *
13210
 * @param ctxt  an XML parser context
13211
 * @param options  a bitmask of xmlParserOption values
13212
 * @returns 0 in case of success, the set of unknown or unimplemented options
13213
 *         in case of error.
13214
 */
13215
int
13216
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13217
11.3k
{
13218
11.3k
#ifdef LIBXML_HTML_ENABLED
13219
11.3k
    if ((ctxt != NULL) && (ctxt->html))
13220
0
        return(htmlCtxtSetOptions(ctxt, options));
13221
11.3k
#endif
13222
13223
11.3k
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13224
11.3k
}
13225
13226
/**
13227
 * Get the current options of the parser context.
13228
 *
13229
 * @since 2.14.0
13230
 *
13231
 * @param ctxt  an XML parser context
13232
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13233
 */
13234
int
13235
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13236
0
{
13237
0
    if (ctxt == NULL)
13238
0
        return(-1);
13239
13240
0
    return(ctxt->options);
13241
0
}
13242
13243
/**
13244
 * Applies the options to the parser context. The following options
13245
 * are never cleared and can only be enabled:
13246
 *
13247
 * - XML_PARSE_NOERROR
13248
 * - XML_PARSE_NOWARNING
13249
 * - XML_PARSE_NONET
13250
 * - XML_PARSE_NSCLEAN
13251
 * - XML_PARSE_NOCDATA
13252
 * - XML_PARSE_COMPACT
13253
 * - XML_PARSE_OLD10
13254
 * - XML_PARSE_HUGE
13255
 * - XML_PARSE_OLDSAX
13256
 * - XML_PARSE_IGNORE_ENC
13257
 * - XML_PARSE_BIG_LINES
13258
 *
13259
 * @deprecated Use #xmlCtxtSetOptions.
13260
 *
13261
 * @param ctxt  an XML parser context
13262
 * @param options  a combination of xmlParserOption
13263
 * @returns 0 in case of success, the set of unknown or unimplemented options
13264
 *         in case of error.
13265
 */
13266
int
13267
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13268
458k
{
13269
458k
    int keepMask;
13270
13271
458k
#ifdef LIBXML_HTML_ENABLED
13272
458k
    if ((ctxt != NULL) && (ctxt->html))
13273
0
        return(htmlCtxtUseOptions(ctxt, options));
13274
458k
#endif
13275
13276
    /*
13277
     * For historic reasons, some options can only be enabled.
13278
     */
13279
458k
    keepMask = XML_PARSE_NOERROR |
13280
458k
               XML_PARSE_NOWARNING |
13281
458k
               XML_PARSE_NONET |
13282
458k
               XML_PARSE_NSCLEAN |
13283
458k
               XML_PARSE_NOCDATA |
13284
458k
               XML_PARSE_COMPACT |
13285
458k
               XML_PARSE_OLD10 |
13286
458k
               XML_PARSE_HUGE |
13287
458k
               XML_PARSE_OLDSAX |
13288
458k
               XML_PARSE_IGNORE_ENC |
13289
458k
               XML_PARSE_BIG_LINES;
13290
13291
458k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13292
458k
}
13293
13294
/**
13295
 * To protect against exponential entity expansion ("billion laughs"), the
13296
 * size of serialized output is (roughly) limited to the input size
13297
 * multiplied by this factor. The default value is 5.
13298
 *
13299
 * When working with documents making heavy use of entity expansion, it can
13300
 * be necessary to increase the value. For security reasons, this should only
13301
 * be considered when processing trusted input.
13302
 *
13303
 * @param ctxt  an XML parser context
13304
 * @param maxAmpl  maximum amplification factor
13305
 */
13306
void
13307
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13308
491
{
13309
491
    if (ctxt == NULL)
13310
0
        return;
13311
491
    ctxt->maxAmpl = maxAmpl;
13312
491
}
13313
13314
/**
13315
 * Parse an XML document and return the resulting document tree.
13316
 * Takes ownership of the input object.
13317
 *
13318
 * @since 2.13.0
13319
 *
13320
 * @param ctxt  an XML parser context
13321
 * @param input  parser input
13322
 * @returns the resulting document tree or NULL
13323
 */
13324
xmlDoc *
13325
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13326
339k
{
13327
339k
    xmlDocPtr ret = NULL;
13328
13329
339k
    if ((ctxt == NULL) || (input == NULL)) {
13330
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13331
0
        xmlFreeInputStream(input);
13332
0
        return(NULL);
13333
0
    }
13334
13335
    /* assert(ctxt->inputNr == 0); */
13336
339k
    while (ctxt->inputNr > 0)
13337
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13338
13339
339k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13340
31
        xmlFreeInputStream(input);
13341
31
        return(NULL);
13342
31
    }
13343
13344
339k
    xmlParseDocument(ctxt);
13345
13346
339k
    ret = xmlCtxtGetDocument(ctxt);
13347
13348
    /* assert(ctxt->inputNr == 1); */
13349
682k
    while (ctxt->inputNr > 0)
13350
342k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13351
13352
339k
    return(ret);
13353
339k
}
13354
13355
/**
13356
 * Convenience function to parse an XML document from a
13357
 * zero-terminated string.
13358
 *
13359
 * See #xmlCtxtReadDoc for details.
13360
 *
13361
 * @param cur  a pointer to a zero terminated string
13362
 * @param URL  base URL (optional)
13363
 * @param encoding  the document encoding (optional)
13364
 * @param options  a combination of xmlParserOption
13365
 * @returns the resulting document tree
13366
 */
13367
xmlDoc *
13368
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13369
           int options)
13370
163k
{
13371
163k
    xmlParserCtxtPtr ctxt;
13372
163k
    xmlParserInputPtr input;
13373
163k
    xmlDocPtr doc = NULL;
13374
13375
163k
    ctxt = xmlNewParserCtxt();
13376
163k
    if (ctxt == NULL)
13377
103
        return(NULL);
13378
13379
163k
    xmlCtxtUseOptions(ctxt, options);
13380
13381
163k
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13382
163k
                                      XML_INPUT_BUF_STATIC);
13383
13384
163k
    if (input != NULL)
13385
161k
        doc = xmlCtxtParseDocument(ctxt, input);
13386
13387
163k
    xmlFreeParserCtxt(ctxt);
13388
163k
    return(doc);
13389
163k
}
13390
13391
/**
13392
 * Convenience function to parse an XML file from the filesystem
13393
 * or a global, user-defined resource loader.
13394
 *
13395
 * If a "-" filename is passed, the function will read from stdin.
13396
 * This feature is potentially insecure and might be removed from
13397
 * later versions.
13398
 *
13399
 * See #xmlCtxtReadFile for details.
13400
 *
13401
 * @param filename  a file or URL
13402
 * @param encoding  the document encoding (optional)
13403
 * @param options  a combination of xmlParserOption
13404
 * @returns the resulting document tree
13405
 */
13406
xmlDoc *
13407
xmlReadFile(const char *filename, const char *encoding, int options)
13408
0
{
13409
0
    xmlParserCtxtPtr ctxt;
13410
0
    xmlParserInputPtr input;
13411
0
    xmlDocPtr doc = NULL;
13412
13413
0
    ctxt = xmlNewParserCtxt();
13414
0
    if (ctxt == NULL)
13415
0
        return(NULL);
13416
13417
0
    xmlCtxtUseOptions(ctxt, options);
13418
13419
    /*
13420
     * Backward compatibility for users of command line utilities like
13421
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13422
     * should be removed at some point.
13423
     */
13424
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13425
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13426
0
                                      encoding, 0);
13427
0
    else
13428
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13429
13430
0
    if (input != NULL)
13431
0
        doc = xmlCtxtParseDocument(ctxt, input);
13432
13433
0
    xmlFreeParserCtxt(ctxt);
13434
0
    return(doc);
13435
0
}
13436
13437
/**
13438
 * Parse an XML in-memory document and build a tree. The input buffer must
13439
 * not contain a terminating null byte.
13440
 *
13441
 * See #xmlCtxtReadMemory for details.
13442
 *
13443
 * @param buffer  a pointer to a char array
13444
 * @param size  the size of the array
13445
 * @param url  base URL (optional)
13446
 * @param encoding  the document encoding (optional)
13447
 * @param options  a combination of xmlParserOption
13448
 * @returns the resulting document tree
13449
 */
13450
xmlDoc *
13451
xmlReadMemory(const char *buffer, int size, const char *url,
13452
              const char *encoding, int options)
13453
25.7k
{
13454
25.7k
    xmlParserCtxtPtr ctxt;
13455
25.7k
    xmlParserInputPtr input;
13456
25.7k
    xmlDocPtr doc = NULL;
13457
13458
25.7k
    if (size < 0)
13459
0
  return(NULL);
13460
13461
25.7k
    ctxt = xmlNewParserCtxt();
13462
25.7k
    if (ctxt == NULL)
13463
0
        return(NULL);
13464
13465
25.7k
    xmlCtxtUseOptions(ctxt, options);
13466
13467
25.7k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13468
25.7k
                                      XML_INPUT_BUF_STATIC);
13469
13470
25.7k
    if (input != NULL)
13471
25.6k
        doc = xmlCtxtParseDocument(ctxt, input);
13472
13473
25.7k
    xmlFreeParserCtxt(ctxt);
13474
25.7k
    return(doc);
13475
25.7k
}
13476
13477
/**
13478
 * Parse an XML from a file descriptor and build a tree.
13479
 *
13480
 * See #xmlCtxtReadFd for details.
13481
 *
13482
 * NOTE that the file descriptor will not be closed when the
13483
 * context is freed or reset.
13484
 *
13485
 * @param fd  an open file descriptor
13486
 * @param URL  base URL (optional)
13487
 * @param encoding  the document encoding (optional)
13488
 * @param options  a combination of xmlParserOption
13489
 * @returns the resulting document tree
13490
 */
13491
xmlDoc *
13492
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13493
0
{
13494
0
    xmlParserCtxtPtr ctxt;
13495
0
    xmlParserInputPtr input;
13496
0
    xmlDocPtr doc = NULL;
13497
13498
0
    ctxt = xmlNewParserCtxt();
13499
0
    if (ctxt == NULL)
13500
0
        return(NULL);
13501
13502
0
    xmlCtxtUseOptions(ctxt, options);
13503
13504
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13505
13506
0
    if (input != NULL)
13507
0
        doc = xmlCtxtParseDocument(ctxt, input);
13508
13509
0
    xmlFreeParserCtxt(ctxt);
13510
0
    return(doc);
13511
0
}
13512
13513
/**
13514
 * Parse an XML document from I/O functions and context and build a tree.
13515
 *
13516
 * See #xmlCtxtReadIO for details.
13517
 *
13518
 * @param ioread  an I/O read function
13519
 * @param ioclose  an I/O close function (optional)
13520
 * @param ioctx  an I/O handler
13521
 * @param URL  base URL (optional)
13522
 * @param encoding  the document encoding (optional)
13523
 * @param options  a combination of xmlParserOption
13524
 * @returns the resulting document tree
13525
 */
13526
xmlDoc *
13527
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13528
          void *ioctx, const char *URL, const char *encoding, int options)
13529
0
{
13530
0
    xmlParserCtxtPtr ctxt;
13531
0
    xmlParserInputPtr input;
13532
0
    xmlDocPtr doc = NULL;
13533
13534
0
    ctxt = xmlNewParserCtxt();
13535
0
    if (ctxt == NULL)
13536
0
        return(NULL);
13537
13538
0
    xmlCtxtUseOptions(ctxt, options);
13539
13540
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13541
0
                                  encoding, 0);
13542
13543
0
    if (input != NULL)
13544
0
        doc = xmlCtxtParseDocument(ctxt, input);
13545
13546
0
    xmlFreeParserCtxt(ctxt);
13547
0
    return(doc);
13548
0
}
13549
13550
/**
13551
 * Parse an XML in-memory document and build a tree.
13552
 *
13553
 * `URL` is used as base to resolve external entities and for error
13554
 * reporting.
13555
 *
13556
 * @param ctxt  an XML parser context
13557
 * @param str  a pointer to a zero terminated string
13558
 * @param URL  base URL (optional)
13559
 * @param encoding  the document encoding (optional)
13560
 * @param options  a combination of xmlParserOption
13561
 * @returns the resulting document tree
13562
 */
13563
xmlDoc *
13564
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13565
               const char *URL, const char *encoding, int options)
13566
0
{
13567
0
    xmlParserInputPtr input;
13568
13569
0
    if (ctxt == NULL)
13570
0
        return(NULL);
13571
13572
0
    xmlCtxtReset(ctxt);
13573
0
    xmlCtxtUseOptions(ctxt, options);
13574
13575
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13576
0
                                      XML_INPUT_BUF_STATIC);
13577
0
    if (input == NULL)
13578
0
        return(NULL);
13579
13580
0
    return(xmlCtxtParseDocument(ctxt, input));
13581
0
}
13582
13583
/**
13584
 * Parse an XML file from the filesystem or a global, user-defined
13585
 * resource loader.
13586
 *
13587
 * @param ctxt  an XML parser context
13588
 * @param filename  a file or URL
13589
 * @param encoding  the document encoding (optional)
13590
 * @param options  a combination of xmlParserOption
13591
 * @returns the resulting document tree
13592
 */
13593
xmlDoc *
13594
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13595
                const char *encoding, int options)
13596
51.7k
{
13597
51.7k
    xmlParserInputPtr input;
13598
13599
51.7k
    if (ctxt == NULL)
13600
72
        return(NULL);
13601
13602
51.6k
    xmlCtxtReset(ctxt);
13603
51.6k
    xmlCtxtUseOptions(ctxt, options);
13604
13605
51.6k
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13606
51.6k
    if (input == NULL)
13607
9.00k
        return(NULL);
13608
13609
42.6k
    return(xmlCtxtParseDocument(ctxt, input));
13610
51.6k
}
13611
13612
/**
13613
 * Parse an XML in-memory document and build a tree. The input buffer must
13614
 * not contain a terminating null byte.
13615
 *
13616
 * `URL` is used as base to resolve external entities and for error
13617
 * reporting.
13618
 *
13619
 * @param ctxt  an XML parser context
13620
 * @param buffer  a pointer to a char array
13621
 * @param size  the size of the array
13622
 * @param URL  base URL (optional)
13623
 * @param encoding  the document encoding (optional)
13624
 * @param options  a combination of xmlParserOption
13625
 * @returns the resulting document tree
13626
 */
13627
xmlDoc *
13628
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13629
                  const char *URL, const char *encoding, int options)
13630
109k
{
13631
109k
    xmlParserInputPtr input;
13632
13633
109k
    if ((ctxt == NULL) || (size < 0))
13634
0
        return(NULL);
13635
13636
109k
    xmlCtxtReset(ctxt);
13637
109k
    xmlCtxtUseOptions(ctxt, options);
13638
13639
109k
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13640
109k
                                      XML_INPUT_BUF_STATIC);
13641
109k
    if (input == NULL)
13642
71
        return(NULL);
13643
13644
109k
    return(xmlCtxtParseDocument(ctxt, input));
13645
109k
}
13646
13647
/**
13648
 * Parse an XML document from a file descriptor and build a tree.
13649
 *
13650
 * NOTE that the file descriptor will not be closed when the
13651
 * context is freed or reset.
13652
 *
13653
 * `URL` is used as base to resolve external entities and for error
13654
 * reporting.
13655
 *
13656
 * @param ctxt  an XML parser context
13657
 * @param fd  an open file descriptor
13658
 * @param URL  base URL (optional)
13659
 * @param encoding  the document encoding (optional)
13660
 * @param options  a combination of xmlParserOption
13661
 * @returns the resulting document tree
13662
 */
13663
xmlDoc *
13664
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13665
              const char *URL, const char *encoding, int options)
13666
0
{
13667
0
    xmlParserInputPtr input;
13668
13669
0
    if (ctxt == NULL)
13670
0
        return(NULL);
13671
13672
0
    xmlCtxtReset(ctxt);
13673
0
    xmlCtxtUseOptions(ctxt, options);
13674
13675
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13676
0
    if (input == NULL)
13677
0
        return(NULL);
13678
13679
0
    return(xmlCtxtParseDocument(ctxt, input));
13680
0
}
13681
13682
/**
13683
 * Parse an XML document from I/O functions and source and build a tree.
13684
 * This reuses the existing `ctxt` parser context
13685
 *
13686
 * `URL` is used as base to resolve external entities and for error
13687
 * reporting.
13688
 *
13689
 * @param ctxt  an XML parser context
13690
 * @param ioread  an I/O read function
13691
 * @param ioclose  an I/O close function
13692
 * @param ioctx  an I/O handler
13693
 * @param URL  the base URL to use for the document
13694
 * @param encoding  the document encoding, or NULL
13695
 * @param options  a combination of xmlParserOption
13696
 * @returns the resulting document tree
13697
 */
13698
xmlDoc *
13699
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13700
              xmlInputCloseCallback ioclose, void *ioctx,
13701
        const char *URL,
13702
              const char *encoding, int options)
13703
13
{
13704
13
    xmlParserInputPtr input;
13705
13706
13
    if (ctxt == NULL)
13707
0
        return(NULL);
13708
13709
13
    xmlCtxtReset(ctxt);
13710
13
    xmlCtxtUseOptions(ctxt, options);
13711
13712
13
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13713
13
                                  encoding, 0);
13714
13
    if (input == NULL)
13715
13
        return(NULL);
13716
13717
0
    return(xmlCtxtParseDocument(ctxt, input));
13718
13
}
13719