Coverage Report

Created: 2024-07-23 06:41

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
4.84M
#define NS_INDEX_EMPTY  INT_MAX
78
755k
#define NS_INDEX_XML    (INT_MAX - 1)
79
2.47M
#define URI_HASH_EMPTY  0xD943A04E
80
132k
#define URI_HASH_XML    0xF0451F02
81
82
#ifndef STDIN_FILENO
83
0
  #define STDIN_FILENO 0
84
#endif
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
/*
 * Additional per-namespace data kept by the parser.
 * NOTE(review): field meanings are inferred from names; confirm with the
 * namespace table code.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably for SAX users */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the namespace URI */
    unsigned elementId;         /* presumably the declaring element's id */
    int oldIndex;               /* presumably a previously bound index */
} xmlParserNsExtra;
100
101
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData).
 */
typedef struct {
    unsigned hashValue; /* hash stored for the slot */
    int index;          /* presumably an index into the namespace table */
} xmlParserNsBucket;
105
106
struct _xmlParserNsData {
107
    xmlParserNsExtra *extra;
108
109
    unsigned hashSize;
110
    unsigned hashElems;
111
    xmlParserNsBucket *hash;
112
113
    unsigned elementId;
114
    int defaultNsIndex;
115
    int minNsIndex;
116
};
117
118
/*
 * Slot of the attribute hash table; holds a single index
 * (presumably into the attribute array -- confirm with the user code).
 */
struct _xmlAttrHashBucket {
    int index;
};
121
122
static int
123
xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125
static void
126
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128
static xmlEntityPtr
129
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
130
131
static const xmlChar *
132
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
133
134
/************************************************************************
135
 *                  *
136
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
137
 *                  *
138
 ************************************************************************/
139
140
#define XML_PARSER_BIG_ENTITY 1000
141
#define XML_PARSER_LOT_ENTITY 5000
142
143
/*
144
 * Constants for protection against abusive entity expansion
145
 * ("billion laughs").
146
 */
147
148
/*
149
 * A certain amount of entity expansion which is always allowed.
150
 */
151
10.0M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
152
153
/*
154
 * Fixed cost for each entity reference. This crudely models processing time
155
 * as well to protect, for example, against exponential expansion of empty
156
 * or very short entities.
157
 */
158
10.0M
#define XML_ENT_FIXED_COST 20
159
160
/**
161
 * xmlParserMaxDepth:
162
 *
163
 * arbitrary depth limit for the XML documents that we allow to
164
 * process. This is not a limitation of the parser but a safety
165
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
166
 * parser option.
167
 */
168
const unsigned int xmlParserMaxDepth = 256;
169
170
171
172
272M
#define XML_PARSER_BIG_BUFFER_SIZE 300
173
5.72M
#define XML_PARSER_BUFFER_SIZE 100
174
619k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
175
176
/**
177
 * XML_PARSER_CHUNK_SIZE
178
 *
179
 * When calling GROW that's the minimal amount of data
180
 * the parser expects to have received. It is not a hard
181
 * limit but an optimization when reading strings like Names
182
 * It is not strictly needed as long as the input's available characters
183
 * are followed by 0, which should be provided by the I/O level
184
 */
185
#define XML_PARSER_CHUNK_SIZE 100
186
187
/**
188
 * xmlParserVersion:
189
 *
190
 * Constant string describing the internal version of the library
191
 */
192
const char *const
193
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
194
195
/*
196
 * List of XML prefixed PI allowed by W3C specs
197
 */
198
199
static const char* const xmlW3CPIs[] = {
200
    "xml-stylesheet",
201
    "xml-model",
202
    NULL
203
};
204
205
206
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
207
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
208
                                              const xmlChar **str);
209
210
static void
211
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
212
213
static int
214
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
215
216
/************************************************************************
217
 *                  *
218
 *    Some factorized error routines        *
219
 *                  *
220
 ************************************************************************/
221
222
static void
223
6.63k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
224
6.63k
    xmlCtxtErrMemory(ctxt);
225
6.63k
}
226
227
/**
228
 * xmlErrAttributeDup:
229
 * @ctxt:  an XML parser context
230
 * @prefix:  the attribute prefix
231
 * @localname:  the attribute localname
232
 *
233
 * Handle a redefinition of attribute error
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
139k
{
239
139k
    if (prefix == NULL)
240
89.1k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
89.1k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
89.1k
                   "Attribute %s redefined\n", localname);
243
50.5k
    else
244
50.5k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
50.5k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
50.5k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
139k
}
248
249
/**
250
 * xmlFatalErrMsg:
251
 * @ctxt:  an XML parser context
252
 * @error:  the error number
253
 * @msg:  the error message
254
 *
255
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256
 */
257
static void LIBXML_ATTR_FORMAT(3,0)
258
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
259
               const char *msg)
260
271M
{
261
271M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
262
271M
               NULL, NULL, NULL, 0, "%s", msg);
263
271M
}
264
265
/**
266
 * xmlWarningMsg:
267
 * @ctxt:  an XML parser context
268
 * @error:  the error number
269
 * @msg:  the error message
270
 * @str1:  extra data
271
 * @str2:  extra data
272
 *
273
 * Handle a warning.
274
 */
275
void LIBXML_ATTR_FORMAT(3,0)
276
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
277
              const char *msg, const xmlChar *str1, const xmlChar *str2)
278
287k
{
279
287k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
280
287k
               str1, str2, NULL, 0, msg, str1, str2);
281
287k
}
282
283
/**
284
 * xmlValidityError:
285
 * @ctxt:  an XML parser context
286
 * @error:  the error number
287
 * @msg:  the error message
288
 * @str1:  extra data
289
 *
290
 * Handle a validity error.
291
 */
292
static void LIBXML_ATTR_FORMAT(3,0)
293
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
294
              const char *msg, const xmlChar *str1, const xmlChar *str2)
295
165k
{
296
165k
    ctxt->valid = 0;
297
298
165k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
299
165k
               str1, str2, NULL, 0, msg, str1, str2);
300
165k
}
301
302
/**
303
 * xmlFatalErrMsgInt:
304
 * @ctxt:  an XML parser context
305
 * @error:  the error number
306
 * @msg:  the error message
307
 * @val:  an integer value
308
 *
309
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
310
 */
311
static void LIBXML_ATTR_FORMAT(3,0)
312
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
313
                  const char *msg, int val)
314
45.0M
{
315
45.0M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
316
45.0M
               NULL, NULL, NULL, val, msg, val);
317
45.0M
}
318
319
/**
320
 * xmlFatalErrMsgStrIntStr:
321
 * @ctxt:  an XML parser context
322
 * @error:  the error number
323
 * @msg:  the error message
324
 * @str1:  an string info
325
 * @val:  an integer value
326
 * @str2:  an string info
327
 *
328
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
329
 */
330
static void LIBXML_ATTR_FORMAT(3,0)
331
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
332
                  const char *msg, const xmlChar *str1, int val,
333
      const xmlChar *str2)
334
3.99M
{
335
3.99M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
336
3.99M
               str1, str2, NULL, val, msg, str1, val, str2);
337
3.99M
}
338
339
/**
340
 * xmlFatalErrMsgStr:
341
 * @ctxt:  an XML parser context
342
 * @error:  the error number
343
 * @msg:  the error message
344
 * @val:  a string value
345
 *
346
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
347
 */
348
static void LIBXML_ATTR_FORMAT(3,0)
349
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
350
                  const char *msg, const xmlChar * val)
351
4.27M
{
352
4.27M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
353
4.27M
               val, NULL, NULL, 0, msg, val);
354
4.27M
}
355
356
/**
357
 * xmlErrMsgStr:
358
 * @ctxt:  an XML parser context
359
 * @error:  the error number
360
 * @msg:  the error message
361
 * @val:  a string value
362
 *
363
 * Handle a non fatal parser error
364
 */
365
static void LIBXML_ATTR_FORMAT(3,0)
366
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
367
                  const char *msg, const xmlChar * val)
368
741k
{
369
741k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
370
741k
               val, NULL, NULL, 0, msg, val);
371
741k
}
372
373
/**
374
 * xmlNsErr:
375
 * @ctxt:  an XML parser context
376
 * @error:  the error number
377
 * @msg:  the message
378
 * @info1:  extra information string
379
 * @info2:  extra information string
380
 *
381
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
382
 */
383
static void LIBXML_ATTR_FORMAT(3,0)
384
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
385
         const char *msg,
386
         const xmlChar * info1, const xmlChar * info2,
387
         const xmlChar * info3)
388
1.61M
{
389
1.61M
    ctxt->nsWellFormed = 0;
390
391
1.61M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
392
1.61M
               info1, info2, info3, 0, msg, info1, info2, info3);
393
1.61M
}
394
395
/**
396
 * xmlNsWarn
397
 * @ctxt:  an XML parser context
398
 * @error:  the error number
399
 * @msg:  the message
400
 * @info1:  extra information string
401
 * @info2:  extra information string
402
 *
403
 * Handle a namespace warning error
404
 */
405
static void LIBXML_ATTR_FORMAT(3,0)
406
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
407
         const char *msg,
408
         const xmlChar * info1, const xmlChar * info2,
409
         const xmlChar * info3)
410
175k
{
411
175k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
412
175k
               info1, info2, info3, 0, msg, info1, info2, info3);
413
175k
}
414
415
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned sum wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
422
423
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the sum wrap around.
 *
 * @val is taken as size_t rather than unsigned long so that on platforms
 * where size_t is wider than unsigned long (e.g. 64-bit Windows) a large
 * value is compared against the remaining room at full width instead of
 * being silently truncated before the saturation check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
430
431
/**
432
 * xmlParserEntityCheck:
433
 * @ctxt:  parser context
434
 * @extra:  sum of unexpanded entity sizes
435
 *
436
 * Check for non-linear entity expansion behaviour.
437
 *
438
 * In some cases like xmlExpandEntityInAttValue, this function is called
439
 * for each, possibly nested entity and its unexpanded content length.
440
 *
441
 * In other cases like xmlParseReference, it's only called for each
442
 * top-level entity with its unexpanded content length plus the sum of
443
 * the unexpanded content lengths (plus fixed cost) of all nested
444
 * entities.
445
 *
446
 * Summing the unexpanded lengths also adds the length of the reference.
447
 * This is by design. Taking the length of the entity name into account
448
 * discourages attacks that try to waste CPU time with abusively long
449
 * entity names. See test/recurse/lol6.xml for example. Each call also
450
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
451
 * short entities.
452
 *
453
 * Returns 1 on error, 0 on success.
454
 */
455
static int
456
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
457
10.5M
{
458
10.5M
    unsigned long consumed;
459
10.5M
    unsigned long *expandedSize;
460
10.5M
    xmlParserInputPtr input = ctxt->input;
461
10.5M
    xmlEntityPtr entity = input->entity;
462
463
10.5M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
464
538k
        return(0);
465
466
    /*
467
     * Compute total consumed bytes so far, including input streams of
468
     * external entities.
469
     */
470
10.0M
    consumed = input->consumed;
471
10.0M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
472
10.0M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
473
474
10.0M
    if (entity)
475
191k
        expandedSize = &entity->expandedSize;
476
9.84M
    else
477
9.84M
        expandedSize = &ctxt->sizeentcopy;
478
479
    /*
480
     * Add extra cost and some fixed cost.
481
     */
482
10.0M
    xmlSaturatedAdd(expandedSize, extra);
483
10.0M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
484
485
    /*
486
     * It's important to always use saturation arithmetic when tracking
487
     * entity sizes to make the size checks reliable. If "sizeentcopy"
488
     * overflows, we have to abort.
489
     */
490
10.0M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
491
10.0M
        ((*expandedSize >= ULONG_MAX) ||
492
2.49M
         (*expandedSize / ctxt->maxAmpl > consumed))) {
493
5.06k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
494
5.06k
                       "Maximum entity amplification factor exceeded, see "
495
5.06k
                       "xmlCtxtSetMaxAmplification.\n");
496
5.06k
        xmlHaltParser(ctxt);
497
5.06k
        return(1);
498
5.06k
    }
499
500
10.0M
    return(0);
501
10.0M
}
502
503
/************************************************************************
504
 *                  *
505
 *    Library wide options          *
506
 *                  *
507
 ************************************************************************/
508
509
/**
510
  * xmlHasFeature:
511
  * @feature: the feature to be examined
512
  *
513
  * Examines if the library has been compiled with a given feature.
514
  *
515
  * Returns a non-zero value if the feature exist, otherwise zero.
516
  * Returns zero (0) if the feature does not exist or an unknown
517
  * unknown feature is requested, non-zero otherwise.
518
  */
519
int
520
xmlHasFeature(xmlFeature feature)
521
97.3k
{
522
97.3k
    switch (feature) {
523
3.24k
  case XML_WITH_THREAD:
524
3.24k
#ifdef LIBXML_THREAD_ENABLED
525
3.24k
      return(1);
526
#else
527
      return(0);
528
#endif
529
3.24k
        case XML_WITH_TREE:
530
3.24k
            return(1);
531
3.24k
        case XML_WITH_OUTPUT:
532
3.24k
#ifdef LIBXML_OUTPUT_ENABLED
533
3.24k
            return(1);
534
#else
535
            return(0);
536
#endif
537
3.24k
        case XML_WITH_PUSH:
538
3.24k
#ifdef LIBXML_PUSH_ENABLED
539
3.24k
            return(1);
540
#else
541
            return(0);
542
#endif
543
3.24k
        case XML_WITH_READER:
544
3.24k
#ifdef LIBXML_READER_ENABLED
545
3.24k
            return(1);
546
#else
547
            return(0);
548
#endif
549
3.24k
        case XML_WITH_PATTERN:
550
3.24k
#ifdef LIBXML_PATTERN_ENABLED
551
3.24k
            return(1);
552
#else
553
            return(0);
554
#endif
555
3.24k
        case XML_WITH_WRITER:
556
3.24k
#ifdef LIBXML_WRITER_ENABLED
557
3.24k
            return(1);
558
#else
559
            return(0);
560
#endif
561
3.24k
        case XML_WITH_SAX1:
562
3.24k
#ifdef LIBXML_SAX1_ENABLED
563
3.24k
            return(1);
564
#else
565
            return(0);
566
#endif
567
3.24k
        case XML_WITH_HTTP:
568
#ifdef LIBXML_HTTP_ENABLED
569
            return(1);
570
#else
571
3.24k
            return(0);
572
0
#endif
573
3.24k
        case XML_WITH_VALID:
574
3.24k
#ifdef LIBXML_VALID_ENABLED
575
3.24k
            return(1);
576
#else
577
            return(0);
578
#endif
579
3.24k
        case XML_WITH_HTML:
580
3.24k
#ifdef LIBXML_HTML_ENABLED
581
3.24k
            return(1);
582
#else
583
            return(0);
584
#endif
585
3.24k
        case XML_WITH_LEGACY:
586
#ifdef LIBXML_LEGACY_ENABLED
587
            return(1);
588
#else
589
3.24k
            return(0);
590
0
#endif
591
3.24k
        case XML_WITH_C14N:
592
3.24k
#ifdef LIBXML_C14N_ENABLED
593
3.24k
            return(1);
594
#else
595
            return(0);
596
#endif
597
3.24k
        case XML_WITH_CATALOG:
598
3.24k
#ifdef LIBXML_CATALOG_ENABLED
599
3.24k
            return(1);
600
#else
601
            return(0);
602
#endif
603
3.24k
        case XML_WITH_XPATH:
604
3.24k
#ifdef LIBXML_XPATH_ENABLED
605
3.24k
            return(1);
606
#else
607
            return(0);
608
#endif
609
3.24k
        case XML_WITH_XPTR:
610
3.24k
#ifdef LIBXML_XPTR_ENABLED
611
3.24k
            return(1);
612
#else
613
            return(0);
614
#endif
615
3.24k
        case XML_WITH_XINCLUDE:
616
3.24k
#ifdef LIBXML_XINCLUDE_ENABLED
617
3.24k
            return(1);
618
#else
619
            return(0);
620
#endif
621
3.24k
        case XML_WITH_ICONV:
622
3.24k
#ifdef LIBXML_ICONV_ENABLED
623
3.24k
            return(1);
624
#else
625
            return(0);
626
#endif
627
3.24k
        case XML_WITH_ISO8859X:
628
3.24k
#ifdef LIBXML_ISO8859X_ENABLED
629
3.24k
            return(1);
630
#else
631
            return(0);
632
#endif
633
3.24k
        case XML_WITH_UNICODE:
634
3.24k
#ifdef LIBXML_UNICODE_ENABLED
635
3.24k
            return(1);
636
#else
637
            return(0);
638
#endif
639
3.24k
        case XML_WITH_REGEXP:
640
3.24k
#ifdef LIBXML_REGEXP_ENABLED
641
3.24k
            return(1);
642
#else
643
            return(0);
644
#endif
645
3.24k
        case XML_WITH_AUTOMATA:
646
3.24k
#ifdef LIBXML_REGEXP_ENABLED
647
3.24k
            return(1);
648
#else
649
            return(0);
650
#endif
651
3.24k
        case XML_WITH_EXPR:
652
#ifdef LIBXML_EXPR_ENABLED
653
            return(1);
654
#else
655
3.24k
            return(0);
656
0
#endif
657
3.24k
        case XML_WITH_SCHEMAS:
658
3.24k
#ifdef LIBXML_SCHEMAS_ENABLED
659
3.24k
            return(1);
660
#else
661
            return(0);
662
#endif
663
3.24k
        case XML_WITH_SCHEMATRON:
664
3.24k
#ifdef LIBXML_SCHEMATRON_ENABLED
665
3.24k
            return(1);
666
#else
667
            return(0);
668
#endif
669
3.24k
        case XML_WITH_MODULES:
670
3.24k
#ifdef LIBXML_MODULES_ENABLED
671
3.24k
            return(1);
672
#else
673
            return(0);
674
#endif
675
3.24k
        case XML_WITH_DEBUG:
676
#ifdef LIBXML_DEBUG_ENABLED
677
            return(1);
678
#else
679
3.24k
            return(0);
680
0
#endif
681
0
        case XML_WITH_DEBUG_MEM:
682
0
            return(0);
683
3.24k
        case XML_WITH_ZLIB:
684
3.24k
#ifdef LIBXML_ZLIB_ENABLED
685
3.24k
            return(1);
686
#else
687
            return(0);
688
#endif
689
3.24k
        case XML_WITH_LZMA:
690
3.24k
#ifdef LIBXML_LZMA_ENABLED
691
3.24k
            return(1);
692
#else
693
            return(0);
694
#endif
695
3.24k
        case XML_WITH_ICU:
696
#ifdef LIBXML_ICU_ENABLED
697
            return(1);
698
#else
699
3.24k
            return(0);
700
0
#endif
701
0
        default:
702
0
      break;
703
97.3k
     }
704
0
     return(0);
705
97.3k
}
706
707
/************************************************************************
708
 *                  *
709
 *      Simple string buffer        *
710
 *                  *
711
 ************************************************************************/
712
713
typedef struct {
714
    xmlChar *mem;
715
    unsigned size;
716
    unsigned cap; /* size < cap */
717
    unsigned max; /* size <= max */
718
    xmlParserErrors code;
719
} xmlSBuf;
720
721
static void
722
5.58M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
723
5.58M
    buf->mem = NULL;
724
5.58M
    buf->size = 0;
725
5.58M
    buf->cap = 0;
726
5.58M
    buf->max = max;
727
5.58M
    buf->code = XML_ERR_OK;
728
5.58M
}
729
730
static int
731
4.64M
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
732
4.64M
    xmlChar *mem;
733
4.64M
    unsigned cap;
734
735
4.64M
    if (len >= UINT_MAX / 2 - buf->size) {
736
0
        if (buf->code == XML_ERR_OK)
737
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
738
0
        return(-1);
739
0
    }
740
741
4.64M
    cap = (buf->size + len) * 2;
742
4.64M
    if (cap < 240)
743
2.44M
        cap = 240;
744
745
4.64M
    mem = xmlRealloc(buf->mem, cap);
746
4.64M
    if (mem == NULL) {
747
1.78M
        buf->code = XML_ERR_NO_MEMORY;
748
1.78M
        return(-1);
749
1.78M
    }
750
751
2.86M
    buf->mem = mem;
752
2.86M
    buf->cap = cap;
753
754
2.86M
    return(0);
755
4.64M
}
756
757
static void
758
675M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
759
675M
    if (buf->max - buf->size < len) {
760
84.3k
        if (buf->code == XML_ERR_OK)
761
177
            buf->code = XML_ERR_RESOURCE_LIMIT;
762
84.3k
        return;
763
84.3k
    }
764
765
675M
    if (buf->cap - buf->size <= len) {
766
4.54M
        if (xmlSBufGrow(buf, len) < 0)
767
1.78M
            return;
768
4.54M
    }
769
770
673M
    if (len > 0)
771
673M
        memcpy(buf->mem + buf->size, str, len);
772
673M
    buf->size += len;
773
673M
}
774
775
static void
776
646M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
777
646M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
778
646M
}
779
780
static void
781
3.80M
xmlSBufAddChar(xmlSBuf *buf, int c) {
782
3.80M
    xmlChar *end;
783
784
3.80M
    if (buf->max - buf->size < 4) {
785
182
        if (buf->code == XML_ERR_OK)
786
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
787
182
        return;
788
182
    }
789
790
3.80M
    if (buf->cap - buf->size <= 4) {
791
103k
        if (xmlSBufGrow(buf, 4) < 0)
792
1.86k
            return;
793
103k
    }
794
795
3.80M
    end = buf->mem + buf->size;
796
797
3.80M
    if (c < 0x80) {
798
3.63M
        *end = (xmlChar) c;
799
3.63M
        buf->size += 1;
800
3.63M
    } else {
801
167k
        buf->size += xmlCopyCharMultiByte(end, c);
802
167k
    }
803
3.80M
}
804
805
static void
806
574M
xmlSBufAddReplChar(xmlSBuf *buf) {
807
574M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
808
574M
}
809
810
static void
811
1.56k
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
812
1.56k
    if (buf->code == XML_ERR_NO_MEMORY)
813
1.38k
        xmlCtxtErrMemory(ctxt);
814
177
    else
815
177
        xmlFatalErr(ctxt, buf->code, errMsg);
816
1.56k
}
817
818
static xmlChar *
819
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
820
2.84M
              const char *errMsg) {
821
2.84M
    if (buf->mem == NULL) {
822
427k
        buf->mem = xmlMalloc(1);
823
427k
        if (buf->mem == NULL) {
824
65
            buf->code = XML_ERR_NO_MEMORY;
825
427k
        } else {
826
427k
            buf->mem[0] = 0;
827
427k
        }
828
2.41M
    } else {
829
2.41M
        buf->mem[buf->size] = 0;
830
2.41M
    }
831
832
2.84M
    if (buf->code == XML_ERR_OK) {
833
2.84M
        if (sizeOut != NULL)
834
654k
            *sizeOut = buf->size;
835
2.84M
        return(buf->mem);
836
2.84M
    }
837
838
1.11k
    xmlSBufReportError(buf, ctxt, errMsg);
839
840
1.11k
    xmlFree(buf->mem);
841
842
1.11k
    if (sizeOut != NULL)
843
175
        *sizeOut = 0;
844
1.11k
    return(NULL);
845
2.84M
}
846
847
static void
848
2.59M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
849
2.59M
    if (buf->code != XML_ERR_OK)
850
449
        xmlSBufReportError(buf, ctxt, errMsg);
851
852
2.59M
    xmlFree(buf->mem);
853
2.59M
}
854
855
static int
856
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
857
672M
                    const char *errMsg) {
858
672M
    int c = str[0];
859
672M
    int c1 = str[1];
860
861
672M
    if ((c1 & 0xC0) != 0x80)
862
175M
        goto encoding_error;
863
864
496M
    if (c < 0xE0) {
865
        /* 2-byte sequence */
866
176M
        if (c < 0xC2)
867
134M
            goto encoding_error;
868
869
42.1M
        return(2);
870
320M
    } else {
871
320M
        int c2 = str[2];
872
873
320M
        if ((c2 & 0xC0) != 0x80)
874
105k
            goto encoding_error;
875
876
320M
        if (c < 0xF0) {
877
            /* 3-byte sequence */
878
319M
            if (c == 0xE0) {
879
                /* overlong */
880
19.4M
                if (c1 < 0xA0)
881
9.04k
                    goto encoding_error;
882
300M
            } else if (c == 0xED) {
883
                /* surrogate */
884
1.40M
                if (c1 >= 0xA0)
885
6.22k
                    goto encoding_error;
886
298M
            } else if (c == 0xEF) {
887
                /* U+FFFE and U+FFFF are invalid Chars */
888
25.4M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
889
17.6k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
890
25.4M
            }
891
892
319M
            return(3);
893
319M
        } else {
894
            /* 4-byte sequence */
895
245k
            if ((str[3] & 0xC0) != 0x80)
896
22.7k
                goto encoding_error;
897
222k
            if (c == 0xF0) {
898
                /* overlong */
899
88.4k
                if (c1 < 0x90)
900
9.53k
                    goto encoding_error;
901
133k
            } else if (c >= 0xF4) {
902
                /* greater than 0x10FFFF */
903
35.2k
                if ((c > 0xF4) || (c1 >= 0x90))
904
31.1k
                    goto encoding_error;
905
35.2k
            }
906
907
181k
            return(4);
908
222k
        }
909
320M
    }
910
911
310M
encoding_error:
912
    /* Only report the first error */
913
310M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
914
79.6k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
915
79.6k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
916
79.6k
    }
917
918
310M
    return(0);
919
496M
}
920
921
/************************************************************************
922
 *                  *
923
 *    SAX2 defaulted attributes handling      *
924
 *                  *
925
 ************************************************************************/
926
927
/**
928
 * xmlCtxtInitializeLate:
929
 * @ctxt:  an XML parser context
930
 *
931
 * Final initialization of the parser context before starting to parse.
932
 *
933
 * This accounts for users modifying struct members of parser context
934
 * directly.
935
 */
936
static void
937
480k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
938
480k
    xmlSAXHandlerPtr sax;
939
940
    /* Avoid unused variable warning if features are disabled. */
941
480k
    (void) sax;
942
943
    /*
944
     * Changing the SAX struct directly is still widespread practice
945
     * in internal and external code.
946
     */
947
480k
    if (ctxt == NULL) return;
948
480k
    sax = ctxt->sax;
949
480k
#ifdef LIBXML_SAX1_ENABLED
950
    /*
951
     * Only enable SAX2 if there SAX2 element handlers, except when there
952
     * are no element handlers at all.
953
     */
954
480k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
955
480k
        (sax) &&
956
480k
        (sax->initialized == XML_SAX2_MAGIC) &&
957
480k
        ((sax->startElementNs != NULL) ||
958
324k
         (sax->endElementNs != NULL) ||
959
324k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
960
324k
        ctxt->sax2 = 1;
961
#else
962
    ctxt->sax2 = 1;
963
#endif /* LIBXML_SAX1_ENABLED */
964
965
    /*
966
     * Some users replace the dictionary directly in the context struct.
967
     * We really need an API function to do that cleanly.
968
     */
969
480k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970
480k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971
480k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
972
480k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973
480k
    (ctxt->str_xml_ns == NULL)) {
974
1.20k
        xmlErrMemory(ctxt);
975
1.20k
    }
976
977
480k
    xmlDictSetLimit(ctxt->dict,
978
480k
                    (ctxt->options & XML_PARSE_HUGE) ?
979
135k
                        0 :
980
480k
                        XML_MAX_DICTIONARY_LIMIT);
981
480k
}
982
983
typedef struct {
984
    xmlHashedString prefix;
985
    xmlHashedString name;
986
    xmlHashedString value;
987
    const xmlChar *valueEnd;
988
    int external;
989
    int expandedSize;
990
} xmlDefAttr;
991
992
typedef struct _xmlDefAttrs xmlDefAttrs;
993
typedef xmlDefAttrs *xmlDefAttrsPtr;
994
struct _xmlDefAttrs {
995
    int nbAttrs;  /* number of defaulted attributes on that element */
996
    int maxAttrs;       /* the size of the array */
997
#if __STDC_VERSION__ >= 199901L
998
    /* Using a C99 flexible array member avoids UBSan errors. */
999
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1000
#else
1001
    xmlDefAttr attrs[1];
1002
#endif
1003
};
1004
1005
/**
1006
 * xmlAttrNormalizeSpace:
1007
 * @src: the source string
1008
 * @dst: the target string
1009
 *
1010
 * Normalize the space in non CDATA attribute values:
1011
 * If the attribute type is not CDATA, then the XML processor MUST further
1012
 * process the normalized attribute value by discarding any leading and
1013
 * trailing space (#x20) characters, and by replacing sequences of space
1014
 * (#x20) characters by a single space (#x20) character.
1015
 * Note that the size of dst need to be at least src, and if one doesn't need
1016
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1017
 * passing src as dst is just fine.
1018
 *
1019
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1020
 *         is needed.
1021
 */
1022
static xmlChar *
1023
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1024
264k
{
1025
264k
    if ((src == NULL) || (dst == NULL))
1026
0
        return(NULL);
1027
1028
283k
    while (*src == 0x20) src++;
1029
323M
    while (*src != 0) {
1030
322M
  if (*src == 0x20) {
1031
17.6M
      while (*src == 0x20) src++;
1032
461k
      if (*src != 0)
1033
446k
    *dst++ = 0x20;
1034
322M
  } else {
1035
322M
      *dst++ = *src++;
1036
322M
  }
1037
322M
    }
1038
264k
    *dst = 0;
1039
264k
    if (dst == src)
1040
228k
       return(NULL);
1041
35.7k
    return(dst);
1042
264k
}
1043
1044
/**
1045
 * xmlAddDefAttrs:
1046
 * @ctxt:  an XML parser context
1047
 * @fullname:  the element fullname
1048
 * @fullattr:  the attribute fullname
1049
 * @value:  the attribute value
1050
 *
1051
 * Add a defaulted attribute for an element
1052
 */
1053
static void
1054
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1055
               const xmlChar *fullname,
1056
               const xmlChar *fullattr,
1057
282k
               const xmlChar *value) {
1058
282k
    xmlDefAttrsPtr defaults;
1059
282k
    xmlDefAttr *attr;
1060
282k
    int len, expandedSize;
1061
282k
    xmlHashedString name;
1062
282k
    xmlHashedString prefix;
1063
282k
    xmlHashedString hvalue;
1064
282k
    const xmlChar *localname;
1065
1066
    /*
1067
     * Allows to detect attribute redefinitions
1068
     */
1069
282k
    if (ctxt->attsSpecial != NULL) {
1070
238k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1071
147k
      return;
1072
238k
    }
1073
1074
134k
    if (ctxt->attsDefault == NULL) {
1075
44.9k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1076
44.9k
  if (ctxt->attsDefault == NULL)
1077
63
      goto mem_error;
1078
44.9k
    }
1079
1080
    /*
1081
     * split the element name into prefix:localname , the string found
1082
     * are within the DTD and then not associated to namespace names.
1083
     */
1084
134k
    localname = xmlSplitQName3(fullname, &len);
1085
134k
    if (localname == NULL) {
1086
106k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1087
106k
  prefix.name = NULL;
1088
106k
    } else {
1089
28.2k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1090
28.2k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1091
28.2k
        if (prefix.name == NULL)
1092
7
            goto mem_error;
1093
28.2k
    }
1094
134k
    if (name.name == NULL)
1095
7
        goto mem_error;
1096
1097
    /*
1098
     * make sure there is some storage
1099
     */
1100
134k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1101
134k
    if ((defaults == NULL) ||
1102
134k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1103
58.8k
        xmlDefAttrsPtr temp;
1104
58.8k
        int newSize;
1105
1106
58.8k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1107
58.8k
        temp = xmlRealloc(defaults,
1108
58.8k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1109
58.8k
  if (temp == NULL)
1110
42
      goto mem_error;
1111
58.7k
        if (defaults == NULL)
1112
51.6k
            temp->nbAttrs = 0;
1113
58.7k
  temp->maxAttrs = newSize;
1114
58.7k
        defaults = temp;
1115
58.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1116
58.7k
                          defaults, NULL) < 0) {
1117
0
      xmlFree(defaults);
1118
0
      goto mem_error;
1119
0
  }
1120
58.7k
    }
1121
1122
    /*
1123
     * Split the attribute name into prefix:localname , the string found
1124
     * are within the DTD and hen not associated to namespace names.
1125
     */
1126
134k
    localname = xmlSplitQName3(fullattr, &len);
1127
134k
    if (localname == NULL) {
1128
96.7k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1129
96.7k
  prefix.name = NULL;
1130
96.7k
    } else {
1131
37.8k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1132
37.8k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1133
37.8k
        if (prefix.name == NULL)
1134
7
            goto mem_error;
1135
37.8k
    }
1136
134k
    if (name.name == NULL)
1137
8
        goto mem_error;
1138
1139
    /* intern the string and precompute the end */
1140
134k
    len = strlen((const char *) value);
1141
134k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1142
134k
    if (hvalue.name == NULL)
1143
22
        goto mem_error;
1144
1145
134k
    expandedSize = strlen((const char *) name.name);
1146
134k
    if (prefix.name != NULL)
1147
37.8k
        expandedSize += strlen((const char *) prefix.name);
1148
134k
    expandedSize += len;
1149
1150
134k
    attr = &defaults->attrs[defaults->nbAttrs++];
1151
134k
    attr->name = name;
1152
134k
    attr->prefix = prefix;
1153
134k
    attr->value = hvalue;
1154
134k
    attr->valueEnd = hvalue.name + len;
1155
134k
    attr->external = PARSER_EXTERNAL(ctxt);
1156
134k
    attr->expandedSize = expandedSize;
1157
1158
134k
    return;
1159
1160
156
mem_error:
1161
156
    xmlErrMemory(ctxt);
1162
156
}
1163
1164
/**
1165
 * xmlAddSpecialAttr:
1166
 * @ctxt:  an XML parser context
1167
 * @fullname:  the element fullname
1168
 * @fullattr:  the attribute fullname
1169
 * @type:  the attribute type
1170
 *
1171
 * Register this attribute type
1172
 */
1173
static void
1174
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1175
      const xmlChar *fullname,
1176
      const xmlChar *fullattr,
1177
      int type)
1178
511k
{
1179
511k
    if (ctxt->attsSpecial == NULL) {
1180
55.4k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1181
55.4k
  if (ctxt->attsSpecial == NULL)
1182
79
      goto mem_error;
1183
55.4k
    }
1184
1185
511k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1186
511k
                    (void *) (ptrdiff_t) type) < 0)
1187
10
        goto mem_error;
1188
511k
    return;
1189
1190
511k
mem_error:
1191
89
    xmlErrMemory(ctxt);
1192
89
}
1193
1194
/**
1195
 * xmlCleanSpecialAttrCallback:
1196
 *
1197
 * Removes CDATA attributes from the special attribute table
1198
 */
1199
static void
1200
xmlCleanSpecialAttrCallback(void *payload, void *data,
1201
                            const xmlChar *fullname, const xmlChar *fullattr,
1202
275k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1203
275k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1204
1205
275k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1206
71.7k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1207
71.7k
    }
1208
275k
}
1209
1210
/**
1211
 * xmlCleanSpecialAttr:
1212
 * @ctxt:  an XML parser context
1213
 *
1214
 * Trim the list of attributes defined to remove all those of type
1215
 * CDATA as they are not special. This call should be done when finishing
1216
 * to parse the DTD and before starting to parse the document root.
1217
 */
1218
static void
1219
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1220
258k
{
1221
258k
    if (ctxt->attsSpecial == NULL)
1222
202k
        return;
1223
1224
55.2k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1225
1226
55.2k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1227
12.5k
        xmlHashFree(ctxt->attsSpecial, NULL);
1228
12.5k
        ctxt->attsSpecial = NULL;
1229
12.5k
    }
1230
55.2k
}
1231
1232
/**
1233
 * xmlCheckLanguageID:
1234
 * @lang:  pointer to the string value
1235
 *
1236
 * DEPRECATED: Internal function, do not use.
1237
 *
1238
 * Checks that the value conforms to the LanguageID production:
1239
 *
1240
 * NOTE: this is somewhat deprecated, those productions were removed from
1241
 *       the XML Second edition.
1242
 *
1243
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1244
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1245
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1246
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1247
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1248
 * [38] Subcode ::= ([a-z] | [A-Z])+
1249
 *
1250
 * The current REC reference the successors of RFC 1766, currently 5646
1251
 *
1252
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1253
 * langtag       = language
1254
 *                 ["-" script]
1255
 *                 ["-" region]
1256
 *                 *("-" variant)
1257
 *                 *("-" extension)
1258
 *                 ["-" privateuse]
1259
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1260
 *                 ["-" extlang]       ; sometimes followed by
1261
 *                                     ; extended language subtags
1262
 *               / 4ALPHA              ; or reserved for future use
1263
 *               / 5*8ALPHA            ; or registered language subtag
1264
 *
1265
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1266
 *                 *2("-" 3ALPHA)      ; permanently reserved
1267
 *
1268
 * script        = 4ALPHA              ; ISO 15924 code
1269
 *
1270
 * region        = 2ALPHA              ; ISO 3166-1 code
1271
 *               / 3DIGIT              ; UN M.49 code
1272
 *
1273
 * variant       = 5*8alphanum         ; registered variants
1274
 *               / (DIGIT 3alphanum)
1275
 *
1276
 * extension     = singleton 1*("-" (2*8alphanum))
1277
 *
1278
 *                                     ; Single alphanumerics
1279
 *                                     ; "x" reserved for private use
1280
 * singleton     = DIGIT               ; 0 - 9
1281
 *               / %x41-57             ; A - W
1282
 *               / %x59-5A             ; Y - Z
1283
 *               / %x61-77             ; a - w
1284
 *               / %x79-7A             ; y - z
1285
 *
1286
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1287
 * The parser below doesn't try to cope with extension or privateuse
1288
 * that could be added but that's not interoperable anyway
1289
 *
1290
 * Returns 1 if correct 0 otherwise
1291
 **/
1292
int
1293
xmlCheckLanguageID(const xmlChar * lang)
1294
279k
{
1295
279k
    const xmlChar *cur = lang, *nxt;
1296
1297
279k
    if (cur == NULL)
1298
9.03k
        return (0);
1299
270k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1300
270k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1301
270k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1302
270k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1303
        /*
1304
         * Still allow IANA code and user code which were coming
1305
         * from the previous version of the XML-1.0 specification
1306
         * it's deprecated but we should not fail
1307
         */
1308
14.6k
        cur += 2;
1309
102k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1310
102k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311
87.9k
            cur++;
1312
14.6k
        return(cur[0] == 0);
1313
14.6k
    }
1314
255k
    nxt = cur;
1315
918k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1316
918k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1317
662k
           nxt++;
1318
255k
    if (nxt - cur >= 4) {
1319
        /*
1320
         * Reserved
1321
         */
1322
10.9k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1323
7.67k
            return(0);
1324
3.29k
        return(1);
1325
10.9k
    }
1326
244k
    if (nxt - cur < 2)
1327
19.6k
        return(0);
1328
    /* we got an ISO 639 code */
1329
224k
    if (nxt[0] == 0)
1330
13.3k
        return(1);
1331
211k
    if (nxt[0] != '-')
1332
13.4k
        return(0);
1333
1334
198k
    nxt++;
1335
198k
    cur = nxt;
1336
    /* now we can have extlang or script or region or variant */
1337
198k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
7.04k
        goto region_m49;
1339
1340
826k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
826k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
635k
           nxt++;
1343
191k
    if (nxt - cur == 4)
1344
46.8k
        goto script;
1345
144k
    if (nxt - cur == 2)
1346
39.4k
        goto region;
1347
104k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1348
4.74k
        goto variant;
1349
100k
    if (nxt - cur != 3)
1350
13.5k
        return(0);
1351
    /* we parsed an extlang */
1352
86.5k
    if (nxt[0] == 0)
1353
4.47k
        return(1);
1354
82.0k
    if (nxt[0] != '-')
1355
7.42k
        return(0);
1356
1357
74.6k
    nxt++;
1358
74.6k
    cur = nxt;
1359
    /* now we can have script or region or variant */
1360
74.6k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1361
4.99k
        goto region_m49;
1362
1363
359k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1364
359k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1365
289k
           nxt++;
1366
69.6k
    if (nxt - cur == 2)
1367
7.95k
        goto region;
1368
61.6k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1369
4.05k
        goto variant;
1370
57.6k
    if (nxt - cur != 4)
1371
21.0k
        return(0);
1372
    /* we parsed a script */
1373
83.3k
script:
1374
83.3k
    if (nxt[0] == 0)
1375
1.61k
        return(1);
1376
81.7k
    if (nxt[0] != '-')
1377
11.5k
        return(0);
1378
1379
70.2k
    nxt++;
1380
70.2k
    cur = nxt;
1381
    /* now we can have region or variant */
1382
70.2k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1383
5.71k
        goto region_m49;
1384
1385
353k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1386
353k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1387
289k
           nxt++;
1388
1389
64.5k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1390
15.9k
        goto variant;
1391
48.5k
    if (nxt - cur != 2)
1392
44.3k
        return(0);
1393
    /* we parsed a region */
1394
59.4k
region:
1395
59.4k
    if (nxt[0] == 0)
1396
8.66k
        return(1);
1397
50.7k
    if (nxt[0] != '-')
1398
12.8k
        return(0);
1399
1400
37.9k
    nxt++;
1401
37.9k
    cur = nxt;
1402
    /* now we can just have a variant */
1403
269k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1404
269k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1405
231k
           nxt++;
1406
1407
37.9k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1408
18.4k
        return(0);
1409
1410
    /* we parsed a variant */
1411
44.2k
variant:
1412
44.2k
    if (nxt[0] == 0)
1413
1.80k
        return(1);
1414
42.4k
    if (nxt[0] != '-')
1415
38.5k
        return(0);
1416
    /* extensions and private use subtags not checked */
1417
3.95k
    return (1);
1418
1419
17.7k
region_m49:
1420
17.7k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1421
17.7k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1422
7.92k
        nxt += 3;
1423
7.92k
        goto region;
1424
7.92k
    }
1425
9.83k
    return(0);
1426
17.7k
}
1427
1428
/************************************************************************
1429
 *                  *
1430
 *    Parser stacks related functions and macros    *
1431
 *                  *
1432
 ************************************************************************/
1433
1434
static xmlChar *
1435
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1436
1437
/**
1438
 * xmlParserNsCreate:
1439
 *
1440
 * Create a new namespace database.
1441
 *
1442
 * Returns the new obejct.
1443
 */
1444
xmlParserNsData *
1445
512k
xmlParserNsCreate(void) {
1446
512k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1447
1448
512k
    if (nsdb == NULL)
1449
84
        return(NULL);
1450
512k
    memset(nsdb, 0, sizeof(*nsdb));
1451
512k
    nsdb->defaultNsIndex = INT_MAX;
1452
1453
512k
    return(nsdb);
1454
512k
}
1455
1456
/**
1457
 * xmlParserNsFree:
1458
 * @nsdb: namespace database
1459
 *
1460
 * Free a namespace database.
1461
 */
1462
void
1463
512k
xmlParserNsFree(xmlParserNsData *nsdb) {
1464
512k
    if (nsdb == NULL)
1465
0
        return;
1466
1467
512k
    xmlFree(nsdb->extra);
1468
512k
    xmlFree(nsdb->hash);
1469
512k
    xmlFree(nsdb);
1470
512k
}
1471
1472
/**
1473
 * xmlParserNsReset:
1474
 * @nsdb: namespace database
1475
 *
1476
 * Reset a namespace database.
1477
 */
1478
static void
1479
121k
xmlParserNsReset(xmlParserNsData *nsdb) {
1480
121k
    if (nsdb == NULL)
1481
0
        return;
1482
1483
121k
    nsdb->hashElems = 0;
1484
121k
    nsdb->elementId = 0;
1485
121k
    nsdb->defaultNsIndex = INT_MAX;
1486
1487
121k
    if (nsdb->hash)
1488
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1489
121k
}
1490
1491
/**
1492
 * xmlParserStartElement:
1493
 * @nsdb: namespace database
1494
 *
1495
 * Signal that a new element has started.
1496
 *
1497
 * Returns 0 on success, -1 if the element counter overflowed.
1498
 */
1499
static int
1500
8.23M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1501
8.23M
    if (nsdb->elementId == UINT_MAX)
1502
0
        return(-1);
1503
8.23M
    nsdb->elementId++;
1504
1505
8.23M
    return(0);
1506
8.23M
}
1507
1508
/**
1509
 * xmlParserNsLookup:
1510
 * @ctxt: parser context
1511
 * @prefix: namespace prefix
1512
 * @bucketPtr: optional bucket (return value)
1513
 *
1514
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1515
 * be set to the matching bucket, or the first empty bucket if no match
1516
 * was found.
1517
 *
1518
 * Returns the namespace index on success, INT_MAX if no namespace was
1519
 * found.
1520
 */
1521
static int
1522
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1523
11.4M
                  xmlParserNsBucket **bucketPtr) {
1524
11.4M
    xmlParserNsBucket *bucket, *tombstone;
1525
11.4M
    unsigned index, hashValue;
1526
1527
11.4M
    if (prefix->name == NULL)
1528
6.86M
        return(ctxt->nsdb->defaultNsIndex);
1529
1530
4.62M
    if (ctxt->nsdb->hashSize == 0)
1531
295k
        return(INT_MAX);
1532
1533
4.33M
    hashValue = prefix->hashValue;
1534
4.33M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1535
4.33M
    bucket = &ctxt->nsdb->hash[index];
1536
4.33M
    tombstone = NULL;
1537
1538
5.17M
    while (bucket->hashValue) {
1539
4.35M
        if (bucket->index == INT_MAX) {
1540
397k
            if (tombstone == NULL)
1541
371k
                tombstone = bucket;
1542
3.96M
        } else if (bucket->hashValue == hashValue) {
1543
3.51M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1544
3.51M
                if (bucketPtr != NULL)
1545
932k
                    *bucketPtr = bucket;
1546
3.51M
                return(bucket->index);
1547
3.51M
            }
1548
3.51M
        }
1549
1550
844k
        index++;
1551
844k
        bucket++;
1552
844k
        if (index == ctxt->nsdb->hashSize) {
1553
83.6k
            index = 0;
1554
83.6k
            bucket = ctxt->nsdb->hash;
1555
83.6k
        }
1556
844k
    }
1557
1558
817k
    if (bucketPtr != NULL)
1559
245k
        *bucketPtr = tombstone ? tombstone : bucket;
1560
817k
    return(INT_MAX);
1561
4.33M
}
1562
1563
/**
1564
 * xmlParserNsLookupUri:
1565
 * @ctxt: parser context
1566
 * @prefix: namespace prefix
1567
 *
1568
 * Lookup namespace URI with given prefix.
1569
 *
1570
 * Returns the namespace URI on success, NULL if no namespace was found.
1571
 */
1572
static const xmlChar *
1573
6.60M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1574
6.60M
    const xmlChar *ret;
1575
6.60M
    int nsIndex;
1576
1577
6.60M
    if (prefix->name == ctxt->str_xml)
1578
14.0k
        return(ctxt->str_xml_ns);
1579
1580
    /*
1581
     * minNsIndex is used when building an entity tree. We must
1582
     * ignore namespaces declared outside the entity.
1583
     */
1584
6.58M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1585
6.58M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1586
4.37M
        return(NULL);
1587
1588
2.21M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1589
2.21M
    if (ret[0] == 0)
1590
55.7k
        ret = NULL;
1591
2.21M
    return(ret);
1592
6.58M
}
1593
1594
/**
1595
 * xmlParserNsLookupSax:
1596
 * @ctxt: parser context
1597
 * @prefix: namespace prefix
1598
 *
1599
 * Lookup extra data for the given prefix. This returns data stored
1600
 * with xmlParserNsUdpateSax.
1601
 *
1602
 * Returns the data on success, NULL if no namespace was found.
1603
 */
1604
void *
1605
2.21M
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1606
2.21M
    xmlHashedString hprefix;
1607
2.21M
    int nsIndex;
1608
1609
2.21M
    if (prefix == ctxt->str_xml)
1610
350k
        return(NULL);
1611
1612
1.86M
    hprefix.name = prefix;
1613
1.86M
    if (prefix != NULL)
1614
935k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1615
932k
    else
1616
932k
        hprefix.hashValue = 0;
1617
1.86M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1618
1.86M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1619
0
        return(NULL);
1620
1621
1.86M
    return(ctxt->nsdb->extra[nsIndex].saxData);
1622
1.86M
}
1623
1624
/**
1625
 * xmlParserNsUpdateSax:
1626
 * @ctxt: parser context
1627
 * @prefix: namespace prefix
1628
 * @saxData: extra data for SAX handler
1629
 *
1630
 * Sets or updates extra data for the given prefix. This value will be
1631
 * returned by xmlParserNsLookupSax as long as the namespace with the
1632
 * given prefix is in scope.
1633
 *
1634
 * Returns the data on success, NULL if no namespace was found.
1635
 */
1636
int
1637
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1638
929k
                     void *saxData) {
1639
929k
    xmlHashedString hprefix;
1640
929k
    int nsIndex;
1641
1642
929k
    if (prefix == ctxt->str_xml)
1643
0
        return(-1);
1644
1645
929k
    hprefix.name = prefix;
1646
929k
    if (prefix != NULL)
1647
526k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1648
403k
    else
1649
403k
        hprefix.hashValue = 0;
1650
929k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1651
929k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1652
0
        return(-1);
1653
1654
929k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1655
929k
    return(0);
1656
929k
}
1657
1658
/**
1659
 * xmlParserNsGrow:
1660
 * @ctxt: parser context
1661
 *
1662
 * Grows the namespace tables.
1663
 *
1664
 * Returns 0 on success, -1 if a memory allocation failed.
1665
 */
1666
static int
1667
70.6k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1668
70.6k
    const xmlChar **table;
1669
70.6k
    xmlParserNsExtra *extra;
1670
70.6k
    int newSize;
1671
1672
70.6k
    if (ctxt->nsMax > INT_MAX / 2)
1673
0
        goto error;
1674
70.6k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1675
1676
70.6k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1677
70.6k
    if (table == NULL)
1678
53
        goto error;
1679
70.5k
    ctxt->nsTab = table;
1680
1681
70.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1682
70.5k
    if (extra == NULL)
1683
53
        goto error;
1684
70.5k
    ctxt->nsdb->extra = extra;
1685
1686
70.5k
    ctxt->nsMax = newSize;
1687
70.5k
    return(0);
1688
1689
106
error:
1690
106
    xmlErrMemory(ctxt);
1691
106
    return(-1);
1692
70.5k
}
1693
1694
/**
1695
 * xmlParserNsPush:
1696
 * @ctxt: parser context
1697
 * @prefix: prefix with hash value
1698
 * @uri: uri with hash value
1699
 * @saxData: extra data for SAX handler
1700
 * @defAttr: whether the namespace comes from a default attribute
1701
 *
1702
 * Push a new namespace on the table.
1703
 *
1704
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1705
 * -1 if a memory allocation failed.
1706
 */
1707
static int
1708
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1709
1.14M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1710
1.14M
    xmlParserNsBucket *bucket = NULL;
1711
1.14M
    xmlParserNsExtra *extra;
1712
1.14M
    const xmlChar **ns;
1713
1.14M
    unsigned hashValue, nsIndex, oldIndex;
1714
1715
1.14M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1716
1.09k
        return(0);
1717
1718
1.14M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1719
106
        xmlErrMemory(ctxt);
1720
106
        return(-1);
1721
106
    }
1722
1723
    /*
1724
     * Default namespace and 'xml' namespace
1725
     */
1726
1.14M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1727
500k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1728
1729
500k
        if (oldIndex != INT_MAX) {
1730
440k
            extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
440k
            if (extra->elementId == ctxt->nsdb->elementId) {
1733
11.5k
                if (defAttr == 0)
1734
8.54k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1735
11.5k
                return(0);
1736
11.5k
            }
1737
1738
428k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1739
428k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1740
23.8k
                return(0);
1741
428k
        }
1742
1743
464k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1744
464k
        goto populate_entry;
1745
500k
    }
1746
1747
    /*
1748
     * Hash table lookup
1749
     */
1750
645k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1751
645k
    if (oldIndex != INT_MAX) {
1752
349k
        extra = &ctxt->nsdb->extra[oldIndex];
1753
1754
        /*
1755
         * Check for duplicate definitions on the same element.
1756
         */
1757
349k
        if (extra->elementId == ctxt->nsdb->elementId) {
1758
10.2k
            if (defAttr == 0)
1759
8.51k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1760
10.2k
            return(0);
1761
10.2k
        }
1762
1763
339k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1764
339k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1765
28.5k
            return(0);
1766
1767
310k
        bucket->index = ctxt->nsNr;
1768
310k
        goto populate_entry;
1769
339k
    }
1770
1771
    /*
1772
     * Insert new bucket
1773
     */
1774
1775
295k
    hashValue = prefix->hashValue;
1776
1777
    /*
1778
     * Grow hash table, 50% fill factor
1779
     */
1780
295k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1781
59.2k
        xmlParserNsBucket *newHash;
1782
59.2k
        unsigned newSize, i, index;
1783
1784
59.2k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1785
0
            xmlErrMemory(ctxt);
1786
0
            return(-1);
1787
0
        }
1788
59.2k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1789
59.2k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1790
59.2k
        if (newHash == NULL) {
1791
37
            xmlErrMemory(ctxt);
1792
37
            return(-1);
1793
37
        }
1794
59.2k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1795
1796
668k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1797
609k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1798
609k
            unsigned newIndex;
1799
1800
609k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1801
595k
                continue;
1802
13.7k
            newIndex = hv & (newSize - 1);
1803
1804
17.7k
            while (newHash[newIndex].hashValue != 0) {
1805
3.97k
                newIndex++;
1806
3.97k
                if (newIndex == newSize)
1807
827
                    newIndex = 0;
1808
3.97k
            }
1809
1810
13.7k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1811
13.7k
        }
1812
1813
59.2k
        xmlFree(ctxt->nsdb->hash);
1814
59.2k
        ctxt->nsdb->hash = newHash;
1815
59.2k
        ctxt->nsdb->hashSize = newSize;
1816
1817
        /*
1818
         * Relookup
1819
         */
1820
59.2k
        index = hashValue & (newSize - 1);
1821
1822
61.6k
        while (newHash[index].hashValue != 0) {
1823
2.43k
            index++;
1824
2.43k
            if (index == newSize)
1825
242
                index = 0;
1826
2.43k
        }
1827
1828
59.2k
        bucket = &newHash[index];
1829
59.2k
    }
1830
1831
295k
    bucket->hashValue = hashValue;
1832
295k
    bucket->index = ctxt->nsNr;
1833
295k
    ctxt->nsdb->hashElems++;
1834
295k
    oldIndex = INT_MAX;
1835
1836
1.07M
populate_entry:
1837
1.07M
    nsIndex = ctxt->nsNr;
1838
1839
1.07M
    ns = &ctxt->nsTab[nsIndex * 2];
1840
1.07M
    ns[0] = prefix ? prefix->name : NULL;
1841
1.07M
    ns[1] = uri->name;
1842
1843
1.07M
    extra = &ctxt->nsdb->extra[nsIndex];
1844
1.07M
    extra->saxData = saxData;
1845
1.07M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1846
1.07M
    extra->uriHashValue = uri->hashValue;
1847
1.07M
    extra->elementId = ctxt->nsdb->elementId;
1848
1.07M
    extra->oldIndex = oldIndex;
1849
1850
1.07M
    ctxt->nsNr++;
1851
1852
1.07M
    return(1);
1853
295k
}
1854
1855
/**
1856
 * xmlParserNsPop:
1857
 * @ctxt: an XML parser context
1858
 * @nr:  the number to pop
1859
 *
1860
 * Pops the top @nr namespaces and restores the hash table.
1861
 *
1862
 * Returns the number of namespaces popped.
1863
 */
1864
static int
1865
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1866
817k
{
1867
817k
    int i;
1868
1869
    /* assert(nr <= ctxt->nsNr); */
1870
1871
1.84M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1872
1.02M
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1873
1.02M
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1874
1875
1.02M
        if (prefix == NULL) {
1876
443k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1877
582k
        } else {
1878
582k
            xmlHashedString hprefix;
1879
582k
            xmlParserNsBucket *bucket = NULL;
1880
1881
582k
            hprefix.name = prefix;
1882
582k
            hprefix.hashValue = extra->prefixHashValue;
1883
582k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1884
            /* assert(bucket && bucket->hashValue); */
1885
582k
            bucket->index = extra->oldIndex;
1886
582k
        }
1887
1.02M
    }
1888
1889
817k
    ctxt->nsNr -= nr;
1890
817k
    return(nr);
1891
817k
}
1892
1893
static int
1894
96.4k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1895
96.4k
    const xmlChar **atts;
1896
96.4k
    unsigned *attallocs;
1897
96.4k
    int maxatts;
1898
1899
96.4k
    if (nr + 5 > ctxt->maxatts) {
1900
96.4k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1901
96.4k
  atts = (const xmlChar **) xmlMalloc(
1902
96.4k
             maxatts * sizeof(const xmlChar *));
1903
96.4k
  if (atts == NULL) goto mem_error;
1904
96.4k
  attallocs = xmlRealloc(ctxt->attallocs,
1905
96.4k
                               (maxatts / 5) * sizeof(attallocs[0]));
1906
96.4k
  if (attallocs == NULL) {
1907
63
            xmlFree(atts);
1908
63
            goto mem_error;
1909
63
        }
1910
96.3k
        if (ctxt->maxatts > 0)
1911
8.02k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1912
96.3k
        xmlFree(ctxt->atts);
1913
96.3k
  ctxt->atts = atts;
1914
96.3k
  ctxt->attallocs = attallocs;
1915
96.3k
  ctxt->maxatts = maxatts;
1916
96.3k
    }
1917
96.3k
    return(ctxt->maxatts);
1918
119
mem_error:
1919
119
    xmlErrMemory(ctxt);
1920
119
    return(-1);
1921
96.4k
}
1922
1923
/**
1924
 * inputPush:
1925
 * @ctxt:  an XML parser context
1926
 * @value:  the parser input
1927
 *
1928
 * Pushes a new parser input on top of the input stack
1929
 *
1930
 * Returns -1 in case of error, the index in the stack otherwise
1931
 */
1932
int
1933
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1934
1.26M
{
1935
1.26M
    char *directory = NULL;
1936
1937
1.26M
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
1940
1.26M
    if (ctxt->inputNr >= ctxt->inputMax) {
1941
40.5k
        size_t newSize = ctxt->inputMax * 2;
1942
40.5k
        xmlParserInputPtr *tmp;
1943
1944
40.5k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1945
40.5k
                                               newSize * sizeof(*tmp));
1946
40.5k
        if (tmp == NULL) {
1947
81
            xmlErrMemory(ctxt);
1948
81
            return (-1);
1949
81
        }
1950
40.4k
        ctxt->inputTab = tmp;
1951
40.4k
        ctxt->inputMax = newSize;
1952
40.4k
    }
1953
1954
1.26M
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1955
413k
        directory = xmlParserGetDirectory(value->filename);
1956
413k
        if (directory == NULL) {
1957
112
            xmlErrMemory(ctxt);
1958
112
            return(-1);
1959
112
        }
1960
413k
    }
1961
1962
1.26M
    ctxt->inputTab[ctxt->inputNr] = value;
1963
1.26M
    ctxt->input = value;
1964
1965
1.26M
    if (ctxt->inputNr == 0) {
1966
536k
        xmlFree(ctxt->directory);
1967
536k
        ctxt->directory = directory;
1968
536k
    }
1969
1970
1.26M
    return(ctxt->inputNr++);
1971
1.26M
}
1972
/**
1973
 * inputPop:
1974
 * @ctxt: an XML parser context
1975
 *
1976
 * Pops the top parser input from the input stack
1977
 *
1978
 * Returns the input just removed
1979
 */
1980
xmlParserInputPtr
1981
inputPop(xmlParserCtxtPtr ctxt)
1982
2.44M
{
1983
2.44M
    xmlParserInputPtr ret;
1984
1985
2.44M
    if (ctxt == NULL)
1986
0
        return(NULL);
1987
2.44M
    if (ctxt->inputNr <= 0)
1988
1.18M
        return (NULL);
1989
1.25M
    ctxt->inputNr--;
1990
1.25M
    if (ctxt->inputNr > 0)
1991
729k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1992
527k
    else
1993
527k
        ctxt->input = NULL;
1994
1.25M
    ret = ctxt->inputTab[ctxt->inputNr];
1995
1.25M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1996
1.25M
    return (ret);
1997
2.44M
}
1998
/**
1999
 * nodePush:
2000
 * @ctxt:  an XML parser context
2001
 * @value:  the element node
2002
 *
2003
 * DEPRECATED: Internal function, do not use.
2004
 *
2005
 * Pushes a new element node on top of the node stack
2006
 *
2007
 * Returns -1 in case of error, the index in the stack otherwise
2008
 */
2009
int
2010
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2011
10.6M
{
2012
10.6M
    int maxDepth;
2013
2014
10.6M
    if (ctxt == NULL)
2015
0
        return(0);
2016
2017
10.6M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2018
10.6M
    if (ctxt->nodeNr > maxDepth) {
2019
73
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2020
73
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2021
73
                ctxt->nodeNr);
2022
73
        xmlHaltParser(ctxt);
2023
73
        return(-1);
2024
73
    }
2025
10.6M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2026
362k
        xmlNodePtr *tmp;
2027
2028
362k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2029
362k
                                      ctxt->nodeMax * 2 *
2030
362k
                                      sizeof(ctxt->nodeTab[0]));
2031
362k
        if (tmp == NULL) {
2032
160
            xmlErrMemory(ctxt);
2033
160
            return (-1);
2034
160
        }
2035
362k
        ctxt->nodeTab = tmp;
2036
362k
  ctxt->nodeMax *= 2;
2037
362k
    }
2038
10.6M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2039
10.6M
    ctxt->node = value;
2040
10.6M
    return (ctxt->nodeNr++);
2041
10.6M
}
2042
2043
/**
2044
 * nodePop:
2045
 * @ctxt: an XML parser context
2046
 *
2047
 * DEPRECATED: Internal function, do not use.
2048
 *
2049
 * Pops the top element node from the node stack
2050
 *
2051
 * Returns the node just removed
2052
 */
2053
xmlNodePtr
2054
nodePop(xmlParserCtxtPtr ctxt)
2055
10.4M
{
2056
10.4M
    xmlNodePtr ret;
2057
2058
10.4M
    if (ctxt == NULL) return(NULL);
2059
10.4M
    if (ctxt->nodeNr <= 0)
2060
277k
        return (NULL);
2061
10.1M
    ctxt->nodeNr--;
2062
10.1M
    if (ctxt->nodeNr > 0)
2063
10.0M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2064
149k
    else
2065
149k
        ctxt->node = NULL;
2066
10.1M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2067
10.1M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2068
10.1M
    return (ret);
2069
10.4M
}
2070
2071
/**
2072
 * nameNsPush:
2073
 * @ctxt:  an XML parser context
2074
 * @value:  the element name
2075
 * @prefix:  the element prefix
2076
 * @URI:  the element namespace name
2077
 * @line:  the current line number for error messages
2078
 * @nsNr:  the number of namespaces pushed on the namespace table
2079
 *
2080
 * Pushes a new element name/prefix/URL on top of the name stack
2081
 *
2082
 * Returns -1 in case of error, the index in the stack otherwise
2083
 */
2084
static int
2085
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2086
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2087
9.80M
{
2088
9.80M
    xmlStartTag *tag;
2089
2090
9.80M
    if (ctxt->nameNr >= ctxt->nameMax) {
2091
382k
        const xmlChar * *tmp;
2092
382k
        xmlStartTag *tmp2;
2093
382k
        ctxt->nameMax *= 2;
2094
382k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2095
382k
                                    ctxt->nameMax *
2096
382k
                                    sizeof(ctxt->nameTab[0]));
2097
382k
        if (tmp == NULL) {
2098
141
      ctxt->nameMax /= 2;
2099
141
      goto mem_error;
2100
141
        }
2101
381k
  ctxt->nameTab = tmp;
2102
381k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2103
381k
                                    ctxt->nameMax *
2104
381k
                                    sizeof(ctxt->pushTab[0]));
2105
381k
        if (tmp2 == NULL) {
2106
195
      ctxt->nameMax /= 2;
2107
195
      goto mem_error;
2108
195
        }
2109
381k
  ctxt->pushTab = tmp2;
2110
9.42M
    } else if (ctxt->pushTab == NULL) {
2111
230k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2112
230k
                                            sizeof(ctxt->pushTab[0]));
2113
230k
        if (ctxt->pushTab == NULL)
2114
233
            goto mem_error;
2115
230k
    }
2116
9.80M
    ctxt->nameTab[ctxt->nameNr] = value;
2117
9.80M
    ctxt->name = value;
2118
9.80M
    tag = &ctxt->pushTab[ctxt->nameNr];
2119
9.80M
    tag->prefix = prefix;
2120
9.80M
    tag->URI = URI;
2121
9.80M
    tag->line = line;
2122
9.80M
    tag->nsNr = nsNr;
2123
9.80M
    return (ctxt->nameNr++);
2124
569
mem_error:
2125
569
    xmlErrMemory(ctxt);
2126
569
    return (-1);
2127
9.80M
}
2128
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The name below
 * it, if any, becomes the current name; otherwise ctxt->name is set to
 * NULL. The matching start-tag entry is left as it is.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2154
2155
/**
2156
 * namePush:
2157
 * @ctxt:  an XML parser context
2158
 * @value:  the element name
2159
 *
2160
 * DEPRECATED: Internal function, do not use.
2161
 *
2162
 * Pushes a new element name on top of the name stack
2163
 *
2164
 * Returns -1 in case of error, the index in the stack otherwise
2165
 */
2166
int
2167
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2168
0
{
2169
0
    if (ctxt == NULL) return (-1);
2170
2171
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2172
0
        const xmlChar * *tmp;
2173
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2174
0
                                    ctxt->nameMax * 2 *
2175
0
                                    sizeof(ctxt->nameTab[0]));
2176
0
        if (tmp == NULL) {
2177
0
      goto mem_error;
2178
0
        }
2179
0
  ctxt->nameTab = tmp;
2180
0
        ctxt->nameMax *= 2;
2181
0
    }
2182
0
    ctxt->nameTab[ctxt->nameNr] = value;
2183
0
    ctxt->name = value;
2184
0
    return (ctxt->nameNr++);
2185
0
mem_error:
2186
0
    xmlErrMemory(ctxt);
2187
0
    return (-1);
2188
0
}
2189
2190
/**
2191
 * namePop:
2192
 * @ctxt: an XML parser context
2193
 *
2194
 * DEPRECATED: Internal function, do not use.
2195
 *
2196
 * Pops the top element name from the name stack
2197
 *
2198
 * Returns the name just removed
2199
 */
2200
const xmlChar *
2201
namePop(xmlParserCtxtPtr ctxt)
2202
9.22M
{
2203
9.22M
    const xmlChar *ret;
2204
2205
9.22M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2206
78
        return (NULL);
2207
9.22M
    ctxt->nameNr--;
2208
9.22M
    if (ctxt->nameNr > 0)
2209
9.09M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2210
121k
    else
2211
121k
        ctxt->name = NULL;
2212
9.22M
    ret = ctxt->nameTab[ctxt->nameNr];
2213
9.22M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2214
9.22M
    return (ret);
2215
9.22M
}
2216
2217
12.3M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on the stack behind ctxt->space, doubling the stack when
 * it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* The old array is still in place: restore its capacity. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2235
2236
11.9M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top entry of the stack behind ctxt->space. The vacated slot
 * is overwritten with -1 and ctxt->space is pointed at the new top
 * entry, or at slot 0 when the stack became empty.
 *
 * Returns the value removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2248
2249
/*
2250
 * Macros for accessing the content. Those should be used only by the parser,
2251
 * and not exported.
2252
 *
2253
 * Dirty macros, i.e. one often need to make assumption on the context to
2254
 * use them
2255
 *
2256
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2257
 *           To be used with extreme caution since operations consuming
2258
 *           characters may move the input buffer to a different location !
2259
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2260
 *           This should be used internally by the parser
2261
 *           only to compare to ASCII values otherwise it would break when
2262
 *           running with UTF-8 encoding.
2263
 *   RAW     same as CUR but in the input buffer, bypass any token
2264
 *           extraction that may have been done
2265
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2266
 *           to compare on ASCII based substring.
2267
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2268
 *           strings without newlines within the parser.
2269
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2270
 *           defined char within the parser.
2271
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2272
 *
2273
 *   NEXT    Skip to the next character, this does the proper decoding
2274
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
2275
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2276
 *   CUR_SCHAR  same but operate on a string instead of the context
2277
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2278
 *            the index
2279
 *   GROW, SHRINK  handling of input buffers
2280
 */
2281
2282
157M
/*
 * Accessors for the current input. All of them expect a variable named
 * "ctxt" to be in scope at the point of use.
 * RAW and CUR expand to the same expression: the byte at the current
 * position. NXT(val) is the byte at offset val from it; no bounds check
 * is made.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s equal c1..cn,
 * compared as unsigned char. The comparison stops at the first mismatch,
 * but s is evaluated once per byte tested, so it must not have side
 * effects. Every parameter is parenthesized so that the cast applies to
 * the whole argument even when it is a compound expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2305
2306
22.6M
/*
 * SKIP(val): advance the current position and the column counter by
 * val bytes, then try to grow the input if the new position holds a
 * zero byte. Line numbers are not updated, so the skipped bytes must
 * not contain a newline. val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): like SKIP, but advances one byte at a time and keeps the
 * line and column counters up to date across newlines. val is
 * re-evaluated on every iteration; it is parenthesized so that a
 * compound argument is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++;					\
            ctxt->input->col = 1;					\
        } else {							\
            ctxt->input->col++;						\
        }								\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2323
2324
/*
 * SHRINK and GROW expand to a complete "if" statement including its
 * terminating semicolon, so "GROW;" leaves an extra empty statement
 * behind. Do not use them as the unbraced body of an if that has an
 * else branch. Both do nothing when PARSER_PROGRESSIVE(ctxt) is set.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte without any newline bookkeeping.
 * Expands to a brace block, not to a do/while statement.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserGrow(ctxt);					\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating the line and column counters. The input is not grown.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else {								\
        ctxt->input->col++;						\
    }									\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(b, i, v): append character v to buffer b at index i and
 * advance i by the number of bytes written. Values below 0x80 are
 * stored directly, anything else goes through xmlCopyCharMultiByte.
 * Expands to an unbraced if/else without trailing semicolon; the
 * parameters are parenthesized so that compound arguments are safe,
 * but they are evaluated more than once.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2360
2361
static int
2362
455M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2363
455M
    int c = xmlCurrentChar(ctxt, len);
2364
2365
455M
    if (c == XML_INVALID_CHAR)
2366
110M
        c = 0xFFFD; /* replacement character */
2367
2368
455M
    return(c);
2369
455M
}
2370
2371
/**
2372
 * xmlSkipBlankChars:
2373
 * @ctxt:  the XML parser context
2374
 *
2375
 * DEPRECATED: Internal function, do not use.
2376
 *
2377
 * Skip whitespace in the input stream.
2378
 *
2379
 * Returns the number of space chars skipped
2380
 */
2381
int
2382
35.6M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2383
35.6M
    const xmlChar *cur;
2384
35.6M
    int res = 0;
2385
2386
    /*
2387
     * It's Okay to use CUR/NEXT here since all the blanks are on
2388
     * the ASCII range.
2389
     */
2390
35.6M
    cur = ctxt->input->cur;
2391
35.6M
    while (IS_BLANK_CH(*cur)) {
2392
24.9M
        if (*cur == '\n') {
2393
11.4M
            ctxt->input->line++; ctxt->input->col = 1;
2394
13.4M
        } else {
2395
13.4M
            ctxt->input->col++;
2396
13.4M
        }
2397
24.9M
        cur++;
2398
24.9M
        if (res < INT_MAX)
2399
24.9M
            res++;
2400
24.9M
        if (*cur == 0) {
2401
148k
            ctxt->input->cur = cur;
2402
148k
            xmlParserGrow(ctxt);
2403
148k
            cur = ctxt->input->cur;
2404
148k
        }
2405
24.9M
    }
2406
35.6M
    ctxt->input->cur = cur;
2407
2408
35.6M
    return(res);
2409
35.6M
}
2410
2411
static void
2412
669k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2413
669k
    unsigned long consumed;
2414
669k
    xmlEntityPtr ent;
2415
2416
669k
    ent = ctxt->input->entity;
2417
2418
669k
    ent->flags &= ~XML_ENT_EXPANDING;
2419
2420
669k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2421
33.8k
        int result;
2422
2423
        /*
2424
         * Read the rest of the stream in case of errors. We want
2425
         * to account for the whole entity size.
2426
         */
2427
45.0k
        do {
2428
45.0k
            ctxt->input->cur = ctxt->input->end;
2429
45.0k
            xmlParserShrink(ctxt);
2430
45.0k
            result = xmlParserGrow(ctxt);
2431
45.0k
        } while (result > 0);
2432
2433
33.8k
        consumed = ctxt->input->consumed;
2434
33.8k
        xmlSaturatedAddSizeT(&consumed,
2435
33.8k
                             ctxt->input->end - ctxt->input->base);
2436
2437
33.8k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2438
2439
        /*
2440
         * Add to sizeentities when parsing an external entity
2441
         * for the first time.
2442
         */
2443
33.8k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2444
19.8k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2445
19.8k
        }
2446
2447
33.8k
        ent->flags |= XML_ENT_CHECKED;
2448
33.8k
    }
2449
2450
669k
    xmlPopInput(ctxt);
2451
2452
669k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2453
669k
}
2454
2455
/**
2456
 * xmlSkipBlankCharsPE:
2457
 * @ctxt:  the XML parser context
2458
 *
2459
 * Skip whitespace in the input stream, also handling parameter
2460
 * entities.
2461
 *
2462
 * Returns the number of space chars skipped
2463
 */
2464
static int
2465
12.1M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2466
12.1M
    int res = 0;
2467
12.1M
    int inParam;
2468
12.1M
    int expandParam;
2469
2470
12.1M
    inParam = PARSER_IN_PE(ctxt);
2471
12.1M
    expandParam = PARSER_EXTERNAL(ctxt);
2472
2473
12.1M
    if (!inParam && !expandParam)
2474
3.69M
        return(xmlSkipBlankChars(ctxt));
2475
2476
21.7M
    while (PARSER_STOPPED(ctxt) == 0) {
2477
21.7M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2478
10.5M
            NEXT;
2479
11.1M
        } else if (CUR == '%') {
2480
2.55M
            if ((expandParam == 0) ||
2481
2.55M
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2482
443k
                break;
2483
2484
            /*
2485
             * Expand parameter entity. We continue to consume
2486
             * whitespace at the start of the entity and possible
2487
             * even consume the whole entity and pop it. We might
2488
             * even pop multiple PEs in this loop.
2489
             */
2490
2.10M
            xmlParsePEReference(ctxt);
2491
2492
2.10M
            inParam = PARSER_IN_PE(ctxt);
2493
2.10M
            expandParam = PARSER_EXTERNAL(ctxt);
2494
8.64M
        } else if (CUR == 0) {
2495
662k
            if (inParam == 0)
2496
4.34k
                break;
2497
2498
658k
            xmlPopPE(ctxt);
2499
2500
658k
            inParam = PARSER_IN_PE(ctxt);
2501
658k
            expandParam = PARSER_EXTERNAL(ctxt);
2502
7.98M
        } else {
2503
7.98M
            break;
2504
7.98M
        }
2505
2506
        /*
2507
         * Also increase the counter when entering or exiting a PERef.
2508
         * The spec says: "When a parameter-entity reference is recognized
2509
         * in the DTD and included, its replacement text MUST be enlarged
2510
         * by the attachment of one leading and one following space (#x20)
2511
         * character."
2512
         */
2513
13.2M
        if (res < INT_MAX)
2514
13.2M
            res++;
2515
13.2M
    }
2516
2517
8.44M
    return(res);
2518
12.1M
}
2519
2520
/************************************************************************
2521
 *                  *
2522
 *    Commodity functions to handle entities      *
2523
 *                  *
2524
 ************************************************************************/
2525
2526
/**
2527
 * xmlPopInput:
2528
 * @ctxt:  an XML parser context
2529
 *
2530
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2531
 *          pop it and return the next char.
2532
 *
2533
 * Returns the current xmlChar in the parser context
2534
 */
2535
xmlChar
2536
669k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2537
669k
    xmlParserInputPtr input;
2538
2539
669k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2540
669k
    input = inputPop(ctxt);
2541
669k
    xmlFreeInputStream(input);
2542
669k
    if (*ctxt->input->cur == 0)
2543
78.1k
        xmlParserGrow(ctxt);
2544
669k
    return(CUR);
2545
669k
}
2546
2547
/**
2548
 * xmlPushInput:
2549
 * @ctxt:  an XML parser context
2550
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2551
 *
2552
 * Push an input stream onto the stack.
2553
 *
2554
 * Returns -1 in case of error or the index in the input stack
2555
 */
2556
int
2557
1.72M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2558
1.72M
    int maxDepth;
2559
1.72M
    int ret;
2560
2561
1.72M
    if ((ctxt == NULL) || (input == NULL))
2562
985k
        return(-1);
2563
2564
740k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2565
740k
    if (ctxt->inputNr > maxDepth) {
2566
18
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2567
18
                       "Maximum entity nesting depth exceeded");
2568
18
        xmlHaltParser(ctxt);
2569
18
  return(-1);
2570
18
    }
2571
740k
    ret = inputPush(ctxt, input);
2572
740k
    if (ret >= 0)
2573
740k
        GROW;
2574
740k
    return(ret);
2575
740k
}
2576
2577
/**
2578
 * xmlParseCharRef:
2579
 * @ctxt:  an XML parser context
2580
 *
2581
 * DEPRECATED: Internal function, don't use.
2582
 *
2583
 * Parse a numeric character reference. Always consumes '&'.
2584
 *
2585
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2586
 *                  '&#x' [0-9a-fA-F]+ ';'
2587
 *
2588
 * [ WFC: Legal Character ]
2589
 * Characters referred to using character references must match the
2590
 * production for Char.
2591
 *
2592
 * Returns the value parsed (as an int), 0 in case of error
2593
 */
2594
int
2595
1.46M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2596
1.46M
    int val = 0;
2597
1.46M
    int count = 0;
2598
2599
    /*
2600
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2601
     */
2602
1.46M
    if ((RAW == '&') && (NXT(1) == '#') &&
2603
1.46M
        (NXT(2) == 'x')) {
2604
735k
  SKIP(3);
2605
735k
  GROW;
2606
2.58M
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2607
1.99M
      if (count++ > 20) {
2608
21.7k
    count = 0;
2609
21.7k
    GROW;
2610
21.7k
      }
2611
1.99M
      if ((RAW >= '0') && (RAW <= '9'))
2612
836k
          val = val * 16 + (CUR - '0');
2613
1.16M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2614
361k
          val = val * 16 + (CUR - 'a') + 10;
2615
800k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2616
653k
          val = val * 16 + (CUR - 'A') + 10;
2617
146k
      else {
2618
146k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2619
146k
    val = 0;
2620
146k
    break;
2621
146k
      }
2622
1.85M
      if (val > 0x110000)
2623
304k
          val = 0x110000;
2624
2625
1.85M
      NEXT;
2626
1.85M
      count++;
2627
1.85M
  }
2628
735k
  if (RAW == ';') {
2629
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2630
588k
      ctxt->input->col++;
2631
588k
      ctxt->input->cur++;
2632
588k
  }
2633
735k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2634
731k
  SKIP(2);
2635
731k
  GROW;
2636
2.40M
  while (RAW != ';') { /* loop blocked by count */
2637
1.79M
      if (count++ > 20) {
2638
23.4k
    count = 0;
2639
23.4k
    GROW;
2640
23.4k
      }
2641
1.79M
      if ((RAW >= '0') && (RAW <= '9'))
2642
1.67M
          val = val * 10 + (CUR - '0');
2643
126k
      else {
2644
126k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2645
126k
    val = 0;
2646
126k
    break;
2647
126k
      }
2648
1.67M
      if (val > 0x110000)
2649
234k
          val = 0x110000;
2650
2651
1.67M
      NEXT;
2652
1.67M
      count++;
2653
1.67M
  }
2654
731k
  if (RAW == ';') {
2655
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2656
604k
      ctxt->input->col++;
2657
604k
      ctxt->input->cur++;
2658
604k
  }
2659
731k
    } else {
2660
0
        if (RAW == '&')
2661
0
            SKIP(1);
2662
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2663
0
    }
2664
2665
    /*
2666
     * [ WFC: Legal Character ]
2667
     * Characters referred to using character references must match the
2668
     * production for Char.
2669
     */
2670
1.46M
    if (val >= 0x110000) {
2671
23.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2672
23.7k
                "xmlParseCharRef: character reference out of bounds\n",
2673
23.7k
          val);
2674
1.44M
    } else if (IS_CHAR(val)) {
2675
1.13M
        return(val);
2676
1.13M
    } else {
2677
312k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2678
312k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2679
312k
                    val);
2680
312k
    }
2681
336k
    return(0);
2682
1.46M
}
2683
2684
/**
2685
 * xmlParseStringCharRef:
2686
 * @ctxt:  an XML parser context
2687
 * @str:  a pointer to an index in the string
2688
 *
2689
 * parse Reference declarations, variant parsing from a string rather
2690
 * than an an input flow.
2691
 *
2692
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2693
 *                  '&#x' [0-9a-fA-F]+ ';'
2694
 *
2695
 * [ WFC: Legal Character ]
2696
 * Characters referred to using character references must match the
2697
 * production for Char.
2698
 *
2699
 * Returns the value parsed (as an int), 0 in case of error, str will be
2700
 *         updated to the current value of the index
2701
 */
2702
static int
2703
3.47M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2704
3.47M
    const xmlChar *ptr;
2705
3.47M
    xmlChar cur;
2706
3.47M
    int val = 0;
2707
2708
3.47M
    if ((str == NULL) || (*str == NULL)) return(0);
2709
3.47M
    ptr = *str;
2710
3.47M
    cur = *ptr;
2711
3.47M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2712
146k
  ptr += 3;
2713
146k
  cur = *ptr;
2714
532k
  while (cur != ';') { /* Non input consuming loop */
2715
393k
      if ((cur >= '0') && (cur <= '9'))
2716
157k
          val = val * 16 + (cur - '0');
2717
235k
      else if ((cur >= 'a') && (cur <= 'f'))
2718
114k
          val = val * 16 + (cur - 'a') + 10;
2719
120k
      else if ((cur >= 'A') && (cur <= 'F'))
2720
113k
          val = val * 16 + (cur - 'A') + 10;
2721
7.16k
      else {
2722
7.16k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2723
7.16k
    val = 0;
2724
7.16k
    break;
2725
7.16k
      }
2726
386k
      if (val > 0x110000)
2727
37.6k
          val = 0x110000;
2728
2729
386k
      ptr++;
2730
386k
      cur = *ptr;
2731
386k
  }
2732
146k
  if (cur == ';')
2733
139k
      ptr++;
2734
3.32M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2735
3.32M
  ptr += 2;
2736
3.32M
  cur = *ptr;
2737
10.0M
  while (cur != ';') { /* Non input consuming loops */
2738
6.77M
      if ((cur >= '0') && (cur <= '9'))
2739
6.76M
          val = val * 10 + (cur - '0');
2740
10.5k
      else {
2741
10.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2742
10.5k
    val = 0;
2743
10.5k
    break;
2744
10.5k
      }
2745
6.76M
      if (val > 0x110000)
2746
20.0k
          val = 0x110000;
2747
2748
6.76M
      ptr++;
2749
6.76M
      cur = *ptr;
2750
6.76M
  }
2751
3.32M
  if (cur == ';')
2752
3.31M
      ptr++;
2753
3.32M
    } else {
2754
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2755
0
  return(0);
2756
0
    }
2757
3.47M
    *str = ptr;
2758
2759
    /*
2760
     * [ WFC: Legal Character ]
2761
     * Characters referred to using character references must match the
2762
     * production for Char.
2763
     */
2764
3.47M
    if (val >= 0x110000) {
2765
2.07k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2766
2.07k
                "xmlParseStringCharRef: character reference out of bounds\n",
2767
2.07k
                val);
2768
3.46M
    } else if (IS_CHAR(val)) {
2769
3.44M
        return(val);
2770
3.44M
    } else {
2771
26.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2772
26.8k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2773
26.8k
        val);
2774
26.8k
    }
2775
28.9k
    return(0);
2776
3.47M
}
2777
2778
/**
2779
 * xmlParserHandlePEReference:
2780
 * @ctxt:  the parser context
2781
 *
2782
 * DEPRECATED: Internal function, do not use.
2783
 *
2784
 * [69] PEReference ::= '%' Name ';'
2785
 *
2786
 * [ WFC: No Recursion ]
2787
 * A parsed entity must not contain a recursive
2788
 * reference to itself, either directly or indirectly.
2789
 *
2790
 * [ WFC: Entity Declared ]
2791
 * In a document without any DTD, a document with only an internal DTD
2792
 * subset which contains no parameter entity references, or a document
2793
 * with "standalone='yes'", ...  ... The declaration of a parameter
2794
 * entity must precede any reference to it...
2795
 *
2796
 * [ VC: Entity Declared ]
2797
 * In a document with an external subset or external parameter entities
2798
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2799
 * must precede any reference to it...
2800
 *
2801
 * [ WFC: In DTD ]
2802
 * Parameter-entity references may only appear in the DTD.
2803
 * NOTE: misleading but this is handled.
2804
 *
2805
 * A PEReference may have been detected in the current input stream
2806
 * the handling is done accordingly to
2807
 *      http://www.w3.org/TR/REC-xml#entproc
2808
 * i.e.
2809
 *   - Included in literal in entity values
2810
 *   - Included as Parameter Entity reference within DTDs
2811
 */
2812
void
2813
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2814
0
    xmlParsePEReference(ctxt);
2815
0
}
2816
2817
/**
2818
 * xmlStringLenDecodeEntities:
2819
 * @ctxt:  the parser context
2820
 * @str:  the input string
2821
 * @len: the string length
2822
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823
 * @end:  an end marker xmlChar, 0 if none
2824
 * @end2:  an end marker xmlChar, 0 if none
2825
 * @end3:  an end marker xmlChar, 0 if none
2826
 *
2827
 * DEPRECATED: Internal function, don't use.
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what ATTRIBUTE_UNUSED,
2835
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2836
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
2839
0
    if ((str[len] != 0) ||
2840
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2841
0
        return(NULL);
2842
2843
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2844
0
}
2845
2846
/**
2847
 * xmlStringDecodeEntities:
2848
 * @ctxt:  the parser context
2849
 * @str:  the input string
2850
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2851
 * @end:  an end marker xmlChar, 0 if none
2852
 * @end2:  an end marker xmlChar, 0 if none
2853
 * @end3:  an end marker xmlChar, 0 if none
2854
 *
2855
 * DEPRECATED: Internal function, don't use.
2856
 *
2857
 * Returns A newly allocated string with the substitution done. The caller
2858
 *      must deallocate it !
2859
 */
2860
xmlChar *
2861
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2862
                        int what ATTRIBUTE_UNUSED,
2863
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2864
0
    if ((ctxt == NULL) || (str == NULL))
2865
0
        return(NULL);
2866
2867
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2868
0
        return(NULL);
2869
2870
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2871
0
}
2872
2873
/************************************************************************
2874
 *                  *
2875
 *    Commodity functions, cleanup needed ?     *
2876
 *                  *
2877
 ************************************************************************/
2878
2879
/**
2880
 * areBlanks:
2881
 * @ctxt:  an XML parser context
2882
 * @str:  a xmlChar *
2883
 * @len:  the size of @str
2884
 * @blank_chars: we know the chars are blanks
2885
 *
2886
 * Is this a sequence of blank chars that one can ignore ?
2887
 *
2888
 * Returns 1 if ignorable 0 otherwise.
2889
 */
2890
2891
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2892
6.65M
                     int blank_chars) {
2893
6.65M
    int i;
2894
6.65M
    xmlNodePtr lastChild;
2895
2896
    /*
2897
     * Don't spend time trying to differentiate them, the same callback is
2898
     * used !
2899
     */
2900
6.65M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2901
2.63M
  return(0);
2902
2903
    /*
2904
     * Check for xml:space value.
2905
     */
2906
4.02M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2907
4.02M
        (*(ctxt->space) == -2))
2908
2.98M
  return(0);
2909
2910
    /*
2911
     * Check that the string is made of blanks
2912
     */
2913
1.03M
    if (blank_chars == 0) {
2914
2.60M
  for (i = 0;i < len;i++)
2915
2.51M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2916
584k
    }
2917
2918
    /*
2919
     * Look if the element is mixed content in the DTD if available
2920
     */
2921
537k
    if (ctxt->node == NULL) return(0);
2922
531k
    if (ctxt->myDoc != NULL) {
2923
531k
        xmlElementPtr elemDecl = NULL;
2924
531k
        xmlDocPtr doc = ctxt->myDoc;
2925
531k
        const xmlChar *prefix = NULL;
2926
2927
531k
        if (ctxt->node->ns)
2928
122k
            prefix = ctxt->node->ns->prefix;
2929
531k
        if (doc->intSubset != NULL)
2930
428k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2931
428k
                                      prefix);
2932
531k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2933
19.9k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2934
19.9k
                                      prefix);
2935
531k
        if (elemDecl != NULL) {
2936
175k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2937
158k
                return(1);
2938
17.0k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2939
17.0k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2940
3.71k
                return(0);
2941
17.0k
        }
2942
531k
    }
2943
2944
    /*
2945
     * Otherwise, heuristic :-\
2946
     */
2947
369k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
324k
    if ((ctxt->node->children == NULL) &&
2949
324k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
317k
    lastChild = xmlGetLastChild(ctxt->node);
2952
317k
    if (lastChild == NULL) {
2953
198k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
198k
            (ctxt->node->content != NULL)) return(0);
2955
198k
    } else if (xmlNodeIsText(lastChild))
2956
15.3k
        return(0);
2957
104k
    else if ((ctxt->node->children != NULL) &&
2958
104k
             (xmlNodeIsText(ctxt->node->children)))
2959
12.5k
        return(0);
2960
289k
    return(1);
2961
317k
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
3032
0
    max *= 2;
3033
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3034
0
    if (tmp == NULL) {
3035
0
        xmlFree(buffer);
3036
0
        xmlErrMemory(ctxt);
3037
0
        return(NULL);
3038
0
    }
3039
0
    buffer = tmp;
3040
0
      }
3041
0
      buffer[len++] = c;
3042
0
      c = *cur++;
3043
0
  }
3044
0
  buffer[len] = 0;
3045
0
    }
3046
3047
0
    if ((c == ':') && (*cur == 0)) {
3048
0
        if (buffer != NULL)
3049
0
      xmlFree(buffer);
3050
0
  return(xmlStrdup(name));
3051
0
    }
3052
3053
0
    if (buffer == NULL) {
3054
0
  ret = xmlStrndup(buf, len);
3055
0
        if (ret == NULL) {
3056
0
      xmlErrMemory(ctxt);
3057
0
      return(NULL);
3058
0
        }
3059
0
    } else {
3060
0
  ret = buffer;
3061
0
  buffer = NULL;
3062
0
  max = XML_MAX_NAMELEN;
3063
0
    }
3064
3065
3066
0
    if (c == ':') {
3067
0
  c = *cur;
3068
0
        prefix = ret;
3069
0
  if (c == 0) {
3070
0
      ret = xmlStrndup(BAD_CAST "", 0);
3071
0
            if (ret == NULL) {
3072
0
                xmlFree(prefix);
3073
0
                return(NULL);
3074
0
            }
3075
0
            *prefixOut = prefix;
3076
0
            return(ret);
3077
0
  }
3078
0
  len = 0;
3079
3080
  /*
3081
   * Check that the first character is proper to start
3082
   * a new name
3083
   */
3084
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3085
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3086
0
        (c == '_') || (c == ':'))) {
3087
0
      int l;
3088
0
      int first = CUR_SCHAR(cur, l);
3089
3090
0
      if (!IS_LETTER(first) && (first != '_')) {
3091
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3092
0
          "Name %s is not XML Namespace compliant\n",
3093
0
          name);
3094
0
      }
3095
0
  }
3096
0
  cur++;
3097
3098
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3099
0
      buf[len++] = c;
3100
0
      c = *cur++;
3101
0
  }
3102
0
  if (len >= max) {
3103
      /*
3104
       * Okay someone managed to make a huge name, so he's ready to pay
3105
       * for the processing speed.
3106
       */
3107
0
      max = len * 2;
3108
3109
0
      buffer = xmlMalloc(max);
3110
0
      if (buffer == NULL) {
3111
0
          xmlErrMemory(ctxt);
3112
0
                xmlFree(prefix);
3113
0
    return(NULL);
3114
0
      }
3115
0
      memcpy(buffer, buf, len);
3116
0
      while (c != 0) { /* tested bigname2.xml */
3117
0
    if (len + 10 > max) {
3118
0
        xmlChar *tmp;
3119
3120
0
        max *= 2;
3121
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3122
0
        if (tmp == NULL) {
3123
0
      xmlErrMemory(ctxt);
3124
0
                        xmlFree(prefix);
3125
0
      xmlFree(buffer);
3126
0
      return(NULL);
3127
0
        }
3128
0
        buffer = tmp;
3129
0
    }
3130
0
    buffer[len++] = c;
3131
0
    c = *cur++;
3132
0
      }
3133
0
      buffer[len] = 0;
3134
0
  }
3135
3136
0
  if (buffer == NULL) {
3137
0
      ret = xmlStrndup(buf, len);
3138
0
            if (ret == NULL) {
3139
0
                xmlFree(prefix);
3140
0
                return(NULL);
3141
0
            }
3142
0
  } else {
3143
0
      ret = buffer;
3144
0
  }
3145
3146
0
        *prefixOut = prefix;
3147
0
    }
3148
3149
0
    return(ret);
3150
0
}
3151
3152
/************************************************************************
3153
 *                  *
3154
 *      The parser itself       *
3155
 *  Relates to http://www.w3.org/TR/REC-xml       *
3156
 *                  *
3157
 ************************************************************************/
3158
3159
/************************************************************************
3160
 *                  *
3161
 *  Routines to parse Name, NCName and NmToken      *
3162
 *                  *
3163
 ************************************************************************/
3164
3165
/*
3166
 * The two following functions are related to the change of accepted
3167
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3168
 * They correspond to the modified production [4] and the new production [4a]
3169
 * changes in that revision. Also note that the macros used for the
3170
 * productions Letter, Digit, CombiningChar and Extender are not needed
3171
 * anymore.
3172
 * We still keep compatibility to pre-revision5 parsing semantic if the
3173
 * new XML_PARSE_OLD10 option is given to the parser.
3174
 */
3175
static int
3176
12.9M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3177
12.9M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3178
        /*
3179
   * Use the new checks of production [4] [4a] amd [5] of the
3180
   * Update 5 of XML-1.0
3181
   */
3182
8.36M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3183
8.36M
      (((c >= 'a') && (c <= 'z')) ||
3184
8.35M
       ((c >= 'A') && (c <= 'Z')) ||
3185
8.35M
       (c == '_') || (c == ':') ||
3186
8.35M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3187
8.35M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3188
8.35M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3189
8.35M
       ((c >= 0x370) && (c <= 0x37D)) ||
3190
8.35M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3191
8.35M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3192
8.35M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3193
8.35M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3194
8.35M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3195
8.35M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3196
8.35M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3197
8.35M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3198
6.07M
      return(1);
3199
8.36M
    } else {
3200
4.57M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3201
3.85M
      return(1);
3202
4.57M
    }
3203
3.00M
    return(0);
3204
12.9M
}
3205
3206
static int
3207
110M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3208
110M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3209
        /*
3210
   * Use the new checks of production [4] [4a] amd [5] of the
3211
   * Update 5 of XML-1.0
3212
   */
3213
87.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3214
87.8M
      (((c >= 'a') && (c <= 'z')) ||
3215
87.7M
       ((c >= 'A') && (c <= 'Z')) ||
3216
87.7M
       ((c >= '0') && (c <= '9')) || /* !start */
3217
87.7M
       (c == '_') || (c == ':') ||
3218
87.7M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3219
87.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3220
87.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3221
87.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3222
87.7M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3223
87.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3224
87.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3225
87.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3226
87.7M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3227
87.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3228
87.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3229
87.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3230
87.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3231
87.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3232
87.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3233
81.5M
       return(1);
3234
87.8M
    } else {
3235
22.5M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3236
22.5M
            (c == '.') || (c == '-') ||
3237
22.5M
      (c == '_') || (c == ':') ||
3238
22.5M
      (IS_COMBINING(c)) ||
3239
22.5M
      (IS_EXTENDER(c)))
3240
18.7M
      return(1);
3241
22.5M
    }
3242
10.1M
    return(0);
3243
110M
}
3244
3245
static const xmlChar *
3246
3.61M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3247
3.61M
    const xmlChar *ret;
3248
3.61M
    int len = 0, l;
3249
3.61M
    int c;
3250
3.61M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3251
2.04M
                    XML_MAX_TEXT_LENGTH :
3252
3.61M
                    XML_MAX_NAME_LENGTH;
3253
3254
    /*
3255
     * Handler for more complex cases
3256
     */
3257
3.61M
    c = xmlCurrentChar(ctxt, &l);
3258
3.61M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3259
        /*
3260
   * Use the new checks of production [4] [4a] amd [5] of the
3261
   * Update 5 of XML-1.0
3262
   */
3263
2.60M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3264
2.60M
      (!(((c >= 'a') && (c <= 'z')) ||
3265
2.40M
         ((c >= 'A') && (c <= 'Z')) ||
3266
2.40M
         (c == '_') || (c == ':') ||
3267
2.40M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3268
2.40M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3269
2.40M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3270
2.40M
         ((c >= 0x370) && (c <= 0x37D)) ||
3271
2.40M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3272
2.40M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3273
2.40M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3274
2.40M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3275
2.40M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3276
2.40M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3277
2.40M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3278
2.40M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3279
2.03M
      return(NULL);
3280
2.03M
  }
3281
571k
  len += l;
3282
571k
  NEXTL(l);
3283
571k
  c = xmlCurrentChar(ctxt, &l);
3284
25.4M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3285
25.4M
         (((c >= 'a') && (c <= 'z')) ||
3286
25.3M
          ((c >= 'A') && (c <= 'Z')) ||
3287
25.3M
          ((c >= '0') && (c <= '9')) || /* !start */
3288
25.3M
          (c == '_') || (c == ':') ||
3289
25.3M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3290
25.3M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3291
25.3M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3292
25.3M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3293
25.3M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3294
25.3M
          ((c >= 0x370) && (c <= 0x37D)) ||
3295
25.3M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3296
25.3M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3297
25.3M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3298
25.3M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3299
25.3M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3300
25.3M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3301
25.3M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3302
25.3M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3303
25.3M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3304
25.3M
    )) {
3305
24.8M
            if (len <= INT_MAX - l)
3306
24.8M
          len += l;
3307
24.8M
      NEXTL(l);
3308
24.8M
      c = xmlCurrentChar(ctxt, &l);
3309
24.8M
  }
3310
1.01M
    } else {
3311
1.01M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3312
1.01M
      (!IS_LETTER(c) && (c != '_') &&
3313
950k
       (c != ':'))) {
3314
686k
      return(NULL);
3315
686k
  }
3316
325k
  len += l;
3317
325k
  NEXTL(l);
3318
325k
  c = xmlCurrentChar(ctxt, &l);
3319
3320
8.28M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3321
8.28M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3322
8.22M
    (c == '.') || (c == '-') ||
3323
8.22M
    (c == '_') || (c == ':') ||
3324
8.22M
    (IS_COMBINING(c)) ||
3325
8.22M
    (IS_EXTENDER(c)))) {
3326
7.95M
            if (len <= INT_MAX - l)
3327
7.95M
          len += l;
3328
7.95M
      NEXTL(l);
3329
7.95M
      c = xmlCurrentChar(ctxt, &l);
3330
7.95M
  }
3331
325k
    }
3332
896k
    if (len > maxLength) {
3333
409
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3334
409
        return(NULL);
3335
409
    }
3336
895k
    if (ctxt->input->cur - ctxt->input->base < len) {
3337
        /*
3338
         * There were a couple of bugs where PERefs lead to to a change
3339
         * of the buffer. Check the buffer size to avoid passing an invalid
3340
         * pointer to xmlDictLookup.
3341
         */
3342
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3343
0
                    "unexpected change of input buffer");
3344
0
        return (NULL);
3345
0
    }
3346
895k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3347
2.41k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3348
893k
    else
3349
893k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3350
895k
    if (ret == NULL)
3351
28
        xmlErrMemory(ctxt);
3352
895k
    return(ret);
3353
895k
}
3354
3355
/**
3356
 * xmlParseName:
3357
 * @ctxt:  an XML parser context
3358
 *
3359
 * DEPRECATED: Internal function, don't use.
3360
 *
3361
 * parse an XML name.
3362
 *
3363
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3364
 *                  CombiningChar | Extender
3365
 *
3366
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3367
 *
3368
 * [6] Names ::= Name (#x20 Name)*
3369
 *
3370
 * Returns the Name parsed or NULL
3371
 */
3372
3373
const xmlChar *
3374
19.3M
xmlParseName(xmlParserCtxtPtr ctxt) {
3375
19.3M
    const xmlChar *in;
3376
19.3M
    const xmlChar *ret;
3377
19.3M
    size_t count = 0;
3378
19.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3379
8.57M
                       XML_MAX_TEXT_LENGTH :
3380
19.3M
                       XML_MAX_NAME_LENGTH;
3381
3382
19.3M
    GROW;
3383
3384
    /*
3385
     * Accelerator for simple ASCII names
3386
     */
3387
19.3M
    in = ctxt->input->cur;
3388
19.3M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3389
19.3M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3390
19.3M
  (*in == '_') || (*in == ':')) {
3391
16.3M
  in++;
3392
77.3M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3393
77.3M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3394
77.3M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3395
77.3M
         (*in == '_') || (*in == '-') ||
3396
77.3M
         (*in == ':') || (*in == '.'))
3397
60.9M
      in++;
3398
16.3M
  if ((*in > 0) && (*in < 0x80)) {
3399
15.7M
      count = in - ctxt->input->cur;
3400
15.7M
            if (count > maxLength) {
3401
132
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3402
132
                return(NULL);
3403
132
            }
3404
15.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3405
15.7M
      ctxt->input->cur = in;
3406
15.7M
      ctxt->input->col += count;
3407
15.7M
      if (ret == NULL)
3408
72
          xmlErrMemory(ctxt);
3409
15.7M
      return(ret);
3410
15.7M
  }
3411
16.3M
    }
3412
    /* accelerator for special cases */
3413
3.61M
    return(xmlParseNameComplex(ctxt));
3414
19.3M
}
3415
3416
static xmlHashedString
3417
4.23M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3418
4.23M
    xmlHashedString ret;
3419
4.23M
    int len = 0, l;
3420
4.23M
    int c;
3421
4.23M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3422
1.97M
                    XML_MAX_TEXT_LENGTH :
3423
4.23M
                    XML_MAX_NAME_LENGTH;
3424
4.23M
    size_t startPosition = 0;
3425
3426
4.23M
    ret.name = NULL;
3427
4.23M
    ret.hashValue = 0;
3428
3429
    /*
3430
     * Handler for more complex cases
3431
     */
3432
4.23M
    startPosition = CUR_PTR - BASE_PTR;
3433
4.23M
    c = xmlCurrentChar(ctxt, &l);
3434
4.23M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
4.23M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
3.33M
  return(ret);
3437
3.33M
    }
3438
3439
64.9M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
64.9M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
64.0M
        if (len <= INT_MAX - l)
3442
64.0M
      len += l;
3443
64.0M
  NEXTL(l);
3444
64.0M
  c = xmlCurrentChar(ctxt, &l);
3445
64.0M
    }
3446
895k
    if (len > maxLength) {
3447
648
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3448
648
        return(ret);
3449
648
    }
3450
894k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3451
894k
    if (ret.name == NULL)
3452
19
        xmlErrMemory(ctxt);
3453
894k
    return(ret);
3454
895k
}
3455
3456
/**
3457
 * xmlParseNCName:
3458
 * @ctxt:  an XML parser context
3459
 * @len:  length of the string parsed
3460
 *
3461
 * parse an XML name.
3462
 *
3463
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3464
 *                      CombiningChar | Extender
3465
 *
3466
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3467
 *
3468
 * Returns the Name parsed or NULL
3469
 */
3470
3471
static xmlHashedString
3472
16.3M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3473
16.3M
    const xmlChar *in, *e;
3474
16.3M
    xmlHashedString ret;
3475
16.3M
    size_t count = 0;
3476
16.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3477
7.54M
                       XML_MAX_TEXT_LENGTH :
3478
16.3M
                       XML_MAX_NAME_LENGTH;
3479
3480
16.3M
    ret.name = NULL;
3481
3482
    /*
3483
     * Accelerator for simple ASCII names
3484
     */
3485
16.3M
    in = ctxt->input->cur;
3486
16.3M
    e = ctxt->input->end;
3487
16.3M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3488
16.3M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3489
16.3M
   (*in == '_')) && (in < e)) {
3490
12.8M
  in++;
3491
68.7M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3492
68.7M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3493
68.7M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3494
68.7M
          (*in == '_') || (*in == '-') ||
3495
68.7M
          (*in == '.')) && (in < e))
3496
55.9M
      in++;
3497
12.8M
  if (in >= e)
3498
9.47k
      goto complex;
3499
12.8M
  if ((*in > 0) && (*in < 0x80)) {
3500
12.1M
      count = in - ctxt->input->cur;
3501
12.1M
            if (count > maxLength) {
3502
227
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3503
227
                return(ret);
3504
227
            }
3505
12.1M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3506
12.1M
      ctxt->input->cur = in;
3507
12.1M
      ctxt->input->col += count;
3508
12.1M
      if (ret.name == NULL) {
3509
39
          xmlErrMemory(ctxt);
3510
39
      }
3511
12.1M
      return(ret);
3512
12.1M
  }
3513
12.8M
    }
3514
4.23M
complex:
3515
4.23M
    return(xmlParseNCNameComplex(ctxt));
3516
16.3M
}
3517
3518
/**
3519
 * xmlParseNameAndCompare:
3520
 * @ctxt:  an XML parser context
3521
 *
3522
 * parse an XML name and compares for match
3523
 * (specialized for endtag parsing)
3524
 *
3525
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3526
 * and the name for mismatch
3527
 */
3528
3529
static const xmlChar *
3530
2.37M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3531
2.37M
    register const xmlChar *cmp = other;
3532
2.37M
    register const xmlChar *in;
3533
2.37M
    const xmlChar *ret;
3534
3535
2.37M
    GROW;
3536
3537
2.37M
    in = ctxt->input->cur;
3538
6.66M
    while (*in != 0 && *in == *cmp) {
3539
4.28M
  ++in;
3540
4.28M
  ++cmp;
3541
4.28M
    }
3542
2.37M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3543
  /* success */
3544
1.90M
  ctxt->input->col += in - ctxt->input->cur;
3545
1.90M
  ctxt->input->cur = in;
3546
1.90M
  return (const xmlChar*) 1;
3547
1.90M
    }
3548
    /* failure (or end of input buffer), check with full function */
3549
475k
    ret = xmlParseName (ctxt);
3550
    /* strings coming from the dictionary direct compare possible */
3551
475k
    if (ret == other) {
3552
37.8k
  return (const xmlChar*) 1;
3553
37.8k
    }
3554
437k
    return ret;
3555
475k
}
3556
3557
/**
3558
 * xmlParseStringName:
3559
 * @ctxt:  an XML parser context
3560
 * @str:  a pointer to the string pointer (IN/OUT)
3561
 *
3562
 * parse an XML name.
3563
 *
3564
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3565
 *                  CombiningChar | Extender
3566
 *
3567
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3568
 *
3569
 * [6] Names ::= Name (#x20 Name)*
3570
 *
3571
 * Returns the Name parsed or NULL. The @str pointer
3572
 * is updated to the current location in the string.
3573
 */
3574
3575
static xmlChar *
3576
8.92M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3577
8.92M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3578
8.92M
    xmlChar *ret;
3579
8.92M
    const xmlChar *cur = *str;
3580
8.92M
    int len = 0, l;
3581
8.92M
    int c;
3582
8.92M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3583
3.49M
                    XML_MAX_TEXT_LENGTH :
3584
8.92M
                    XML_MAX_NAME_LENGTH;
3585
3586
8.92M
    c = CUR_SCHAR(cur, l);
3587
8.92M
    if (!xmlIsNameStartChar(ctxt, c)) {
3588
18.3k
  return(NULL);
3589
18.3k
    }
3590
3591
8.90M
    COPY_BUF(buf, len, c);
3592
8.90M
    cur += l;
3593
8.90M
    c = CUR_SCHAR(cur, l);
3594
20.1M
    while (xmlIsNameChar(ctxt, c)) {
3595
11.2M
  COPY_BUF(buf, len, c);
3596
11.2M
  cur += l;
3597
11.2M
  c = CUR_SCHAR(cur, l);
3598
11.2M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599
      /*
3600
       * Okay someone managed to make a huge name, so he's ready to pay
3601
       * for the processing speed.
3602
       */
3603
7.18k
      xmlChar *buffer;
3604
7.18k
      int max = len * 2;
3605
3606
7.18k
      buffer = xmlMalloc(max);
3607
7.18k
      if (buffer == NULL) {
3608
20
          xmlErrMemory(ctxt);
3609
20
    return(NULL);
3610
20
      }
3611
7.16k
      memcpy(buffer, buf, len);
3612
3.81M
      while (xmlIsNameChar(ctxt, c)) {
3613
3.80M
    if (len + 10 > max) {
3614
10.7k
        xmlChar *tmp;
3615
3616
10.7k
        max *= 2;
3617
10.7k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3618
10.7k
        if (tmp == NULL) {
3619
15
      xmlErrMemory(ctxt);
3620
15
      xmlFree(buffer);
3621
15
      return(NULL);
3622
15
        }
3623
10.7k
        buffer = tmp;
3624
10.7k
    }
3625
3.80M
    COPY_BUF(buffer, len, c);
3626
3.80M
    cur += l;
3627
3.80M
    c = CUR_SCHAR(cur, l);
3628
3.80M
                if (len > maxLength) {
3629
104
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3630
104
                    xmlFree(buffer);
3631
104
                    return(NULL);
3632
104
                }
3633
3.80M
      }
3634
7.04k
      buffer[len] = 0;
3635
7.04k
      *str = cur;
3636
7.04k
      return(buffer);
3637
7.16k
  }
3638
11.2M
    }
3639
8.89M
    if (len > maxLength) {
3640
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641
0
        return(NULL);
3642
0
    }
3643
8.89M
    *str = cur;
3644
8.89M
    ret = xmlStrndup(buf, len);
3645
8.89M
    if (ret == NULL)
3646
276
        xmlErrMemory(ctxt);
3647
8.89M
    return(ret);
3648
8.89M
}
3649
3650
/**
3651
 * xmlParseNmtoken:
3652
 * @ctxt:  an XML parser context
3653
 *
3654
 * DEPRECATED: Internal function, don't use.
3655
 *
3656
 * parse an XML Nmtoken.
3657
 *
3658
 * [7] Nmtoken ::= (NameChar)+
3659
 *
3660
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3661
 *
3662
 * Returns the Nmtoken parsed or NULL
3663
 */
3664
3665
xmlChar *
3666
495k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3667
495k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3668
495k
    xmlChar *ret;
3669
495k
    int len = 0, l;
3670
495k
    int c;
3671
495k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3672
160k
                    XML_MAX_TEXT_LENGTH :
3673
495k
                    XML_MAX_NAME_LENGTH;
3674
3675
495k
    c = xmlCurrentChar(ctxt, &l);
3676
3677
2.21M
    while (xmlIsNameChar(ctxt, c)) {
3678
1.72M
  COPY_BUF(buf, len, c);
3679
1.72M
  NEXTL(l);
3680
1.72M
  c = xmlCurrentChar(ctxt, &l);
3681
1.72M
  if (len >= XML_MAX_NAMELEN) {
3682
      /*
3683
       * Okay someone managed to make a huge token, so he's ready to pay
3684
       * for the processing speed.
3685
       */
3686
8.05k
      xmlChar *buffer;
3687
8.05k
      int max = len * 2;
3688
3689
8.05k
      buffer = xmlMalloc(max);
3690
8.05k
      if (buffer == NULL) {
3691
17
          xmlErrMemory(ctxt);
3692
17
    return(NULL);
3693
17
      }
3694
8.03k
      memcpy(buffer, buf, len);
3695
19.4M
      while (xmlIsNameChar(ctxt, c)) {
3696
19.4M
    if (len + 10 > max) {
3697
12.7k
        xmlChar *tmp;
3698
3699
12.7k
        max *= 2;
3700
12.7k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3701
12.7k
        if (tmp == NULL) {
3702
14
      xmlErrMemory(ctxt);
3703
14
      xmlFree(buffer);
3704
14
      return(NULL);
3705
14
        }
3706
12.6k
        buffer = tmp;
3707
12.6k
    }
3708
19.4M
    COPY_BUF(buffer, len, c);
3709
19.4M
                if (len > maxLength) {
3710
561
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3711
561
                    xmlFree(buffer);
3712
561
                    return(NULL);
3713
561
                }
3714
19.4M
    NEXTL(l);
3715
19.4M
    c = xmlCurrentChar(ctxt, &l);
3716
19.4M
      }
3717
7.46k
      buffer[len] = 0;
3718
7.46k
      return(buffer);
3719
8.03k
  }
3720
1.72M
    }
3721
487k
    if (len == 0)
3722
126k
        return(NULL);
3723
360k
    if (len > maxLength) {
3724
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3725
0
        return(NULL);
3726
0
    }
3727
360k
    ret = xmlStrndup(buf, len);
3728
360k
    if (ret == NULL)
3729
61
        xmlErrMemory(ctxt);
3730
360k
    return(ret);
3731
360k
}
3732
3733
/**
3734
 * xmlExpandPEsInEntityValue:
3735
 * @ctxt:  parser context
3736
 * @buf:  string buffer
3737
 * @str:  entity value
3738
 * @length:  size of entity value
3739
 * @depth:  nesting depth
3740
 *
3741
 * Validate an entity value and expand parameter entities.
3742
 */
3743
static void
3744
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3745
406k
                          const xmlChar *str, int length, int depth) {
3746
406k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3747
406k
    const xmlChar *end, *chunk;
3748
406k
    int c, l;
3749
3750
406k
    if (str == NULL)
3751
41.0k
        return;
3752
3753
365k
    depth += 1;
3754
365k
    if (depth > maxDepth) {
3755
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3756
0
                       "Maximum entity nesting depth exceeded");
3757
0
  return;
3758
0
    }
3759
3760
365k
    end = str + length;
3761
365k
    chunk = str;
3762
3763
564M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3764
564M
        c = *str;
3765
3766
564M
        if (c >= 0x80) {
3767
327M
            l = xmlUTF8MultibyteLen(ctxt, str,
3768
327M
                    "invalid character in entity value\n");
3769
327M
            if (l == 0) {
3770
59.0M
                if (chunk < str)
3771
683k
                    xmlSBufAddString(buf, chunk, str - chunk);
3772
59.0M
                xmlSBufAddReplChar(buf);
3773
59.0M
                str += 1;
3774
59.0M
                chunk = str;
3775
268M
            } else {
3776
268M
                str += l;
3777
268M
            }
3778
327M
        } else if (c == '&') {
3779
466k
            if (str[1] == '#') {
3780
237k
                if (chunk < str)
3781
68.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3782
3783
237k
                c = xmlParseStringCharRef(ctxt, &str);
3784
237k
                if (c == 0)
3785
28.8k
                    return;
3786
3787
208k
                xmlSBufAddChar(buf, c);
3788
3789
208k
                chunk = str;
3790
229k
            } else {
3791
229k
                xmlChar *name;
3792
3793
                /*
3794
                 * General entity references are checked for
3795
                 * syntactic validity.
3796
                 */
3797
229k
                str++;
3798
229k
                name = xmlParseStringName(ctxt, &str);
3799
3800
229k
                if ((name == NULL) || (*str++ != ';')) {
3801
18.1k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3802
18.1k
                            "EntityValue: '&' forbidden except for entities "
3803
18.1k
                            "references\n");
3804
18.1k
                    xmlFree(name);
3805
18.1k
                    return;
3806
18.1k
                }
3807
3808
210k
                xmlFree(name);
3809
210k
            }
3810
236M
        } else if (c == '%') {
3811
146k
            xmlEntityPtr ent;
3812
3813
146k
            if (chunk < str)
3814
88.5k
                xmlSBufAddString(buf, chunk, str - chunk);
3815
3816
146k
            ent = xmlParseStringPEReference(ctxt, &str);
3817
146k
            if (ent == NULL)
3818
35.4k
                return;
3819
3820
110k
            if (!PARSER_EXTERNAL(ctxt)) {
3821
1.65k
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3822
1.65k
                return;
3823
1.65k
            }
3824
3825
109k
            if (ent->content == NULL) {
3826
                /*
3827
                 * Note: external parsed entities will not be loaded,
3828
                 * it is not required for a non-validating parser to
3829
                 * complete external PEReferences coming from the
3830
                 * internal subset
3831
                 */
3832
41.7k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3833
41.7k
                    ((ctxt->replaceEntities) ||
3834
41.7k
                     (ctxt->validate))) {
3835
36.8k
                    xmlLoadEntityContent(ctxt, ent);
3836
36.8k
                } else {
3837
4.94k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3838
4.94k
                                  "not validating will not read content for "
3839
4.94k
                                  "PE entity %s\n", ent->name, NULL);
3840
4.94k
                }
3841
41.7k
            }
3842
3843
            /*
3844
             * TODO: Skip if ent->content is still NULL.
3845
             */
3846
3847
109k
            if (xmlParserEntityCheck(ctxt, ent->length))
3848
89
                return;
3849
3850
108k
            if (ent->flags & XML_ENT_EXPANDING) {
3851
152
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3852
152
                xmlHaltParser(ctxt);
3853
152
                return;
3854
152
            }
3855
3856
108k
            ent->flags |= XML_ENT_EXPANDING;
3857
108k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3858
108k
                                      depth);
3859
108k
            ent->flags &= ~XML_ENT_EXPANDING;
3860
3861
108k
            chunk = str;
3862
236M
        } else {
3863
            /* Normal ASCII char */
3864
236M
            if (!IS_BYTE_CHAR(c)) {
3865
15.8M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3866
15.8M
                        "invalid character in entity value\n");
3867
15.8M
                if (chunk < str)
3868
111k
                    xmlSBufAddString(buf, chunk, str - chunk);
3869
15.8M
                xmlSBufAddReplChar(buf);
3870
15.8M
                str += 1;
3871
15.8M
                chunk = str;
3872
220M
            } else {
3873
220M
                str += 1;
3874
220M
            }
3875
236M
        }
3876
564M
    }
3877
3878
281k
    if (chunk < str)
3879
190k
        xmlSBufAddString(buf, chunk, str - chunk);
3880
281k
}
3881
3882
/**
3883
 * xmlParseEntityValue:
3884
 * @ctxt:  an XML parser context
3885
 * @orig:  if non-NULL store a copy of the original entity value
3886
 *
3887
 * DEPRECATED: Internal function, don't use.
3888
 *
3889
 * parse a value for ENTITY declarations
3890
 *
3891
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3892
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3893
 *
3894
 * Returns the EntityValue parsed with reference substituted or NULL
3895
 */
3896
xmlChar *
3897
299k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3898
299k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3899
92.0k
                         XML_MAX_HUGE_LENGTH :
3900
299k
                         XML_MAX_TEXT_LENGTH;
3901
299k
    xmlSBuf buf;
3902
299k
    const xmlChar *start;
3903
299k
    int quote, length;
3904
3905
299k
    xmlSBufInit(&buf, maxLength);
3906
3907
299k
    GROW;
3908
3909
299k
    quote = CUR;
3910
299k
    if ((quote != '"') && (quote != '\'')) {
3911
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3912
0
  return(NULL);
3913
0
    }
3914
299k
    CUR_PTR++;
3915
3916
299k
    length = 0;
3917
3918
    /*
3919
     * Copy raw content of the entity into a buffer
3920
     */
3921
902M
    while (1) {
3922
902M
        int c;
3923
3924
902M
        if (PARSER_STOPPED(ctxt))
3925
13
            goto error;
3926
3927
902M
        if (CUR_PTR >= ctxt->input->end) {
3928
1.30k
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3929
1.30k
            goto error;
3930
1.30k
        }
3931
3932
902M
        c = CUR;
3933
3934
902M
        if (c == 0) {
3935
311
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3936
311
                    "invalid character in entity value\n");
3937
311
            goto error;
3938
311
        }
3939
902M
        if (c == quote)
3940
297k
            break;
3941
902M
        NEXTL(1);
3942
902M
        length += 1;
3943
3944
        /*
3945
         * TODO: Check growth threshold
3946
         */
3947
902M
        if (ctxt->input->end - CUR_PTR < 10)
3948
142k
            GROW;
3949
902M
    }
3950
3951
297k
    start = CUR_PTR - length;
3952
3953
297k
    if (orig != NULL) {
3954
297k
        *orig = xmlStrndup(start, length);
3955
297k
        if (*orig == NULL)
3956
107
            xmlErrMemory(ctxt);
3957
297k
    }
3958
3959
297k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3960
3961
297k
    NEXTL(1);
3962
3963
297k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3964
3965
1.63k
error:
3966
1.63k
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3967
1.63k
    return(NULL);
3968
299k
}
3969
3970
/**
3971
 * xmlCheckEntityInAttValue:
3972
 * @ctxt:  parser context
3973
 * @pent:  entity
3974
 * @depth:  nesting depth
3975
 *
3976
 * Check an entity reference in an attribute value for validity
3977
 * without expanding it.
3978
 */
3979
static void
3980
20.9k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3981
20.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3982
20.9k
    const xmlChar *str;
3983
20.9k
    unsigned long expandedSize = pent->length;
3984
20.9k
    int c, flags;
3985
3986
20.9k
    depth += 1;
3987
20.9k
    if (depth > maxDepth) {
3988
15
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3989
15
                       "Maximum entity nesting depth exceeded");
3990
15
  return;
3991
15
    }
3992
3993
20.9k
    if (pent->flags & XML_ENT_EXPANDING) {
3994
1.15k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3995
1.15k
        xmlHaltParser(ctxt);
3996
1.15k
        return;
3997
1.15k
    }
3998
3999
    /*
4000
     * If we're parsing a default attribute value in DTD content,
4001
     * the entity might reference other entities which weren't
4002
     * defined yet, so the check isn't reliable.
4003
     */
4004
19.7k
    if (ctxt->inSubset == 0)
4005
17.6k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4006
2.13k
    else
4007
2.13k
        flags = XML_ENT_VALIDATED;
4008
4009
19.7k
    str = pent->content;
4010
19.7k
    if (str == NULL)
4011
35
        goto done;
4012
4013
    /*
4014
     * Note that entity values are already validated. We only check
4015
     * for illegal less-than signs and compute the expanded size
4016
     * of the entity. No special handling for multi-byte characters
4017
     * is needed.
4018
     */
4019
65.0M
    while (!PARSER_STOPPED(ctxt)) {
4020
65.0M
        c = *str;
4021
4022
65.0M
  if (c != '&') {
4023
65.0M
            if (c == 0)
4024
18.3k
                break;
4025
4026
65.0M
            if (c == '<')
4027
8.32k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4028
8.32k
                        "'<' in entity '%s' is not allowed in attributes "
4029
8.32k
                        "values\n", pent->name);
4030
4031
65.0M
            str += 1;
4032
65.0M
        } else if (str[1] == '#') {
4033
3.86k
            int val;
4034
4035
3.86k
      val = xmlParseStringCharRef(ctxt, &str);
4036
3.86k
      if (val == 0) {
4037
76
                pent->content[0] = 0;
4038
76
                break;
4039
76
            }
4040
38.6k
  } else {
4041
38.6k
            xmlChar *name;
4042
38.6k
            xmlEntityPtr ent;
4043
4044
38.6k
      name = xmlParseStringEntityRef(ctxt, &str);
4045
38.6k
      if (name == NULL) {
4046
99
                pent->content[0] = 0;
4047
99
                break;
4048
99
            }
4049
4050
38.5k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4051
38.5k
            xmlFree(name);
4052
4053
38.5k
            if ((ent != NULL) &&
4054
38.5k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4055
28.6k
                if ((ent->flags & flags) != flags) {
4056
13.9k
                    pent->flags |= XML_ENT_EXPANDING;
4057
13.9k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4058
13.9k
                    pent->flags &= ~XML_ENT_EXPANDING;
4059
13.9k
                }
4060
4061
28.6k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4062
28.6k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4063
28.6k
            }
4064
38.5k
        }
4065
65.0M
    }
4066
4067
19.7k
done:
4068
19.7k
    if (ctxt->inSubset == 0)
4069
17.6k
        pent->expandedSize = expandedSize;
4070
4071
19.7k
    pent->flags |= flags;
4072
19.7k
}
4073
4074
/**
4075
 * xmlExpandEntityInAttValue:
4076
 * @ctxt:  parser context
4077
 * @buf:  string buffer
4078
 * @str:  entity or attribute value
4079
 * @pent:  entity for entity value, NULL for attribute values
4080
 * @normalize:  whether to collapse whitespace
4081
 * @inSpace:  whitespace state
4082
 * @depth:  nesting depth
4083
 * @check:  whether to check for amplification
4084
 *
4085
 * Expand general entity references in an entity or attribute value.
4086
 * Perform attribute value normalization.
4087
 */
4088
static void
4089
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4090
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4091
6.76M
                          int *inSpace, int depth, int check) {
4092
6.76M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4093
6.76M
    int c, chunkSize;
4094
4095
6.76M
    if (str == NULL)
4096
201
        return;
4097
4098
6.76M
    depth += 1;
4099
6.76M
    if (depth > maxDepth) {
4100
72.1k
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4101
72.1k
                       "Maximum entity nesting depth exceeded");
4102
72.1k
  return;
4103
72.1k
    }
4104
4105
6.69M
    if (pent != NULL) {
4106
6.32M
        if (pent->flags & XML_ENT_EXPANDING) {
4107
467
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4108
467
            xmlHaltParser(ctxt);
4109
467
            return;
4110
467
        }
4111
4112
6.32M
        if (check) {
4113
5.98M
            if (xmlParserEntityCheck(ctxt, pent->length))
4114
488
                return;
4115
5.98M
        }
4116
6.32M
    }
4117
4118
6.69M
    chunkSize = 0;
4119
4120
    /*
4121
     * Note that entity values are already validated. No special
4122
     * handling for multi-byte characters is needed.
4123
     */
4124
4.25G
    while (!PARSER_STOPPED(ctxt)) {
4125
4.25G
        c = *str;
4126
4127
4.25G
  if (c != '&') {
4128
4.24G
            if (c == 0)
4129
6.47M
                break;
4130
4131
            /*
4132
             * If this function is called without an entity, it is used to
4133
             * expand entities in an attribute content where less-than was
4134
             * already unscaped and is allowed.
4135
             */
4136
4.23G
            if ((pent != NULL) && (c == '<')) {
4137
208k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4138
208k
                        "'<' in entity '%s' is not allowed in attributes "
4139
208k
                        "values\n", pent->name);
4140
208k
                break;
4141
208k
            }
4142
4143
4.23G
            if (c <= 0x20) {
4144
63.7M
                if ((normalize) && (*inSpace)) {
4145
                    /* Skip char */
4146
1.14M
                    if (chunkSize > 0) {
4147
229k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4148
229k
                        chunkSize = 0;
4149
229k
                    }
4150
62.6M
                } else if (c < 0x20) {
4151
32.0M
                    if (chunkSize > 0) {
4152
3.38M
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4153
3.38M
                        chunkSize = 0;
4154
3.38M
                    }
4155
4156
32.0M
                    xmlSBufAddCString(buf, " ", 1);
4157
32.0M
                } else {
4158
30.5M
                    chunkSize += 1;
4159
30.5M
                }
4160
4161
63.7M
                *inSpace = 1;
4162
4.16G
            } else {
4163
4.16G
                chunkSize += 1;
4164
4.16G
                *inSpace = 0;
4165
4.16G
            }
4166
4167
4.23G
            str += 1;
4168
4.23G
        } else if (str[1] == '#') {
4169
3.23M
            int val;
4170
4171
3.23M
            if (chunkSize > 0) {
4172
3.16M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4173
3.16M
                chunkSize = 0;
4174
3.16M
            }
4175
4176
3.23M
      val = xmlParseStringCharRef(ctxt, &str);
4177
3.23M
      if (val == 0) {
4178
48
                if (pent != NULL)
4179
48
                    pent->content[0] = 0;
4180
48
                break;
4181
48
            }
4182
4183
3.23M
            if (val == ' ') {
4184
61.3k
                if ((!normalize) || (!*inSpace))
4185
58.8k
                    xmlSBufAddCString(buf, " ", 1);
4186
61.3k
                *inSpace = 1;
4187
3.16M
            } else {
4188
3.16M
                xmlSBufAddChar(buf, val);
4189
3.16M
                *inSpace = 0;
4190
3.16M
            }
4191
8.50M
  } else {
4192
8.50M
            xmlChar *name;
4193
8.50M
            xmlEntityPtr ent;
4194
4195
8.50M
            if (chunkSize > 0) {
4196
3.53M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4197
3.53M
                chunkSize = 0;
4198
3.53M
            }
4199
4200
8.50M
      name = xmlParseStringEntityRef(ctxt, &str);
4201
8.50M
            if (name == NULL) {
4202
210
                if (pent != NULL)
4203
198
                    pent->content[0] = 0;
4204
210
                break;
4205
210
            }
4206
4207
8.50M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4208
8.50M
            xmlFree(name);
4209
4210
8.50M
      if ((ent != NULL) &&
4211
8.50M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4212
3.25M
    if (ent->content == NULL) {
4213
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4214
0
          "predefined entity has no content\n");
4215
0
                    break;
4216
0
                }
4217
4218
3.25M
                xmlSBufAddString(buf, ent->content, ent->length);
4219
4220
3.25M
                *inSpace = 0;
4221
5.25M
      } else if ((ent != NULL) && (ent->content != NULL)) {
4222
4.86M
                if (pent != NULL)
4223
4.76M
                    pent->flags |= XML_ENT_EXPANDING;
4224
4.86M
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4225
4.86M
                                          normalize, inSpace, depth, check);
4226
4.86M
                if (pent != NULL)
4227
4.76M
                    pent->flags &= ~XML_ENT_EXPANDING;
4228
4.86M
      }
4229
8.50M
        }
4230
4.25G
    }
4231
4232
6.69M
    if (chunkSize > 0)
4233
4.05M
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4234
6.69M
}
4235
4236
/**
4237
 * xmlExpandEntitiesInAttValue:
4238
 * @ctxt:  parser context
4239
 * @str:  entity or attribute value
4240
 * @normalize:  whether to collapse whitespace
4241
 *
4242
 * Expand general entity references in an entity or attribute value.
4243
 * Perform attribute value normalization.
4244
 *
4245
 * Returns the expanded attribtue value.
4246
 */
4247
xmlChar *
4248
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4249
361k
                            int normalize) {
4250
361k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4251
129k
                         XML_MAX_HUGE_LENGTH :
4252
361k
                         XML_MAX_TEXT_LENGTH;
4253
361k
    xmlSBuf buf;
4254
361k
    int inSpace = 1;
4255
4256
361k
    xmlSBufInit(&buf, maxLength);
4257
4258
361k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4259
361k
                              ctxt->inputNr, /* check */ 0);
4260
4261
361k
    if ((normalize) && (inSpace) && (buf.size > 0))
4262
0
        buf.size--;
4263
4264
361k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4265
361k
}
4266
4267
/**
4268
 * xmlParseAttValueInternal:
4269
 * @ctxt:  an XML parser context
4270
 * @len:  attribute len result
4271
 * @alloc:  whether the attribute was reallocated as a new string
4272
 * @normalize:  if 1 then further non-CDATA normalization must be done
4273
 *
4274
 * parse a value for an attribute.
4275
 * NOTE: if no normalization is needed, the routine will return pointers
4276
 *       directly from the data buffer.
4277
 *
4278
 * 3.3.3 Attribute-Value Normalization:
4279
 * Before the value of an attribute is passed to the application or
4280
 * checked for validity, the XML processor must normalize it as follows:
4281
 * - a character reference is processed by appending the referenced
4282
 *   character to the attribute value
4283
 * - an entity reference is processed by recursively processing the
4284
 *   replacement text of the entity
4285
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4286
 *   appending #x20 to the normalized value, except that only a single
4287
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4288
 *   parsed entity or the literal entity value of an internal parsed entity
4289
 * - other characters are processed by appending them to the normalized value
4290
 * If the declared value is not CDATA, then the XML processor must further
4291
 * process the normalized attribute value by discarding any leading and
4292
 * trailing space (#x20) characters, and by replacing sequences of space
4293
 * (#x20) characters by a single space (#x20) character.
4294
 * All attributes for which no declaration has been read should be treated
4295
 * by a non-validating parser as if declared CDATA.
4296
 *
4297
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4298
 *     caller if it was copied, this can be detected by val[*len] == 0.
4299
 */
4300
static xmlChar *
4301
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4302
4.91M
                         int normalize, int isNamespace) {
4303
4.91M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4304
2.24M
                         XML_MAX_HUGE_LENGTH :
4305
4.91M
                         XML_MAX_TEXT_LENGTH;
4306
4.91M
    xmlSBuf buf;
4307
4.91M
    xmlChar *ret;
4308
4.91M
    int c, l, quote, flags, chunkSize;
4309
4.91M
    int inSpace = 1;
4310
4.91M
    int replaceEntities;
4311
4312
    /* Always expand namespace URIs */
4313
4.91M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4314
4315
4.91M
    xmlSBufInit(&buf, maxLength);
4316
4317
4.91M
    GROW;
4318
4319
4.91M
    quote = CUR;
4320
4.91M
    if ((quote != '"') && (quote != '\'')) {
4321
142k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4322
142k
  return(NULL);
4323
142k
    }
4324
4.77M
    NEXTL(1);
4325
4326
4.77M
    if (ctxt->inSubset == 0)
4327
4.41M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4328
363k
    else
4329
363k
        flags = XML_ENT_VALIDATED;
4330
4331
4.77M
    inSpace = 1;
4332
4.77M
    chunkSize = 0;
4333
4334
790M
    while (1) {
4335
790M
        if (PARSER_STOPPED(ctxt))
4336
3.12k
            goto error;
4337
4338
790M
        if (CUR_PTR >= ctxt->input->end) {
4339
41.1k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4340
41.1k
                           "AttValue: ' expected\n");
4341
41.1k
            goto error;
4342
41.1k
        }
4343
4344
        /*
4345
         * TODO: Check growth threshold
4346
         */
4347
790M
        if (ctxt->input->end - CUR_PTR < 10)
4348
647k
            GROW;
4349
4350
790M
        c = CUR;
4351
4352
790M
        if (c >= 0x80) {
4353
345M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4354
345M
                    "invalid character in attribute value\n");
4355
345M
            if (l == 0) {
4356
251M
                if (chunkSize > 0) {
4357
1.91M
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4358
1.91M
                    chunkSize = 0;
4359
1.91M
                }
4360
251M
                xmlSBufAddReplChar(&buf);
4361
251M
                NEXTL(1);
4362
251M
            } else {
4363
93.6M
                chunkSize += l;
4364
93.6M
                NEXTL(l);
4365
93.6M
            }
4366
4367
345M
            inSpace = 0;
4368
445M
        } else if (c != '&') {
4369
440M
            if (c > 0x20) {
4370
147M
                if (c == quote)
4371
4.68M
                    break;
4372
4373
142M
                if (c == '<')
4374
4.03M
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4375
4376
142M
                chunkSize += 1;
4377
142M
                inSpace = 0;
4378
293M
            } else if (!IS_BYTE_CHAR(c)) {
4379
247M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4380
247M
                        "invalid character in attribute value\n");
4381
247M
                if (chunkSize > 0) {
4382
730k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
730k
                    chunkSize = 0;
4384
730k
                }
4385
247M
                xmlSBufAddReplChar(&buf);
4386
247M
                inSpace = 0;
4387
247M
            } else {
4388
                /* Whitespace */
4389
45.0M
                if ((normalize) && (inSpace)) {
4390
                    /* Skip char */
4391
1.21M
                    if (chunkSize > 0) {
4392
45.2k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4393
45.2k
                        chunkSize = 0;
4394
45.2k
                    }
4395
43.8M
                } else if (c < 0x20) {
4396
                    /* Convert to space */
4397
36.6M
                    if (chunkSize > 0) {
4398
1.45M
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4399
1.45M
                        chunkSize = 0;
4400
1.45M
                    }
4401
4402
36.6M
                    xmlSBufAddCString(&buf, " ", 1);
4403
36.6M
                } else {
4404
7.16M
                    chunkSize += 1;
4405
7.16M
                }
4406
4407
45.0M
                inSpace = 1;
4408
4409
45.0M
                if ((c == 0xD) && (NXT(1) == 0xA))
4410
67.7k
                    CUR_PTR++;
4411
45.0M
            }
4412
4413
435M
            NEXTL(1);
4414
435M
        } else if (NXT(1) == '#') {
4415
714k
            int val;
4416
4417
714k
            if (chunkSize > 0) {
4418
337k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4419
337k
                chunkSize = 0;
4420
337k
            }
4421
4422
714k
            val = xmlParseCharRef(ctxt);
4423
714k
            if (val == 0)
4424
45.0k
                goto error;
4425
4426
669k
            if ((val == '&') && (!replaceEntities)) {
4427
                /*
4428
                 * The reparsing will be done in xmlNodeParseContent()
4429
                 * called from SAX2.c
4430
                 */
4431
34.8k
                xmlSBufAddCString(&buf, "&#38;", 5);
4432
34.8k
                inSpace = 0;
4433
634k
            } else if (val == ' ') {
4434
205k
                if ((!normalize) || (!inSpace))
4435
201k
                    xmlSBufAddCString(&buf, " ", 1);
4436
205k
                inSpace = 1;
4437
429k
            } else {
4438
429k
                xmlSBufAddChar(&buf, val);
4439
429k
                inSpace = 0;
4440
429k
            }
4441
4.51M
        } else {
4442
4.51M
            const xmlChar *name;
4443
4.51M
            xmlEntityPtr ent;
4444
4445
4.51M
            if (chunkSize > 0) {
4446
2.10M
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4447
2.10M
                chunkSize = 0;
4448
2.10M
            }
4449
4450
4.51M
            name = xmlParseEntityRefInternal(ctxt);
4451
4.51M
            if (name == NULL) {
4452
                /*
4453
                 * Probably a literal '&' which wasn't escaped.
4454
                 * TODO: Handle gracefully in recovery mode.
4455
                 */
4456
508k
                continue;
4457
508k
            }
4458
4459
4.00M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4460
4.00M
            if (ent == NULL)
4461
547k
                continue;
4462
4463
3.45M
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4464
424k
                if ((ent->content[0] == '&') && (!replaceEntities))
4465
63.4k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4466
361k
                else
4467
361k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4468
424k
                inSpace = 0;
4469
3.02M
            } else if (replaceEntities) {
4470
1.54M
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4471
1.54M
                                          normalize, &inSpace, ctxt->inputNr,
4472
1.54M
                                          /* check */ 1);
4473
1.54M
            } else {
4474
1.48M
                if ((ent->flags & flags) != flags)
4475
7.00k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4476
4477
1.48M
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4478
397
                    ent->content[0] = 0;
4479
397
                    goto error;
4480
397
                }
4481
4482
                /*
4483
                 * Just output the reference
4484
                 */
4485
1.48M
                xmlSBufAddCString(&buf, "&", 1);
4486
1.48M
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4487
1.48M
                xmlSBufAddCString(&buf, ";", 1);
4488
4489
1.48M
                inSpace = 0;
4490
1.48M
            }
4491
3.45M
  }
4492
790M
    }
4493
4494
4.68M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4495
2.50M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4496
4497
2.50M
        if (attlen != NULL)
4498
2.50M
            *attlen = chunkSize;
4499
2.50M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4500
4.07k
            *attlen -= 1;
4501
2.50M
        *alloc = 0;
4502
4503
        /* Report potential error */
4504
2.50M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4505
2.50M
    } else {
4506
2.18M
        if (chunkSize > 0)
4507
1.54M
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4508
4509
2.18M
        if ((normalize) && (inSpace) && (buf.size > 0))
4510
10.8k
            buf.size--;
4511
4512
2.18M
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4513
4514
2.18M
        if (ret != NULL) {
4515
2.18M
            if (attlen != NULL)
4516
654k
                *attlen = buf.size;
4517
2.18M
            if (alloc != NULL)
4518
654k
                *alloc = 1;
4519
2.18M
        }
4520
2.18M
    }
4521
4522
4.68M
    NEXTL(1);
4523
4524
4.68M
    return(ret);
4525
4526
89.7k
error:
4527
89.7k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4528
89.7k
    return(NULL);
4529
4.77M
}
4530
4531
/**
4532
 * xmlParseAttValue:
4533
 * @ctxt:  an XML parser context
4534
 *
4535
 * DEPRECATED: Internal function, don't use.
4536
 *
4537
 * parse a value for an attribute
4538
 * Note: the parser won't do substitution of entities here, this
4539
 * will be handled later in xmlStringGetNodeList
4540
 *
4541
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4542
 *                   "'" ([^<&'] | Reference)* "'"
4543
 *
4544
 * 3.3.3 Attribute-Value Normalization:
4545
 * Before the value of an attribute is passed to the application or
4546
 * checked for validity, the XML processor must normalize it as follows:
4547
 * - a character reference is processed by appending the referenced
4548
 *   character to the attribute value
4549
 * - an entity reference is processed by recursively processing the
4550
 *   replacement text of the entity
4551
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4552
 *   appending #x20 to the normalized value, except that only a single
4553
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4554
 *   parsed entity or the literal entity value of an internal parsed entity
4555
 * - other characters are processed by appending them to the normalized value
4556
 * If the declared value is not CDATA, then the XML processor must further
4557
 * process the normalized attribute value by discarding any leading and
4558
 * trailing space (#x20) characters, and by replacing sequences of space
4559
 * (#x20) characters by a single space (#x20) character.
4560
 * All attributes for which no declaration has been read should be treated
4561
 * by a non-validating parser as if declared CDATA.
4562
 *
4563
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4564
 */
4565
4566
4567
xmlChar *
4568
1.64M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4569
1.64M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4570
1.64M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4571
1.64M
}
4572
4573
/**
4574
 * xmlParseSystemLiteral:
4575
 * @ctxt:  an XML parser context
4576
 *
4577
 * DEPRECATED: Internal function, don't use.
4578
 *
4579
 * parse an XML Literal
4580
 *
4581
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4582
 *
4583
 * Returns the SystemLiteral parsed or NULL
4584
 */
4585
4586
xmlChar *
4587
259k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4588
259k
    xmlChar *buf = NULL;
4589
259k
    int len = 0;
4590
259k
    int size = XML_PARSER_BUFFER_SIZE;
4591
259k
    int cur, l;
4592
259k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4593
66.7k
                    XML_MAX_TEXT_LENGTH :
4594
259k
                    XML_MAX_NAME_LENGTH;
4595
259k
    xmlChar stop;
4596
4597
259k
    if (RAW == '"') {
4598
219k
        NEXT;
4599
219k
  stop = '"';
4600
219k
    } else if (RAW == '\'') {
4601
29.3k
        NEXT;
4602
29.3k
  stop = '\'';
4603
29.3k
    } else {
4604
9.94k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4605
9.94k
  return(NULL);
4606
9.94k
    }
4607
4608
249k
    buf = xmlMalloc(size);
4609
249k
    if (buf == NULL) {
4610
103
        xmlErrMemory(ctxt);
4611
103
  return(NULL);
4612
103
    }
4613
249k
    cur = xmlCurrentCharRecover(ctxt, &l);
4614
15.0M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4615
14.8M
  if (len + 5 >= size) {
4616
14.6k
      xmlChar *tmp;
4617
4618
14.6k
      size *= 2;
4619
14.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4620
14.6k
      if (tmp == NULL) {
4621
19
          xmlFree(buf);
4622
19
    xmlErrMemory(ctxt);
4623
19
    return(NULL);
4624
19
      }
4625
14.6k
      buf = tmp;
4626
14.6k
  }
4627
14.8M
  COPY_BUF(buf, len, cur);
4628
14.8M
        if (len > maxLength) {
4629
35
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4630
35
            xmlFree(buf);
4631
35
            return(NULL);
4632
35
        }
4633
14.8M
  NEXTL(l);
4634
14.8M
  cur = xmlCurrentCharRecover(ctxt, &l);
4635
14.8M
    }
4636
249k
    buf[len] = 0;
4637
249k
    if (!IS_CHAR(cur)) {
4638
8.04k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4639
240k
    } else {
4640
240k
  NEXT;
4641
240k
    }
4642
249k
    return(buf);
4643
249k
}
4644
4645
/**
4646
 * xmlParsePubidLiteral:
4647
 * @ctxt:  an XML parser context
4648
 *
4649
 * DEPRECATED: Internal function, don't use.
4650
 *
4651
 * parse an XML public literal
4652
 *
4653
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4654
 *
4655
 * Returns the PubidLiteral parsed or NULL.
4656
 */
4657
4658
xmlChar *
4659
143k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4660
143k
    xmlChar *buf = NULL;
4661
143k
    int len = 0;
4662
143k
    int size = XML_PARSER_BUFFER_SIZE;
4663
143k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4664
34.4k
                    XML_MAX_TEXT_LENGTH :
4665
143k
                    XML_MAX_NAME_LENGTH;
4666
143k
    xmlChar cur;
4667
143k
    xmlChar stop;
4668
4669
143k
    if (RAW == '"') {
4670
105k
        NEXT;
4671
105k
  stop = '"';
4672
105k
    } else if (RAW == '\'') {
4673
35.5k
        NEXT;
4674
35.5k
  stop = '\'';
4675
35.5k
    } else {
4676
2.94k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4677
2.94k
  return(NULL);
4678
2.94k
    }
4679
140k
    buf = xmlMalloc(size);
4680
140k
    if (buf == NULL) {
4681
62
  xmlErrMemory(ctxt);
4682
62
  return(NULL);
4683
62
    }
4684
140k
    cur = CUR;
4685
1.14M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4686
1.14M
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4687
1.00M
  if (len + 1 >= size) {
4688
1.57k
      xmlChar *tmp;
4689
4690
1.57k
      size *= 2;
4691
1.57k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4692
1.57k
      if (tmp == NULL) {
4693
13
    xmlErrMemory(ctxt);
4694
13
    xmlFree(buf);
4695
13
    return(NULL);
4696
13
      }
4697
1.56k
      buf = tmp;
4698
1.56k
  }
4699
1.00M
  buf[len++] = cur;
4700
1.00M
        if (len > maxLength) {
4701
3
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4702
3
            xmlFree(buf);
4703
3
            return(NULL);
4704
3
        }
4705
1.00M
  NEXT;
4706
1.00M
  cur = CUR;
4707
1.00M
    }
4708
140k
    buf[len] = 0;
4709
140k
    if (cur != stop) {
4710
18.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4711
122k
    } else {
4712
122k
  NEXTL(1);
4713
122k
    }
4714
140k
    return(buf);
4715
140k
}
4716
4717
/*
 * Forward declaration: xmlParseCharDataInternal() below hands over to
 * this routine, which is only defined after it in this file.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4718
4719
/*
 * Lookup table driving the inner scanning loop of
 * xmlParseCharDataInternal(): that loop keeps advancing while the entry
 * for the current byte is non-zero.
 *
 * An entry equals its own index for the tab (0x09) and for every byte in
 * 0x20..0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D). All other
 * entries are zero, including CR/LF, the remaining control bytes and
 * every non-ASCII byte (0x80..0xFF), so those stop the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only the tab passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the loop */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the loop */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the loop */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ascii): left out, implicitly initialised to 0x00 */
};
4756
4757
/**
4758
 * xmlParseCharDataInternal:
4759
 * @ctxt:  an XML parser context
4760
 * @partial:  buffer may contain partial UTF-8 sequences
4761
 *
4762
 * Parse character data. Always makes progress if the first char isn't
4763
 * '<' or '&'.
4764
 *
4765
 * The right angle bracket (>) may be represented using the string "&gt;",
4766
 * and must, for compatibility, be escaped using "&gt;" or a character
4767
 * reference when it appears in the string "]]>" in content, when that
4768
 * string is not marking the end of a CDATA section.
4769
 *
4770
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4771
 */
4772
static void
4773
53.7M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4774
53.7M
    const xmlChar *in;
4775
53.7M
    int nbchar = 0;
4776
53.7M
    int line = ctxt->input->line;
4777
53.7M
    int col = ctxt->input->col;
4778
53.7M
    int ccol;
4779
4780
53.7M
    GROW;
4781
    /*
4782
     * Accelerated common case where input don't need to be
4783
     * modified before passing it to the handler.
4784
     */
4785
53.7M
    in = ctxt->input->cur;
4786
54.0M
    do {
4787
55.6M
get_more_space:
4788
60.2M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4789
55.6M
        if (*in == 0xA) {
4790
25.0M
            do {
4791
25.0M
                ctxt->input->line++; ctxt->input->col = 1;
4792
25.0M
                in++;
4793
25.0M
            } while (*in == 0xA);
4794
1.62M
            goto get_more_space;
4795
1.62M
        }
4796
54.0M
        if (*in == '<') {
4797
1.35M
            nbchar = in - ctxt->input->cur;
4798
1.35M
            if (nbchar > 0) {
4799
1.35M
                const xmlChar *tmp = ctxt->input->cur;
4800
1.35M
                ctxt->input->cur = in;
4801
4802
1.35M
                if ((ctxt->sax != NULL) &&
4803
1.35M
                    (ctxt->disableSAX == 0) &&
4804
1.35M
                    (ctxt->sax->ignorableWhitespace !=
4805
1.28M
                     ctxt->sax->characters)) {
4806
635k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4807
413k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4808
413k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4809
413k
                                                   tmp, nbchar);
4810
413k
                    } else {
4811
222k
                        if (ctxt->sax->characters != NULL)
4812
216k
                            ctxt->sax->characters(ctxt->userData,
4813
216k
                                                  tmp, nbchar);
4814
222k
                        if (*ctxt->space == -1)
4815
34.4k
                            *ctxt->space = -2;
4816
222k
                    }
4817
724k
                } else if ((ctxt->sax != NULL) &&
4818
724k
                           (ctxt->disableSAX == 0) &&
4819
724k
                           (ctxt->sax->characters != NULL)) {
4820
644k
                    ctxt->sax->characters(ctxt->userData,
4821
644k
                                          tmp, nbchar);
4822
644k
                }
4823
1.35M
            }
4824
1.35M
            return;
4825
1.35M
        }
4826
4827
54.6M
get_more:
4828
54.6M
        ccol = ctxt->input->col;
4829
175M
        while (test_char_data[*in]) {
4830
120M
            in++;
4831
120M
            ccol++;
4832
120M
        }
4833
54.6M
        ctxt->input->col = ccol;
4834
54.6M
        if (*in == 0xA) {
4835
16.5M
            do {
4836
16.5M
                ctxt->input->line++; ctxt->input->col = 1;
4837
16.5M
                in++;
4838
16.5M
            } while (*in == 0xA);
4839
1.48M
            goto get_more;
4840
1.48M
        }
4841
53.1M
        if (*in == ']') {
4842
543k
            if ((in[1] == ']') && (in[2] == '>')) {
4843
79.8k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4844
79.8k
                ctxt->input->cur = in + 1;
4845
79.8k
                return;
4846
79.8k
            }
4847
463k
            in++;
4848
463k
            ctxt->input->col++;
4849
463k
            goto get_more;
4850
543k
        }
4851
52.5M
        nbchar = in - ctxt->input->cur;
4852
52.5M
        if (nbchar > 0) {
4853
9.25M
            if ((ctxt->sax != NULL) &&
4854
9.25M
                (ctxt->disableSAX == 0) &&
4855
9.25M
                (ctxt->sax->ignorableWhitespace !=
4856
6.85M
                 ctxt->sax->characters) &&
4857
9.25M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4858
484k
                const xmlChar *tmp = ctxt->input->cur;
4859
484k
                ctxt->input->cur = in;
4860
4861
484k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4862
30.5k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4863
30.5k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4864
30.5k
                                                       tmp, nbchar);
4865
454k
                } else {
4866
454k
                    if (ctxt->sax->characters != NULL)
4867
450k
                        ctxt->sax->characters(ctxt->userData,
4868
450k
                                              tmp, nbchar);
4869
454k
                    if (*ctxt->space == -1)
4870
176k
                        *ctxt->space = -2;
4871
454k
                }
4872
484k
                line = ctxt->input->line;
4873
484k
                col = ctxt->input->col;
4874
8.76M
            } else if ((ctxt->sax != NULL) &&
4875
8.76M
                       (ctxt->disableSAX == 0)) {
4876
6.37M
                if (ctxt->sax->characters != NULL)
4877
6.34M
                    ctxt->sax->characters(ctxt->userData,
4878
6.34M
                                          ctxt->input->cur, nbchar);
4879
6.37M
                line = ctxt->input->line;
4880
6.37M
                col = ctxt->input->col;
4881
6.37M
            }
4882
9.25M
        }
4883
52.5M
        ctxt->input->cur = in;
4884
52.5M
        if (*in == 0xD) {
4885
366k
            in++;
4886
366k
            if (*in == 0xA) {
4887
263k
                ctxt->input->cur = in;
4888
263k
                in++;
4889
263k
                ctxt->input->line++; ctxt->input->col = 1;
4890
263k
                continue; /* while */
4891
263k
            }
4892
102k
            in--;
4893
102k
        }
4894
52.3M
        if (*in == '<') {
4895
3.78M
            return;
4896
3.78M
        }
4897
48.5M
        if (*in == '&') {
4898
1.02M
            return;
4899
1.02M
        }
4900
47.5M
        SHRINK;
4901
47.5M
        GROW;
4902
47.5M
        in = ctxt->input->cur;
4903
47.7M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4904
47.7M
             (*in == 0x09) || (*in == 0x0a));
4905
47.5M
    ctxt->input->line = line;
4906
47.5M
    ctxt->input->col = col;
4907
47.5M
    xmlParseCharDataComplex(ctxt, partial);
4908
47.5M
}
4909
4910
/**
4911
 * xmlParseCharDataComplex:
4912
 * @ctxt:  an XML parser context
4913
 * @cdata:  int indicating whether we are within a CDATA section
4914
 *
4915
 * Always makes progress if the first char isn't '<' or '&'.
4916
 *
4917
 * parse a CharData section.this is the fallback function
4918
 * of xmlParseCharData() when the parsing requires handling
4919
 * of non-ASCII characters.
4920
 */
4921
static void
4922
47.5M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4923
47.5M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4924
47.5M
    int nbchar = 0;
4925
47.5M
    int cur, l;
4926
4927
47.5M
    cur = xmlCurrentCharRecover(ctxt, &l);
4928
316M
    while ((cur != '<') && /* checked */
4929
316M
           (cur != '&') &&
4930
316M
     (IS_CHAR(cur))) {
4931
268M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4932
37.9k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4933
37.9k
  }
4934
268M
  COPY_BUF(buf, nbchar, cur);
4935
  /* move current position before possible calling of ctxt->sax->characters */
4936
268M
  NEXTL(l);
4937
268M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4938
1.62M
      buf[nbchar] = 0;
4939
4940
      /*
4941
       * OK the segment is to be consumed as chars.
4942
       */
4943
1.62M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4944
1.59M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4945
738
        if (ctxt->sax->ignorableWhitespace != NULL)
4946
738
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4947
738
                                     buf, nbchar);
4948
1.59M
    } else {
4949
1.59M
        if (ctxt->sax->characters != NULL)
4950
1.59M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4951
1.59M
        if ((ctxt->sax->characters !=
4952
1.59M
             ctxt->sax->ignorableWhitespace) &&
4953
1.59M
      (*ctxt->space == -1))
4954
7.52k
      *ctxt->space = -2;
4955
1.59M
    }
4956
1.59M
      }
4957
1.62M
      nbchar = 0;
4958
1.62M
            SHRINK;
4959
1.62M
  }
4960
268M
  cur = xmlCurrentCharRecover(ctxt, &l);
4961
268M
    }
4962
47.5M
    if (nbchar != 0) {
4963
6.58M
        buf[nbchar] = 0;
4964
  /*
4965
   * OK the segment is to be consumed as chars.
4966
   */
4967
6.58M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4968
3.93M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4969
4.00k
    if (ctxt->sax->ignorableWhitespace != NULL)
4970
4.00k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4971
3.93M
      } else {
4972
3.93M
    if (ctxt->sax->characters != NULL)
4973
3.91M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4974
3.93M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4975
3.93M
        (*ctxt->space == -1))
4976
357k
        *ctxt->space = -2;
4977
3.93M
      }
4978
3.93M
  }
4979
6.58M
    }
4980
    /*
4981
     * cur == 0 can mean
4982
     *
4983
     * - End of buffer.
4984
     * - An actual 0 character.
4985
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4986
     */
4987
47.5M
    if (ctxt->input->cur < ctxt->input->end) {
4988
47.4M
        if ((cur == 0) && (CUR != 0)) {
4989
11.3k
            if (partial == 0) {
4990
9.63k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4991
9.63k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4992
9.63k
                NEXTL(1);
4993
9.63k
            }
4994
47.4M
        } else if ((cur != '<') && (cur != '&')) {
4995
            /* Generate the error and skip the offending character */
4996
44.5M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4997
44.5M
                              "PCDATA invalid Char value %d\n", cur);
4998
44.5M
            NEXTL(l);
4999
44.5M
        }
5000
47.4M
    }
5001
47.5M
}
5002
5003
/**
5004
 * xmlParseCharData:
5005
 * @ctxt:  an XML parser context
5006
 * @cdata:  unused
5007
 *
5008
 * DEPRECATED: Internal function, don't use.
5009
 */
5010
void
5011
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5012
0
    xmlParseCharDataInternal(ctxt, 0);
5013
0
}
5014
5015
/**
5016
 * xmlParseExternalID:
5017
 * @ctxt:  an XML parser context
5018
 * @publicID:  a xmlChar** receiving PubidLiteral
5019
 * @strict: indicate whether we should restrict parsing to only
5020
 *          production [75], see NOTE below
5021
 *
5022
 * DEPRECATED: Internal function, don't use.
5023
 *
5024
 * Parse an External ID or a Public ID
5025
 *
5026
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5027
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5028
 *
5029
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5030
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5031
 *
5032
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5033
 *
5034
 * Returns the function returns SystemLiteral and in the second
5035
 *                case publicID receives PubidLiteral, is strict is off
5036
 *                it is possible to return NULL and have publicID set.
5037
 */
5038
5039
xmlChar *
5040
388k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5041
388k
    xmlChar *URI = NULL;
5042
5043
388k
    *publicID = NULL;
5044
388k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5045
137k
        SKIP(6);
5046
137k
  if (SKIP_BLANKS == 0) {
5047
2.14k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5048
2.14k
                     "Space required after 'SYSTEM'\n");
5049
2.14k
  }
5050
137k
  URI = xmlParseSystemLiteral(ctxt);
5051
137k
  if (URI == NULL) {
5052
1.12k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5053
1.12k
        }
5054
251k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5055
143k
        SKIP(6);
5056
143k
  if (SKIP_BLANKS == 0) {
5057
6.95k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5058
6.95k
        "Space required after 'PUBLIC'\n");
5059
6.95k
  }
5060
143k
  *publicID = xmlParsePubidLiteral(ctxt);
5061
143k
  if (*publicID == NULL) {
5062
3.01k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5063
3.01k
  }
5064
143k
  if (strict) {
5065
      /*
5066
       * We don't handle [83] so "S SystemLiteral" is required.
5067
       */
5068
117k
      if (SKIP_BLANKS == 0) {
5069
13.8k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5070
13.8k
      "Space required after the Public Identifier\n");
5071
13.8k
      }
5072
117k
  } else {
5073
      /*
5074
       * We handle [83] so we return immediately, if
5075
       * "S SystemLiteral" is not detected. We skip blanks if no
5076
             * system literal was found, but this is harmless since we must
5077
             * be at the end of a NotationDecl.
5078
       */
5079
26.5k
      if (SKIP_BLANKS == 0) return(NULL);
5080
6.73k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5081
6.73k
  }
5082
122k
  URI = xmlParseSystemLiteral(ctxt);
5083
122k
  if (URI == NULL) {
5084
8.98k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5085
8.98k
        }
5086
122k
    }
5087
366k
    return(URI);
5088
388k
}
5089
5090
/**
5091
 * xmlParseCommentComplex:
5092
 * @ctxt:  an XML parser context
5093
 * @buf:  the already parsed part of the buffer
5094
 * @len:  number of bytes in the buffer
5095
 * @size:  allocated size of the buffer
5096
 *
5097
 * Skip an XML (SGML) comment <!-- .... -->
5098
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5099
 *  must not occur within comments. "
5100
 * This is the slow routine in case the accelerator for ascii didn't work
5101
 *
5102
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5103
 */
5104
static void
5105
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5106
347k
                       size_t len, size_t size) {
5107
347k
    int q, ql;
5108
347k
    int r, rl;
5109
347k
    int cur, l;
5110
347k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5111
70.6k
                       XML_MAX_HUGE_LENGTH :
5112
347k
                       XML_MAX_TEXT_LENGTH;
5113
5114
347k
    if (buf == NULL) {
5115
53.2k
        len = 0;
5116
53.2k
  size = XML_PARSER_BUFFER_SIZE;
5117
53.2k
  buf = xmlMalloc(size);
5118
53.2k
  if (buf == NULL) {
5119
40
      xmlErrMemory(ctxt);
5120
40
      return;
5121
40
  }
5122
53.2k
    }
5123
347k
    q = xmlCurrentCharRecover(ctxt, &ql);
5124
347k
    if (q == 0)
5125
69.6k
        goto not_terminated;
5126
278k
    if (!IS_CHAR(q)) {
5127
18.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5128
18.3k
                          "xmlParseComment: invalid xmlChar value %d\n",
5129
18.3k
                    q);
5130
18.3k
  xmlFree (buf);
5131
18.3k
  return;
5132
18.3k
    }
5133
259k
    NEXTL(ql);
5134
259k
    r = xmlCurrentCharRecover(ctxt, &rl);
5135
259k
    if (r == 0)
5136
6.06k
        goto not_terminated;
5137
253k
    if (!IS_CHAR(r)) {
5138
9.87k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5139
9.87k
                          "xmlParseComment: invalid xmlChar value %d\n",
5140
9.87k
                    r);
5141
9.87k
  xmlFree (buf);
5142
9.87k
  return;
5143
9.87k
    }
5144
243k
    NEXTL(rl);
5145
243k
    cur = xmlCurrentCharRecover(ctxt, &l);
5146
243k
    if (cur == 0)
5147
4.00k
        goto not_terminated;
5148
48.0M
    while (IS_CHAR(cur) && /* checked */
5149
48.0M
           ((cur != '>') ||
5150
47.9M
      (r != '-') || (q != '-'))) {
5151
47.8M
  if ((r == '-') && (q == '-')) {
5152
1.27M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5153
1.27M
  }
5154
47.8M
  if (len + 5 >= size) {
5155
175k
      xmlChar *new_buf;
5156
175k
            size_t new_size;
5157
5158
175k
      new_size = size * 2;
5159
175k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5160
175k
      if (new_buf == NULL) {
5161
50
    xmlFree (buf);
5162
50
    xmlErrMemory(ctxt);
5163
50
    return;
5164
50
      }
5165
175k
      buf = new_buf;
5166
175k
            size = new_size;
5167
175k
  }
5168
47.8M
  COPY_BUF(buf, len, q);
5169
47.8M
        if (len > maxLength) {
5170
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5171
0
                         "Comment too big found", NULL);
5172
0
            xmlFree (buf);
5173
0
            return;
5174
0
        }
5175
5176
47.8M
  q = r;
5177
47.8M
  ql = rl;
5178
47.8M
  r = cur;
5179
47.8M
  rl = l;
5180
5181
47.8M
  NEXTL(l);
5182
47.8M
  cur = xmlCurrentCharRecover(ctxt, &l);
5183
5184
47.8M
    }
5185
239k
    buf[len] = 0;
5186
239k
    if (cur == 0) {
5187
58.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5188
58.4k
                       "Comment not terminated \n<!--%.50s\n", buf);
5189
181k
    } else if (!IS_CHAR(cur)) {
5190
31.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5191
31.1k
                          "xmlParseComment: invalid xmlChar value %d\n",
5192
31.1k
                    cur);
5193
150k
    } else {
5194
150k
        NEXT;
5195
150k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5196
150k
      (!ctxt->disableSAX))
5197
99.6k
      ctxt->sax->comment(ctxt->userData, buf);
5198
150k
    }
5199
239k
    xmlFree(buf);
5200
239k
    return;
5201
79.7k
not_terminated:
5202
79.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5203
79.7k
       "Comment not terminated\n", NULL);
5204
79.7k
    xmlFree(buf);
5205
79.7k
}
5206
5207
/**
5208
 * xmlParseComment:
5209
 * @ctxt:  an XML parser context
5210
 *
5211
 * DEPRECATED: Internal function, don't use.
5212
 *
5213
 * Parse an XML (SGML) comment. Always consumes '<!'.
5214
 *
5215
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5216
 *  must not occur within comments. "
5217
 *
5218
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5219
 */
5220
void
5221
3.35M
xmlParseComment(xmlParserCtxtPtr ctxt) {
5222
3.35M
    xmlChar *buf = NULL;
5223
3.35M
    size_t size = XML_PARSER_BUFFER_SIZE;
5224
3.35M
    size_t len = 0;
5225
3.35M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5226
246k
                       XML_MAX_HUGE_LENGTH :
5227
3.35M
                       XML_MAX_TEXT_LENGTH;
5228
3.35M
    const xmlChar *in;
5229
3.35M
    size_t nbchar = 0;
5230
3.35M
    int ccol;
5231
5232
    /*
5233
     * Check that there is a comment right here.
5234
     */
5235
3.35M
    if ((RAW != '<') || (NXT(1) != '!'))
5236
0
        return;
5237
3.35M
    SKIP(2);
5238
3.35M
    if ((RAW != '-') || (NXT(1) != '-'))
5239
270
        return;
5240
3.35M
    SKIP(2);
5241
3.35M
    GROW;
5242
5243
    /*
5244
     * Accelerated common case where input don't need to be
5245
     * modified before passing it to the handler.
5246
     */
5247
3.35M
    in = ctxt->input->cur;
5248
3.35M
    do {
5249
3.35M
  if (*in == 0xA) {
5250
184k
      do {
5251
184k
    ctxt->input->line++; ctxt->input->col = 1;
5252
184k
    in++;
5253
184k
      } while (*in == 0xA);
5254
37.0k
  }
5255
6.03M
get_more:
5256
6.03M
        ccol = ctxt->input->col;
5257
54.6M
  while (((*in > '-') && (*in <= 0x7F)) ||
5258
54.6M
         ((*in >= 0x20) && (*in < '-')) ||
5259
54.6M
         (*in == 0x09)) {
5260
48.6M
        in++;
5261
48.6M
        ccol++;
5262
48.6M
  }
5263
6.03M
  ctxt->input->col = ccol;
5264
6.03M
  if (*in == 0xA) {
5265
2.81M
      do {
5266
2.81M
    ctxt->input->line++; ctxt->input->col = 1;
5267
2.81M
    in++;
5268
2.81M
      } while (*in == 0xA);
5269
287k
      goto get_more;
5270
287k
  }
5271
5.74M
  nbchar = in - ctxt->input->cur;
5272
  /*
5273
   * save current set of data
5274
   */
5275
5.74M
  if (nbchar > 0) {
5276
2.78M
            if (buf == NULL) {
5277
628k
                if ((*in == '-') && (in[1] == '-'))
5278
270k
                    size = nbchar + 1;
5279
358k
                else
5280
358k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5281
628k
                buf = xmlMalloc(size);
5282
628k
                if (buf == NULL) {
5283
103
                    xmlErrMemory(ctxt);
5284
103
                    return;
5285
103
                }
5286
628k
                len = 0;
5287
2.15M
            } else if (len + nbchar + 1 >= size) {
5288
212k
                xmlChar *new_buf;
5289
212k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5290
212k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5291
212k
                if (new_buf == NULL) {
5292
25
                    xmlFree (buf);
5293
25
                    xmlErrMemory(ctxt);
5294
25
                    return;
5295
25
                }
5296
212k
                buf = new_buf;
5297
212k
            }
5298
2.78M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5299
2.78M
            len += nbchar;
5300
2.78M
            buf[len] = 0;
5301
2.78M
  }
5302
5.74M
        if (len > maxLength) {
5303
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5304
0
                         "Comment too big found", NULL);
5305
0
            xmlFree (buf);
5306
0
            return;
5307
0
        }
5308
5.74M
  ctxt->input->cur = in;
5309
5.74M
  if (*in == 0xA) {
5310
0
      in++;
5311
0
      ctxt->input->line++; ctxt->input->col = 1;
5312
0
  }
5313
5.74M
  if (*in == 0xD) {
5314
300k
      in++;
5315
300k
      if (*in == 0xA) {
5316
289k
    ctxt->input->cur = in;
5317
289k
    in++;
5318
289k
    ctxt->input->line++; ctxt->input->col = 1;
5319
289k
    goto get_more;
5320
289k
      }
5321
11.0k
      in--;
5322
11.0k
  }
5323
5.45M
  SHRINK;
5324
5.45M
  GROW;
5325
5.45M
  in = ctxt->input->cur;
5326
5.45M
  if (*in == '-') {
5327
5.10M
      if (in[1] == '-') {
5328
4.26M
          if (in[2] == '>') {
5329
3.00M
        SKIP(3);
5330
3.00M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5331
3.00M
            (!ctxt->disableSAX)) {
5332
1.90M
      if (buf != NULL)
5333
253k
          ctxt->sax->comment(ctxt->userData, buf);
5334
1.64M
      else
5335
1.64M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5336
1.90M
        }
5337
3.00M
        if (buf != NULL)
5338
333k
            xmlFree(buf);
5339
3.00M
        return;
5340
3.00M
    }
5341
1.26M
    if (buf != NULL) {
5342
1.13M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5343
1.13M
                          "Double hyphen within comment: "
5344
1.13M
                                      "<!--%.50s\n",
5345
1.13M
              buf);
5346
1.13M
    } else
5347
127k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5348
127k
                          "Double hyphen within comment\n", NULL);
5349
1.26M
    in++;
5350
1.26M
    ctxt->input->col++;
5351
1.26M
      }
5352
2.10M
      in++;
5353
2.10M
      ctxt->input->col++;
5354
2.10M
      goto get_more;
5355
5.10M
  }
5356
5.45M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5357
347k
    xmlParseCommentComplex(ctxt, buf, len, size);
5358
347k
}
5359
5360
5361
/**
5362
 * xmlParsePITarget:
5363
 * @ctxt:  an XML parser context
5364
 *
5365
 * DEPRECATED: Internal function, don't use.
5366
 *
5367
 * parse the name of a PI
5368
 *
5369
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5370
 *
5371
 * Returns the PITarget name or NULL
5372
 */
5373
5374
const xmlChar *
5375
376k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5376
376k
    const xmlChar *name;
5377
5378
376k
    name = xmlParseName(ctxt);
5379
376k
    if ((name != NULL) &&
5380
376k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5381
376k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5382
376k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5383
108k
  int i;
5384
108k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5385
108k
      (name[2] == 'l') && (name[3] == 0)) {
5386
80.7k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5387
80.7k
     "XML declaration allowed only at the start of the document\n");
5388
80.7k
      return(name);
5389
80.7k
  } else if (name[3] == 0) {
5390
14.3k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5391
14.3k
      return(name);
5392
14.3k
  }
5393
36.5k
  for (i = 0;;i++) {
5394
36.5k
      if (xmlW3CPIs[i] == NULL) break;
5395
25.3k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5396
2.43k
          return(name);
5397
25.3k
  }
5398
11.1k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5399
11.1k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5400
11.1k
          NULL, NULL);
5401
11.1k
    }
5402
278k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5403
4.92k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5404
4.92k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5405
4.92k
    }
5406
278k
    return(name);
5407
376k
}
5408
5409
#ifdef LIBXML_CATALOG_ENABLED
5410
/**
5411
 * xmlParseCatalogPI:
5412
 * @ctxt:  an XML parser context
5413
 * @catalog:  the PI value string
5414
 *
5415
 * parse an XML Catalog Processing Instruction.
5416
 *
5417
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5418
 *
5419
 * Occurs only if allowed by the user and if happening in the Misc
5420
 * part of the document before any doctype information
5421
 * This will add the given catalog to the parsing context in order
5422
 * to be used if there is a resolution need further down in the document
5423
 */
5424
5425
static void
5426
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5427
0
    xmlChar *URL = NULL;
5428
0
    const xmlChar *tmp, *base;
5429
0
    xmlChar marker;
5430
5431
0
    tmp = catalog;
5432
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5433
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5434
0
  goto error;
5435
0
    tmp += 7;
5436
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5437
0
    if (*tmp != '=') {
5438
0
  return;
5439
0
    }
5440
0
    tmp++;
5441
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5442
0
    marker = *tmp;
5443
0
    if ((marker != '\'') && (marker != '"'))
5444
0
  goto error;
5445
0
    tmp++;
5446
0
    base = tmp;
5447
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5448
0
    if (*tmp == 0)
5449
0
  goto error;
5450
0
    URL = xmlStrndup(base, tmp - base);
5451
0
    tmp++;
5452
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5453
0
    if (*tmp != 0)
5454
0
  goto error;
5455
5456
0
    if (URL != NULL) {
5457
        /*
5458
         * Unfortunately, the catalog API doesn't report OOM errors.
5459
         * xmlGetLastError isn't very helpful since we don't know
5460
         * where the last error came from. We'd have to reset it
5461
         * before this call and restore it afterwards.
5462
         */
5463
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5464
0
  xmlFree(URL);
5465
0
    }
5466
0
    return;
5467
5468
0
error:
5469
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5470
0
            "Catalog PI syntax error: %s\n",
5471
0
      catalog, NULL);
5472
0
    if (URL != NULL)
5473
0
  xmlFree(URL);
5474
0
}
5475
#endif
5476
5477
/**
5478
 * xmlParsePI:
5479
 * @ctxt:  an XML parser context
5480
 *
5481
 * DEPRECATED: Internal function, don't use.
5482
 *
5483
 * parse an XML Processing Instruction.
5484
 *
5485
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5486
 *
5487
 * The processing is transferred to SAX once parsed.
5488
 */
5489
5490
void
5491
376k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5492
376k
    xmlChar *buf = NULL;
5493
376k
    size_t len = 0;
5494
376k
    size_t size = XML_PARSER_BUFFER_SIZE;
5495
376k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5496
140k
                       XML_MAX_HUGE_LENGTH :
5497
376k
                       XML_MAX_TEXT_LENGTH;
5498
376k
    int cur, l;
5499
376k
    const xmlChar *target;
5500
5501
376k
    if ((RAW == '<') && (NXT(1) == '?')) {
5502
  /*
5503
   * this is a Processing Instruction.
5504
   */
5505
376k
  SKIP(2);
5506
5507
  /*
5508
   * Parse the target name and check for special support like
5509
   * namespace.
5510
   */
5511
376k
        target = xmlParsePITarget(ctxt);
5512
376k
  if (target != NULL) {
5513
313k
      if ((RAW == '?') && (NXT(1) == '>')) {
5514
82.6k
    SKIP(2);
5515
5516
    /*
5517
     * SAX: PI detected.
5518
     */
5519
82.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5520
82.6k
        (ctxt->sax->processingInstruction != NULL))
5521
76.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5522
76.6k
                                         target, NULL);
5523
82.6k
    return;
5524
82.6k
      }
5525
231k
      buf = xmlMalloc(size);
5526
231k
      if (buf == NULL) {
5527
125
    xmlErrMemory(ctxt);
5528
125
    return;
5529
125
      }
5530
230k
      if (SKIP_BLANKS == 0) {
5531
90.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5532
90.7k
        "ParsePI: PI %s space expected\n", target);
5533
90.7k
      }
5534
230k
      cur = xmlCurrentCharRecover(ctxt, &l);
5535
27.1M
      while (IS_CHAR(cur) && /* checked */
5536
27.1M
       ((cur != '?') || (NXT(1) != '>'))) {
5537
26.9M
    if (len + 5 >= size) {
5538
38.2k
        xmlChar *tmp;
5539
38.2k
                    size_t new_size = size * 2;
5540
38.2k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5541
38.2k
        if (tmp == NULL) {
5542
26
      xmlErrMemory(ctxt);
5543
26
      xmlFree(buf);
5544
26
      return;
5545
26
        }
5546
38.2k
        buf = tmp;
5547
38.2k
                    size = new_size;
5548
38.2k
    }
5549
26.9M
    COPY_BUF(buf, len, cur);
5550
26.9M
                if (len > maxLength) {
5551
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5552
0
                                      "PI %s too big found", target);
5553
0
                    xmlFree(buf);
5554
0
                    return;
5555
0
                }
5556
26.9M
    NEXTL(l);
5557
26.9M
    cur = xmlCurrentCharRecover(ctxt, &l);
5558
26.9M
      }
5559
230k
      buf[len] = 0;
5560
230k
      if (cur != '?') {
5561
90.4k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5562
90.4k
          "ParsePI: PI %s never end ...\n", target);
5563
140k
      } else {
5564
140k
    SKIP(2);
5565
5566
140k
#ifdef LIBXML_CATALOG_ENABLED
5567
140k
    if ((ctxt->inSubset == 0) &&
5568
140k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5569
3.84k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5570
5571
3.84k
        if (((ctxt->options & XML_PARSE_NO_CATALOG_PI) == 0) &&
5572
3.84k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5573
3.84k
       (allow == XML_CATA_ALLOW_ALL)))
5574
0
      xmlParseCatalogPI(ctxt, buf);
5575
3.84k
    }
5576
140k
#endif
5577
5578
    /*
5579
     * SAX: PI detected.
5580
     */
5581
140k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5582
140k
        (ctxt->sax->processingInstruction != NULL))
5583
113k
        ctxt->sax->processingInstruction(ctxt->userData,
5584
113k
                                         target, buf);
5585
140k
      }
5586
230k
      xmlFree(buf);
5587
230k
  } else {
5588
62.2k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5589
62.2k
  }
5590
376k
    }
5591
376k
}
5592
5593
/**
5594
 * xmlParseNotationDecl:
5595
 * @ctxt:  an XML parser context
5596
 *
5597
 * DEPRECATED: Internal function, don't use.
5598
 *
5599
 * Parse a notation declaration. Always consumes '<!'.
5600
 *
5601
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5602
 *
5603
 * Hence there is actually 3 choices:
5604
 *     'PUBLIC' S PubidLiteral
5605
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5606
 * and 'SYSTEM' S SystemLiteral
5607
 *
5608
 * See the NOTE on xmlParseExternalID().
5609
 */
5610
5611
void
5612
43.6k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5613
43.6k
    const xmlChar *name;
5614
43.6k
    xmlChar *Pubid;
5615
43.6k
    xmlChar *Systemid;
5616
5617
43.6k
    if ((CUR != '<') || (NXT(1) != '!'))
5618
0
        return;
5619
43.6k
    SKIP(2);
5620
5621
43.6k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5622
41.8k
  int inputid = ctxt->input->id;
5623
41.8k
  SKIP(8);
5624
41.8k
  if (SKIP_BLANKS_PE == 0) {
5625
1.24k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5626
1.24k
         "Space required after '<!NOTATION'\n");
5627
1.24k
      return;
5628
1.24k
  }
5629
5630
40.5k
        name = xmlParseName(ctxt);
5631
40.5k
  if (name == NULL) {
5632
2.36k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5633
2.36k
      return;
5634
2.36k
  }
5635
38.2k
  if (xmlStrchr(name, ':') != NULL) {
5636
2.66k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5637
2.66k
         "colons are forbidden from notation names '%s'\n",
5638
2.66k
         name, NULL, NULL);
5639
2.66k
  }
5640
38.2k
  if (SKIP_BLANKS_PE == 0) {
5641
1.70k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5642
1.70k
         "Space required after the NOTATION name'\n");
5643
1.70k
      return;
5644
1.70k
  }
5645
5646
  /*
5647
   * Parse the IDs.
5648
   */
5649
36.5k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5650
36.5k
  SKIP_BLANKS_PE;
5651
5652
36.5k
  if (RAW == '>') {
5653
19.6k
      if (inputid != ctxt->input->id) {
5654
424
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5655
424
                         "Notation declaration doesn't start and stop"
5656
424
                               " in the same entity\n");
5657
424
      }
5658
19.6k
      NEXT;
5659
19.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5660
19.6k
    (ctxt->sax->notationDecl != NULL))
5661
15.1k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5662
19.6k
  } else {
5663
16.8k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5664
16.8k
  }
5665
36.5k
  if (Systemid != NULL) xmlFree(Systemid);
5666
36.5k
  if (Pubid != NULL) xmlFree(Pubid);
5667
36.5k
    }
5668
43.6k
}
5669
5670
/**
5671
 * xmlParseEntityDecl:
5672
 * @ctxt:  an XML parser context
5673
 *
5674
 * DEPRECATED: Internal function, don't use.
5675
 *
5676
 * Parse an entity declaration. Always consumes '<!'.
5677
 *
5678
 * [70] EntityDecl ::= GEDecl | PEDecl
5679
 *
5680
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5681
 *
5682
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5683
 *
5684
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5685
 *
5686
 * [74] PEDef ::= EntityValue | ExternalID
5687
 *
5688
 * [76] NDataDecl ::= S 'NDATA' S Name
5689
 *
5690
 * [ VC: Notation Declared ]
5691
 * The Name must match the declared name of a notation.
5692
 */
5693
5694
void
5695
413k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5696
413k
    const xmlChar *name = NULL;
5697
413k
    xmlChar *value = NULL;
5698
413k
    xmlChar *URI = NULL, *literal = NULL;
5699
413k
    const xmlChar *ndata = NULL;
5700
413k
    int isParameter = 0;
5701
413k
    xmlChar *orig = NULL;
5702
5703
413k
    if ((CUR != '<') || (NXT(1) != '!'))
5704
0
        return;
5705
413k
    SKIP(2);
5706
5707
    /* GROW; done in the caller */
5708
413k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5709
411k
  int inputid = ctxt->input->id;
5710
411k
  SKIP(6);
5711
411k
  if (SKIP_BLANKS_PE == 0) {
5712
23.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5713
23.1k
         "Space required after '<!ENTITY'\n");
5714
23.1k
  }
5715
5716
411k
  if (RAW == '%') {
5717
164k
      NEXT;
5718
164k
      if (SKIP_BLANKS_PE == 0) {
5719
2.83k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5720
2.83k
             "Space required after '%%'\n");
5721
2.83k
      }
5722
164k
      isParameter = 1;
5723
164k
  }
5724
5725
411k
        name = xmlParseName(ctxt);
5726
411k
  if (name == NULL) {
5727
19.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5728
19.2k
                     "xmlParseEntityDecl: no name\n");
5729
19.2k
            return;
5730
19.2k
  }
5731
392k
  if (xmlStrchr(name, ':') != NULL) {
5732
3.68k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5733
3.68k
         "colons are forbidden from entities names '%s'\n",
5734
3.68k
         name, NULL, NULL);
5735
3.68k
  }
5736
392k
  if (SKIP_BLANKS_PE == 0) {
5737
20.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5738
20.5k
         "Space required after the entity name\n");
5739
20.5k
  }
5740
5741
  /*
5742
   * handle the various case of definitions...
5743
   */
5744
392k
  if (isParameter) {
5745
154k
      if ((RAW == '"') || (RAW == '\'')) {
5746
111k
          value = xmlParseEntityValue(ctxt, &orig);
5747
111k
    if (value) {
5748
110k
        if ((ctxt->sax != NULL) &&
5749
110k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5750
85.3k
      ctxt->sax->entityDecl(ctxt->userData, name,
5751
85.3k
                        XML_INTERNAL_PARAMETER_ENTITY,
5752
85.3k
            NULL, NULL, value);
5753
110k
    }
5754
111k
      } else {
5755
43.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5756
43.0k
    if ((URI == NULL) && (literal == NULL)) {
5757
2.05k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5758
2.05k
    }
5759
43.0k
    if (URI) {
5760
40.3k
                    if (xmlStrchr(URI, '#')) {
5761
1.16k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5762
39.2k
                    } else {
5763
39.2k
                        if ((ctxt->sax != NULL) &&
5764
39.2k
                            (!ctxt->disableSAX) &&
5765
39.2k
                            (ctxt->sax->entityDecl != NULL))
5766
36.0k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5767
36.0k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5768
36.0k
                                        literal, URI, NULL);
5769
39.2k
                    }
5770
40.3k
    }
5771
43.0k
      }
5772
238k
  } else {
5773
238k
      if ((RAW == '"') || (RAW == '\'')) {
5774
188k
          value = xmlParseEntityValue(ctxt, &orig);
5775
188k
    if ((ctxt->sax != NULL) &&
5776
188k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5777
153k
        ctxt->sax->entityDecl(ctxt->userData, name,
5778
153k
        XML_INTERNAL_GENERAL_ENTITY,
5779
153k
        NULL, NULL, value);
5780
    /*
5781
     * For expat compatibility in SAX mode.
5782
     */
5783
188k
    if ((ctxt->myDoc == NULL) ||
5784
188k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5785
10.7k
        if (ctxt->myDoc == NULL) {
5786
2.84k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5787
2.84k
      if (ctxt->myDoc == NULL) {
5788
7
          xmlErrMemory(ctxt);
5789
7
          goto done;
5790
7
      }
5791
2.83k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5792
2.83k
        }
5793
10.7k
        if (ctxt->myDoc->intSubset == NULL) {
5794
2.83k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5795
2.83k
              BAD_CAST "fake", NULL, NULL);
5796
2.83k
                        if (ctxt->myDoc->intSubset == NULL) {
5797
8
                            xmlErrMemory(ctxt);
5798
8
                            goto done;
5799
8
                        }
5800
2.83k
                    }
5801
5802
10.7k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5803
10.7k
                    NULL, NULL, value);
5804
10.7k
    }
5805
188k
      } else {
5806
49.6k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5807
49.6k
    if ((URI == NULL) && (literal == NULL)) {
5808
4.96k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5809
4.96k
    }
5810
49.6k
    if (URI) {
5811
40.5k
                    if (xmlStrchr(URI, '#')) {
5812
3.23k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5813
3.23k
                    }
5814
40.5k
    }
5815
49.6k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5816
5.89k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
5.89k
           "Space required before 'NDATA'\n");
5818
5.89k
    }
5819
49.6k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5820
12.5k
        SKIP(5);
5821
12.5k
        if (SKIP_BLANKS_PE == 0) {
5822
1.77k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5823
1.77k
               "Space required after 'NDATA'\n");
5824
1.77k
        }
5825
12.5k
        ndata = xmlParseName(ctxt);
5826
12.5k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5827
12.5k
            (ctxt->sax->unparsedEntityDecl != NULL))
5828
10.7k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5829
10.7k
            literal, URI, ndata);
5830
37.1k
    } else {
5831
37.1k
        if ((ctxt->sax != NULL) &&
5832
37.1k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5833
30.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5834
30.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5835
30.0k
            literal, URI, NULL);
5836
        /*
5837
         * For expat compatibility in SAX mode.
5838
         * assuming the entity replacement was asked for
5839
         */
5840
37.1k
        if ((ctxt->replaceEntities != 0) &&
5841
37.1k
      ((ctxt->myDoc == NULL) ||
5842
25.5k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5843
2.17k
      if (ctxt->myDoc == NULL) {
5844
1.05k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5845
1.05k
          if (ctxt->myDoc == NULL) {
5846
6
              xmlErrMemory(ctxt);
5847
6
        goto done;
5848
6
          }
5849
1.04k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5850
1.04k
      }
5851
5852
2.16k
      if (ctxt->myDoc->intSubset == NULL) {
5853
1.04k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5854
1.04k
            BAD_CAST "fake", NULL, NULL);
5855
1.04k
                            if (ctxt->myDoc->intSubset == NULL) {
5856
6
                                xmlErrMemory(ctxt);
5857
6
                                goto done;
5858
6
                            }
5859
1.04k
                        }
5860
2.16k
      xmlSAX2EntityDecl(ctxt, name,
5861
2.16k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5862
2.16k
                  literal, URI, NULL);
5863
2.16k
        }
5864
37.1k
    }
5865
49.6k
      }
5866
238k
  }
5867
392k
  SKIP_BLANKS_PE;
5868
392k
  if (RAW != '>') {
5869
13.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5870
13.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5871
13.7k
      xmlHaltParser(ctxt);
5872
378k
  } else {
5873
378k
      if (inputid != ctxt->input->id) {
5874
612
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5875
612
                         "Entity declaration doesn't start and stop in"
5876
612
                               " the same entity\n");
5877
612
      }
5878
378k
      NEXT;
5879
378k
  }
5880
392k
  if (orig != NULL) {
5881
      /*
5882
       * Ugly mechanism to save the raw entity value.
5883
       */
5884
297k
      xmlEntityPtr cur = NULL;
5885
5886
297k
      if (isParameter) {
5887
110k
          if ((ctxt->sax != NULL) &&
5888
110k
        (ctxt->sax->getParameterEntity != NULL))
5889
110k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5890
187k
      } else {
5891
187k
          if ((ctxt->sax != NULL) &&
5892
187k
        (ctxt->sax->getEntity != NULL))
5893
186k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5894
187k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5895
20.6k
        cur = xmlSAX2GetEntity(ctxt, name);
5896
20.6k
    }
5897
187k
      }
5898
297k
            if ((cur != NULL) && (cur->orig == NULL)) {
5899
118k
    cur->orig = orig;
5900
118k
                orig = NULL;
5901
118k
      }
5902
297k
  }
5903
5904
392k
done:
5905
392k
  if (value != NULL) xmlFree(value);
5906
392k
  if (URI != NULL) xmlFree(URI);
5907
392k
  if (literal != NULL) xmlFree(literal);
5908
392k
        if (orig != NULL) xmlFree(orig);
5909
392k
    }
5910
413k
}
5911
5912
/**
5913
 * xmlParseDefaultDecl:
5914
 * @ctxt:  an XML parser context
5915
 * @value:  Receive a possible fixed default value for the attribute
5916
 *
5917
 * DEPRECATED: Internal function, don't use.
5918
 *
5919
 * Parse an attribute default declaration
5920
 *
5921
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5922
 *
5923
 * [ VC: Required Attribute ]
5924
 * if the default declaration is the keyword #REQUIRED, then the
5925
 * attribute must be specified for all elements of the type in the
5926
 * attribute-list declaration.
5927
 *
5928
 * [ VC: Attribute Default Legal ]
5929
 * The declared default value must meet the lexical constraints of
5930
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5931
 *
5932
 * [ VC: Fixed Attribute Default ]
5933
 * if an attribute has a default value declared with the #FIXED
5934
 * keyword, instances of that attribute must match the default value.
5935
 *
5936
 * [ WFC: No < in Attribute Values ]
5937
 * handled in xmlParseAttValue()
5938
 *
5939
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5940
 *          or XML_ATTRIBUTE_FIXED.
5941
 */
5942
5943
int
5944
606k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5945
606k
    int val;
5946
606k
    xmlChar *ret;
5947
5948
606k
    *value = NULL;
5949
606k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5950
30.5k
  SKIP(9);
5951
30.5k
  return(XML_ATTRIBUTE_REQUIRED);
5952
30.5k
    }
5953
576k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5954
194k
  SKIP(8);
5955
194k
  return(XML_ATTRIBUTE_IMPLIED);
5956
194k
    }
5957
381k
    val = XML_ATTRIBUTE_NONE;
5958
381k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5959
43.4k
  SKIP(6);
5960
43.4k
  val = XML_ATTRIBUTE_FIXED;
5961
43.4k
  if (SKIP_BLANKS_PE == 0) {
5962
5.14k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5963
5.14k
         "Space required after '#FIXED'\n");
5964
5.14k
  }
5965
43.4k
    }
5966
381k
    ret = xmlParseAttValue(ctxt);
5967
381k
    if (ret == NULL) {
5968
52.3k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5969
52.3k
           "Attribute default value declaration error\n");
5970
52.3k
    } else
5971
328k
        *value = ret;
5972
381k
    return(val);
5973
576k
}
5974
5975
/**
5976
 * xmlParseNotationType:
5977
 * @ctxt:  an XML parser context
5978
 *
5979
 * DEPRECATED: Internal function, don't use.
5980
 *
5981
 * parse an Notation attribute type.
5982
 *
5983
 * Note: the leading 'NOTATION' S part has already being parsed...
5984
 *
5985
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5986
 *
5987
 * [ VC: Notation Attributes ]
5988
 * Values of this type must match one of the notation names included
5989
 * in the declaration; all notation names in the declaration must be declared.
5990
 *
5991
 * Returns: the notation attribute tree built while parsing
5992
 */
5993
5994
xmlEnumerationPtr
5995
12.3k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5996
12.3k
    const xmlChar *name;
5997
12.3k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5998
5999
12.3k
    if (RAW != '(') {
6000
3.07k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6001
3.07k
  return(NULL);
6002
3.07k
    }
6003
15.5k
    do {
6004
15.5k
        NEXT;
6005
15.5k
  SKIP_BLANKS_PE;
6006
15.5k
        name = xmlParseName(ctxt);
6007
15.5k
  if (name == NULL) {
6008
1.96k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6009
1.96k
         "Name expected in NOTATION declaration\n");
6010
1.96k
            xmlFreeEnumeration(ret);
6011
1.96k
      return(NULL);
6012
1.96k
  }
6013
13.5k
  tmp = ret;
6014
30.6k
  while (tmp != NULL) {
6015
20.0k
      if (xmlStrEqual(name, tmp->name)) {
6016
2.95k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6017
2.95k
    "standalone: attribute notation value token %s duplicated\n",
6018
2.95k
         name, NULL);
6019
2.95k
    if (!xmlDictOwns(ctxt->dict, name))
6020
0
        xmlFree((xmlChar *) name);
6021
2.95k
    break;
6022
2.95k
      }
6023
17.1k
      tmp = tmp->next;
6024
17.1k
  }
6025
13.5k
  if (tmp == NULL) {
6026
10.6k
      cur = xmlCreateEnumeration(name);
6027
10.6k
      if (cur == NULL) {
6028
36
                xmlErrMemory(ctxt);
6029
36
                xmlFreeEnumeration(ret);
6030
36
                return(NULL);
6031
36
            }
6032
10.5k
      if (last == NULL) ret = last = cur;
6033
2.96k
      else {
6034
2.96k
    last->next = cur;
6035
2.96k
    last = cur;
6036
2.96k
      }
6037
10.5k
  }
6038
13.5k
  SKIP_BLANKS_PE;
6039
13.5k
    } while (RAW == '|');
6040
7.30k
    if (RAW != ')') {
6041
1.35k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6042
1.35k
        xmlFreeEnumeration(ret);
6043
1.35k
  return(NULL);
6044
1.35k
    }
6045
5.94k
    NEXT;
6046
5.94k
    return(ret);
6047
7.30k
}
6048
6049
/**
6050
 * xmlParseEnumerationType:
6051
 * @ctxt:  an XML parser context
6052
 *
6053
 * DEPRECATED: Internal function, don't use.
6054
 *
6055
 * parse an Enumeration attribute type.
6056
 *
6057
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6058
 *
6059
 * [ VC: Enumeration ]
6060
 * Values of this type must match one of the Nmtoken tokens in
6061
 * the declaration
6062
 *
6063
 * Returns: the enumeration attribute tree built while parsing
6064
 */
6065
6066
xmlEnumerationPtr
6067
148k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6068
148k
    xmlChar *name;
6069
148k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6070
6071
148k
    if (RAW != '(') {
6072
14.9k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6073
14.9k
  return(NULL);
6074
14.9k
    }
6075
187k
    do {
6076
187k
        NEXT;
6077
187k
  SKIP_BLANKS_PE;
6078
187k
        name = xmlParseNmtoken(ctxt);
6079
187k
  if (name == NULL) {
6080
2.01k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6081
2.01k
      return(ret);
6082
2.01k
  }
6083
185k
  tmp = ret;
6084
318k
  while (tmp != NULL) {
6085
136k
      if (xmlStrEqual(name, tmp->name)) {
6086
3.28k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6087
3.28k
    "standalone: attribute enumeration value token %s duplicated\n",
6088
3.28k
         name, NULL);
6089
3.28k
    if (!xmlDictOwns(ctxt->dict, name))
6090
3.28k
        xmlFree(name);
6091
3.28k
    break;
6092
3.28k
      }
6093
133k
      tmp = tmp->next;
6094
133k
  }
6095
185k
  if (tmp == NULL) {
6096
181k
      cur = xmlCreateEnumeration(name);
6097
181k
      if (!xmlDictOwns(ctxt->dict, name))
6098
181k
    xmlFree(name);
6099
181k
      if (cur == NULL) {
6100
85
                xmlErrMemory(ctxt);
6101
85
                xmlFreeEnumeration(ret);
6102
85
                return(NULL);
6103
85
            }
6104
181k
      if (last == NULL) ret = last = cur;
6105
50.2k
      else {
6106
50.2k
    last->next = cur;
6107
50.2k
    last = cur;
6108
50.2k
      }
6109
181k
  }
6110
185k
  SKIP_BLANKS_PE;
6111
185k
    } while (RAW == '|');
6112
131k
    if (RAW != ')') {
6113
3.18k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6114
3.18k
  return(ret);
6115
3.18k
    }
6116
128k
    NEXT;
6117
128k
    return(ret);
6118
131k
}
6119
6120
/**
6121
 * xmlParseEnumeratedType:
6122
 * @ctxt:  an XML parser context
6123
 * @tree:  the enumeration tree built while parsing
6124
 *
6125
 * DEPRECATED: Internal function, don't use.
6126
 *
6127
 * parse an Enumerated attribute type.
6128
 *
6129
 * [57] EnumeratedType ::= NotationType | Enumeration
6130
 *
6131
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6132
 *
6133
 *
6134
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6135
 */
6136
6137
int
6138
161k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6139
161k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6140
13.4k
  SKIP(8);
6141
13.4k
  if (SKIP_BLANKS_PE == 0) {
6142
1.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6143
1.12k
         "Space required after 'NOTATION'\n");
6144
1.12k
      return(0);
6145
1.12k
  }
6146
12.3k
  *tree = xmlParseNotationType(ctxt);
6147
12.3k
  if (*tree == NULL) return(0);
6148
5.94k
  return(XML_ATTRIBUTE_NOTATION);
6149
12.3k
    }
6150
148k
    *tree = xmlParseEnumerationType(ctxt);
6151
148k
    if (*tree == NULL) return(0);
6152
131k
    return(XML_ATTRIBUTE_ENUMERATION);
6153
148k
}
6154
6155
/**
6156
 * xmlParseAttributeType:
6157
 * @ctxt:  an XML parser context
6158
 * @tree:  the enumeration tree built while parsing
6159
 *
6160
 * DEPRECATED: Internal function, don't use.
6161
 *
6162
 * parse the Attribute list def for an element
6163
 *
6164
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6165
 *
6166
 * [55] StringType ::= 'CDATA'
6167
 *
6168
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6169
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6170
 *
6171
 * Validity constraints for attribute values syntax are checked in
6172
 * xmlValidateAttributeValue()
6173
 *
6174
 * [ VC: ID ]
6175
 * Values of type ID must match the Name production. A name must not
6176
 * appear more than once in an XML document as a value of this type;
6177
 * i.e., ID values must uniquely identify the elements which bear them.
6178
 *
6179
 * [ VC: One ID per Element Type ]
6180
 * No element type may have more than one ID attribute specified.
6181
 *
6182
 * [ VC: ID Attribute Default ]
6183
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6184
 *
6185
 * [ VC: IDREF ]
6186
 * Values of type IDREF must match the Name production, and values
6187
 * of type IDREFS must match Names; each IDREF Name must match the value
6188
 * of an ID attribute on some element in the XML document; i.e. IDREF
6189
 * values must match the value of some ID attribute.
6190
 *
6191
 * [ VC: Entity Name ]
6192
 * Values of type ENTITY must match the Name production, values
6193
 * of type ENTITIES must match Names; each Entity Name must match the
6194
 * name of an unparsed entity declared in the DTD.
6195
 *
6196
 * [ VC: Name Token ]
6197
 * Values of type NMTOKEN must match the Nmtoken production; values
6198
 * of type NMTOKENS must match Nmtokens.
6199
 *
6200
 * Returns the attribute type
6201
 */
6202
int
6203
640k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6204
640k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6205
168k
  SKIP(5);
6206
168k
  return(XML_ATTRIBUTE_CDATA);
6207
471k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6208
31.6k
  SKIP(6);
6209
31.6k
  return(XML_ATTRIBUTE_IDREFS);
6210
439k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6211
25.7k
  SKIP(5);
6212
25.7k
  return(XML_ATTRIBUTE_IDREF);
6213
414k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6214
132k
        SKIP(2);
6215
132k
  return(XML_ATTRIBUTE_ID);
6216
281k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6217
32.3k
  SKIP(6);
6218
32.3k
  return(XML_ATTRIBUTE_ENTITY);
6219
249k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6220
21.5k
  SKIP(8);
6221
21.5k
  return(XML_ATTRIBUTE_ENTITIES);
6222
227k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6223
17.3k
  SKIP(8);
6224
17.3k
  return(XML_ATTRIBUTE_NMTOKENS);
6225
210k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6226
48.3k
  SKIP(7);
6227
48.3k
  return(XML_ATTRIBUTE_NMTOKEN);
6228
48.3k
     }
6229
161k
     return(xmlParseEnumeratedType(ctxt, tree));
6230
640k
}
6231
6232
/**
6233
 * xmlParseAttributeListDecl:
6234
 * @ctxt:  an XML parser context
6235
 *
6236
 * DEPRECATED: Internal function, don't use.
6237
 *
6238
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6239
 *
6240
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6241
 *
6242
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6243
 *
6244
 */
6245
void
6246
405k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6247
405k
    const xmlChar *elemName;
6248
405k
    const xmlChar *attrName;
6249
405k
    xmlEnumerationPtr tree;
6250
6251
405k
    if ((CUR != '<') || (NXT(1) != '!'))
6252
0
        return;
6253
405k
    SKIP(2);
6254
6255
405k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6256
403k
  int inputid = ctxt->input->id;
6257
6258
403k
  SKIP(7);
6259
403k
  if (SKIP_BLANKS_PE == 0) {
6260
14.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6261
14.7k
                     "Space required after '<!ATTLIST'\n");
6262
14.7k
  }
6263
403k
        elemName = xmlParseName(ctxt);
6264
403k
  if (elemName == NULL) {
6265
8.46k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6266
8.46k
         "ATTLIST: no name for Element\n");
6267
8.46k
      return;
6268
8.46k
  }
6269
394k
  SKIP_BLANKS_PE;
6270
394k
  GROW;
6271
978k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6272
712k
      int type;
6273
712k
      int def;
6274
712k
      xmlChar *defaultValue = NULL;
6275
6276
712k
      GROW;
6277
712k
            tree = NULL;
6278
712k
      attrName = xmlParseName(ctxt);
6279
712k
      if (attrName == NULL) {
6280
63.2k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6281
63.2k
             "ATTLIST: no name for Attribute\n");
6282
63.2k
    break;
6283
63.2k
      }
6284
648k
      GROW;
6285
648k
      if (SKIP_BLANKS_PE == 0) {
6286
8.53k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6287
8.53k
            "Space required after the attribute name\n");
6288
8.53k
    break;
6289
8.53k
      }
6290
6291
640k
      type = xmlParseAttributeType(ctxt, &tree);
6292
640k
      if (type <= 0) {
6293
24.3k
          break;
6294
24.3k
      }
6295
6296
615k
      GROW;
6297
615k
      if (SKIP_BLANKS_PE == 0) {
6298
9.27k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6299
9.27k
             "Space required after the attribute type\n");
6300
9.27k
          if (tree != NULL)
6301
3.97k
        xmlFreeEnumeration(tree);
6302
9.27k
    break;
6303
9.27k
      }
6304
6305
606k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6306
606k
      if (def <= 0) {
6307
0
                if (defaultValue != NULL)
6308
0
        xmlFree(defaultValue);
6309
0
          if (tree != NULL)
6310
0
        xmlFreeEnumeration(tree);
6311
0
          break;
6312
0
      }
6313
606k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6314
264k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6315
6316
606k
      GROW;
6317
606k
            if (RAW != '>') {
6318
419k
    if (SKIP_BLANKS_PE == 0) {
6319
22.6k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6320
22.6k
      "Space required after the attribute default value\n");
6321
22.6k
        if (defaultValue != NULL)
6322
2.15k
      xmlFree(defaultValue);
6323
22.6k
        if (tree != NULL)
6324
2.42k
      xmlFreeEnumeration(tree);
6325
22.6k
        break;
6326
22.6k
    }
6327
419k
      }
6328
583k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6329
583k
    (ctxt->sax->attributeDecl != NULL))
6330
522k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6331
522k
                          type, def, defaultValue, tree);
6332
61.1k
      else if (tree != NULL)
6333
17.9k
    xmlFreeEnumeration(tree);
6334
6335
583k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6336
583k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6337
583k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6338
282k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6339
282k
      }
6340
583k
      if (ctxt->sax2) {
6341
511k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6342
511k
      }
6343
583k
      if (defaultValue != NULL)
6344
326k
          xmlFree(defaultValue);
6345
583k
      GROW;
6346
583k
  }
6347
394k
  if (RAW == '>') {
6348
273k
      if (inputid != ctxt->input->id) {
6349
2.68k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6350
2.68k
                               "Attribute list declaration doesn't start and"
6351
2.68k
                               " stop in the same entity\n");
6352
2.68k
      }
6353
273k
      NEXT;
6354
273k
  }
6355
394k
    }
6356
405k
}
6357
6358
/**
6359
 * xmlParseElementMixedContentDecl:
6360
 * @ctxt:  an XML parser context
6361
 * @inputchk:  the input used for the current entity, needed for boundary checks
6362
 *
6363
 * DEPRECATED: Internal function, don't use.
6364
 *
6365
 * parse the declaration for a Mixed Element content
6366
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6367
 *
6368
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6369
 *                '(' S? '#PCDATA' S? ')'
6370
 *
6371
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6372
 *
6373
 * [ VC: No Duplicate Types ]
6374
 * The same name must not appear more than once in a single
6375
 * mixed-content declaration.
6376
 *
6377
 * returns: the list of the xmlElementContentPtr describing the element choices
6378
 */
6379
xmlElementContentPtr
6380
61.2k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6381
61.2k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6382
61.2k
    const xmlChar *elem = NULL;
6383
6384
61.2k
    GROW;
6385
61.2k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6386
61.2k
  SKIP(7);
6387
61.2k
  SKIP_BLANKS_PE;
6388
61.2k
  if (RAW == ')') {
6389
36.9k
      if (ctxt->input->id != inputchk) {
6390
276
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6391
276
                               "Element content declaration doesn't start and"
6392
276
                               " stop in the same entity\n");
6393
276
      }
6394
36.9k
      NEXT;
6395
36.9k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6396
36.9k
      if (ret == NULL)
6397
25
                goto mem_error;
6398
36.9k
      if (RAW == '*') {
6399
832
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6400
832
    NEXT;
6401
832
      }
6402
36.9k
      return(ret);
6403
36.9k
  }
6404
24.2k
  if ((RAW == '(') || (RAW == '|')) {
6405
20.3k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6406
20.3k
      if (ret == NULL)
6407
9
                goto mem_error;
6408
20.3k
  }
6409
182k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6410
158k
      NEXT;
6411
158k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6412
158k
            if (n == NULL)
6413
24
                goto mem_error;
6414
158k
      if (elem == NULL) {
6415
20.1k
    n->c1 = cur;
6416
20.1k
    if (cur != NULL)
6417
20.1k
        cur->parent = n;
6418
20.1k
    ret = cur = n;
6419
138k
      } else {
6420
138k
          cur->c2 = n;
6421
138k
    n->parent = cur;
6422
138k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6423
138k
                if (n->c1 == NULL)
6424
25
                    goto mem_error;
6425
138k
    n->c1->parent = n;
6426
138k
    cur = n;
6427
138k
      }
6428
158k
      SKIP_BLANKS_PE;
6429
158k
      elem = xmlParseName(ctxt);
6430
158k
      if (elem == NULL) {
6431
743
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6432
743
      "xmlParseElementMixedContentDecl : Name expected\n");
6433
743
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
743
    return(NULL);
6435
743
      }
6436
157k
      SKIP_BLANKS_PE;
6437
157k
      GROW;
6438
157k
  }
6439
23.4k
  if ((RAW == ')') && (NXT(1) == '*')) {
6440
17.6k
      if (elem != NULL) {
6441
17.6k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6442
17.6k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6443
17.6k
    if (cur->c2 == NULL)
6444
17
                    goto mem_error;
6445
17.5k
    cur->c2->parent = cur;
6446
17.5k
            }
6447
17.5k
            if (ret != NULL)
6448
17.5k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6449
17.5k
      if (ctxt->input->id != inputchk) {
6450
7
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6451
7
                               "Element content declaration doesn't start and"
6452
7
                               " stop in the same entity\n");
6453
7
      }
6454
17.5k
      SKIP(2);
6455
17.5k
  } else {
6456
5.87k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6457
5.87k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6458
5.87k
      return(NULL);
6459
5.87k
  }
6460
6461
23.4k
    } else {
6462
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6463
0
    }
6464
17.5k
    return(ret);
6465
6466
100
mem_error:
6467
100
    xmlErrMemory(ctxt);
6468
100
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6469
100
    return(NULL);
6470
61.2k
}
6471
6472
/**
6473
 * xmlParseElementChildrenContentDeclPriv:
6474
 * @ctxt:  an XML parser context
6475
 * @inputchk:  the input used for the current entity, needed for boundary checks
6476
 * @depth: the level of recursion
6477
 *
6478
 * parse the declaration for a Mixed Element content
6479
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6480
 *
6481
 *
6482
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6483
 *
6484
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6485
 *
6486
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6487
 *
6488
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6489
 *
6490
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6491
 * TODO Parameter-entity replacement text must be properly nested
6492
 *  with parenthesized groups. That is to say, if either of the
6493
 *  opening or closing parentheses in a choice, seq, or Mixed
6494
 *  construct is contained in the replacement text for a parameter
6495
 *  entity, both must be contained in the same replacement text. For
6496
 *  interoperability, if a parameter-entity reference appears in a
6497
 *  choice, seq, or Mixed construct, its replacement text should not
6498
 *  be empty, and neither the first nor last non-blank character of
6499
 *  the replacement text should be a connector (| or ,).
6500
 *
6501
 * Returns the tree of xmlElementContentPtr describing the element
6502
 *          hierarchy.
6503
 */
6504
static xmlElementContentPtr
6505
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6506
427k
                                       int depth) {
6507
427k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6508
427k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6509
427k
    const xmlChar *elem;
6510
427k
    xmlChar type = 0;
6511
6512
427k
    if (depth > maxDepth) {
6513
22
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6514
22
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6515
22
                "use XML_PARSE_HUGE\n", depth);
6516
22
  return(NULL);
6517
22
    }
6518
427k
    SKIP_BLANKS_PE;
6519
427k
    GROW;
6520
427k
    if (RAW == '(') {
6521
253k
  int inputid = ctxt->input->id;
6522
6523
        /* Recurse on first child */
6524
253k
  NEXT;
6525
253k
  SKIP_BLANKS_PE;
6526
253k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6527
253k
                                                           depth + 1);
6528
253k
        if (cur == NULL)
6529
214k
            return(NULL);
6530
39.6k
  SKIP_BLANKS_PE;
6531
39.6k
  GROW;
6532
173k
    } else {
6533
173k
  elem = xmlParseName(ctxt);
6534
173k
  if (elem == NULL) {
6535
7.17k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6536
7.17k
      return(NULL);
6537
7.17k
  }
6538
165k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6539
165k
  if (cur == NULL) {
6540
70
      xmlErrMemory(ctxt);
6541
70
      return(NULL);
6542
70
  }
6543
165k
  GROW;
6544
165k
  if (RAW == '?') {
6545
13.3k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6546
13.3k
      NEXT;
6547
152k
  } else if (RAW == '*') {
6548
30.2k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6549
30.2k
      NEXT;
6550
122k
  } else if (RAW == '+') {
6551
14.1k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6552
14.1k
      NEXT;
6553
108k
  } else {
6554
108k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6555
108k
  }
6556
165k
  GROW;
6557
165k
    }
6558
205k
    SKIP_BLANKS_PE;
6559
651k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6560
        /*
6561
   * Each loop we parse one separator and one element.
6562
   */
6563
477k
        if (RAW == ',') {
6564
148k
      if (type == 0) type = CUR;
6565
6566
      /*
6567
       * Detect "Name | Name , Name" error
6568
       */
6569
100k
      else if (type != CUR) {
6570
235
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6571
235
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6572
235
                      type);
6573
235
    if ((last != NULL) && (last != ret))
6574
235
        xmlFreeDocElementContent(ctxt->myDoc, last);
6575
235
    if (ret != NULL)
6576
235
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6577
235
    return(NULL);
6578
235
      }
6579
148k
      NEXT;
6580
6581
148k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6582
148k
      if (op == NULL) {
6583
40
                xmlErrMemory(ctxt);
6584
40
    if ((last != NULL) && (last != ret))
6585
16
        xmlFreeDocElementContent(ctxt->myDoc, last);
6586
40
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6587
40
    return(NULL);
6588
40
      }
6589
148k
      if (last == NULL) {
6590
48.1k
    op->c1 = ret;
6591
48.1k
    if (ret != NULL)
6592
48.1k
        ret->parent = op;
6593
48.1k
    ret = cur = op;
6594
99.9k
      } else {
6595
99.9k
          cur->c2 = op;
6596
99.9k
    if (op != NULL)
6597
99.9k
        op->parent = cur;
6598
99.9k
    op->c1 = last;
6599
99.9k
    if (last != NULL)
6600
99.9k
        last->parent = op;
6601
99.9k
    cur =op;
6602
99.9k
    last = NULL;
6603
99.9k
      }
6604
329k
  } else if (RAW == '|') {
6605
318k
      if (type == 0) type = CUR;
6606
6607
      /*
6608
       * Detect "Name , Name | Name" error
6609
       */
6610
235k
      else if (type != CUR) {
6611
256
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6612
256
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6613
256
          type);
6614
256
    if ((last != NULL) && (last != ret))
6615
256
        xmlFreeDocElementContent(ctxt->myDoc, last);
6616
256
    if (ret != NULL)
6617
256
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6618
256
    return(NULL);
6619
256
      }
6620
318k
      NEXT;
6621
6622
318k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6623
318k
      if (op == NULL) {
6624
48
                xmlErrMemory(ctxt);
6625
48
    if ((last != NULL) && (last != ret))
6626
24
        xmlFreeDocElementContent(ctxt->myDoc, last);
6627
48
    if (ret != NULL)
6628
48
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6629
48
    return(NULL);
6630
48
      }
6631
318k
      if (last == NULL) {
6632
82.7k
    op->c1 = ret;
6633
82.7k
    if (ret != NULL)
6634
82.7k
        ret->parent = op;
6635
82.7k
    ret = cur = op;
6636
235k
      } else {
6637
235k
          cur->c2 = op;
6638
235k
    if (op != NULL)
6639
235k
        op->parent = cur;
6640
235k
    op->c1 = last;
6641
235k
    if (last != NULL)
6642
235k
        last->parent = op;
6643
235k
    cur =op;
6644
235k
    last = NULL;
6645
235k
      }
6646
318k
  } else {
6647
10.7k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6648
10.7k
      if ((last != NULL) && (last != ret))
6649
4.94k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6650
10.7k
      if (ret != NULL)
6651
10.7k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6652
10.7k
      return(NULL);
6653
10.7k
  }
6654
466k
  GROW;
6655
466k
  SKIP_BLANKS_PE;
6656
466k
  GROW;
6657
466k
  if (RAW == '(') {
6658
87.2k
      int inputid = ctxt->input->id;
6659
      /* Recurse on second child */
6660
87.2k
      NEXT;
6661
87.2k
      SKIP_BLANKS_PE;
6662
87.2k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6663
87.2k
                                                          depth + 1);
6664
87.2k
            if (last == NULL) {
6665
17.6k
    if (ret != NULL)
6666
17.6k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6667
17.6k
    return(NULL);
6668
17.6k
            }
6669
69.6k
      SKIP_BLANKS_PE;
6670
378k
  } else {
6671
378k
      elem = xmlParseName(ctxt);
6672
378k
      if (elem == NULL) {
6673
2.20k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6674
2.20k
    if (ret != NULL)
6675
2.20k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6676
2.20k
    return(NULL);
6677
2.20k
      }
6678
376k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6679
376k
      if (last == NULL) {
6680
80
                xmlErrMemory(ctxt);
6681
80
    if (ret != NULL)
6682
80
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6683
80
    return(NULL);
6684
80
      }
6685
376k
      if (RAW == '?') {
6686
58.3k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6687
58.3k
    NEXT;
6688
318k
      } else if (RAW == '*') {
6689
20.9k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6690
20.9k
    NEXT;
6691
297k
      } else if (RAW == '+') {
6692
19.6k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6693
19.6k
    NEXT;
6694
277k
      } else {
6695
277k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6696
277k
      }
6697
376k
  }
6698
446k
  SKIP_BLANKS_PE;
6699
446k
  GROW;
6700
446k
    }
6701
174k
    if ((cur != NULL) && (last != NULL)) {
6702
105k
        cur->c2 = last;
6703
105k
  if (last != NULL)
6704
105k
      last->parent = cur;
6705
105k
    }
6706
174k
    if (ctxt->input->id != inputchk) {
6707
179
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6708
179
                       "Element content declaration doesn't start and stop in"
6709
179
                       " the same entity\n");
6710
179
    }
6711
174k
    NEXT;
6712
174k
    if (RAW == '?') {
6713
17.8k
  if (ret != NULL) {
6714
17.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6715
17.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6716
4.05k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6717
13.7k
      else
6718
13.7k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6719
17.8k
  }
6720
17.8k
  NEXT;
6721
156k
    } else if (RAW == '*') {
6722
40.3k
  if (ret != NULL) {
6723
40.3k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6724
40.3k
      cur = ret;
6725
      /*
6726
       * Some normalization:
6727
       * (a | b* | c?)* == (a | b | c)*
6728
       */
6729
186k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6730
145k
    if ((cur->c1 != NULL) &&
6731
145k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6732
145k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6733
17.7k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6734
145k
    if ((cur->c2 != NULL) &&
6735
145k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
145k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
14.1k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
145k
    cur = cur->c2;
6739
145k
      }
6740
40.3k
  }
6741
40.3k
  NEXT;
6742
116k
    } else if (RAW == '+') {
6743
44.9k
  if (ret != NULL) {
6744
44.9k
      int found = 0;
6745
6746
44.9k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6747
44.9k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6748
8.13k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6749
36.8k
      else
6750
36.8k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6751
      /*
6752
       * Some normalization:
6753
       * (a | b*)+ == (a | b)*
6754
       * (a | b?)+ == (a | b)*
6755
       */
6756
93.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6757
48.5k
    if ((cur->c1 != NULL) &&
6758
48.5k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6759
48.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6760
7.69k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6761
7.69k
        found = 1;
6762
7.69k
    }
6763
48.5k
    if ((cur->c2 != NULL) &&
6764
48.5k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6765
48.5k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6766
14.9k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6767
14.9k
        found = 1;
6768
14.9k
    }
6769
48.5k
    cur = cur->c2;
6770
48.5k
      }
6771
44.9k
      if (found)
6772
16.3k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6773
44.9k
  }
6774
44.9k
  NEXT;
6775
44.9k
    }
6776
174k
    return(ret);
6777
205k
}
6778
6779
/**
6780
 * xmlParseElementChildrenContentDecl:
6781
 * @ctxt:  an XML parser context
6782
 * @inputchk:  the input used for the current entity, needed for boundary checks
6783
 *
6784
 * DEPRECATED: Internal function, don't use.
6785
 *
6786
 * parse the declaration for a Mixed Element content
6787
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6788
 *
6789
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6790
 *
6791
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6792
 *
6793
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6794
 *
6795
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6796
 *
6797
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6798
 * TODO Parameter-entity replacement text must be properly nested
6799
 *  with parenthesized groups. That is to say, if either of the
6800
 *  opening or closing parentheses in a choice, seq, or Mixed
6801
 *  construct is contained in the replacement text for a parameter
6802
 *  entity, both must be contained in the same replacement text. For
6803
 *  interoperability, if a parameter-entity reference appears in a
6804
 *  choice, seq, or Mixed construct, its replacement text should not
6805
 *  be empty, and neither the first nor last non-blank character of
6806
 *  the replacement text should be a connector (| or ,).
6807
 *
6808
 * Returns the tree of xmlElementContentPtr describing the element
6809
 *          hierarchy.
6810
 */
6811
xmlElementContentPtr
6812
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6813
    /* stub left for API/ABI compat */
6814
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6815
0
}
6816
6817
/**
6818
 * xmlParseElementContentDecl:
6819
 * @ctxt:  an XML parser context
6820
 * @name:  the name of the element being defined.
6821
 * @result:  the Element Content pointer will be stored here if any
6822
 *
6823
 * DEPRECATED: Internal function, don't use.
6824
 *
6825
 * parse the declaration for an Element content either Mixed or Children,
6826
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6827
 *
6828
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6829
 *
6830
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6831
 */
6832
6833
int
6834
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6835
147k
                           xmlElementContentPtr *result) {
6836
6837
147k
    xmlElementContentPtr tree = NULL;
6838
147k
    int inputid = ctxt->input->id;
6839
147k
    int res;
6840
6841
147k
    *result = NULL;
6842
6843
147k
    if (RAW != '(') {
6844
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6845
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6846
0
  return(-1);
6847
0
    }
6848
147k
    NEXT;
6849
147k
    GROW;
6850
147k
    SKIP_BLANKS_PE;
6851
147k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6852
61.2k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6853
61.2k
  res = XML_ELEMENT_TYPE_MIXED;
6854
85.8k
    } else {
6855
85.8k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6856
85.8k
  res = XML_ELEMENT_TYPE_ELEMENT;
6857
85.8k
    }
6858
147k
    SKIP_BLANKS_PE;
6859
147k
    *result = tree;
6860
147k
    return(res);
6861
147k
}
6862
6863
/**
6864
 * xmlParseElementDecl:
6865
 * @ctxt:  an XML parser context
6866
 *
6867
 * DEPRECATED: Internal function, don't use.
6868
 *
6869
 * Parse an element declaration. Always consumes '<!'.
6870
 *
6871
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6872
 *
6873
 * [ VC: Unique Element Type Declaration ]
6874
 * No element type may be declared more than once
6875
 *
6876
 * Returns the type of the element, or -1 in case of error
6877
 */
6878
int
6879
216k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6880
216k
    const xmlChar *name;
6881
216k
    int ret = -1;
6882
216k
    xmlElementContentPtr content  = NULL;
6883
6884
216k
    if ((CUR != '<') || (NXT(1) != '!'))
6885
0
        return(ret);
6886
216k
    SKIP(2);
6887
6888
    /* GROW; done in the caller */
6889
216k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6890
214k
  int inputid = ctxt->input->id;
6891
6892
214k
  SKIP(7);
6893
214k
  if (SKIP_BLANKS_PE == 0) {
6894
2.40k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6895
2.40k
               "Space required after 'ELEMENT'\n");
6896
2.40k
      return(-1);
6897
2.40k
  }
6898
212k
        name = xmlParseName(ctxt);
6899
212k
  if (name == NULL) {
6900
4.20k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6901
4.20k
         "xmlParseElementDecl: no name for Element\n");
6902
4.20k
      return(-1);
6903
4.20k
  }
6904
208k
  if (SKIP_BLANKS_PE == 0) {
6905
19.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6906
19.7k
         "Space required after the element name\n");
6907
19.7k
  }
6908
208k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6909
43.0k
      SKIP(5);
6910
      /*
6911
       * Element must always be empty.
6912
       */
6913
43.0k
      ret = XML_ELEMENT_TYPE_EMPTY;
6914
165k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6915
165k
             (NXT(2) == 'Y')) {
6916
9.57k
      SKIP(3);
6917
      /*
6918
       * Element is a generic container.
6919
       */
6920
9.57k
      ret = XML_ELEMENT_TYPE_ANY;
6921
155k
  } else if (RAW == '(') {
6922
147k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6923
147k
  } else {
6924
      /*
6925
       * [ WFC: PEs in Internal Subset ] error handling.
6926
       */
6927
8.35k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6928
8.35k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6929
8.35k
      return(-1);
6930
8.35k
  }
6931
6932
199k
  SKIP_BLANKS_PE;
6933
6934
199k
  if (RAW != '>') {
6935
29.0k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6936
29.0k
      if (content != NULL) {
6937
3.80k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6938
3.80k
      }
6939
170k
  } else {
6940
170k
      if (inputid != ctxt->input->id) {
6941
453
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6942
453
                               "Element declaration doesn't start and stop in"
6943
453
                               " the same entity\n");
6944
453
      }
6945
6946
170k
      NEXT;
6947
170k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6948
170k
    (ctxt->sax->elementDecl != NULL)) {
6949
152k
    if (content != NULL)
6950
103k
        content->parent = NULL;
6951
152k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6952
152k
                           content);
6953
152k
    if ((content != NULL) && (content->parent == NULL)) {
6954
        /*
6955
         * this is a trick: if xmlAddElementDecl is called,
6956
         * instead of copying the full tree it is plugged directly
6957
         * if called from the parser. Avoid duplicating the
6958
         * interfaces or change the API/ABI
6959
         */
6960
26.0k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6961
26.0k
    }
6962
152k
      } else if (content != NULL) {
6963
12.5k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6964
12.5k
      }
6965
170k
  }
6966
199k
    }
6967
201k
    return(ret);
6968
216k
}
6969
6970
/**
6971
 * xmlParseConditionalSections
6972
 * @ctxt:  an XML parser context
6973
 *
6974
 * Parse a conditional section. Always consumes '<!['.
6975
 *
6976
 * [61] conditionalSect ::= includeSect | ignoreSect
6977
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6978
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6979
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6980
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6981
 */
6982
6983
static void
6984
19.1k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6985
19.1k
    int *inputIds = NULL;
6986
19.1k
    size_t inputIdsSize = 0;
6987
19.1k
    size_t depth = 0;
6988
6989
53.6k
    while (PARSER_STOPPED(ctxt) == 0) {
6990
53.5k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6991
30.2k
            int id = ctxt->input->id;
6992
6993
30.2k
            SKIP(3);
6994
30.2k
            SKIP_BLANKS_PE;
6995
6996
30.2k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6997
18.7k
                SKIP(7);
6998
18.7k
                SKIP_BLANKS_PE;
6999
18.7k
                if (RAW != '[') {
7000
18
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7001
18
                    xmlHaltParser(ctxt);
7002
18
                    goto error;
7003
18
                }
7004
18.7k
                if (ctxt->input->id != id) {
7005
5
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7006
5
                                   "All markup of the conditional section is"
7007
5
                                   " not in the same entity\n");
7008
5
                }
7009
18.7k
                NEXT;
7010
7011
18.7k
                if (inputIdsSize <= depth) {
7012
11.4k
                    int *tmp;
7013
7014
11.4k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7015
11.4k
                    tmp = (int *) xmlRealloc(inputIds,
7016
11.4k
                            inputIdsSize * sizeof(int));
7017
11.4k
                    if (tmp == NULL) {
7018
28
                        xmlErrMemory(ctxt);
7019
28
                        goto error;
7020
28
                    }
7021
11.3k
                    inputIds = tmp;
7022
11.3k
                }
7023
18.7k
                inputIds[depth] = id;
7024
18.7k
                depth++;
7025
18.7k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7026
11.2k
                size_t ignoreDepth = 0;
7027
7028
11.2k
                SKIP(6);
7029
11.2k
                SKIP_BLANKS_PE;
7030
11.2k
                if (RAW != '[') {
7031
32
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7032
32
                    xmlHaltParser(ctxt);
7033
32
                    goto error;
7034
32
                }
7035
11.2k
                if (ctxt->input->id != id) {
7036
151
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7037
151
                                   "All markup of the conditional section is"
7038
151
                                   " not in the same entity\n");
7039
151
                }
7040
11.2k
                NEXT;
7041
7042
3.35M
                while (PARSER_STOPPED(ctxt) == 0) {
7043
3.35M
                    if (RAW == 0) {
7044
5.62k
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7045
5.62k
                        goto error;
7046
5.62k
                    }
7047
3.35M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7048
22.4k
                        SKIP(3);
7049
22.4k
                        ignoreDepth++;
7050
                        /* Check for integer overflow */
7051
22.4k
                        if (ignoreDepth == 0) {
7052
0
                            xmlErrMemory(ctxt);
7053
0
                            goto error;
7054
0
                        }
7055
3.33M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7056
3.33M
                               (NXT(2) == '>')) {
7057
17.5k
                        SKIP(3);
7058
17.5k
                        if (ignoreDepth == 0)
7059
5.55k
                            break;
7060
11.9k
                        ignoreDepth--;
7061
3.31M
                    } else {
7062
3.31M
                        NEXT;
7063
3.31M
                    }
7064
3.35M
                }
7065
7066
5.58k
                if (ctxt->input->id != id) {
7067
138
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7068
138
                                   "All markup of the conditional section is"
7069
138
                                   " not in the same entity\n");
7070
138
                }
7071
5.58k
            } else {
7072
224
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7073
224
                xmlHaltParser(ctxt);
7074
224
                goto error;
7075
224
            }
7076
30.2k
        } else if ((depth > 0) &&
7077
23.3k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7078
12.4k
            depth--;
7079
12.4k
            if (ctxt->input->id != inputIds[depth]) {
7080
49
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7081
49
                               "All markup of the conditional section is not"
7082
49
                               " in the same entity\n");
7083
49
            }
7084
12.4k
            SKIP(3);
7085
12.4k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7086
10.5k
            xmlParseMarkupDecl(ctxt);
7087
10.5k
        } else {
7088
341
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7089
341
            xmlHaltParser(ctxt);
7090
341
            goto error;
7091
341
        }
7092
7093
47.2k
        if (depth == 0)
7094
12.8k
            break;
7095
7096
34.4k
        SKIP_BLANKS_PE;
7097
34.4k
        SHRINK;
7098
34.4k
        GROW;
7099
34.4k
    }
7100
7101
19.1k
error:
7102
19.1k
    xmlFree(inputIds);
7103
19.1k
}
7104
7105
/**
7106
 * xmlParseMarkupDecl:
7107
 * @ctxt:  an XML parser context
7108
 *
7109
 * DEPRECATED: Internal function, don't use.
7110
 *
7111
 * Parse markup declarations. Always consumes '<!' or '<?'.
7112
 *
7113
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7114
 *                     NotationDecl | PI | Comment
7115
 *
7116
 * [ VC: Proper Declaration/PE Nesting ]
7117
 * Parameter-entity replacement text must be properly nested with
7118
 * markup declarations. That is to say, if either the first character
7119
 * or the last character of a markup declaration (markupdecl above) is
7120
 * contained in the replacement text for a parameter-entity reference,
7121
 * both must be contained in the same replacement text.
7122
 *
7123
 * [ WFC: PEs in Internal Subset ]
7124
 * In the internal DTD subset, parameter-entity references can occur
7125
 * only where markup declarations can occur, not within markup declarations.
7126
 * (This does not apply to references that occur in external parameter
7127
 * entities or to the external subset.)
7128
 */
7129
void
7130
3.89M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7131
3.89M
    GROW;
7132
3.89M
    if (CUR == '<') {
7133
3.89M
        if (NXT(1) == '!') {
7134
3.84M
      switch (NXT(2)) {
7135
629k
          case 'E':
7136
629k
        if (NXT(3) == 'L')
7137
216k
      xmlParseElementDecl(ctxt);
7138
413k
        else if (NXT(3) == 'N')
7139
413k
      xmlParseEntityDecl(ctxt);
7140
454
                    else
7141
454
                        SKIP(2);
7142
629k
        break;
7143
405k
          case 'A':
7144
405k
        xmlParseAttributeListDecl(ctxt);
7145
405k
        break;
7146
43.6k
          case 'N':
7147
43.6k
        xmlParseNotationDecl(ctxt);
7148
43.6k
        break;
7149
2.71M
          case '-':
7150
2.71M
        xmlParseComment(ctxt);
7151
2.71M
        break;
7152
47.7k
    default:
7153
        /* there is an error but it will be detected later */
7154
47.7k
                    SKIP(2);
7155
47.7k
        break;
7156
3.84M
      }
7157
3.84M
  } else if (NXT(1) == '?') {
7158
55.9k
      xmlParsePI(ctxt);
7159
55.9k
  }
7160
3.89M
    }
7161
3.89M
}
7162
7163
/**
7164
 * xmlParseTextDecl:
7165
 * @ctxt:  an XML parser context
7166
 *
7167
 * DEPRECATED: Internal function, don't use.
7168
 *
7169
 * parse an XML declaration header for external entities
7170
 *
7171
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7172
 */
7173
7174
void
7175
94.7k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7176
94.7k
    xmlChar *version;
7177
7178
    /*
7179
     * We know that '<?xml' is here.
7180
     */
7181
94.7k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7182
94.7k
  SKIP(5);
7183
94.7k
    } else {
7184
34
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7185
34
  return;
7186
34
    }
7187
7188
94.7k
    if (SKIP_BLANKS == 0) {
7189
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7190
0
           "Space needed after '<?xml'\n");
7191
0
    }
7192
7193
    /*
7194
     * We may have the VersionInfo here.
7195
     */
7196
94.7k
    version = xmlParseVersionInfo(ctxt);
7197
94.7k
    if (version == NULL) {
7198
69.7k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7199
69.7k
        if (version == NULL) {
7200
68
            xmlErrMemory(ctxt);
7201
68
            return;
7202
68
        }
7203
69.7k
    } else {
7204
24.9k
  if (SKIP_BLANKS == 0) {
7205
5.00k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
5.00k
               "Space needed here\n");
7207
5.00k
  }
7208
24.9k
    }
7209
94.6k
    ctxt->input->version = version;
7210
7211
    /*
7212
     * We must have the encoding declaration
7213
     */
7214
94.6k
    xmlParseEncodingDecl(ctxt);
7215
7216
94.6k
    SKIP_BLANKS;
7217
94.6k
    if ((RAW == '?') && (NXT(1) == '>')) {
7218
6.66k
        SKIP(2);
7219
88.0k
    } else if (RAW == '>') {
7220
        /* Deprecated old WD ... */
7221
782
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7222
782
  NEXT;
7223
87.2k
    } else {
7224
87.2k
        int c;
7225
7226
87.2k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7227
40.0M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7228
40.0M
            NEXT;
7229
40.0M
            if (c == '>')
7230
46.1k
                break;
7231
40.0M
        }
7232
87.2k
    }
7233
94.6k
}
7234
7235
/**
7236
 * xmlParseExternalSubset:
7237
 * @ctxt:  an XML parser context
7238
 * @ExternalID: the external identifier
7239
 * @SystemID: the system identifier (or URL)
7240
 *
7241
 * parse Markup declarations from an external subset
7242
 *
7243
 * [30] extSubset ::= textDecl? extSubsetDecl
7244
 *
7245
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7246
 */
7247
void
7248
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7249
10.6k
                       const xmlChar *SystemID) {
7250
10.6k
    int oldInputNr;
7251
7252
10.6k
    xmlCtxtInitializeLate(ctxt);
7253
7254
10.6k
    xmlDetectEncoding(ctxt);
7255
7256
10.6k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7257
555
  xmlParseTextDecl(ctxt);
7258
555
    }
7259
10.6k
    if (ctxt->myDoc == NULL) {
7260
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7261
0
  if (ctxt->myDoc == NULL) {
7262
0
      xmlErrMemory(ctxt);
7263
0
      return;
7264
0
  }
7265
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7266
0
    }
7267
10.6k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7268
10.6k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7269
12
        xmlErrMemory(ctxt);
7270
12
    }
7271
7272
10.6k
    ctxt->inSubset = 2;
7273
10.6k
    oldInputNr = ctxt->inputNr;
7274
7275
10.6k
    SKIP_BLANKS_PE;
7276
889k
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7277
889k
           (!PARSER_STOPPED(ctxt))) {
7278
883k
  GROW;
7279
883k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7280
4.72k
            xmlParseConditionalSections(ctxt);
7281
878k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7282
874k
            xmlParseMarkupDecl(ctxt);
7283
874k
        } else {
7284
3.98k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7285
3.98k
            xmlHaltParser(ctxt);
7286
3.98k
            return;
7287
3.98k
        }
7288
879k
        SKIP_BLANKS_PE;
7289
879k
        SHRINK;
7290
879k
    }
7291
7292
7.23k
    while (ctxt->inputNr > oldInputNr)
7293
599
        xmlPopPE(ctxt);
7294
7295
6.63k
    if (RAW != 0) {
7296
1.39k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7297
1.39k
    }
7298
6.63k
}
7299
7300
/**
7301
 * xmlParseReference:
7302
 * @ctxt:  an XML parser context
7303
 *
7304
 * DEPRECATED: Internal function, don't use.
7305
 *
7306
 * parse and handle entity references in content, depending on the SAX
7307
 * interface, this may end-up in a call to character() if this is a
7308
 * CharRef, a predefined entity, if there is no reference() callback.
7309
 * or if the parser was asked to switch to that mode.
7310
 *
7311
 * Always consumes '&'.
7312
 *
7313
 * [67] Reference ::= EntityRef | CharRef
7314
 */
7315
void
7316
2.74M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7317
2.74M
    xmlEntityPtr ent = NULL;
7318
2.74M
    const xmlChar *name;
7319
2.74M
    xmlChar *val;
7320
7321
2.74M
    if (RAW != '&')
7322
0
        return;
7323
7324
    /*
7325
     * Simple case of a CharRef
7326
     */
7327
2.74M
    if (NXT(1) == '#') {
7328
752k
  int i = 0;
7329
752k
  xmlChar out[16];
7330
752k
  int value = xmlParseCharRef(ctxt);
7331
7332
752k
  if (value == 0)
7333
291k
      return;
7334
7335
        /*
7336
         * Just encode the value in UTF-8
7337
         */
7338
460k
        COPY_BUF(out, i, value);
7339
460k
        out[i] = 0;
7340
460k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7341
460k
            (!ctxt->disableSAX))
7342
413k
            ctxt->sax->characters(ctxt->userData, out, i);
7343
460k
  return;
7344
752k
    }
7345
7346
    /*
7347
     * We are seeing an entity reference
7348
     */
7349
1.99M
    name = xmlParseEntityRefInternal(ctxt);
7350
1.99M
    if (name == NULL)
7351
908k
        return;
7352
1.08M
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7353
1.08M
    if (ent == NULL) {
7354
        /*
7355
         * Create a reference for undeclared entities.
7356
         */
7357
583k
        if ((ctxt->replaceEntities == 0) &&
7358
583k
            (ctxt->sax != NULL) &&
7359
583k
            (ctxt->disableSAX == 0) &&
7360
583k
            (ctxt->sax->reference != NULL)) {
7361
306k
            ctxt->sax->reference(ctxt->userData, name);
7362
306k
        }
7363
583k
        return;
7364
583k
    }
7365
499k
    if (!ctxt->wellFormed)
7366
344k
  return;
7367
7368
    /* special case of predefined entities */
7369
155k
    if ((ent->name == NULL) ||
7370
155k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7371
40.0k
  val = ent->content;
7372
40.0k
  if (val == NULL) return;
7373
  /*
7374
   * inline the entity.
7375
   */
7376
40.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7377
40.0k
      (!ctxt->disableSAX))
7378
40.0k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7379
40.0k
  return;
7380
40.0k
    }
7381
7382
    /*
7383
     * The first reference to the entity trigger a parsing phase
7384
     * where the ent->children is filled with the result from
7385
     * the parsing.
7386
     * Note: external parsed entities will not be loaded, it is not
7387
     * required for a non-validating parser, unless the parsing option
7388
     * of validating, or substituting entities were given. Doing so is
7389
     * far more secure as the parser will only process data coming from
7390
     * the document entity by default.
7391
     *
7392
     * FIXME: This doesn't work correctly since entities can be
7393
     * expanded with different namespace declarations in scope.
7394
     * For example:
7395
     *
7396
     * <!DOCTYPE doc [
7397
     *   <!ENTITY ent "<ns:elem/>">
7398
     * ]>
7399
     * <doc>
7400
     *   <decl1 xmlns:ns="urn:ns1">
7401
     *     &ent;
7402
     *   </decl1>
7403
     *   <decl2 xmlns:ns="urn:ns2">
7404
     *     &ent;
7405
     *   </decl2>
7406
     * </doc>
7407
     *
7408
     * Proposed fix:
7409
     *
7410
     * - Ignore current namespace declarations when parsing the
7411
     *   entity. If a prefix can't be resolved, don't report an error
7412
     *   but mark it as unresolved.
7413
     * - Try to resolve these prefixes when expanding the entity.
7414
     *   This will require a specialized version of xmlStaticCopyNode
7415
     *   which can also make use of the namespace hash table to avoid
7416
     *   quadratic behavior.
7417
     *
7418
     * Alternatively, we could simply reparse the entity on each
7419
     * expansion like we already do with custom SAX callbacks.
7420
     * External entity content should be cached in this case.
7421
     */
7422
115k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7423
115k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7424
34.1k
         ((ctxt->replaceEntities) ||
7425
112k
          (ctxt->validate)))) {
7426
112k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7427
23.4k
            xmlCtxtParseEntity(ctxt, ent);
7428
89.0k
        } else if (ent->children == NULL) {
7429
            /*
7430
             * Probably running in SAX mode and the callbacks don't
7431
             * build the entity content. Parse the entity again.
7432
             *
7433
             * This will also be triggered in normal tree builder mode
7434
             * if an entity happens to be empty, causing unnecessary
7435
             * reloads. It's hard to come up with a reliable check in
7436
             * which mode we're running.
7437
             */
7438
46.6k
            xmlCtxtParseEntity(ctxt, ent);
7439
46.6k
        }
7440
112k
    }
7441
7442
    /*
7443
     * We also check for amplification if entities aren't substituted.
7444
     * They might be expanded later.
7445
     */
7446
115k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7447
435
        return;
7448
7449
114k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7450
7.76k
        return;
7451
7452
107k
    if (ctxt->replaceEntities == 0) {
7453
  /*
7454
   * Create a reference
7455
   */
7456
21.6k
        if (ctxt->sax->reference != NULL)
7457
21.6k
      ctxt->sax->reference(ctxt->userData, ent->name);
7458
85.4k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7459
39.4k
        xmlNodePtr copy, cur;
7460
7461
        /*
7462
         * Seems we are generating the DOM content, copy the tree
7463
   */
7464
39.4k
        cur = ent->children;
7465
7466
        /*
7467
         * Handle first text node with SAX to coalesce text efficiently
7468
         */
7469
39.4k
        if ((cur->type == XML_TEXT_NODE) ||
7470
39.4k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7471
30.8k
            int len = xmlStrlen(cur->content);
7472
7473
30.8k
            if ((cur->type == XML_TEXT_NODE) ||
7474
30.8k
                (ctxt->sax->cdataBlock == NULL)) {
7475
29.9k
                if (ctxt->sax->characters != NULL)
7476
29.9k
                    ctxt->sax->characters(ctxt, cur->content, len);
7477
29.9k
            } else {
7478
892
                if (ctxt->sax->cdataBlock != NULL)
7479
892
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7480
892
            }
7481
7482
30.8k
            cur = cur->next;
7483
30.8k
        }
7484
7485
149k
        while (cur != NULL) {
7486
115k
            xmlNodePtr last;
7487
7488
            /*
7489
             * Handle last text node with SAX to coalesce text efficiently
7490
             */
7491
115k
            if ((cur->next == NULL) &&
7492
115k
                ((cur->type == XML_TEXT_NODE) ||
7493
21.4k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7494
5.23k
                int len = xmlStrlen(cur->content);
7495
7496
5.23k
                if ((cur->type == XML_TEXT_NODE) ||
7497
5.23k
                    (ctxt->sax->cdataBlock == NULL)) {
7498
4.62k
                    if (ctxt->sax->characters != NULL)
7499
4.62k
                        ctxt->sax->characters(ctxt, cur->content, len);
7500
4.62k
                } else {
7501
609
                    if (ctxt->sax->cdataBlock != NULL)
7502
609
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7503
609
                }
7504
7505
5.23k
                break;
7506
5.23k
            }
7507
7508
            /*
7509
             * Reset coalesce buffer stats only for non-text nodes.
7510
             */
7511
110k
            ctxt->nodemem = 0;
7512
110k
            ctxt->nodelen = 0;
7513
7514
110k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7515
7516
110k
            if (copy == NULL) {
7517
739
                xmlErrMemory(ctxt);
7518
739
                break;
7519
739
            }
7520
7521
109k
            if (ctxt->parseMode == XML_PARSE_READER) {
7522
                /* Needed for reader */
7523
22.7k
                copy->extra = cur->extra;
7524
                /* Maybe needed for reader */
7525
22.7k
                copy->_private = cur->_private;
7526
22.7k
            }
7527
7528
109k
            copy->parent = ctxt->node;
7529
109k
            last = ctxt->node->last;
7530
109k
            if (last == NULL) {
7531
2.50k
                ctxt->node->children = copy;
7532
107k
            } else {
7533
107k
                last->next = copy;
7534
107k
                copy->prev = last;
7535
107k
            }
7536
109k
            ctxt->node->last = copy;
7537
7538
109k
            cur = cur->next;
7539
109k
        }
7540
39.4k
    }
7541
107k
}
7542
7543
static void
7544
2.19M
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7545
    /*
7546
     * [ WFC: Entity Declared ]
7547
     * In a document without any DTD, a document with only an
7548
     * internal DTD subset which contains no parameter entity
7549
     * references, or a document with "standalone='yes'", the
7550
     * Name given in the entity reference must match that in an
7551
     * entity declaration, except that well-formed documents
7552
     * need not declare any of the following entities: amp, lt,
7553
     * gt, apos, quot.
7554
     * The declaration of a parameter entity must precede any
7555
     * reference to it.
7556
     * Similarly, the declaration of a general entity must
7557
     * precede any reference to it which appears in a default
7558
     * value in an attribute-list declaration. Note that if
7559
     * entities are declared in the external subset or in
7560
     * external parameter entities, a non-validating processor
7561
     * is not obligated to read and process their declarations;
7562
     * for such documents, the rule that an entity must be
7563
     * declared is a well-formedness constraint only if
7564
     * standalone='yes'.
7565
     */
7566
2.19M
    if ((ctxt->standalone == 1) ||
7567
2.19M
        ((ctxt->hasExternalSubset == 0) &&
7568
2.16M
         (ctxt->hasPErefs == 0))) {
7569
1.27M
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7570
1.27M
                          "Entity '%s' not defined\n", name);
7571
1.27M
    } else if (ctxt->validate) {
7572
        /*
7573
         * [ VC: Entity Declared ]
7574
         * In a document with an external subset or external
7575
         * parameter entities with "standalone='no'", ...
7576
         * ... The declaration of a parameter entity must
7577
         * precede any reference to it...
7578
         */
7579
158k
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7580
158k
                         "Entity '%s' not defined\n", name, NULL);
7581
758k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7582
758k
               ((ctxt->replaceEntities) &&
7583
741k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7584
        /*
7585
         * Also raise a non-fatal error
7586
         *
7587
         * - if the external subset is loaded and all entity declarations
7588
         *   should be available, or
7589
         * - entity substition was requested without restricting
7590
         *   external entity access.
7591
         */
7592
741k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7593
741k
                     "Entity '%s' not defined\n", name);
7594
741k
    } else {
7595
16.8k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7596
16.8k
                      "Entity '%s' not defined\n", name, NULL);
7597
16.8k
    }
7598
7599
2.19M
    ctxt->valid = 0;
7600
2.19M
}
7601
7602
static xmlEntityPtr
7603
13.6M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7604
13.6M
    xmlEntityPtr ent;
7605
7606
    /*
7607
     * Predefined entities override any extra definition
7608
     */
7609
13.6M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7610
11.0M
        ent = xmlGetPredefinedEntity(name);
7611
11.0M
        if (ent != NULL)
7612
3.04M
            return(ent);
7613
11.0M
    }
7614
7615
    /*
7616
     * Ask first SAX for entity resolution, otherwise try the
7617
     * entities which may have stored in the parser context.
7618
     */
7619
10.5M
    if (ctxt->sax != NULL) {
7620
10.5M
  if (ctxt->sax->getEntity != NULL)
7621
10.5M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7622
10.5M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7623
10.5M
      (ctxt->options & XML_PARSE_OLDSAX))
7624
67.5k
      ent = xmlGetPredefinedEntity(name);
7625
10.5M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7626
10.5M
      (ctxt->userData==ctxt)) {
7627
314k
      ent = xmlSAX2GetEntity(ctxt, name);
7628
314k
  }
7629
10.5M
    }
7630
7631
10.5M
    if (ent == NULL) {
7632
1.45M
        xmlHandleUndeclaredEntity(ctxt, name);
7633
1.45M
    }
7634
7635
    /*
7636
     * [ WFC: Parsed Entity ]
7637
     * An entity reference must not contain the name of an
7638
     * unparsed entity
7639
     */
7640
9.12M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7641
1.44k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7642
1.44k
     "Entity reference to unparsed entity %s\n", name);
7643
1.44k
        ent = NULL;
7644
1.44k
    }
7645
7646
    /*
7647
     * [ WFC: No External Entity References ]
7648
     * Attribute values cannot contain direct or indirect
7649
     * entity references to external entities.
7650
     */
7651
9.12M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7652
213k
        if (inAttr) {
7653
67.5k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7654
67.5k
                 "Attribute references external entity '%s'\n", name);
7655
67.5k
            ent = NULL;
7656
67.5k
        }
7657
213k
    }
7658
7659
10.5M
    return(ent);
7660
13.6M
}
7661
7662
/**
7663
 * xmlParseEntityRefInternal:
7664
 * @ctxt:  an XML parser context
7665
 * @inAttr:  whether we are in an attribute value
7666
 *
7667
 * Parse an entity reference. Always consumes '&'.
7668
 *
7669
 * [68] EntityRef ::= '&' Name ';'
7670
 *
7671
 * Returns the name, or NULL in case of error.
7672
 */
7673
static const xmlChar *
7674
6.50M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7675
6.50M
    const xmlChar *name;
7676
7677
6.50M
    GROW;
7678
7679
6.50M
    if (RAW != '&')
7680
0
        return(NULL);
7681
6.50M
    NEXT;
7682
6.50M
    name = xmlParseName(ctxt);
7683
6.50M
    if (name == NULL) {
7684
902k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7685
902k
           "xmlParseEntityRef: no name\n");
7686
902k
        return(NULL);
7687
902k
    }
7688
5.59M
    if (RAW != ';') {
7689
514k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7690
514k
  return(NULL);
7691
514k
    }
7692
5.08M
    NEXT;
7693
7694
5.08M
    return(name);
7695
5.59M
}
7696
7697
/**
7698
 * xmlParseEntityRef:
7699
 * @ctxt:  an XML parser context
7700
 *
7701
 * DEPRECATED: Internal function, don't use.
7702
 *
7703
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7704
 */
7705
xmlEntityPtr
7706
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7707
0
    const xmlChar *name;
7708
7709
0
    if (ctxt == NULL)
7710
0
        return(NULL);
7711
7712
0
    name = xmlParseEntityRefInternal(ctxt);
7713
0
    if (name == NULL)
7714
0
        return(NULL);
7715
7716
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7717
0
}
7718
7719
/**
7720
 * xmlParseStringEntityRef:
7721
 * @ctxt:  an XML parser context
7722
 * @str:  a pointer to an index in the string
7723
 *
7724
 * parse ENTITY references declarations, but this version parses it from
7725
 * a string value.
7726
 *
7727
 * [68] EntityRef ::= '&' Name ';'
7728
 *
7729
 * [ WFC: Entity Declared ]
7730
 * In a document without any DTD, a document with only an internal DTD
7731
 * subset which contains no parameter entity references, or a document
7732
 * with "standalone='yes'", the Name given in the entity reference
7733
 * must match that in an entity declaration, except that well-formed
7734
 * documents need not declare any of the following entities: amp, lt,
7735
 * gt, apos, quot.  The declaration of a parameter entity must precede
7736
 * any reference to it.  Similarly, the declaration of a general entity
7737
 * must precede any reference to it which appears in a default value in an
7738
 * attribute-list declaration. Note that if entities are declared in the
7739
 * external subset or in external parameter entities, a non-validating
7740
 * processor is not obligated to read and process their declarations;
7741
 * for such documents, the rule that an entity must be declared is a
7742
 * well-formedness constraint only if standalone='yes'.
7743
 *
7744
 * [ WFC: Parsed Entity ]
7745
 * An entity reference must not contain the name of an unparsed entity
7746
 *
7747
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7748
 * is updated to the current location in the string.
7749
 */
7750
static xmlChar *
7751
8.54M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7752
8.54M
    xmlChar *name;
7753
8.54M
    const xmlChar *ptr;
7754
8.54M
    xmlChar cur;
7755
7756
8.54M
    if ((str == NULL) || (*str == NULL))
7757
0
        return(NULL);
7758
8.54M
    ptr = *str;
7759
8.54M
    cur = *ptr;
7760
8.54M
    if (cur != '&')
7761
0
  return(NULL);
7762
7763
8.54M
    ptr++;
7764
8.54M
    name = xmlParseStringName(ctxt, &ptr);
7765
8.54M
    if (name == NULL) {
7766
207
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7767
207
           "xmlParseStringEntityRef: no name\n");
7768
207
  *str = ptr;
7769
207
  return(NULL);
7770
207
    }
7771
8.54M
    if (*ptr != ';') {
7772
102
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7773
102
        xmlFree(name);
7774
102
  *str = ptr;
7775
102
  return(NULL);
7776
102
    }
7777
8.54M
    ptr++;
7778
7779
8.54M
    *str = ptr;
7780
8.54M
    return(name);
7781
8.54M
}
7782
7783
/**
7784
 * xmlParsePEReference:
7785
 * @ctxt:  an XML parser context
7786
 *
7787
 * DEPRECATED: Internal function, don't use.
7788
 *
7789
 * Parse a parameter entity reference. Always consumes '%'.
7790
 *
7791
 * The entity content is handled directly by pushing its content as
7792
 * a new input stream.
7793
 *
7794
 * [69] PEReference ::= '%' Name ';'
7795
 *
7796
 * [ WFC: No Recursion ]
7797
 * A parsed entity must not contain a recursive
7798
 * reference to itself, either directly or indirectly.
7799
 *
7800
 * [ WFC: Entity Declared ]
7801
 * In a document without any DTD, a document with only an internal DTD
7802
 * subset which contains no parameter entity references, or a document
7803
 * with "standalone='yes'", ...  ... The declaration of a parameter
7804
 * entity must precede any reference to it...
7805
 *
7806
 * [ VC: Entity Declared ]
7807
 * In a document with an external subset or external parameter entities
7808
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7809
 * must precede any reference to it...
7810
 *
7811
 * [ WFC: In DTD ]
7812
 * Parameter-entity references may only appear in the DTD.
7813
 * NOTE: misleading but this is handled.
7814
 */
7815
void
7816
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7817
2.49M
{
7818
2.49M
    const xmlChar *name;
7819
2.49M
    xmlEntityPtr entity = NULL;
7820
2.49M
    xmlParserInputPtr input;
7821
7822
2.49M
    if (RAW != '%')
7823
0
        return;
7824
2.49M
    NEXT;
7825
2.49M
    name = xmlParseName(ctxt);
7826
2.49M
    if (name == NULL) {
7827
44.1k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7828
44.1k
  return;
7829
44.1k
    }
7830
2.45M
    if (RAW != ';') {
7831
77.3k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7832
77.3k
        return;
7833
77.3k
    }
7834
7835
2.37M
    NEXT;
7836
7837
    /* Must be set before xmlHandleUndeclaredEntity */
7838
2.37M
    ctxt->hasPErefs = 1;
7839
7840
    /*
7841
     * Request the entity from SAX
7842
     */
7843
2.37M
    if ((ctxt->sax != NULL) &&
7844
2.37M
  (ctxt->sax->getParameterEntity != NULL))
7845
2.37M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7846
7847
2.37M
    if (entity == NULL) {
7848
720k
        xmlHandleUndeclaredEntity(ctxt, name);
7849
1.65M
    } else {
7850
  /*
7851
   * Internal checking in case the entity quest barfed
7852
   */
7853
1.65M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7854
1.65M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7855
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7856
0
      "Internal: %%%s; is not a parameter entity\n",
7857
0
        name, NULL);
7858
1.65M
  } else {
7859
1.65M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7860
1.65M
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7861
1.38M
     ((ctxt->loadsubset == 0) &&
7862
1.38M
      (ctxt->replaceEntities == 0) &&
7863
1.38M
      (ctxt->validate == 0))))
7864
1.88k
    return;
7865
7866
1.65M
            if (entity->flags & XML_ENT_EXPANDING) {
7867
110
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7868
110
                xmlHaltParser(ctxt);
7869
110
                return;
7870
110
            }
7871
7872
1.65M
            if (ctxt->input_id >= INT_MAX) {
7873
0
                xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
7874
0
                            "Input ID overflow\n");
7875
0
                return;
7876
0
            }
7877
7878
1.65M
      input = xmlNewEntityInputStream(ctxt, entity);
7879
1.65M
      if (xmlPushInput(ctxt, input) < 0) {
7880
985k
                xmlFreeInputStream(input);
7881
985k
    return;
7882
985k
            }
7883
7884
669k
            input->id = ++ctxt->input_id;
7885
7886
669k
            entity->flags |= XML_ENT_EXPANDING;
7887
7888
669k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7889
398k
                xmlDetectEncoding(ctxt);
7890
7891
398k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7892
398k
                    (IS_BLANK_CH(NXT(5)))) {
7893
77.1k
                    xmlParseTextDecl(ctxt);
7894
77.1k
                }
7895
398k
            }
7896
669k
  }
7897
1.65M
    }
7898
2.37M
}
7899
7900
/**
7901
 * xmlLoadEntityContent:
7902
 * @ctxt:  an XML parser context
7903
 * @entity: an unloaded system entity
7904
 *
7905
 * Load the content of an entity.
7906
 *
7907
 * Returns 0 in case of success and -1 in case of failure
7908
 */
7909
static int
7910
36.8k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7911
36.8k
    xmlParserInputPtr oldinput, input = NULL;
7912
36.8k
    xmlParserInputPtr *oldinputTab;
7913
36.8k
    const xmlChar *oldencoding;
7914
36.8k
    xmlChar *content = NULL;
7915
36.8k
    xmlResourceType rtype;
7916
36.8k
    size_t length, i;
7917
36.8k
    int oldinputNr, oldinputMax;
7918
36.8k
    int ret = -1;
7919
36.8k
    int res;
7920
7921
36.8k
    if ((ctxt == NULL) || (entity == NULL) ||
7922
36.8k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7923
36.8k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7924
36.8k
  (entity->content != NULL)) {
7925
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7926
0
              "xmlLoadEntityContent parameter error");
7927
0
        return(-1);
7928
0
    }
7929
7930
36.8k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7931
36.8k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7932
0
    else
7933
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7934
7935
36.8k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7936
36.8k
                            (char *) entity->ExternalID, rtype);
7937
36.8k
    if (input == NULL)
7938
4.23k
        return(-1);
7939
7940
32.5k
    oldinput = ctxt->input;
7941
32.5k
    oldinputNr = ctxt->inputNr;
7942
32.5k
    oldinputMax = ctxt->inputMax;
7943
32.5k
    oldinputTab = ctxt->inputTab;
7944
32.5k
    oldencoding = ctxt->encoding;
7945
7946
32.5k
    ctxt->input = NULL;
7947
32.5k
    ctxt->inputNr = 0;
7948
32.5k
    ctxt->inputMax = 1;
7949
32.5k
    ctxt->encoding = NULL;
7950
32.5k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7951
32.5k
    if (ctxt->inputTab == NULL) {
7952
15
        xmlErrMemory(ctxt);
7953
15
        xmlFreeInputStream(input);
7954
15
        goto error;
7955
15
    }
7956
7957
32.5k
    xmlBufResetInput(input->buf->buffer, input);
7958
7959
32.5k
    if (inputPush(ctxt, input) < 0) {
7960
37
        xmlFreeInputStream(input);
7961
37
        goto error;
7962
37
    }
7963
7964
32.5k
    xmlDetectEncoding(ctxt);
7965
7966
    /*
7967
     * Parse a possible text declaration first
7968
     */
7969
32.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7970
15.0k
  xmlParseTextDecl(ctxt);
7971
        /*
7972
         * An XML-1.0 document can't reference an entity not XML-1.0
7973
         */
7974
15.0k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7975
15.0k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7976
2.35k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7977
2.35k
                           "Version mismatch between document and entity\n");
7978
2.35k
        }
7979
15.0k
    }
7980
7981
32.5k
    length = input->cur - input->base;
7982
32.5k
    xmlBufShrink(input->buf->buffer, length);
7983
32.5k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7984
7985
58.9k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7986
26.4k
        ;
7987
7988
32.5k
    xmlBufResetInput(input->buf->buffer, input);
7989
7990
32.5k
    if (res < 0) {
7991
3.63k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7992
3.63k
        goto error;
7993
3.63k
    }
7994
7995
28.8k
    length = xmlBufUse(input->buf->buffer);
7996
28.8k
    if (length > INT_MAX) {
7997
0
        xmlErrMemory(ctxt);
7998
0
        goto error;
7999
0
    }
8000
8001
28.8k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8002
28.8k
    if (content == NULL) {
8003
46
        xmlErrMemory(ctxt);
8004
46
        goto error;
8005
46
    }
8006
8007
13.7M
    for (i = 0; i < length; ) {
8008
13.7M
        int clen = length - i;
8009
13.7M
        int c = xmlGetUTF8Char(content + i, &clen);
8010
8011
13.7M
        if ((c < 0) || (!IS_CHAR(c))) {
8012
28.2k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8013
28.2k
                              "xmlLoadEntityContent: invalid char value %d\n",
8014
28.2k
                              content[i]);
8015
28.2k
            goto error;
8016
28.2k
        }
8017
13.7M
        i += clen;
8018
13.7M
    }
8019
8020
576
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8021
576
    entity->content = content;
8022
576
    entity->length = length;
8023
576
    content = NULL;
8024
576
    ret = 0;
8025
8026
32.5k
error:
8027
65.1k
    while (ctxt->inputNr > 0)
8028
32.5k
        xmlFreeInputStream(inputPop(ctxt));
8029
32.5k
    xmlFree(ctxt->inputTab);
8030
32.5k
    xmlFree((xmlChar *) ctxt->encoding);
8031
8032
32.5k
    ctxt->input = oldinput;
8033
32.5k
    ctxt->inputNr = oldinputNr;
8034
32.5k
    ctxt->inputMax = oldinputMax;
8035
32.5k
    ctxt->inputTab = oldinputTab;
8036
32.5k
    ctxt->encoding = oldencoding;
8037
8038
32.5k
    xmlFree(content);
8039
8040
32.5k
    return(ret);
8041
576
}
8042
8043
/**
8044
 * xmlParseStringPEReference:
8045
 * @ctxt:  an XML parser context
8046
 * @str:  a pointer to an index in the string
8047
 *
8048
 * Parse a parameter entity reference from a string.
8049
 *
8050
 * [69] PEReference ::= '%' Name ';'
8051
 *
8052
 * [ WFC: No Recursion ]
8053
 * A parsed entity must not contain a recursive
8054
 * reference to itself, either directly or indirectly.
8055
 *
8056
 * [ WFC: Entity Declared ]
8057
 * In a document without any DTD, a document with only an internal DTD
8058
 * subset which contains no parameter entity references, or a document
8059
 * with "standalone='yes'", ...  ... The declaration of a parameter
8060
 * entity must precede any reference to it...
8061
 *
8062
 * [ VC: Entity Declared ]
8063
 * In a document with an external subset or external parameter entities
8064
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8065
 * must precede any reference to it...
8066
 *
8067
 * [ WFC: In DTD ]
8068
 * Parameter-entity references may only appear in the DTD.
8069
 * NOTE: misleading but this is handled.
8070
 *
8071
 * Returns the parameter entity if found, or NULL otherwise.
8072
 *         str is updated to the current value of the index
8073
 */
8074
static xmlEntityPtr
8075
146k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8076
146k
    const xmlChar *ptr;
8077
146k
    xmlChar cur;
8078
146k
    xmlChar *name;
8079
146k
    xmlEntityPtr entity = NULL;
8080
8081
146k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8082
146k
    ptr = *str;
8083
146k
    cur = *ptr;
8084
146k
    if (cur != '%')
8085
0
        return(NULL);
8086
146k
    ptr++;
8087
146k
    name = xmlParseStringName(ctxt, &ptr);
8088
146k
    if (name == NULL) {
8089
7.86k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8090
7.86k
           "xmlParseStringPEReference: no name\n");
8091
7.86k
  *str = ptr;
8092
7.86k
  return(NULL);
8093
7.86k
    }
8094
138k
    cur = *ptr;
8095
138k
    if (cur != ';') {
8096
16.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8097
16.2k
  xmlFree(name);
8098
16.2k
  *str = ptr;
8099
16.2k
  return(NULL);
8100
16.2k
    }
8101
122k
    ptr++;
8102
8103
    /* Must be set before xmlHandleUndeclaredEntity */
8104
122k
    ctxt->hasPErefs = 1;
8105
8106
    /*
8107
     * Request the entity from SAX
8108
     */
8109
122k
    if ((ctxt->sax != NULL) &&
8110
122k
  (ctxt->sax->getParameterEntity != NULL))
8111
122k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8112
8113
122k
    if (entity == NULL) {
8114
11.3k
        xmlHandleUndeclaredEntity(ctxt, name);
8115
110k
    } else {
8116
  /*
8117
   * Internal checking in case the entity quest barfed
8118
   */
8119
110k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8120
110k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8121
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8122
0
        "%%%s; is not a parameter entity\n",
8123
0
        name, NULL);
8124
0
  }
8125
110k
    }
8126
8127
122k
    xmlFree(name);
8128
122k
    *str = ptr;
8129
122k
    return(entity);
8130
138k
}
8131
8132
/**
8133
 * xmlParseDocTypeDecl:
8134
 * @ctxt:  an XML parser context
8135
 *
8136
 * DEPRECATED: Internal function, don't use.
8137
 *
8138
 * parse a DOCTYPE declaration
8139
 *
8140
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8141
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8142
 *
8143
 * [ VC: Root Element Type ]
8144
 * The Name in the document type declaration must match the element
8145
 * type of the root element.
8146
 */
8147
8148
void
8149
258k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8150
258k
    const xmlChar *name = NULL;
8151
258k
    xmlChar *ExternalID = NULL;
8152
258k
    xmlChar *URI = NULL;
8153
8154
    /*
8155
     * We know that '<!DOCTYPE' has been detected.
8156
     */
8157
258k
    SKIP(9);
8158
8159
258k
    SKIP_BLANKS;
8160
8161
    /*
8162
     * Parse the DOCTYPE name.
8163
     */
8164
258k
    name = xmlParseName(ctxt);
8165
258k
    if (name == NULL) {
8166
9.63k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8167
9.63k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8168
9.63k
    }
8169
258k
    ctxt->intSubName = name;
8170
8171
258k
    SKIP_BLANKS;
8172
8173
    /*
8174
     * Check for SystemID and ExternalID
8175
     */
8176
258k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8177
8178
258k
    if ((URI != NULL) || (ExternalID != NULL)) {
8179
160k
        ctxt->hasExternalSubset = 1;
8180
160k
    }
8181
258k
    ctxt->extSubURI = URI;
8182
258k
    ctxt->extSubSystem = ExternalID;
8183
8184
258k
    SKIP_BLANKS;
8185
8186
    /*
8187
     * Create and update the internal subset.
8188
     */
8189
258k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8190
258k
  (!ctxt->disableSAX))
8191
226k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8192
8193
    /*
8194
     * Is there any internal subset declarations ?
8195
     * they are handled separately in xmlParseInternalSubset()
8196
     */
8197
258k
    if (RAW == '[')
8198
197k
  return;
8199
8200
    /*
8201
     * We should be at the end of the DOCTYPE declaration.
8202
     */
8203
61.5k
    if (RAW != '>') {
8204
18.8k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8205
18.8k
    }
8206
61.5k
    NEXT;
8207
61.5k
}
8208
8209
/**
8210
 * xmlParseInternalSubset:
8211
 * @ctxt:  an XML parser context
8212
 *
8213
 * parse the internal subset declaration
8214
 *
8215
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8216
 */
8217
8218
static void
8219
201k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8220
    /*
8221
     * Is there any DTD definition ?
8222
     */
8223
201k
    if (RAW == '[') {
8224
201k
        int oldInputNr = ctxt->inputNr;
8225
8226
201k
        NEXT;
8227
  /*
8228
   * Parse the succession of Markup declarations and
8229
   * PEReferences.
8230
   * Subsequence (markupdecl | PEReference | S)*
8231
   */
8232
201k
  SKIP_BLANKS;
8233
3.62M
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8234
3.62M
               (PARSER_STOPPED(ctxt) == 0)) {
8235
8236
            /*
8237
             * Conditional sections are allowed from external entities included
8238
             * by PE References in the internal subset.
8239
             */
8240
3.49M
            if ((PARSER_EXTERNAL(ctxt)) &&
8241
3.49M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8242
14.4k
                xmlParseConditionalSections(ctxt);
8243
3.47M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8244
3.01M
          xmlParseMarkupDecl(ctxt);
8245
3.01M
            } else if (RAW == '%') {
8246
390k
          xmlParsePEReference(ctxt);
8247
390k
            } else {
8248
72.8k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8249
72.8k
                break;
8250
72.8k
            }
8251
3.41M
      SKIP_BLANKS_PE;
8252
3.41M
            SHRINK;
8253
3.41M
            GROW;
8254
3.41M
  }
8255
8256
212k
        while (ctxt->inputNr > oldInputNr)
8257
10.5k
            xmlPopPE(ctxt);
8258
8259
201k
  if (RAW == ']') {
8260
107k
      NEXT;
8261
107k
      SKIP_BLANKS;
8262
107k
  }
8263
201k
    }
8264
8265
    /*
8266
     * We should be at the end of the DOCTYPE declaration.
8267
     */
8268
201k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8269
2.90k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8270
2.90k
  return;
8271
2.90k
    }
8272
198k
    NEXT;
8273
198k
}
8274
8275
#ifdef LIBXML_SAX1_ENABLED
8276
/**
8277
 * xmlParseAttribute:
8278
 * @ctxt:  an XML parser context
8279
 * @value:  a xmlChar ** used to store the value of the attribute
8280
 *
8281
 * DEPRECATED: Internal function, don't use.
8282
 *
8283
 * parse an attribute
8284
 *
8285
 * [41] Attribute ::= Name Eq AttValue
8286
 *
8287
 * [ WFC: No External Entity References ]
8288
 * Attribute values cannot contain direct or indirect entity references
8289
 * to external entities.
8290
 *
8291
 * [ WFC: No < in Attribute Values ]
8292
 * The replacement text of any entity referred to directly or indirectly in
8293
 * an attribute value (other than "&lt;") must not contain a <.
8294
 *
8295
 * [ VC: Attribute Value Type ]
8296
 * The attribute must have been declared; the value must be of the type
8297
 * declared for it.
8298
 *
8299
 * [25] Eq ::= S? '=' S?
8300
 *
8301
 * With namespace:
8302
 *
8303
 * [NS 11] Attribute ::= QName Eq AttValue
8304
 *
8305
 * Also the case QName == xmlns:??? is handled independently as a namespace
8306
 * definition.
8307
 *
8308
 * Returns the attribute name, and the value in *value.
8309
 */
8310
8311
const xmlChar *
8312
2.66M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8313
2.66M
    const xmlChar *name;
8314
2.66M
    xmlChar *val;
8315
8316
2.66M
    *value = NULL;
8317
2.66M
    GROW;
8318
2.66M
    name = xmlParseName(ctxt);
8319
2.66M
    if (name == NULL) {
8320
956k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8321
956k
                 "error parsing attribute name\n");
8322
956k
        return(NULL);
8323
956k
    }
8324
8325
    /*
8326
     * read the value
8327
     */
8328
1.71M
    SKIP_BLANKS;
8329
1.71M
    if (RAW == '=') {
8330
1.25M
        NEXT;
8331
1.25M
  SKIP_BLANKS;
8332
1.25M
  val = xmlParseAttValue(ctxt);
8333
1.25M
    } else {
8334
452k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8335
452k
         "Specification mandates value for attribute %s\n", name);
8336
452k
  return(name);
8337
452k
    }
8338
8339
    /*
8340
     * Check that xml:lang conforms to the specification
8341
     * No more registered as an error, just generate a warning now
8342
     * since this was deprecated in XML second edition
8343
     */
8344
1.25M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8345
173k
  if (!xmlCheckLanguageID(val)) {
8346
154k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8347
154k
              "Malformed value for xml:lang : %s\n",
8348
154k
        val, NULL);
8349
154k
  }
8350
173k
    }
8351
8352
    /*
8353
     * Check that xml:space conforms to the specification
8354
     */
8355
1.25M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8356
8.75k
  if (xmlStrEqual(val, BAD_CAST "default"))
8357
1.50k
      *(ctxt->space) = 0;
8358
7.25k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8359
2.93k
      *(ctxt->space) = 1;
8360
4.31k
  else {
8361
4.31k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8362
4.31k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8363
4.31k
                                 val, NULL);
8364
4.31k
  }
8365
8.75k
    }
8366
8367
1.25M
    *value = val;
8368
1.25M
    return(name);
8369
1.71M
}
8370
8371
/**
8372
 * xmlParseStartTag:
8373
 * @ctxt:  an XML parser context
8374
 *
8375
 * DEPRECATED: Internal function, don't use.
8376
 *
8377
 * Parse a start tag. Always consumes '<'.
8378
 *
8379
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8380
 *
8381
 * [ WFC: Unique Att Spec ]
8382
 * No attribute name may appear more than once in the same start-tag or
8383
 * empty-element tag.
8384
 *
8385
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8386
 *
8387
 * [ WFC: Unique Att Spec ]
8388
 * No attribute name may appear more than once in the same start-tag or
8389
 * empty-element tag.
8390
 *
8391
 * With namespace:
8392
 *
8393
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8394
 *
8395
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8396
 *
8397
 * Returns the element name parsed
8398
 */
8399
8400
const xmlChar *
8401
4.05M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8402
4.05M
    const xmlChar *name;
8403
4.05M
    const xmlChar *attname;
8404
4.05M
    xmlChar *attvalue;
8405
4.05M
    const xmlChar **atts = ctxt->atts;
8406
4.05M
    int nbatts = 0;
8407
4.05M
    int maxatts = ctxt->maxatts;
8408
4.05M
    int i;
8409
8410
4.05M
    if (RAW != '<') return(NULL);
8411
4.05M
    NEXT1;
8412
8413
4.05M
    name = xmlParseName(ctxt);
8414
4.05M
    if (name == NULL) {
8415
565k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8416
565k
       "xmlParseStartTag: invalid element name\n");
8417
565k
        return(NULL);
8418
565k
    }
8419
8420
    /*
8421
     * Now parse the attributes, it ends up with the ending
8422
     *
8423
     * (S Attribute)* S?
8424
     */
8425
3.48M
    SKIP_BLANKS;
8426
3.48M
    GROW;
8427
8428
4.84M
    while (((RAW != '>') &&
8429
4.84M
     ((RAW != '/') || (NXT(1) != '>')) &&
8430
4.84M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8431
2.66M
  attname = xmlParseAttribute(ctxt, &attvalue);
8432
2.66M
        if (attname == NULL)
8433
956k
      break;
8434
1.71M
        if (attvalue != NULL) {
8435
      /*
8436
       * [ WFC: Unique Att Spec ]
8437
       * No attribute name may appear more than once in the same
8438
       * start-tag or empty-element tag.
8439
       */
8440
8.08M
      for (i = 0; i < nbatts;i += 2) {
8441
6.91M
          if (xmlStrEqual(atts[i], attname)) {
8442
38.5k
        xmlErrAttributeDup(ctxt, NULL, attname);
8443
38.5k
        xmlFree(attvalue);
8444
38.5k
        goto failed;
8445
38.5k
    }
8446
6.91M
      }
8447
      /*
8448
       * Add the pair to atts
8449
       */
8450
1.16M
      if (atts == NULL) {
8451
49.1k
          maxatts = 22; /* allow for 10 attrs by default */
8452
49.1k
          atts = (const xmlChar **)
8453
49.1k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8454
49.1k
    if (atts == NULL) {
8455
27
        xmlErrMemory(ctxt);
8456
27
        if (attvalue != NULL)
8457
27
      xmlFree(attvalue);
8458
27
        goto failed;
8459
27
    }
8460
49.0k
    ctxt->atts = atts;
8461
49.0k
    ctxt->maxatts = maxatts;
8462
1.11M
      } else if (nbatts + 4 > maxatts) {
8463
4.59k
          const xmlChar **n;
8464
8465
4.59k
          maxatts *= 2;
8466
4.59k
          n = (const xmlChar **) xmlRealloc((void *) atts,
8467
4.59k
               maxatts * sizeof(const xmlChar *));
8468
4.59k
    if (n == NULL) {
8469
10
        xmlErrMemory(ctxt);
8470
10
        if (attvalue != NULL)
8471
10
      xmlFree(attvalue);
8472
10
        goto failed;
8473
10
    }
8474
4.58k
    atts = n;
8475
4.58k
    ctxt->atts = atts;
8476
4.58k
    ctxt->maxatts = maxatts;
8477
4.58k
      }
8478
1.16M
      atts[nbatts++] = attname;
8479
1.16M
      atts[nbatts++] = attvalue;
8480
1.16M
      atts[nbatts] = NULL;
8481
1.16M
      atts[nbatts + 1] = NULL;
8482
1.16M
  } else {
8483
510k
      if (attvalue != NULL)
8484
0
    xmlFree(attvalue);
8485
510k
  }
8486
8487
1.71M
failed:
8488
8489
1.71M
  GROW
8490
1.71M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8491
350k
      break;
8492
1.36M
  if (SKIP_BLANKS == 0) {
8493
780k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8494
780k
         "attributes construct error\n");
8495
780k
  }
8496
1.36M
  SHRINK;
8497
1.36M
        GROW;
8498
1.36M
    }
8499
8500
    /*
8501
     * SAX: Start of Element !
8502
     */
8503
3.48M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8504
3.48M
  (!ctxt->disableSAX)) {
8505
3.36M
  if (nbatts > 0)
8506
740k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8507
2.62M
  else
8508
2.62M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8509
3.36M
    }
8510
8511
3.48M
    if (atts != NULL) {
8512
        /* Free only the content strings */
8513
4.33M
        for (i = 1;i < nbatts;i+=2)
8514
1.16M
      if (atts[i] != NULL)
8515
1.16M
         xmlFree((xmlChar *) atts[i]);
8516
3.16M
    }
8517
3.48M
    return(name);
8518
3.48M
}
8519
8520
/**
8521
 * xmlParseEndTag1:
8522
 * @ctxt:  an XML parser context
8523
 * @line:  line of the start tag
8524
 * @nsNr:  number of namespaces on the start tag
8525
 *
8526
 * Parse an end tag. Always consumes '</'.
8527
 *
8528
 * [42] ETag ::= '</' Name S? '>'
8529
 *
8530
 * With namespace
8531
 *
8532
 * [NS 9] ETag ::= '</' QName S? '>'
8533
 */
8534
8535
static void
8536
1.16M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8537
1.16M
    const xmlChar *name;
8538
8539
1.16M
    GROW;
8540
1.16M
    if ((RAW != '<') || (NXT(1) != '/')) {
8541
3.92k
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8542
3.92k
           "xmlParseEndTag: '</' not found\n");
8543
3.92k
  return;
8544
3.92k
    }
8545
1.15M
    SKIP(2);
8546
8547
1.15M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8548
8549
    /*
8550
     * We should definitely be at the ending "S? '>'" part
8551
     */
8552
1.15M
    GROW;
8553
1.15M
    SKIP_BLANKS;
8554
1.15M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8555
104k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8556
104k
    } else
8557
1.05M
  NEXT1;
8558
8559
    /*
8560
     * [ WFC: Element Type Match ]
8561
     * The Name in an element's end-tag must match the element type in the
8562
     * start-tag.
8563
     *
8564
     */
8565
1.15M
    if (name != (xmlChar*)1) {
8566
182k
        if (name == NULL) name = BAD_CAST "unparsable";
8567
182k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8568
182k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8569
182k
                    ctxt->name, line, name);
8570
182k
    }
8571
8572
    /*
8573
     * SAX: End of Tag
8574
     */
8575
1.15M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8576
1.15M
  (!ctxt->disableSAX))
8577
1.12M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8578
8579
1.15M
    namePop(ctxt);
8580
1.15M
    spacePop(ctxt);
8581
1.15M
}
8582
8583
/**
8584
 * xmlParseEndTag:
8585
 * @ctxt:  an XML parser context
8586
 *
8587
 * DEPRECATED: Internal function, don't use.
8588
 *
8589
 * parse an end of tag
8590
 *
8591
 * [42] ETag ::= '</' Name S? '>'
8592
 *
8593
 * With namespace
8594
 *
8595
 * [NS 9] ETag ::= '</' QName S? '>'
8596
 */
8597
8598
void
8599
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8600
0
    xmlParseEndTag1(ctxt, 0);
8601
0
}
8602
#endif /* LIBXML_SAX1_ENABLED */
8603
8604
/************************************************************************
8605
 *                  *
8606
 *          SAX 2 specific operations       *
8607
 *                  *
8608
 ************************************************************************/
8609
8610
/**
8611
 * xmlParseQNameHashed:
8612
 * @ctxt:  an XML parser context
8613
 * @prefix:  pointer to store the prefix part
8614
 *
8615
 * parse an XML Namespace QName
8616
 *
8617
 * [6]  QName  ::= (Prefix ':')? LocalPart
8618
 * [7]  Prefix  ::= NCName
8619
 * [8]  LocalPart  ::= NCName
8620
 *
8621
 * Returns the hashed Name parsed; its name field is NULL on failure
8622
 */
8623
8624
static xmlHashedString
8625
13.4M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8626
13.4M
    xmlHashedString l, p;
8627
13.4M
    int start, isNCName = 0;
8628
8629
13.4M
    l.name = NULL;
8630
13.4M
    p.name = NULL;
8631
8632
13.4M
    GROW;
8633
13.4M
    start = CUR_PTR - BASE_PTR;
8634
8635
13.4M
    l = xmlParseNCName(ctxt);
8636
13.4M
    if (l.name != NULL) {
8637
10.2M
        isNCName = 1;
8638
10.2M
        if (CUR == ':') {
8639
2.90M
            NEXT;
8640
2.90M
            p = l;
8641
2.90M
            l = xmlParseNCName(ctxt);
8642
2.90M
        }
8643
10.2M
    }
8644
13.4M
    if ((l.name == NULL) || (CUR == ':')) {
8645
3.37M
        xmlChar *tmp;
8646
8647
3.37M
        l.name = NULL;
8648
3.37M
        p.name = NULL;
8649
3.37M
        if ((isNCName == 0) && (CUR != ':'))
8650
3.07M
            return(l);
8651
308k
        tmp = xmlParseNmtoken(ctxt);
8652
308k
        if (tmp != NULL)
8653
183k
            xmlFree(tmp);
8654
308k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8655
308k
                                CUR_PTR - (BASE_PTR + start));
8656
308k
        if (l.name == NULL) {
8657
21
            xmlErrMemory(ctxt);
8658
21
            return(l);
8659
21
        }
8660
308k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8661
308k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8662
308k
    }
8663
8664
10.3M
    *prefix = p;
8665
10.3M
    return(l);
8666
13.4M
}
8667
8668
/**
8669
 * xmlParseQName:
8670
 * @ctxt:  an XML parser context
8671
 * @prefix:  pointer to store the prefix part
8672
 *
8673
 * parse an XML Namespace QName
8674
 *
8675
 * [6]  QName  ::= (Prefix ':')? LocalPart
8676
 * [7]  Prefix  ::= NCName
8677
 * [8]  LocalPart  ::= NCName
8678
 *
8679
 * Returns the Name parsed or NULL
8680
 */
8681
8682
static const xmlChar *
8683
113k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8684
113k
    xmlHashedString n, p;
8685
8686
113k
    n = xmlParseQNameHashed(ctxt, &p);
8687
113k
    if (n.name == NULL)
8688
12.3k
        return(NULL);
8689
101k
    *prefix = p.name;
8690
101k
    return(n.name);
8691
113k
}
8692
8693
/**
8694
 * xmlParseQNameAndCompare:
8695
 * @ctxt:  an XML parser context
8696
 * @name:  the localname
8697
 * @prefix:  the prefix, if any.
8698
 *
8699
 * parse an XML name and compares for match
8700
 * (specialized for endtag parsing)
8701
 *
8702
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8703
 * and the name for mismatch
8704
 */
8705
8706
static const xmlChar *
8707
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8708
261k
                        xmlChar const *prefix) {
8709
261k
    const xmlChar *cmp;
8710
261k
    const xmlChar *in;
8711
261k
    const xmlChar *ret;
8712
261k
    const xmlChar *prefix2;
8713
8714
261k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8715
8716
261k
    GROW;
8717
261k
    in = ctxt->input->cur;
8718
8719
261k
    cmp = prefix;
8720
727k
    while (*in != 0 && *in == *cmp) {
8721
465k
  ++in;
8722
465k
  ++cmp;
8723
465k
    }
8724
261k
    if ((*cmp == 0) && (*in == ':')) {
8725
192k
        in++;
8726
192k
  cmp = name;
8727
1.17M
  while (*in != 0 && *in == *cmp) {
8728
978k
      ++in;
8729
978k
      ++cmp;
8730
978k
  }
8731
192k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8732
      /* success */
8733
148k
            ctxt->input->col += in - ctxt->input->cur;
8734
148k
      ctxt->input->cur = in;
8735
148k
      return((const xmlChar*) 1);
8736
148k
  }
8737
192k
    }
8738
    /*
8739
     * all strings coms from the dictionary, equality can be done directly
8740
     */
8741
113k
    ret = xmlParseQName (ctxt, &prefix2);
8742
113k
    if (ret == NULL)
8743
12.3k
        return(NULL);
8744
101k
    if ((ret == name) && (prefix == prefix2))
8745
10.3k
  return((const xmlChar*) 1);
8746
90.7k
    return ret;
8747
101k
}
8748
8749
/**
8750
 * xmlParseAttribute2:
8751
 * @ctxt:  an XML parser context
8752
 * @pref:  the element prefix
8753
 * @elem:  the element name
8754
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8755
 * @value:  a xmlChar ** used to store the value of the attribute
8756
 * @len:  an int * to save the length of the attribute
8757
 * @alloc:  an int * to indicate if the attribute was allocated
8758
 *
8759
 * parse an attribute in the new SAX2 framework.
8760
 *
8761
 * Returns the attribute name, and the value in *value, .
8762
 */
8763
8764
static xmlHashedString
8765
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8766
                   const xmlChar * pref, const xmlChar * elem,
8767
                   xmlHashedString * hprefix, xmlChar ** value,
8768
                   int *len, int *alloc)
8769
5.11M
{
8770
5.11M
    xmlHashedString hname;
8771
5.11M
    const xmlChar *prefix, *name;
8772
5.11M
    xmlChar *val = NULL, *internal_val = NULL;
8773
5.11M
    int normalize = 0;
8774
5.11M
    int isNamespace;
8775
8776
5.11M
    *value = NULL;
8777
5.11M
    GROW;
8778
5.11M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8779
5.11M
    if (hname.name == NULL) {
8780
1.42M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8781
1.42M
                       "error parsing attribute name\n");
8782
1.42M
        return(hname);
8783
1.42M
    }
8784
3.69M
    name = hname.name;
8785
3.69M
    if (hprefix->name != NULL)
8786
1.59M
        prefix = hprefix->name;
8787
2.09M
    else
8788
2.09M
        prefix = NULL;
8789
8790
    /*
8791
     * get the type if needed
8792
     */
8793
3.69M
    if (ctxt->attsSpecial != NULL) {
8794
1.45M
        int type;
8795
8796
1.45M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8797
1.45M
                                                 pref, elem,
8798
1.45M
                                                 prefix, name);
8799
1.45M
        if (type != 0)
8800
211k
            normalize = 1;
8801
1.45M
    }
8802
8803
    /*
8804
     * read the value
8805
     */
8806
3.69M
    SKIP_BLANKS;
8807
3.69M
    if (RAW == '=') {
8808
3.27M
        NEXT;
8809
3.27M
        SKIP_BLANKS;
8810
3.27M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8811
3.27M
                       (prefix == ctxt->str_xmlns));
8812
3.27M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8813
3.27M
                                       isNamespace);
8814
3.27M
        if (val == NULL)
8815
123k
            goto error;
8816
3.27M
    } else {
8817
413k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8818
413k
                          "Specification mandates value for attribute %s\n",
8819
413k
                          name);
8820
413k
        goto error;
8821
413k
    }
8822
8823
3.15M
    if (prefix == ctxt->str_xml) {
8824
        /*
8825
         * Check that xml:lang conforms to the specification
8826
         * No more registered as an error, just generate a warning now
8827
         * since this was deprecated in XML second edition
8828
         */
8829
331k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8830
105k
            internal_val = xmlStrndup(val, *len);
8831
105k
            if (internal_val == NULL)
8832
46
                goto mem_error;
8833
105k
            if (!xmlCheckLanguageID(internal_val)) {
8834
86.6k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8835
86.6k
                              "Malformed value for xml:lang : %s\n",
8836
86.6k
                              internal_val, NULL);
8837
86.6k
            }
8838
105k
        }
8839
8840
        /*
8841
         * Check that xml:space conforms to the specification
8842
         */
8843
331k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8844
10.7k
            internal_val = xmlStrndup(val, *len);
8845
10.7k
            if (internal_val == NULL)
8846
15
                goto mem_error;
8847
10.7k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8848
2.71k
                *(ctxt->space) = 0;
8849
8.05k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8850
4.88k
                *(ctxt->space) = 1;
8851
3.17k
            else {
8852
3.17k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8853
3.17k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8854
3.17k
                              internal_val, NULL);
8855
3.17k
            }
8856
10.7k
        }
8857
331k
        if (internal_val) {
8858
116k
            xmlFree(internal_val);
8859
116k
        }
8860
331k
    }
8861
8862
3.15M
    *value = val;
8863
3.15M
    return (hname);
8864
8865
61
mem_error:
8866
61
    xmlErrMemory(ctxt);
8867
537k
error:
8868
537k
    if ((val != NULL) && (*alloc != 0))
8869
31
        xmlFree(val);
8870
537k
    return(hname);
8871
61
}
8872
8873
/**
8874
 * xmlAttrHashInsert:
8875
 * @ctxt: parser context
8876
 * @size: size of the hash table
8877
 * @name: attribute name
8878
 * @uri: namespace uri
8879
 * @hashValue: combined hash value of name and uri
8880
 * @aindex: attribute index (this is a multiple of 5)
8881
 *
8882
 * Inserts a new attribute into the hash table.
8883
 *
8884
 * Returns INT_MAX if no existing attribute was found, the attribute
8885
 * index if an attribute was found, -1 if a memory allocation failed.
8886
 */
8887
static int
8888
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8889
2.69M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8890
2.69M
    xmlAttrHashBucket *table = ctxt->attrHash;
8891
2.69M
    xmlAttrHashBucket *bucket;
8892
2.69M
    unsigned hindex;
8893
8894
2.69M
    hindex = hashValue & (size - 1);
8895
2.69M
    bucket = &table[hindex];
8896
8897
3.32M
    while (bucket->index >= 0) {
8898
823k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8899
8900
823k
        if (name == atts[0]) {
8901
213k
            int nsIndex = (int) (ptrdiff_t) atts[2];
8902
8903
213k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8904
213k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8905
69.4k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8906
194k
                return(bucket->index);
8907
213k
        }
8908
8909
629k
        hindex++;
8910
629k
        bucket++;
8911
629k
        if (hindex >= size) {
8912
39.6k
            hindex = 0;
8913
39.6k
            bucket = table;
8914
39.6k
        }
8915
629k
    }
8916
8917
2.49M
    bucket->index = aindex;
8918
8919
2.49M
    return(INT_MAX);
8920
2.69M
}
8921
8922
/**
8923
 * xmlParseStartTag2:
8924
 * @ctxt:  an XML parser context
8925
 *
8926
 * Parse a start tag. Always consumes '<'.
8927
 *
8928
 * This routine is called when running SAX2 parsing
8929
 *
8930
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8931
 *
8932
 * [ WFC: Unique Att Spec ]
8933
 * No attribute name may appear more than once in the same start-tag or
8934
 * empty-element tag.
8935
 *
8936
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8937
 *
8938
 * [ WFC: Unique Att Spec ]
8939
 * No attribute name may appear more than once in the same start-tag or
8940
 * empty-element tag.
8941
 *
8942
 * With namespace:
8943
 *
8944
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8945
 *
8946
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8947
 *
8948
 * Returns the element name parsed
8949
 */
8950
8951
static const xmlChar *
8952
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8953
8.23M
                  const xmlChar **URI, int *nbNsPtr) {
8954
8.23M
    xmlHashedString hlocalname;
8955
8.23M
    xmlHashedString hprefix;
8956
8.23M
    xmlHashedString hattname;
8957
8.23M
    xmlHashedString haprefix;
8958
8.23M
    const xmlChar *localname;
8959
8.23M
    const xmlChar *prefix;
8960
8.23M
    const xmlChar *attname;
8961
8.23M
    const xmlChar *aprefix;
8962
8.23M
    const xmlChar *uri;
8963
8.23M
    xmlChar *attvalue = NULL;
8964
8.23M
    const xmlChar **atts = ctxt->atts;
8965
8.23M
    unsigned attrHashSize = 0;
8966
8.23M
    int maxatts = ctxt->maxatts;
8967
8.23M
    int nratts, nbatts, nbdef;
8968
8.23M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8969
8.23M
    int alloc = 0;
8970
8971
8.23M
    if (RAW != '<') return(NULL);
8972
8.23M
    NEXT1;
8973
8974
8.23M
    nbatts = 0;
8975
8.23M
    nratts = 0;
8976
8.23M
    nbdef = 0;
8977
8.23M
    nbNs = 0;
8978
8.23M
    nbTotalDef = 0;
8979
8.23M
    attval = 0;
8980
8981
8.23M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8982
0
        xmlErrMemory(ctxt);
8983
0
        return(NULL);
8984
0
    }
8985
8986
8.23M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8987
8.23M
    if (hlocalname.name == NULL) {
8988
1.63M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8989
1.63M
           "StartTag: invalid element name\n");
8990
1.63M
        return(NULL);
8991
1.63M
    }
8992
6.60M
    localname = hlocalname.name;
8993
6.60M
    prefix = hprefix.name;
8994
8995
    /*
8996
     * Now parse the attributes, it ends up with the ending
8997
     *
8998
     * (S Attribute)* S?
8999
     */
9000
6.60M
    SKIP_BLANKS;
9001
6.60M
    GROW;
9002
9003
    /*
9004
     * The ctxt->atts array will be ultimately passed to the SAX callback
9005
     * containing five xmlChar pointers for each attribute:
9006
     *
9007
     * [0] attribute name
9008
     * [1] attribute prefix
9009
     * [2] namespace URI
9010
     * [3] attribute value
9011
     * [4] end of attribute value
9012
     *
9013
     * To save memory, we reuse this array temporarily and store integers
9014
     * in these pointer variables.
9015
     *
9016
     * [0] attribute name
9017
     * [1] attribute prefix
9018
     * [2] hash value of attribute prefix, and later namespace index
9019
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9020
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9021
     *
9022
     * The ctxt->attallocs array contains an additional unsigned int for
9023
     * each attribute, containing the hash value of the attribute name
9024
     * and the alloc flag in bit 31.
9025
     */
9026
9027
8.12M
    while (((RAW != '>') &&
9028
8.12M
     ((RAW != '/') || (NXT(1) != '>')) &&
9029
8.12M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9030
5.11M
  int len = -1;
9031
9032
5.11M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9033
5.11M
                                          &haprefix, &attvalue, &len,
9034
5.11M
                                          &alloc);
9035
5.11M
        if (hattname.name == NULL)
9036
1.42M
      break;
9037
3.69M
        if (attvalue == NULL)
9038
537k
            goto next_attr;
9039
3.15M
        attname = hattname.name;
9040
3.15M
        aprefix = haprefix.name;
9041
3.15M
  if (len < 0) len = xmlStrlen(attvalue);
9042
9043
3.15M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9044
375k
            xmlHashedString huri;
9045
375k
            xmlURIPtr parsedUri;
9046
9047
375k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9048
375k
            uri = huri.name;
9049
375k
            if (uri == NULL) {
9050
15
                xmlErrMemory(ctxt);
9051
15
                goto next_attr;
9052
15
            }
9053
375k
            if (*uri != 0) {
9054
361k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9055
155
                    xmlErrMemory(ctxt);
9056
155
                    goto next_attr;
9057
155
                }
9058
361k
                if (parsedUri == NULL) {
9059
246k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9060
246k
                             "xmlns: '%s' is not a valid URI\n",
9061
246k
                                       uri, NULL, NULL);
9062
246k
                } else {
9063
114k
                    if (parsedUri->scheme == NULL) {
9064
73.3k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9065
73.3k
                                  "xmlns: URI %s is not absolute\n",
9066
73.3k
                                  uri, NULL, NULL);
9067
73.3k
                    }
9068
114k
                    xmlFreeURI(parsedUri);
9069
114k
                }
9070
361k
                if (uri == ctxt->str_xml_ns) {
9071
477
                    if (attname != ctxt->str_xml) {
9072
477
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9073
477
                     "xml namespace URI cannot be the default namespace\n",
9074
477
                                 NULL, NULL, NULL);
9075
477
                    }
9076
477
                    goto next_attr;
9077
477
                }
9078
360k
                if ((len == 29) &&
9079
360k
                    (xmlStrEqual(uri,
9080
9.04k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9081
2.68k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082
2.68k
                         "reuse of the xmlns namespace name is forbidden\n",
9083
2.68k
                             NULL, NULL, NULL);
9084
2.68k
                    goto next_attr;
9085
2.68k
                }
9086
360k
            }
9087
9088
372k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9089
344k
                nbNs++;
9090
2.77M
        } else if (aprefix == ctxt->str_xmlns) {
9091
546k
            xmlHashedString huri;
9092
546k
            xmlURIPtr parsedUri;
9093
9094
546k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9095
546k
            uri = huri.name;
9096
546k
            if (uri == NULL) {
9097
25
                xmlErrMemory(ctxt);
9098
25
                goto next_attr;
9099
25
            }
9100
9101
546k
            if (attname == ctxt->str_xml) {
9102
2.56k
                if (uri != ctxt->str_xml_ns) {
9103
2.28k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9104
2.28k
                             "xml namespace prefix mapped to wrong URI\n",
9105
2.28k
                             NULL, NULL, NULL);
9106
2.28k
                }
9107
                /*
9108
                 * Do not keep a namespace definition node
9109
                 */
9110
2.56k
                goto next_attr;
9111
2.56k
            }
9112
544k
            if (uri == ctxt->str_xml_ns) {
9113
567
                if (attname != ctxt->str_xml) {
9114
567
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9115
567
                             "xml namespace URI mapped to wrong prefix\n",
9116
567
                             NULL, NULL, NULL);
9117
567
                }
9118
567
                goto next_attr;
9119
567
            }
9120
543k
            if (attname == ctxt->str_xmlns) {
9121
3.35k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9122
3.35k
                         "redefinition of the xmlns prefix is forbidden\n",
9123
3.35k
                         NULL, NULL, NULL);
9124
3.35k
                goto next_attr;
9125
3.35k
            }
9126
540k
            if ((len == 29) &&
9127
540k
                (xmlStrEqual(uri,
9128
11.3k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9129
1.24k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9130
1.24k
                         "reuse of the xmlns namespace name is forbidden\n",
9131
1.24k
                         NULL, NULL, NULL);
9132
1.24k
                goto next_attr;
9133
1.24k
            }
9134
538k
            if ((uri == NULL) || (uri[0] == 0)) {
9135
19.8k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9136
19.8k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9137
19.8k
                              attname, NULL, NULL);
9138
19.8k
                goto next_attr;
9139
518k
            } else {
9140
518k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9141
106
                    xmlErrMemory(ctxt);
9142
106
                    goto next_attr;
9143
106
                }
9144
518k
                if (parsedUri == NULL) {
9145
108k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9146
108k
                         "xmlns:%s: '%s' is not a valid URI\n",
9147
108k
                                       attname, uri, NULL);
9148
410k
                } else {
9149
410k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9150
102k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9151
102k
                                  "xmlns:%s: URI %s is not absolute\n",
9152
102k
                                  attname, uri, NULL);
9153
102k
                    }
9154
410k
                    xmlFreeURI(parsedUri);
9155
410k
                }
9156
518k
            }
9157
9158
518k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9159
486k
                nbNs++;
9160
2.23M
        } else {
9161
            /*
9162
             * Populate attributes array, see above for repurposing
9163
             * of xmlChar pointers.
9164
             */
9165
2.23M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9166
83.5k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9167
85
                    goto next_attr;
9168
85
                }
9169
83.4k
                maxatts = ctxt->maxatts;
9170
83.4k
                atts = ctxt->atts;
9171
83.4k
            }
9172
2.23M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9173
2.23M
                                        ((unsigned) alloc << 31);
9174
2.23M
            atts[nbatts++] = attname;
9175
2.23M
            atts[nbatts++] = aprefix;
9176
2.23M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9177
2.23M
            if (alloc) {
9178
459k
                atts[nbatts++] = attvalue;
9179
459k
                attvalue += len;
9180
459k
                atts[nbatts++] = attvalue;
9181
1.77M
            } else {
9182
                /*
9183
                 * attvalue points into the input buffer which can be
9184
                 * reallocated. Store differences to input->base instead.
9185
                 * The pointers will be reconstructed later.
9186
                 */
9187
1.77M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9188
1.77M
                attvalue += len;
9189
1.77M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9190
1.77M
            }
9191
            /*
9192
             * tag if some deallocation is needed
9193
             */
9194
2.23M
            if (alloc != 0) attval = 1;
9195
2.23M
            attvalue = NULL; /* moved into atts */
9196
2.23M
        }
9197
9198
3.69M
next_attr:
9199
3.69M
        if ((attvalue != NULL) && (alloc != 0)) {
9200
194k
            xmlFree(attvalue);
9201
194k
            attvalue = NULL;
9202
194k
        }
9203
9204
3.69M
  GROW
9205
3.69M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9206
1.17M
      break;
9207
2.51M
  if (SKIP_BLANKS == 0) {
9208
989k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9209
989k
         "attributes construct error\n");
9210
989k
      break;
9211
989k
  }
9212
1.52M
        GROW;
9213
1.52M
    }
9214
9215
    /*
9216
     * Namespaces from default attributes
9217
     */
9218
6.60M
    if (ctxt->attsDefault != NULL) {
9219
1.61M
        xmlDefAttrsPtr defaults;
9220
9221
1.61M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9222
1.61M
  if (defaults != NULL) {
9223
2.80M
      for (i = 0; i < defaults->nbAttrs; i++) {
9224
2.23M
                xmlDefAttr *attr = &defaults->attrs[i];
9225
9226
2.23M
          attname = attr->name.name;
9227
2.23M
    aprefix = attr->prefix.name;
9228
9229
2.23M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9230
127k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9231
9232
127k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9233
119k
                        nbNs++;
9234
2.10M
    } else if (aprefix == ctxt->str_xmlns) {
9235
127k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9236
9237
127k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9238
127k
                                      NULL, 1) > 0)
9239
119k
                        nbNs++;
9240
1.97M
    } else {
9241
1.97M
                    nbTotalDef += 1;
9242
1.97M
                }
9243
2.23M
      }
9244
570k
  }
9245
1.61M
    }
9246
9247
    /*
9248
     * Resolve attribute namespaces
9249
     */
9250
8.83M
    for (i = 0; i < nbatts; i += 5) {
9251
2.23M
        attname = atts[i];
9252
2.23M
        aprefix = atts[i+1];
9253
9254
        /*
9255
  * The default namespace does not apply to attribute names.
9256
  */
9257
2.23M
  if (aprefix == NULL) {
9258
1.30M
            nsIndex = NS_INDEX_EMPTY;
9259
1.30M
        } else if (aprefix == ctxt->str_xml) {
9260
331k
            nsIndex = NS_INDEX_XML;
9261
597k
        } else {
9262
597k
            haprefix.name = aprefix;
9263
597k
            haprefix.hashValue = (size_t) atts[i+2];
9264
597k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9265
9266
597k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9267
289k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9268
289k
        "Namespace prefix %s for %s on %s is not defined\n",
9269
289k
        aprefix, attname, localname);
9270
289k
                nsIndex = NS_INDEX_EMPTY;
9271
289k
            }
9272
597k
        }
9273
9274
2.23M
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9275
2.23M
    }
9276
9277
    /*
9278
     * Maximum number of attributes including default attributes.
9279
     */
9280
6.60M
    maxAtts = nratts + nbTotalDef;
9281
9282
    /*
9283
     * Verify that attribute names are unique.
9284
     */
9285
6.60M
    if (maxAtts > 1) {
9286
481k
        attrHashSize = 4;
9287
766k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9288
284k
            attrHashSize *= 2;
9289
9290
481k
        if (attrHashSize > ctxt->attrHashMax) {
9291
37.5k
            xmlAttrHashBucket *tmp;
9292
9293
37.5k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9294
37.5k
            if (tmp == NULL) {
9295
21
                xmlErrMemory(ctxt);
9296
21
                goto done;
9297
21
            }
9298
9299
37.4k
            ctxt->attrHash = tmp;
9300
37.4k
            ctxt->attrHashMax = attrHashSize;
9301
37.4k
        }
9302
9303
481k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9304
9305
1.69M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9306
1.21M
            const xmlChar *nsuri;
9307
1.21M
            unsigned hashValue, nameHashValue, uriHashValue;
9308
1.21M
            int res;
9309
9310
1.21M
            attname = atts[i];
9311
1.21M
            aprefix = atts[i+1];
9312
1.21M
            nsIndex = (ptrdiff_t) atts[i+2];
9313
            /* Hash values always have bit 31 set, see dict.c */
9314
1.21M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9315
9316
1.21M
            if (nsIndex == NS_INDEX_EMPTY) {
9317
                /*
9318
                 * Prefix with empty namespace means an undeclared
9319
                 * prefix which was already reported above.
9320
                 */
9321
892k
                if (aprefix != NULL)
9322
246k
                    continue;
9323
645k
                nsuri = NULL;
9324
645k
                uriHashValue = URI_HASH_EMPTY;
9325
645k
            } else if (nsIndex == NS_INDEX_XML) {
9326
95.3k
                nsuri = ctxt->str_xml_ns;
9327
95.3k
                uriHashValue = URI_HASH_XML;
9328
222k
            } else {
9329
222k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9330
222k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9331
222k
            }
9332
9333
963k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9334
963k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9335
963k
                                    hashValue, i);
9336
963k
            if (res < 0)
9337
0
                continue;
9338
9339
            /*
9340
             * [ WFC: Unique Att Spec ]
9341
             * No attribute name may appear more than once in the same
9342
             * start-tag or empty-element tag.
9343
             * As extended by the Namespace in XML REC.
9344
             */
9345
963k
            if (res < INT_MAX) {
9346
90.2k
                if (aprefix == atts[res+1]) {
9347
84.0k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9348
84.0k
                } else {
9349
6.28k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9350
6.28k
                             "Namespaced Attribute %s in '%s' redefined\n",
9351
6.28k
                             attname, nsuri, NULL);
9352
6.28k
                }
9353
90.2k
            }
9354
963k
        }
9355
481k
    }
9356
9357
    /*
9358
     * Default attributes
9359
     */
9360
6.60M
    if (ctxt->attsDefault != NULL) {
9361
1.61M
        xmlDefAttrsPtr defaults;
9362
9363
1.61M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9364
1.61M
  if (defaults != NULL) {
9365
2.80M
      for (i = 0; i < defaults->nbAttrs; i++) {
9366
2.23M
                xmlDefAttr *attr = &defaults->attrs[i];
9367
2.23M
                const xmlChar *nsuri;
9368
2.23M
                unsigned hashValue, uriHashValue;
9369
2.23M
                int res;
9370
9371
2.23M
          attname = attr->name.name;
9372
2.23M
    aprefix = attr->prefix.name;
9373
9374
2.23M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9375
127k
                    continue;
9376
2.10M
    if (aprefix == ctxt->str_xmlns)
9377
127k
                    continue;
9378
9379
1.97M
                if (aprefix == NULL) {
9380
1.66M
                    nsIndex = NS_INDEX_EMPTY;
9381
1.66M
                    nsuri = NULL;
9382
1.66M
                    uriHashValue = URI_HASH_EMPTY;
9383
1.97M
                } if (aprefix == ctxt->str_xml) {
9384
36.9k
                    nsIndex = NS_INDEX_XML;
9385
36.9k
                    nsuri = ctxt->str_xml_ns;
9386
36.9k
                    uriHashValue = URI_HASH_XML;
9387
1.94M
                } else if (aprefix != NULL) {
9388
282k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9389
282k
                    if ((nsIndex == INT_MAX) ||
9390
282k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9391
163k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9392
163k
                                 "Namespace prefix %s for %s on %s is not "
9393
163k
                                 "defined\n",
9394
163k
                                 aprefix, attname, localname);
9395
163k
                        nsIndex = NS_INDEX_EMPTY;
9396
163k
                        nsuri = NULL;
9397
163k
                        uriHashValue = URI_HASH_EMPTY;
9398
163k
                    } else {
9399
118k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9400
118k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9401
118k
                    }
9402
282k
                }
9403
9404
                /*
9405
                 * Check whether the attribute exists
9406
                 */
9407
1.97M
                if (maxAtts > 1) {
9408
1.72M
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9409
1.72M
                                                   uriHashValue);
9410
1.72M
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9411
1.72M
                                            hashValue, nbatts);
9412
1.72M
                    if (res < 0)
9413
0
                        continue;
9414
1.72M
                    if (res < INT_MAX) {
9415
103k
                        if (aprefix == atts[res+1])
9416
24.8k
                            continue;
9417
78.9k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9418
78.9k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9419
78.9k
                                 attname, nsuri, NULL);
9420
78.9k
                    }
9421
1.72M
                }
9422
9423
1.95M
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9424
9425
1.95M
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9426
12.8k
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9427
34
                        localname = NULL;
9428
34
                        goto done;
9429
34
                    }
9430
12.8k
                    maxatts = ctxt->maxatts;
9431
12.8k
                    atts = ctxt->atts;
9432
12.8k
                }
9433
9434
1.95M
                atts[nbatts++] = attname;
9435
1.95M
                atts[nbatts++] = aprefix;
9436
1.95M
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9437
1.95M
                atts[nbatts++] = attr->value.name;
9438
1.95M
                atts[nbatts++] = attr->valueEnd;
9439
1.95M
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9440
784
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9441
784
                            "standalone: attribute %s on %s defaulted "
9442
784
                            "from external subset\n",
9443
784
                            attname, localname);
9444
784
                }
9445
1.95M
                nbdef++;
9446
1.95M
      }
9447
570k
  }
9448
1.61M
    }
9449
9450
    /*
9451
     * Reconstruct attribute pointers
9452
     */
9453
10.7M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9454
        /* namespace URI */
9455
4.18M
        nsIndex = (ptrdiff_t) atts[i+2];
9456
4.18M
        if (nsIndex == INT_MAX)
9457
3.40M
            atts[i+2] = NULL;
9458
786k
        else if (nsIndex == INT_MAX - 1)
9459
365k
            atts[i+2] = ctxt->str_xml_ns;
9460
421k
        else
9461
421k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9462
9463
4.18M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9464
1.77M
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9465
1.77M
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9466
1.77M
        }
9467
4.18M
    }
9468
9469
6.60M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9470
6.60M
    if ((prefix != NULL) && (uri == NULL)) {
9471
368k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9472
368k
           "Namespace prefix %s on %s is not defined\n",
9473
368k
     prefix, localname, NULL);
9474
368k
    }
9475
6.60M
    *pref = prefix;
9476
6.60M
    *URI = uri;
9477
9478
    /*
9479
     * SAX callback
9480
     */
9481
6.60M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9482
6.60M
  (!ctxt->disableSAX)) {
9483
6.01M
  if (nbNs > 0)
9484
748k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9485
748k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9486
748k
        nbatts / 5, nbdef, atts);
9487
5.26M
  else
9488
5.26M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9489
5.26M
                          0, NULL, nbatts / 5, nbdef, atts);
9490
6.01M
    }
9491
9492
6.60M
done:
9493
    /*
9494
     * Free allocated attribute values
9495
     */
9496
6.60M
    if (attval != 0) {
9497
950k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9498
566k
      if (ctxt->attallocs[j] & 0x80000000)
9499
459k
          xmlFree((xmlChar *) atts[i+3]);
9500
384k
    }
9501
9502
6.60M
    *nbNsPtr = nbNs;
9503
6.60M
    return(localname);
9504
6.60M
}
9505
9506
/**
9507
 * xmlParseEndTag2:
9508
 * @ctxt:  an XML parser context
9509
 * @line:  line of the start tag
9510
 * @nsNr:  number of namespaces on the start tag
9511
 *
9512
 * Parse an end tag. Always consumes '</'.
9513
 *
9514
 * [42] ETag ::= '</' Name S? '>'
9515
 *
9516
 * With namespace
9517
 *
9518
 * [NS 9] ETag ::= '</' QName S? '>'
9519
 */
9520
9521
static void
9522
1.48M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9523
1.48M
    const xmlChar *name;
9524
9525
1.48M
    GROW;
9526
1.48M
    if ((RAW != '<') || (NXT(1) != '/')) {
9527
6.51k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9528
6.51k
  return;
9529
6.51k
    }
9530
1.48M
    SKIP(2);
9531
9532
1.48M
    if (tag->prefix == NULL)
9533
1.22M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9534
261k
    else
9535
261k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9536
9537
    /*
9538
     * We should definitely be at the ending "S? '>'" part
9539
     */
9540
1.48M
    GROW;
9541
1.48M
    SKIP_BLANKS;
9542
1.48M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9543
230k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9544
230k
    } else
9545
1.25M
  NEXT1;
9546
9547
    /*
9548
     * [ WFC: Element Type Match ]
9549
     * The Name in an element's end-tag must match the element type in the
9550
     * start-tag.
9551
     *
9552
     */
9553
1.48M
    if (name != (xmlChar*)1) {
9554
358k
        if (name == NULL) name = BAD_CAST "unparsable";
9555
358k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9556
358k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9557
358k
                    ctxt->name, tag->line, name);
9558
358k
    }
9559
9560
    /*
9561
     * SAX: End of Tag
9562
     */
9563
1.48M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9564
1.48M
  (!ctxt->disableSAX))
9565
1.35M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9566
1.35M
                                tag->URI);
9567
9568
1.48M
    spacePop(ctxt);
9569
1.48M
    if (tag->nsNr != 0)
9570
141k
  xmlParserNsPop(ctxt, tag->nsNr);
9571
1.48M
}
9572
9573
/**
9574
 * xmlParseCDSect:
9575
 * @ctxt:  an XML parser context
9576
 *
9577
 * DEPRECATED: Internal function, don't use.
9578
 *
9579
 * Parse escaped pure raw content. Always consumes '<!['.
9580
 *
9581
 * [18] CDSect ::= CDStart CData CDEnd
9582
 *
9583
 * [19] CDStart ::= '<![CDATA['
9584
 *
9585
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9586
 *
9587
 * [21] CDEnd ::= ']]>'
9588
 */
9589
void
9590
965k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9591
965k
    xmlChar *buf = NULL;
9592
965k
    int len = 0;
9593
965k
    int size = XML_PARSER_BUFFER_SIZE;
9594
965k
    int r, rl;
9595
965k
    int s, sl;
9596
965k
    int cur, l;
9597
965k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9598
127k
                    XML_MAX_HUGE_LENGTH :
9599
965k
                    XML_MAX_TEXT_LENGTH;
9600
9601
965k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9602
0
        return;
9603
965k
    SKIP(3);
9604
9605
965k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9606
0
        return;
9607
965k
    SKIP(6);
9608
9609
965k
    r = xmlCurrentCharRecover(ctxt, &rl);
9610
965k
    if (!IS_CHAR(r)) {
9611
7.36k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9612
7.36k
        goto out;
9613
7.36k
    }
9614
957k
    NEXTL(rl);
9615
957k
    s = xmlCurrentCharRecover(ctxt, &sl);
9616
957k
    if (!IS_CHAR(s)) {
9617
17.0k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9618
17.0k
        goto out;
9619
17.0k
    }
9620
940k
    NEXTL(sl);
9621
940k
    cur = xmlCurrentCharRecover(ctxt, &l);
9622
940k
    buf = xmlMalloc(size);
9623
940k
    if (buf == NULL) {
9624
40
  xmlErrMemory(ctxt);
9625
40
        goto out;
9626
40
    }
9627
46.5M
    while (IS_CHAR(cur) &&
9628
46.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9629
45.6M
  if (len + 5 >= size) {
9630
21.3k
      xmlChar *tmp;
9631
9632
21.3k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9633
21.3k
      if (tmp == NULL) {
9634
16
    xmlErrMemory(ctxt);
9635
16
                goto out;
9636
16
      }
9637
21.3k
      buf = tmp;
9638
21.3k
      size *= 2;
9639
21.3k
  }
9640
45.6M
  COPY_BUF(buf, len, r);
9641
45.6M
        if (len > maxLength) {
9642
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9643
0
                           "CData section too big found\n");
9644
0
            goto out;
9645
0
        }
9646
45.6M
  r = s;
9647
45.6M
  rl = sl;
9648
45.6M
  s = cur;
9649
45.6M
  sl = l;
9650
45.6M
  NEXTL(l);
9651
45.6M
  cur = xmlCurrentCharRecover(ctxt, &l);
9652
45.6M
    }
9653
940k
    buf[len] = 0;
9654
940k
    if (cur != '>') {
9655
41.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9656
41.7k
                       "CData section not finished\n%.50s\n", buf);
9657
41.7k
        goto out;
9658
41.7k
    }
9659
898k
    NEXTL(l);
9660
9661
    /*
9662
     * OK the buffer is to be consumed as cdata.
9663
     */
9664
898k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9665
893k
  if (ctxt->sax->cdataBlock != NULL)
9666
367k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9667
526k
  else if (ctxt->sax->characters != NULL)
9668
525k
      ctxt->sax->characters(ctxt->userData, buf, len);
9669
893k
    }
9670
9671
965k
out:
9672
965k
    xmlFree(buf);
9673
965k
}
9674
9675
/**
9676
 * xmlParseContentInternal:
9677
 * @ctxt:  an XML parser context
9678
 *
9679
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9680
 * unexpected EOF to the caller.
9681
 */
9682
9683
static void
9684
210k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9685
210k
    int oldNameNr = ctxt->nameNr;
9686
210k
    int oldSpaceNr = ctxt->spaceNr;
9687
210k
    int oldNodeNr = ctxt->nodeNr;
9688
9689
210k
    GROW;
9690
69.0M
    while ((ctxt->input->cur < ctxt->input->end) &&
9691
69.0M
     (PARSER_STOPPED(ctxt) == 0)) {
9692
68.9M
  const xmlChar *cur = ctxt->input->cur;
9693
9694
  /*
9695
   * First case : a Processing Instruction.
9696
   */
9697
68.9M
  if ((*cur == '<') && (cur[1] == '?')) {
9698
207k
      xmlParsePI(ctxt);
9699
207k
  }
9700
9701
  /*
9702
   * Second case : a CDSection
9703
   */
9704
  /* 2.6.0 test was *cur not RAW */
9705
68.7M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9706
965k
      xmlParseCDSect(ctxt);
9707
965k
  }
9708
9709
  /*
9710
   * Third case :  a comment
9711
   */
9712
67.7M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9713
67.7M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9714
462k
      xmlParseComment(ctxt);
9715
462k
  }
9716
9717
  /*
9718
   * Fourth case :  a sub-element.
9719
   */
9720
67.2M
  else if (*cur == '<') {
9721
13.5M
            if (NXT(1) == '/') {
9722
2.40M
                if (ctxt->nameNr <= oldNameNr)
9723
67.8k
                    break;
9724
2.33M
          xmlParseElementEnd(ctxt);
9725
11.1M
            } else {
9726
11.1M
          xmlParseElementStart(ctxt);
9727
11.1M
            }
9728
13.5M
  }
9729
9730
  /*
9731
   * Fifth case : a reference. If if has not been resolved,
9732
   *    parsing returns it's Name, create the node
9733
   */
9734
9735
53.7M
  else if (*cur == '&') {
9736
2.59M
      xmlParseReference(ctxt);
9737
2.59M
  }
9738
9739
  /*
9740
   * Last case, text. Note that References are handled directly.
9741
   */
9742
51.1M
  else {
9743
51.1M
      xmlParseCharDataInternal(ctxt, 0);
9744
51.1M
  }
9745
9746
68.8M
  SHRINK;
9747
68.8M
  GROW;
9748
68.8M
    }
9749
9750
210k
    if ((ctxt->nameNr > oldNameNr) &&
9751
210k
        (ctxt->input->cur >= ctxt->input->end) &&
9752
210k
        (ctxt->wellFormed)) {
9753
4.71k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9754
4.71k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9755
4.71k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9756
4.71k
                "Premature end of data in tag %s line %d\n",
9757
4.71k
                name, line, NULL);
9758
4.71k
    }
9759
9760
    /*
9761
     * Clean up in error case
9762
     */
9763
9764
1.90M
    while (ctxt->nodeNr > oldNodeNr)
9765
1.69M
        nodePop(ctxt);
9766
9767
2.12M
    while (ctxt->nameNr > oldNameNr) {
9768
1.91M
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9769
9770
1.91M
        if (tag->nsNr != 0)
9771
323k
            xmlParserNsPop(ctxt, tag->nsNr);
9772
9773
1.91M
        namePop(ctxt);
9774
1.91M
    }
9775
9776
2.12M
    while (ctxt->spaceNr > oldSpaceNr)
9777
1.91M
        spacePop(ctxt);
9778
210k
}
9779
9780
/**
9781
 * xmlParseContent:
9782
 * @ctxt:  an XML parser context
9783
 *
9784
 * Parse XML element content. This is useful if you're only interested
9785
 * in custom SAX callbacks. If you want a node list, use
9786
 * xmlCtxtParseContent.
9787
 */
9788
void
9789
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9790
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9791
0
        return;
9792
9793
0
    xmlCtxtInitializeLate(ctxt);
9794
9795
0
    xmlParseContentInternal(ctxt);
9796
9797
0
    if (ctxt->input->cur < ctxt->input->end)
9798
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9799
0
}
9800
9801
/**
9802
 * xmlParseElement:
9803
 * @ctxt:  an XML parser context
9804
 *
9805
 * DEPRECATED: Internal function, don't use.
9806
 *
9807
 * parse an XML element
9808
 *
9809
 * [39] element ::= EmptyElemTag | STag content ETag
9810
 *
9811
 * [ WFC: Element Type Match ]
9812
 * The Name in an element's end-tag must match the element type in the
9813
 * start-tag.
9814
 *
9815
 */
9816
9817
void
9818
213k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9819
213k
    if (xmlParseElementStart(ctxt) != 0)
9820
63.4k
        return;
9821
9822
150k
    xmlParseContentInternal(ctxt);
9823
9824
150k
    if (ctxt->input->cur >= ctxt->input->end) {
9825
72.7k
        if (ctxt->wellFormed) {
9826
5.24k
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9827
5.24k
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9828
5.24k
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9829
5.24k
                    "Premature end of data in tag %s line %d\n",
9830
5.24k
                    name, line, NULL);
9831
5.24k
        }
9832
72.7k
        return;
9833
72.7k
    }
9834
9835
77.6k
    xmlParseElementEnd(ctxt);
9836
77.6k
}
9837
9838
/**
9839
 * xmlParseElementStart:
9840
 * @ctxt:  an XML parser context
9841
 *
9842
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9843
 * opening tag was parsed, 1 if an empty element was parsed.
9844
 *
9845
 * Always consumes '<'.
9846
 */
9847
static int
9848
11.3M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9849
11.3M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9850
11.3M
    const xmlChar *name;
9851
11.3M
    const xmlChar *prefix = NULL;
9852
11.3M
    const xmlChar *URI = NULL;
9853
11.3M
    xmlParserNodeInfo node_info;
9854
11.3M
    int line;
9855
11.3M
    xmlNodePtr cur;
9856
11.3M
    int nbNs = 0;
9857
9858
11.3M
    if (ctxt->nameNr > maxDepth) {
9859
217
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9860
217
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9861
217
                ctxt->nameNr);
9862
217
  xmlHaltParser(ctxt);
9863
217
  return(-1);
9864
217
    }
9865
9866
    /* Capture start position */
9867
11.3M
    if (ctxt->record_info) {
9868
0
        node_info.begin_pos = ctxt->input->consumed +
9869
0
                          (CUR_PTR - ctxt->input->base);
9870
0
  node_info.begin_line = ctxt->input->line;
9871
0
    }
9872
9873
11.3M
    if (ctxt->spaceNr == 0)
9874
83.5k
  spacePush(ctxt, -1);
9875
11.2M
    else if (*ctxt->space == -2)
9876
2.85M
  spacePush(ctxt, -1);
9877
8.44M
    else
9878
8.44M
  spacePush(ctxt, *ctxt->space);
9879
9880
11.3M
    line = ctxt->input->line;
9881
11.3M
#ifdef LIBXML_SAX1_ENABLED
9882
11.3M
    if (ctxt->sax2)
9883
7.58M
#endif /* LIBXML_SAX1_ENABLED */
9884
7.58M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9885
3.79M
#ifdef LIBXML_SAX1_ENABLED
9886
3.79M
    else
9887
3.79M
  name = xmlParseStartTag(ctxt);
9888
11.3M
#endif /* LIBXML_SAX1_ENABLED */
9889
11.3M
    if (name == NULL) {
9890
2.18M
  spacePop(ctxt);
9891
2.18M
        return(-1);
9892
2.18M
    }
9893
9.19M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9894
9.19M
    cur = ctxt->node;
9895
9896
9.19M
#ifdef LIBXML_VALID_ENABLED
9897
    /*
9898
     * [ VC: Root Element Type ]
9899
     * The Name in the document type declaration must match the element
9900
     * type of the root element.
9901
     */
9902
9.19M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9903
9.19M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9904
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9905
9.19M
#endif /* LIBXML_VALID_ENABLED */
9906
9907
    /*
9908
     * Check for an Empty Element.
9909
     */
9910
9.19M
    if ((RAW == '/') && (NXT(1) == '>')) {
9911
1.36M
        SKIP(2);
9912
1.36M
  if (ctxt->sax2) {
9913
799k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9914
799k
    (!ctxt->disableSAX))
9915
757k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9916
799k
#ifdef LIBXML_SAX1_ENABLED
9917
799k
  } else {
9918
564k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9919
564k
    (!ctxt->disableSAX))
9920
555k
    ctxt->sax->endElement(ctxt->userData, name);
9921
564k
#endif /* LIBXML_SAX1_ENABLED */
9922
564k
  }
9923
1.36M
  namePop(ctxt);
9924
1.36M
  spacePop(ctxt);
9925
1.36M
  if (nbNs > 0)
9926
72.2k
      xmlParserNsPop(ctxt, nbNs);
9927
1.36M
  if (cur != NULL && ctxt->record_info) {
9928
0
            node_info.node = cur;
9929
0
            node_info.end_pos = ctxt->input->consumed +
9930
0
                                (CUR_PTR - ctxt->input->base);
9931
0
            node_info.end_line = ctxt->input->line;
9932
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9933
0
  }
9934
1.36M
  return(1);
9935
1.36M
    }
9936
7.82M
    if (RAW == '>') {
9937
4.39M
        NEXT1;
9938
4.39M
        if (cur != NULL && ctxt->record_info) {
9939
0
            node_info.node = cur;
9940
0
            node_info.end_pos = 0;
9941
0
            node_info.end_line = 0;
9942
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9943
0
        }
9944
4.39M
    } else {
9945
3.43M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9946
3.43M
         "Couldn't find end of Start Tag %s line %d\n",
9947
3.43M
                    name, line, NULL);
9948
9949
  /*
9950
   * end of parsing of this node.
9951
   */
9952
3.43M
  nodePop(ctxt);
9953
3.43M
  namePop(ctxt);
9954
3.43M
  spacePop(ctxt);
9955
3.43M
  if (nbNs > 0)
9956
245k
      xmlParserNsPop(ctxt, nbNs);
9957
3.43M
  return(-1);
9958
3.43M
    }
9959
9960
4.39M
    return(0);
9961
7.82M
}
9962
9963
/**
9964
 * xmlParseElementEnd:
9965
 * @ctxt:  an XML parser context
9966
 *
9967
 * Parse the end of an XML element. Always consumes '</'.
9968
 */
9969
static void
9970
2.41M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9971
2.41M
    xmlNodePtr cur = ctxt->node;
9972
9973
2.41M
    if (ctxt->nameNr <= 0) {
9974
139
        if ((RAW == '<') && (NXT(1) == '/'))
9975
23
            SKIP(2);
9976
139
        return;
9977
139
    }
9978
9979
    /*
9980
     * parse the end of tag: '</' should be here.
9981
     */
9982
2.41M
    if (ctxt->sax2) {
9983
1.29M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9984
1.29M
  namePop(ctxt);
9985
1.29M
    }
9986
1.11M
#ifdef LIBXML_SAX1_ENABLED
9987
1.11M
    else
9988
1.11M
  xmlParseEndTag1(ctxt, 0);
9989
2.41M
#endif /* LIBXML_SAX1_ENABLED */
9990
9991
    /*
9992
     * Capture end position
9993
     */
9994
2.41M
    if (cur != NULL && ctxt->record_info) {
9995
0
        xmlParserNodeInfoPtr node_info;
9996
9997
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9998
0
        if (node_info != NULL) {
9999
0
            node_info->end_pos = ctxt->input->consumed +
10000
0
                                 (CUR_PTR - ctxt->input->base);
10001
0
            node_info->end_line = ctxt->input->line;
10002
0
        }
10003
0
    }
10004
2.41M
}
10005
10006
/**
10007
 * xmlParseVersionNum:
10008
 * @ctxt:  an XML parser context
10009
 *
10010
 * DEPRECATED: Internal function, don't use.
10011
 *
10012
 * parse the XML version value.
10013
 *
10014
 * [26] VersionNum ::= '1.' [0-9]+
10015
 *
10016
 * In practice allow [0-9].[0-9]+ at that level
10017
 *
10018
 * Returns the string giving the XML version number, or NULL
10019
 */
10020
xmlChar *
10021
200k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10022
200k
    xmlChar *buf = NULL;
10023
200k
    int len = 0;
10024
200k
    int size = 10;
10025
200k
    xmlChar cur;
10026
10027
200k
    buf = xmlMalloc(size);
10028
200k
    if (buf == NULL) {
10029
185
  xmlErrMemory(ctxt);
10030
185
  return(NULL);
10031
185
    }
10032
200k
    cur = CUR;
10033
200k
    if (!((cur >= '0') && (cur <= '9'))) {
10034
13.3k
  xmlFree(buf);
10035
13.3k
  return(NULL);
10036
13.3k
    }
10037
187k
    buf[len++] = cur;
10038
187k
    NEXT;
10039
187k
    cur=CUR;
10040
187k
    if (cur != '.') {
10041
5.71k
  xmlFree(buf);
10042
5.71k
  return(NULL);
10043
5.71k
    }
10044
181k
    buf[len++] = cur;
10045
181k
    NEXT;
10046
181k
    cur=CUR;
10047
476k
    while ((cur >= '0') && (cur <= '9')) {
10048
294k
  if (len + 1 >= size) {
10049
5.40k
      xmlChar *tmp;
10050
10051
5.40k
      size *= 2;
10052
5.40k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10053
5.40k
      if (tmp == NULL) {
10054
16
          xmlFree(buf);
10055
16
    xmlErrMemory(ctxt);
10056
16
    return(NULL);
10057
16
      }
10058
5.39k
      buf = tmp;
10059
5.39k
  }
10060
294k
  buf[len++] = cur;
10061
294k
  NEXT;
10062
294k
  cur=CUR;
10063
294k
    }
10064
181k
    buf[len] = 0;
10065
181k
    return(buf);
10066
181k
}
10067
10068
/**
10069
 * xmlParseVersionInfo:
10070
 * @ctxt:  an XML parser context
10071
 *
10072
 * DEPRECATED: Internal function, don't use.
10073
 *
10074
 * parse the XML version.
10075
 *
10076
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10077
 *
10078
 * [25] Eq ::= S? '=' S?
10079
 *
10080
 * Returns the version string, e.g. "1.0"
10081
 */
10082
10083
xmlChar *
10084
279k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10085
279k
    xmlChar *version = NULL;
10086
10087
279k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10088
209k
  SKIP(7);
10089
209k
  SKIP_BLANKS;
10090
209k
  if (RAW != '=') {
10091
4.91k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10092
4.91k
      return(NULL);
10093
4.91k
        }
10094
204k
  NEXT;
10095
204k
  SKIP_BLANKS;
10096
204k
  if (RAW == '"') {
10097
166k
      NEXT;
10098
166k
      version = xmlParseVersionNum(ctxt);
10099
166k
      if (RAW != '"') {
10100
11.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10101
11.0k
      } else
10102
155k
          NEXT;
10103
166k
  } else if (RAW == '\''){
10104
34.1k
      NEXT;
10105
34.1k
      version = xmlParseVersionNum(ctxt);
10106
34.1k
      if (RAW != '\'') {
10107
11.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10108
11.0k
      } else
10109
23.1k
          NEXT;
10110
34.1k
  } else {
10111
3.97k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10112
3.97k
  }
10113
204k
    }
10114
274k
    return(version);
10115
279k
}
10116
10117
/**
10118
 * xmlParseEncName:
10119
 * @ctxt:  an XML parser context
10120
 *
10121
 * DEPRECATED: Internal function, don't use.
10122
 *
10123
 * parse the XML encoding name
10124
 *
10125
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10126
 *
10127
 * Returns the encoding name value or NULL
10128
 */
10129
xmlChar *
10130
69.6k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10131
69.6k
    xmlChar *buf = NULL;
10132
69.6k
    int len = 0;
10133
69.6k
    int size = 10;
10134
69.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10135
14.3k
                    XML_MAX_TEXT_LENGTH :
10136
69.6k
                    XML_MAX_NAME_LENGTH;
10137
69.6k
    xmlChar cur;
10138
10139
69.6k
    cur = CUR;
10140
69.6k
    if (((cur >= 'a') && (cur <= 'z')) ||
10141
69.6k
        ((cur >= 'A') && (cur <= 'Z'))) {
10142
48.0k
  buf = xmlMalloc(size);
10143
48.0k
  if (buf == NULL) {
10144
80
      xmlErrMemory(ctxt);
10145
80
      return(NULL);
10146
80
  }
10147
10148
48.0k
  buf[len++] = cur;
10149
48.0k
  NEXT;
10150
48.0k
  cur = CUR;
10151
6.79M
  while (((cur >= 'a') && (cur <= 'z')) ||
10152
6.79M
         ((cur >= 'A') && (cur <= 'Z')) ||
10153
6.79M
         ((cur >= '0') && (cur <= '9')) ||
10154
6.79M
         (cur == '.') || (cur == '_') ||
10155
6.79M
         (cur == '-')) {
10156
6.74M
      if (len + 1 >= size) {
10157
32.0k
          xmlChar *tmp;
10158
10159
32.0k
    size *= 2;
10160
32.0k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10161
32.0k
    if (tmp == NULL) {
10162
41
        xmlErrMemory(ctxt);
10163
41
        xmlFree(buf);
10164
41
        return(NULL);
10165
41
    }
10166
32.0k
    buf = tmp;
10167
32.0k
      }
10168
6.74M
      buf[len++] = cur;
10169
6.74M
            if (len > maxLength) {
10170
83
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10171
83
                xmlFree(buf);
10172
83
                return(NULL);
10173
83
            }
10174
6.74M
      NEXT;
10175
6.74M
      cur = CUR;
10176
6.74M
        }
10177
47.8k
  buf[len] = 0;
10178
47.8k
    } else {
10179
21.5k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10180
21.5k
    }
10181
69.4k
    return(buf);
10182
69.6k
}
10183
10184
/**
10185
 * xmlParseEncodingDecl:
10186
 * @ctxt:  an XML parser context
10187
 *
10188
 * DEPRECATED: Internal function, don't use.
10189
 *
10190
 * parse the XML encoding declaration
10191
 *
10192
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10193
 *
10194
 * this setups the conversion filters.
10195
 *
10196
 * Returns the encoding value or NULL
10197
 */
10198
10199
const xmlChar *
10200
266k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10201
266k
    xmlChar *encoding = NULL;
10202
10203
266k
    SKIP_BLANKS;
10204
266k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10205
190k
        return(NULL);
10206
10207
75.6k
    SKIP(8);
10208
75.6k
    SKIP_BLANKS;
10209
75.6k
    if (RAW != '=') {
10210
3.04k
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10211
3.04k
        return(NULL);
10212
3.04k
    }
10213
72.5k
    NEXT;
10214
72.5k
    SKIP_BLANKS;
10215
72.5k
    if (RAW == '"') {
10216
46.1k
        NEXT;
10217
46.1k
        encoding = xmlParseEncName(ctxt);
10218
46.1k
        if (RAW != '"') {
10219
6.91k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10220
6.91k
            xmlFree((xmlChar *) encoding);
10221
6.91k
            return(NULL);
10222
6.91k
        } else
10223
39.2k
            NEXT;
10224
46.1k
    } else if (RAW == '\''){
10225
23.5k
        NEXT;
10226
23.5k
        encoding = xmlParseEncName(ctxt);
10227
23.5k
        if (RAW != '\'') {
10228
9.11k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229
9.11k
            xmlFree((xmlChar *) encoding);
10230
9.11k
            return(NULL);
10231
9.11k
        } else
10232
14.4k
            NEXT;
10233
23.5k
    } else {
10234
2.86k
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10235
2.86k
    }
10236
10237
56.5k
    if (encoding == NULL)
10238
15.4k
        return(NULL);
10239
10240
41.0k
    xmlSetDeclaredEncoding(ctxt, encoding);
10241
10242
41.0k
    return(ctxt->encoding);
10243
56.5k
}
10244
10245
/**
10246
 * xmlParseSDDecl:
10247
 * @ctxt:  an XML parser context
10248
 *
10249
 * DEPRECATED: Internal function, don't use.
10250
 *
10251
 * parse the XML standalone declaration
10252
 *
10253
 * [32] SDDecl ::= S 'standalone' Eq
10254
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10255
 *
10256
 * [ VC: Standalone Document Declaration ]
10257
 * TODO The standalone document declaration must have the value "no"
10258
 * if any external markup declarations contain declarations of:
10259
 *  - attributes with default values, if elements to which these
10260
 *    attributes apply appear in the document without specifications
10261
 *    of values for these attributes, or
10262
 *  - entities (other than amp, lt, gt, apos, quot), if references
10263
 *    to those entities appear in the document, or
10264
 *  - attributes with values subject to normalization, where the
10265
 *    attribute appears in the document with a value which will change
10266
 *    as a result of normalization, or
10267
 *  - element types with element content, if white space occurs directly
10268
 *    within any instance of those types.
10269
 *
10270
 * Returns:
10271
 *   1 if standalone="yes"
10272
 *   0 if standalone="no"
10273
 *  -2 if standalone attribute is missing or invalid
10274
 *    (A standalone value of -2 means that the XML declaration was found,
10275
 *     but no value was specified for the standalone attribute).
10276
 */
10277
10278
int
10279
156k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10280
156k
    int standalone = -2;
10281
10282
156k
    SKIP_BLANKS;
10283
156k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10284
113k
  SKIP(10);
10285
113k
        SKIP_BLANKS;
10286
113k
  if (RAW != '=') {
10287
297
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10288
297
      return(standalone);
10289
297
        }
10290
113k
  NEXT;
10291
113k
  SKIP_BLANKS;
10292
113k
        if (RAW == '\''){
10293
2.35k
      NEXT;
10294
2.35k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10295
558
          standalone = 0;
10296
558
                SKIP(2);
10297
1.79k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10298
1.79k
                 (NXT(2) == 's')) {
10299
583
          standalone = 1;
10300
583
    SKIP(3);
10301
1.21k
            } else {
10302
1.21k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10303
1.21k
      }
10304
2.35k
      if (RAW != '\'') {
10305
1.53k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10306
1.53k
      } else
10307
826
          NEXT;
10308
111k
  } else if (RAW == '"'){
10309
110k
      NEXT;
10310
110k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10311
912
          standalone = 0;
10312
912
    SKIP(2);
10313
109k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10314
109k
                 (NXT(2) == 's')) {
10315
108k
          standalone = 1;
10316
108k
                SKIP(3);
10317
108k
            } else {
10318
1.10k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10319
1.10k
      }
10320
110k
      if (RAW != '"') {
10321
1.74k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10322
1.74k
      } else
10323
109k
          NEXT;
10324
110k
  } else {
10325
307
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10326
307
        }
10327
113k
    }
10328
156k
    return(standalone);
10329
156k
}
10330
10331
/**
10332
 * xmlParseXMLDecl:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse an XML declaration header
10338
 *
10339
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10340
 */
10341
10342
void
10343
184k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10344
184k
    xmlChar *version;
10345
10346
    /*
10347
     * This value for standalone indicates that the document has an
10348
     * XML declaration but it does not have a standalone attribute.
10349
     * It will be overwritten later if a standalone attribute is found.
10350
     */
10351
10352
184k
    ctxt->standalone = -2;
10353
10354
    /*
10355
     * We know that '<?xml' is here.
10356
     */
10357
184k
    SKIP(5);
10358
10359
184k
    if (!IS_BLANK_CH(RAW)) {
10360
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10361
0
                 "Blank needed after '<?xml'\n");
10362
0
    }
10363
184k
    SKIP_BLANKS;
10364
10365
    /*
10366
     * We must have the VersionInfo here.
10367
     */
10368
184k
    version = xmlParseVersionInfo(ctxt);
10369
184k
    if (version == NULL) {
10370
28.4k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10371
156k
    } else {
10372
156k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10373
      /*
10374
       * Changed here for XML-1.0 5th edition
10375
       */
10376
12.6k
      if (ctxt->options & XML_PARSE_OLD10) {
10377
2.56k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10378
2.56k
                "Unsupported version '%s'\n",
10379
2.56k
                version);
10380
10.1k
      } else {
10381
10.1k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10382
5.11k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10383
5.11k
                      "Unsupported version '%s'\n",
10384
5.11k
          version, NULL);
10385
5.11k
    } else {
10386
5.01k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387
5.01k
              "Unsupported version '%s'\n",
10388
5.01k
              version);
10389
5.01k
    }
10390
10.1k
      }
10391
12.6k
  }
10392
156k
  if (ctxt->version != NULL)
10393
0
      xmlFree((void *) ctxt->version);
10394
156k
  ctxt->version = version;
10395
156k
    }
10396
10397
    /*
10398
     * We may have the encoding declaration
10399
     */
10400
184k
    if (!IS_BLANK_CH(RAW)) {
10401
38.7k
        if ((RAW == '?') && (NXT(1) == '>')) {
10402
13.3k
      SKIP(2);
10403
13.3k
      return;
10404
13.3k
  }
10405
25.4k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10406
25.4k
    }
10407
171k
    xmlParseEncodingDecl(ctxt);
10408
10409
    /*
10410
     * We may have the standalone status.
10411
     */
10412
171k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10413
18.7k
        if ((RAW == '?') && (NXT(1) == '>')) {
10414
14.8k
      SKIP(2);
10415
14.8k
      return;
10416
14.8k
  }
10417
3.86k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10418
3.86k
    }
10419
10420
    /*
10421
     * We can grow the input buffer freely at that point
10422
     */
10423
156k
    GROW;
10424
10425
156k
    SKIP_BLANKS;
10426
156k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10427
10428
156k
    SKIP_BLANKS;
10429
156k
    if ((RAW == '?') && (NXT(1) == '>')) {
10430
111k
        SKIP(2);
10431
111k
    } else if (RAW == '>') {
10432
        /* Deprecated old WD ... */
10433
1.06k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10434
1.06k
  NEXT;
10435
44.2k
    } else {
10436
44.2k
        int c;
10437
10438
44.2k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10439
1.39M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10440
1.39M
               ((c = CUR) != 0)) {
10441
1.37M
            NEXT;
10442
1.37M
            if (c == '>')
10443
26.8k
                break;
10444
1.37M
        }
10445
44.2k
    }
10446
156k
}
10447
10448
/**
10449
 * xmlCtxtGetVersion:
10450
 * ctxt:  parser context
10451
 *
10452
 * Available since 2.14.0.
10453
 *
10454
 * Returns the version from the XML declaration.
10455
 */
10456
const xmlChar *
10457
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10458
0
    if (ctxt == NULL)
10459
0
        return(NULL);
10460
10461
0
    return(ctxt->version);
10462
0
}
10463
10464
/**
10465
 * xmlCtxtGetStandalone:
10466
 * ctxt:  parser context
10467
 *
10468
 * Available since 2.14.0.
10469
 *
10470
 * Returns the value from the standalone document declaration.
10471
 */
10472
int
10473
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10474
0
    if (ctxt == NULL)
10475
0
        return(0);
10476
10477
0
    return(ctxt->standalone);
10478
0
}
10479
10480
/**
10481
 * xmlParseMisc:
10482
 * @ctxt:  an XML parser context
10483
 *
10484
 * DEPRECATED: Internal function, don't use.
10485
 *
10486
 * parse an XML Misc* optional field.
10487
 *
10488
 * [27] Misc ::= Comment | PI |  S
10489
 */
10490
10491
void
10492
828k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10493
961k
    while (PARSER_STOPPED(ctxt) == 0) {
10494
918k
        SKIP_BLANKS;
10495
918k
        GROW;
10496
918k
        if ((RAW == '<') && (NXT(1) == '?')) {
10497
78.8k
      xmlParsePI(ctxt);
10498
839k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10499
54.1k
      xmlParseComment(ctxt);
10500
785k
        } else {
10501
785k
            break;
10502
785k
        }
10503
918k
    }
10504
828k
}
10505
10506
static void
10507
436k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10508
436k
    xmlDocPtr doc;
10509
10510
    /*
10511
     * SAX: end of the document processing.
10512
     */
10513
436k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10514
435k
        ctxt->sax->endDocument(ctxt->userData);
10515
10516
436k
    doc = ctxt->myDoc;
10517
436k
    if (doc != NULL) {
10518
405k
        if (ctxt->wellFormed) {
10519
61.3k
            doc->properties |= XML_DOC_WELLFORMED;
10520
61.3k
            if (ctxt->valid)
10521
30.0k
                doc->properties |= XML_DOC_DTDVALID;
10522
61.3k
            if (ctxt->nsWellFormed)
10523
46.3k
                doc->properties |= XML_DOC_NSVALID;
10524
61.3k
        }
10525
10526
405k
        if (ctxt->options & XML_PARSE_OLD10)
10527
132k
            doc->properties |= XML_DOC_OLD10;
10528
10529
        /*
10530
         * Remove locally kept entity definitions if the tree was not built
10531
         */
10532
405k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10533
3.88k
            xmlFreeDoc(doc);
10534
3.88k
            ctxt->myDoc = NULL;
10535
3.88k
        }
10536
405k
    }
10537
436k
}
10538
10539
/**
10540
 * xmlParseDocument:
10541
 * @ctxt:  an XML parser context
10542
 *
10543
 * Parse an XML document and invoke the SAX handlers. This is useful
10544
 * if you're only interested in custom SAX callbacks. If you want a
10545
 * document tree, use xmlCtxtParseDocument.
10546
 *
10547
 * Returns 0, -1 in case of error.
10548
 */
10549
10550
int
10551
404k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10552
404k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10553
0
        return(-1);
10554
10555
404k
    GROW;
10556
10557
    /*
10558
     * SAX: detecting the level.
10559
     */
10560
404k
    xmlCtxtInitializeLate(ctxt);
10561
10562
404k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10563
404k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10564
404k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10565
404k
    }
10566
10567
404k
    xmlDetectEncoding(ctxt);
10568
10569
404k
    if (CUR == 0) {
10570
10.2k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10571
10.2k
  return(-1);
10572
10.2k
    }
10573
10574
394k
    GROW;
10575
394k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10576
10577
  /*
10578
   * Note that we will switch encoding on the fly.
10579
   */
10580
172k
  xmlParseXMLDecl(ctxt);
10581
172k
  SKIP_BLANKS;
10582
221k
    } else {
10583
221k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10584
221k
        if (ctxt->version == NULL) {
10585
64
            xmlErrMemory(ctxt);
10586
64
            return(-1);
10587
64
        }
10588
221k
    }
10589
394k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10590
360k
        ctxt->sax->startDocument(ctxt->userData);
10591
394k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10592
394k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10593
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10594
0
    }
10595
10596
    /*
10597
     * The Misc part of the Prolog
10598
     */
10599
394k
    xmlParseMisc(ctxt);
10600
10601
    /*
10602
     * Then possibly doc type declaration(s) and more Misc
10603
     * (doctypedecl Misc*)?
10604
     */
10605
394k
    GROW;
10606
394k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10607
10608
220k
  ctxt->inSubset = 1;
10609
220k
  xmlParseDocTypeDecl(ctxt);
10610
220k
  if (RAW == '[') {
10611
170k
      xmlParseInternalSubset(ctxt);
10612
170k
  }
10613
10614
  /*
10615
   * Create and update the external subset.
10616
   */
10617
220k
  ctxt->inSubset = 2;
10618
220k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10619
220k
      (!ctxt->disableSAX))
10620
158k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10621
158k
                                ctxt->extSubSystem, ctxt->extSubURI);
10622
220k
  ctxt->inSubset = 0;
10623
10624
220k
        xmlCleanSpecialAttr(ctxt);
10625
10626
220k
  xmlParseMisc(ctxt);
10627
220k
    }
10628
10629
    /*
10630
     * Time to start parsing the tree itself
10631
     */
10632
394k
    GROW;
10633
394k
    if (RAW != '<') {
10634
180k
        if (ctxt->wellFormed)
10635
35.1k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10636
35.1k
                           "Start tag expected, '<' not found\n");
10637
213k
    } else {
10638
213k
  xmlParseElement(ctxt);
10639
10640
  /*
10641
   * The Misc part at the end
10642
   */
10643
213k
  xmlParseMisc(ctxt);
10644
10645
213k
        if (ctxt->input->cur < ctxt->input->end) {
10646
54.8k
            if (ctxt->wellFormed)
10647
1.90k
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10648
158k
        } else if ((ctxt->input->buf != NULL) &&
10649
158k
                   (ctxt->input->buf->encoder != NULL) &&
10650
158k
                   (ctxt->input->buf->error == 0) &&
10651
158k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10652
810
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10653
810
                           "Truncated multi-byte sequence at EOF\n");
10654
810
        }
10655
213k
    }
10656
10657
394k
    ctxt->instate = XML_PARSER_EOF;
10658
394k
    xmlFinishDocument(ctxt);
10659
10660
394k
    if (! ctxt->wellFormed) {
10661
335k
  ctxt->valid = 0;
10662
335k
  return(-1);
10663
335k
    }
10664
10665
58.8k
    return(0);
10666
394k
}
10667
10668
/**
10669
 * xmlParseExtParsedEnt:
10670
 * @ctxt:  an XML parser context
10671
 *
10672
 * parse a general parsed entity
10673
 * An external general parsed entity is well-formed if it matches the
10674
 * production labeled extParsedEnt.
10675
 *
10676
 * [78] extParsedEnt ::= TextDecl? content
10677
 *
10678
 * Returns 0, -1 in case of error. the parser context is augmented
10679
 *                as a result of the parsing.
10680
 */
10681
10682
int
10683
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10684
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10685
0
        return(-1);
10686
10687
0
    xmlCtxtInitializeLate(ctxt);
10688
10689
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10690
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10691
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10692
0
    }
10693
10694
0
    xmlDetectEncoding(ctxt);
10695
10696
0
    if (CUR == 0) {
10697
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10698
0
    }
10699
10700
    /*
10701
     * Check for the XMLDecl in the Prolog.
10702
     */
10703
0
    GROW;
10704
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10705
10706
  /*
10707
   * Note that we will switch encoding on the fly.
10708
   */
10709
0
  xmlParseXMLDecl(ctxt);
10710
0
  SKIP_BLANKS;
10711
0
    } else {
10712
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10713
0
    }
10714
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10715
0
        ctxt->sax->startDocument(ctxt->userData);
10716
10717
    /*
10718
     * Doing validity checking on chunk doesn't make sense
10719
     */
10720
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10721
0
    ctxt->validate = 0;
10722
0
    ctxt->depth = 0;
10723
10724
0
    xmlParseContentInternal(ctxt);
10725
10726
0
    if (ctxt->input->cur < ctxt->input->end)
10727
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10728
10729
    /*
10730
     * SAX: end of the document processing.
10731
     */
10732
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733
0
        ctxt->sax->endDocument(ctxt->userData);
10734
10735
0
    if (! ctxt->wellFormed) return(-1);
10736
0
    return(0);
10737
0
}
10738
10739
#ifdef LIBXML_PUSH_ENABLED
10740
/************************************************************************
10741
 *                  *
10742
 *    Progressive parsing interfaces        *
10743
 *                  *
10744
 ************************************************************************/
10745
10746
/**
10747
 * xmlParseLookupChar:
10748
 * @ctxt:  an XML parser context
10749
 * @c:  character
10750
 *
10751
 * Check whether the input buffer contains a character.
10752
 */
10753
static int
10754
416k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10755
416k
    const xmlChar *cur;
10756
10757
416k
    if (ctxt->checkIndex == 0) {
10758
307k
        cur = ctxt->input->cur + 1;
10759
307k
    } else {
10760
108k
        cur = ctxt->input->cur + ctxt->checkIndex;
10761
108k
    }
10762
10763
416k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10764
111k
        size_t index = ctxt->input->end - ctxt->input->cur;
10765
10766
111k
        if (index > LONG_MAX) {
10767
0
            ctxt->checkIndex = 0;
10768
0
            return(1);
10769
0
        }
10770
111k
        ctxt->checkIndex = index;
10771
111k
        return(0);
10772
305k
    } else {
10773
305k
        ctxt->checkIndex = 0;
10774
305k
        return(1);
10775
305k
    }
10776
416k
}
10777
10778
/**
10779
 * xmlParseLookupString:
10780
 * @ctxt:  an XML parser context
10781
 * @startDelta: delta to apply at the start
10782
 * @str:  string
10783
 * @strLen:  length of string
10784
 *
10785
 * Check whether the input buffer contains a string.
10786
 */
10787
static const xmlChar *
10788
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10789
1.05M
                     const char *str, size_t strLen) {
10790
1.05M
    const xmlChar *cur, *term;
10791
10792
1.05M
    if (ctxt->checkIndex == 0) {
10793
549k
        cur = ctxt->input->cur + startDelta;
10794
549k
    } else {
10795
506k
        cur = ctxt->input->cur + ctxt->checkIndex;
10796
506k
    }
10797
10798
1.05M
    term = BAD_CAST strstr((const char *) cur, str);
10799
1.05M
    if (term == NULL) {
10800
777k
        const xmlChar *end = ctxt->input->end;
10801
777k
        size_t index;
10802
10803
        /* Rescan (strLen - 1) characters. */
10804
777k
        if ((size_t) (end - cur) < strLen)
10805
4.06k
            end = cur;
10806
773k
        else
10807
773k
            end -= strLen - 1;
10808
777k
        index = end - ctxt->input->cur;
10809
777k
        if (index > LONG_MAX) {
10810
0
            ctxt->checkIndex = 0;
10811
0
            return(ctxt->input->end - strLen);
10812
0
        }
10813
777k
        ctxt->checkIndex = index;
10814
777k
    } else {
10815
278k
        ctxt->checkIndex = 0;
10816
278k
    }
10817
10818
1.05M
    return(term);
10819
1.05M
}
10820
10821
/**
10822
 * xmlParseLookupCharData:
10823
 * @ctxt:  an XML parser context
10824
 *
10825
 * Check whether the input buffer contains terminated char data.
10826
 */
10827
static int
10828
424k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10829
424k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10830
424k
    const xmlChar *end = ctxt->input->end;
10831
424k
    size_t index;
10832
10833
10.0M
    while (cur < end) {
10834
10.0M
        if ((*cur == '<') || (*cur == '&')) {
10835
395k
            ctxt->checkIndex = 0;
10836
395k
            return(1);
10837
395k
        }
10838
9.65M
        cur++;
10839
9.65M
    }
10840
10841
28.2k
    index = cur - ctxt->input->cur;
10842
28.2k
    if (index > LONG_MAX) {
10843
0
        ctxt->checkIndex = 0;
10844
0
        return(1);
10845
0
    }
10846
28.2k
    ctxt->checkIndex = index;
10847
28.2k
    return(0);
10848
28.2k
}
10849
10850
/**
10851
 * xmlParseLookupGt:
10852
 * @ctxt:  an XML parser context
10853
 *
10854
 * Check whether there's enough data in the input buffer to finish parsing
10855
 * a start tag. This has to take quotes into account.
10856
 */
10857
static int
10858
2.13M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10859
2.13M
    const xmlChar *cur;
10860
2.13M
    const xmlChar *end = ctxt->input->end;
10861
2.13M
    int state = ctxt->endCheckState;
10862
2.13M
    size_t index;
10863
10864
2.13M
    if (ctxt->checkIndex == 0)
10865
741k
        cur = ctxt->input->cur + 1;
10866
1.39M
    else
10867
1.39M
        cur = ctxt->input->cur + ctxt->checkIndex;
10868
10869
391M
    while (cur < end) {
10870
390M
        if (state) {
10871
334M
            if (*cur == state)
10872
982k
                state = 0;
10873
334M
        } else if (*cur == '\'' || *cur == '"') {
10874
989k
            state = *cur;
10875
54.7M
        } else if (*cur == '>') {
10876
724k
            ctxt->checkIndex = 0;
10877
724k
            ctxt->endCheckState = 0;
10878
724k
            return(1);
10879
724k
        }
10880
389M
        cur++;
10881
389M
    }
10882
10883
1.41M
    index = cur - ctxt->input->cur;
10884
1.41M
    if (index > LONG_MAX) {
10885
0
        ctxt->checkIndex = 0;
10886
0
        ctxt->endCheckState = 0;
10887
0
        return(1);
10888
0
    }
10889
1.41M
    ctxt->checkIndex = index;
10890
1.41M
    ctxt->endCheckState = state;
10891
1.41M
    return(0);
10892
1.41M
}
10893
10894
/**
10895
 * xmlParseLookupInternalSubset:
10896
 * @ctxt:  an XML parser context
10897
 *
10898
 * Check whether there's enough data in the input buffer to finish parsing
10899
 * the internal subset.
10900
 */
10901
static int
10902
1.00M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10903
    /*
10904
     * Sorry, but progressive parsing of the internal subset is not
10905
     * supported. We first check that the full content of the internal
10906
     * subset is available and parsing is launched only at that point.
10907
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10908
     * not in a ']]>' sequence which are conditional sections.
10909
     */
10910
1.00M
    const xmlChar *cur, *start;
10911
1.00M
    const xmlChar *end = ctxt->input->end;
10912
1.00M
    int state = ctxt->endCheckState;
10913
1.00M
    size_t index;
10914
10915
1.00M
    if (ctxt->checkIndex == 0) {
10916
28.5k
        cur = ctxt->input->cur + 1;
10917
973k
    } else {
10918
973k
        cur = ctxt->input->cur + ctxt->checkIndex;
10919
973k
    }
10920
1.00M
    start = cur;
10921
10922
411M
    while (cur < end) {
10923
410M
        if (state == '-') {
10924
746k
            if ((*cur == '-') &&
10925
746k
                (cur[1] == '-') &&
10926
746k
                (cur[2] == '>')) {
10927
23.9k
                state = 0;
10928
23.9k
                cur += 3;
10929
23.9k
                start = cur;
10930
23.9k
                continue;
10931
23.9k
            }
10932
746k
        }
10933
410M
        else if (state == ']') {
10934
36.7k
            if (*cur == '>') {
10935
21.4k
                ctxt->checkIndex = 0;
10936
21.4k
                ctxt->endCheckState = 0;
10937
21.4k
                return(1);
10938
21.4k
            }
10939
15.2k
            if (IS_BLANK_CH(*cur)) {
10940
5.07k
                state = ' ';
10941
10.2k
            } else if (*cur != ']') {
10942
3.32k
                state = 0;
10943
3.32k
                start = cur;
10944
3.32k
                continue;
10945
3.32k
            }
10946
15.2k
        }
10947
410M
        else if (state == ' ') {
10948
345k
            if (*cur == '>') {
10949
153
                ctxt->checkIndex = 0;
10950
153
                ctxt->endCheckState = 0;
10951
153
                return(1);
10952
153
            }
10953
345k
            if (!IS_BLANK_CH(*cur)) {
10954
4.91k
                state = 0;
10955
4.91k
                start = cur;
10956
4.91k
                continue;
10957
4.91k
            }
10958
345k
        }
10959
409M
        else if (state != 0) {
10960
383M
            if (*cur == state) {
10961
100k
                state = 0;
10962
100k
                start = cur + 1;
10963
100k
            }
10964
383M
        }
10965
26.3M
        else if (*cur == '<') {
10966
151k
            if ((cur[1] == '!') &&
10967
151k
                (cur[2] == '-') &&
10968
151k
                (cur[3] == '-')) {
10969
24.1k
                state = '-';
10970
24.1k
                cur += 4;
10971
                /* Don't treat <!--> as comment */
10972
24.1k
                start = cur;
10973
24.1k
                continue;
10974
24.1k
            }
10975
151k
        }
10976
26.1M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10977
132k
            state = *cur;
10978
132k
        }
10979
10980
410M
        cur++;
10981
410M
    }
10982
10983
    /*
10984
     * Rescan the three last characters to detect "<!--" and "-->"
10985
     * split across chunks.
10986
     */
10987
980k
    if ((state == 0) || (state == '-')) {
10988
63.3k
        if (cur - start < 3)
10989
2.39k
            cur = start;
10990
60.9k
        else
10991
60.9k
            cur -= 3;
10992
63.3k
    }
10993
980k
    index = cur - ctxt->input->cur;
10994
980k
    if (index > LONG_MAX) {
10995
0
        ctxt->checkIndex = 0;
10996
0
        ctxt->endCheckState = 0;
10997
0
        return(1);
10998
0
    }
10999
980k
    ctxt->checkIndex = index;
11000
980k
    ctxt->endCheckState = state;
11001
980k
    return(0);
11002
980k
}
11003
11004
/**
11005
 * xmlCheckCdataPush:
11006
 * @cur: pointer to the block of characters
11007
 * @len: length of the block in bytes
11008
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11009
 *
11010
 * Check that the block of characters is okay as SCdata content [20]
11011
 *
11012
 * Returns the number of bytes to pass if okay, a negative index where an
11013
 *         UTF-8 error occurred otherwise
11014
 */
11015
static int
11016
403k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11017
403k
    int ix;
11018
403k
    unsigned char c;
11019
403k
    int codepoint;
11020
11021
403k
    if ((utf == NULL) || (len <= 0))
11022
5.27k
        return(0);
11023
11024
19.7M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11025
19.5M
        c = utf[ix];
11026
19.5M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11027
2.99M
      if (c >= 0x20)
11028
2.54M
    ix++;
11029
443k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11030
377k
          ix++;
11031
65.3k
      else
11032
65.3k
          return(-ix);
11033
16.5M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11034
336k
      if (ix + 2 > len) return(complete ? -ix : ix);
11035
335k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11036
2.29k
          return(-ix);
11037
332k
      codepoint = (utf[ix] & 0x1f) << 6;
11038
332k
      codepoint |= utf[ix+1] & 0x3f;
11039
332k
      if (!xmlIsCharQ(codepoint))
11040
1.27k
          return(-ix);
11041
331k
      ix += 2;
11042
16.2M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11043
16.2M
      if (ix + 3 > len) return(complete ? -ix : ix);
11044
16.2M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11045
16.2M
          ((utf[ix+2] & 0xc0) != 0x80))
11046
15.5k
        return(-ix);
11047
16.1M
      codepoint = (utf[ix] & 0xf) << 12;
11048
16.1M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11049
16.1M
      codepoint |= utf[ix+2] & 0x3f;
11050
16.1M
      if (!xmlIsCharQ(codepoint))
11051
72.0k
          return(-ix);
11052
16.1M
      ix += 3;
11053
16.1M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11054
17.7k
      if (ix + 4 > len) return(complete ? -ix : ix);
11055
16.8k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11056
16.8k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11057
16.8k
    ((utf[ix+3] & 0xc0) != 0x80))
11058
1.66k
        return(-ix);
11059
15.1k
      codepoint = (utf[ix] & 0x7) << 18;
11060
15.1k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11061
15.1k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11062
15.1k
      codepoint |= utf[ix+3] & 0x3f;
11063
15.1k
      if (!xmlIsCharQ(codepoint))
11064
1.51k
          return(-ix);
11065
13.6k
      ix += 4;
11066
13.6k
  } else       /* unknown encoding */
11067
2.63k
      return(-ix);
11068
19.5M
      }
11069
213k
      return(ix);
11070
398k
}
11071
11072
/**
11073
 * xmlParseTryOrFinish:
11074
 * @ctxt:  an XML parser context
11075
 * @terminate:  last chunk indicator
11076
 *
11077
 * Try to progress on parsing
11078
 *
11079
 * Returns zero if no parsing was possible
11080
 */
11081
static int
11082
4.22M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11083
4.22M
    int ret = 0;
11084
4.22M
    size_t avail;
11085
4.22M
    xmlChar cur, next;
11086
11087
4.22M
    if (ctxt->input == NULL)
11088
0
        return(0);
11089
11090
4.22M
    if ((ctxt->input != NULL) &&
11091
4.22M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11092
11.6k
        xmlParserShrink(ctxt);
11093
11.6k
    }
11094
11095
10.0M
    while (ctxt->disableSAX == 0) {
11096
10.0M
        avail = ctxt->input->end - ctxt->input->cur;
11097
10.0M
        if (avail < 1)
11098
44.8k
      goto done;
11099
9.95M
        switch (ctxt->instate) {
11100
896k
            case XML_PARSER_EOF:
11101
          /*
11102
     * Document parsing is done !
11103
     */
11104
896k
          goto done;
11105
64.8k
            case XML_PARSER_START:
11106
                /*
11107
                 * Very first chars read from the document flow.
11108
                 */
11109
64.8k
                if ((!terminate) && (avail < 4))
11110
733
                    goto done;
11111
11112
                /*
11113
                 * We need more bytes to detect EBCDIC code pages.
11114
                 * See xmlDetectEBCDIC.
11115
                 */
11116
64.0k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11117
64.0k
                    (!terminate) && (avail < 200))
11118
718
                    goto done;
11119
11120
63.3k
                xmlDetectEncoding(ctxt);
11121
63.3k
                ctxt->instate = XML_PARSER_XML_DECL;
11122
63.3k
    break;
11123
11124
297k
            case XML_PARSER_XML_DECL:
11125
297k
    if ((!terminate) && (avail < 2))
11126
31
        goto done;
11127
297k
    cur = ctxt->input->cur[0];
11128
297k
    next = ctxt->input->cur[1];
11129
297k
          if ((cur == '<') && (next == '?')) {
11130
        /* PI or XML decl */
11131
251k
        if ((!terminate) &&
11132
251k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11133
234k
      goto done;
11134
17.4k
        if ((ctxt->input->cur[2] == 'x') &&
11135
17.4k
      (ctxt->input->cur[3] == 'm') &&
11136
17.4k
      (ctxt->input->cur[4] == 'l') &&
11137
17.4k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11138
12.1k
      ret += 5;
11139
12.1k
      xmlParseXMLDecl(ctxt);
11140
12.1k
        } else {
11141
5.30k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11142
5.30k
                        if (ctxt->version == NULL) {
11143
11
                            xmlErrMemory(ctxt);
11144
11
                            break;
11145
11
                        }
11146
5.30k
        }
11147
45.7k
    } else {
11148
45.7k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11149
45.7k
        if (ctxt->version == NULL) {
11150
44
            xmlErrMemory(ctxt);
11151
44
      break;
11152
44
        }
11153
45.7k
    }
11154
63.1k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11155
63.1k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11156
63.1k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11157
63.1k
                }
11158
63.1k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11159
63.1k
                    (!ctxt->disableSAX))
11160
61.8k
                    ctxt->sax->startDocument(ctxt->userData);
11161
63.1k
                ctxt->instate = XML_PARSER_MISC;
11162
63.1k
    break;
11163
1.87M
            case XML_PARSER_START_TAG: {
11164
1.87M
          const xmlChar *name;
11165
1.87M
    const xmlChar *prefix = NULL;
11166
1.87M
    const xmlChar *URI = NULL;
11167
1.87M
                int line = ctxt->input->line;
11168
1.87M
    int nbNs = 0;
11169
11170
1.87M
    if ((!terminate) && (avail < 2))
11171
267
        goto done;
11172
1.87M
    cur = ctxt->input->cur[0];
11173
1.87M
          if (cur != '<') {
11174
6.29k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11175
6.29k
                                   "Start tag expected, '<' not found");
11176
6.29k
                    ctxt->instate = XML_PARSER_EOF;
11177
6.29k
                    xmlFinishDocument(ctxt);
11178
6.29k
        goto done;
11179
6.29k
    }
11180
1.86M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11181
957k
                    goto done;
11182
908k
    if (ctxt->spaceNr == 0)
11183
0
        spacePush(ctxt, -1);
11184
908k
    else if (*ctxt->space == -2)
11185
150k
        spacePush(ctxt, -1);
11186
757k
    else
11187
757k
        spacePush(ctxt, *ctxt->space);
11188
908k
#ifdef LIBXML_SAX1_ENABLED
11189
908k
    if (ctxt->sax2)
11190
646k
#endif /* LIBXML_SAX1_ENABLED */
11191
646k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11192
261k
#ifdef LIBXML_SAX1_ENABLED
11193
261k
    else
11194
261k
        name = xmlParseStartTag(ctxt);
11195
908k
#endif /* LIBXML_SAX1_ENABLED */
11196
908k
    if (name == NULL) {
11197
10.6k
        spacePop(ctxt);
11198
10.6k
                    ctxt->instate = XML_PARSER_EOF;
11199
10.6k
                    xmlFinishDocument(ctxt);
11200
10.6k
        goto done;
11201
10.6k
    }
11202
897k
#ifdef LIBXML_VALID_ENABLED
11203
    /*
11204
     * [ VC: Root Element Type ]
11205
     * The Name in the document type declaration must match
11206
     * the element type of the root element.
11207
     */
11208
897k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11209
897k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11210
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11211
897k
#endif /* LIBXML_VALID_ENABLED */
11212
11213
    /*
11214
     * Check for an Empty Element.
11215
     */
11216
897k
    if ((RAW == '/') && (NXT(1) == '>')) {
11217
142k
        SKIP(2);
11218
11219
142k
        if (ctxt->sax2) {
11220
88.1k
      if ((ctxt->sax != NULL) &&
11221
88.1k
          (ctxt->sax->endElementNs != NULL) &&
11222
88.1k
          (!ctxt->disableSAX))
11223
87.9k
          ctxt->sax->endElementNs(ctxt->userData, name,
11224
87.9k
                                  prefix, URI);
11225
88.1k
      if (nbNs > 0)
11226
11.0k
          xmlParserNsPop(ctxt, nbNs);
11227
88.1k
#ifdef LIBXML_SAX1_ENABLED
11228
88.1k
        } else {
11229
54.6k
      if ((ctxt->sax != NULL) &&
11230
54.6k
          (ctxt->sax->endElement != NULL) &&
11231
54.6k
          (!ctxt->disableSAX))
11232
54.5k
          ctxt->sax->endElement(ctxt->userData, name);
11233
54.6k
#endif /* LIBXML_SAX1_ENABLED */
11234
54.6k
        }
11235
142k
        spacePop(ctxt);
11236
754k
    } else if (RAW == '>') {
11237
550k
        NEXT;
11238
550k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11239
550k
    } else {
11240
204k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11241
204k
           "Couldn't find end of Start Tag %s\n",
11242
204k
           name);
11243
204k
        nodePop(ctxt);
11244
204k
        spacePop(ctxt);
11245
204k
                    if (nbNs > 0)
11246
24.0k
                        xmlParserNsPop(ctxt, nbNs);
11247
204k
    }
11248
11249
897k
                if (ctxt->nameNr == 0)
11250
7.53k
                    ctxt->instate = XML_PARSER_EPILOG;
11251
890k
                else
11252
890k
                    ctxt->instate = XML_PARSER_CONTENT;
11253
897k
                break;
11254
908k
      }
11255
4.41M
            case XML_PARSER_CONTENT: {
11256
4.41M
    cur = ctxt->input->cur[0];
11257
11258
4.41M
    if (cur == '<') {
11259
1.47M
                    if ((!terminate) && (avail < 2))
11260
5.48k
                        goto done;
11261
1.47M
        next = ctxt->input->cur[1];
11262
11263
1.47M
                    if (next == '/') {
11264
239k
                        ctxt->instate = XML_PARSER_END_TAG;
11265
239k
                        break;
11266
1.23M
                    } else if (next == '?') {
11267
41.7k
                        if ((!terminate) &&
11268
41.7k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11269
23.0k
                            goto done;
11270
18.6k
                        xmlParsePI(ctxt);
11271
18.6k
                        ctxt->instate = XML_PARSER_CONTENT;
11272
18.6k
                        break;
11273
1.19M
                    } else if (next == '!') {
11274
328k
                        if ((!terminate) && (avail < 3))
11275
1.51k
                            goto done;
11276
326k
                        next = ctxt->input->cur[2];
11277
11278
326k
                        if (next == '-') {
11279
201k
                            if ((!terminate) && (avail < 4))
11280
563
                                goto done;
11281
201k
                            if (ctxt->input->cur[3] == '-') {
11282
201k
                                if ((!terminate) &&
11283
201k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11284
106k
                                    goto done;
11285
94.4k
                                xmlParseComment(ctxt);
11286
94.4k
                                ctxt->instate = XML_PARSER_CONTENT;
11287
94.4k
                                break;
11288
201k
                            }
11289
201k
                        } else if (next == '[') {
11290
121k
                            if ((!terminate) && (avail < 9))
11291
4.86k
                                goto done;
11292
116k
                            if ((ctxt->input->cur[2] == '[') &&
11293
116k
                                (ctxt->input->cur[3] == 'C') &&
11294
116k
                                (ctxt->input->cur[4] == 'D') &&
11295
116k
                                (ctxt->input->cur[5] == 'A') &&
11296
116k
                                (ctxt->input->cur[6] == 'T') &&
11297
116k
                                (ctxt->input->cur[7] == 'A') &&
11298
116k
                                (ctxt->input->cur[8] == '[')) {
11299
115k
                                SKIP(9);
11300
115k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11301
115k
                                break;
11302
115k
                            }
11303
116k
                        }
11304
326k
                    }
11305
2.94M
    } else if (cur == '&') {
11306
241k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11307
97.8k
      goto done;
11308
143k
        xmlParseReference(ctxt);
11309
143k
                    break;
11310
2.69M
    } else {
11311
        /* TODO Avoid the extra copy, handle directly !!! */
11312
        /*
11313
         * Goal of the following test is:
11314
         *  - minimize calls to the SAX 'character' callback
11315
         *    when they are mergeable
11316
         *  - handle an problem for isBlank when we only parse
11317
         *    a sequence of blank chars and the next one is
11318
         *    not available to check against '<' presence.
11319
         *  - tries to homogenize the differences in SAX
11320
         *    callbacks between the push and pull versions
11321
         *    of the parser.
11322
         */
11323
2.69M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11324
486k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11325
28.2k
          goto done;
11326
486k
                    }
11327
2.67M
                    ctxt->checkIndex = 0;
11328
2.67M
        xmlParseCharDataInternal(ctxt, !terminate);
11329
2.67M
                    break;
11330
2.69M
    }
11331
11332
868k
                ctxt->instate = XML_PARSER_START_TAG;
11333
868k
    break;
11334
4.41M
      }
11335
252k
            case XML_PARSER_END_TAG:
11336
252k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11337
13.1k
        goto done;
11338
239k
    if (ctxt->sax2) {
11339
194k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11340
194k
        nameNsPop(ctxt);
11341
194k
    }
11342
45.5k
#ifdef LIBXML_SAX1_ENABLED
11343
45.5k
      else
11344
45.5k
        xmlParseEndTag1(ctxt, 0);
11345
239k
#endif /* LIBXML_SAX1_ENABLED */
11346
239k
    if (ctxt->nameNr == 0) {
11347
1.55k
        ctxt->instate = XML_PARSER_EPILOG;
11348
238k
    } else {
11349
238k
        ctxt->instate = XML_PARSER_CONTENT;
11350
238k
    }
11351
239k
    break;
11352
474k
            case XML_PARSER_CDATA_SECTION: {
11353
          /*
11354
     * The Push mode need to have the SAX callback for
11355
     * cdataBlock merge back contiguous callbacks.
11356
     */
11357
474k
    const xmlChar *term;
11358
11359
474k
                if (terminate) {
11360
                    /*
11361
                     * Don't call xmlParseLookupString. If 'terminate'
11362
                     * is set, checkIndex is invalid.
11363
                     */
11364
3.55k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11365
3.55k
                                           "]]>");
11366
471k
                } else {
11367
471k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11368
471k
                }
11369
11370
474k
    if (term == NULL) {
11371
335k
        int tmp, size;
11372
11373
335k
                    if (terminate) {
11374
                        /* Unfinished CDATA section */
11375
1.27k
                        size = ctxt->input->end - ctxt->input->cur;
11376
333k
                    } else {
11377
333k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11378
71.5k
                            goto done;
11379
262k
                        ctxt->checkIndex = 0;
11380
                        /* XXX: Why don't we pass the full buffer? */
11381
262k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11382
262k
                    }
11383
263k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11384
263k
                    if (tmp <= 0) {
11385
152k
                        tmp = -tmp;
11386
152k
                        ctxt->input->cur += tmp;
11387
152k
                        goto encoding_error;
11388
152k
                    }
11389
111k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11390
111k
                        if (ctxt->sax->cdataBlock != NULL)
11391
5.53k
                            ctxt->sax->cdataBlock(ctxt->userData,
11392
5.53k
                                                  ctxt->input->cur, tmp);
11393
105k
                        else if (ctxt->sax->characters != NULL)
11394
105k
                            ctxt->sax->characters(ctxt->userData,
11395
105k
                                                  ctxt->input->cur, tmp);
11396
111k
                    }
11397
111k
                    SKIPL(tmp);
11398
139k
    } else {
11399
139k
                    int base = term - CUR_PTR;
11400
139k
        int tmp;
11401
11402
139k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11403
139k
        if ((tmp < 0) || (tmp != base)) {
11404
25.4k
      tmp = -tmp;
11405
25.4k
      ctxt->input->cur += tmp;
11406
25.4k
      goto encoding_error;
11407
25.4k
        }
11408
114k
        if ((ctxt->sax != NULL) && (base == 0) &&
11409
114k
            (ctxt->sax->cdataBlock != NULL) &&
11410
114k
            (!ctxt->disableSAX)) {
11411
      /*
11412
       * Special case to provide identical behaviour
11413
       * between pull and push parsers on empty CDATA
11414
       * sections
11415
       */
11416
2.81k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11417
2.81k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11418
2.81k
                     "<![CDATA[", 9)))
11419
2.81k
           ctxt->sax->cdataBlock(ctxt->userData,
11420
2.81k
                                 BAD_CAST "", 0);
11421
111k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11422
111k
      (!ctxt->disableSAX)) {
11423
109k
      if (ctxt->sax->cdataBlock != NULL)
11424
106k
          ctxt->sax->cdataBlock(ctxt->userData,
11425
106k
              ctxt->input->cur, base);
11426
2.37k
      else if (ctxt->sax->characters != NULL)
11427
2.37k
          ctxt->sax->characters(ctxt->userData,
11428
2.37k
              ctxt->input->cur, base);
11429
109k
        }
11430
114k
        SKIPL(base + 3);
11431
114k
        ctxt->instate = XML_PARSER_CONTENT;
11432
114k
    }
11433
225k
    break;
11434
474k
      }
11435
572k
            case XML_PARSER_MISC:
11436
605k
            case XML_PARSER_PROLOG:
11437
663k
            case XML_PARSER_EPILOG:
11438
663k
    SKIP_BLANKS;
11439
663k
                avail = ctxt->input->end - ctxt->input->cur;
11440
663k
    if (avail < 1)
11441
1.86k
        goto done;
11442
661k
    if (ctxt->input->cur[0] == '<') {
11443
653k
                    if ((!terminate) && (avail < 2))
11444
305
                        goto done;
11445
652k
                    next = ctxt->input->cur[1];
11446
652k
                    if (next == '?') {
11447
76.1k
                        if ((!terminate) &&
11448
76.1k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11449
60.7k
                            goto done;
11450
15.4k
                        xmlParsePI(ctxt);
11451
15.4k
                        break;
11452
576k
                    } else if (next == '!') {
11453
535k
                        if ((!terminate) && (avail < 3))
11454
206
                            goto done;
11455
11456
535k
                        if (ctxt->input->cur[2] == '-') {
11457
43.0k
                            if ((!terminate) && (avail < 4))
11458
265
                                goto done;
11459
42.7k
                            if (ctxt->input->cur[3] == '-') {
11460
42.7k
                                if ((!terminate) &&
11461
42.7k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11462
18.6k
                                    goto done;
11463
24.0k
                                xmlParseComment(ctxt);
11464
24.0k
                                break;
11465
42.7k
                            }
11466
491k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11467
491k
                            if ((!terminate) && (avail < 9))
11468
65
                                goto done;
11469
491k
                            if ((ctxt->input->cur[2] == 'D') &&
11470
491k
                                (ctxt->input->cur[3] == 'O') &&
11471
491k
                                (ctxt->input->cur[4] == 'C') &&
11472
491k
                                (ctxt->input->cur[5] == 'T') &&
11473
491k
                                (ctxt->input->cur[6] == 'Y') &&
11474
491k
                                (ctxt->input->cur[7] == 'P') &&
11475
491k
                                (ctxt->input->cur[8] == 'E')) {
11476
491k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11477
453k
                                    goto done;
11478
38.5k
                                ctxt->inSubset = 1;
11479
38.5k
                                xmlParseDocTypeDecl(ctxt);
11480
38.5k
                                if (RAW == '[') {
11481
32.0k
                                    ctxt->instate = XML_PARSER_DTD;
11482
32.0k
                                } else {
11483
                                    /*
11484
                                     * Create and update the external subset.
11485
                                     */
11486
6.53k
                                    ctxt->inSubset = 2;
11487
6.53k
                                    if ((ctxt->sax != NULL) &&
11488
6.53k
                                        (!ctxt->disableSAX) &&
11489
6.53k
                                        (ctxt->sax->externalSubset != NULL))
11490
6.27k
                                        ctxt->sax->externalSubset(
11491
6.27k
                                                ctxt->userData,
11492
6.27k
                                                ctxt->intSubName,
11493
6.27k
                                                ctxt->extSubSystem,
11494
6.27k
                                                ctxt->extSubURI);
11495
6.53k
                                    ctxt->inSubset = 0;
11496
6.53k
                                    xmlCleanSpecialAttr(ctxt);
11497
6.53k
                                    ctxt->instate = XML_PARSER_PROLOG;
11498
6.53k
                                }
11499
38.5k
                                break;
11500
491k
                            }
11501
491k
                        }
11502
535k
                    }
11503
652k
                }
11504
11505
50.3k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11506
2.77k
                    if (ctxt->errNo == XML_ERR_OK)
11507
29
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11508
2.77k
        ctxt->instate = XML_PARSER_EOF;
11509
2.77k
                    xmlFinishDocument(ctxt);
11510
47.5k
                } else {
11511
47.5k
        ctxt->instate = XML_PARSER_START_TAG;
11512
47.5k
    }
11513
50.3k
    break;
11514
1.01M
            case XML_PARSER_DTD: {
11515
1.01M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11516
980k
                    goto done;
11517
31.2k
    xmlParseInternalSubset(ctxt);
11518
31.2k
    ctxt->inSubset = 2;
11519
31.2k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11520
31.2k
        (ctxt->sax->externalSubset != NULL))
11521
24.8k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11522
24.8k
          ctxt->extSubSystem, ctxt->extSubURI);
11523
31.2k
    ctxt->inSubset = 0;
11524
31.2k
    xmlCleanSpecialAttr(ctxt);
11525
31.2k
    ctxt->instate = XML_PARSER_PROLOG;
11526
31.2k
                break;
11527
1.01M
      }
11528
0
            default:
11529
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11530
0
      "PP: internal error\n");
11531
0
    ctxt->instate = XML_PARSER_EOF;
11532
0
    break;
11533
9.95M
  }
11534
9.95M
    }
11535
4.04M
done:
11536
4.04M
    return(ret);
11537
177k
encoding_error:
11538
    /* Only report the first error */
11539
177k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11540
928
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11541
928
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11542
928
    }
11543
177k
    return(0);
11544
4.22M
}
11545
11546
/**
11547
 * xmlParseChunk:
11548
 * @ctxt:  an XML parser context
11549
 * @chunk:  chunk of memory
11550
 * @size:  size of chunk in bytes
11551
 * @terminate:  last chunk indicator
11552
 *
11553
 * Parse a chunk of memory in push parser mode.
11554
 *
11555
 * Assumes that the parser context was initialized with
11556
 * xmlCreatePushParserCtxt.
11557
 *
11558
 * The last chunk, which will often be empty, must be marked with
11559
 * the @terminate flag. With the default SAX callbacks, the resulting
11560
 * document will be available in ctxt->myDoc. This pointer will not
11561
 * be freed by the library.
11562
 *
11563
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11564
 * The push parser doesn't support recovery mode.
11565
 *
11566
 * Returns an xmlParserErrors code (0 on success).
11567
 */
11568
int
11569
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11570
5.00M
              int terminate) {
11571
5.00M
    size_t curBase;
11572
5.00M
    size_t maxLength;
11573
5.00M
    size_t pos;
11574
5.00M
    int end_in_lf = 0;
11575
5.00M
    int res;
11576
11577
5.00M
    if ((ctxt == NULL) || (size < 0))
11578
0
        return(XML_ERR_ARGUMENT);
11579
5.00M
    if ((chunk == NULL) && (size > 0))
11580
0
        return(XML_ERR_ARGUMENT);
11581
5.00M
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11582
0
        return(XML_ERR_ARGUMENT);
11583
5.00M
    if (ctxt->disableSAX != 0)
11584
778k
        return(ctxt->errNo);
11585
11586
4.22M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11587
4.22M
    if (ctxt->instate == XML_PARSER_START)
11588
65.3k
        xmlCtxtInitializeLate(ctxt);
11589
4.22M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11590
4.22M
        (chunk[size - 1] == '\r')) {
11591
14.7k
  end_in_lf = 1;
11592
14.7k
  size--;
11593
14.7k
    }
11594
11595
    /*
11596
     * Also push an empty chunk to make sure that the raw buffer
11597
     * will be flushed if there is an encoder.
11598
     */
11599
4.22M
    pos = ctxt->input->cur - ctxt->input->base;
11600
4.22M
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11601
4.22M
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11602
4.22M
    if (res < 0) {
11603
149
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11604
149
        xmlHaltParser(ctxt);
11605
149
        return(ctxt->errNo);
11606
149
    }
11607
11608
4.22M
    xmlParseTryOrFinish(ctxt, terminate);
11609
11610
4.22M
    curBase = ctxt->input->cur - ctxt->input->base;
11611
4.22M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11612
1.28M
                XML_MAX_HUGE_LENGTH :
11613
4.22M
                XML_MAX_LOOKUP_LIMIT;
11614
4.22M
    if (curBase > maxLength) {
11615
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11616
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11617
0
        xmlHaltParser(ctxt);
11618
0
    }
11619
11620
4.22M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11621
11.3k
        return(ctxt->errNo);
11622
11623
4.21M
    if (end_in_lf == 1) {
11624
14.7k
  pos = ctxt->input->cur - ctxt->input->base;
11625
14.7k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11626
14.7k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11627
14.7k
        if (res < 0) {
11628
6
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11629
6
            xmlHaltParser(ctxt);
11630
6
            return(ctxt->errNo);
11631
6
        }
11632
14.7k
    }
11633
4.21M
    if (terminate) {
11634
  /*
11635
   * Check for termination
11636
   */
11637
55.4k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11638
55.4k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11639
18.6k
            if (ctxt->nameNr > 0) {
11640
11.3k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11641
11.3k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11642
11.3k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11643
11.3k
                        "Premature end of data in tag %s line %d\n",
11644
11.3k
                        name, line, NULL);
11645
11.3k
            } else if (ctxt->instate == XML_PARSER_START) {
11646
160
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11647
7.13k
            } else {
11648
7.13k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11649
7.13k
                               "Start tag expected, '<' not found\n");
11650
7.13k
            }
11651
36.7k
        } else if ((ctxt->input->buf->encoder != NULL) &&
11652
36.7k
                   (ctxt->input->buf->error == 0) &&
11653
36.7k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11654
571
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11655
571
                           "Truncated multi-byte sequence at EOF\n");
11656
571
        }
11657
55.4k
  if (ctxt->instate != XML_PARSER_EOF) {
11658
22.3k
            ctxt->instate = XML_PARSER_EOF;
11659
22.3k
            xmlFinishDocument(ctxt);
11660
22.3k
  }
11661
55.4k
    }
11662
4.21M
    if (ctxt->wellFormed == 0)
11663
2.12M
  return((xmlParserErrors) ctxt->errNo);
11664
2.09M
    else
11665
2.09M
        return(0);
11666
4.21M
}
11667
11668
/************************************************************************
11669
 *                  *
11670
 *    I/O front end functions to the parser     *
11671
 *                  *
11672
 ************************************************************************/
11673
11674
/**
11675
 * xmlCreatePushParserCtxt:
11676
 * @sax:  a SAX handler (optional)
11677
 * @user_data:  user data for SAX callbacks (optional)
11678
 * @chunk:  initial chunk (optional, deprecated)
11679
 * @size:  size of initial chunk in bytes
11680
 * @filename:  file name or URI (optional)
11681
 *
11682
 * Create a parser context for using the XML parser in push mode.
11683
 * See xmlParseChunk.
11684
 *
11685
 * Passing an initial chunk is useless and deprecated.
11686
 *
11687
 * @filename is used as base URI to fetch external entities and for
11688
 * error reports.
11689
 *
11690
 * Returns the new parser context or NULL if a memory allocation
11691
 * failed.
11692
 */
11693
11694
xmlParserCtxtPtr
11695
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11696
64.1k
                        const char *chunk, int size, const char *filename) {
11697
64.1k
    xmlParserCtxtPtr ctxt;
11698
64.1k
    xmlParserInputPtr input;
11699
11700
64.1k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11701
64.1k
    if (ctxt == NULL)
11702
50
  return(NULL);
11703
11704
64.0k
    ctxt->options &= ~XML_PARSE_NODICT;
11705
64.0k
    ctxt->dictNames = 1;
11706
11707
64.0k
    input = xmlNewPushInput(filename, chunk, size);
11708
64.0k
    if (input == NULL) {
11709
39
  xmlFreeParserCtxt(ctxt);
11710
39
  return(NULL);
11711
39
    }
11712
64.0k
    if (inputPush(ctxt, input) < 0) {
11713
14
        xmlFreeInputStream(input);
11714
14
        xmlFreeParserCtxt(ctxt);
11715
14
        return(NULL);
11716
14
    }
11717
11718
63.9k
    return(ctxt);
11719
64.0k
}
11720
#endif /* LIBXML_PUSH_ENABLED */
11721
11722
/**
11723
 * xmlStopParser:
11724
 * @ctxt:  an XML parser context
11725
 *
11726
 * Blocks further parser processing
11727
 */
11728
void
11729
27.8k
xmlStopParser(xmlParserCtxtPtr ctxt) {
11730
27.8k
    if (ctxt == NULL)
11731
0
        return;
11732
27.8k
    xmlHaltParser(ctxt);
11733
27.8k
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11734
19.0k
        ctxt->errNo = XML_ERR_USER_STOP;
11735
27.8k
}
11736
11737
/**
11738
 * xmlCreateIOParserCtxt:
11739
 * @sax:  a SAX handler (optional)
11740
 * @user_data:  user data for SAX callbacks (optional)
11741
 * @ioread:  an I/O read function
11742
 * @ioclose:  an I/O close function (optional)
11743
 * @ioctx:  an I/O handler
11744
 * @enc:  the charset encoding if known (deprecated)
11745
 *
11746
 * Create a parser context for using the XML parser with an existing
11747
 * I/O stream
11748
 *
11749
 * Returns the new parser context or NULL
11750
 */
11751
xmlParserCtxtPtr
11752
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11753
                      xmlInputReadCallback ioread,
11754
                      xmlInputCloseCallback ioclose,
11755
0
                      void *ioctx, xmlCharEncoding enc) {
11756
0
    xmlParserCtxtPtr ctxt;
11757
0
    xmlParserInputPtr input;
11758
0
    const char *encoding;
11759
11760
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11761
0
    if (ctxt == NULL)
11762
0
  return(NULL);
11763
11764
0
    encoding = xmlGetCharEncodingName(enc);
11765
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11766
0
                                  encoding, 0);
11767
0
    if (input == NULL) {
11768
0
  xmlFreeParserCtxt(ctxt);
11769
0
        return (NULL);
11770
0
    }
11771
0
    if (inputPush(ctxt, input) < 0) {
11772
0
        xmlFreeInputStream(input);
11773
0
        xmlFreeParserCtxt(ctxt);
11774
0
        return(NULL);
11775
0
    }
11776
11777
0
    return(ctxt);
11778
0
}
11779
11780
#ifdef LIBXML_VALID_ENABLED
11781
/************************************************************************
11782
 *                  *
11783
 *    Front ends when parsing a DTD       *
11784
 *                  *
11785
 ************************************************************************/
11786
11787
/**
11788
 * xmlIOParseDTD:
11789
 * @sax:  the SAX handler block or NULL
11790
 * @input:  an Input Buffer
11791
 * @enc:  the charset encoding if known
11792
 *
11793
 * Load and parse a DTD
11794
 *
11795
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11796
 * @input will be freed by the function in any case.
11797
 */
11798
11799
xmlDtdPtr
11800
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11801
0
        xmlCharEncoding enc) {
11802
0
    xmlDtdPtr ret = NULL;
11803
0
    xmlParserCtxtPtr ctxt;
11804
0
    xmlParserInputPtr pinput = NULL;
11805
11806
0
    if (input == NULL)
11807
0
  return(NULL);
11808
11809
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11810
0
    if (ctxt == NULL) {
11811
0
        xmlFreeParserInputBuffer(input);
11812
0
  return(NULL);
11813
0
    }
11814
11815
    /*
11816
     * generate a parser input from the I/O handler
11817
     */
11818
11819
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11820
0
    if (pinput == NULL) {
11821
0
        xmlFreeParserInputBuffer(input);
11822
0
  xmlFreeParserCtxt(ctxt);
11823
0
  return(NULL);
11824
0
    }
11825
11826
    /*
11827
     * plug some encoding conversion routines here.
11828
     */
11829
0
    if (xmlPushInput(ctxt, pinput) < 0) {
11830
0
        xmlFreeInputStream(pinput);
11831
0
  xmlFreeParserCtxt(ctxt);
11832
0
  return(NULL);
11833
0
    }
11834
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11835
0
        xmlSwitchEncoding(ctxt, enc);
11836
0
    }
11837
11838
    /*
11839
     * let's parse that entity knowing it's an external subset.
11840
     */
11841
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11842
0
    if (ctxt->myDoc == NULL) {
11843
0
  xmlErrMemory(ctxt);
11844
0
  return(NULL);
11845
0
    }
11846
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11847
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11848
0
                                 BAD_CAST "none", BAD_CAST "none");
11849
11850
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11851
11852
0
    if (ctxt->myDoc != NULL) {
11853
0
  if (ctxt->wellFormed) {
11854
0
      ret = ctxt->myDoc->extSubset;
11855
0
      ctxt->myDoc->extSubset = NULL;
11856
0
      if (ret != NULL) {
11857
0
    xmlNodePtr tmp;
11858
11859
0
    ret->doc = NULL;
11860
0
    tmp = ret->children;
11861
0
    while (tmp != NULL) {
11862
0
        tmp->doc = NULL;
11863
0
        tmp = tmp->next;
11864
0
    }
11865
0
      }
11866
0
  } else {
11867
0
      ret = NULL;
11868
0
  }
11869
0
        xmlFreeDoc(ctxt->myDoc);
11870
0
        ctxt->myDoc = NULL;
11871
0
    }
11872
0
    xmlFreeParserCtxt(ctxt);
11873
11874
0
    return(ret);
11875
0
}
11876
11877
/**
11878
 * xmlSAXParseDTD:
11879
 * @sax:  the SAX handler block
11880
 * @ExternalID:  a NAME* containing the External ID of the DTD
11881
 * @SystemID:  a NAME* containing the URL to the DTD
11882
 *
11883
 * DEPRECATED: Don't use.
11884
 *
11885
 * Load and parse an external subset.
11886
 *
11887
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11888
 */
11889
11890
xmlDtdPtr
11891
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11892
16.1k
                          const xmlChar *SystemID) {
11893
16.1k
    xmlDtdPtr ret = NULL;
11894
16.1k
    xmlParserCtxtPtr ctxt;
11895
16.1k
    xmlParserInputPtr input = NULL;
11896
16.1k
    xmlChar* systemIdCanonic;
11897
11898
16.1k
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11899
11900
16.1k
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11901
16.1k
    if (ctxt == NULL) {
11902
92
  return(NULL);
11903
92
    }
11904
11905
    /*
11906
     * Canonicalise the system ID
11907
     */
11908
16.0k
    systemIdCanonic = xmlCanonicPath(SystemID);
11909
16.0k
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11910
11
  xmlFreeParserCtxt(ctxt);
11911
11
  return(NULL);
11912
11
    }
11913
11914
    /*
11915
     * Ask the Entity resolver to load the damn thing
11916
     */
11917
11918
16.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11919
16.0k
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11920
16.0k
                                   systemIdCanonic);
11921
16.0k
    if (input == NULL) {
11922
14.4k
  xmlFreeParserCtxt(ctxt);
11923
14.4k
  if (systemIdCanonic != NULL)
11924
13.9k
      xmlFree(systemIdCanonic);
11925
14.4k
  return(NULL);
11926
14.4k
    }
11927
11928
    /*
11929
     * plug some encoding conversion routines here.
11930
     */
11931
1.62k
    if (xmlPushInput(ctxt, input) < 0) {
11932
2
        xmlFreeInputStream(input);
11933
2
  xmlFreeParserCtxt(ctxt);
11934
2
  if (systemIdCanonic != NULL)
11935
2
      xmlFree(systemIdCanonic);
11936
2
  return(NULL);
11937
2
    }
11938
11939
1.62k
    xmlDetectEncoding(ctxt);
11940
11941
1.62k
    if (input->filename == NULL)
11942
772
  input->filename = (char *) systemIdCanonic;
11943
850
    else
11944
850
  xmlFree(systemIdCanonic);
11945
11946
    /*
11947
     * let's parse that entity knowing it's an external subset.
11948
     */
11949
1.62k
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11950
1.62k
    if (ctxt->myDoc == NULL) {
11951
12
  xmlErrMemory(ctxt);
11952
12
  xmlFreeParserCtxt(ctxt);
11953
12
  return(NULL);
11954
12
    }
11955
1.61k
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11956
1.61k
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11957
1.61k
                                 ExternalID, SystemID);
11958
1.61k
    if (ctxt->myDoc->extSubset == NULL) {
11959
19
        xmlFreeDoc(ctxt->myDoc);
11960
19
        xmlFreeParserCtxt(ctxt);
11961
19
        return(NULL);
11962
19
    }
11963
1.59k
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11964
11965
1.59k
    if (ctxt->myDoc != NULL) {
11966
1.59k
  if (ctxt->wellFormed) {
11967
92
      ret = ctxt->myDoc->extSubset;
11968
92
      ctxt->myDoc->extSubset = NULL;
11969
92
      if (ret != NULL) {
11970
92
    xmlNodePtr tmp;
11971
11972
92
    ret->doc = NULL;
11973
92
    tmp = ret->children;
11974
4.63k
    while (tmp != NULL) {
11975
4.54k
        tmp->doc = NULL;
11976
4.54k
        tmp = tmp->next;
11977
4.54k
    }
11978
92
      }
11979
1.49k
  } else {
11980
1.49k
      ret = NULL;
11981
1.49k
  }
11982
1.59k
        xmlFreeDoc(ctxt->myDoc);
11983
1.59k
        ctxt->myDoc = NULL;
11984
1.59k
    }
11985
1.59k
    xmlFreeParserCtxt(ctxt);
11986
11987
1.59k
    return(ret);
11988
1.61k
}
11989
11990
11991
/**
11992
 * xmlParseDTD:
11993
 * @ExternalID:  a NAME* containing the External ID of the DTD
11994
 * @SystemID:  a NAME* containing the URL to the DTD
11995
 *
11996
 * Load and parse an external subset.
11997
 *
11998
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11999
 */
12000
12001
xmlDtdPtr
12002
16.1k
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12003
16.1k
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12004
16.1k
}
12005
#endif /* LIBXML_VALID_ENABLED */
12006
12007
/************************************************************************
12008
 *                  *
12009
 *    Front ends when parsing an Entity     *
12010
 *                  *
12011
 ************************************************************************/
12012
12013
static xmlNodePtr
12014
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12015
60.0k
                            int hasTextDecl, int buildTree) {
12016
60.0k
    xmlNodePtr root = NULL;
12017
60.0k
    xmlNodePtr list = NULL;
12018
60.0k
    xmlChar *rootName = BAD_CAST "#root";
12019
60.0k
    int result;
12020
12021
60.0k
    if (buildTree) {
12022
60.0k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
12023
60.0k
        if (root == NULL) {
12024
55
            xmlErrMemory(ctxt);
12025
55
            goto error;
12026
55
        }
12027
60.0k
    }
12028
12029
59.9k
    if (xmlPushInput(ctxt, input) < 0)
12030
46
        goto error;
12031
12032
59.9k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12033
59.9k
    spacePush(ctxt, -1);
12034
12035
59.9k
    if (buildTree)
12036
59.9k
        nodePush(ctxt, root);
12037
12038
59.9k
    if (hasTextDecl) {
12039
14.2k
        xmlDetectEncoding(ctxt);
12040
12041
        /*
12042
         * Parse a possible text declaration first
12043
         */
12044
14.2k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12045
14.2k
            (IS_BLANK_CH(NXT(5)))) {
12046
1.99k
            xmlParseTextDecl(ctxt);
12047
            /*
12048
             * An XML-1.0 document can't reference an entity not XML-1.0
12049
             */
12050
1.99k
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12051
1.99k
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12052
46
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12053
46
                               "Version mismatch between document and "
12054
46
                               "entity\n");
12055
46
            }
12056
1.99k
        }
12057
14.2k
    }
12058
12059
59.9k
    xmlParseContentInternal(ctxt);
12060
12061
59.9k
    if (ctxt->input->cur < ctxt->input->end)
12062
3.97k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12063
12064
59.9k
    if ((ctxt->wellFormed) ||
12065
59.9k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12066
53.2k
        if (root != NULL) {
12067
53.2k
            xmlNodePtr cur;
12068
12069
            /*
12070
             * Unlink newly created node list.
12071
             */
12072
53.2k
            list = root->children;
12073
53.2k
            root->children = NULL;
12074
53.2k
            root->last = NULL;
12075
107k
            for (cur = list; cur != NULL; cur = cur->next)
12076
54.6k
                cur->parent = NULL;
12077
53.2k
        }
12078
53.2k
    }
12079
12080
    /*
12081
     * Read the rest of the stream in case of errors. We want
12082
     * to account for the whole entity size.
12083
     */
12084
188k
    do {
12085
188k
        ctxt->input->cur = ctxt->input->end;
12086
188k
        xmlParserShrink(ctxt);
12087
188k
        result = xmlParserGrow(ctxt);
12088
188k
    } while (result > 0);
12089
12090
59.9k
    if (buildTree)
12091
59.9k
        nodePop(ctxt);
12092
12093
59.9k
    namePop(ctxt);
12094
59.9k
    spacePop(ctxt);
12095
12096
    /* xmlPopInput would free the stream */
12097
59.9k
    inputPop(ctxt);
12098
12099
60.0k
error:
12100
60.0k
    xmlFreeNode(root);
12101
12102
60.0k
    return(list);
12103
59.9k
}
12104
12105
static void
12106
70.1k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12107
70.1k
    xmlParserInputPtr input;
12108
70.1k
    xmlNodePtr list;
12109
70.1k
    unsigned long consumed;
12110
70.1k
    int isExternal;
12111
70.1k
    int buildTree;
12112
70.1k
    int oldMinNsIndex;
12113
70.1k
    int oldNodelen, oldNodemem;
12114
12115
70.1k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12116
70.1k
    buildTree = (ctxt->node != NULL);
12117
12118
    /*
12119
     * Recursion check
12120
     */
12121
70.1k
    if (ent->flags & XML_ENT_EXPANDING) {
12122
750
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12123
750
        xmlHaltParser(ctxt);
12124
750
        goto error;
12125
750
    }
12126
12127
    /*
12128
     * Load entity
12129
     */
12130
69.3k
    input = xmlNewEntityInputStream(ctxt, ent);
12131
69.3k
    if (input == NULL)
12132
9.33k
        goto error;
12133
12134
    /*
12135
     * When building a tree, we need to limit the scope of namespace
12136
     * declarations, so that entities don't reference xmlNs structs
12137
     * from the parent of a reference.
12138
     */
12139
60.0k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12140
60.0k
    if (buildTree)
12141
60.0k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12142
12143
60.0k
    oldNodelen = ctxt->nodelen;
12144
60.0k
    oldNodemem = ctxt->nodemem;
12145
60.0k
    ctxt->nodelen = 0;
12146
60.0k
    ctxt->nodemem = 0;
12147
12148
    /*
12149
     * Parse content
12150
     *
12151
     * This initiates a recursive call chain:
12152
     *
12153
     * - xmlCtxtParseContentInternal
12154
     * - xmlParseContentInternal
12155
     * - xmlParseReference
12156
     * - xmlCtxtParseEntity
12157
     *
12158
     * The nesting depth is limited by the maximum number of inputs,
12159
     * see xmlPushInput.
12160
     *
12161
     * It's possible to make this non-recursive (minNsIndex must be
12162
     * stored in the input struct) at the expense of code readability.
12163
     */
12164
12165
60.0k
    ent->flags |= XML_ENT_EXPANDING;
12166
12167
60.0k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12168
12169
60.0k
    ent->flags &= ~XML_ENT_EXPANDING;
12170
12171
60.0k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12172
60.0k
    ctxt->nodelen = oldNodelen;
12173
60.0k
    ctxt->nodemem = oldNodemem;
12174
12175
    /*
12176
     * Entity size accounting
12177
     */
12178
60.0k
    consumed = input->consumed;
12179
60.0k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12180
12181
60.0k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12182
21.2k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12183
12184
60.0k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12185
21.3k
        if (isExternal)
12186
12.9k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12187
12188
21.3k
        ent->children = list;
12189
12190
75.8k
        while (list != NULL) {
12191
54.5k
            list->parent = (xmlNodePtr) ent;
12192
54.5k
            if (list->next == NULL)
12193
14.4k
                ent->last = list;
12194
54.5k
            list = list->next;
12195
54.5k
        }
12196
38.7k
    } else {
12197
38.7k
        xmlFreeNodeList(list);
12198
38.7k
    }
12199
12200
60.0k
    xmlFreeInputStream(input);
12201
12202
70.1k
error:
12203
70.1k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12204
70.1k
}
12205
12206
/**
12207
 * xmlParseCtxtExternalEntity:
12208
 * @ctxt:  the existing parsing context
12209
 * @URL:  the URL for the entity to load
12210
 * @ID:  the System ID for the entity to load
12211
 * @listOut:  the return value for the set of parsed nodes
12212
 *
12213
 * Parse an external general entity within an existing parsing context
12214
 * An external general parsed entity is well-formed if it matches the
12215
 * production labeled extParsedEnt.
12216
 *
12217
 * [78] extParsedEnt ::= TextDecl? content
12218
 *
12219
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12220
 *    the parser error code otherwise
12221
 */
12222
12223
int
12224
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12225
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12226
0
    xmlParserInputPtr input;
12227
0
    xmlNodePtr list;
12228
12229
0
    if (listOut != NULL)
12230
0
        *listOut = NULL;
12231
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_ARGUMENT);
12234
12235
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12236
0
                            XML_RESOURCE_GENERAL_ENTITY);
12237
0
    if (input == NULL)
12238
0
        return(ctxt->errNo);
12239
12240
0
    xmlCtxtInitializeLate(ctxt);
12241
12242
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12243
0
    if (listOut != NULL)
12244
0
        *listOut = list;
12245
0
    else
12246
0
        xmlFreeNodeList(list);
12247
12248
0
    xmlFreeInputStream(input);
12249
0
    return(ctxt->errNo);
12250
0
}
12251
12252
#ifdef LIBXML_SAX1_ENABLED
12253
/**
12254
 * xmlParseExternalEntity:
12255
 * @doc:  the document the chunk pertains to
12256
 * @sax:  the SAX handler block (possibly NULL)
12257
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12258
 * @depth:  Used for loop detection, use 0
12259
 * @URL:  the URL for the entity to load
12260
 * @ID:  the System ID for the entity to load
12261
 * @list:  the return value for the set of parsed nodes
12262
 *
12263
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12264
 *
12265
 * Parse an external general entity
12266
 * An external general parsed entity is well-formed if it matches the
12267
 * production labeled extParsedEnt.
12268
 *
12269
 * [78] extParsedEnt ::= TextDecl? content
12270
 *
12271
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12272
 *    the parser error code otherwise
12273
 */
12274
12275
int
12276
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12277
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12278
0
    xmlParserCtxtPtr ctxt;
12279
0
    int ret;
12280
12281
0
    if (list != NULL)
12282
0
        *list = NULL;
12283
12284
0
    if (doc == NULL)
12285
0
        return(XML_ERR_ARGUMENT);
12286
12287
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12288
0
    if (ctxt == NULL)
12289
0
        return(XML_ERR_NO_MEMORY);
12290
12291
0
    ctxt->depth = depth;
12292
0
    ctxt->myDoc = doc;
12293
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12294
12295
0
    xmlFreeParserCtxt(ctxt);
12296
0
    return(ret);
12297
0
}
12298
12299
/**
12300
 * xmlParseBalancedChunkMemory:
12301
 * @doc:  the document the chunk pertains to (must not be NULL)
12302
 * @sax:  the SAX handler block (possibly NULL)
12303
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12304
 * @depth:  Used for loop detection, use 0
12305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12306
 * @lst:  the return value for the set of parsed nodes
12307
 *
12308
 * Parse a well-balanced chunk of an XML document
12309
 * called by the parser
12310
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12311
 * the content production in the XML grammar:
12312
 *
12313
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12314
 *
12315
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12316
 *    the parser error code otherwise
12317
 */
12318
12319
int
12320
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12321
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12322
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12323
0
                                                depth, string, lst, 0 );
12324
0
}
12325
#endif /* LIBXML_SAX1_ENABLED */
12326
12327
/**
12328
 * xmlCtxtParseContent:
12329
 * @ctxt:  parser context
12330
 * @input:  parser input
12331
 * @node:  target node or document
12332
 * @hasTextDecl:  whether to parse text declaration
12333
 *
12334
 * Parse a well-balanced chunk of XML matching the 'content' production.
12335
 *
12336
 * Namespaces in scope of @node and entities of @node's document are
12337
 * recognized. When validating, the DTD of @node's document is used.
12338
 *
12339
 * Always consumes @input even in error case.
12340
 *
12341
 * Available since 2.14.0.
12342
 *
12343
 * Returns a node list or NULL in case of error.
12344
 */
12345
xmlNodePtr
12346
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12347
0
                    xmlNodePtr node, int hasTextDecl) {
12348
0
    xmlDocPtr doc;
12349
0
    xmlNodePtr cur, list = NULL;
12350
0
    int nsnr = 0;
12351
0
    xmlDictPtr oldDict;
12352
0
    int oldOptions, oldDictNames, oldLoadSubset;
12353
12354
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12355
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12356
0
        goto exit;
12357
0
    }
12358
12359
0
    doc = node->doc;
12360
0
    if (doc == NULL) {
12361
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12362
0
        goto exit;
12363
0
    }
12364
12365
0
    switch (node->type) {
12366
0
        case XML_ELEMENT_NODE:
12367
0
        case XML_DOCUMENT_NODE:
12368
0
        case XML_HTML_DOCUMENT_NODE:
12369
0
            break;
12370
12371
0
        case XML_ATTRIBUTE_NODE:
12372
0
        case XML_TEXT_NODE:
12373
0
        case XML_CDATA_SECTION_NODE:
12374
0
        case XML_ENTITY_REF_NODE:
12375
0
        case XML_PI_NODE:
12376
0
        case XML_COMMENT_NODE:
12377
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12378
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12379
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12380
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12381
0
                    node = cur;
12382
0
                    break;
12383
0
                }
12384
0
            }
12385
0
            break;
12386
12387
0
        default:
12388
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12389
0
            goto exit;
12390
0
    }
12391
12392
0
#ifdef LIBXML_HTML_ENABLED
12393
0
    if (ctxt->html)
12394
0
        htmlCtxtReset(ctxt);
12395
0
    else
12396
0
#endif
12397
0
        xmlCtxtReset(ctxt);
12398
12399
0
    oldDict = ctxt->dict;
12400
0
    oldOptions = ctxt->options;
12401
0
    oldDictNames = ctxt->dictNames;
12402
0
    oldLoadSubset = ctxt->loadsubset;
12403
12404
    /*
12405
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12406
     */
12407
0
    if (doc->dict != NULL) {
12408
0
        ctxt->dict = doc->dict;
12409
0
    } else {
12410
0
        ctxt->options |= XML_PARSE_NODICT;
12411
0
        ctxt->dictNames = 0;
12412
0
    }
12413
12414
    /*
12415
     * Disable IDs
12416
     */
12417
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12418
12419
0
    ctxt->myDoc = doc;
12420
12421
0
#ifdef LIBXML_HTML_ENABLED
12422
0
    if (ctxt->html) {
12423
        /*
12424
         * When parsing in context, it makes no sense to add implied
12425
         * elements like html/body/etc...
12426
         */
12427
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12428
12429
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12430
0
    } else
12431
0
#endif
12432
0
    {
12433
0
        xmlCtxtInitializeLate(ctxt);
12434
12435
        /*
12436
         * This hack lowers the error level of undeclared entities
12437
         * from XML_ERR_FATAL (well-formedness error) to XML_ERR_ERROR
12438
         * or XML_ERR_WARNING.
12439
         */
12440
0
        ctxt->hasExternalSubset = 1;
12441
12442
        /*
12443
         * initialize the SAX2 namespaces stack
12444
         */
12445
0
        cur = node;
12446
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12447
0
            xmlNsPtr ns = cur->nsDef;
12448
0
            xmlHashedString hprefix, huri;
12449
12450
0
            while (ns != NULL) {
12451
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12452
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12453
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12454
0
                    nsnr++;
12455
0
                ns = ns->next;
12456
0
            }
12457
0
            cur = cur->parent;
12458
0
        }
12459
12460
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12461
12462
0
        if (nsnr > 0)
12463
0
            xmlParserNsPop(ctxt, nsnr);
12464
0
    }
12465
12466
0
    ctxt->dict = oldDict;
12467
0
    ctxt->options = oldOptions;
12468
0
    ctxt->dictNames = oldDictNames;
12469
0
    ctxt->loadsubset = oldLoadSubset;
12470
0
    ctxt->myDoc = NULL;
12471
0
    ctxt->node = NULL;
12472
12473
0
exit:
12474
0
    xmlFreeInputStream(input);
12475
0
    return(list);
12476
0
}
12477
12478
/**
12479
 * xmlParseInNodeContext:
12480
 * @node:  the context node
12481
 * @data:  the input string
12482
 * @datalen:  the input string length in bytes
12483
 * @options:  a combination of xmlParserOption
12484
 * @listOut:  the return value for the set of parsed nodes
12485
 *
12486
 * Parse a well-balanced chunk of an XML document
12487
 * within the context (DTD, namespaces, etc ...) of the given node.
12488
 *
12489
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12490
 * the content production in the XML grammar:
12491
 *
12492
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12493
 *
12494
 * This function assumes the encoding of @node's document which is
12495
 * typically not what you want. A better alternative is
12496
 * xmlCtxtParseContent.
12497
 *
12498
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12499
 * error code otherwise
12500
 */
12501
xmlParserErrors
12502
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12503
0
                      int options, xmlNodePtr *listOut) {
12504
0
    xmlParserCtxtPtr ctxt;
12505
0
    xmlParserInputPtr input;
12506
0
    xmlDocPtr doc;
12507
0
    xmlNodePtr list;
12508
0
    xmlParserErrors ret;
12509
12510
0
    if (listOut == NULL)
12511
0
        return(XML_ERR_INTERNAL_ERROR);
12512
0
    *listOut = NULL;
12513
12514
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12515
0
        return(XML_ERR_INTERNAL_ERROR);
12516
12517
0
    doc = node->doc;
12518
0
    if (doc == NULL)
12519
0
        return(XML_ERR_INTERNAL_ERROR);
12520
12521
0
#ifdef LIBXML_HTML_ENABLED
12522
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12523
0
        ctxt = htmlNewParserCtxt();
12524
0
    }
12525
0
    else
12526
0
#endif
12527
0
        ctxt = xmlNewParserCtxt();
12528
12529
0
    if (ctxt == NULL)
12530
0
        return(XML_ERR_NO_MEMORY);
12531
12532
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12533
0
                                      (const char *) doc->encoding,
12534
0
                                      XML_INPUT_BUF_STATIC);
12535
0
    if (input == NULL) {
12536
0
        xmlFreeParserCtxt(ctxt);
12537
0
        return(XML_ERR_NO_MEMORY);
12538
0
    }
12539
12540
0
    xmlCtxtUseOptions(ctxt, options);
12541
12542
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12543
12544
0
    if (list == NULL) {
12545
0
        ret = ctxt->errNo;
12546
0
        if (ret == XML_ERR_ARGUMENT)
12547
0
            ret = XML_ERR_INTERNAL_ERROR;
12548
0
    } else {
12549
0
        ret = XML_ERR_OK;
12550
0
        *listOut = list;
12551
0
    }
12552
12553
0
    xmlFreeParserCtxt(ctxt);
12554
12555
0
    return(ret);
12556
0
}
12557
12558
#ifdef LIBXML_SAX1_ENABLED
12559
/**
12560
 * xmlParseBalancedChunkMemoryRecover:
12561
 * @doc:  the document the chunk pertains to (must not be NULL)
12562
 * @sax:  the SAX handler block (possibly NULL)
12563
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12564
 * @depth:  Used for loop detection, use 0
12565
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12566
 * @listOut:  the return value for the set of parsed nodes
12567
 * @recover: return nodes even if the data is broken (use 0)
12568
 *
12569
 * Parse a well-balanced chunk of an XML document
12570
 *
12571
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12572
 * the content production in the XML grammar:
12573
 *
12574
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12575
 *
12576
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12577
 * otherwise.
12578
 *
12579
 * In case recover is set to 1, the nodelist will not be empty even if
12580
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12581
 * some extent.
12582
 */
12583
int
12584
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12585
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12586
0
     int recover) {
12587
0
    xmlParserCtxtPtr ctxt;
12588
0
    xmlParserInputPtr input;
12589
0
    xmlNodePtr list;
12590
0
    int ret;
12591
12592
0
    if (listOut != NULL)
12593
0
        *listOut = NULL;
12594
12595
0
    if (string == NULL)
12596
0
        return(XML_ERR_ARGUMENT);
12597
12598
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12599
0
    if (ctxt == NULL)
12600
0
        return(XML_ERR_NO_MEMORY);
12601
12602
0
    xmlCtxtInitializeLate(ctxt);
12603
12604
0
    ctxt->depth = depth;
12605
0
    ctxt->myDoc = doc;
12606
0
    if (recover) {
12607
0
        ctxt->options |= XML_PARSE_RECOVER;
12608
0
        ctxt->recovery = 1;
12609
0
    }
12610
12611
0
    input = xmlNewStringInputStream(ctxt, string);
12612
0
    if (input == NULL) {
12613
0
        ret = ctxt->errNo;
12614
0
        goto error;
12615
0
    }
12616
12617
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12618
0
    if (listOut != NULL)
12619
0
        *listOut = list;
12620
0
    else
12621
0
        xmlFreeNodeList(list);
12622
12623
0
    if (!ctxt->wellFormed)
12624
0
        ret = ctxt->errNo;
12625
0
    else
12626
0
        ret = XML_ERR_OK;
12627
12628
0
error:
12629
0
    xmlFreeInputStream(input);
12630
0
    xmlFreeParserCtxt(ctxt);
12631
0
    return(ret);
12632
0
}
12633
12634
/**
12635
 * xmlSAXParseEntity:
12636
 * @sax:  the SAX handler block
12637
 * @filename:  the filename
12638
 *
12639
 * DEPRECATED: Don't use.
12640
 *
12641
 * parse an XML external entity out of context and build a tree.
12642
 * It use the given SAX function block to handle the parsing callback.
12643
 * If sax is NULL, fallback to the default DOM tree building routines.
12644
 *
12645
 * [78] extParsedEnt ::= TextDecl? content
12646
 *
12647
 * This correspond to a "Well Balanced" chunk
12648
 *
12649
 * Returns the resulting document tree
12650
 */
12651
12652
xmlDocPtr
12653
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12654
0
    xmlDocPtr ret;
12655
0
    xmlParserCtxtPtr ctxt;
12656
12657
0
    ctxt = xmlCreateFileParserCtxt(filename);
12658
0
    if (ctxt == NULL) {
12659
0
  return(NULL);
12660
0
    }
12661
0
    if (sax != NULL) {
12662
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12663
0
            *ctxt->sax = *sax;
12664
0
        } else {
12665
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12666
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12667
0
        }
12668
0
        ctxt->userData = NULL;
12669
0
    }
12670
12671
0
    xmlParseExtParsedEnt(ctxt);
12672
12673
0
    if (ctxt->wellFormed) {
12674
0
  ret = ctxt->myDoc;
12675
0
    } else {
12676
0
        ret = NULL;
12677
0
        xmlFreeDoc(ctxt->myDoc);
12678
0
    }
12679
12680
0
    xmlFreeParserCtxt(ctxt);
12681
12682
0
    return(ret);
12683
0
}
12684
12685
/**
12686
 * xmlParseEntity:
12687
 * @filename:  the filename
12688
 *
12689
 * parse an XML external entity out of context and build a tree.
12690
 *
12691
 * [78] extParsedEnt ::= TextDecl? content
12692
 *
12693
 * This correspond to a "Well Balanced" chunk
12694
 *
12695
 * Returns the resulting document tree
12696
 */
12697
12698
xmlDocPtr
12699
0
xmlParseEntity(const char *filename) {
12700
0
    return(xmlSAXParseEntity(NULL, filename));
12701
0
}
12702
#endif /* LIBXML_SAX1_ENABLED */
12703
12704
/**
12705
 * xmlCreateEntityParserCtxt:
12706
 * @URL:  the entity URL
12707
 * @ID:  the entity PUBLIC ID
12708
 * @base:  a possible base for the target URI
12709
 *
12710
 * DEPRECATED: Don't use.
12711
 *
12712
 * Create a parser context for an external entity
12713
 * Automatic support for ZLIB/Compress compressed document is provided
12714
 * by default if found at compile-time.
12715
 *
12716
 * Returns the new parser context or NULL
12717
 */
12718
xmlParserCtxtPtr
12719
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12720
0
                    const xmlChar *base) {
12721
0
    xmlParserCtxtPtr ctxt;
12722
0
    xmlParserInputPtr input;
12723
0
    xmlChar *uri = NULL;
12724
12725
0
    ctxt = xmlNewParserCtxt();
12726
0
    if (ctxt == NULL)
12727
0
  return(NULL);
12728
12729
0
    if (base != NULL) {
12730
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12731
0
            goto error;
12732
0
        if (uri != NULL)
12733
0
            URL = uri;
12734
0
    }
12735
12736
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12737
0
                            XML_RESOURCE_UNKNOWN);
12738
0
    if (input == NULL)
12739
0
        goto error;
12740
12741
0
    if (inputPush(ctxt, input) < 0) {
12742
0
        xmlFreeInputStream(input);
12743
0
        goto error;
12744
0
    }
12745
12746
0
    xmlFree(uri);
12747
0
    return(ctxt);
12748
12749
0
error:
12750
0
    xmlFree(uri);
12751
0
    xmlFreeParserCtxt(ctxt);
12752
0
    return(NULL);
12753
0
}
12754
12755
/************************************************************************
12756
 *                  *
12757
 *    Front ends when parsing from a file     *
12758
 *                  *
12759
 ************************************************************************/
12760
12761
/**
12762
 * xmlCreateURLParserCtxt:
12763
 * @filename:  the filename or URL
12764
 * @options:  a combination of xmlParserOption
12765
 *
12766
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12767
 *
12768
 * Create a parser context for a file or URL content.
12769
 * Automatic support for ZLIB/Compress compressed document is provided
12770
 * by default if found at compile-time and for file accesses
12771
 *
12772
 * Returns the new parser context or NULL
12773
 */
12774
xmlParserCtxtPtr
12775
xmlCreateURLParserCtxt(const char *filename, int options)
12776
0
{
12777
0
    xmlParserCtxtPtr ctxt;
12778
0
    xmlParserInputPtr input;
12779
12780
0
    ctxt = xmlNewParserCtxt();
12781
0
    if (ctxt == NULL)
12782
0
  return(NULL);
12783
12784
0
    xmlCtxtUseOptions(ctxt, options);
12785
0
    ctxt->linenumbers = 1;
12786
12787
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12788
0
    if (input == NULL) {
12789
0
  xmlFreeParserCtxt(ctxt);
12790
0
  return(NULL);
12791
0
    }
12792
0
    if (inputPush(ctxt, input) < 0) {
12793
0
        xmlFreeInputStream(input);
12794
0
        xmlFreeParserCtxt(ctxt);
12795
0
        return(NULL);
12796
0
    }
12797
12798
0
    return(ctxt);
12799
0
}
12800
12801
/**
12802
 * xmlCreateFileParserCtxt:
12803
 * @filename:  the filename
12804
 *
12805
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12806
 *
12807
 * Create a parser context for a file content.
12808
 * Automatic support for ZLIB/Compress compressed document is provided
12809
 * by default if found at compile-time.
12810
 *
12811
 * Returns the new parser context or NULL
12812
 */
12813
xmlParserCtxtPtr
12814
xmlCreateFileParserCtxt(const char *filename)
12815
0
{
12816
0
    return(xmlCreateURLParserCtxt(filename, 0));
12817
0
}
12818
12819
#ifdef LIBXML_SAX1_ENABLED
12820
/**
12821
 * xmlSAXParseFileWithData:
12822
 * @sax:  the SAX handler block
12823
 * @filename:  the filename
12824
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12825
 *             documents
12826
 * @data:  the userdata
12827
 *
12828
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12829
 *
12830
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12831
 * compressed document is provided by default if found at compile-time.
12832
 * It use the given SAX function block to handle the parsing callback.
12833
 * If sax is NULL, fallback to the default DOM tree building routines.
12834
 *
12835
 * User data (void *) is stored within the parser context in the
12836
 * context's _private member, so it is available nearly everywhere in libxml
12837
 *
12838
 * Returns the resulting document tree
12839
 */
12840
12841
xmlDocPtr
12842
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12843
0
                        int recovery, void *data) {
12844
0
    xmlDocPtr ret;
12845
0
    xmlParserCtxtPtr ctxt;
12846
0
    xmlParserInputPtr input;
12847
12848
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12849
0
    if (ctxt == NULL)
12850
0
  return(NULL);
12851
12852
0
    if (data != NULL)
12853
0
  ctxt->_private = data;
12854
12855
0
    if (recovery) {
12856
0
        ctxt->options |= XML_PARSE_RECOVER;
12857
0
        ctxt->recovery = 1;
12858
0
    }
12859
12860
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12861
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12862
0
    else
12863
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12864
12865
0
    ret = xmlCtxtParseDocument(ctxt, input);
12866
12867
0
    xmlFreeParserCtxt(ctxt);
12868
0
    return(ret);
12869
0
}
12870
12871
/**
12872
 * xmlSAXParseFile:
12873
 * @sax:  the SAX handler block
12874
 * @filename:  the filename
12875
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12876
 *             documents
12877
 *
12878
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12879
 *
12880
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12881
 * compressed document is provided by default if found at compile-time.
12882
 * It use the given SAX function block to handle the parsing callback.
12883
 * If sax is NULL, fallback to the default DOM tree building routines.
12884
 *
12885
 * Returns the resulting document tree
12886
 */
12887
12888
xmlDocPtr
12889
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12890
0
                          int recovery) {
12891
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12892
0
}
12893
12894
/**
12895
 * xmlRecoverDoc:
12896
 * @cur:  a pointer to an array of xmlChar
12897
 *
12898
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12899
 *
12900
 * parse an XML in-memory document and build a tree.
12901
 * In the case the document is not Well Formed, a attempt to build a
12902
 * tree is tried anyway
12903
 *
12904
 * Returns the resulting document tree or NULL in case of failure
12905
 */
12906
12907
xmlDocPtr
12908
0
xmlRecoverDoc(const xmlChar *cur) {
12909
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12910
0
}
12911
12912
/**
12913
 * xmlParseFile:
12914
 * @filename:  the filename
12915
 *
12916
 * DEPRECATED: Use xmlReadFile.
12917
 *
12918
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12919
 * compressed document is provided by default if found at compile-time.
12920
 *
12921
 * Returns the resulting document tree if the file was wellformed,
12922
 * NULL otherwise.
12923
 */
12924
12925
xmlDocPtr
12926
0
xmlParseFile(const char *filename) {
12927
0
    return(xmlSAXParseFile(NULL, filename, 0));
12928
0
}
12929
12930
/**
12931
 * xmlRecoverFile:
12932
 * @filename:  the filename
12933
 *
12934
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12935
 *
12936
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12937
 * compressed document is provided by default if found at compile-time.
12938
 * In the case the document is not Well Formed, it attempts to build
12939
 * a tree anyway
12940
 *
12941
 * Returns the resulting document tree or NULL in case of failure
12942
 */
12943
12944
xmlDocPtr
12945
0
xmlRecoverFile(const char *filename) {
12946
0
    return(xmlSAXParseFile(NULL, filename, 1));
12947
0
}
12948
12949
12950
/**
12951
 * xmlSetupParserForBuffer:
12952
 * @ctxt:  an XML parser context
12953
 * @buffer:  a xmlChar * buffer
12954
 * @filename:  a file name
12955
 *
12956
 * DEPRECATED: Don't use.
12957
 *
12958
 * Setup the parser context to parse a new buffer; Clears any prior
12959
 * contents from the parser context. The buffer parameter must not be
12960
 * NULL, but the filename parameter can be
12961
 */
12962
void
12963
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12964
                             const char* filename)
12965
0
{
12966
0
    xmlParserInputPtr input;
12967
12968
0
    if ((ctxt == NULL) || (buffer == NULL))
12969
0
        return;
12970
12971
0
    xmlClearParserCtxt(ctxt);
12972
12973
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12974
0
                                      NULL, 0);
12975
0
    if (input == NULL)
12976
0
        return;
12977
0
    if (inputPush(ctxt, input) < 0)
12978
0
        xmlFreeInputStream(input);
12979
0
}
12980
12981
/**
12982
 * xmlSAXUserParseFile:
12983
 * @sax:  a SAX handler
12984
 * @user_data:  The user data returned on SAX callbacks
12985
 * @filename:  a file name
12986
 *
12987
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12988
 *
12989
 * parse an XML file and call the given SAX handler routines.
12990
 * Automatic support for ZLIB/Compress compressed document is provided
12991
 *
12992
 * Returns 0 in case of success or a error number otherwise
12993
 */
12994
int
12995
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12996
0
                    const char *filename) {
12997
0
    int ret = 0;
12998
0
    xmlParserCtxtPtr ctxt;
12999
13000
0
    ctxt = xmlCreateFileParserCtxt(filename);
13001
0
    if (ctxt == NULL) return -1;
13002
0
    if (sax != NULL) {
13003
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13004
0
            *ctxt->sax = *sax;
13005
0
        } else {
13006
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13007
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13008
0
        }
13009
0
  ctxt->userData = user_data;
13010
0
    }
13011
13012
0
    xmlParseDocument(ctxt);
13013
13014
0
    if (ctxt->wellFormed)
13015
0
  ret = 0;
13016
0
    else {
13017
0
        if (ctxt->errNo != 0)
13018
0
      ret = ctxt->errNo;
13019
0
  else
13020
0
      ret = -1;
13021
0
    }
13022
0
    if (ctxt->myDoc != NULL) {
13023
0
        xmlFreeDoc(ctxt->myDoc);
13024
0
  ctxt->myDoc = NULL;
13025
0
    }
13026
0
    xmlFreeParserCtxt(ctxt);
13027
13028
0
    return ret;
13029
0
}
13030
#endif /* LIBXML_SAX1_ENABLED */
13031
13032
/************************************************************************
13033
 *                  *
13034
 *    Front ends when parsing from memory     *
13035
 *                  *
13036
 ************************************************************************/
13037
13038
/**
13039
 * xmlCreateMemoryParserCtxt:
13040
 * @buffer:  a pointer to a char array
13041
 * @size:  the size of the array
13042
 *
13043
 * Create a parser context for an XML in-memory document. The input buffer
13044
 * must not contain a terminating null byte.
13045
 *
13046
 * Returns the new parser context or NULL
13047
 */
13048
xmlParserCtxtPtr
13049
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13050
0
    xmlParserCtxtPtr ctxt;
13051
0
    xmlParserInputPtr input;
13052
13053
0
    if (size < 0)
13054
0
  return(NULL);
13055
13056
0
    ctxt = xmlNewParserCtxt();
13057
0
    if (ctxt == NULL)
13058
0
  return(NULL);
13059
13060
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13061
0
    if (input == NULL) {
13062
0
  xmlFreeParserCtxt(ctxt);
13063
0
  return(NULL);
13064
0
    }
13065
0
    if (inputPush(ctxt, input) < 0) {
13066
0
        xmlFreeInputStream(input);
13067
0
        xmlFreeParserCtxt(ctxt);
13068
0
        return(NULL);
13069
0
    }
13070
13071
0
    return(ctxt);
13072
0
}
13073
13074
#ifdef LIBXML_SAX1_ENABLED
13075
/**
13076
 * xmlSAXParseMemoryWithData:
13077
 * @sax:  the SAX handler block
13078
 * @buffer:  an pointer to a char array
13079
 * @size:  the size of the array
13080
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13081
 *             documents
13082
 * @data:  the userdata
13083
 *
13084
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13085
 *
13086
 * parse an XML in-memory block and use the given SAX function block
13087
 * to handle the parsing callback. If sax is NULL, fallback to the default
13088
 * DOM tree building routines.
13089
 *
13090
 * User data (void *) is stored within the parser context in the
13091
 * context's _private member, so it is available nearly everywhere in libxml
13092
 *
13093
 * Returns the resulting document tree
13094
 */
13095
13096
xmlDocPtr
13097
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13098
0
                          int size, int recovery, void *data) {
13099
0
    xmlDocPtr ret;
13100
0
    xmlParserCtxtPtr ctxt;
13101
0
    xmlParserInputPtr input;
13102
13103
0
    if (size < 0)
13104
0
        return(NULL);
13105
13106
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13107
0
    if (ctxt == NULL)
13108
0
        return(NULL);
13109
13110
0
    if (data != NULL)
13111
0
  ctxt->_private=data;
13112
13113
0
    if (recovery) {
13114
0
        ctxt->options |= XML_PARSE_RECOVER;
13115
0
        ctxt->recovery = 1;
13116
0
    }
13117
13118
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13119
0
                                      XML_INPUT_BUF_STATIC);
13120
13121
0
    ret = xmlCtxtParseDocument(ctxt, input);
13122
13123
0
    xmlFreeParserCtxt(ctxt);
13124
0
    return(ret);
13125
0
}
13126
13127
/**
13128
 * xmlSAXParseMemory:
13129
 * @sax:  the SAX handler block
13130
 * @buffer:  an pointer to a char array
13131
 * @size:  the size of the array
13132
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13133
 *             documents
13134
 *
13135
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13136
 *
13137
 * parse an XML in-memory block and use the given SAX function block
13138
 * to handle the parsing callback. If sax is NULL, fallback to the default
13139
 * DOM tree building routines.
13140
 *
13141
 * Returns the resulting document tree
13142
 */
13143
xmlDocPtr
13144
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13145
0
            int size, int recovery) {
13146
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13147
0
}
13148
13149
/**
13150
 * xmlParseMemory:
13151
 * @buffer:  an pointer to a char array
13152
 * @size:  the size of the array
13153
 *
13154
 * DEPRECATED: Use xmlReadMemory.
13155
 *
13156
 * parse an XML in-memory block and build a tree.
13157
 *
13158
 * Returns the resulting document tree
13159
 */
13160
13161
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13162
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13163
0
}
13164
13165
/**
13166
 * xmlRecoverMemory:
13167
 * @buffer:  an pointer to a char array
13168
 * @size:  the size of the array
13169
 *
13170
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13171
 *
13172
 * parse an XML in-memory block and build a tree.
13173
 * In the case the document is not Well Formed, an attempt to
13174
 * build a tree is tried anyway
13175
 *
13176
 * Returns the resulting document tree or NULL in case of error
13177
 */
13178
13179
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13180
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13181
0
}
13182
13183
/**
13184
 * xmlSAXUserParseMemory:
13185
 * @sax:  a SAX handler
13186
 * @user_data:  The user data returned on SAX callbacks
13187
 * @buffer:  an in-memory XML document input
13188
 * @size:  the length of the XML document in bytes
13189
 *
13190
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13191
 *
13192
 * parse an XML in-memory buffer and call the given SAX handler routines.
13193
 *
13194
 * Returns 0 in case of success or a error number otherwise
13195
 */
13196
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13197
0
        const char *buffer, int size) {
13198
0
    int ret = 0;
13199
0
    xmlParserCtxtPtr ctxt;
13200
13201
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13202
0
    if (ctxt == NULL) return -1;
13203
0
    if (sax != NULL) {
13204
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13205
0
            *ctxt->sax = *sax;
13206
0
        } else {
13207
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13208
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13209
0
        }
13210
0
  ctxt->userData = user_data;
13211
0
    }
13212
13213
0
    xmlParseDocument(ctxt);
13214
13215
0
    if (ctxt->wellFormed)
13216
0
  ret = 0;
13217
0
    else {
13218
0
        if (ctxt->errNo != 0)
13219
0
      ret = ctxt->errNo;
13220
0
  else
13221
0
      ret = -1;
13222
0
    }
13223
0
    if (ctxt->myDoc != NULL) {
13224
0
        xmlFreeDoc(ctxt->myDoc);
13225
0
  ctxt->myDoc = NULL;
13226
0
    }
13227
0
    xmlFreeParserCtxt(ctxt);
13228
13229
0
    return ret;
13230
0
}
13231
#endif /* LIBXML_SAX1_ENABLED */
13232
13233
/**
13234
 * xmlCreateDocParserCtxt:
13235
 * @str:  a pointer to an array of xmlChar
13236
 *
13237
 * Creates a parser context for an XML in-memory document.
13238
 *
13239
 * Returns the new parser context or NULL
13240
 */
13241
xmlParserCtxtPtr
13242
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13243
0
    xmlParserCtxtPtr ctxt;
13244
0
    xmlParserInputPtr input;
13245
13246
0
    ctxt = xmlNewParserCtxt();
13247
0
    if (ctxt == NULL)
13248
0
  return(NULL);
13249
13250
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13251
0
    if (input == NULL) {
13252
0
  xmlFreeParserCtxt(ctxt);
13253
0
  return(NULL);
13254
0
    }
13255
0
    if (inputPush(ctxt, input) < 0) {
13256
0
        xmlFreeInputStream(input);
13257
0
        xmlFreeParserCtxt(ctxt);
13258
0
        return(NULL);
13259
0
    }
13260
13261
0
    return(ctxt);
13262
0
}
13263
13264
#ifdef LIBXML_SAX1_ENABLED
13265
/**
13266
 * xmlSAXParseDoc:
13267
 * @sax:  the SAX handler block
13268
 * @cur:  a pointer to an array of xmlChar
13269
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13270
 *             documents
13271
 *
13272
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13273
 *
13274
 * parse an XML in-memory document and build a tree.
13275
 * It use the given SAX function block to handle the parsing callback.
13276
 * If sax is NULL, fallback to the default DOM tree building routines.
13277
 *
13278
 * Returns the resulting document tree
13279
 */
13280
13281
xmlDocPtr
13282
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13283
0
    xmlDocPtr ret;
13284
0
    xmlParserCtxtPtr ctxt;
13285
0
    xmlSAXHandlerPtr oldsax = NULL;
13286
13287
0
    if (cur == NULL) return(NULL);
13288
13289
13290
0
    ctxt = xmlCreateDocParserCtxt(cur);
13291
0
    if (ctxt == NULL) return(NULL);
13292
0
    if (sax != NULL) {
13293
0
        oldsax = ctxt->sax;
13294
0
        ctxt->sax = sax;
13295
0
        ctxt->userData = NULL;
13296
0
    }
13297
13298
0
    xmlParseDocument(ctxt);
13299
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13300
0
    else {
13301
0
       ret = NULL;
13302
0
       xmlFreeDoc(ctxt->myDoc);
13303
0
       ctxt->myDoc = NULL;
13304
0
    }
13305
0
    if (sax != NULL)
13306
0
  ctxt->sax = oldsax;
13307
0
    xmlFreeParserCtxt(ctxt);
13308
13309
0
    return(ret);
13310
0
}
13311
13312
/**
13313
 * xmlParseDoc:
13314
 * @cur:  a pointer to an array of xmlChar
13315
 *
13316
 * DEPRECATED: Use xmlReadDoc.
13317
 *
13318
 * parse an XML in-memory document and build a tree.
13319
 *
13320
 * Returns the resulting document tree
13321
 */
13322
13323
xmlDocPtr
13324
0
xmlParseDoc(const xmlChar *cur) {
13325
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13326
0
}
13327
#endif /* LIBXML_SAX1_ENABLED */
13328
13329
/************************************************************************
13330
 *                  *
13331
 *  New set (2.6.0) of simpler and more flexible APIs   *
13332
 *                  *
13333
 ************************************************************************/
13334
13335
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Release @str with xmlFree unless it is NULL or owned by the
 * dictionary held in a variable named "dict" in the enclosing scope.
 * The expansion already ends with a semicolon.
 */
#define DICT_FREE(str)                                          \
    if (((str) != NULL) &&                                      \
        ((dict == NULL) ||                                      \
         (!xmlDictOwns(dict, (const xmlChar *)(str)))))         \
        xmlFree((char *)(str));
13346
13347
/**
13348
 * xmlCtxtReset:
13349
 * @ctxt: an XML parser context
13350
 *
13351
 * Reset a parser context
13352
 */
13353
void
13354
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13355
121k
{
13356
121k
    xmlParserInputPtr input;
13357
121k
    xmlDictPtr dict;
13358
13359
121k
    if (ctxt == NULL)
13360
0
        return;
13361
13362
121k
    dict = ctxt->dict;
13363
13364
121k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13365
0
        xmlFreeInputStream(input);
13366
0
    }
13367
121k
    ctxt->inputNr = 0;
13368
121k
    ctxt->input = NULL;
13369
13370
121k
    ctxt->spaceNr = 0;
13371
121k
    if (ctxt->spaceTab != NULL) {
13372
121k
  ctxt->spaceTab[0] = -1;
13373
121k
  ctxt->space = &ctxt->spaceTab[0];
13374
121k
    } else {
13375
0
        ctxt->space = NULL;
13376
0
    }
13377
13378
13379
121k
    ctxt->nodeNr = 0;
13380
121k
    ctxt->node = NULL;
13381
13382
121k
    ctxt->nameNr = 0;
13383
121k
    ctxt->name = NULL;
13384
13385
121k
    ctxt->nsNr = 0;
13386
121k
    xmlParserNsReset(ctxt->nsdb);
13387
13388
121k
    DICT_FREE(ctxt->version);
13389
121k
    ctxt->version = NULL;
13390
121k
    DICT_FREE(ctxt->encoding);
13391
121k
    ctxt->encoding = NULL;
13392
121k
    DICT_FREE(ctxt->extSubURI);
13393
121k
    ctxt->extSubURI = NULL;
13394
121k
    DICT_FREE(ctxt->extSubSystem);
13395
121k
    ctxt->extSubSystem = NULL;
13396
13397
121k
    if (ctxt->directory != NULL) {
13398
0
        xmlFree(ctxt->directory);
13399
0
        ctxt->directory = NULL;
13400
0
    }
13401
13402
121k
    if (ctxt->myDoc != NULL)
13403
0
        xmlFreeDoc(ctxt->myDoc);
13404
121k
    ctxt->myDoc = NULL;
13405
13406
121k
    ctxt->standalone = -1;
13407
121k
    ctxt->hasExternalSubset = 0;
13408
121k
    ctxt->hasPErefs = 0;
13409
121k
    ctxt->html = 0;
13410
121k
    ctxt->instate = XML_PARSER_START;
13411
13412
121k
    ctxt->wellFormed = 1;
13413
121k
    ctxt->nsWellFormed = 1;
13414
121k
    ctxt->disableSAX = 0;
13415
121k
    ctxt->valid = 1;
13416
121k
    ctxt->record_info = 0;
13417
121k
    ctxt->checkIndex = 0;
13418
121k
    ctxt->endCheckState = 0;
13419
121k
    ctxt->inSubset = 0;
13420
121k
    ctxt->errNo = XML_ERR_OK;
13421
121k
    ctxt->depth = 0;
13422
121k
    ctxt->catalogs = NULL;
13423
121k
    ctxt->sizeentities = 0;
13424
121k
    ctxt->sizeentcopy = 0;
13425
121k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13426
13427
121k
    if (ctxt->attsDefault != NULL) {
13428
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13429
0
        ctxt->attsDefault = NULL;
13430
0
    }
13431
121k
    if (ctxt->attsSpecial != NULL) {
13432
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13433
0
        ctxt->attsSpecial = NULL;
13434
0
    }
13435
13436
121k
#ifdef LIBXML_CATALOG_ENABLED
13437
121k
    if (ctxt->catalogs != NULL)
13438
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13439
121k
#endif
13440
121k
    ctxt->nbErrors = 0;
13441
121k
    ctxt->nbWarnings = 0;
13442
121k
    if (ctxt->lastError.code != XML_ERR_OK)
13443
0
        xmlResetError(&ctxt->lastError);
13444
121k
}
13445
13446
/**
13447
 * xmlCtxtResetPush:
13448
 * @ctxt: an XML parser context
13449
 * @chunk:  a pointer to an array of chars
13450
 * @size:  number of chars in the array
13451
 * @filename:  an optional file name or URI
13452
 * @encoding:  the document encoding, or NULL
13453
 *
13454
 * Reset a push parser context
13455
 *
13456
 * Returns 0 in case of success and 1 in case of error
13457
 */
13458
int
13459
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13460
                 int size, const char *filename, const char *encoding)
13461
0
{
13462
0
    xmlParserInputPtr input;
13463
13464
0
    if (ctxt == NULL)
13465
0
        return(1);
13466
13467
0
    xmlCtxtReset(ctxt);
13468
13469
0
    input = xmlNewPushInput(filename, chunk, size);
13470
0
    if (input == NULL)
13471
0
        return(1);
13472
13473
0
    if (inputPush(ctxt, input) < 0) {
13474
0
        xmlFreeInputStream(input);
13475
0
        return(1);
13476
0
    }
13477
13478
0
    if (encoding != NULL)
13479
0
        xmlSwitchEncodingName(ctxt, encoding);
13480
13481
0
    return(0);
13482
0
}
13483
13484
static int
13485
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13486
496k
{
13487
496k
    int allMask;
13488
13489
496k
    if (ctxt == NULL)
13490
0
        return(-1);
13491
13492
    /*
13493
     * XInclude options aren't handled by the parser.
13494
     *
13495
     * XML_PARSE_XINCLUDE
13496
     * XML_PARSE_NOXINCNODE
13497
     * XML_PARSE_NOBASEFIX
13498
     */
13499
496k
    allMask = XML_PARSE_RECOVER |
13500
496k
              XML_PARSE_NOENT |
13501
496k
              XML_PARSE_DTDLOAD |
13502
496k
              XML_PARSE_DTDATTR |
13503
496k
              XML_PARSE_DTDVALID |
13504
496k
              XML_PARSE_NOERROR |
13505
496k
              XML_PARSE_NOWARNING |
13506
496k
              XML_PARSE_PEDANTIC |
13507
496k
              XML_PARSE_NOBLANKS |
13508
496k
#ifdef LIBXML_SAX1_ENABLED
13509
496k
              XML_PARSE_SAX1 |
13510
496k
#endif
13511
496k
              XML_PARSE_NONET |
13512
496k
              XML_PARSE_NODICT |
13513
496k
              XML_PARSE_NSCLEAN |
13514
496k
              XML_PARSE_NOCDATA |
13515
496k
              XML_PARSE_COMPACT |
13516
496k
              XML_PARSE_OLD10 |
13517
496k
              XML_PARSE_HUGE |
13518
496k
              XML_PARSE_OLDSAX |
13519
496k
              XML_PARSE_IGNORE_ENC |
13520
496k
              XML_PARSE_BIG_LINES |
13521
496k
              XML_PARSE_NO_XXE;
13522
13523
496k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13524
13525
    /*
13526
     * For some options, struct members are historically the source
13527
     * of truth. The values are initalized from global variables and
13528
     * old code could also modify them directly. Several older API
13529
     * functions that don't take an options argument rely on these
13530
     * deprecated mechanisms.
13531
     *
13532
     * Once public access to struct members and the globals are
13533
     * disabled, we can use the options bitmask as source of
13534
     * truth, making all these struct members obsolete.
13535
     *
13536
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13537
     * loading of the external subset.
13538
     */
13539
496k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13540
496k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13541
496k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13542
496k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13543
496k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13544
496k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13545
496k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13546
496k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13547
13548
    /*
13549
     * Changing SAX callbacks is a bad idea. This should be fixed.
13550
     */
13551
496k
    if (options & XML_PARSE_NOBLANKS) {
13552
168k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13553
168k
    }
13554
496k
    if (options & XML_PARSE_NOCDATA) {
13555
143k
        ctxt->sax->cdataBlock = NULL;
13556
143k
    }
13557
496k
    if (options & XML_PARSE_HUGE) {
13558
140k
        if (ctxt->dict != NULL)
13559
140k
            xmlDictSetLimit(ctxt->dict, 0);
13560
140k
    }
13561
13562
496k
    ctxt->linenumbers = 1;
13563
13564
496k
    return(options & ~allMask);
13565
496k
}
13566
13567
/**
13568
 * xmlCtxtSetOptions:
13569
 * @ctxt: an XML parser context
13570
 * @options:  a bitmask of xmlParserOption values
13571
 *
13572
 * Applies the options to the parser context. Unset options are
13573
 * cleared.
13574
 *
13575
 * Available since 2.13.0. With older versions, you can use
13576
 * xmlCtxtUseOptions.
13577
 *
13578
 * XML_PARSE_RECOVER
13579
 *
13580
 * Enable "recovery" mode which allows non-wellformed documents.
13581
 * How this mode behaves exactly is unspecified and may change
13582
 * without further notice. Use of this feature is DISCOURAGED.
13583
 *
13584
 * XML_PARSE_NOENT
13585
 *
13586
 * Despite the confusing name, this option enables substitution
13587
 * of entities. The resulting tree won't contain any entity
13588
 * reference nodes.
13589
 *
13590
 * This option also enables loading of external entities (both
13591
 * general and parameter entities) which is dangerous. If you
13592
 * process untrusted data, it's recommended to set the
13593
 * XML_PARSE_NO_XXE option to disable loading of external
13594
 * entities.
13595
 *
13596
 * XML_PARSE_DTDLOAD
13597
 *
13598
 * Enables loading of an external DTD and the loading and
13599
 * substitution of external parameter entities. Has no effect
13600
 * if XML_PARSE_NO_XXE is set.
13601
 *
13602
 * XML_PARSE_DTDATTR
13603
 *
13604
 * Adds default attributes from the DTD to the result document.
13605
 *
13606
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13607
 * can be disabled with XML_PARSE_NO_XXE.
13608
 *
13609
 * XML_PARSE_DTDVALID
13610
 *
13611
 * This option enables DTD validation which requires to load
13612
 * external DTDs and external entities (both general and
13613
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13614
 *
13615
 * XML_PARSE_NO_XXE
13616
 *
13617
 * Disables loading of external DTDs or entities.
13618
 *
13619
 * XML_PARSE_NOERROR
13620
 *
13621
 * Disable error and warning reports to the error handlers.
13622
 * Errors are still accessible with xmlCtxtGetLastError.
13623
 *
13624
 * XML_PARSE_NOWARNING
13625
 *
13626
 * Disable warning reports.
13627
 *
13628
 * XML_PARSE_PEDANTIC
13629
 *
13630
 * Enable some pedantic warnings.
13631
 *
13632
 * XML_PARSE_NOBLANKS
13633
 *
13634
 * Remove some text nodes containing only whitespace from the
13635
 * result document. Which nodes are removed depends on DTD
13636
 * element declarations or a conservative heuristic. The
13637
 * reindenting feature of the serialization code relies on this
13638
 * option to be set when parsing. Use of this option is
13639
 * DISCOURAGED.
13640
 *
13641
 * XML_PARSE_SAX1
13642
 *
13643
 * Always invoke the deprecated SAX1 startElement and endElement
13644
 * handlers. This option is DEPRECATED.
13645
 *
13646
 * XML_PARSE_NONET
13647
 *
13648
 * Disable network access with the builtin HTTP client.
13649
 *
13650
 * XML_PARSE_NODICT
13651
 *
13652
 * Create a document without interned strings, making all
13653
 * strings separate memory allocations.
13654
 *
13655
 * XML_PARSE_NSCLEAN
13656
 *
13657
 * Remove redundant namespace declarations from the result
13658
 * document.
13659
 *
13660
 * XML_PARSE_NOCDATA
13661
 *
13662
 * Output normal text nodes instead of CDATA nodes.
13663
 *
13664
 * XML_PARSE_COMPACT
13665
 *
13666
 * Store small strings directly in the node struct to save
13667
 * memory.
13668
 *
13669
 * XML_PARSE_OLD10
13670
 *
13671
 * Use old Name productions from before XML 1.0 Fifth Edition.
13672
 * This option is DEPRECATED.
13673
 *
13674
 * XML_PARSE_HUGE
13675
 *
13676
 * Relax some internal limits.
13677
 *
13678
 * Maximum size of text nodes, tags, comments, processing instructions,
13679
 * CDATA sections, entity values
13680
 *
13681
 * normal: 10M
13682
 * huge:    1B
13683
 *
13684
 * Maximum size of names, system literals, pubid literals
13685
 *
13686
 * normal: 50K
13687
 * huge:   10M
13688
 *
13689
 * Maximum nesting depth of elements
13690
 *
13691
 * normal:  256
13692
 * huge:   2048
13693
 *
13694
 * Maximum nesting depth of entities
13695
 *
13696
 * normal: 20
13697
 * huge:   40
13698
 *
13699
 * XML_PARSE_OLDSAX
13700
 *
13701
 * Enable an unspecified legacy mode for SAX parsers. This
13702
 * option is DEPRECATED.
13703
 *
13704
 * XML_PARSE_IGNORE_ENC
13705
 *
13706
 * Ignore the encoding in the XML declaration. This option is
13707
 * mostly unneeded these days. The only effect is to enforce
13708
 * UTF-8 decoding of ASCII-like data.
13709
 *
13710
 * XML_PARSE_BIG_LINES
13711
 *
13712
 * Enable reporting of line numbers larger than 65535.
13713
 *
13714
 * XML_PARSE_NO_UNZIP
13715
 *
13716
 * Disables input decompression. Setting this option is recommended
13717
 * to avoid zip bombs.
13718
 *
13719
 * Available since 2.14.0.
13720
 *
13721
 * XML_PARSE_NO_SYS_CATALOG
13722
 *
13723
 * Disables the global system XML catalog.
13724
 *
13725
 * Available since 2.14.0.
13726
 *
13727
 * XML_PARSE_NO_CATALOG_PI
13728
 *
13729
 * Ignore XML catalog processing instructions.
13730
 *
13731
 * Available since 2.14.0.
13732
 *
13733
 * Returns 0 in case of success, the set of unknown or unimplemented options
13734
 *         in case of error.
13735
 */
13736
int
13737
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13738
0
{
13739
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13740
0
}
13741
13742
/**
13743
 * xmlCtxtGetOptions:
13744
 * @ctxt: an XML parser context
13745
 *
13746
 * Get the current options of the parser context.
13747
 *
13748
 * Available since 2.14.0.
13749
 *
13750
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13751
 */
13752
int
13753
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13754
0
{
13755
0
    if (ctxt == NULL)
13756
0
        return(-1);
13757
13758
0
    return(ctxt->options);
13759
0
}
13760
13761
/**
13762
 * xmlCtxtUseOptions:
13763
 * @ctxt: an XML parser context
13764
 * @options:  a combination of xmlParserOption
13765
 *
13766
 * DEPRECATED: Use xmlCtxtSetOptions.
13767
 *
13768
 * Applies the options to the parser context. The following options
13769
 * are never cleared and can only be enabled:
13770
 *
13771
 * XML_PARSE_NOERROR
13772
 * XML_PARSE_NOWARNING
13773
 * XML_PARSE_NONET
13774
 * XML_PARSE_NSCLEAN
13775
 * XML_PARSE_NOCDATA
13776
 * XML_PARSE_COMPACT
13777
 * XML_PARSE_OLD10
13778
 * XML_PARSE_HUGE
13779
 * XML_PARSE_OLDSAX
13780
 * XML_PARSE_IGNORE_ENC
13781
 * XML_PARSE_BIG_LINES
13782
 *
13783
 * Returns 0 in case of success, the set of unknown or unimplemented options
13784
 *         in case of error.
13785
 */
13786
int
13787
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13788
496k
{
13789
496k
    int keepMask;
13790
13791
    /*
13792
     * For historic reasons, some options can only be enabled.
13793
     */
13794
496k
    keepMask = XML_PARSE_NOERROR |
13795
496k
               XML_PARSE_NOWARNING |
13796
496k
               XML_PARSE_NONET |
13797
496k
               XML_PARSE_NSCLEAN |
13798
496k
               XML_PARSE_NOCDATA |
13799
496k
               XML_PARSE_COMPACT |
13800
496k
               XML_PARSE_OLD10 |
13801
496k
               XML_PARSE_HUGE |
13802
496k
               XML_PARSE_OLDSAX |
13803
496k
               XML_PARSE_IGNORE_ENC |
13804
496k
               XML_PARSE_BIG_LINES;
13805
13806
496k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13807
496k
}
13808
13809
/**
13810
 * xmlCtxtSetMaxAmplification:
13811
 * @ctxt: an XML parser context
13812
 * @maxAmpl:  maximum amplification factor
13813
 *
13814
 * To protect against exponential entity expansion ("billion laughs"), the
13815
 * size of serialized output is (roughly) limited to the input size
13816
 * multiplied by this factor. The default value is 5.
13817
 *
13818
 * When working with documents making heavy use of entity expansion, it can
13819
 * be necessary to increase the value. For security reasons, this should only
13820
 * be considered when processing trusted input.
13821
 */
13822
void
13823
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13824
244
{
13825
244
    ctxt->maxAmpl = maxAmpl;
13826
244
}
13827
13828
/**
13829
 * xmlCtxtParseDocument:
13830
 * @ctxt:  an XML parser context
13831
 * @input:  parser input
13832
 *
13833
 * Parse an XML document and return the resulting document tree.
13834
 * Takes ownership of the input object.
13835
 *
13836
 * Available since 2.13.0.
13837
 *
13838
 * Returns the resulting document tree or NULL
13839
 */
13840
xmlDocPtr
13841
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13842
422k
{
13843
422k
    xmlDocPtr ret = NULL;
13844
13845
422k
    if ((ctxt == NULL) || (input == NULL))
13846
21.0k
        return(NULL);
13847
13848
    /* assert(ctxt->inputNr == 0); */
13849
401k
    while (ctxt->inputNr > 0)
13850
0
        xmlFreeInputStream(inputPop(ctxt));
13851
13852
401k
    if (inputPush(ctxt, input) < 0) {
13853
32
        xmlFreeInputStream(input);
13854
32
        return(NULL);
13855
32
    }
13856
13857
401k
    xmlParseDocument(ctxt);
13858
13859
401k
    if ((ctxt->wellFormed) ||
13860
401k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13861
238k
        ret = ctxt->myDoc;
13862
238k
    } else {
13863
162k
        if (ctxt->errNo == XML_ERR_OK)
13864
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13865
13866
162k
        ret = NULL;
13867
162k
  xmlFreeDoc(ctxt->myDoc);
13868
162k
    }
13869
401k
    ctxt->myDoc = NULL;
13870
13871
    /* assert(ctxt->inputNr == 1); */
13872
802k
    while (ctxt->inputNr > 0)
13873
401k
        xmlFreeInputStream(inputPop(ctxt));
13874
13875
401k
    return(ret);
13876
401k
}
13877
13878
/**
13879
 * xmlReadDoc:
13880
 * @cur:  a pointer to a zero terminated string
13881
 * @URL:  base URL (optional)
13882
 * @encoding:  the document encoding (optional)
13883
 * @options:  a combination of xmlParserOption
13884
 *
13885
 * Convenience function to parse an XML document from a
13886
 * zero-terminated string.
13887
 *
13888
 * See xmlCtxtReadDoc for details.
13889
 *
13890
 * Returns the resulting document tree
13891
 */
13892
xmlDocPtr
13893
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13894
           int options)
13895
276k
{
13896
276k
    xmlParserCtxtPtr ctxt;
13897
276k
    xmlParserInputPtr input;
13898
276k
    xmlDocPtr doc;
13899
13900
276k
    ctxt = xmlNewParserCtxt();
13901
276k
    if (ctxt == NULL)
13902
294
        return(NULL);
13903
13904
276k
    xmlCtxtUseOptions(ctxt, options);
13905
13906
276k
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13907
276k
                                      XML_INPUT_BUF_STATIC);
13908
13909
276k
    doc = xmlCtxtParseDocument(ctxt, input);
13910
13911
276k
    xmlFreeParserCtxt(ctxt);
13912
276k
    return(doc);
13913
276k
}
13914
13915
/**
13916
 * xmlReadFile:
13917
 * @filename:  a file or URL
13918
 * @encoding:  the document encoding (optional)
13919
 * @options:  a combination of xmlParserOption
13920
 *
13921
 * Convenience function to parse an XML file from the filesystem,
13922
 * the network or a global user-define resource loader.
13923
 *
13924
 * See xmlCtxtReadFile for details.
13925
 *
13926
 * Returns the resulting document tree
13927
 */
13928
xmlDocPtr
13929
xmlReadFile(const char *filename, const char *encoding, int options)
13930
0
{
13931
0
    xmlParserCtxtPtr ctxt;
13932
0
    xmlParserInputPtr input;
13933
0
    xmlDocPtr doc;
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
    /*
13942
     * Backward compatibility for users of command line utilities like
13943
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13944
     * should be removed at some point.
13945
     */
13946
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13947
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13948
0
                                      encoding, 0);
13949
0
    else
13950
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13951
13952
0
    doc = xmlCtxtParseDocument(ctxt, input);
13953
13954
0
    xmlFreeParserCtxt(ctxt);
13955
0
    return(doc);
13956
0
}
13957
13958
/**
13959
 * xmlReadMemory:
13960
 * @buffer:  a pointer to a char array
13961
 * @size:  the size of the array
13962
 * @url:  base URL (optional)
13963
 * @encoding:  the document encoding (optional)
13964
 * @options:  a combination of xmlParserOption
13965
 *
13966
 * Parse an XML in-memory document and build a tree. The input buffer must
13967
 * not contain a terminating null byte.
13968
 *
13969
 * See xmlCtxtReadMemory for details.
13970
 *
13971
 * Returns the resulting document tree
13972
 */
13973
xmlDocPtr
13974
xmlReadMemory(const char *buffer, int size, const char *url,
13975
              const char *encoding, int options)
13976
23.8k
{
13977
23.8k
    xmlParserCtxtPtr ctxt;
13978
23.8k
    xmlParserInputPtr input;
13979
23.8k
    xmlDocPtr doc;
13980
13981
23.8k
    if (size < 0)
13982
0
  return(NULL);
13983
13984
23.8k
    ctxt = xmlNewParserCtxt();
13985
23.8k
    if (ctxt == NULL)
13986
0
        return(NULL);
13987
13988
23.8k
    xmlCtxtUseOptions(ctxt, options);
13989
13990
23.8k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13991
23.8k
                                      XML_INPUT_BUF_STATIC);
13992
13993
23.8k
    doc = xmlCtxtParseDocument(ctxt, input);
13994
13995
23.8k
    xmlFreeParserCtxt(ctxt);
13996
23.8k
    return(doc);
13997
23.8k
}
13998
13999
/**
14000
 * xmlReadFd:
14001
 * @fd:  an open file descriptor
14002
 * @URL:  base URL (optional)
14003
 * @encoding:  the document encoding (optional)
14004
 * @options:  a combination of xmlParserOption
14005
 *
14006
 * Parse an XML from a file descriptor and build a tree.
14007
 *
14008
 * See xmlCtxtReadFd for details.
14009
 *
14010
 * NOTE that the file descriptor will not be closed when the
14011
 * context is freed or reset.
14012
 *
14013
 * Returns the resulting document tree
14014
 */
14015
xmlDocPtr
14016
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14017
0
{
14018
0
    xmlParserCtxtPtr ctxt;
14019
0
    xmlParserInputPtr input;
14020
0
    xmlDocPtr doc;
14021
14022
0
    ctxt = xmlNewParserCtxt();
14023
0
    if (ctxt == NULL)
14024
0
        return(NULL);
14025
14026
0
    xmlCtxtUseOptions(ctxt, options);
14027
14028
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14029
14030
0
    doc = xmlCtxtParseDocument(ctxt, input);
14031
14032
0
    xmlFreeParserCtxt(ctxt);
14033
0
    return(doc);
14034
0
}
14035
14036
/**
14037
 * xmlReadIO:
14038
 * @ioread:  an I/O read function
14039
 * @ioclose:  an I/O close function (optional)
14040
 * @ioctx:  an I/O handler
14041
 * @URL:  base URL (optional)
14042
 * @encoding:  the document encoding (optional)
14043
 * @options:  a combination of xmlParserOption
14044
 *
14045
 * Parse an XML document from I/O functions and context and build a tree.
14046
 *
14047
 * See xmlCtxtReadIO for details.
14048
 *
14049
 * Returns the resulting document tree
14050
 */
14051
xmlDocPtr
14052
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14053
          void *ioctx, const char *URL, const char *encoding, int options)
14054
0
{
14055
0
    xmlParserCtxtPtr ctxt;
14056
0
    xmlParserInputPtr input;
14057
0
    xmlDocPtr doc;
14058
14059
0
    ctxt = xmlNewParserCtxt();
14060
0
    if (ctxt == NULL)
14061
0
        return(NULL);
14062
14063
0
    xmlCtxtUseOptions(ctxt, options);
14064
14065
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14066
0
                                  encoding, 0);
14067
14068
0
    doc = xmlCtxtParseDocument(ctxt, input);
14069
14070
0
    xmlFreeParserCtxt(ctxt);
14071
0
    return(doc);
14072
0
}
14073
14074
/**
14075
 * xmlCtxtReadDoc:
14076
 * @ctxt:  an XML parser context
14077
 * @str:  a pointer to a zero terminated string
14078
 * @URL:  base URL (optional)
14079
 * @encoding:  the document encoding (optional)
14080
 * @options:  a combination of xmlParserOption
14081
 *
14082
 * Parse an XML in-memory document and build a tree.
14083
 *
14084
 * @URL is used as base to resolve external entities and for error
14085
 * reporting.
14086
 *
14087
 * See xmlCtxtUseOptions for details.
14088
 *
14089
 * Returns the resulting document tree
14090
 */
14091
xmlDocPtr
14092
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14093
               const char *URL, const char *encoding, int options)
14094
0
{
14095
0
    xmlParserInputPtr input;
14096
14097
0
    if (ctxt == NULL)
14098
0
        return(NULL);
14099
14100
0
    xmlCtxtReset(ctxt);
14101
0
    xmlCtxtUseOptions(ctxt, options);
14102
14103
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14104
0
                                      XML_INPUT_BUF_STATIC);
14105
14106
0
    return(xmlCtxtParseDocument(ctxt, input));
14107
0
}
14108
14109
/**
14110
 * xmlCtxtReadFile:
14111
 * @ctxt:  an XML parser context
14112
 * @filename:  a file or URL
14113
 * @encoding:  the document encoding (optional)
14114
 * @options:  a combination of xmlParserOption
14115
 *
14116
 * Parse an XML file from the filesystem, the network or a user-defined
14117
 * resource loader.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14123
                const char *encoding, int options)
14124
32.7k
{
14125
32.7k
    xmlParserInputPtr input;
14126
14127
32.7k
    if (ctxt == NULL)
14128
0
        return(NULL);
14129
14130
32.7k
    xmlCtxtReset(ctxt);
14131
32.7k
    xmlCtxtUseOptions(ctxt, options);
14132
14133
32.7k
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14134
14135
32.7k
    return(xmlCtxtParseDocument(ctxt, input));
14136
32.7k
}
14137
14138
/**
14139
 * xmlCtxtReadMemory:
14140
 * @ctxt:  an XML parser context
14141
 * @buffer:  a pointer to a char array
14142
 * @size:  the size of the array
14143
 * @URL:  base URL (optional)
14144
 * @encoding:  the document encoding (optional)
14145
 * @options:  a combination of xmlParserOption
14146
 *
14147
 * Parse an XML in-memory document and build a tree. The input buffer must
14148
 * not contain a terminating null byte.
14149
 *
14150
 * @URL is used as base to resolve external entities and for error
14151
 * reporting.
14152
 *
14153
 * See xmlCtxtUseOptions for details.
14154
 *
14155
 * Returns the resulting document tree
14156
 */
14157
xmlDocPtr
14158
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14159
                  const char *URL, const char *encoding, int options)
14160
89.1k
{
14161
89.1k
    xmlParserInputPtr input;
14162
14163
89.1k
    if ((ctxt == NULL) || (size < 0))
14164
0
        return(NULL);
14165
14166
89.1k
    xmlCtxtReset(ctxt);
14167
89.1k
    xmlCtxtUseOptions(ctxt, options);
14168
14169
89.1k
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14170
89.1k
                                      XML_INPUT_BUF_STATIC);
14171
14172
89.1k
    return(xmlCtxtParseDocument(ctxt, input));
14173
89.1k
}
14174
14175
/**
14176
 * xmlCtxtReadFd:
14177
 * @ctxt:  an XML parser context
14178
 * @fd:  an open file descriptor
14179
 * @URL:  base URL (optional)
14180
 * @encoding:  the document encoding (optional)
14181
 * @options:  a combination of xmlParserOption
14182
 *
14183
 * Parse an XML document from a file descriptor and build a tree.
14184
 *
14185
 * NOTE that the file descriptor will not be closed when the
14186
 * context is freed or reset.
14187
 *
14188
 * @URL is used as base to resolve external entities and for error
14189
 * reporting.
14190
 *
14191
 * See xmlCtxtUseOptions for details.
14192
 *
14193
 * Returns the resulting document tree
14194
 */
14195
xmlDocPtr
14196
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14197
              const char *URL, const char *encoding, int options)
14198
0
{
14199
0
    xmlParserInputPtr input;
14200
14201
0
    if (ctxt == NULL)
14202
0
        return(NULL);
14203
14204
0
    xmlCtxtReset(ctxt);
14205
0
    xmlCtxtUseOptions(ctxt, options);
14206
14207
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14208
14209
0
    return(xmlCtxtParseDocument(ctxt, input));
14210
0
}
14211
14212
/**
14213
 * xmlCtxtReadIO:
14214
 * @ctxt:  an XML parser context
14215
 * @ioread:  an I/O read function
14216
 * @ioclose:  an I/O close function
14217
 * @ioctx:  an I/O handler
14218
 * @URL:  the base URL to use for the document
14219
 * @encoding:  the document encoding, or NULL
14220
 * @options:  a combination of xmlParserOption
14221
 *
14222
 * parse an XML document from I/O functions and source and build a tree.
14223
 * This reuses the existing @ctxt parser context
14224
 *
14225
 * @URL is used as base to resolve external entities and for error
14226
 * reporting.
14227
 *
14228
 * See xmlCtxtUseOptions for details.
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
xmlDocPtr
14233
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14234
              xmlInputCloseCallback ioclose, void *ioctx,
14235
        const char *URL,
14236
              const char *encoding, int options)
14237
2
{
14238
2
    xmlParserInputPtr input;
14239
14240
2
    if (ctxt == NULL)
14241
0
        return(NULL);
14242
14243
2
    xmlCtxtReset(ctxt);
14244
2
    xmlCtxtUseOptions(ctxt, options);
14245
14246
2
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14247
2
                                  encoding, 0);
14248
14249
2
    return(xmlCtxtParseDocument(ctxt, input));
14250
2
}
14251